├── .github └── workflows │ └── static.yml ├── .gitignore ├── .gitmodules ├── .prettierrc.js ├── README.md ├── _build.sh ├── _index.txt ├── _index.yml ├── docs ├── .nojekyll ├── ethereum.html ├── ethereum │ ├── arrow_glacier.html │ ├── arrow_glacier │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── base_types.html │ ├── berlin.html │ ├── berlin │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── byzantium.html │ ├── byzantium │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── constantinople.html │ ├── constantinople │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── crypto.html │ ├── crypto │ │ ├── alt_bn128.html │ │ ├── blake2.html │ │ ├── elliptic_curve.html │ │ ├── finite_field.html │ │ └── hash.html │ ├── dao_fork.html │ ├── dao_fork │ │ ├── bloom.html │ │ ├── dao.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── ethash.html │ ├── exceptions.html │ ├── fork_criteria.html │ ├── frontier.html │ ├── frontier │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── 
vm.html │ ├── genesis.html │ ├── gray_glacier.html │ ├── gray_glacier │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── homestead.html │ ├── homestead │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── istanbul.html │ ├── istanbul │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── london.html │ ├── london │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── muir_glacier.html │ ├── muir_glacier │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── paris.html │ ├── paris │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── rlp.html │ ├── shanghai.html │ ├── shanghai │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── spurious_dragon.html │ ├── spurious_dragon │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── 
utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── tangerine_whistle.html │ ├── tangerine_whistle │ │ ├── bloom.html │ │ ├── fork.html │ │ ├── fork_types.html │ │ ├── state.html │ │ ├── trie.html │ │ ├── utils.html │ │ ├── utils │ │ │ ├── address.html │ │ │ ├── hexadecimal.html │ │ │ └── message.html │ │ └── vm.html │ ├── trace.html │ ├── utils.html │ └── utils │ │ ├── byte.html │ │ ├── ensure.html │ │ ├── hexadecimal.html │ │ ├── numeric.html │ │ └── safe_arithmetic.html ├── index.html └── search.js ├── legacy ├── Building-EVM-LLVM.md ├── Compiling-smart-contracts.md ├── Emitting-Program-Metadata.md ├── Example:-Compiling-using-existing-language-frontend.md ├── Function-Layouts.md ├── Future-Works.md ├── Handling-EVM-specific-operations.md ├── Home.md ├── Intrinsic-Functions.md ├── Language-Frontend-Integration.md ├── README.md ├── Running-integrated-tests-in-EVM-environment.md ├── Stack-and-Memory-management.md ├── The-EVM-Calling-Conventions.md ├── Types-and-type-conversions.md ├── block_construction │ └── src │ │ └── ProtoBlock │ │ └── README.md ├── erigion │ ├── Choice-of-storage-engine.md │ ├── Consensus-Engine-separation.md │ ├── Criteria-for-transitioning-from-Alpha-to-Beta.md │ ├── EVM-with-abstract-interpretation-and-backtracking.md │ ├── Erigon-Beta-1-announcement.md │ ├── Erigon2-prototype.md │ ├── Header-downloader.md │ ├── Home.md │ ├── LMDB-freelist-illustrated-guide.md │ ├── LMDB-freelist.md │ ├── State-sync-design.md │ ├── TEVM---Transpiled-EVM:-accelerate-EVM-improvement-R&D,-but-learning-from-eWASM.md │ ├── Transaction-Pool-Design.md │ └── Using-Postman-to-test-RPC.md ├── evm-illustrated │ ├── LICENSE │ ├── README.md │ ├── changelog.md │ ├── ethereum_evm_illustrated.pdf │ └── src │ │ └── ethereum_evm_illustrated.pptx ├── evm-modules │ ├── LICENSE │ ├── README.md │ └── gasometer │ │ ├── README.md │ │ └── sstore │ │ ├── net.md │ │ └── simple.md ├── files │ ├── 
Generating_stack_machine_code_using_LLVM.pdf │ └── LLVM_talk.pdf ├── gas-estimator │ ├── .dockerignore │ ├── .gitignore │ ├── .gitmodules │ ├── Dockerfile.evmone │ ├── Dockerfile.geth │ ├── Dockerfile.openethereum │ ├── Makefile │ ├── README.md │ ├── docs │ │ ├── notes │ │ │ ├── execution_comparison.md │ │ │ ├── instrumentation_measurement │ │ │ │ ├── docker_timer.md │ │ │ │ ├── evmone.md │ │ │ │ ├── example_bytecode_programs.md │ │ │ │ ├── geth.md │ │ │ │ ├── openethereum.md │ │ │ │ ├── openethereum_ewasm.md │ │ │ │ └── other_tools.md │ │ │ ├── measurement_standard_ruleset.md │ │ │ ├── meetings │ │ │ │ ├── 2020-10-30.md │ │ │ │ ├── 2020-11-06.md │ │ │ │ ├── 2020-11-13.md │ │ │ │ ├── 2020-11-20.md │ │ │ │ ├── 2020-11-27.md │ │ │ │ ├── 2020-12-04.md │ │ │ │ ├── 2020-12-11.md │ │ │ │ ├── 2020-12-18.md │ │ │ │ ├── 2020-12-22.md │ │ │ │ ├── 2021-01-08.md │ │ │ │ ├── 2021-01-22.md │ │ │ │ └── 2021-02-05.md │ │ │ ├── papers │ │ │ │ ├── adaptive_gas_cost_mechanism.md │ │ │ │ ├── bic_to_cpu.md │ │ │ │ ├── broken_metre.md │ │ │ │ ├── bytecode_monitoring_of_java.md │ │ │ │ ├── empirically_analyzing.md │ │ │ │ ├── holimans_gist_benchmarks.md │ │ │ │ ├── instruction_timing_model_1976.md │ │ │ │ ├── opbench.md │ │ │ │ ├── other.md │ │ │ │ ├── performance_benchmarking.md │ │ │ │ ├── timing_of_jvm_instructions.md │ │ │ │ └── vm_matters.md │ │ │ └── program_generator │ │ │ │ └── notes.md │ │ ├── report_stage_i.md │ │ └── report_stage_i_assets │ │ │ └── implementation_relative_all_opcodes.svg │ └── src │ │ ├── analysis │ │ ├── README.md │ │ ├── exploration.Rmd │ │ ├── exploration_timer_overhead.Rmd │ │ └── exploration_timers.Rmd │ │ ├── check_clocksource.sh │ │ ├── instrumentation_measurement │ │ ├── README.md │ │ ├── clock_resolution_go │ │ │ └── main.go │ │ ├── geth │ │ │ ├── README.md │ │ │ ├── instrumenter │ │ │ │ ├── instrumenter.go │ │ │ │ └── time.go │ │ │ └── main.go │ │ └── measurements.py │ │ └── program_generator │ │ ├── README.md │ │ ├── constants.py │ │ ├── data │ │ 
├── README.md │ │ ├── opcodes.csv │ │ ├── opcodes_ewasm.csv │ │ ├── selection.csv │ │ ├── selection_ewasm.csv │ │ ├── selection_ewasm_first_pass.csv │ │ └── selection_ewasm_from_spec.csv │ │ ├── program_generator.py │ │ └── requirements.txt ├── glossary │ └── abi.html ├── llc │ └── opcodes │ │ └── chainId+selfbalance.md ├── opcodes │ └── src │ │ ├── README.md │ │ └── table.mediawiki ├── test_evm │ └── contracts │ │ └── opodes │ │ └── test_all_opcodes.txt ├── tools │ └── evm │ │ └── words │ │ └── numWords.java └── wiki │ ├── Building-EVM-LLVM.md │ ├── Compiling-smart-contracts.md │ ├── Emitting-Program-Metadata.md │ ├── Example:-Compiling-using-existing-language-frontend.md │ ├── Function-Layouts.md │ ├── Future-Works.md │ ├── Handling-EVM-specific-operations.md │ ├── Home.md │ ├── Intrinsic-Functions.md │ ├── Language-Frontend-Integration.md │ ├── Running-integrated-tests-in-EVM-environment.md │ ├── Stack-and-Memory-management.md │ ├── The-EVM-Calling-Conventions.md │ ├── Types-and-type-conversions.md │ └── files │ ├── Generating_stack_machine_code_using_LLVM.pdf │ └── LLVM_talk.pdf ├── main.js ├── package-lock.json └── package.json /.github/workflows/static.yml: -------------------------------------------------------------------------------- 1 | # Simple workflow for deploying static content to GitHub Pages 2 | name: Deploy static content to Pages 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["gh-pages"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: false 23 | 24 | jobs: 25 | # Single deploy job since we're just deploying 26 | deploy: 27 | environment: 28 | name: github-pages 29 | url: ${{ steps.deployment.outputs.page_url }} 30 | runs-on: ubuntu-latest 31 | steps: 32 | - name: Checkout 33 | uses: actions/checkout@v4 34 | - name: Setup Pages 35 | uses: actions/configure-pages@v5 36 | - name: Upload artifact 37 | uses: actions/upload-pages-artifact@v3 38 | with: 39 | # Upload entire repository 40 | path: '.' 41 | - name: Deploy to GitHub Pages 42 | id: deployment 43 | uses: actions/deploy-pages@v4 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | gh-pages/ 3 | *~ 4 | .*sw? 5 | \#* 6 | .DS_Store 7 | 8 | *.rej 9 | *.orig 10 | 11 | *.pro 12 | /packages/rpm/doxygen.spec 13 | *.idb 14 | *.pdb 15 | 16 | /doxygen_docs 17 | /doxygen.tag 18 | /build* 19 | /qtools_docs 20 | /warnings.log 21 | 22 | tags 23 | 24 | .idea 25 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "evmc"] 2 | path = evmc 3 | url = https://github.com/ethereum/evmc 4 | -------------------------------------------------------------------------------- /.prettierrc.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | // "schema": "https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/prettierrc.json", 3 | // "$schema": "http://json.schemastore.org/prettierrc", 4 | module.exports = { 5 | arrowParens: 'always', 6 | bracketSpacing: true, 7 | endOfLine: 'lf', 8 | printWidth: 100, 9 | proseWrap: 'never', 10 | singleQuote: true, 11 | tabWidth: 2, 12 | trailingComma: 'all', 13 | quoteProps: 'as-needed', 14 | semi: true, 15 | 
overrides: [ 16 | { 17 | files: '*.md', 18 | options: { 19 | parser: 'markdown', 20 | printWidth: 120, 21 | proseWrap: 'never', 22 | tabWidth: 4, 23 | useTabs: true, 24 | singleQuote: false, 25 | bracketSpacing: true, 26 | }, 27 | }, 28 | ], 29 | }; 30 | -------------------------------------------------------------------------------- /_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat Building-EVM-LLVM.md Compiling-smart-contracts.md Emitting-Program-Metadata.md Example:-Compiling-using-existing-language-frontend.md Function-Layouts.md Future-Works.md Handling-EVM-specific-operations.md Home.md Intrinsic-Functions.md Language-Frontend-Integration.md Running-integrated-tests-in-EVM-environment.md Stack-and-Memory-management.md The-EVM-Calling-Conventions.md Types-and-type-conversions.md > OMNIBUS.md -------------------------------------------------------------------------------- /_index.txt: -------------------------------------------------------------------------------- 1 | Home.md 2 | The-EVM-Calling-Conventions.md 3 | Building-EVM-LLVM.md 4 | Function-Layouts.md 5 | Stack-and-Memory-management.md 6 | Types-and-type-conversions.md 7 | Compiling-smart-contracts.md 8 | Emitting-Program-Metadata.md 9 | Example:-Compiling-using-existing-language-frontend.md 10 | Handling-EVM-specific-operations.md 11 | Intrinsic-Functions.md 12 | Language-Frontend-Integration.md 13 | Running-integrated-tests-in-EVM-environment.md 14 | Future-Works.md -------------------------------------------------------------------------------- /_index.yml: -------------------------------------------------------------------------------- 1 | evm: 2 | Home.md 3 | The-EVM-Calling-Conventions.md 4 | Building-EVM-LLVM.md 5 | Function-Layouts.md 6 | Stack-and-Memory-management.md 7 | Types-and-type-conversions.md 8 | Compiling-smart-contracts.md 9 | Emitting-Program-Metadata.md 10 | Example:-Compiling-using-existing-language-frontend.md 11 | 
Handling-EVM-specific-operations.md 12 | Intrinsic-Functions.md 13 | Language-Frontend-Integration.md 14 | Running-integrated-tests-in-EVM-environment.md 15 | Future-Works.md -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/docs/.nojekyll -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /legacy/Building-EVM-LLVM.md: -------------------------------------------------------------------------------- 1 | The project compiles like other LLVM projects. The target's name is `EVM`, but since it is not yet finalized, you have to specify `-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=EVM` when you compile it. 2 | 3 | In short, you can use the following to build the backend: 4 | 5 | ``` 6 | git clone git@github.com:etclabscore/evm_llvm.git 7 | cd evm_llvm 8 | git checkout EVM 9 | mkdir build && cd build 10 | cmake -DLLVM_TARGETS_TO_BUILD=EVM -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=EVM .. 11 | make -j8 12 | ``` 13 | -------------------------------------------------------------------------------- /legacy/Compiling-smart-contracts.md: -------------------------------------------------------------------------------- 1 | ### The Contract constructor function 2 | 3 | Because EVM's execution always start from the beginning of the code (`pc = 0`), there must be a way to handle more complicated contract behaviours. In EVM LLVM, we use a function to describe the function handling. It is called contract constructor function. 
To implement the function, developers are expected to respect the following contract constructor properties: 4 | 5 | - The constructor should be the first function in the generated LLVM IR. 6 | - The constructor should be named `solidity.main` or `main` (could change in the future). The backend recognizes these specific names and will generate different call codes. 7 | - The constructor should not take any arguments. 8 | - The constructor should initialize the function's `free memory pointer`, which is located at address `0x40`. The `free memory pointer` is like the usual frame pointer, used to calculate function frames and stack allocations. Because it is located at `0x40`, you cannot initialize it to a smaller number. 9 | 10 | ### Skeleton example of a very small constructor function 11 | 12 | Here is an illustration of the skeleton of a small smart contract: 13 | 14 | ``` 15 | declare i256 @llvm.evm.calldataload(i256) 16 | declare void @llvm.evm.return(i256, i256) 17 | declare void @llvm.evm.mstore(i256, i256) 18 | 19 | define void @main() { 20 | entry: 21 | call void @llvm.evm.mstore(i256 64, i256 128) 22 | %0 = call i256 @llvm.evm.calldataload(i256 0) 23 | %1 = call i256 @llvm.evm.calldataload(i256 32) 24 | %2 = call i256 @add(i256 %0, i256 %1) 25 | call void @llvm.evm.mstore(i256 0, i256 %2) 26 | call void @llvm.evm.return(i256 0, i256 32) 27 | unreachable 28 | } 29 | 30 | define i256 @add(i256, i256) #0 { 31 | %3 = alloca i256, align 4 32 | %4 = alloca i256, align 4 33 | store i256 %0, i256* %3, align 4 34 | store i256 %1, i256* %4, align 4 35 | %5 = load i256, i256* %3, align 4 36 | %6 = load i256, i256* %4, align 4 37 | %7 = add nsw i256 %5, %6 38 | ret i256 %7 39 | } 40 | ``` 41 | 42 | **Usually, it is the frontend's responsibility to do the smart contract's plumbing, including the contract's constructor function.** We need the language frontends to generate corresponding LLVM IR code. 
43 | 44 | This smart contract does the following things; 45 | 46 | - Initialize the `free memory pointer` to 128 47 | - parse the first two 32-byte inputs 48 | - call the `@add` function and supply it with the two parsed arguments 49 | - In the function `@add`, we simply add the two arguments, and return it 50 | - In the `@main` function, return the retrieved value using `llvm.evm.return` intrinsic. 51 | 52 | ### Compiling the smart contract 53 | 54 | Let's put the above smart contract code into a file named `test.ll`, and we use `llc` to generate EVM binary: 55 | 56 | ``` 57 | llc -mtriple=evm -filetype=obj test.ll -o test.o 58 | ``` 59 | 60 | ### Running the contract 61 | 62 | A generated `.o` file is in binary format. To see its content in hex, try to use `xxd`, for example: 63 | 64 | ``` 65 | xxd -p -cols 65536 test.o 66 | ``` 67 | 68 | The `xxd` will emit a hex string representation of the binary format. `xxd` will try to break the line if it is too long. Here we specify `-cols 65536` to avoid linebreaking. After calling `xxd`, you should see some output such as: 69 | 70 | ``` 71 | 5b600135600080803561003d909192939091604051806108200152604051610840016040526004580192565b60405160209003516040529052602090f35b80826040519190915260206040510152019056 72 | ``` 73 | 74 | That is what we need to execute using an EVM engine. Let's try to do it using Geth's EVM. 
Remember that we need to supply two input arguments, so the command line should be like: 75 | 76 | ``` 77 | evm --input 1234567890123456789012345678901234567890123456789012345678901234 --code 5b600135600080803561003d909192939091604051806108200152604051610840016040526004580192565b60405160209003516040529052602090f35b80826040519190915260206040510152019056 run 78 | ``` 79 | 80 | `evm` will emit the result of adding the two inputs: 81 | 82 | ``` 83 | 0x468acf08a2468acf08a2468acf08a2468acf08a2468acf08a2468acf08a24634 84 | ``` 85 | -------------------------------------------------------------------------------- /legacy/Emitting-Program-Metadata.md: -------------------------------------------------------------------------------- 1 | EVM LLVM provides a way to emit a program's metadata for various purposes. For example, a symbol table that records the jump destinations can be emitted along with the generated binary. 2 | 3 | Developers can use this utility to emit more program information. 4 | 5 | ## Existing implementation 6 | 7 | When compiling a contract, a file named `EVMMeta.txt` will be generated along with the binary code. The file contains the function symbol table in the compiled program, along with the offset of each function. The metadata file can be used for various purposes, such as debugging, manual linking, analysis, and so on. 8 | 9 | To use a metadata file name other than the default `EVMMeta.txt`, specify it with the `-evm_md_file` option. 10 | 11 | # Limitation 12 | 13 | Existing implementation of EVM metadata emitting is limited to `MachineCode` module/level, which means that if there are any transformations at a higher level such as in the IR level, they will not be shown in the result. 
14 | -------------------------------------------------------------------------------- /legacy/Example:-Compiling-using-existing-language-frontend.md: -------------------------------------------------------------------------------- 1 | Let's try to use a simple C file to test our compiler: 2 | 3 | ```sh 4 | cat <<EOF > test.c 5 | unsigned x; 6 | int abc(unsigned a, unsigned b, unsigned c) { 7 | if (c > 0) { 8 | return a + x; 9 | } else { 10 | return a + b; 11 | } 12 | } 13 | EOF 14 | ``` 15 | 16 | Prerequisite: You have to install `clang` and use it to generate LLVM IR first: 17 | 18 | ``` 19 | clang -S -emit-llvm test.c 20 | ``` 21 | 22 | This will generate a `test.ll` file which should be the LLVM IR equivalent of our `test.c` file. Then we can generate EVM binary or assembly from it. In order to use the backend to generate EVM assembly, you have to specify `-mtriple=evm` when calling `llc`. An example is as follows: 23 | 24 | ``` 25 | ./build/bin/llc -mtriple=evm test.ll -o test.s 26 | ``` 27 | 28 | The generated `test.s` file contains the compiled EVM assembly code. Note that the generated code is the function body itself. In order to generate a complete smart contract source code we need to use a smart contract creator function, which we will talk about on another page. 29 | 30 | Notice that you can also get the binary code of the function body by emitting an object file: 31 | 32 | ``` 33 | ./build/bin/llc -mtriple=evm -filetype=obj test.ll -o test.o 34 | ``` 35 | -------------------------------------------------------------------------------- /legacy/Function-Layouts.md: -------------------------------------------------------------------------------- 1 | #### Address layout 2 | 3 | EVM bytecode has a flat structure. It does not have explicit function entries, nor symbol tables. All execution starts from address `0x00`. 4 | 5 | #### Limitations 6 | 7 | Notice that at this moment this backend is limited to generating correct code for a single compilation unit. 
8 | 9 | In order to link more than one compilation units, one shall inline existing compilation units in the frontend so that the frontend can generate correct `main` (the `function dispatcher` function) for the whole smart contract. 10 | 11 | #### The function dispatcher (meta function) 12 | 13 | The `function dispatcher` function (usually called `main` function in some contexts) is always placed at the beginning of the generated binary bytecode. The dispatcher is responsible for: 14 | 15 | 1. parse the call data and find the called function address in the jump table using the hash value provided in the call data. 16 | 2. extract the call arguments, and push them on to stack. 17 | 3. call the function address specified in the jump table. 18 | 19 | ``` 20 | Start of address 21 | +----------------> +-------------------------+ 22 | | Function dispatcher | 23 | | Jump Table | 24 | | (Func1, | 25 | | Func2, | 26 | | Func3) | 27 | +-------------------------+ 28 | | | 29 | | Func1 | 30 | | | 31 | +-------------------------+ 32 | | | 33 | | Func2 | 34 | | | 35 | +-------------------------+ 36 | | | 37 | | Func3 | 38 | | | 39 | +-------------------------+ 40 | ``` 41 | 42 | #### Moving the function dispatcher to front of the LLVM IR function list 43 | 44 | At this moment it is up to the frontend developer to move the LLVM IR function to the beginning of the function list. 
You can do something like this when creating the function dispatcher: 45 | 46 | ``` 47 | // Let's say you have a dispatcher function named "dispatcher" 48 | 49 | // You should include "llvm/IR/SymbolTableListTraits.h" here 50 | using FunctionListType = SymbolTableList<Function>; 51 | FunctionListType &FuncList = TheModule->getFunctionList(); 52 | FuncList.remove(dispatcher); 53 | FuncList.insert(FuncList.begin(), dispatcher); 54 | ``` 55 | -------------------------------------------------------------------------------- /legacy/Future-Works.md: -------------------------------------------------------------------------------- 1 | # Functionalities 2 | 3 | ## Experimental support of landing pad 4 | 5 | Landingpad is used to support exception handling. 6 | 7 | ## Experimental support of simulating heap allocations 8 | 9 | EVM does not have a heap space, so we cannot use heap allocations. We might be able to work around it. 10 | 11 | ## Constant table support 12 | 13 | Having a constant table in the smart contract could potentially save some code size if the elements in the table are reused. 14 | 15 | ## Metadata export 16 | 17 | We could export more metadata for debugging, analyzing, and so on. 18 | 19 | # Optimizations 20 | 21 | ## Support more than 16 local variables 22 | 23 | EVM can only support retrieval of an element up to depth of 16 from the stack top using instructions `SWAP1` to `SWAP16` -- resulting in a limitation in the Solidity compiler, which can only support 16 local variables. At this moment, EVM LLVM will also face a `stack too deep` issue if there are more than 16 variables in a single basic block. 24 | 25 | But in LLVM we can totally work around this issue, and do a much better job. With dataflow analysis and register allocation algorithm, we can have near-optimal variable assignment (on the stack or on memory stack) in linear time. 26 | 27 | ## Instruction scheduling 28 | 29 | Arranging the order of the opcodes in EVM binary is critical to its performance. 
Instructions have to be arranged so that we have minimal stack manipulation overhead (the opcodes that do not do actual computation, but rather reorder stack operands' relative position to the top of stack). 30 | 31 | EVM LLVM backend is designed in such a way that a scheduler before register allocation can be implemented to reduce the stack operation overhead. 32 | 33 | ## Improve EVM calling conventions 34 | 35 | When calling a subroutine, the return address is the first argument and resides at top of stack. This is non-optimal because the return address will definitely not be used until the very end of the subroutine, and taking up a visible slot is expensive. We can re-arrange the return address to be at the end of the arguments so it will not have to be reached until we want to return from the subroutine. 36 | 37 | ## Re-materialization of constants 38 | 39 | Usually, small constants should not stay in the stack --- they should be rematerialized whenever they are needed. 40 | -------------------------------------------------------------------------------- /legacy/Handling-EVM-specific-operations.md: -------------------------------------------------------------------------------- 1 | Ethereum Virtual Machine specific operations, such as accessing storage, retrieving block information, etc., are performed through EVM specific instructions. Solidity language automatically generates necessary EVM-specific instructions under the hood so as to hide the details from Solidity developers. However, as a compiler backend, the input to EVM LLVM is LLVM IR format, which is unable to hold any language specific semantics that is higher than the C language level. So it is up to compiler frontends to lower language specific semantics onto LLVM IR level. 2 | 3 | Intrinsic functions are used to represent EVM-specific semantics in the input LLVM IR. Intrinsic functions are usually higher level representations of architecture-specific instructions. 
In EVM LLVM, we allow users to leverage EVM-specific instructions that are used to interact with the chain or storage by exposing those EVM instructions in the form of intrinsic functions. 4 | 5 | - This [page](https://github.com/etclabscore/evm_llvm/wiki/Intrinsic-Functions) lists the intrinsic functions that frontend developers can use. 6 | - Intrinsics are defined [here](https://github.com/etclabscore/evm_llvm/blob/6271ae12899b6b9a2bfbcb3a690ec4b5e8652cfa/include/llvm/IR/IntrinsicsEVM.td#L14). 7 | - And here are examples on [how to leverage intrinsics](https://github.com/etclabscore/evm_llvm/blob/6271ae12899b6b9a2bfbcb3a690ec4b5e8652cfa/test/CodeGen/EVM/intrinsics.ll#L1). 8 | -------------------------------------------------------------------------------- /legacy/Home.md: -------------------------------------------------------------------------------- 1 | ![evm-llvm-green-dragon](https://user-images.githubusercontent.com/450283/63640209-85cb3c00-c66b-11e9-9610-0c339ae66ac7.png) 2 | 3 | Welcome to the `evm_llvm` wiki! This project aims at bringing LLVM infrastructure to the EVM world where smart contracts are widely deployed. 4 | 5 | EVM LLVM is an EVM architecture backend for LLVM. With EVM LLVM you can generate EVM binary code with LLVM-based compilers. The backend does not assume a language frontend, so you should be able to plug in a new smart contract language frontend to generate EVM binary. 6 | 7 | The goal of this project is to enable various platforms, tools, and smart contract programming language projects to quickly adopt a high-performance EVM backend. 
8 | -------------------------------------------------------------------------------- /legacy/Language-Frontend-Integration.md: -------------------------------------------------------------------------------- 1 | ## EVM target specific changes 2 | 3 | ### Frontend is expected to emit 256bit values LLVM IR 4 | 5 | The EVM architecture is the only 256-bit machine out there in the market, and so far it has not yet received support from the LLVM community. We added 256-bit and 160-bit support in the LLVM IR level. 6 | 7 | In order to utilize 256-bit and 160-bit operands, developers are expected to emit `i256` and `i160` data types in their IR code generation. Include the `evm_llvm`'s header files in `include/llvm` folders so that these two pre-defined data types can be properly generated. 8 | 9 | ### Frontend needs to generate compatible LLVM IR 10 | 11 | Notice that development of this backend is based on LLVM 10, which was released in March 2020. We also have an LLVM 8 branch just to support those who create their frontends in LLVM 8. 12 | 13 | We could do back porting to other lower versions such as LLVM 9 at the request of developers for better stability or compatibility. Please let me know if you have such needs. 14 | -------------------------------------------------------------------------------- /legacy/Running-integrated-tests-in-EVM-environment.md: -------------------------------------------------------------------------------- 1 | EVM is different from other execution platforms in that it is on blockchain. The result of the execution of a smart contract will be dependent on the state of the blockchain as well. So, we have to integrate EVM execution environment (in this early stage, `geth`) into our tests. 2 | 3 | ## Constructor 4 | 5 | Unit tests will only focus on small test functions. But you cannot execute a function independently on blockchain, so we need to have a contract constructor and dispatcher as the first function in the file. 
The reason is that EVM will always start its execution from address `0x00` -- where the contract header / constructor / dispatcher resides. The header then tries to set up the contract -- allocating memory/storage or parsing incoming parameters, et cetera. 6 | 7 | Here is the commented constructor code we use for handling unit tests: 8 | 9 | ``` 10 | define void @main() { 11 | entry: 12 | %0 = call i256 @llvm.evm.calldataload(i256 0) ; extract first 32-byte argument 13 | %1 = call i256 @llvm.evm.calldataload(i256 32); extract second 32-byte argument 14 | %2 = call i256 @test(i256 %0, i256 %1) ; execute the unit test function 15 | call void @llvm.evm.mstore(i256 0, i256 %2) ; store the returned value to memory address `0x00` 16 | call void @llvm.evm.return(i256 0, i256 32) ; call "return" to return the value returned by @test 17 | unreachable 18 | } 19 | ``` 20 | 21 | Notice that the `@test` function takes 2 parameters, so we will have two calls to `@llvm.evm.calldataload`. 22 | 23 | The unit test is compiled using `llc` with options: `-mtriple=evm -filetype=obj`. Then the code is executed using `geth`'s `evm` command. 24 | 25 | ## Testing utilities 26 | 27 | A Python script is used to handle the testing: the file `evm_llvm/tools/evm-test/evm_test.py` is the script we created to test functionalities of the llvm backend. Here is what it does: 28 | 29 | - call evm_llvm backend to compile an LLVM IR file (`.ll` file) into an object file (`.o` file). The file should contain the function we are going to verify along with a smart contract constructor header which is used to handle input arguments. The function should be at the beginning of the IR file (the first function). 30 | - extract the contract opcodes from the `.o` file and prepare the input arguments (by padding each argument to be 32 bytes long and concatenating everything into a long string). 31 | - Run the executable binary using geth's `evm`, get the result from the printed output, and compare the result with the expected value. 
32 | 33 | ## How to run testings 34 | 35 | 1. Install Python3 36 | 2. Run `evm_llvm/tools/evm-test/evm_test.py` then you should see the results. 37 | 38 | ## How to add new tests 39 | 40 | Please take a look at the `evm_llvm/tools/evm-test/evm_testsuit.py` file, it organizes tests by categorizing them into different `OrderedList`. Each element of the list contains the following information: 41 | 42 | - the name of the test 43 | - the array of input arguments 44 | - the path of the unit test source code file (in LLVM IR form) 45 | - the expected result value 46 | 47 | When adding new tests, you should: 48 | 49 | - put your test files into `evm_llvm/test/CodeGen/EVM` folder. 50 | - add the test file path and expected results to the `evm_testsuit.py` file. (We might change it when the file gets too large). 51 | 52 | ## TODO lists 53 | 54 | - add blockchain state related tests 55 | - add re-entrance tests (which are also related to changes of blockchain states) 56 | 57 | Please help improve the test utility! 58 | -------------------------------------------------------------------------------- /legacy/Stack-and-Memory-management.md: -------------------------------------------------------------------------------- 1 | ## Variables 2 | 3 | In the context of stack machine, a variable refers to an operand that will be consumed by an opcode. In EVM LLVM, variables are treated as virtual registers, until they are _stackfied_ (convert register-based code to stack-based code) right before lowering to machine code. 4 | 5 | In LLVM's internal SSA representation mode, it is fairly easy to compute a register's live range (the range from its assignment to its last use). Variables are treated differently with regard to its live range. Local variables (variables that its liveness only extends within a single basic block) will live entirely on the stack, while non-local variables (variables that live across basic blocks) will be spilled to a memory slot allocated by the compiler. 
6 | 7 | #### Frame Objects 8 | 9 | Frame objects will be allocated either on stack or on memory space. Since each of the elements are 256bits, we have to ensure that frame objects are 256bits in length as well. Frame objects with smaller length is not supported. 10 | 11 | It is possible for a frame object to be allocated on to memory space, if we are consuming too much of stack space. The stack allocation pass will try to find an efficient way to decide which goes to the memory and which stays in stack. 12 | 13 | ### Frame Pointer (or Free Memory Pointer) 14 | 15 | [Stack pointers and frame pointers](https://en.wikipedia.org/wiki/Call_stack#Stack_and_frame_pointers) are essential to support subroutine calls. Frame pointer is used to record the structure of stack frames. Because we do not have registers in EVM, we will have to store stack frame pointer in memory locations. Usually, we put stack frame pointer at location `0x40`, and we follow Solidity compiler's convention to initialize it to value `128`. So the stack frame of the first function starts at that location. The value of frame pointer changes as the contract calls a subroutine or exits from a subroutine. Whenever we need to have access to frame pointer, we will retrieve its value from that specific location. 16 | 17 | ### Memory stack 18 | 19 | Part of the memory is used as a stack for function calls and variable spills. The structure is described as follows: 20 | 21 | - The stack goes from lower address to higher address, as different from usual hardware implementations. 22 | - The frame is arranged into 3 parts: 23 | - **frame object locations**. Each frame object has its own frame slot. Frame object `x` will have a 32 byte space starting from `$fp + (x * 32)`, where `$fp` is the frame pointer, and is stored at location `0x40`. 24 | - **spilled variables**. Variable that are unable to be fully stackified will reside on the memory stack. 
In codegen, each spilled variable will have an index, and each index refers to a memory slot. A spilled variable that bears index `y`, will reside at location `$fp + (number_of_frame_objects * 32) + (y * 32)`. 25 | - **subroutine context**. Like a regular register machine, the memory stack is used to store subroutine context so as to support function calls. Two slots are allocated at the end of current frame for a) the existing frame pointer, and b) return `PC` address. 26 | 27 | Here is an example showing a stack frame right before we jump into a subroutine: 28 | 29 | ``` 30 | Stack top Higher address 31 | +-----------> +----------------------------+ <--------------+ 32 | | | 33 | | Return Address | 34 | | | 35 | +----------------------------+ 36 | | | 37 | | Function argument | 38 | new FP | | 39 | +-----------> +----------------------------+ 40 | | | 41 | | Saved frame pointer | 42 | | (Start of frame) | 43 | +----------------------------+ 44 | | | 45 | | Stack Object 1 | 46 | | | 47 | +----------------------------+ 48 | | | 49 | | Frame Object 2 | 50 | | | 51 | +----------------------------+ 52 | | | 53 | | Frame Object 1 | 54 | Start of frame | | Lower address 55 | +------------> +----------------------------+ <----------------+ 56 | ``` 57 | -------------------------------------------------------------------------------- /legacy/The-EVM-Calling-Conventions.md: -------------------------------------------------------------------------------- 1 | The EVM architecture is a simplistic structure, but it has everything we need to do usual program computations. 2 | 3 | ## Types of calls 4 | 5 | There are two types of calls in an EVM smart contract: 6 | 7 | 1. **Internal calls**. Internal calls are referred to function calls within a smart contract. An example is that we have two defined function `A` and `B`, and somewhere in `A` we save our context and change our execution flow to the beginning of `B`. 8 | 2. **External calls**. Or cross-contract calls. 
`A` and `B` are defined in different deployed EVM contract and `A` calls `B` in its context. 9 | 10 | ## Internal call conventions 11 | 12 | Up to ETH 1.5, there is no link and jump EVM opcode for easy handling of subroutines(even though some [discussions](https://github.com/ethereum/EIPs/issues/2315) are on-going). So we have to manually handle subroutine calls. Here are the calling conventions for an internal calls: 13 | 14 | - current subroutine's frame pointer is saved at stack, at memory location `$fp - 32` where `$fp` is the subroutine call's frame pointer. 15 | - arguments are all pushed on stack, along with the return address. Argument with smaller index number occupies a stack slot on top of another argument with a larger index number. For example, when we want to do a function call: `func abc(x, y, z)`, here is the arrangement of the arguments: 16 | 17 | ``` 18 | +-----------+ 19 | |Return Addr| 20 | +-----------+ 21 | | X | 22 | +-----------+ 23 | | Y | 24 | +-----------+ 25 | Current FP | Z | 26 | +------------> +-----------+ 27 | | Old FP | 28 | +-----------+ 29 | | ..... | 30 | +-----------+ 31 | ``` 32 | 33 | _Note: Putting the return address on top of the stack is because it is easier to compute the location, but this will result in more stack manipulation overhead for the subroutine calls. We will improve this design in a later version._ 34 | 35 | - A subroutine's return value is stored on stack top. _Note: currently we only support one return value. In the future we will improve it by supporting multiple return values._ 36 | 37 | ## Procedure of a subroutine call 38 | 39 | To illustrate the procedure for a subroutine call, we need to do the following to save the context of current function execution: 40 | 41 | 1. calculate the current frame size. The frame size should be the size sum of: a) slots occupied by frame objects, b) slots occupied by spilled variables, and c) one more slot for storing current frame pointer. 
Let's assume the frame size is calculated to be `%frame_size`. 42 | 2. bump the frame pointer to: `$fp = $fp + %frame_size`. After that, we can easily restore the old frame pointer by looking at location `$fp - 32`. 43 | 3. push all subroutine arguments in order on to stack. 44 | 4. push return address onto stack. (At this moment, the return address is `PC + 6`). 45 | 5. push the beginning address of subroutine and jump. 46 | 47 | Right before we return from a subroutine, the stack should be empty and the return address should be at the top of the stack. When returning from a subroutine call, we should do the following: 48 | 49 | 1. push return value on to top of stack. 50 | 2. Do a `swap1` to move the return address to top of stack. 51 | 3. jump to return address and resume the execution in caller function. If the function returns nothing, simply jump to return address. 52 | 53 | After jumping back to caller, we have to resume the execution: 54 | 55 | 1. restore caller's frame pointer by storing the value at location `$fp - 32` to `0x40`. 56 | 57 | ## [EIP2315](https://eips.ethereum.org/EIPS/eip-2315) Support: Subroutine calls 58 | 59 | The support of subroutines inside EVM enables compiler to generate better performance code. To be more specific: With EIP2315, it is up to EVM to maintain the stack: 60 | 61 | 1. the return address stack is only accessible to VM 62 | 2. the stack is invisible to users and compilers 63 | 64 | A better calling convention is made with the support of EIP2315: 65 | 66 | ### To generate a call procedure 67 | 68 | 1. calculate the current frame size. The frame size should be the size sum of: a) slots occupied by frame objects, b) slots occupied by spilled variables, and c) one more slot for storing current frame pointer. Let's assume the frame size is calculated to be `%frame_size`. 69 | 2. save existing frame pointer at memory location `$fp + %frame_size - 32`. The frame pointer is maintained at `0x40`. 70 | 3.
bump the frame pointer to: `$fp = $fp + %frame_size`. After that, we can easily restore the old frame pointer by looking at location `$fp - 32`. 71 | 4. push all subroutine arguments in order on to stack. 72 | 5. push the beginning address of subroutine and call `JUMPSUB` 73 | 74 | ### To generate the return 75 | 76 | 1. push return value on to top of stack. 77 | 2. call `RETURNSUB` to resume execution of caller function. 78 | 79 | ## External calls 80 | 81 | External calls are implemented using intrinsic calls. 82 | -------------------------------------------------------------------------------- /legacy/Types-and-type-conversions.md: -------------------------------------------------------------------------------- 1 | ## Newly supported Types 2 | 3 | So far the open-source LLVM trunk has not yet implemented bit size support larger than 128bits. We have implemented 256bit supports in our own backend, and is considering contributing them back to main trunk. 4 | 5 | Users are allowed to use `i256` and `i160` data types in their generated LLVM IR, which represent 256bit integer types and 160bit integer types respectively. 6 | 7 | Even though all EVM data types are 256bit in length internally. We are still able to offer support to smaller data types. However, users are encouraged to use 256bit data types internally because it is free. 8 | 9 | ## Contract Input Argument Types -- The Solidity convention 10 | 11 | Contract arguments are passed to EVM via the call data field. The function dispatcher is responsible to extract input arguments from call data. 12 | 13 | In Solidity's convention, the arguments in call data are padded to 32 bytes long if its data type's length is shorter. So, in order to maintain the convention, the function dispatcher needs to truncate the input arguments to the defined size in the function that is going to be called. 14 | 15 | This is undoubtedly inefficient, so users are discouraged to use smaller data types. 
16 | -------------------------------------------------------------------------------- /legacy/block_construction/src/ProtoBlock/README.md: -------------------------------------------------------------------------------- 1 | # Block Construction: Proto Block 2 | 3 | A proto-block is a block that has been executed but has not been sealed. The header is missing the nonce and mixhash, and can still accept extra data. 4 | 5 | Proto-blocks are produced when transactions are executed, and can be turned into full valid blocks. 6 | 7 | A **block header** that has not finished being sealed. 8 | 9 | **toHeader**: Seals the header into a block header 10 | 11 | **proto-block body**: is the representation of the intermediate form of a block body before being sealed. 12 | 13 | ```kotlin 14 | /* source: https://github.com/apache/incubator-tuweni/blob/main/eth-blockprocessor/src/main/kotlin/org/apache/tuweni/blockprocessor/ProtoBlock.kt */ 15 | 16 | /** 17 | * A block header that has not finished being sealed. 18 | */ 19 | data class SealableHeader( 20 | val parentHash: Hash, 21 | val stateRoot: Hash, 22 | val transactionsRoot: Hash, 23 | val receiptsRoot: Hash, 24 | val logsBloom: Bytes, 25 | val number: UInt256, 26 | val gasLimit: Gas, 27 | val gasUsed: Gas, 28 | ) { 29 | 30 | /** 31 | * Seals the header into a block header 32 | */ 33 | fun toHeader( 34 | ommersHash: Hash, 35 | coinbase: Address, 36 | difficulty: UInt256, 37 | timestamp: Instant, 38 | extraData: Bytes, 39 | mixHash: Hash, 40 | nonce: UInt64, 41 | ): BlockHeader { 42 | return BlockHeader( 43 | parentHash, 44 | ommersHash, 45 | coinbase, 46 | stateRoot, 47 | transactionsRoot, 48 | receiptsRoot, 49 | logsBloom, 50 | difficulty, 51 | number, 52 | gasLimit, 53 | gasUsed, 54 | timestamp, 55 | extraData, 56 | mixHash, 57 | nonce 58 | ) 59 | } 60 | } 61 | 62 | /** 63 | * A proto-block body is the representation of the intermediate form of a block body before being sealed. 
64 | */ 65 | data class ProtoBlockBody(val transactions: List) { 66 | /** 67 | * Transforms the proto-block body into a valid block body by adding ommers. 68 | */ 69 | fun toBlockBody(ommers: List): BlockBody { 70 | return BlockBody(transactions, ommers) 71 | } 72 | } 73 | 74 | /** 75 | * A proto-block is a block that has been executed but has not been sealed. 76 | * The header is missing the nonce and mixhash, and can still accept extra data. 77 | * 78 | * Proto-blocks are produced when transactions are executed, and can be turned into full valid blocks. 79 | */ 80 | class ProtoBlock( 81 | val header: SealableHeader, 82 | val body: ProtoBlockBody, 83 | val transactionReceipts: List, 84 | val stateChanges: TransientStateRepository 85 | ) { 86 | 87 | fun toBlock( 88 | ommers: List, 89 | coinbase: Address, 90 | difficulty: UInt256, 91 | timestamp: Instant, 92 | extraData: Bytes, 93 | mixHash: Hash, 94 | nonce: UInt64, 95 | ): Block { 96 | val ommersHash = Hash.hash(RLP.encodeList { writer -> ommers.forEach { writer.writeValue(it.hash) } }) 97 | return Block( 98 | header.toHeader(ommersHash, coinbase, difficulty, timestamp, extraData, mixHash, nonce), 99 | body.toBlockBody(ommers) 100 | ) 101 | } 102 | } 103 | ``` 104 | -------------------------------------------------------------------------------- /legacy/erigion/Choice-of-storage-engine.md: -------------------------------------------------------------------------------- 1 | We often get asked why we opted for our current storage engine, [MDBX](https://github.com/erthink/libmdbx). 2 | 3 | # Why not LevelDB / RocksDB? 4 | 5 | Answer is pretty simple: no MVCC. 6 | 7 | MVCC allows us to "stitch together" more complex data objects from more normalised form that is stored in the DB, without loss of consistency (if you do it all in a single read-only transaction). This is used quite a lot in the RPC daemon and simplifies the code a lot. 
You do not need to explicitly link all the data in the application-level code, you just trust that the database will give you consistent snapshot. 8 | 9 | Other than that, Level and Rocks are not ACID. This makes them extremely brittle and prone to corruption on application crash or power failure. Given that node sync from genesis is not cheap or instantaneous, this is a non-starter for us. 10 | 11 | # Why not BadgerDB? 12 | 13 | BadgerDB, unlike Level or Rocks, does provide transactions. However, there is a next issue we run into: Badger is based on [Log-structured merge-tree](https://en.wikipedia.org/wiki/Log-structured_merge-tree). 14 | 15 | Badger (and all LSM-based DBs) has background compaction. It's good for some projects and bad for others. 16 | 17 | In Erigon we eliminated most of concurrency (goroutines) for many reasons (too many things happening at the same time). We found that modern SSD (and NVMe) are still pretty bad with concurrent writes - they are way better than HDD, but sequential read is still order of magnitude faster than random reads. Meaning 1 thread touching disk vs 2 threads touching disk - can show 10x degradation. 18 | 19 | _How does this apply to us?_ 20 | 21 | **We removed parallel writes and moved to control all disk touches**. Now we don't really care about "how much WPS database can handle" because now we can fit all writes into 1 write transaction. Doesn't matter if it happens once per 10 minutes or once per 1 second - as long as it's not thousands of parallel WPS. In LMDB 1 write transaction is equal to 1 fsync syscall - all writes during transaction are happening in RAM. 22 | 23 | LSM databases (Badger, LevelDB) are slower on average for random reads, and that read times are more volatile. B+tree is faster and more predictable for random reads. 24 | 25 | # Why not BoltDB? 26 | 27 | Unlike Badger, Bolt is a Go library, providing storage engine based on B+tree. 
It originally fit well, and we had BoltDB backend available until September 2020. 28 | 29 | Bolt lacks certain advanced features that we found useful, like LMDB's sorted duplicates (DupSort). It allows to save space without resorting to compression by storing repetitive keys only once. 30 | 31 | Bolt is not actively maintained anymore, [although there is an active fork by etcd team](https://github.com/etcd-io/bbolt). And finally, it is a Go library, precluding usage and binary compatibility with [Silkworm](https://github.com/torquem-ch/silkworm) and [Akula](https://github.com/rust-ethereum/akula). 32 | 33 | For all these reasons we switched to LMDB. 34 | 35 | # Why not LMDB? 36 | 37 | [See this post.](https://github.com/ledgerwatch/erigon/wiki/Criteria-for-transitioning-from-Alpha-to-Beta#switch-from-lmdb-to-mdbx) 38 | -------------------------------------------------------------------------------- /legacy/erigion/Consensus-Engine-separation.md: -------------------------------------------------------------------------------- 1 | ## Validation of headers 2 | 3 | ![](https://github.com/ledgerwatch/erigon/blob/devel/docs/Consensus-Engine-Page-1.png) 4 | 5 | ## Validation of uncles (EtHash) 6 | 7 | To the best of our knowledge, EtHash is the only algorithm where this functionality is required. But something similar may come up with DAG-based algorithms, where headers have more than just a parent, but also alternative ancestors. In that case, a lot of the interface may need to be generalised accordingly. 
8 | 9 | ![](https://github.com/ledgerwatch/erigon/blob/devel/docs/Consensus-Engine-Page-4.png) 10 | 11 | ## Use of smart contract state for Consensus Engine 12 | 13 | ![](https://github.com/ledgerwatch/erigon/blob/devel/docs/Consensus-Engine-Page-2.png) 14 | 15 | In algorithms like AuRa (Authority Round), verification of headers requires access to the state of smart contracts (where, for example, set of validators is stored), as well as emitted events (requests for inducting new validators). In order to accommodate this, we would introduce another message type from Consensus to Core. 16 | 17 | ![](https://github.com/ledgerwatch/erigon/blob/devel/docs/Consensus-Engine-Page-6.png) 18 | 19 | ## Solution for Staged sync 20 | 21 | ![](https://github.com/ledgerwatch/erigon/blob/devel/docs/Consensus-Engine-Page-3.png) 22 | 23 | ## Fork choice rule 24 | 25 | Fork choice rule can be thought of as a partial order relationship among the set of possible headers. Being a partial order, fork choice rule is: 26 | 27 | 1. reflexive or irreflexive, depending on whether non-strict or strict definition is required. header `A` is either better than itself (non-strict, `<=`), or not better than itself (strict, `<`) 28 | 2. anti-symmetric. if `A` is better than `B`, then `B` is worse than `A` 29 | 3. transitive. if `A` is better than `B` and `B` is better than `C`, then `A` is better than `C` 30 | 31 | Core is asking the Consensus Engine to infer the relation between given headers, and perform topological sort. 32 | 33 | ![](https://github.com/ledgerwatch/erigon/blob/devel/docs/Consensus-Engine-Page-5.png) 34 | 35 | ## Finalisation Code 36 | 37 | When the verification results in `Valid` message from Consensus Engine to the Core, an extra field `Finalisation Code` is attached. This code is expressed in an extension of EVM, which is currently called TEVM, and it needs to be run at the end of processing of the corresponding block (where there is access to the state etc.).
For example, for assigning mining reward, the following very generic finalisation code can be sent: 38 | 39 | ``` 40 | PUSH32 41 | COINBASE 42 | ADDBALANCE 43 | ``` 44 | 45 | Note that EVM does not have `ADDBALANCE` opcode, this would be part of TEVM extension, and this particular opcode would be run in a privileged mode only (meaning that only system parts like Consensus Engine and Transpiler may emit this code, but no user code with such code will be able to run). Similarly, EVM lacks introspection of uncles attached to the block. With extension allowing for that, the code above can be made to also add uncle rewards. In other consensus algorithms, the Finalisation code can be made to issue POS rewards, do slashing, etc., and it may either be very specific for every header or block, or parametrised via using extension opcodes in TEVM. 46 | 47 | ## Notes from Dragan 48 | 49 | Hello, for consensus engine in OE there is main abstraction that all Engines implement: https://github.com/openethereum/openethereum/blob/32d8b5487a6fc12c8295ebf9833c74857f5e7354/crates/ethcore/src/engines/mod.rs#L304 And few months ago I wanted to see where and who is using engine and while doing that came up with this document: https://docs.google.com/spreadsheets/d/1gzkq_m7rHZKP7tPDyJiBykDjPgV8BCgiEwSZ35Zx3ho/edit?usp=sharing maybe it can be useful for AuRa 50 | 51 | ## Implementation 52 | 53 | 1. There are a few AuRa-based chains: Kovan, Sokol, xDAI (POS DAO) 54 | 2. We take Sokol as the first example, because Kovan has WASM contracts. 55 | 3. Re-add `eth/65` to `--download.v2` (messages with RequestID) 56 | 4. Connect to Sokol network from TG with `--download.v2` 57 | 5. Add `seal` into the header structure to parse header. Once it is done, we will see the signatures. 58 | 6. Implement finality rule using signatures. 59 | 7. ....
60 | -------------------------------------------------------------------------------- /legacy/erigion/Criteria-for-transitioning-from-Alpha-to-Beta.md: -------------------------------------------------------------------------------- 1 | We do not define a specific deadline for transitioning from **Alpha** to **Beta** stage. Instead, we define the criteria that should help us decide when turbo-geth is ready for **Beta**. Here is the list of things that need implemented for these criteria to be met. 2 | 3 | ## Mining 4 | 5 | The challenge of implementing efficient mining support in turbo-geth is the fact that there is only one "canonical" state at any given time. Mining, however, requires production of "speculative" blocks, and then "speculative" state, in order to compute the state root hash for the header. Current idea for the "speculative" state is an in-memory cache that can be "cloned". By "cloning", we mean creating a lazy-shallow copy of the cache such that the changes to the cloned state do not affect the canonical state. However, it turns out that the current data model of `HashedState` and `IntermediateHashes` is not well suited for the maintenance of such cache. The work is on-going to correct the data model and implement the clone-able state cache, and subsequently, the mining functionality. 6 | 7 | ## Simplified downloading of block headers and block bodies 8 | 9 | Currently downloading block headers and block bodies are stage 1 and stage 3 of staged sync, respectively. These stages look and feel quite different from the other stages, because they were created on the foundations of the header/body/receipt/state downloading code inherited from go-ethereum. This code does much more than turbo-geth's staged sync requires, and can (and should) be replaced with a simplified version. A working proof-of-concept of this simplified version has been created, and now is the time for tests and documentation. 
10 | 11 | ## Consensus Engine component 12 | 13 | One may have heard about the concept of "pluggable consensus", meaning that it should be easy to switch from Proof Of Work to Proof of Authority, and also from one variant of POW to another one, and from one variant of POA to another one. In practice, pluggable consensus was some implementation with interfaces, but still always running in the same process, and often deeply intertwined with the rest of the code. We have taken steps to design the interface that would allow running consensus engine in the separate process. With such interface, it should be possible to run it in the same process, but the existence of the interface makes it much more straightforward to keep consensus engine properly separated. We have proof-of-concept implementation that works for EtHash POW and Clique POA, now it is time for integration, tests, and documentation. 14 | 15 | ## Switch from LMDB to MDBX 16 | 17 | Erigon started off with the BoltDB database backend, then adding the support for BadgerDB, and then eventually migrating exclusively to LMDB. At some point we have encountered stability issues that were caused by our usage of LMDB that was not envisaged by the creators. We have since then been looking at a well-supported derivative of LMDB, called MDBX, and hoping to use their stability improvement, and potentially working more together in the future. The integration of MDBX is done, now it is time for more testing and documentation. 18 | 19 | Benefits of transitioning from LMDB to MDBX: 20 | 21 | 1. Database file growth "geometry" works properly. This is important especially on Windows. In LMDB, one has to specify the memory map size once in advance (currently we use 2Tb by default), and if the database file grows over that limit, one has to restart the process. On Windows, setting memory map size to 2Tb makes database file 2Tb large on the onset, which is not very convenient. 
With MDBX, memory map size is increased in 2Gb increments. This means occasional remapping, but results in a better user experience. 22 | 2. MDBX has more strict checks on concurrent use of the transaction handles, as well as overlap read and write transaction within the same thread of execution. This allowed us to find some non-obvious bugs and make behaviour more predictable. 23 | 3. Over the period of more than 5 years (since it split from LMDB), MDBX accumulated a lot of safety fixes and heisenbug fixes that are still present in LMDB to the best of our knowledge. Some of them we have discovered during our testing, and MDBX maintainer took them seriously and worked on the fixes promptly. 24 | 4. When it comes to databases that constantly modify data, they generate quite a lot of reclaimable space (also known as "freelist" in LMDB terminology). We had to patch LMDB to fix most serious drawbacks when working with reclaimable space (analysis here: https://github.com/ledgerwatch/erigon/wiki/LMDB-freelist-illustrated-guide). MDBX takes special care of efficient handling of reclaimable space and so far no patches were required. 25 | 5. According to our tests, MDBX performs slightly better on our workloads. 26 | 6. MDBX exposes more internal telemetry - more metrics of what happening inside DB. And we have them in Grafana - to make better decisions on app design. For example, after complete transition to MDBX (removing LMDB support) we will implement "commit half-full transactions" strategy to avoid spill/unspill disk touches. This will simplify our code further without affecting performance. 27 | 7. MDBX has support for "Exclusive open" mode - we using it for DB migrations, to prevent any other reader from accessing the database while DB migration is in progress. 
28 | -------------------------------------------------------------------------------- /legacy/erigion/EVM-with-abstract-interpretation-and-backtracking.md: -------------------------------------------------------------------------------- 1 | # Goal 2 | 3 | Describe what abstract interpretation and backtracking means, and how to implement it efficiently 4 | 5 | # EVM resources 6 | 7 | By resources here we understand things that programmer of EVM may use to store and manipulate data, and to perform computations. Some of the resources EVM are accessible directly via opcodes, whereas others - indirectly via side-effects of certain operations. 8 | 9 | ## Execution frames (substates) 10 | 11 | When EVM is activated (which is usually) by sending a transaction to a deployed smart contract, with some input, the first execution frame is created. It gets its program counter, gas counter, "read only" flag (whether any mutating operations are allowed), input data in memory, and output data region in memory. Execution frame is mostly segregated from other execution frames, but there are few ways they can communicate: 12 | 13 | 1. Via input data 14 | 2. Via gas counter 15 | 3. Via storage writes (only for execution frames of the same contract) 16 | 17 | ## Stacks 18 | 19 | Each execution frame has its own stack, and it is only accessible from that one execution frame. 20 | 21 | ## Memories 22 | 23 | Each execution frame has its own memory. Memory expands in chunks of 32 bytes, when used, but is accessible with the granularity of a single byte. 24 | 25 | ## State caches 26 | 27 | Whenever an item is read from the state, it potentially modifies state cache, which has an impact on the gas cost of subsequent operations with the same state item. Whenever a state item is created or updated, it also modifies state cache in a different way, which affects the cost of subsequent update operations for the same state item. 
State caches can be explicitly modelled as EVM resources for better specification and less error-prone implementation, but also for the purpose of implementing the backtracking. 28 | 29 | ## Access lists 30 | 31 | Access lists are related to the state caches in a way that they pre-initialise read caches in a certain way. 32 | 33 | ## Self-destruct lists 34 | 35 | Which accounts will be self-destructed and removed from the state at the end of a transaction. Self-destruct lists need to be explicitly modelled for better specification and less error-prone implementation, but also for the purpose of implementing the backtracking. 36 | 37 | ## Block context 38 | 39 | Timestamp, block hash, gas limit, base fee 40 | 41 | ## Transaction context 42 | 43 | ## Extra context 44 | 45 | Recent block hashes 46 | 47 | # Extended domain for stack elements 48 | 49 | Abstract interpretation (as opposed to "concrete" interpretation) allows us to replace some concrete values on the stack (and then perhaps in memory or state caches) with `unknown` values. This effectively extends the domain of possible values from numbers `0`...`2^256-1` to also include `unknown` or potentially multiple types of unknown. For a more rigorous approach, it may make sense to introduce at least two types of unknowns, one meaning that the value "does not exist", and another that the value is "unknown". The reason why we need "does not exist" is to perform the unification of stacks when resolving loops, for example. In order to describe the abstract interpretation a bit more formally, we need to define what a "stack" is (and then perhaps also what other resources are). The stack is the sequence of objects from the domain {`0`...`2^256-1`, `NE`, `NK`}, where `NE` means does not exist, and `NK` means not known. In other words, a stack can be thought of as a tuple of certain "maximum" size, for example, 100.
So in our model, all possible stacks will be of fixed size (let's say 100), and to model a smaller stack, we fill the rest of elements with `NE` objects. For example, the stack 50 | 51 | ``` 52 | 4 53 | 5 54 | 6 55 | NK 56 | 7 57 | ``` 58 | 59 | will in fact be represented as 60 | 61 | ``` 62 | 4 63 | 5 64 | 6 65 | NK 66 | 7 67 | NE 68 | NE 69 | ... 70 | NE 71 | ``` 72 | 73 | where the total length of the stack is 100. Why is it important that all stacks are of the same size? Because then we can define some operations on the domain of all possible stacks and express them in terms of operations on the individual elements. 74 | 75 | Let's create an example of a simple loop: 76 | 77 | ``` 78 | 0: PUSH1 10 # initial value of the loop counter 79 | 2: JUMPDEST # this is where iteration of the loop returns 80 | 3: PUSH1 1 # to perform counter-- 81 | 5: SUB 82 | 6: DUP1 # make sure we don't destroy the only value of the counter by ISZERO 83 | 7: ISZERO # top of the stack is 1 if counter == 0 84 | 8: ISZERO # top of the stack is 1 if counter > 0 85 | 9: PUSH1 2 86 | 11: SWAP1 87 | 12: JUMPI # jump if still counter > 0 88 | ``` 89 | 90 | If we perform abstract interpretation of this program, this is how we could go. We start with an empty stack (full of `NE`s). As we go along, we create a mapping `PC (program counter) => stack`, which will help us understand whether we returned to the place we've been before. 91 | 92 | ``` 93 | NE 94 | NE 95 | ... 96 | NE 97 | ``` 98 | 99 | ### 0: PUSH1 10 100 | 101 | we shift the stack downwards and replace first element with 10 (the last `NE` gets discarded) 102 | 103 | ``` 104 | 10 105 | NE 106 | ... 107 | NE 108 | ``` 109 | 110 | ### 2: JUMPDEST 111 | 112 | Nothing happens here, it is just a "goto label" 113 | 114 | ### 3: PUSH1 1 115 | 116 | ``` 117 | 1 118 | 10 119 | NE 120 | ... 121 | NE 122 | ``` 123 | 124 | ### 5: SUB 125 | 126 | ``` 127 | 9 128 | NE 129 | ...
130 | NE 131 | ``` 132 | 133 | ### 6: DUP1 134 | 135 | ``` 136 | 9 137 | 9 138 | NE 139 | ... 140 | NE 141 | ``` 142 | 143 | ### 7: ISZERO 144 | 145 | ``` 146 | 0 147 | 9 148 | NE 149 | ... 150 | NE 151 | ``` 152 | 153 | ### 8: ISZERO 154 | 155 | ``` 156 | 1 157 | 9 158 | NE 159 | ... 160 | NE 161 | ``` 162 | -------------------------------------------------------------------------------- /legacy/erigion/Home.md: -------------------------------------------------------------------------------- 1 | Welcome to the turbo-geth wiki! 2 | -------------------------------------------------------------------------------- /legacy/erigion/Using-Postman-to-test-RPC.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | In this article, we provide instructions for using [Postman](https://www.postman.com/) to test Erigon's RPC interfaces. We begin with instructions for how to get started, we then explain a few thoughts on using Postman, and finally, we list a few other things we can do with the collection file in the future. 4 | 5 | ## Getting Started 6 | 7 | ### Install Postman and Erigon 8 | 9 | In these instructions, we assume you have Postman installed and are logged in. You may have to create a user account on their website in order to download the software. You do not need to create or join a team. 10 | 11 | We also assume that you have a copy of the Erigon source code in a folder called `$ERIGON`. 12 | 13 | ### Import the RPC Testing Collection 14 | 15 | Once you have Postman running: 16 | 17 | - Click on the `Import` button at the top left of the screen. This will open the Import Dialog. 18 | - Click on the `File` tab and then the `Upload Files` button in the middle of the screen. 19 | - Navigate to the folder `$ERIGON/cmd/rpcdaemon/postman`. 20 | - Select the file `RPC_Testing.json` and confirm the import. 21 | - Click on the `Collections` subtab just below the main menu. 
22 | 23 | You should now have an opened collection called `RPC_Testing`. If something doesn't work, please let us know by creating an issue. 24 | 25 | ### Create Global and Environment Variables 26 | 27 | Postman allows the user to specify custom `variables`, which can be used, for example, to run the same test collection against an API at multiple different endpoints. We will use `variables` for exactly this reason. 28 | 29 | In order for this to work, we need to create both `global` and `environmental` variables. We do that next. 30 | 31 | #### Globals 32 | 33 | Near the top right of the screen is a small icon that looks like an eyeball and is labeled `Environment quick look`. Click on that icon and then `Edit` under the **Globals** section. You should be in the `Manage Environment` dialog. 34 | 35 | Add two variables (you may add more later): 36 | 37 | | VARIABLE | INITIAL VALUE | 38 | | ---------- | ------------------------------------ | 39 | | ERIGON | http://localhost:8545 | 40 | | NETHERMIND | http://archive02.archivenode.io:8545 | 41 | 42 | Click on `Persist All` and then `Save`. Close the `Manage Environment` dialog. 43 | 44 | #### Environments 45 | 46 | Now we need to create a testing environment. Do this by clicking on the eyeball icon again. This time, click on the `Add` link next to the **Environment** section. Call your environment `Erigon Testing` and add this variable: 47 | 48 | | VARIABLE | INITIAL VALUE | 49 | | -------- | ------------- | 50 | | HOST | {{ERIGON}} | 51 | 52 | `Persist All` and click on `Add` to save the environment. Close the dialog to return to the main screen. 53 | 54 | You should be ready to test. If not, please post an issue. 55 | 56 | #### Testing Other Endpoints 57 | 58 | Optionally, you may create a second environment (`Nethermind Testing`) and set the `HOST` variable to `{{NETHERMIND}}`. This will allow you to test other endpoints. We leave that as an exercise. 
59 | 60 | ### Running Tests 61 | 62 | You are now ready to run the tests. To do that: 63 | 64 | - Start your Erigon node. 65 | - Start your Erigon RPC daemon. (If you're testing all endpoints, start with all namespaces enabled `build/bin/rpcdaemon --private.api.addr=localhost:9090 --http.api=eth,debug,net,web3,trace,db,shh,tg`.) 66 | - Click on the `Runner` button at the top left of the Postman screen. This will open a new window called `Collection Runner`. 67 | - Select the `RPC_Testing` collection. 68 | - Select an environment (`Erigon Testing` for example). 69 | - Press `Run RPC_Testing` 70 | 71 | This should run all the currently enabled tests. Note that you may run individual tests directly from the Postman screen. 72 | 73 | See the notes below for more information. 74 | 75 | ## Discussion 76 | 77 | We think Postman is a good choice to create, edit, test, and document Erigon's RPC. The file created by Postman, `RPC_Testing.json`, is a full specification of the API including example usage, test cases, and text that may be used to generate documentation using various tools such as Swagger. Additionally, Postman allows one to create automated monitors that watch your API. And finally, it works with your CI (continuous integration) with a tool called Newman. 78 | 79 | ## Notes: 80 | 81 | - The RPC_Testing file contains tests that are disabled by default. You may enable them by adding a **Global** variable called `TEST_NOT_IMPLEMENTED` and/or `TEST_DEPRECATED` and setting their value to `true`. 82 | - Many of the tests hard code both the body of the test and the expected results. Eventually, we'd like to use parameterized test data files instead. This will allow us to run multiple different tests against the same API endpoints. 83 | - The tests run against the Ethereum main net and expect an Ethereum archive node to work (for example, some of the tests query historical account balances). Future versions could be customized for non-archive nodes.
84 | 85 | ## Other Possible Uses for the Collection 86 | 87 | - Test against other RPC endpoints (including other nodes types) 88 | - Generate help documentation 89 | - Verify RPC interfaces 90 | - Use in CI (continuous integration) pipeline 91 | - Generate RPC APIs for other languages (such as C++) 92 | -------------------------------------------------------------------------------- /legacy/evm-illustrated/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, Takenobu Tani 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /legacy/evm-illustrated/README.md: -------------------------------------------------------------------------------- 1 | # Ethereum EVM illustrated 2 | 3 | This is an illustrated document about the EVM(Ethereum Virtual Machine). 4 | 5 | Here is: [Ethereum EVM illustrated](http://takenobu-hs.github.io/downloads/ethereum_evm_illustrated.pdf) (PDF). 6 | 7 | ## Contents 8 | 9 | 1 Introduction 10 | 11 | - Blockchain 12 | - World state 13 | - Account 14 | - Transaction 15 | - Message 16 | - Decentralised database 17 | - Atomicity and order 18 | 19 | 2 Virtual machine 20 | 21 | - Ethereum virtual machine (EVM) 22 | - Message call 23 | - Exception 24 | - Gas and fee 25 | - Input and output 26 | - Byte order 27 | - Instruction set 28 | - Miscellaneous 29 | 30 | Appendix A : Implementation 31 | 32 | - Source code in Geth 33 | - EVM developer utility 34 | - Solidity ABI 35 | 36 | Appendix B : User interface 37 | 38 | - Web3 API 39 | - Geth, Mist, Solc, Remix, Truffle, ... 
40 | 41 | References 42 | -------------------------------------------------------------------------------- /legacy/evm-illustrated/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.1.1 _Mar 2018_ 4 | 5 | - Modify exception page temporarily on page 59 6 | 7 | - thanks to ubuntaire and smarx at Reddit 8 | 9 | - Add reference "[E9] ethereum/wiki Subtleties" on page 113 10 | 11 | ## 0.1.0 _Mar 2018_ 12 | 13 | - First release 14 | -------------------------------------------------------------------------------- /legacy/evm-illustrated/ethereum_evm_illustrated.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/legacy/evm-illustrated/ethereum_evm_illustrated.pdf -------------------------------------------------------------------------------- /legacy/evm-illustrated/src/ethereum_evm_illustrated.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/legacy/evm-illustrated/src/ethereum_evm_illustrated.pptx -------------------------------------------------------------------------------- /legacy/evm-modules/README.md: -------------------------------------------------------------------------------- 1 | # Core Paper Project of EVM 2 | 3 | The Core Paper Project of EVM aims at providing a modular and general purpose specification for Ethereum Virtual Machine. Ethereum Virtual Machine, or EVM, is a widely used stack-based virtual machine and binary instruction format. 4 | 5 | EVM is initially designed for Ethereum and Ethereum Classic, with VM structures specific to those blockchains. However, it's being adopted in a wide range of other projects, such as [Parity Substrate](https://github.com/paritytech/substrate/pull/3927). 
Those projects have vastly different requirements compared with Ethereum and Ethereum Classic, and as a result, they would benefit from a standalone specification process. 6 | 7 | We design this specification to be modular, from a basic layer called EVM Core, which has minimal assumptions about the environment. Modules are then provided on top of EVM Core, which make additional assumptions about the environment. Many layers, including EVM Core, do not contain the gasometer, which means they are suitable to be used in general-purpose environments and are much easier to implement. 8 | 9 | ## Philosophy 10 | 11 | The current EIP and ECIP process basically consists of "changelogs". We define, as informal specifications, what is changed when the EIP is applied. This works well for simple changes such as gas cost modification and opcode addition, because the change is only at a single point and assumed not to affect the rest of the system. 12 | 13 | However, totally relying on changelog format has its expressiveness limit. For pressing issues on Ethereum we're facing nowadays, many structural and potentially complex changes of the EVM are required. When writing them under EIP "changelog" format, it's both hard for authors to express themselves, and for readers to understand the specification. This has led to confusion and implementation consensus issues in the past. What's more, some of the previously-thought single point changes turned out to affect a larger part of the EVM, such as EIP-1283 and EIP-1884; relying solely on the changelog format made it harder for readers to review those effects. 14 | 15 | The Core Paper Project of EVM is an attempt to address those issues. Instead of the one-step "changelog" process as in EIP and ECIP, here feature upgrades are defined under a two-step process: 16 | 17 | - **Refactoring**: Any new feature upgrade is identified as a "module change".
We first refactor the _whole EVM_ specification to get a _functionally equivalent_ specification. 18 | - **Module change**: We then add the module change, and write the "changelog" simply as the actual module change. 19 | 20 | As an example, to add new EVM features that require an additional validation step in the beginning, we first refactor the whole EVM specification to have a no-op validation step, which is functionally equivalent to what we have now. After that, the new feature can simply be added as an additional module. This process is much more clear compared with the changelog process. 21 | 22 | At the same time, we hope the modular design and specification allow reusability outside of the context of Ethereum and Ethereum Classic, and can encourage better standardization, for EVM features that are not designed for Ethereum or Ethereum Classic mainnet. 23 | 24 | ## Modules 25 | 26 | ### EVM Core 27 | 28 | EVM Core defines the base layer of execution. The VM has access to the following information: 29 | 30 | - **Data**: a bytearray defining the input of the VM. 31 | - **Code**: a bytearray defining the code being executed. 32 | - **Program Counter**: an integer, pointing to the position of the next instruction being executed. 33 | - **Jump Validity Map**: a boolean list the same size as the code bytearray. It is generated in the beginning of the program execution, and sets all valid `JUMPDEST` positions to true. 34 | - **Memory**: A linear memory of bytes, of given limit. 35 | - **Stack**: A stack, containing values of 256-bit. 36 | 37 | Valid instructions of EVM Core are: 38 | 39 | - **Stop and Arithmetic**: `STOP`, `ADD`, `MUL`, `SUB`, `DIV`, `SDIV`, `MOD`, `SMOD`, `ADDMOD`, `MULMOD`, `EXP`, `SIGNEXTEND`. 40 | - **Comparison and Bitwise Logic**: `LT`, `GT`, `SLT`, `SGT`, `EQ`, `ISZERO`, `AND`, `OR`, `XOR`, `NOT`, `BYTE`, `SHL`, `SHR`, `SAR`. 41 | - **Code and Data Access**: `CALLDATALOAD`, `CALLDATASIZE`, `CALLDATACOPY`, `CODESIZE`, `CODECOPY`.
42 | - **Stack, Memory and Flow Control**: `POP`, `PUSHn`, `DUPn`, `SWAPn`, `MLOAD`, `MSTORE`, `MSTORE8`, `JUMP`, `JUMPI`, `PC`, `MSIZE`, `JUMPDEST`, `RETURN`, `REVERT`, `INVALID`. 43 | 44 | ### EVM ROM 45 | 46 | The EVM ROM layer can be built on top of EVM Core to provide access to a range of read-only memory. We define the following structure: 47 | 48 | - **Read-only Memory**: A range of read-only memory that can be accessed by specific opcodes. 49 | 50 | We redefine the following opcodes to be access of read-only memory. Here we define read-only memory to have index every 32 bytes. 51 | 52 | - `ADDRESS` (`0x30`): `READROM 0x0` Push index `0` of read-only memory onto stack. 53 | - `ORIGIN` (`0x32`): `READROM 0x1` Push index `1` of read-only memory onto stack. 54 | - `CALLER` (`0x33`): `READROM 0x3` Push index `2` of read-only memory onto stack. 55 | - `CALLVALUE` (`0x34`): `READROM 0x4` Push index `3` of read-only memory onto stack. 56 | - `GASPRICE` (`0x3a`): `READROM 0x5` Push index `4` of read-only memory onto stack. 57 | - `COINBASE` (`0x41`): `READROM 0x6` Push index `5` of read-only memory onto stack. 58 | - `TIMESTAMP` (`0x42`): `READROM 0x7` Push index `6` of read-only memory onto stack. 59 | - `NUMBER` (`0x43`): `READROM 0x8` Push index `7` of read-only memory onto stack. 60 | - `DIFFICULTY` (`0x44`): `READROM 0x9` Push index `8` of read-only memory onto stack. 61 | - `GASLIMIT` (`0x45`): `READROM 0xa` Push index `9` of read-only memory onto stack. 62 | - `CHAINID` (`0x46`): `READROM 0xb` Push index `10` of read-only memory onto stack. 63 | - `SELFBALANCE` (`0x47`): `READROM 0xc` Push index `11` of read-only memory onto stack. 64 | 65 | ### EVM Storage 66 | 67 | The EVM Storage layer provides opcodes for access of a persistent storage: 68 | 69 | - **Storage**: External storage that can be read or write by the contract. 70 | 71 | Opcodes `SLOAD` and `SSTORE` are defined in this layer. 
72 | 73 | ### EVM Log 74 | 75 | The EVM Log layer provides opcodes for logging: 76 | 77 | - **Log**: Append-only data structure with structure `{ topics: Vec, data: Vec }`, where `topics` can at most be length 4. 78 | 79 | Opcodes `LOGn` are defined in this layer. 80 | 81 | ### EVM Ethereum 82 | 83 | We define all Ethereum specific opcodes in this layer. This includes: 84 | 85 | - **Sha3**: `SHA3` 86 | - **Environmental Information**: `BALANCE`, `EXTCODESIZE`, `EXTCODECOPY` 87 | - **Block Information**: `BLOCKHASH` 88 | - **Gasometer**: `GAS` 89 | - **System Operations**: `CREATE`, `CREATE2`, `CALL`, `CALLCODE`, `DELEGATECALL`, `STATICCALL` 90 | 91 | ## License 92 | 93 | This work is licensed under [Apache License, Version 2.0](http://www.apache.org/licenses/). 94 | -------------------------------------------------------------------------------- /legacy/evm-modules/gasometer/README.md: -------------------------------------------------------------------------------- 1 | # Gasometer 2 | 3 | This defines the gas cost calculation module for EVM. 4 | 5 | ## Imports 6 | 7 | The gasometer has access to the following information. Note that each opcode cost module may require access to additional information. 8 | 9 | - **Memory effective length**: The effective length of memory, defined in EVM Core. 10 | 11 | ## Constants 12 | 13 | - `G_MEMORY`: Used to calculate memory gas from memory effective length. 14 | - **Opcode Cost Modules**: With gasometer in place, each valid opcode is assigned with an opcode cost module. This constant is a mapping of opcode to its opcode cost module. 15 | 16 | ## Data Structures 17 | 18 | The gasometer maintains: 19 | 20 | - **Status**: Can be two values -- either "okay" or "error". Error indicates that an out of gas error has already happened. 21 | - **Gas limit**: The current gas limit. 22 | - **Used gas counter**: Unsigned counter for used gas. 23 | - **Refund gas counter**: Signed counter for gas refund. 
24 | 25 | ## Methods 26 | 27 | ### `gasometer.record_used(gas)` 28 | 29 | Increase the used gas counter by the amount of `gas`. If the increment leads to the condition that used gas counter is greater than gas limit, set used gas counter to gas limit, and set status to error. 30 | 31 | Return okay if the status ended up being okay, otherwise return error. 32 | 33 | ### `gasometer.record_refund(refund)` 34 | 35 | Increase or decrease the refund gas counter, based on `refund`'s sign. 36 | 37 | ### `gasometer.total_used_gas()` 38 | 39 | Calculate the total used gas of a gasometer. 40 | 41 | Calculate memory gas, with the formula `G_MEMORY * a + a * a // 512`, where `a` is the memory effective length. Return memory gas plus used gas counter. 42 | 43 | ### `gasometer.gas_left()` 44 | 45 | Return _gas limit_ minus `gasometer.total_used_gas()`. 46 | 47 | ### `gasometer.effective_used_gas()` 48 | 49 | Calculate the effective used gas for a transaction, based on total used gas and gas limit. 50 | 51 | Calculate the effective refund gas. If refund gas counter is negative, the effective refund gas is 0. Otherwise, cap the refund gas at half of the total used gas. 52 | 53 | Return total used gas minus effective refund gas. 54 | 55 | ### `gasometer.record_opcode(opcode)` 56 | 57 | Use the corresponding opcode cost module of the given opcode to calculate the gas cost and gas refund. Pass the resulting `gas` to `gasometer.record_used`. Pass the resulting `refund` to `gasometer.record_refund`. 58 | -------------------------------------------------------------------------------- /legacy/evm-modules/gasometer/sstore/net.md: -------------------------------------------------------------------------------- 1 | # Net SSTORE Gas Cost Module 2 | 3 | This defines the gas cost calculation module for SSTORE with net gas metering. 4 | 5 | ## Imports 6 | 7 | This gas cost module has access to the following information. 8 | 9 | - **Stack**: The EVM stack.
10 | - **Storage**: EVM storage of the current operating address. 11 | - **Original storage**: EVM storage state at the beginning of the current transaction. 12 | - **Gasometer gas left**: The current remaining gas value of the gasometer. 13 | 14 | ## Constants 15 | 16 | - `G_SSTORE_SET`: Gas cost for setting a storage value from zero to non-zero. 17 | - `G_SSTORE_RESET`: Gas cost for setting a storage value otherwise. 18 | - `G_SLOAD`: Gas cost for SLOAD operation and SSTORE when a value is unchanged. 19 | - `R_SSTORE_CLEAR`: Refund for setting a storage value from non-zero to zero. 20 | - `G_STIPEND`: Stipend paid for CALL opcode with value transfer. 21 | 22 | ## Calculations 23 | 24 | Interpret stack item at index `0` as the _index_, and stack item at index `1` as the _new value_. Fetch from _storage_ at _index_ as the _current value_. Fetch from _original storage_ at _index_ as the _original value_. 25 | 26 | ### Gas Cost 27 | 28 | - If _gasometer gas left_ is less than or equal to `G_STIPEND`, return `G_STIPEND + 1`. 29 | - If _current value_ equals _new value_, return `G_SLOAD`. 30 | - If _current value_ does not equal _new value_ 31 | - If _original value_ equals _current value_ 32 | - If _original value_ is zero, return `G_SSTORE_SET`. 33 | - Otherwise, return `G_SSTORE_RESET`. 34 | - Otherwise, return `G_SLOAD`. 35 | 36 | ### Gas Refund 37 | 38 | - If _original value_ equals _current value_, and _new value_ is zero, return `R_SSTORE_CLEAR`. 39 | - Otherwise, create a local variable `refund`, initialized to `0`. 40 | - If _original value_ is not zero 41 | - If _current value_ is zero, remove `R_SSTORE_CLEAR` from `refund`. 42 | - Otherwise, if _new value_ is zero, add `R_SSTORE_CLEAR` to `refund`. 43 | - If _original value_ equals _new value_ 44 | - If _original value_ is zero, add `G_SSTORE_SET - G_SLOAD` to `refund`. 45 | - Otherwise, add `G_SSTORE_RESET - G_SLOAD` to `refund`. 46 | - Return `refund`.
47 | -------------------------------------------------------------------------------- /legacy/evm-modules/gasometer/sstore/simple.md: -------------------------------------------------------------------------------- 1 | # Simple SSTORE Gas Cost Module 2 | 3 | This defines the gas cost calculation module for SSTORE without net gas metering. 4 | 5 | ## Imports 6 | 7 | This gas cost module has access to the following information. 8 | 9 | - **Stack**: The EVM stack. 10 | - **Storage**: EVM storage of the current operating address. 11 | 12 | ## Constants 13 | 14 | - `G_SSTORE_SET`: Gas cost for setting a storage value from zero to non-zero. 15 | - `G_SSTORE_RESET`: Gas cost for setting a storage value otherwise. 16 | - `R_SSTORE_CLEAR`: Refund for setting a storage value from non-zero to zero. 17 | 18 | ## Calculations 19 | 20 | Interpret stack item at index `0` as the index, and stack item at index `1` as the _new value_. Fetch from _storage_ at _index_ as the _current value_. 21 | 22 | ### Gas Cost 23 | 24 | If _current value_ is zero, and _new value_ is not zero, return `G_SSTORE_SET`. Otherwise, return `G_SSTORE_RESET`. 25 | 26 | ### Refund 27 | 28 | If _current value_ is not zero, and _new value_ is zero, return `R_SSTORE_CLEAR`. Otherwise, return `0`. 
29 | -------------------------------------------------------------------------------- /legacy/files/Generating_stack_machine_code_using_LLVM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/legacy/files/Generating_stack_machine_code_using_LLVM.pdf -------------------------------------------------------------------------------- /legacy/files/LLVM_talk.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/legacy/files/LLVM_talk.pdf -------------------------------------------------------------------------------- /legacy/gas-estimator/.dockerignore: -------------------------------------------------------------------------------- 1 | src/instrumentation_measurement/openethereum/ethcore/res/wasm-tests 2 | src/instrumentation_measurement/openethereum/ethcore/res/ethereum/tests 3 | src/instrumentation_measurement/openethereum/target 4 | 5 | src/instrumentation_measurement/evmone/build 6 | 7 | /.dockerignore 8 | /Dockerfile* 9 | -------------------------------------------------------------------------------- /legacy/gas-estimator/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | src/.RData 3 | 4 | src/.Rhistory 5 | 6 | __pycache__ 7 | 8 | *.nb.html 9 | -------------------------------------------------------------------------------- /legacy/gas-estimator/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "src/instrumentation_measurement/openethereum"] 2 | path = src/instrumentation_measurement/openethereum 3 | url = ../openethereum.git 4 | [submodule "src/instrumentation_measurement/evmone"] 5 | path = src/instrumentation_measurement/evmone 6 | url = ../evmone.git 7 | 
-------------------------------------------------------------------------------- /legacy/gas-estimator/Dockerfile.evmone: -------------------------------------------------------------------------------- 1 | FROM python:3.8-alpine 2 | 3 | RUN apk update && apk add g++ cmake git make 4 | 5 | WORKDIR /srv/app/ 6 | 7 | # base for python 8 | COPY ./src/program_generator/requirements.txt /srv/app/src/program_generator/requirements.txt 9 | RUN pip install -r src/program_generator/requirements.txt 10 | 11 | # base for evmone 12 | WORKDIR /srv/ 13 | RUN git clone --recursive https://github.com/imapp-pl/evmone.git temp/evmone_builder 14 | RUN mkdir -p /srv/temp/evmone_builder/build 15 | WORKDIR /srv/temp/evmone_builder/build 16 | 17 | RUN cmake .. -DEVMONE_TESTING=ON \ 18 | && cmake --build . -- 19 | 20 | # get our files for evmone 21 | # NOTE: we don't do `RUN git submodule update --init`. You should do this in the host 22 | COPY ./src/instrumentation_measurement/evmone /srv/app/src/instrumentation_measurement/evmone 23 | 24 | # refresh the evmone build from `master` to our branch 25 | RUN mv /srv/temp/evmone_builder/build /srv/app/src/instrumentation_measurement/evmone 26 | WORKDIR /srv/app/src/instrumentation_measurement/evmone/build 27 | RUN rm /srv/app/src/instrumentation_measurement/evmone/build/CMakeCache.txt 28 | 29 | RUN cmake .. -DEVMONE_TESTING=ON \ 30 | && cmake --build . 
-- 31 | 32 | # get the remainder of our files 33 | COPY ./src/ /srv/app/src/ 34 | 35 | WORKDIR /srv/app/ 36 | 37 | # check correct host configuration 38 | RUN chmod a+x /srv/app/src/check_clocksource.sh 39 | RUN /srv/app/src/check_clocksource.sh 40 | -------------------------------------------------------------------------------- /legacy/gas-estimator/Dockerfile.geth: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | ARG MEASUREMENT_MODE=all 4 | 5 | WORKDIR /srv/app/ 6 | 7 | # base for python 8 | COPY ./src/program_generator/requirements.txt /srv/app/src/program_generator/requirements.txt 9 | RUN pip install -r src/program_generator/requirements.txt 10 | 11 | # base for golang 12 | RUN wget --no-verbose --show-progress --progress=bar:force:noscroll \ 13 | https://golang.org/dl/go1.17.1.linux-amd64.tar.gz 14 | 15 | RUN tar -C /usr/local -xzf ./go1.17.1.linux-amd64.tar.gz 16 | RUN rm go1.17.1.linux-amd64.tar.gz 17 | 18 | ENV PATH=$PATH:/usr/local/go/bin 19 | ENV GOPATH=/srv/app/.go 20 | ENV GO111MODULE=off 21 | ENV GOBIN=/srv/app/.go/bin 22 | 23 | # fixed golang dependencies 24 | RUN go get github.com/ethereum/go-ethereum 25 | 26 | # get our files 27 | WORKDIR /srv/app/.go/src/github.com/ethereum/go-ethereum 28 | RUN git remote add imapp-pl https://github.com/imapp-pl/go-ethereum.git 29 | RUN git fetch imapp-pl wallclock-${MEASUREMENT_MODE} 30 | RUN git checkout wallclock-${MEASUREMENT_MODE} 31 | COPY ./src/ /srv/app/src/ 32 | 33 | WORKDIR /srv/app/src/instrumentation_measurement 34 | RUN go get ./geth/... 
35 | 36 | WORKDIR /srv/app/ 37 | 38 | # check correct host configuration 39 | RUN chmod a+x ./src/check_clocksource.sh 40 | RUN ./src/check_clocksource.sh 41 | 42 | # our runtime config 43 | ENV GOGC=off 44 | -------------------------------------------------------------------------------- /legacy/gas-estimator/Dockerfile.openethereum: -------------------------------------------------------------------------------- 1 | FROM rust:1.55.0 2 | 3 | # RUN apk update && apk add rust cargo yasm cmake 4 | RUN apt update 5 | RUN apt install -y yasm cmake python3-pip 6 | RUN alias python=python3 7 | 8 | WORKDIR /srv/app/ 9 | 10 | # base for python 11 | COPY ./src/program_generator/requirements.txt /srv/app/src/program_generator/requirements.txt 12 | RUN pip install -r src/program_generator/requirements.txt 13 | 14 | # get our files for openethereum 15 | # NOTE: we don't do `RUN git submodule update --init`. You should do this in the host 16 | COPY ./src/instrumentation_measurement/openethereum /srv/app/src/instrumentation_measurement/openethereum 17 | WORKDIR /srv/app/src/instrumentation_measurement/openethereum/evmbin/ 18 | 19 | RUN cargo build --release 20 | 21 | # get the remainder of our files 22 | COPY ./src/ /srv/app/src/ 23 | 24 | WORKDIR /srv/app/ 25 | 26 | # check correct host configuration 27 | RUN chmod a+x /srv/app/src/check_clocksource.sh 28 | RUN /srv/app/src/check_clocksource.sh 29 | -------------------------------------------------------------------------------- /legacy/gas-estimator/Makefile: -------------------------------------------------------------------------------- 1 | MEASUREMENT_MODE ?= all 2 | IMAGE_VERSION ?= latest 3 | 4 | build: build-geth build-evmone build-openethereum 5 | 6 | build-geth: 7 | docker build -f Dockerfile.geth \ 8 | --tag "gas-cost-estimator/geth_${MEASUREMENT_MODE}:${IMAGE_VERSION}" \ 9 | --build-arg MEASUREMENT_MODE=${MEASUREMENT_MODE} \ 10 | . 
11 | 12 | build-evmone: 13 | docker build -f Dockerfile.evmone --tag "gas-cost-estimator/evmone_${MEASUREMENT_MODE}:${IMAGE_VERSION}" . 14 | 15 | build-openethereum: 16 | docker build -f Dockerfile.openethereum --tag "gas-cost-estimator/openethereum_${MEASUREMENT_MODE}:${IMAGE_VERSION}" . 17 | 18 | measure-geth: 19 | docker run --rm \ 20 | --privileged \ 21 | --security-opt seccomp:unconfined \ 22 | -it gas-cost-estimator/geth_${MEASUREMENT_MODE}:${IMAGE_VERSION} \ 23 | sh -c "cd src && python3 program_generator/program_generator.py generate --fullCsv | python3 instrumentation_measurement/measurements.py measure --mode ${MEASUREMENT_MODE} --sampleSize=5 --nSamples=1" 24 | -------------------------------------------------------------------------------- /legacy/gas-estimator/README.md: -------------------------------------------------------------------------------- 1 | gas-cost-estimator 2 | 3 | [Stage I report](https://github.com/imapp-pl/gas-cost-estimator/blob/master/docs/report_stage_i.md) 4 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/execution_comparison.md: -------------------------------------------------------------------------------- 1 | ### Execution Comparison 2 | 3 | This document will analyze and compare the exact flow of execution of the interpreter loop, and how is its computational cost measured. 4 | 5 | The goal is to know, whether the measurements, as compared between various EVM implementations and various OPCODEs, are collected in a "fair" fashion. "Fair" in this context mean not only (or not as much as) fairness between implementations, but rather _fair relative treatment_ of all OPCODEs in all implementations. 6 | 7 | For now, we focus on the individual OPCODE measurements, which we used in preliminary exploration. **TODO (optional)** repeat this for whole-program measurements, if we do them. 8 | 9 | ### Notes 10 | 11 | 1. 
`geth` incorporates a lot of setup which gets measured along with the _first_ instruction. Later this is worked around for programs, where only a single instruction is interesting, by prepending a throw-away `PUSH1`, wherever the interesting instructions would be the first one. `evmone` and `OpenEthereum` don't have this. 12 | - this should be fixed by moving the `CaptureStart` in a forked `go-ethereum` implementation. It should be placed deeper down the call stack, just before entering the first interpreter loop iteration 13 | - **EDIT**: this has been solved differently: we modify the interpreter code 14 | 2. In order to ensure standardization and portability, easy and succinct rules of how to measure should be devised, so that such comparisons aren't necessary in the future. See [Measurement standard ruleset](measurement_standard_ruleset.md): 15 | 3. `evmone` does a preprocessing step `analysis.cpp`, which slightly skews measurements - some of the effort to do some OPCODEs will be "put" under "intrinsic OPCODE `BEGINBLOCK`" executing at the end of each code block. `geth` and `OpenEthereum` don't have this. 16 | - `BEGINBLOCK` (manifesting as `JUMPDEST` in OPCODE tracing) needs special attention in larger programs. We must come up with a way to handle it, since other implementations will not have this "intrinsic instruction". **TODO** 17 | 4. `evmone` perceivably measures _only_ the execution of the OPCODE (as opposed to `geth`), but this is not the case. In `evmone` all logic done in the main interpreter loop in `geth` is done deeper down the call stack. 18 | 5. ~`OpenEthereum` excludes the `while` loop condition used in the interpreter loop (`geth` and `evmone` include it)~ 19 | 20 | - **EDIT**: done for `evmone` [here](https://github.com/imapp-pl/evmone/pull/2) 21 | - **EDIT**: done for `openethereum` [here](...) 22 | 23 | 5. 
`geth` and `evmone` measurements are written to a pre-allocated array on every instruction, ~while `OpenEthereum` write the CSV data straight to `stdout`, this might be slightly unfair~ 24 | 25 | - **EDIT**: done for `openethereum` [here](...) 26 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/instrumentation_measurement/evmone.md: -------------------------------------------------------------------------------- 1 | ## Evmone 2 | 3 | ### Installation and running 4 | 5 | 1. Building 6 | 7 | ``` 8 | mkdir build 9 | git submodule update --init 10 | cd build 11 | cmake .. -DEVMONE_TESTING=ON 12 | cmake --build . -- -j $(nproc) 13 | ``` 14 | 15 | Changes related to the Gas Cost Estimator are in branch `wallclock` in both `evmone` and `evmc` git submodules. 16 | 17 | I got compile errors because of old gcc not supporting C++17 18 | 19 | 1. https://askubuntu.com/questions/466651/how-do-i-use-the-latest-gcc-on-ubuntu/1163021#1163021 20 | 2. then: 21 | 22 | ``` 23 | sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 10 24 | sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 10 25 | ``` 26 | 27 | 2. Running 28 | 29 | From the `build` directory: 30 | 31 | ``` 32 | evmc/bin/evmc run --vm ./lib/libevmone.so [--print-opcodes] [--measure-all] [--measure-total] [--measure-one ] [--repeat ] 33 | ``` 34 | 35 | for example: 36 | 37 | ``` 38 | evmc/bin/evmc run --vm ./lib/libevmone.so --print-opcodes --measure-all --measure-one 3 --repeat 2 602060070260F053600160F0F3 39 | ``` 40 | 41 | To measure timer overheads: 42 | 43 | ``` 44 | evmc/bin/evmc measure-overheads 45 | ``` 46 | 47 | ### Comments 48 | 49 | - evmone adds `5B` (`JUMPDEST`) instruction in the beginning if there is none 50 | 51 | ### Rough notes 52 | 53 | 1. ~Probably not a good fit to meausure, only instrumentation~ EDIT: we'll measure it 54 | 2. EVMC API - these are tools that go with the EVMONE VM implementation. 55 | 1. 
under `/build/evmc/bin/evmc run --help` one finds help about how to run bytecode 56 | 2. trying `evmc/bin/evmc run 0x60` - this is `PUSH1`, check out https://www.ethervm.io/#60 57 | 1. PUSH 20 58 | 2. PUSH 07 59 | 3. MUL 60 | 4. PUSH F0 (offset) 61 | 5. MSTORE8 62 | 6. PUSH 01 (length) 63 | 7. PUSH F0 (offset) 64 | 8. RETURN 65 | 9. `evmc/bin/evmc run --vm ./lib/libevmone.so 602060070260F053600160F0F3`, nice 66 | 67 | ### Notes on execution 68 | 69 | 1. `auto analysis = analyze(rev, code, code_size);` before execution does some preallocations and preprocessing based on static code information, like assembling information about code blocks - I think it's still "fair", but might definitely cause uneven "gas dynamics" if compared to simple interpreters 70 | - **BUT** - some operations are done per-block, e.g. _static_ gas operations and checks etc. This isn't very fair, it will fatten the perceived cost of 71 | 2. The `JUMPDEST` which appears at the beginning of each program is an intrinsic opcode `BEGINBLOCK`, `evmone` specific 72 | - "These intrinsic instructions may be injected to the code in the analysis phase" 73 | - "This instruction is defined as alias for JUMPDEST and replaces all JUMPDEST instructions" 74 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/instrumentation_measurement/example_bytecode_programs.md: -------------------------------------------------------------------------------- 1 | ## Example bytecode programs 2 | 3 | This is just a quick dump of simple, working programs to smoke test stuff with: 4 | 5 | - `6020` - just push 6 | - `602060070260F053600160F0F3` - push, mul and return 7 | - `62FFFFFF600020` - some keccak 8 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/instrumentation_measurement/openethereum.md: -------------------------------------------------------------------------------- 1 | ## Openethereum 2 | 
3 | ### Installation and running 4 | 5 | 1. To build, run in the `evmbin` directory of openethereum repository (submodule), at branch `wallclock` 6 | ``` 7 | cargo build --release 8 | ``` 9 | this should produce `openethereum-evm` binary in openethereum's `target/release/` directory. 10 | 2. Running 11 | 12 | ``` 13 | ./target/release/openethereum-evm --code [--repeat ] [--print-opcodes] [--measure-overhead] 14 | ``` 15 | 16 | for example: 17 | 18 | ``` 19 | ./target/release/openethereum-evm --code 602060070260F053600160F0F3 --repeat 2 20 | ``` 21 | 22 | If `--measure-overhead` is passed, bytecode will not be executed. If `--print-opcodes` is passed, only one repetition will be executed (no matter what `--repeat` value is). 23 | 24 | ### Notes on execution 25 | 26 | 1. only `let result = self.step(ext);` is included under the measurement. To capture most of "the EVM normally does when executing" we should also capture **TODO**: 27 | - `loop {` 28 | - the entire `match result {` 29 | Proposed solution similar to what [this PR for `evmone` suggests](https://github.com/imapp-pl/evmone/pull/2) 30 | 2. what is in `self.step(ext)` except for the expected normal operation? 31 | 32 | - `self.do_trace = self.do_trace && ext.trace_next_instruction(`, with a comment about overhead, but `&&` shortcircuits and I'm assuming `self.do_trace` is false, so this is minor. It also is what normally the node would go through 33 | - similar comment on the `evm_debug!` 34 | 35 | Nothing out of the ordinary there 36 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/instrumentation_measurement/openethereum_ewasm.md: -------------------------------------------------------------------------------- 1 | # Instrumentation and measurement using `OpenEthereum` with Ewasm 2 | 3 | 1. 
OpenEthereum currently seems to use a relatively old version of [`wasmi`](https://github.com/paritytech/wasmi) - [`0.3.0`](https://github.com/paritytech/wasmi/tree/0.3), our changes branch off of that (https://github.com/paritytech/wasmi/compare/master...imapp-pl:time_measurement) 4 | 1. Useful reading about Wasm vs `wasmi`/Ewasm: https://github.com/paritytech/wasmi/blob/0.3/src/isa.rs 5 | - the instruction set (e.g. what is printed out in the instrumentation loop) is there, e.g. `I32Add`... 6 | 1. Wasm bytecode starts with: `WASM_BINARY_MAGIC + WASM_BINARY_VERSION` = `0061736d01000000` 7 | 1. https://webassembly.github.io/spec/core/appendix/index-instructions.html - another listing of instructions with stack requirements 8 | 1. Decode Wasm binary format from hex: 9 | ``` 10 | cat wasm.example | python3 -c "import sys, binascii; sys.stdout.buffer.write(binascii.unhexlify(input().strip()))" > wasm.example.bin 11 | ``` 12 | - this can then be loaded to [`wasm2wat`](https://webassembly.github.io/wabt/demo/wasm2wat/) (see below for WABT) 13 | 14 | ### `chfast` notes 15 | 16 | ``` 17 | (func (export "call")) 18 | ``` 19 | 20 | ``` 21 | (module 22 | (func (export "call") 23 | i32.const 2 24 | i32.const 2 25 | i32.add 26 | drop 27 | ) 28 | ) 29 | ``` 30 | 31 | ``` 32 | (module 33 | (func (export "call") 34 | (call "useGas" 4) 35 | i32.const 2 36 | i32.const 2 37 | i32.add 38 | drop 39 | ) 40 | ) 41 | ``` 42 | 43 | wabt https://pengowray.github.io/wasm-ops/ https://webassembly.studio 44 | 45 | https://github.com/ewasm/design/blob/master/metering.md https://github.com/ewasm/design/blob/master/determining_wasm_gas_costs.md 46 | 47 | ### WABT 48 | 49 | 1. It installed as documented in gh for me 50 | 51 | #### Integrate to measurements 52 | 53 | Execute everything from the dir where you have `wabt` and `openethereum` repos, and `example.wat` 2. Generate hex bytecode ` wabt/build/wat2wasm example.wat && cat example.wasm | hexdump -ve '1/1 "%02x"' && echo ` 3.
Generate hex bytecode from wat and execute ` wabt/build/wat2wasm example.wat && \ cat example.wasm | \ hexdump -ve '1/1 "%02x"' | \ xargs -L1 \ openethereum/target/release/parity-evm \ --gas 5000 \ --chain openethereum/ethcore/res/instant_seal.json \ --code ` 54 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/instrumentation_measurement/other_tools.md: -------------------------------------------------------------------------------- 1 | Notes on other tools for instrumentation & measurement 2 | 3 | ### Takeaways 4 | 5 | Nothing relevant/useful yet 6 | 7 | ### Rough notes 8 | 9 | 1. http://wingtecher.com/themes/WingTecherResearch/assets/papers/saner-evm.pdf - " EVM\*: From Offline Detection to OnlineReinforcement for Ethereum Virtual Machine" - Not relevant to us, instrumentation for aborting of dangerous txs in the EVM. Not this kind of instrumentation we need. 10 | 2. https://www.researchgate.net/publication/331789943_Analysis_of_Ethereum_Smart_Contracts_and_Opcodes - "Analysis of Ethereum Smart Contracts and Opcodes" 11 | - Not relevant to us, just analysis of frequency of opcodes in the verified contracts (static) 12 | 3. https://ethereum.stackexchange.com/questions/4446/instrumenting-evm - "Instrumenting EVM" 13 | - _Maybe useful_ - "To do this, you need to define a VM log collector, which implements StructLogCollector. This function gets called on every step of the VM, and is provided with copies of the memory, stack, and modified parts of the storage, along with the program counter, current opcode...", this is for `go-ethereum`. 14 | - follow the Nick Johnsons link to etherquery 15 | - (done) revisit if `go-ethereum` specific measuring needs to be done using this 16 | 4. 
https://publik.tuwien.ac.at/files/publik_278277.pdf - "A Survey of Tools for Analyzing Ethereum Smart Contracts" - mentions one tool for EVM instrumentation: ContractLarva 17 | - https://www.researchgate.net/publication/327834131_Monitoring_Smart_Contracts_ContractLarva_and_Open_Challenges_Beyond - ContractLarva 18 | - not relevant to us, Solidity level 19 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/measurement_standard_ruleset.md: -------------------------------------------------------------------------------- 1 | ## Measurement standard ruleset 2 | 3 | In order to ensure easy portability and adaptability to various clients and environments, we write down a ruleset of how OPCODE measurements should be conducted. 4 | 5 | ### `measure_all` 6 | 7 | In `measure_all` we measure the individual times of all OPCODEs executed for a given program. We also measure the timer overhead alongside the OPCODE execution measurement. 8 | 9 | It turned out to be much better to measure by applying crude modifications to the EVM interpreter code, than to measure via calling a callback (e.g. `Tracer.CaptureState` for `geth`). To make this easier and as uniform as possible, follow these guidelines: 10 | 11 | 1. Measure the entire block of code which constitutes a single interpreter iteration. In particular, measure all code which is repeatedly executed as OPCODEs are interpreted. 12 | 2. Leave all preprocessing out. 13 | 3. Make sure all tracing/debugging is off, except what we need to trace. 14 | 4. Gather the measurements consistently. There should be no allocations done by the measurement code. 15 | 5. Measurements should be gathered in a pre-allocated collection. 16 | 6. Don't do IO (`println` etc.) in the loop. 17 | 7. Look into whether preprocessing or similar optimizations don't "move effort" from one instruction to another, like `evmone` does. If so, analyze impact and unfairness. 18 | 8.
Use timer with least overhead, the most low-level one available. 19 | 9. When measuring the timer overhead, capture the time in exactly same way as done for the OPCODE measurement. 20 | 21 | Follow this pseudocode pattern: 22 | 23 | ```go 24 | // all preparations/allocations of the EVM code 25 | // instrumentation preparations/allocations 26 | start_time = now() 27 | while { 28 | // EVM code 29 | // OPCODE code etc... 30 | 31 | switch some_end_conditions { 32 | continue: 33 | // EVM code 34 | end_time = now() 35 | // measure the timer overhead 36 | end_timer_time = now() 37 | opcode_duration = end_time - start_time 38 | timer_duration = end_timer_time - end_time 39 | 40 | durations.store_with_no_allocations(opcode_duration, timer_duration) 41 | start_time = now() 42 | break: 43 | // EVM code 44 | end_time = now() 45 | // measure the timer overhead 46 | end_timer_time = now() 47 | opcode_duration = end_time - start_time 48 | timer_duration = end_timer_time - end_time 49 | 50 | durations.store_with_no_allocations(opcode_duration, timer_duration) 51 | // let it break normally 52 | } 53 | } 54 | 55 | durations.print() 56 | ``` 57 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/meetings/2020-10-30.md: -------------------------------------------------------------------------------- 1 | 1. Let's do notes from everything 2 | 2. GH project + 1-2 day effort tickets. Cut whenever task extends. Goal: know what we're doing and identify blockers 3 | 3. Choose boring tech 4 | 4. How public? 5 | 6 | - post on ethresearch 7 | - final - yes 8 | - initial - not this week, but when we ramp 9 | - how do we describe funding/association? 10 | 11 | 5. Define and document API's on the 3 domains 12 | 13 | - instrumentation & measurement 14 | - sample programs 15 | - model 16 | 17 | 6. Gather prior docs/materials 18 | 19 | - RZ will send 20 | - Radek to send if there has been an anouncement of our project 21 | 22 | 7. 
Ideas how to generate sample programs: 23 | 24 | - random, genetic to optimize throughput 25 | - chfast has a measurement tool for geth (EVM) 26 | - we measure 27 | - we model and find gas coefficients for operations 28 | - watch for coefficients prone to attacks 29 | - chfast suggests arithmetics are overpriced. This is an issue given current pressure for statelessness 30 | - let's be flexible 31 | 32 | 8. Do a kick-off call with chfast and Marcin Benke 33 | 34 | - Radek to let them know 35 | 36 | 9. 15-min sync up. 10.00am on Fridays 37 | 38 | - Piotr to invite 39 | 40 | 10. use Hangouts, make a group 41 | 11. Radek to introduce to Jake Hudson from EF 42 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/meetings/2020-11-06.md: -------------------------------------------------------------------------------- 1 | 1. measure instruction resource use in isolation or by measuring indirectly (measuring entire program run)? 2 | - insert measurement into the interpreter loop - feasible. Do not write down instructions, do it separately - once for measurement, second for opcodes for minimal 3 | - measure entire time for verification 4 | - feasible as plan A 5 | - sampling and batch measurement as plan B/follow-up/verification 6 | 2. MB: define our value added. RZ: this is exploratory phase, we propose a set of methods 7 | - our focus different than Broken Metre - want to propose a consistent tool for gas pricing 8 | - Stage I should allow to build out the necessary tooling 9 | - Stage I - hacky implementations to test out feasibility are within scope 10 | 3. which resources are we focusing on: clock time only or RAM footprint as well. 11 | - RAM-gas-pricing - cost for RAM rises quadratically - so this is not a first-priority 12 | - PB to read on the takeaways from Broken Metre RAM-gas correlation 13 | - Let's focus on CPU-intensity / execution time 14 | 4. is CALLDATACOPY an IO operation?
we have it in our list but Broken Metre tells us it is IO (should we take out IO-operations?) 15 | - CALLDATACOPY - should not be considered IO, might be side effect of particular implementation 16 | - something to be careful about 17 | 5. evmone instrumentation - there is just a general idea how to do this, no ready tool yet. PB to let know where to start 18 | 6. fork repos to `imapp-pl` 19 | 20 | ### Action items 21 | 22 | - PB to read on the takeaways from Broken Metre RAM-gas correlation 23 | - evmone instrumentation - PB to let know where to start 24 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/meetings/2020-11-13.md: -------------------------------------------------------------------------------- 1 | 1. PD: pls feedback on strategy.md early PR 2 | 2. tentatively meet with MB & PB in 2 weeks 3 | 3. scope of stage 1: proposing iterative approach on program generation + spike to explore 1st iterations PD proposition: 4 | - iteration 0 - simplest program to run single OPCODE 5 | - iteration 1 - expand to capture impact of input values 6 | - iteration 2 - expand to capture impact of "surrounding execution" 7 | - etc. 8 | 4. how far to pursue instrumentation & measurement? 9 | - baseline - wallclock time 10 | - other measurements on top of that, but what about portability across implementations?
11 | - try to allow for convenient execution of various measurements 12 | - allow for repetition 13 | - evmone is our "3rd choice" EVM implementation, could be useful for reference/comparison 14 | 15 | ### Action items 16 | 17 | - PD to ask PB about geth flag to measure overhead 18 | - MS to focus on evmone instrumentation&measurement spike first, next OpenEthereum instrumentation&measurement spike 19 | - PD to focus on geth instrumentation&measurement spike, if done spike program generation 20 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/meetings/2020-11-20.md: -------------------------------------------------------------------------------- 1 | 1. Python probably the best bet for scripting of sample program generation 2 | 1. Result from research can be either used to update gas cost for OPCODEs or optimize OPCODEs in implementations where they are underperforming 3 | 4 | ### Action items 5 | 6 | 1. RZ to look at repo and PR 7 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/meetings/2020-11-27.md: -------------------------------------------------------------------------------- 1 | 1. The approach to measure instructions is not final. Measuring whole programs is still on the list. Measuring whole programs introduces a lot of burden on the analysis stage. 2 | 2. PD plan for today: cleanup scripts, (optional) analyze first results in R, search for prior art on VM instrumentation & analysis. 3 | 3. MS plan for today: cleanup evmone instrumentation, standardize outputs 4 | 4. MS 50% time, PD 20% time, aiming for II half of Jan to have the Stage I report. 5 | 5. Consider measuring only `operation.execute` in `geth`, to fully match instrumentation adopted for `evmone`. For now, both instrumentations measure "the entire interpreter loop", if we neglect the `while(instr is nullptr)` in `evmone`, so measurements are quite compatible. 
6 | 6. Need to be mindful about how we want to measure the EVMs and how they are initialized and what do they do in their interpreter loop. Make notes about geth/evmone/openethereum for now, we'll work on making measurements 100% compatible later. 7 | 8 | ### Action items 9 | 10 | 1. PD to check ethereum magicians and eth research for prior art 11 | 2. RZ to review https://github.com/imapp-pl/gas-cost-estimator/pull/5 12 | 3. MS to review https://github.com/imapp-pl/gas-cost-estimator/pull/5 13 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/meetings/2020-12-04.md: -------------------------------------------------------------------------------- 1 | 1. turbo geth / silkworm / evmone https://ledgerwatch.github.io/turbo_geth_release.html#Integration-with-evmone-via-EVMC - evmone as first-class citizen EVM to measure 2 | 2. PD: opbench.md and timing_of_jvm_instructions.md, monotonic clocks, nanosecond precision 3 | 3. 0xfe - invalid opcode, let's keep measuring as we do now, but we'll need to revisit. Same JUMP JUMPI 4 | 4. PD, RZ: let's push notes to repo, even messy 5 | 5. PD less available this week 6 | 6. MS: evmone has +1 instruction vs geth - to investigate 7 | 7. MS to push/PR to .py scripts as needed 8 | 8. MS: geth measurements much slower than evmone, and than geth for PD - to investigate 9 | 9. Plan MS: wrapup evmone, send csv to PD (or run R), openethereum/rust ramp up 10 | 10. Plan PD: papers, nanosecond measurements investigation, other measurements for comparison (?) 11 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/meetings/2020-12-11.md: -------------------------------------------------------------------------------- 1 | 1. eWasm - want to start next week, preferably MS 2 | 2. Reports ToC - paper like. More or less: 1/ related work 2/ our results (non-final) 3/ detailed plan for Stage II 3 | 3. MS: OpenEthereum 4 | 4. 
PD: wrap up papers, result comparison in R, add collective measurements and compare 5 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/meetings/2020-12-18.md: -------------------------------------------------------------------------------- 1 | 1. PD: importance of measuring and controlling time measurement, see: [here](https://htmlpreview.github.io/?https://github.com/imapp-pl/gas-cost-estimator/blob/master/src/analysis/exploration_timers.nb.html) for details 2 | 2. PD: per-instruction timing would be very good for (3.) 3 | 3. PD: algorithmic generation of most-informative (e.g. highest variance) sets of programs 4 | 4. We'll discuss this with MB & PB next week if possible 5 | 5. MS: we're measuring OpenEthereum instructions, need to do repetition and parameters and output standardization. We'll try to have a csv result to compare td/tmr, for PD to run comparison on (with geth & evmone). Will keep us posted 6 | 6. MS: hurdle with Rust - very slow compilation, looks like a common Rust issue? 7 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/meetings/2020-12-22.md: -------------------------------------------------------------------------------- 1 | 1. Proposed method makes sense, but need to be careful and do validations 2 | 1. PB: Deep exploring of cost variability of opcodes is interesting, but may be seen as optional. 3 | 1. 6 items to look at until Stage I (tentatively end January) 4 | - eWasm, at least dip the toe in one, best pick: OpenEthereum's wasm. Enough to tell if our method applies or we suggest alternatives we have 5 | - explore timer and measure overhead for Rust 6 | - explore timer and measure overhead for C++ 7 | - look into the differences of measurement implementations in 3 (4) implementations to see if they're fair 8 | - write Stage I report 9 | - ? can't remember :) 10 | 11 | ### Action items 12 | 13 | 1.
PB to review evmone measurement 3 PRs 14 | 1. PB to look into OpenEthereum measurement PRs, suggest reviewers 15 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/meetings/2021-01-08.md: -------------------------------------------------------------------------------- 1 | 1. Aim for 22 or 29th Jan 2 | 2. Plan in GH project 3 | 3. eWASM choice, see https://github.com/imapp-pl/gas-cost-estimator/issues/20 4 | 5 | ### Action items 6 | 7 | 1. RZ, MS to review https://github.com/imapp-pl/gas-cost-estimator/pull/19 8 | 2. MS to prepare list of PRs to review for PB and ping 9 | 3. PD to ping PB and MB to review https://github.com/imapp-pl/gas-cost-estimator/pull/19 10 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/meetings/2021-01-22.md: -------------------------------------------------------------------------------- 1 | ### Action points 2 | 3 | 1. RZ to set up a call for MS and PB about OpenEthereum/eWASM 4 | 2. MS to make notes and braindump in the meantime 5 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/meetings/2021-02-05.md: -------------------------------------------------------------------------------- 1 | 1. Let's do Ewasm on par with EVM, to demonstrate feasibility 2 | 2.
Generate programs in wat + wat2wasm + measure per instruction + pick measured instruction, ETA 1-2 weeks from now 3 | 4 | ### Action points 5 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/papers/adaptive_gas_cost_mechanism.md: -------------------------------------------------------------------------------- 1 | ## An Adaptive Gas Cost Mechanism for Ethereum to Defend Against Under-Priced DoS Attacks 2 | 3 | Ting Chen, Xiaoqi Li, Ying Wang, Jiachi Chen, Zihao Li, Xiapu Luo, Man Ho Au and Xiaosong Zhang 4 | 5 | https://arxiv.org/pdf/1712.06438.pdf 6 | 7 | ### Notes 8 | 9 | 1. "Emulation-based Measurement Framework" - how they measured the underpriced opcodes: 10 | - extract just `.execute` for a stripped-down execution. "various utility functions for supporting the execution." (it might be relevant in our case to _include_ those utility functions, since we want to measure node EVM implementations) 11 | - repeat and measure once "we run the interpretation handler in the emulated environment millions of times, because a single run is too short to conduct the measurement" 12 | - synthesized environment: "If the operation manipulates the stack/memory/storage, we synthesize the stack/memory/storage with random length and generates random numbers as their items" 13 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/papers/bic_to_cpu.md: -------------------------------------------------------------------------------- 1 | ## Continuous Bytecode Instruction Counting for CPU Consumption Estimation 2 | 3 | Andrea Camesi, Jarle Hulaas, Walter Binder 4 | 5 | https://www.researchgate.net/publication/37450070_Continuous_Bytecode_Instruction_Counting_for_CPU_Consumption_Estimation 6 | 7 | ### Takeaways 8 | 9 | Some interesting threads to potentially follow, but this approach doesn't differentiate between different JVM instructions, and also focuses on
"typical applications", so different from ours. 10 | 11 | ### Notes 12 | 13 | 1. Is generally about translating a metric "BIC" (bytecode instruction count) to CPU time (exactly what we like) for JVM. "we show experi-mentally that for each platform there is a stable, application-specific bytecode rate that can be used for translating a BIC value into the cor-responding CPU consumption." 14 | 1. visit Java Resource Accounting Framework, Second Edition(J-RAF2, http://www.jraf2.org) 15 | - dead link 16 | 1. ! "use the knowledge ofBRexpin various man-agement tasks, like load-balancing or usage-based billing" ! **usage-based billing**. Follow links resulting for searching for this: 17 | - https://core.ac.uk/download/pdf/82526395.pdf - "Portable Resource Control in Java: Application to Mobile Agent Security" - not relevant 18 | - https://www.researchgate.net/publication/2848223_Portable_Resource_Control_in_Java/fulltext/0e5fb082f0c41c4932e6fc21/Portable-Resource-Control-in-Java.pdf - "Portable Resource Control in Java" - not relevant 19 | - https://www.researchgate.net/publication/223604760_Portable_virtual_cycle_accounting_for_large-scale_distributed_cycle_sharing_systems - "Portable virtual cycle accounting for large-scale distributed cycle sharing systems" - **TODO** optionally get this article, no free access 20 | 1. Follow citations "In contrastto related work which takes a low-level approach [11, 15,20]" 21 | 1. J-RAF2 and BRexp: J-RAF2 collects BIC and they add on CPU time measurement to this. Then they subtract the collecting routine execution time. 22 | 1. try finding Ethereum equivalent of the SPEC JVM98 SPEC JBB2005 Java Grande etc. is there anything like this? 
23 | - however: "this benchmark implements a fairly varied set of activities,and that the statistical characteristics of the collected sam-ples, especially the stability ofBRexpare representative ofmany real-world applications" Such a benchmark "many real-world applications" isn't good enough for Ethereum 24 | - nothing found 25 | 1. Rationale for BRexp: a/ measurement precision b/ platform dependence "The objective of determining the CPU consumption forJava bytecodes is difficult because of the level of precisionthat is required: the time taken to execute any single byte-code on recent hardware is usually far below the measure-ment resolution offered by the JVM or by the OS itself.Another difficulty is that the desired timings are specific toeach{JVM, OS, hardware}platform combination" 26 | - how much this applies to EVMs? 27 | - followed: "In previous ex-periments, we used standard APIs (notably the JVMPI [17]profiling API) for measuring elapsed per-thread CPU time,but the inherent lack of resolution" 28 | - nothing 29 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/papers/broken_metre.md: -------------------------------------------------------------------------------- 1 | ## Broken Metre:Attacking Resource Metering in EVM 2 | 3 | Daniel Perez Benjamin Livshits 4 | 5 | https://arxiv.org/pdf/1909.07220.pdf 6 | 7 | ### Thoughts 8 | 9 | 1. We originally wanted to separate sample program generation from model, and have them communicate via some form of an "API". 10 | - But what if we want to generate programs dynamically, based on a result from the model (as Broken Metre does)? 11 | 2. The "references to follow" hold some interesting reading about gas-exploration tools 12 | 13 | ### Rough dump notes 14 | 15 | 1. mining contract history to detect outliers, gas cost vs resources (CPU & RAM) 16 | 2. low-throughput contracts, contracts that cost too little gas to execute 17 | 1. 
throughput = gas/second 18 | 3. references to follow: 19 | 1. (done) and the gas cost has also been reviewedseveral times [11], [40] to increase the cost of the under-pricedinstructions. 20 | 2. (done) our problem resembles other program synthesistasks [33] [Program Synthesis](https://www.microsoft.com/en-us/research/wp-content/uploads/2017/10/program_synthesis_now.pdf) a generic book on auto-generating programs. Irrelevant for now 21 | 3. (done) Chen etal. [18] propose a mechanism where contracts using a singleinstruction in excess would be penalised. 22 | - aimed at opportunistically punishing the abusing contracts (which do an excessively expensive operation) 23 | - [`adaptive_gas_cost_mechanism.md`](./adaptive_gas_cost_mechanism.md) 24 | 4. (done) IMPORTANT: Yang et al. [58] have recently empiricallyanalysed the resource usage and gas usage of the EVM in-structions. They provide an in-depth analysis of the time takenfor each instructions both on commodity and professionalhardware. 25 | - done in [`empirically_analyzing.md`](./empirically_analyzing.md) 26 | 5. (done, irrelevant) Gas Usage Optimisation:Gasper [17] is one of the firstpaper which has focused on finding gas related anti-patterns forsmart contracts 27 | 6. (done) MadMax [32] is a static analysis tool to find gas-focusedvulnerabilities 28 | - irrelevant: "find patternswhich could cause out-of-gas exceptions and potentially lockthe contract funds, rather than gas-intensive pattern" 29 | 7. (done, irrelevant) Gastap [5] is a static analysis tool which allows to computesound upper bounds for smart contracts 30 | 4. programs where the cache influences exe-cution time by an order of magnitude 31 | 1. This is about page cachin for IO-intensive operations - out of our scope 32 | 5. hardware setup: 33 | 1. We run all of the experiments on a Google CloudPlatform (GCP) [31] instance with 4 cores (8 threads) IntelXeon at 2.20GHz, 8 GB of RAM and an SSD with a 400MB/sthroughput. 
The machine runs Ubuntu 18.04 with the Linuxkernel version 4.15.0. 34 | 2. (Parity bare metal for comparison) more powerful bare-metal machine with 4 cores (8 threads) at 2.7GHZ, 32GB ofRAM and an SSD with 540MB/s throughput 35 | 6. Garbage Collection - watch out for - they decided to use _aleth_ 36 | 7. Our measurement framework is open-sourced2and 37 | 1. https://github.com/danhper/aleth/tree/measure-gas 38 | - found that their instruction benchmarking function uses `clock_gettime(CLOCK_MONOTONIC)`, which worked very bad on golang (**TODO** investigate further?) - [see `OnOpFunc Executive::benchmarkInstructionsOp()`](https://github.com/danhper/aleth/compare/master...measure-gas#diff-e0d85c8989319d0f013c015e07f88792a12ad13af7b8ff8bf75c1954b7adbf53R520) 39 | 8. time and memory measurement: 40 | 1. Weuse a nanosecond precision clock to measure time and measureboth the time taken to execute a single smart contract and thetime to execute a single instruction. To measure the memoryusage of a single transaction, we override globally thenewanddeleteoperators and record all allocations and deallocationsperformed by the EVM execution within each transaction. Weensure that this is the only way used by the EVM to performmemory allocation. 41 | 2. measure memory, we computethe difference between the total amount of memory allocatedand the total amount of memory deallocated 42 | 3. For CPU, we use clock time measurements as a proxy for the CPU usage. 43 | 4. Finally, for storage usage, we count the number of EVMwords (256 bits) of storage newly allocated per transactions. 44 | 1. for storage usage comparison they used `iotop` 45 | 9. modelling: 46 | 1. ~millions of data points 47 | 2. Pearson score for correlation, gas vs resource 48 | 3. multivariate correlation, gas vs principal components of resources 49 | 4. capturing large variance is important 50 | 10. sample program generation: 51 | 1. 
This made it easier: The task we solve is different because we need to define “valid” but not “meaningful” programs and optimise for a well-defined metric: gas throughput 52 | 2. caveat: Second, instructions should not try to access random parts of the EVM memory, otherwise the program could run out of gas 53 | 3. they excluded loops and infinite loops 54 | 4. managing items on the stack is important - never pop too much! 55 | 11. TODO: is CALLDATACOPY an IO operation? we have it in our list but this paper tells us it is IO 56 | 12. Section IV.D and IV.E skipped 57 | 13. "long-term fixes" and how do we fit in? 58 | 1. dynamic pricing from Chen et al. - unsure about feasibility 59 | 2. importance of stateless clients explained, relates to L2 scaling: 60 | 1. The key idea is that instead of forcing clients to store the whole state, entity emitting transactions must send the transaction, the data needed by the transactions, and a proof that this data is correct 61 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/papers/bytecode_monitoring_of_java.md: -------------------------------------------------------------------------------- 1 | ## Bytecode Monitoring of Java Programs 2 | 3 | Wong 4 | 5 | http://www.cs.ox.ac.uk/people/peter.wong/pub/project.pdf 6 | 7 | **TODO** (optional) - read the entire paper, but the interesting part is: 8 | 9 | Timing analysis of Java bytecodes - An initiative that is brought up to investigate the implementation of benchmarking the Java Virtual Machine (JVM) Instruction Set (3.1 Finding methods to calculate running time of bytecodes:) 10 | 11 | ### Notes 12 | 13 | 1. The initial idea is to benchmark single bytecode at a time by repetitively executing individual bytecode in multiples of 10s, 100s and 1000s, to enable JVM to monitor these bytecodes a technique so-called Application Response Measurement (ARM) [8] (**TODO** see [8]) 14 | 2. 
Methods for measurement: hard to follow but: 15 | 1. shell out from C and measure in C 16 | 2. System.currentTimeMillis 17 | 3. clock_gettime system call 18 | 3. bytecodes are duplicated, with the stack being prepared beforehand (it is claimed that the stack size doesn't affect the results). Duplication is done: "1,10,100,1000 and 9000 iteration(s) sequence" 19 | 4. For bytecodes leaving values on the stack, there's a technique similar to `measure inferred` from [`strategy.md`](/docs/strategy.md) - they subtract an earlier calculated timing of the `pop` from the measured opcode timing to get the timing of the opcode itself 20 | 5. They infer JVM optimisation kicked in and "This could be one of the reason (and the same reason) as to why when individual bytecode was timed, one iteration took more time that an average of multiple iterations (e.g. 1000).". Not a problem for us 21 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/papers/empirically_analyzing.md: -------------------------------------------------------------------------------- 1 | ## Empirically Analyzing Ethereum’s Gas Mechanism 2 | 3 | Renlord Yang∗§, Toby Murray∗, Paul Rimba§, Udaya Parampalli∗ 4 | 5 | https://arxiv.org/pdf/1905.00553.pdf 6 | 7 | ### Thoughts 8 | 9 | 1. Good intro to cite the importance of accurate gas costs (node diversity) 10 | 2. Our work could pivot to focus on easy reproducibility and quick time to obtain results and using synthetic data, as opposed to historical data, which has its advantages. 11 | 3. All estimation work, including ours, balances between estimating intrinsic cost of computation and particular optimizations (or lack thereof) of particular node implementations 12 | - in other words: resolving gas cost discrepancies might be done by either updating gas cost of OPCODEs or by optimizing (or aligning optimizations between) node implementations 13 | 4. 
A synthetic approach (ours) is better suited to estimate gas cost under the assumption that transaction execution might be done concurrently on a single machine. 14 | 15 | ### Rough dump notes 16 | 17 | 1. predates Broken Metre, done independently, similar conclusions 18 | 2. includes I/O and focuses on those costs 19 | 3. `aleth`-based, same as Broken Metre 20 | 4. similar to our current approach, they trace every EVM instruction 21 | 5. approach to resource contention on the test machine: "We elected to use a noisy setup for Machine B as it is representative of the hardware choice used by a consumer user." 22 | 6. `BLOCKHASH` is the main offender, but it seems odd that it hasn't been optimized (maybe it has been in more popular node implementations?) 23 | 7. parallel transaction execution is mentioned in Related Work 24 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/papers/holimans_gist_benchmarks.md: -------------------------------------------------------------------------------- 1 | ## "gist and PR from `holiman`" 2 | 3 | Martin Holst Swende `holiman` 4 | 5 | https://gist.github.com/holiman/7153e088af8941379cf21c0e4610d51f (and the original [PR with discussion](https://github.com/ethereum/go-ethereum/pull/21207)) 6 | 7 | ### ELI5 8 | 9 | This is an estimation of the cost (effort) needed to _switch context_ when calling a precompiled contract using STATICCALL. [EIP-2046](https://eips.ethereum.org/EIPS/eip-2046) intends to lower the STATICCALL cost for precompiles from 700 to 40. The gist assesses this change in `geth`. 10 | 11 | ### How is it measured & instrumented? 12 | 13 | It's not instrumented. 14 | 15 | The measurement is done by running an infinite loop in the program and seeing how much time it takes until it depletes `100MGas` (look at the `... ns/op` - this is how long the "depletion" takes - the less, the more overpriced the operation is. 
Don't look at the preceding integer value, this is just golang benchmark stuff). The "right" gas cost of context switching is found when this time is the same whether you do the context switch (`staticcall-identity`) or don't do it (`loop`) and only balance the stack with POPs. 16 | 17 | It is run by [golang benchmarks](https://golang.org/pkg/testing/#hdr-Benchmarks), which measure the loop. Inside it works similarly to `runtime.Execute` which we're using, but could be a useful example of how to strip down the `runtime.Execute` in the future. 18 | 19 | In the discussion in the [original PR](https://github.com/ethereum/go-ethereum/pull/21207) there's a thread of how the "other" ops (JUMP, PUSH, POP etc) contribute and distort the result. 20 | 21 | ### Takeaways 22 | 23 | 1. Entirely different way to measure effort. 24 | 2. Seeking to _balance_ operations - to equate gas spent on equally hard computations. (should cite when explaining motivation) 25 | 3. `holiman`'s approach (compare using gas depletion, rather than the number of times the loop is iterated) couples the measurement with the gas cost and effort for the accompanying ops (JUMP, PUSH, POP). If we do measurements per operation via instrumentation, we're doing something opposite. 26 | 4. [This is linked](https://github.com/matter-labs/openethereum/commit/77471a1d08a0f088dfd3b30802036b3e0fbb38a6) in the discussion. Possibly useful cheatsheet for OpenEthereum 27 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/papers/instruction_timing_model_1976.md: -------------------------------------------------------------------------------- 1 | ## An Instruction Timing Model of CPU Performance 2 | 3 | Bernard L. Peuto Leonard J. Shustek 4 | 5 | https://inspirehep.net/literature/110758 6 | 7 | **NOTE** this is a really old paper, but it has some inspiring thoughts: 8 | 9 | 1. 
OpCode pair investigation - at the hardware measurement level of the 70's, opcode pairs were investigated to check whether the pairing itself contributes to higher load, warranting treating the pair as a new opcode - we should maybe do a similar exercise? 10 | 2. More generally: we could model and explore _variance_ of computational cost of various OpCodes, not only a static cost estimation. E.g. what if `PUSH` behaves very differently in different circumstances? We could generate programs so that they capture this variation best. 11 | 3. Taking this further, there could be parameters to each opcode we don't know about, which should modify the gas cost incurred. 12 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/papers/opbench.md: -------------------------------------------------------------------------------- 1 | ## OpBench: A CPU Performance Benchmark for Ethereum Smart Contract Operation Code 2 | 3 | Amjad Aldweesh, Maher Alharby, Maryam Mehrnezhad, Aad van Moorsel 4 | 5 | https://www.researchgate.net/publication/336007166_OpBench_A_CPU_Performance_Benchmark_for_Ethereum_Smart_Contract_Operation_Code 6 | 7 | ### Takeaways 8 | 9 | 1. The approach is similar to our current one 10 | 2. We can expand and provide added value by: 11 | - doing OpenEthereum, ewasm 12 | - focusing on gas schedule alignment and implementation discrepancies detection 13 | - providing a more standardized procedure to implement instrumentation, e.g. caveats and requirements for a standardized measurement, generic scripts to conduct analysis on other implementations/environments 14 | - running without the need for stack balancing and contract deployment, by executing in an artificial EVM setup 15 | - validating and combining with other kinds of measurements, by conducting a detailed statistical analysis of the data 16 | 17 | ### Notes 18 | 19 | 1. Optimize EVM execution by miners, by benchmarking different environments. 
"As a consequence,a miner would want to choose a platform that optimizes thereward for the used energy. The benchmark presented in thispaper, when carried out for different platforms, willhelp selectthe best platform." 20 | 2. But also allow to choose "fattest" contracts to execute. "Our opcode benchmark would assist in decidingwhich smartcontracts to execute" 21 | 3. Lastly - alignment of reward and cost 22 | 4. How it is measured? repeatedly single opcode: "In particular, since indi-vidual opcodes take very little time to execute, OpBench1 executes opcodes repeatedly, taking care of stack managementchallenges that result from the small size EVM stack". - measuring every opcode (?). "The computation time of each bytecode is recorded" - but execution takes place in a full contract deployed - "set a timer before and after the executionof each opcode on the EVM." - for `PyEVM` they use [timeit](https://docs.python.org/3/library/timeit.html), which: - turns off GC - runs setup - suggest to only use `min` on the timing vector, not mean/stddev - there is a claim, that benchmarking on a higher (not opcode, but entire contract) level is not sufficient, around citation [15] - followed citation [15] in [`performance_benchmarking.md`](./performance_benchmarking.md) 23 | 5. Program generation: "we generate the bytecode for a fully executable smart contract, which contains repeated bytecode instances of the opcode intended to be measured, as well as the required PUSHs and POPs opcodes to successfully manipulate the EVM stack. " 24 | - for selected opcodes they do different versions for different sizes of the data manipulated 25 | - for selected opcodes ("Formula-based", 6 of them) they craft custom approaches 26 | - Stack Management: for example for ADD, they do PUSH, PUSH, ADD, POP repeatedly 27 | 6. Advertises the approach to be portable to other implementations. 28 | 7. 
"to the best of ourknowledge, there is no prior systematic approach suggestedfor performance benchmarking of Ethereum opcodes" 29 | 8. Paper seems to focus on miner rewards coming from the gas schedule, instead of network security, balanced execution or enabling execution on consumer hardware. 30 | 9. Paper claims that gas schedule from the yellow paper does not provide a basis for it, but the basis was there (maybe it was not cited in YP) - see the old spreadsheet 31 | 10. References to follow: 32 | - (done irrelevant) GASPER again, as in "Broken Metre" 33 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/papers/other.md: -------------------------------------------------------------------------------- 1 | ## Other resources which might be useful 2 | 3 | 1. Ethereum yellow paper - worth a quick scan, holds an (up-to-date?) list of opcodes in EVM with descriptions and details 4 | 2. https://ethereum.org/en/developers/docs/evm/ - just basics to ramp up with 5 | 3. https://www.ethervm.io/ - reference for EVM opcodes + Decompiler 6 | 4. https://medium.com/swlh/getting-deep-into-evm-how-ethereum-works-backstage-ab6ad9c0d0bf - nice intro for a broader perspective how EVM works 7 | 5. https://docs.google.com/spreadsheets/d/1m89CVujrQe5LAFJ8-YAUCcNK950dUzMQPMJBxRtGCqs/edit#gid=0 - "1.0 gas costs" spreadsheet - ilustrates the original calculations to get gas costs. Seems to work like this: 8 | - calculate how much effort can a single block be (e.g. take x time max, leave y memory footprint, take z storage for block history etc.) - see cells L5-L10. Call this _block effort_limit_ 9 | - set an arbitrary _block gas limit_ 10 | - express the cost of a unit this effort (e.g. a microsecond of computation) in gas using the block gas limit and block effort limit 11 | - measure footprint of each opcode (how?) in these dimensions - see columns B-G 12 | - express this footprint in gas - see column H 13 | 6. 
https://github.com/wolflo/evm-opcodes - source for `www.ethervm.io`, but with a nice compilation of gas cost formulas from the yellow paper. Not exactly sure about up-to-dateness, e.g. it mentions 700 for STATICCALL, while other sources 40 (after EIP-2046) 14 | 7. https://dave.cheney.net/high-performance-go-workshop/dotgo-paris.html - some materials on go profiling and benchmarks. Not immediately useful but: 15 | - tips on profiling with garbage collector 16 | - compiler optimization traps 17 | - avoiding appending (**TODO** ensure we don't) 18 | 8. https://eips.ethereum.org/EIPS/eip-150 - new gas costs there calculated along the lines of the original model - good to cite for motivations of gas research. Only IO intensive 19 | 9. https://eips.ethereum.org/EIPS/eip-1884 - good to cite for motivations of gas research. Measurements were done on chain history, via ms/MGas metric. Only IO intensive 20 | - **TODO** - gather and write down rationale towards focusing on non-IO operations 21 | 22 | ## Other resources scanned, which aren't relevant to us 23 | 24 | 1. http://bergel.eu/MyPapers/Soto20a-FuzzingSolidity.pdf - "Fuzzing to Estimate Gas Costs of Ethereum Contracts" - irrelevant; is about comparing Solidity static gas cost estimation and estimation using fuzzing testing. 25 | 1. The `evmjit` story (which I stumbled upon [here](https://ethresear.ch/t/evm-performance/2791))- the idea seemed to be to replace patterns of operations with "bulk" operations, e.g. a bunch of static PUSH instructions before a `CALL` to become a single meta-instruction. _It would be of great significance_ to our results, but seems to be discontinued (in `geth` codebase there's no occurrences, similar for `OpenEthereum`, [this is not active](https://github.com/ethereum/evmjit) and then [this](https://github.com/ethereum/go-ethereum/issues/2365#issuecomment-275493369)). 26 | 1. 
ethresear.ch - search for `evm cpu`, `evm gas`, check tags: https://ethresear.ch/c/evm-ewasm/26, https://ethereum-magicians.org/tag/evm, https://ethereum-magicians.org/tag/evm-evolution, https://ethereum-magicians.org/tag/opcodes 27 | - nothing relevant in here: 28 | - https://ethresear.ch/t/running-deep-learning-on-evm/899 29 | - https://ethresear.ch/t/evm-performance/2791/18 (but interesting read about EVM vs ewasm in general) 30 | - https://ethresear.ch/t/dynamic-gas-costs/4375/4 (dynamic opcode pricing via consensus) 31 | - https://ethresear.ch/t/verifiable-precompiled-contracts/7242 32 | - https://ethresear.ch/t/evm-idea-add-access-to-overflow-carry-sign-and-zero-flags-to-reduce-gas-use/782/5 33 | - https://ethresear.ch/t/eth2-authenticated-data-structures-and-gas-costs/6487 34 | - https://ethresear.ch/t/client-side-solidity-evm/4605/5 35 | - https://github.com/pirapira/awesome-ethereum-virtual-machine - great list of resources, but nothing immediately useful. Revisit 36 | - https://ethereum-magicians.org/t/eip-1109-remove-call-costs-for-precompiled-contracts/447/14 37 | - https://ethereum-magicians.org/t/eip-1884-repricing-for-trie-size-dependent-opcodes/3024/38 38 | 1. https://www.codeproject.com/Articles/8672/Virtual-Machine-Opcode-Resolution-Performance-Test - "Virtual Machine Opcode Resolution, Performance Tests" 39 | 1. http://mural.maynoothuniversity.ie/6432/1/JP-Relating-Static.pdf - "Relating Static and Dynamic Measurements for the Java Virtual Machine Instruction Set" 40 | 1. https://www.researchgate.net/publication/3929823_Measurement_and_Analysis_of_Runtime_Profiling_Data_for_Java_Programs - "Measurement and Analysis of Runtime Profiling Data for Java Programs" 41 | 1. https://stackoverflow.com/questions/37740081/bytecode-instruction-cost - "Bytecode instruction cost" - SO thread for Python, nothing useful 42 | 1. 
https://www.aminer.org/pub/53e9b6cab7602d97042540cd/a-portable-research-framework-for-the-execution-of-java-bytecode - http://www.sable.mcgill.ca/publications/thesis/phd-gagnon/sable-thesis-2002-phd-gagnon.pdf - "A portable research framework for the execution of java bytecode" 43 | 1. https://www.researchgate.net/publication/2649955_The_Jalapeno_Dynamic_Optimizing_Compiler_for_Java - "The Jalapeño Dynamic Optimizing Compiler for Java" 44 | 1. https://www.researchgate.net/publication/2569394_Characterizing_Computer_Systems%27_Workloads - "Characterizing Computer Systems' Workloads" 45 | 1. 46 | 47 | ## search queries 48 | 49 | 1. ftp://ftp.cs.wisc.edu/paradyn/technical_papers/paradynJ.pdf - "Performance Measurement of Dynamically Compiled Java Executions" 50 | 51 | "virtual machine instruction measurement" and variations using: "java" / "clr cil" / "comparison" / "benchmark", 52 | 53 | measure bytecode instructions performance -"platform independent timing of java" 54 | 55 | time vs instruction count correlation 56 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/papers/performance_benchmarking.md: -------------------------------------------------------------------------------- 1 | ## Performance Benchmarking of Smart Contracts to Assess Miner Incentives in Ethereum 2 | 3 | Amjad Aldweesh, Maher Alharby, Ellis Solaiman, Aad van Moorsel 4 | 5 | https://www.researchgate.net/publication/328908738_Performance_Benchmarking_of_Smart_Contracts_to_Assess_Miner_Incentives_in_Ethereum 6 | 7 | Paper focuses on finding real contracts with highest overall Gas/CPU ratio. 8 | 9 | ### Notes 10 | 11 | 1. Motivation to cite: " More-over, if certain smart contracts are known not to be attractive,transactions using that smart contract would not be executedby miners" - alignment of gas costs impacts dependability of miner work 12 | 2. 
Intention similar to `gas-cost-estimator`: "We envisage that such a benchmark could be run periodically, on a variety of software and hardware platforms, to demonstrate to the community if and how well costs and benefits are aligned within Ethereum" 13 | 3. Not sure why contract creation is investigated in the context of CPU time. It isn't surprising that CPU/gas is 6x compared to execution 14 | 4. Environment: PyEthApp on a MacBook 15 | 5. It can be argued that it would be hard to "prefer" high-yielding contracts at the expense of low-throughput contracts, b/c it's hard to predict accurately which contracts are called by a tx. 16 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/papers/timing_of_jvm_instructions.md: -------------------------------------------------------------------------------- 1 | ## Platform Independent Timing of Java Virtual Machine Bytecode Instructions 2 | 3 | Jonathan M. Lambert James F. Power 4 | 5 | http://mural.maynoothuniversity.ie/6382/2/JP-Platform.pdf 6 | 7 | ### Notes 8 | 9 | 1. white-box ((done) follow citations [11,6,7], [25]), where JVM source code is available vs black-box (statistically, on an entire program level). This paper is black-box: "to what extent can we reliably predict the execution timings for JVM bytecode instructions at this kind of platform-independent level?" 10 | - they do white-box for calibration/validation using RDTSC 11 | - calibration is linear regression of their method vs RDTSC. 2 outliers (not accounted for). Their method under-predicts by 23%, but what if one takes out the 2 outliers, which are over-predicted? 12 | 2. Problems with white-box sound JVM specific: "Java bytecode instructions execute within nanoseconds. 
Attempting to measure these instructions with a high degree of precision using standard Java library timing methods such as System.currentTimeMillis or System.nanoTime results in the quantisation errors masking their true execution times.". 13 | - do we have nanosecond accuracy in Rust/C? **TODO** (done for golang: https://github.com/imapp-pl/gas-cost-estimator/issues/14) 14 | - there is `System.nanoTime` but "System.nanoTime cannot guarantee nanosecond accuracy" 15 | 3. **TODO** (optional) follow citations [18, 4, 9] in case low-resolution timing handling is required, or at least the paragraph that summarizes them 16 | 4. (can't find) follow [13] and follow [26] "present a technique for the measurement of bytecode execution times" 17 | 5. (done) follow [20] "production of an instruction timing model to model CPU performance measurements" 18 | 6. They estimate timer overhead by 2 consecutive calls - same thing on our list 19 | 7. In JVM you have an instruction to invoke `System.currentTimeMillis`. See Code segment 1 20 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/papers/vm_matters.md: -------------------------------------------------------------------------------- 1 | ## VM Matters: A Comparison of WASM VMs and EVMs in the Performance of Blockchain Smart Contracts 2 | 3 | Shuyu Zheng, Haoyu Wang, Lei Wu, Gang Huang, Xuanzhe Liu 4 | 5 | https://arxiv.org/pdf/2012.01032.pdf 6 | 7 | ### Notes 8 | 9 | 1. "conducts the first measurement study, to measure the performance on WASM VM and EVM for executing smart contracts on blockchain" 10 | 2. "To our surprise, the current WASM VM does not perform in expected performance. The overhead introduced by WASM is really non-trivial. Our results highlight the challenges when deploying WASM in practice, and provide insightful implications for improvement space." 11 | 3. 
This paper includes comparison of EVM implementations, but does so at a high level to seek performance gaps of running smart contracts. We focus on finding differences in patterns of relative computational costs that set implementations apart. "RQ2 A Comparison of EVM Engines. As there are several clients that support the execution of EVM bytecode, we are wondering are there any performance gaps of running smart contracts among them?" 12 | 4. This paper indicates the importance of 256bit/64bit versions of benchmarks for Ewasm. Not entirely sure what this means here, but this might be another dimension of variability for Ewasm 13 | -------------------------------------------------------------------------------- /legacy/gas-estimator/docs/notes/program_generator/notes.md: -------------------------------------------------------------------------------- 1 | ## Program generator 2 | 3 | 1. **TODO** clarify `0xfe` `INVALID` - `0xfe` is just an example invalid opcode, among many other bytes that are invalid. How to measure that? 4 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/analysis/README.md: -------------------------------------------------------------------------------- 1 | ## Analysis of the results 2 | 3 | Use RStudio to run the notebooks. 
4 | 5 | You can also view the preview HTMLs using these links: 6 | 7 | - [`exploration`](https://htmlpreview.github.io/?https://github.com/imapp-pl/gas-cost-estimator/blob/master/src/analysis/exploration.nb.html) 8 | - [`exploration timers`](https://htmlpreview.github.io/?https://github.com/imapp-pl/gas-cost-estimator/blob/master/src/analysis/exploration_timers.nb.html) 9 | 10 | (Construct them by prepending `https://htmlpreview.github.io/?` to the full github URL pointing to the HTML file) 11 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/analysis/exploration_timers.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook: exploration of various timers" 3 | output: html_notebook 4 | --- 5 | 6 | Read in the output of `go run ./src/instrumentation_measurement/clock_resolution_go/main.go` (and other routines for overhead measurements): 7 | 8 | ```{r fig.width=20} 9 | setwd("~/sources/imapp/gas-cost-estimator/src") 10 | time_geth = read.csv("../../local/time_geth.csv") 11 | time_evmone = read.csv("../../local/time_evmone.csv", header = FALSE) 12 | time_openethereum = read.csv("../../local/time_openethereum.csv", header = FALSE) 13 | time_all = time_geth[1:200000, ] 14 | time_all$time_evmone = time_evmone[, 1] 15 | time_all$time_openethereum = time_openethereum[, 1] 16 | N = 200000 17 | time = head(time_all, N) 18 | head(time) 19 | ``` 20 | The temporal dynamics of all timers must be accounted for. They all seem to warm up for a long time. 
21 | 22 | **NOTE** `purple` line (gotsc) is not in ns but in CPU cycles 23 | 24 | ```{r fig.width=20} 25 | plot(NULL, xlim=c(1, N), ylim=c(0, 3000)) 26 | 27 | geth_color = rgb(0.1,0.1,0.7,0.5) 28 | evmone_color = rgb(0.8,0.1,0.3,0.6) 29 | openethereum_color = rgb(0.1,0.7,0.1,0.5) 30 | 31 | lines(time$clock_gettime, type = "l", col = "red") 32 | lines(time$time, type = "l", col = "blue") 33 | lines(time$runtime_nano, type = "l", col = geth_color) 34 | lines(time$gotsc, type = "l", col = "purple") 35 | lines(time$time_evmone, type = "l", col = evmone_color) 36 | lines(time$time_openethereum, type = "l", col = openethereum_color) 37 | ``` 38 | 39 | ```{r fig.width=20, fig.height=10} 40 | plot(NULL, xlim=c(1, N), ylim=c(0, 100)) 41 | 42 | ma <- function(x, n = 50){stats::filter(x, rep(1 / n, n), sides = 2)} 43 | lines(ma(time$runtime_nano), type = "l", col = geth_color) 44 | ``` 45 | 46 | ```{r fig.width=20} 47 | plot(NULL, xlim=c(1, N), ylim=c(1, 3.5)) 48 | par(ylog=TRUE) 49 | lines(time$clock_gettime, type = "l", col = "red") 50 | lines(time$time, type = "l", col = "blue") 51 | lines(time$runtime_nano, type = "l", col = geth_color) 52 | lines(time$gotsc, type = "l", col = "purple") 53 | lines(time$time_evmone, type = "l", col = evmone_color) 54 | lines(time$time_openethereum, type = "l", col = openethereum_color) 55 | ``` 56 | A closer look at the same, only relevant wallclock measurements: 57 | 58 | ```{r fig.width=20} 59 | plot(NULL, xlim=c(1, 500), ylim=c(15, 25.5)) 60 | lines(time$runtime_nano, type = "l", col = geth_color) 61 | lines(time$time_evmone, type = "l", col = evmone_color) 62 | lines(time$time_openethereum, type = "l", col = openethereum_color) 63 | ``` 64 | And over the entire period, smoothed out: 65 | 66 | ```{r fig.width=20} 67 | min = 16 68 | max = 70 69 | # moving average; from https://stackoverflow.com/questions/743812/calculating-moving-average 70 | ma <- function(x, n = 1000){stats::filter(x, rep(1 / n, n), sides = 2)} 71 | 72 | plot(NULL, 
xlim=c(1, N), ylim=c((min), (max))) 73 | lines(ma(time$runtime_nano), col = geth_color) 74 | lines(ma(time$time_evmone), col = evmone_color) 75 | lines(ma(time$time_openethereum), col = openethereum_color) 76 | ``` 77 | 78 | It seems `runtimeNano` is the most accurate and stable one. We could perhaps subtract the `Min.` of this from all the measurements 79 | 80 | ```{r fig.width=20} 81 | summary(time) 82 | ``` 83 | 84 | ```{r fig.width=20} 85 | boxplot(time) 86 | ``` 87 | 88 | Explore the effect of the overhead increasing for all timers. We're trimming down the data frame to observe correlations sensibly: 89 | ```{r fig.width=20} 90 | time_sample = time[sample(nrow(time), 100), ] 91 | var(time_sample) 92 | cor(time_sample) 93 | 94 | # cleanup 95 | rm(time_sample) 96 | ``` 97 | Deeper analysis of the two best clocks: `runtimeNano` and `gotsc`, plus the `evmone` and `openethereum` wall clocks: 98 | 99 | ```{r fig.width=20} 100 | par(mfrow=c(4,1)) 101 | frequencies = sort(table(time$runtime_nano), decreasing=TRUE) 102 | # take all frequencies minus the most outlying ones 103 | n = length(frequencies)/2 104 | plot(frequencies[1:n], col=geth_color) 105 | frequencies = sort(table(time$gotsc), decreasing=TRUE) 106 | n = length(frequencies)/2 107 | plot(frequencies[1:n], col="purple") 108 | frequencies = sort(table(time$time_evmone), decreasing=TRUE) 109 | n = length(frequencies)/2 110 | plot(frequencies[1:n], col=evmone_color) 111 | frequencies = sort(table(time$time_openethereum), decreasing=TRUE) 112 | n = length(frequencies)/2 113 | plot(frequencies[1:n], col=openethereum_color) 114 | 115 | # cleanup 116 | rm(frequencies) 117 | ``` 118 | 119 | ```{r fig.width=20} 120 | quantile(time$runtime_nano, probs=c(0.85, 0.9, 0.95, 0.99, 0.999, 0.9999, 0.99999)) 121 | quantile(time$gotsc, probs=c(0.85, 0.9, 0.95, 0.99, 0.999, 0.9999, 0.99999)) 122 | quantile(time$time_evmone, probs=c(0.85, 0.9, 0.95, 0.99, 0.999, 0.9999, 0.99999)) 123 | quantile(time$time_openethereum, 
probs=c(0.85, 0.9, 0.95, 0.99, 0.999, 0.9999, 0.99999)) 124 | ``` 125 | 126 | Summary: 127 | 128 | 1. ~We should discard about 5000 first observations~ EDIT: with our current clocks, and fresh measurements from @magdasta, there doesn't seem to be a need. This was machine specific most likely. 129 | 2. It is probably a good idea to monitor and register the timer overhead during the opcode measurements (**TODO**). 130 | 2. Due to periods of increased overhead, sometimes the measurements might be over-timed for several consecutive measurements. Should we discard all measurements where "just time" measurement is above a threshold? (**TODO**) 131 | 3. `runtimeNano` is clearly the winner, but it still has high values quite often, and is subject to large overhead during warm-up and during the "temporary increase periods" 132 | - **UPDATE** - it is a winner in wall-clock category, but probably CPU cycles using `gotsc` (based on TSC and in-sync with what `evmone` measurements use - RDTSC) is even better 133 | 4. We can also subtract the minimum (or mean/median) observed timer overhead of `runtimeNano` from all the measurements 134 | - **UPDATE** - if we go for CPU cycles it's 35. Interestingly though, the `gotsc` library tells us "TSC Overhead: 31" 135 | 5. Next step would be to consider subtracting more, considering it's a justified move (**TODO**) 136 | 6. 
Alternatively, we could do more in-depth analysis of the behaviors observed and try to normalize the timer readings more (**TODO** optional) 137 | 138 | **TODO** also another timer, another one tried by `chfast` for evmone https://godoc.org/github.com/lanl/go-papi 139 | **TODO** explore C++ and Rust timers similarly (we have `runtimeNano` counterparts, do others) 140 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/check_clocksource.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -xe 3 | 4 | if [ `cat /sys/devices/system/clocksource/clocksource0/current_clocksource` != 'tsc' ]; then 5 | echo "clocksource should be tsc, found:" 6 | cat /sys/devices/system/clocksource/clocksource0/current_clocksource 7 | echo "see docker_timer.md somewhere in the docses" 8 | exit 1 9 | fi 10 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/instrumentation_measurement/README.md: -------------------------------------------------------------------------------- 1 | # Running with program generator 2 | 3 | From the `src` directory (the parent of `instrumentation_measurement`): 4 | 5 | ``` 6 | python3 program_generator/program_generator.py generate --fullCsv | python3 instrumentation_measurement/measurements.py measure --mode all --sampleSize=50 --nSamples=3 > ../../geth.csv 7 | ``` 8 | 9 | By default programs are executed in geth. To change EVM specify `--evm` parameter: 10 | 11 | ``` 12 | python3 program_generator/program_generator.py generate --fullCsv | python3 instrumentation_measurement/measurements.py measure --mode all --sampleSize=50 --nSamples=3 --evm evmone > ../../evmone.csv 13 | ``` 14 | 15 | ### Running measurements via `docker` 16 | 17 | From the repo root. 18 | 19 | Build (pick tag name as desired): 20 | 21 | ``` 22 | sudo docker build -t measurements-geth -f Dockerfile.geth .
23 | ``` 24 | 25 | Run: 26 | 27 | ``` 28 | sudo docker run --rm --privileged --security-opt seccomp:unconfined \ 29 | -it measurements-geth \ 30 | sh -c "cd src && python3 program_generator/program_generator.py generate --fullCsv | python3 instrumentation_measurement/measurements.py measure --mode all --sampleSize=5 --nSamples=1" 31 | ``` 32 | 33 | For other EVMs use respective `Dockerfile`s and use the `--evm` flag on the `measure` command, e.g. `measure --evm openethereum` 34 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/instrumentation_measurement/clock_resolution_go/main.go: -------------------------------------------------------------------------------- 1 | // from https://stackoverflow.com/questions/14610459/how-precise-is-gos-time-really 2 | 3 | // run this to compare the interval measurement with least overhead 4 | 5 | package main 6 | 7 | import ( 8 | "fmt" 9 | "github.com/dterei/gotsc" 10 | "golang.org/x/sys/unix" 11 | "os" 12 | "time" 13 | ) 14 | 15 | import _ "unsafe" 16 | 17 | // runtimeNano returns the current value of the runtime clock in nanoseconds. 
18 | //go:linkname runtimeNano runtime.nanotime 19 | func runtimeNano() int64 20 | 21 | func main() { 22 | res := unix.Timespec{} 23 | unix.ClockGetres(unix.CLOCK_MONOTONIC, &res) 24 | fmt.Fprintf(os.Stderr, "Monotonic clock resolution is %d nanoseconds\n", res.Nsec) 25 | 26 | tsc := gotsc.TSCOverhead() 27 | fmt.Fprintf(os.Stderr, "TSC Overhead: %d\n", tsc) 28 | 29 | const N = 2000000 30 | res1 := unix.Timespec{} 31 | res2 := unix.Timespec{} 32 | sinceClockGettime := int64(0) 33 | time1 := time.Time{} 34 | time2 := time.Time{} 35 | sinceTime := time.Duration(0) 36 | runtimeNano1 := int64(0) 37 | runtimeNano2 := int64(0) 38 | sinceRuntimeNano := int64(0) 39 | gotsc1 := uint64(0) 40 | gotsc2 := uint64(0) 41 | sinceGotsc := uint64(0) 42 | 43 | fmt.Println("clock_gettime,time,runtime_nano,gotsc") 44 | 45 | for i := 1; i < N; i++ { 46 | unix.ClockGettime(unix.CLOCK_MONOTONIC, &res1) 47 | unix.ClockGettime(unix.CLOCK_MONOTONIC, &res2) 48 | sinceClockGettime = res2.Nsec - res1.Nsec 49 | time1 = time.Now() 50 | time2 = time.Now() 51 | sinceTime = time2.Sub(time1) 52 | runtimeNano1 = runtimeNano() 53 | runtimeNano2 = runtimeNano() 54 | sinceRuntimeNano = runtimeNano2 - runtimeNano1 55 | 56 | gotsc1 = gotsc.BenchStart() 57 | gotsc2 = gotsc.BenchEnd() 58 | sinceGotsc = gotsc2 - gotsc1 59 | fmt.Printf("%d,%d,%d,%d\n", sinceClockGettime, sinceTime, sinceRuntimeNano, sinceGotsc) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/instrumentation_measurement/geth/README.md: -------------------------------------------------------------------------------- 1 | ### `geth` instrumentation 2 | 3 | See [here](/docs/notes/instrumentation_measurement/geth.md) for description and notes. 4 | 5 | ### Usage 6 | 7 | 0. Need to use `go-ethereum` with moved `CaptureState` in `github.com/ethereum/go-ethereum/core/vm/interpreter.go`, `CaptureState` must be after `execute` 8 | 1. 
`GOGC=off go run main.go --bytecode 62FFFFFF60002062FFFFFF600020` 9 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/instrumentation_measurement/geth/instrumenter/instrumenter.go: -------------------------------------------------------------------------------- 1 | // based on `StructLogger` from `github.com/ethereum/go-ethereum/core/vm/logger.go:123` 2 | 3 | package instrumenter 4 | 5 | import ( 6 | "fmt" 7 | "io" 8 | "math/big" 9 | "time" 10 | 11 | "github.com/ethereum/go-ethereum/common" 12 | "github.com/ethereum/go-ethereum/core/vm" 13 | ) 14 | 15 | type LogConfig struct { 16 | } 17 | 18 | //go:generate gencodec -type InstrumenterLog -field-override structLogMarshaling -out gen_structlog.go 19 | 20 | // InstrumenterLog is emitted to the vm.EVM each cycle and lists information about the current internal state 21 | // prior to the execution of the statement. 22 | type InstrumenterLog struct { 23 | Pc uint64 `json:"pc"` 24 | Op vm.OpCode `json:"op"` 25 | TimeNs int64 `json:"timeNs"` 26 | TimerTimeNs int64 `json:"timerTimeNs"` 27 | } 28 | 29 | // InstrumenterLogger is an vm.EVM state logger and implements Tracer. 30 | type InstrumenterLogger struct { 31 | cfg LogConfig 32 | 33 | logs []InstrumenterLog 34 | startTime int64 35 | 36 | // worker fields, just to avoid reallocation of local vars 37 | opCodeDuration int64 38 | timerDuration int64 39 | log InstrumenterLog 40 | } 41 | 42 | // NewInstrumenterLogger returns a new logger 43 | func NewInstrumenterLogger(cfg *LogConfig) *InstrumenterLogger { 44 | logger := &InstrumenterLogger{} 45 | if cfg != nil { 46 | logger.cfg = *cfg 47 | } 48 | return logger 49 | } 50 | 51 | // CaptureStart implements the Tracer interface to initialize the tracing operation. 
52 | func (l *InstrumenterLogger) CaptureStart(from common.Address, to common.Address, create bool, input []byte, gas uint64, value *big.Int) error { 53 | l.startTime = runtimeNano() 54 | return nil 55 | } 56 | 57 | // CaptureState logs a new structured log message and pushes it out to the environment 58 | func (l *InstrumenterLogger) CaptureState(env *vm.EVM, pc uint64, op vm.OpCode, gas, cost uint64, memory *vm.Memory, stack *vm.Stack, rStack *vm.ReturnStack, rData []byte, contract *vm.Contract, depth int, err error) error { 59 | // measure the current iteration (we'll deduct startTime below) 60 | l.opCodeDuration = runtimeNano() 61 | 62 | // measure the most current timer overhead, take a new measurement and later deduct the 63 | // previous timer reading 64 | l.timerDuration = runtimeNano() 65 | l.timerDuration -= l.opCodeDuration 66 | l.opCodeDuration -= l.startTime 67 | 68 | // add to log 69 | l.log = InstrumenterLog{pc, op, l.opCodeDuration, l.timerDuration} 70 | l.logs = append(l.logs, l.log) 71 | 72 | // start timing the next iteration 73 | l.startTime = runtimeNano() 74 | return nil 75 | } 76 | 77 | // CaptureFault implements the Tracer interface to trace an execution fault 78 | // while running an opcode. 79 | func (l *InstrumenterLogger) CaptureFault(env *vm.EVM, pc uint64, op vm.OpCode, gas, cost uint64, memory *vm.Memory, stack *vm.Stack, rStack *vm.ReturnStack, contract *vm.Contract, depth int, err error) error { 80 | return nil 81 | } 82 | 83 | // CaptureEnd is called after the call finishes to finalize the tracing. 84 | func (l *InstrumenterLogger) CaptureEnd(output []byte, gasUsed uint64, t time.Duration, err error) error { 85 | return nil 86 | } 87 | 88 | // InstrumenterLogs returns the captured log entries. 
89 | func (l *InstrumenterLogger) InstrumenterLogs() []InstrumenterLog { return l.logs } 90 | 91 | // WriteTrace writes a formatted trace to the given writer 92 | func WriteTrace(writer io.Writer, logs []InstrumenterLog) { 93 | for _, log := range logs { 94 | fmt.Fprintf(writer, "%-16spc=%08d time_ns=%v timer_time_ns=%v", log.Op, log.Pc, log.TimeNs, log.TimerTimeNs) 95 | fmt.Fprintln(writer) 96 | } 97 | } 98 | 99 | func WriteCSVTrace(writer io.Writer, logs []InstrumenterLog, runId int) { 100 | // CSV header must be in sync with these fields here :(, but it's in measurements.py 101 | for instructionId, log := range logs { 102 | fmt.Fprintf(writer, "%v,%v,%v,%v", runId, instructionId, log.TimeNs, log.TimerTimeNs) 103 | fmt.Fprintln(writer) 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/instrumentation_measurement/geth/instrumenter/time.go: -------------------------------------------------------------------------------- 1 | package instrumenter 2 | 3 | // this portion ensures that we have access to the least-overhead timer 4 | 5 | import _ "unsafe" 6 | 7 | // runtimeNano returns the current value of the runtime clock in nanoseconds. 
8 | //go:linkname runtimeNano runtime.nanotime 9 | func runtimeNano() int64 10 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/instrumentation_measurement/geth/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "math" 7 | "math/big" 8 | "os" 9 | go_runtime "runtime" 10 | "time" 11 | 12 | _ "unsafe" 13 | 14 | "github.com/ethereum/go-ethereum/common" 15 | "github.com/ethereum/go-ethereum/core/rawdb" 16 | "github.com/ethereum/go-ethereum/core/state" 17 | "github.com/ethereum/go-ethereum/core/vm" 18 | "github.com/ethereum/go-ethereum/core/vm/runtime" 19 | "github.com/ethereum/go-ethereum/crypto" 20 | "github.com/ethereum/go-ethereum/params" 21 | ) 22 | 23 | func main() { 24 | 25 | bytecodePtr := flag.String("bytecode", "", "EVM bytecode to execute and measure") 26 | sampleSizePtr := flag.Int("sampleSize", 1, "Size of the sample - number of measured repetitions of execution") 27 | printEachPtr := flag.Bool("printEach", true, "If false, printing of each execution time is skipped") 28 | printCSVPtr := flag.Bool("printCSV", false, "If true, will print a CSV with standard results to STDOUT") 29 | modePtr := flag.String("mode", "all", "Measurement mode. Available options: all") 30 | 31 | flag.Parse() 32 | 33 | bytecode := common.Hex2Bytes(*bytecodePtr) 34 | sampleSize := *sampleSizePtr 35 | printEach := *printEachPtr 36 | printCSV := *printCSVPtr 37 | mode := *modePtr 38 | 39 | if mode != "all" && mode != "total" { 40 | fmt.Fprintln(os.Stderr, "Invalid measurement mode: ", mode) 41 | os.Exit(1) 42 | } 43 | 44 | cfg := new(runtime.Config) 45 | setDefaults(cfg) 46 | // from `github.com/ethereum/go-ethereum/core/vm/runtime/runtime.go:109` 47 | cfg.State, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) 48 | 49 | // Warm-up. 
**NOTE** we're keeping tracing on during warm-up, otherwise measurements are off 50 | cfg.EVMConfig.Debug = false 51 | cfg.EVMConfig.Instrumenter = vm.NewInstrumenterLogger() 52 | retWarmUp, _, errWarmUp := runtime.Execute(bytecode, nil, cfg) 53 | // End warm-up 54 | 55 | sampleStart := time.Now() 56 | for i := 0; i < sampleSize; i++ { 57 | if mode == "all" { 58 | MeasureAll(cfg, bytecode, printEach, printCSV, i) 59 | } else { 60 | MeasureTotal(cfg, bytecode, printEach, printCSV, i) 61 | } 62 | } 63 | 64 | sampleDuration := time.Since(sampleStart) 65 | 66 | if errWarmUp != nil { 67 | fmt.Fprintln(os.Stderr, errWarmUp) 68 | } 69 | fmt.Fprintln(os.Stderr, "Program: ", *bytecodePtr) 70 | fmt.Fprintln(os.Stderr, "Return:", retWarmUp) 71 | fmt.Fprintln(os.Stderr, "Sample duration:", sampleDuration) 72 | 73 | } 74 | 75 | func MeasureTotal(cfg *runtime.Config, bytecode []byte, printEach bool, printCSV bool, sampleId int) { 76 | cfg.EVMConfig.Instrumenter = vm.NewInstrumenterLogger() 77 | go_runtime.GC() 78 | 79 | cfg.EVMConfig.Instrumenter.StartTime = runtimeNano() 80 | _, _, err := runtime.Execute(bytecode, nil, cfg) 81 | 82 | // Measure runtime 83 | cfg.EVMConfig.Instrumenter.TotalExecutionDuration = runtimeNano() 84 | cfg.EVMConfig.Instrumenter.TimerDuration = runtimeNano() 85 | cfg.EVMConfig.Instrumenter.TimerDuration -= cfg.EVMConfig.Instrumenter.TotalExecutionDuration 86 | cfg.EVMConfig.Instrumenter.TotalExecutionDuration -= cfg.EVMConfig.Instrumenter.StartTime 87 | 88 | if err != nil { 89 | fmt.Fprintln(os.Stderr, err) 90 | } 91 | 92 | if printCSV { 93 | vm.WriteCSVInstrumentationTotal(os.Stdout, cfg.EVMConfig.Instrumenter, sampleId) 94 | } 95 | } 96 | 97 | func MeasureAll(cfg *runtime.Config, bytecode []byte, printEach bool, printCSV bool, sampleId int) { 98 | cfg.EVMConfig.Instrumenter = vm.NewInstrumenterLogger() 99 | go_runtime.GC() 100 | start := time.Now() 101 | _, _, err := runtime.Execute(bytecode, nil, cfg) 102 | duration := time.Since(start) 103 | 104 | 
if err != nil { 105 | fmt.Fprintln(os.Stderr, err) 106 | } 107 | if printEach { 108 | fmt.Fprintln(os.Stderr, "Run duration:", duration) 109 | 110 | instrumenterLogs := cfg.EVMConfig.Instrumenter.Logs 111 | vm.WriteInstrumentation(os.Stderr, instrumenterLogs) 112 | } 113 | 114 | if printCSV { 115 | instrumenterLogs := cfg.EVMConfig.Instrumenter.Logs 116 | vm.WriteCSVInstrumentationAll(os.Stdout, instrumenterLogs, sampleId) 117 | } 118 | } 119 | 120 | // copied directly from github.com/ethereum/go-ethereum/core/vm/runtime/runtime.go 121 | // so that we skip this in measured code 122 | func setDefaults(cfg *runtime.Config) { 123 | if cfg.ChainConfig == nil { 124 | cfg.ChainConfig = &params.ChainConfig{ 125 | ChainID: big.NewInt(1), 126 | HomesteadBlock: new(big.Int), 127 | DAOForkBlock: new(big.Int), 128 | DAOForkSupport: false, 129 | EIP150Block: new(big.Int), 130 | EIP150Hash: common.Hash{}, 131 | EIP155Block: new(big.Int), 132 | EIP158Block: new(big.Int), 133 | ByzantiumBlock: new(big.Int), 134 | ConstantinopleBlock: new(big.Int), 135 | PetersburgBlock: new(big.Int), 136 | IstanbulBlock: new(big.Int), 137 | MuirGlacierBlock: new(big.Int), 138 | YoloV2Block: nil, 139 | } 140 | } 141 | 142 | if cfg.Difficulty == nil { 143 | cfg.Difficulty = new(big.Int) 144 | } 145 | if cfg.Time == nil { 146 | cfg.Time = big.NewInt(time.Now().Unix()) 147 | } 148 | if cfg.GasLimit == 0 { 149 | cfg.GasLimit = math.MaxUint64 150 | } 151 | if cfg.GasPrice == nil { 152 | cfg.GasPrice = new(big.Int) 153 | } 154 | if cfg.Value == nil { 155 | cfg.Value = new(big.Int) 156 | } 157 | if cfg.BlockNumber == nil { 158 | cfg.BlockNumber = new(big.Int) 159 | } 160 | if cfg.GetHashFn == nil { 161 | cfg.GetHashFn = func(n uint64) common.Hash { 162 | return common.BytesToHash(crypto.Keccak256([]byte(new(big.Int).SetUint64(n).String()))) 163 | } 164 | } 165 | } 166 | 167 | // runtimeNano returns the current value of the runtime clock in nanoseconds.
168 | //go:linkname runtimeNano runtime.nanotime 169 | func runtimeNano() int64 170 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/program_generator/README.md: -------------------------------------------------------------------------------- 1 | ## Program generator 2 | 3 | ### Installation 4 | 5 | ``` 6 | virtualenv --python=python3 ~/.venv/gce 7 | source ~/.venv/gce/bin/activate 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ### Usage 12 | 13 | ``` 14 | python3 program_generator.py generate --help 15 | ``` 16 | 17 | #### Use together with `instrumenter.go` 18 | 19 | From `src` 20 | 21 | ``` 22 | export GOPATH= 23 | export GOGC=off 24 | export GO111MODULE=off 25 | python3 program_generator/program_generator.py generate | xargs -L1 go run ./instrumentation_measurement/geth/main.go --bytecode 26 | ``` 27 | 28 | #### (Ewasm) use together with `openethereum-evm` 29 | 30 | From `src` 31 | 32 | ``` 33 | # ensure `wabt` binaries are in PATH 34 | # ensure `parity-evm` binaries are in PATH 35 | python3 program_generator/program_generator.py generate --ewasm | xargs -L1 parity-evm --gas 5000 --chain ../../openethereum/ethcore/res/instant_seal.json --code 36 | ``` 37 | 38 | #### Use together with `measurements.py` 39 | 40 | From `src` 41 | 42 | (`go` exports as above) 43 | 44 | ``` 45 | python3 program_generator/program_generator.py generate --fullCsv | python3 instrumentation_measurement/measurements.py measure --sampleSize=50 --nSamples=4 > ../../result_geth.csv 46 | ``` 47 | 48 | or similar. 
49 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/program_generator/constants.py: -------------------------------------------------------------------------------- 1 | 2 | # CONSTANTS 3 | 4 | EVM_PUSHES = """ 5 | 0x60 PUSH1 6 | 0x61 PUSH2 7 | 0x62 PUSH3 8 | 0x63 PUSH4 9 | 0x64 PUSH5 10 | 0x65 PUSH6 11 | 0x66 PUSH7 12 | 0x67 PUSH8 13 | 0x68 PUSH9 14 | 0x69 PUSH10 15 | 0x6a PUSH11 16 | 0x6b PUSH12 17 | 0x6c PUSH13 18 | 0x6d PUSH14 19 | 0x6e PUSH15 20 | 0x6f PUSH16 21 | 0x70 PUSH17 22 | 0x71 PUSH18 23 | 0x72 PUSH19 24 | 0x73 PUSH20 25 | 0x74 PUSH21 26 | 0x75 PUSH22 27 | 0x76 PUSH23 28 | 0x77 PUSH24 29 | 0x78 PUSH25 30 | 0x79 PUSH26 31 | 0x7a PUSH27 32 | 0x7b PUSH28 33 | 0x7c PUSH29 34 | 0x7d PUSH30 35 | 0x7e PUSH31 36 | 0x7f PUSH32 37 | """ 38 | EVM_DUPS = """ 39 | 0x80 DUP1 40 | 0x81 DUP2 41 | 0x82 DUP3 42 | 0x83 DUP4 43 | 0x84 DUP5 44 | 0x85 DUP6 45 | 0x86 DUP7 46 | 0x87 DUP8 47 | 0x88 DUP9 48 | 0x89 DUP10 49 | 0x8a DUP11 50 | 0x8b DUP12 51 | 0x8c DUP13 52 | 0x8d DUP14 53 | 0x8e DUP15 54 | 0x8f DUP16 55 | """ 56 | EVM_SWAPS = """ 57 | 0x90 SWAP1 58 | 0x91 SWAP2 59 | 0x92 SWAP3 60 | 0x93 SWAP4 61 | 0x94 SWAP5 62 | 0x95 SWAP6 63 | 0x96 SWAP7 64 | 0x97 SWAP8 65 | 0x98 SWAP9 66 | 0x99 SWAP10 67 | 0x9a SWAP11 68 | 0x9b SWAP12 69 | 0x9c SWAP13 70 | 0x9d SWAP14 71 | 0x9e SWAP15 72 | 0x9f SWAP16 73 | """ 74 | EVM_SOMETHING = '600050' 75 | EVM_SOMETHING_LENGTH = 2 76 | 77 | EWASM_PREAMBLE = """ 78 | (module 79 | (func (export "call") (local $x i32) 80 | """ 81 | EWASM_DROP = """ 82 | drop 83 | """ 84 | EWASM_CLOSING_PARENTHESIS = """ 85 | )) 86 | """ 87 | EWASM_SOMETHING = """ 88 | i32.const 1234 89 | drop 90 | """ 91 | EWASM_SOMETHING_LENGTH = 2 92 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/program_generator/data/README.md: -------------------------------------------------------------------------------- 1 | `opcodes.csv` from 
https://github.com/djrtwo/evm-opcode-gas-costs/blob/master/opcode-gas-costs_EIP-150_revision-1e18248_2017-04-12.csv 2 | 3 | - UPDATE: RETURNDATASIZE and RETURNDATACOPY from the EIP 4 | - it's still missing PUSH/DUP/SWAP opcodes in standard format, so the script fills this in 5 | - UPDATE: REVERT from the EIP 6 | 7 | `selection.csv` from specs of "EVM Gas Cost Estimator.pdf" 8 | 9 | --- 10 | 11 | `opcodes_ewasm.csv` from specs of "EVM Gas Cost Estimator.pdf" with corrections (dropping a stray `f64` instruction). 12 | 13 | - stack requirements taken from [webassembly.github.io page](https://webassembly.github.io/spec/core/appendix/index-instructions.html) 14 | - parameters added 15 | 16 | `selection_ewasm_from_spec.csv` from specs of "EVM Gas Cost Estimator.pdf" 17 | 18 | `selection_ewasm.csv` taken from the above, limited to selection provided by `chfast`, excluding irrelevant flow control meta-instructions. 19 | 20 | `selection_ewasm_first_pass.csv` taken from the above, excluding memory instructions and `64` bit instructions for a working first draft program generation 21 | 22 | - UPDATE: `0xC0 i32.extend8_s`, `0xC1 i32.extend16_s` return `Error: EVM: Internal error: Error deserializing contract code (UnknownOpcode(192))` (and `193` resp.) 
from `openethereum` Ewasm, dropping them for first pass 23 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/program_generator/data/opcodes.csv: -------------------------------------------------------------------------------- 1 | Value,Mnemonic,Gas Used,Subset,Removed from stack,Added to stack,Notes,Formula Notes 2 | 0x00,STOP,0,zero,0,0,Halts execution., 3 | 0x01,ADD,3,verylow,2,1,Addition operation, 4 | 0x02,MUL,5,low,2,1,Multiplication operation., 5 | 0x03,SUB,3,verylow,2,1,Subtraction operation., 6 | 0x04,DIV,5,low,2,1,Integer division operation., 7 | 0x05,SDIV,5,low,2,1,Signed integer division operation (truncated)., 8 | 0x06,MOD,5,low,2,1,Modulo remainder operation, 9 | 0x07,SMOD,5,low,2,1,Signed modulo remainder operation., 10 | 0x08,ADDMOD,8,mid,3,1,Modulo addition operation., 11 | 0x09,MULMOD,8,mid,3,1,Modulo multiplication operation., 12 | 0x0a,EXP,(exp == 0) ? 10 : (10 + 10 * (1 + log256(exp))),,2,1,Exponential operation.,"If exponent is 0, gas used is 10. If exponent is greater than 0, gas used is 10 plus 10 times a factor related to how large the log of the exponent is." 
13 | 0x0b,SIGNEXTEND,5,low,2,1,Extend length of two’s complement signed integer., 14 | 0x10,LT,3,verylow,2,1,Less-than comparison., 15 | 0x11,GT,3,verylow,2,1,Greater-than comparison., 16 | 0x12,SLT,3,verylow,2,1,Signed less-than comparison., 17 | 0x13,SGT,3,verylow,2,1,Signed greater-than comparison., 18 | 0x14,EQ,3,verylow,2,1,Equality comparison., 19 | 0x15,ISZERO,3,verylow,1,1,Simple not operator., 20 | 0x16,AND,3,verylow,2,1,Bitwise AND operation., 21 | 0x17,OR,3,verylow,2,1,Bitwise OR operation, 22 | 0x18,XOR,3,verylow,2,1,Bitwise XOR operation., 23 | 0x19,NOT,3,verylow,1,1,Bitwise NOT operation., 24 | 0x1a,BYTE,3,verylow,2,1,Retrieve single byte from word, 25 | 0x20,SHA3,30 + 6 * (size of input in words),,2,1,Compute Keccak-256 hash.,30 is the paid for the operation plus 6 paid for each word (rounded up) for the input data. 26 | 0x30,ADDRESS,2,base,0,1,Get address of currently executing account., 27 | 0x31,BALANCE,400,,1,1,Get balance of the given account., 28 | 0x32,ORIGIN,2,base,0,1,Get execution origination address., 29 | 0x33,CALLER,2,base,0,1,Get caller address., 30 | 0x34,CALLVALUE,2,base,0,1,Get deposited value by the instruction/transaction responsible for this execution., 31 | 0x35,CALLDATALOAD,3,verylow,1,1,Get input data of current environment., 32 | 0x36,CALLDATASIZE,2,base,0,1,Get size of input data in current environment., 33 | 0x37,CALLDATACOPY,"2 + 3 * (number of words copied, rounded up)",,3,0,Copy input data in current environment to memory.,2 is paid for the operation plus 3 for each word copied (rounded up). 34 | 0x38,CODESIZE,2,base,0,1,Get size of code running in current environment., 35 | 0x39,CODECOPY,"2 + 3 * (number of words copied, rounded up)",,3,0,Copy code running in current environment to memory.,2 is paid for the operation plus 3 for each word copied (rounded up). 
36 | 0x3a,GASPRICE,2,base,0,1,Get price of gas in current environment., 37 | 0x3b,EXTCODESIZE,700,extcode,1,1,Get size of an account’s code., 38 | 0x3c,EXTCODECOPY,"700 + 3 * (number of words copied, rounded up)",,4,0,Copy an account’s code to memory.,700 is paid for the operation plus 3 for each word copied (rounded up). 39 | 0x3d,RETURNDATASIZE,2,,0,1,Pushes the size of the return data buffer onto the stack, 40 | 0x3e,RETURNDATACOPY,"3 + 3 * ceil(amount / 32)",,3,0,"This opcode has similar semantics to CALLDATACOPY, but instead of copying data from the call data, it copies data from the return data buffer", 41 | 0x40,BLOCKHASH,20,,1,1,Get the hash of one of the 256 most recent complete blocks., 42 | 0x41,COINBASE,2,base,0,1,Get the block’s beneficiary address., 43 | 0x42,TIMESTAMP,2,base,0,1,Get the block’s timestamp., 44 | 0x43,NUMBER,2,base,0,1,Get the block’s number., 45 | 0x44,DIFFICULTY,2,base,0,1,Get the block’s difficulty., 46 | 0x45,GASLIMIT,2,base,0,1,Get the block’s gas limit., 47 | 0x50,POP,2,base,1,0,Remove item from stack., 48 | 0x51,MLOAD,3,verylow,1,1,Load word from memory., 49 | 0x52,MSTORE,3,verylow,2,0,Save word to memory, 50 | 0x53,MSTORE8,3,verylow,2,0,Save byte to memory., 51 | 0x54,SLOAD,200,,1,1,Load word from storage, 52 | 0x55,SSTORE,((value != 0) && (storage_location == 0)) ? 20000 : 5000,,1,1,Save word to storage.,20000 is paid when storage value is set to non-zero from zero. 5000 is paid when the storage value's zeroness remains unchanged or is set to zero.
53 | 0x56,JUMP,8,mid,1,0,Alter the program counter, 54 | 0x57,JUMPI,10,high,2,0,Conditionally alter the program counter., 55 | 0x58,PC,2,base,0,1,Get the value of the program counter prior to the increment corresponding to this instruction., 56 | 0x59,MSIZE,2,base,0,1,Get the size of active memory in bytes., 57 | 0x5a,GAS,2,base,0,1,"Get the amount of available gas, including the corresponding reduction for the cost of this instruction.", 58 | 0x5b,JUMPDEST,1,,0,0,Mark a valid destination for jumps, 59 | 0x60 -- 0x7f,PUSH*,3,verylow,0,1,Place * byte item on stack. 0 < * <= 32, 60 | 0x80 -- 0x8f,DUP*,3,verylow,*,* + 1,Duplicate *th stack item. 0 < * <= 16, 61 | 0x90 -- 0x9f,SWAP*,3,verylow,* + 1,* + 1,Exchange 1st and (* + 1)th stack items., 62 | 0xa0,LOG0,375 + 8 * (number of bytes in log data),,2,0,Append log record with no topics.,375 is paid for operation plus 8 for each byte in data to be logged. 63 | 0xa1,LOG1,375 + 8 * (number of bytes in log data) + 375,,3,0,Append log record with one topic.,375 is paid for operation plus 8 for each byte in data to be logged plus 375 for the 1 topic to be logged. 64 | 0xa2,LOG2,375 + 8 * (number of bytes in log data) + 2 * 375,,4,0,Append log record with two topics.,375 is paid for operation plus 8 for each byte in data to be logged plus 2 * 375 for the 2 topics to be logged. 65 | 0xa3,LOG3,375 + 8 * (number of bytes in log data) + 3 * 375,,5,0,Append log record with three topics.,375 is paid for operation plus 8 for each byte in data to be logged plus 3 * 375 for the 3 topics to be logged. 66 | 0xa4,LOG4,375 + 8 * (number of bytes in log data) + 4 * 375,,6,0,Append log record with four topics.,375 is paid for operation plus 8 for each byte in data to be logged plus 4 * 375 for the 4 topics to be logged. 
67 | 0xf0,CREATE,32000,,3,1,Create a new account with associated code., 68 | 0xf1,CALL,Complex -- see yellow paper Appendix H,,7,1,Message-call into an account., 69 | 0xf2,CALLCODE,Complex -- see yellow paper Appendix H,,7,1,Message-call into this account with an alternative account’s code., 70 | 0xf3,RETURN,0,zero,2,0,Halt execution returning output data., 71 | 0xf4,DELEGATECALL,Complex -- see yellow paper Appendix H,,6,1,"Message-call into this account with an alternative account’s code, but persisting the current values for sender and value.", 72 | 0xfd,REVERT,,,2,0,"End execution, revert state changes, return data mem[p…(p+s))", 73 | 0xfe,INVALID,NA,,NA,NA,Designated invalid instruction., 74 | 0xff,SELFDESTRUCT,5000 + ((create_new_account) ? 25000 : 0),,1,0,Halt execution and register account for later deletion,5000 for the operation plus 25000 if a new account is also created. A refund of 24000 gas is also added to the refund counter for self-destructing the account. 75 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/program_generator/data/opcodes_ewasm.csv: -------------------------------------------------------------------------------- 1 | Value,Mnemonic,Removed from stack,Added to stack,Notes,Formula Notes 2 | 0x00,unreachable,1,1,, 3 | 0x01,nop,0,0,, 4 | 0x02,block,1,1,, 5 | 0x03,loop,1,1,, 6 | 0x04,if,1,1,, 7 | 0x05,else,0,0,, 8 | 0x0B,end,0,0,, 9 | 0x0C,br,2,1,, 10 | 0x0D,br_if,2,1,, 11 | 0x0E,br_table,3,1,, 12 | 0x0F,return,2,1,, 13 | 0x10,call,1,1,, 14 | 0x11,call_indirect,2,1,, 15 | 0x1A,drop,1,0,, 16 | 0x1B,select,3,1,, 17 | 0x20,local.get $x,0,1,, 18 | 0x21,local.set $x,1,0,, 19 | 0x22,local.tee $x,1,1,, 20 | 0x23,global.get $x,0,1,, 21 | 0x24,global.set $x,1,0,, 22 | 0x28,i32.load,1,1,, 23 | 0x29,i64.load,1,1,, 24 | 0x2C,i32.load8_s,1,1,, 25 | 0x2D,i32.load8_u,1,1,, 26 | 0x2E,i32.load16_s,1,1,, 27 | 0x2F,i32.load16_u,1,1,, 28 | 0x30,i64.load8_s,1,1,, 29 | 0x31,i64.load8_u,1,1,, 30 | 
0x32,i64.load16_s,1,1,, 31 | 0x33,i64.load16_u,1,1,, 32 | 0x34,i64.load32_s,1,1,, 33 | 0x35,i64.load32_u,1,1,, 34 | 0x36,i32.store,2,0,, 35 | 0x37,i64.store,2,0,, 36 | 0x3A,i32.store8,2,0,, 37 | 0x3B,i32.store16,2,0,, 38 | 0x3C,i64.store8,2,0,, 39 | 0x3D,i64.store16,2,0,, 40 | 0x3E,i64.store32,2,0,, 41 | 0x3F,memory.size,0,1,, 42 | 0x40,memory.grow,1,1,, 43 | 0x41,i32.const 32,0,1,, 44 | 0x42,i64.const 32,0,1,, 45 | 0x45,i32.eqz,1,1,, 46 | 0x46,i32.eq,2,1,, 47 | 0x47,i32.ne,2,1,, 48 | 0x48,i32.lt_s,2,1,, 49 | 0x49,i32.lt_u,2,1,, 50 | 0x4A,i32.gt_s,2,1,, 51 | 0x4B,i32.gt_u,2,1,, 52 | 0x4C,i32.le_s,2,1,, 53 | 0x4D,i32.le_u,2,1,, 54 | 0x4E,i32.ge_s,2,1,, 55 | 0x4F,i32.ge_u,2,1,, 56 | 0x50,i64.eqz,1,1,, 57 | 0x51,i64.eq,2,1,, 58 | 0x52,i64.ne,2,1,, 59 | 0x53,i64.lt_s,2,1,, 60 | 0x54,i64.lt_u,2,1,, 61 | 0x55,i64.gt_s,2,1,, 62 | 0x56,i64.gt_u,2,1,, 63 | 0x57,i64.le_s,2,1,, 64 | 0x58,i64.le_u,2,1,, 65 | 0x59,i64.ge_s,2,1,, 66 | 0x5A,i64.ge_u,2,1,, 67 | 0x67,i32.clz,1,1,, 68 | 0x68,i32.ctz,1,1,, 69 | 0x69,i32.popcnt,1,1,, 70 | 0x6A,i32.add,2,1,, 71 | 0x6B,i32.sub,2,1,, 72 | 0x6C,i32.mul,2,1,, 73 | 0x6D,i32.div_s,2,1,, 74 | 0x6E,i32.div_u,2,1,, 75 | 0x6F,i32.rem_s,2,1,, 76 | 0x70,i32.rem_u,2,1,, 77 | 0x71,i32.and,2,1,, 78 | 0x72,i32.or,2,1,, 79 | 0x73,i32.xor,2,1,, 80 | 0x74,i32.shl,2,1,, 81 | 0x75,i32.shr_s,2,1,, 82 | 0x76,i32.shr_u,2,1,, 83 | 0x77,i32.rotl,2,1,, 84 | 0x78,i32.rotr,2,1,, 85 | 0x79,i64.clz,1,1,, 86 | 0x7A,i64.ctz,1,1,, 87 | 0x7B,i64.popcnt,1,1,, 88 | 0x7C,i64.add,2,1,, 89 | 0x7D,i64.sub,2,1,, 90 | 0x7E,i64.mul,2,1,, 91 | 0x7F,i64.div_s,2,1,, 92 | 0x80,i64.div_u,2,1,, 93 | 0x81,i64.rem_s,2,1,, 94 | 0x82,i64.rem_u,2,1,, 95 | 0x83,i64.and,2,1,, 96 | 0x84,i64.or,2,1,, 97 | 0x85,i64.xor,2,1,, 98 | 0x86,i64.shl,2,1,, 99 | 0x87,i64.shr_s,2,1,, 100 | 0x88,i64.shr_u,2,1,, 101 | 0x89,i64.rotl,2,1,, 102 | 0x8A,i64.rotr,2,1,, 103 | 0xA7,i32.wrap_i64,1,1,, 104 | 0xAC,i64.extend_i32_s,1,1,, 105 | 0xAD,i64.extend_i32_u,1,1,, 106 | 0xC0,i32.extend8_s,1,1,, 107 | 
0xC1,i32.extend16_s,1,1,, 108 | 0xC2,i64.extend8_s,1,1,, 109 | 0xC3,i64.extend16_s,1,1,, 110 | 0xC4,i64.extend32_s,1,1,, 111 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/program_generator/data/selection.csv: -------------------------------------------------------------------------------- 1 | Opcode Name 2 | 0x00 STOP 3 | 0x01 ADD 4 | 0x02 MUL 5 | 0x03 SUB 6 | 0x04 DIV 7 | 0x05 SDIV 8 | 0x06 MOD 9 | 0x07 SMOD 10 | 0x08 ADDMOD 11 | 0x09 MULMOD 12 | 0x0a EXP 13 | 0x0b SIGNEXTEND 14 | 0x10 LT 15 | 0x11 GT 16 | 0x12 SLT 17 | 0x13 SGT 18 | 0x14 EQ 19 | 0x15 ISZERO 20 | 0x16 AND 21 | 0x17 OR 22 | 0x18 XOR 23 | 0x19 NOT 24 | 0x1a BYTE 25 | 0x30 ADDRESS 26 | 0x32 ORIGIN 27 | 0x33 CALLER 28 | 0x34 CALLVALUE 29 | 0x35 CALLDATALOAD 30 | 0x36 CALLDATASIZE 31 | 0x37 CALLDATACOPY 32 | 0x38 CODESIZE 33 | 0x39 CODECOPY 34 | 0x3a GASPRICE 35 | 0x3d RETURNDATASIZE 36 | 0x3e RETURNDATACOPY 37 | 0x41 COINBASE 38 | 0x42 TIMESTAMP 39 | 0x43 NUMBER 40 | 0x44 DIFFICULTY 41 | 0x45 GASLIMIT 42 | 0x50 POP 43 | 0x51 MLOAD 44 | 0x52 MSTORE 45 | 0x53 MSTORE8 46 | 0x56 JUMP 47 | 0x57 JUMPI 48 | 0x58 PC 49 | 0x59 MSIZE 50 | 0x5a GAS 51 | 0x5b JUMPDEST 52 | 0x60 PUSH1 53 | 0x61 PUSH2 54 | 0x62 PUSH3 55 | 0x63 PUSH4 56 | 0x64 PUSH5 57 | 0x65 PUSH6 58 | 0x66 PUSH7 59 | 0x67 PUSH8 60 | 0x68 PUSH9 61 | 0x69 PUSH10 62 | 0x6a PUSH11 63 | 0x6b PUSH12 64 | 0x6c PUSH13 65 | 0x6d PUSH14 66 | 0x6e PUSH15 67 | 0x6f PUSH16 68 | 0x70 PUSH17 69 | 0x71 PUSH18 70 | 0x72 PUSH19 71 | 0x73 PUSH20 72 | 0x74 PUSH21 73 | 0x75 PUSH22 74 | 0x76 PUSH23 75 | 0x77 PUSH24 76 | 0x78 PUSH25 77 | 0x79 PUSH26 78 | 0x7a PUSH27 79 | 0x7b PUSH28 80 | 0x7c PUSH29 81 | 0x7d PUSH30 82 | 0x7e PUSH31 83 | 0x7f PUSH32 84 | 0x80 DUP1 85 | 0x81 DUP2 86 | 0x82 DUP3 87 | 0x83 DUP4 88 | 0x84 DUP5 89 | 0x85 DUP6 90 | 0x86 DUP7 91 | 0x87 DUP8 92 | 0x88 DUP9 93 | 0x89 DUP10 94 | 0x8a DUP11 95 | 0x8b DUP12 96 | 0x8c DUP13 97 | 0x8d DUP14 98 | 0x8e DUP15 99 | 0x8f DUP16 100 | 0x90 SWAP1 
101 | 0x91 SWAP2 102 | 0x92 SWAP3 103 | 0x93 SWAP4 104 | 0x94 SWAP5 105 | 0x95 SWAP6 106 | 0x96 SWAP7 107 | 0x97 SWAP8 108 | 0x98 SWAP9 109 | 0x99 SWAP10 110 | 0x9a SWAP11 111 | 0x9b SWAP12 112 | 0x9c SWAP13 113 | 0x9d SWAP14 114 | 0x9e SWAP15 115 | 0x9f SWAP16 116 | 0xf3 RETURN 117 | 0xfd REVERT 118 | 0xfe INVALID 119 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/program_generator/data/selection_ewasm.csv: -------------------------------------------------------------------------------- 1 | Opcode Name 2 | 0x01 nop 3 | 0x1A drop 4 | 0x1B select 5 | 0x20 local.get 6 | 0x21 local.set 7 | 0x22 local.tee 8 | 0x28 i32.load 9 | 0x29 i64.load 10 | 0x2C i32.load8_s 11 | 0x2D i32.load8_u 12 | 0x2E i32.load16_s 13 | 0x2F i32.load16_u 14 | 0x30 i64.load8_s 15 | 0x31 i64.load8_u 16 | 0x32 i64.load16_s 17 | 0x33 i64.load16_u 18 | 0x34 i64.load32_s 19 | 0x35 i64.load32_u 20 | 0x36 i32.store 21 | 0x37 i64.store 22 | 0x3A i32.store8 23 | 0x3B i32.store16 24 | 0x3C i64.store8 25 | 0x3D 64.store16 26 | 0x3E i64.store32 27 | 0x3F memory.size 28 | 0x40 memory.grow 29 | 0x41 i32.const 30 | 0x42 i64.const 31 | 0x45 i32.eqz 32 | 0x46 i32.eq 33 | 0x47 i32.ne 34 | 0x48 i32.lt_s 35 | 0x49 i32.lt_u 36 | 0x4A i32.gt_s 37 | 0x4B i32.gt_u 38 | 0x4C i32.le_s 39 | 0x4D i32.le_u 40 | 0x4E i32.ge_s 41 | 0x4F i32.ge_u 42 | 0x50 i64.eqz 43 | 0x51 i64.eq 44 | 0x52 i64.ne 45 | 0x53 i64.lt_s 46 | 0x54 i64.lt_u 47 | 0x55 i64.gt_s 48 | 0x56 i64.gt_u 49 | 0x57 i64.le_s 50 | 0x58 i64.le_u 51 | 0x59 i64.ge_s 52 | 0x5A i64.ge_u 53 | 0x67 i32.clz 54 | 0x68 i32.ctz 55 | 0x69 i32.popcnt 56 | 0x6A i32.add 57 | 0x6B i32.sub 58 | 0x6C i32.mul 59 | 0x6D i32.div_s 60 | 0x6E i32.div_u 61 | 0x6F i32.rem_s 62 | 0x70 i32.rem_u 63 | 0x71 i32.and 64 | 0x72 i32.or 65 | 0x73 i32.xor 66 | 0x74 i32.shl 67 | 0x75 i32.shr_s 68 | 0x76 i32.shr_u 69 | 0x77 i32.rotl 70 | 0x78 i32.rotr 71 | 0x79 i64.clz 72 | 0x7A i64.ctz 73 | 0x7B i64.popcnt 74 | 0x7C i64.add 75 | 0x7D 
i64.sub 76 | 0x7E i64.mul 77 | 0x7F i64.div_s 78 | 0x80 i64.div_u 79 | 0x81 i64.rem_s 80 | 0x82 i64.rem_u 81 | 0x83 i64.and 82 | 0x84 i64.or 83 | 0x85 i64.xor 84 | 0x86 i64.shl 85 | 0x87 i64.shr_s 86 | 0x88 i64.shr_u 87 | 0x89 i64.rotl 88 | 0x8A i64.rotr 89 | 0xA7 i32.wrap_i64 90 | 0xAC i64.extend_i32_s 91 | 0xAD i64.extend_i32_u 92 | 0xC0 i32.extend8_s 93 | 0xC1 i32.extend16_s 94 | 0xC2 i64.extend8_s 95 | 0xC3 i64.extend16_s 96 | 0xC4 i64.extend32_s 97 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/program_generator/data/selection_ewasm_first_pass.csv: -------------------------------------------------------------------------------- 1 | Opcode Name 2 | 0x01 nop 3 | 0x1A drop 4 | 0x1B select 5 | 0x20 local.get 6 | 0x21 local.set 7 | 0x22 local.tee 8 | 0x41 i32.const 9 | 0x45 i32.eqz 10 | 0x46 i32.eq 11 | 0x47 i32.ne 12 | 0x48 i32.lt_s 13 | 0x49 i32.lt_u 14 | 0x4A i32.gt_s 15 | 0x4B i32.gt_u 16 | 0x4C i32.le_s 17 | 0x4D i32.le_u 18 | 0x4E i32.ge_s 19 | 0x4F i32.ge_u 20 | 0x67 i32.clz 21 | 0x68 i32.ctz 22 | 0x69 i32.popcnt 23 | 0x6A i32.add 24 | 0x6B i32.sub 25 | 0x6C i32.mul 26 | 0x6D i32.div_s 27 | 0x6E i32.div_u 28 | 0x6F i32.rem_s 29 | 0x70 i32.rem_u 30 | 0x71 i32.and 31 | 0x72 i32.or 32 | 0x73 i32.xor 33 | 0x74 i32.shl 34 | 0x75 i32.shr_s 35 | 0x76 i32.shr_u 36 | 0x77 i32.rotl 37 | 0x78 i32.rotr 38 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/program_generator/data/selection_ewasm_from_spec.csv: -------------------------------------------------------------------------------- 1 | Opcode Name 2 | 0x00 unreachable 3 | 0x01 nop 4 | 0x02 block 5 | 0x03 loop 6 | 0x04 if 7 | 0x05 else 8 | 0x0B end 9 | 0x0C br 10 | 0x0D br_if 11 | 0x0E br_table 12 | 0x0F return 13 | 0x10 call 14 | 0x11 call_indirect 15 | 0x1A drop 16 | 0x1B select 17 | 0x20 local.get 18 | 0x21 local.set 19 | 0x22 local.tee 20 | 0x23 global.get 21 | 0x24 global.set 22 | 0x28 
i32.load 23 | 0x29 i64.load 24 | 0x2C i32.load8_s 25 | 0x2D i32.load8_u 26 | 0x2E i32.load16_s 27 | 0x2F i32.load16_u 28 | 0x30 i64.load8_s 29 | 0x31 i64.load8_u 30 | 0x32 i64.load16_s 31 | 0x33 i64.load16_u 32 | 0x34 i64.load32_s 33 | 0x35 i64.load32_u 34 | 0x36 i32.store 35 | 0x37 i64.store 36 | 0x3A i32.store8 37 | 0x3B i32.store16 38 | 0x3C i64.store8 39 | 0x3D 64.store16 40 | 0x3E i64.store32 41 | 0x3F memory.size 42 | 0x40 memory.grow 43 | 0x41 i32.const 44 | 0x42 i64.const 45 | 0x45 i32.eqz 46 | 0x46 i32.eq 47 | 0x47 i32.ne 48 | 0x48 i32.lt_s 49 | 0x49 i32.lt_u 50 | 0x4A i32.gt_s 51 | 0x4B i32.gt_u 52 | 0x4C i32.le_s 53 | 0x4D i32.le_u 54 | 0x4E i32.ge_s 55 | 0x4F i32.ge_u 56 | 0x50 i64.eqz 57 | 0x51 i64.eq 58 | 0x52 i64.ne 59 | 0x53 i64.lt_s 60 | 0x54 i64.lt_u 61 | 0x55 i64.gt_s 62 | 0x56 i64.gt_u 63 | 0x57 i64.le_s 64 | 0x58 i64.le_u 65 | 0x59 i64.ge_s 66 | 0x5A i64.ge_u 67 | 0x67 i32.clz 68 | 0x68 i32.ctz 69 | 0x69 i32.popcnt 70 | 0x6A i32.add 71 | 0x6B i32.sub 72 | 0x6C i32.mul 73 | 0x6D i32.div_s 74 | 0x6E i32.div_u 75 | 0x6F i32.rem_s 76 | 0x70 i32.rem_u 77 | 0x71 i32.and 78 | 0x72 i32.or 79 | 0x73 i32.xor 80 | 0x74 i32.shl 81 | 0x75 i32.shr_s 82 | 0x76 i32.shr_u 83 | 0x77 i32.rotl 84 | 0x78 i32.rotr 85 | 0x79 i64.clz 86 | 0x7A i64.ctz 87 | 0x7B i64.popcnt 88 | 0x7C i64.add 89 | 0x7D i64.sub 90 | 0x7E i64.mul 91 | 0x7F i64.div_s 92 | 0x80 i64.div_u 93 | 0x81 i64.rem_s 94 | 0x82 i64.rem_u 95 | 0x83 i64.and 96 | 0x84 i64.or 97 | 0x85 i64.xor 98 | 0x86 i64.shl 99 | 0x87 i64.shr_s 100 | 0x88 i64.shr_u 101 | 0x89 i64.rotl 102 | 0x8A i64.rotr 103 | 0xA7 i32.wrap_i64 104 | 0xAC i64.extend_i32_s 105 | 0xAD i64.extend_i32_u 106 | 0xC0 i32.extend8_s 107 | 0xC1 i32.extend16_s 108 | 0xC2 i64.extend8_s 109 | 0xC3 i64.extend16_s 110 | 0xC4 i64.extend32_s 111 | -------------------------------------------------------------------------------- /legacy/gas-estimator/src/program_generator/requirements.txt: 
-------------------------------------------------------------------------------- 1 | fire 2 | -------------------------------------------------------------------------------- /legacy/opcodes/src/table.mediawiki: -------------------------------------------------------------------------------- 1 | {| class="wikitable" 2 | |- style="font-weight:bold;" 3 | ! OP 4 | ! Count 5 | ! % 6 | ! 7 | ! pushdata 8 | ! % pushdata 9 | ! % data 10 | |- 11 | | JUMPDEST 12 | | 22,374,953 13 | | 6.57% 14 | | 15 | | 16 | | 17 | | 6.57% 18 | |- 19 | | JUMPI 20 | | 14,963,477 21 | | 4.39% 22 | | 23 | | 24 | | 25 | | 4.39% 26 | |- 27 | | JUMP 28 | | 11,389,635 29 | | 3.34% 30 | | 31 | | 32 | | 33 | | 3.34% 34 | |- 35 | | All PUSH 36 | | 78,137,163 37 | | 22.94% 38 | | 39 | | 261,092,018 40 | | 41 | | 43.39% 42 | |- 43 | | PUSH1 44 | | 37,886,773 45 | | 11.12% 46 | | 1 47 | | 37,886,773 48 | | 14.51% 49 | | 6.30% 50 | |- 51 | | PUSH2 52 | | 28,280,939 53 | | 8.30% 54 | | 2 55 | | 56,561,878 56 | | 21.66% 57 | | 9.40% 58 | |- 59 | | PUSH3 60 | | 219,949 61 | | 0.06% 62 | | 3 63 | | 659,847 64 | | 0.25% 65 | | 0.11% 66 | |- 67 | | PUSH4 68 | | 5,247,460 69 | | 1.54% 70 | | 4 71 | | 20,989,840 72 | | 8.04% 73 | | 3.49% 74 | |- 75 | | PUSH5 76 | | 144,613 77 | | 0.04% 78 | | 5 79 | | 723,065 80 | | 0.28% 81 | | 0.12% 82 | |- 83 | | PUSH6 84 | | 17,898 85 | | 0.01% 86 | | 6 87 | | 107,388 88 | | 0.04% 89 | | 0.02% 90 | |- 91 | | PUSH7 92 | | 4,916 93 | | 0.00% 94 | | 7 95 | | 34,412 96 | | 0.01% 97 | | 0.01% 98 | |- 99 | | PUSH8 100 | | 345,608 101 | | 0.10% 102 | | 8 103 | | 2,764,864 104 | | 1.06% 105 | | 0.46% 106 | |- 107 | | PUSH9 108 | | 17,508 109 | | 0.01% 110 | | 9 111 | | 157,572 112 | | 0.06% 113 | | 0.03% 114 | |- 115 | | PUSH10 116 | | 6,124 117 | | 0.00% 118 | | 10 119 | | 61,240 120 | | 0.02% 121 | | 0.01% 122 | |- 123 | | PUSH11 124 | | 4,005 125 | | 0.00% 126 | | 11 127 | | 44,055 128 | | 0.02% 129 | | 0.01% 130 | |- 131 | | PUSH12 132 | | 61,088 133 | | 0.02% 134 | | 12 135 | | 
733,056 136 | | 0.28% 137 | | 0.12% 138 | |- 139 | | PUSH13 140 | | 25,980 141 | | 0.01% 142 | | 13 143 | | 337,740 144 | | 0.13% 145 | | 0.06% 146 | |- 147 | | PUSH14 148 | | 378,201 149 | | 0.11% 150 | | 14 151 | | 5,294,814 152 | | 2.03% 153 | | 0.88% 154 | |- 155 | | PUSH15 156 | | 101,959 157 | | 0.03% 158 | | 15 159 | | 1,529,385 160 | | 0.59% 161 | | 0.25% 162 | |- 163 | | PUSH16 164 | | 108,720 165 | | 0.03% 166 | | 16 167 | | 1,739,520 168 | | 0.67% 169 | | 0.29% 170 | |- 171 | | PUSH17 172 | | 42,547 173 | | 0.01% 174 | | 17 175 | | 723,299 176 | | 0.28% 177 | | 0.12% 178 | |- 179 | | PUSH18 180 | | 318 181 | | 0.00% 182 | | 18 183 | | 5,724 184 | | 0.00% 185 | | 0.00% 186 | |- 187 | | PUSH19 188 | | 2,813 189 | | 0.00% 190 | | 19 191 | | 53,447 192 | | 0.02% 193 | | 0.01% 194 | |- 195 | | PUSH20 196 | | 2,921,374 197 | | 0.86% 198 | | 20 199 | | 58,427,480 200 | | 22.38% 201 | | 9.71% 202 | |- 203 | | PUSH21 204 | | 30,857 205 | | 0.01% 206 | | 21 207 | | 647,997 208 | | 0.25% 209 | | 0.11% 210 | |- 211 | | PUSH22 212 | | 1,828 213 | | 0.00% 214 | | 22 215 | | 40,216 216 | | 0.02% 217 | | 0.01% 218 | |- 219 | | PUSH23 220 | | 658 221 | | 0.00% 222 | | 23 223 | | 15,134 224 | | 0.01% 225 | | 0.00% 226 | |- 227 | | PUSH24 228 | | 350 229 | | 0.00% 230 | | 24 231 | | 8,400 232 | | 0.00% 233 | | 0.00% 234 | |- 235 | | PUSH25 236 | | 14,112 237 | | 0.00% 238 | | 25 239 | | 352,800 240 | | 0.14% 241 | | 0.06% 242 | |- 243 | | PUSH26 244 | | 832 245 | | 0.00% 246 | | 26 247 | | 21,632 248 | | 0.01% 249 | | 0.00% 250 | |- 251 | | PUSH27 252 | | 1,157 253 | | 0.00% 254 | | 27 255 | | 31,239 256 | | 0.01% 257 | | 0.01% 258 | |- 259 | | PUSH28 260 | | 160,053 261 | | 0.05% 262 | | 28 263 | | 4,481,484 264 | | 1.72% 265 | | 0.74% 266 | |- 267 | | PUSH29 268 | | 262,631 269 | | 0.08% 270 | | 29 271 | | 7,616,299 272 | | 2.92% 273 | | 1.27% 274 | |- 275 | | PUSH30 276 | | 172 277 | | 0.00% 278 | | 30 279 | | 5,160 280 | | 0.00% 281 | | 0.00% 282 | |- 283 | | PUSH31 
284 | | 26,782 285 | | 0.01% 286 | | 31 287 | | 830,242 288 | | 0.32% 289 | | 0.14% 290 | |- 291 | | PUSH32 292 | | 1,818,938 293 | | 0.53% 294 | | 32 295 | | 58,206,016 296 | | 22.29% 297 | | 9.67% 298 | |} 299 | -------------------------------------------------------------------------------- /legacy/tools/evm/words/numWords.java: -------------------------------------------------------------------------------- 1 | package tech.pegasys.poc.witnesscodeanalysis.vm; 2 | 3 | import tech.pegasys.poc.witnesscodeanalysis.vm.Address; 4 | 5 | import org.apache.tuweni.bytes.Bytes; 6 | import org.apache.tuweni.bytes.Bytes32; 7 | import org.apache.tuweni.bytes.MutableBytes32; 8 | 9 | /** Static utility methods to work with VM words (that is, {@link Bytes32} values). */ 10 | public abstract class Words { 11 | private Words() {} 12 | 13 | /** 14 | * Creates a new word containing the provided address. 15 | * 16 | * @param address The address to convert to a word. 17 | * @return A VM word containing {@code address} (left-padded as according to the VM specification 18 | * (Appendix H. of the Yellow paper)). 19 | */ 20 | public static Bytes32 fromAddress(final Address address) { 21 | final MutableBytes32 bytes = MutableBytes32.create(); 22 | address.copyTo(bytes, Bytes32.SIZE - Address.SIZE); 23 | return bytes; 24 | } 25 | 26 | /** 27 | * Extract an address from the provided word. 28 | * 29 | * @param bytes The word to extract the address from. 30 | * @return An address built from the right-most 160-bits of the {@code bytes} (as according to the 31 | * VM specification (Appendix H. of the Yellow paper)). 32 | */ 33 | public static Address toAddress(final Bytes32 bytes) { 34 | return Address.wrap(bytes.slice(bytes.size() - Address.SIZE, Address.SIZE).copy()); 35 | } 36 | 37 | /** 38 | * The number of words corresponding to the provided input. 39 | * 40 | *

<p>In other words, this computes {@code input.size() / 32} but rounded up. 41 | * 42 | * @param input the input to check. 43 | * @return the number of (32 bytes) words that {@code input} spans. 44 | */ 45 | public static int numWords(final Bytes input) { 46 | // m/n round up == (m + n - 1)/n: http://www.cs.nott.ac.uk/~psarb2/G51MPC/slides/NumberLogic.pdf 47 | return (input.size() + Bytes32.SIZE - 1) / Bytes32.SIZE; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /legacy/wiki/Building-EVM-LLVM.md: -------------------------------------------------------------------------------- 1 | The project compiles like other LLVM projects. The target's name is `EVM`, but since it is not yet finalized, you have to specify `-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=EVM` when you compile it. 2 | 3 | In short, you can use the following to build the backend: 4 | 5 | ``` 6 | git clone git@github.com:etclabscore/evm_llvm.git 7 | cd evm_llvm 8 | git checkout EVM 9 | mkdir build && cd build 10 | cmake -DLLVM_TARGETS_TO_BUILD=EVM -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=EVM .. 11 | make -j8 12 | ``` 13 | -------------------------------------------------------------------------------- /legacy/wiki/Compiling-smart-contracts.md: -------------------------------------------------------------------------------- 1 | ### The Contract constructor function 2 | 3 | Because EVM's execution always starts from the beginning of the code (`pc = 0`), there must be a way to handle more complicated contract behaviours. In EVM LLVM, we use a function to describe the function handling. It is called the contract constructor function. To implement the function, developers are expected to respect the following contract constructor properties: 4 | 5 | - The constructor should be the first function in the generated LLVM IR. 6 | - The constructor should be named `solidity.main` or `main` (could change in the future).
The backend recognizes these specific names and will generate different call codes. 7 | - The constructor should not take any arguments. 8 | - The constructor should initialize the function's `free memory pointer`, which is located at address `0x40`. The `free memory pointer` is like the usual frame pointer, used to calculate function frames and stack allocations. Because it is located at `0x40`, you cannot initialize it to a smaller number. 9 | 10 | ### Skeleton example of a very small constructor function 11 | 12 | Here is an illustration of the skeleton of a small smart contract: 13 | 14 | ``` 15 | declare i256 @llvm.evm.calldataload(i256) 16 | declare void @llvm.evm.return(i256, i256) 17 | declare void @llvm.evm.mstore(i256, i256) 18 | 19 | define void @main() { 20 | entry: 21 | call void @llvm.evm.mstore(i256 64, i256 128) 22 | %0 = call i256 @llvm.evm.calldataload(i256 0) 23 | %1 = call i256 @llvm.evm.calldataload(i256 32) 24 | %2 = call i256 @add(i256 %0, i256 %1) 25 | call void @llvm.evm.mstore(i256 0, i256 %2) 26 | call void @llvm.evm.return(i256 0, i256 32) 27 | unreachable 28 | } 29 | 30 | define i256 @add(i256, i256) #0 { 31 | %3 = alloca i256, align 4 32 | %4 = alloca i256, align 4 33 | store i256 %0, i256* %3, align 4 34 | store i256 %1, i256* %4, align 4 35 | %5 = load i256, i256* %3, align 4 36 | %6 = load i256, i256* %4, align 4 37 | %7 = add nsw i256 %5, %6 38 | ret i256 %7 39 | } 40 | ``` 41 | 42 | **Usually, it is the frontend's responsibility to do the smart contract's plumbing, including the contract's constructor function.** We need the language frontends to generate corresponding LLVM IR code.
43 | 44 | This smart contract does the following things; 45 | 46 | - Initialize the `free memory pointer` to 128 47 | - parse the first two 32-byte inputs 48 | - call the `@add` function and supply it with the two parsed arguments 49 | - In the function `@add`, we simply add the two arguments, and return it 50 | - In the `@main` function, return the retrieved value using `llvm.evm.return` intrinsic. 51 | 52 | ### Compiling the smart contract 53 | 54 | Let's put the above smart contract code into a file named `test.ll`, and we use `llc` to generate EVM binary: 55 | 56 | ``` 57 | llc -mtriple=evm -filetype=obj test.ll -o test.o 58 | ``` 59 | 60 | ### Running the contract 61 | 62 | A generated `.o` file is in binary format. To see its content in hex, try to use `xxd`, for example: 63 | 64 | ``` 65 | xxd -p -cols 65536 test.o 66 | ``` 67 | 68 | The `xxd` will emit a hex string representation of the binary format. `xxd` will try to break the line if it is too long. Here we specify `-cols 65536` to avoid linebreaking. After calling `xxd`, you should see some output such as: 69 | 70 | ``` 71 | 5b600135600080803561003d909192939091604051806108200152604051610840016040526004580192565b60405160209003516040529052602090f35b80826040519190915260206040510152019056 72 | ``` 73 | 74 | That is what we need to execute using an EVM engine. Let's try to do it using Geth's EVM. 
Remember that we need to supply two input arguments, so the command line should be like: 75 | 76 | ``` 77 | evm --input 1234567890123456789012345678901234567890123456789012345678901234 --code 5b600135600080803561003d909192939091604051806108200152604051610840016040526004580192565b60405160209003516040529052602090f35b80826040519190915260206040510152019056 run 78 | ``` 79 | 80 | `evm` will emit the result of adding the two inputs: 81 | 82 | ``` 83 | 0x468acf08a2468acf08a2468acf08a2468acf08a2468acf08a2468acf08a24634 84 | ``` 85 | -------------------------------------------------------------------------------- /legacy/wiki/Emitting-Program-Metadata.md: -------------------------------------------------------------------------------- 1 | EVM LLVM provides a way to emit a program's metadata for various purposes. For example, a symbol table that records the jump destinations can be emitted along with the generated binary. 2 | 3 | Developers can use this utility to emit more program information. 4 | 5 | ## Existing implementation 6 | 7 | When compiling a contract, a file named `EVMMeta.txt` will be generated along with the binary code. The file contains the function symbol table in the compiled program, along with the offset of each function. The metadata file can be used for various purposes, such as debugging, manual linking, analysis, and so on. 8 | 9 | To specify a custom metadata file name if you do not want to use the `EVMMeta.txt` filename, option `-evm_md_file` can be used. 10 | 11 | # Limitation 12 | 13 | The existing implementation of EVM metadata emitting is limited to the `MachineCode` module/level, which means that any transformations at a higher level, such as the IR level, will not be shown in the result.
14 | -------------------------------------------------------------------------------- /legacy/wiki/Example:-Compiling-using-existing-language-frontend.md: -------------------------------------------------------------------------------- 1 | Let's try to use a simple C file to test our compiler: 2 | 3 | ```sh 4 | cat <<EOF > test.c 5 | unsigned x; 6 | int abc(unsigned a, unsigned b, unsigned c) { 7 | if (c > 0) { 8 | return a + x; 9 | } else { 10 | return a + b; 11 | } 12 | } 13 | EOF 14 | ``` 15 | 16 | Prerequisite: You have to install `clang` and use it to generate LLVM IR first: 17 | 18 | ``` 19 | clang -S -emit-llvm test.c 20 | ``` 21 | 22 | This will generate a `test.ll` file which should be the LLVM IR equivalent of our `test.c` file. Then we can generate EVM binary or assembly from it. In order to use the backend to generate EVM assembly, you have to specify `-mtriple=evm` when calling `llc`. An example is as follows: 23 | 24 | ``` 25 | ./build/bin/llc -mtriple=evm test.ll -o test.s 26 | ``` 27 | 28 | The generated `test.s` file contains the compiled EVM assembly code. Note that the generated code is the function body itself. In order to generate a complete smart contract we need to use a smart contract creator function, which we will discuss on another page. 29 | 30 | Notice that you can also get the binary code of the function body by emitting an object file: 31 | 32 | ``` 33 | ./build/bin/llc -mtriple=evm -filetype=obj test.ll -o test.o 34 | ``` 35 | -------------------------------------------------------------------------------- /legacy/wiki/Function-Layouts.md: -------------------------------------------------------------------------------- 1 | #### Address layout 2 | 3 | EVM bytecode has a flat structure. It does not have explicit function entries, nor symbol tables. All executions start from address `0x00`.
4 | 5 | #### Limitations 6 | 7 | Notice that at this moment this backend is limited to generate correct code for a single compilation unit. 8 | 9 | In order to link more than one compilation units, one shall inline existing compilation units in the frontend so that the frontend can generate correct `main` (the `function dispatcher` function) for the whole smart contract. 10 | 11 | #### The function dispatcher (meta function) 12 | 13 | The `function dispatcher` function (usually called `main` function in some contexts) is always placed at the beginning of the generated binary bytecode. The dispatcher is responsible for: 14 | 15 | 1. parse the call data and find the called function address in the jump table using the hash value provided in the call data. 16 | 2. extract the call arguments, and push them on to stack. 17 | 3. call the function address specified in the jump table. 18 | 19 | ``` 20 | Start of address 21 | +----------------> +-------------------------+ 22 | | Function dispatcher | 23 | | Jump Table | 24 | | (Func1, | 25 | | Func2, | 26 | | Func3) | 27 | +-------------------------+ 28 | | | 29 | | Func1 | 30 | | | 31 | +-------------------------+ 32 | | | 33 | | Func2 | 34 | | | 35 | +-------------------------+ 36 | | | 37 | | Func3 | 38 | | | 39 | +-------------------------+ 40 | ``` 41 | 42 | #### Moving the function dispatcher to front of the LLVM IR function list 43 | 44 | At this moment it is up to the frontend developer to move the LLVM IR function to the beginning of the function list. 
You can do something like this when creating the function dispatcher: 45 | 46 | ``` 47 | // Let's say you have a dispatcher function named "dispatcher" 48 | 49 | // You should include "llvm/IR/SymbolTableListTraits.h" here 50 | using FunctionListType = SymbolTableList<Function>; 51 | FunctionListType &FuncList = TheModule->getFunctionList(); 52 | FuncList.remove(dispatcher); 53 | FuncList.insert(FuncList.begin(), dispatcher); 54 | ``` 55 | -------------------------------------------------------------------------------- /legacy/wiki/Future-Works.md: -------------------------------------------------------------------------------- 1 | # Functionalities 2 | 3 | ## Experimental support of landing pad 4 | 5 | Landingpad is used to support exception handling. 6 | 7 | ## Experimental support of simulating heap allocations 8 | 9 | EVM does not have a heap space, so we cannot use heap allocations. We might be able to work around it. 10 | 11 | ## Constant table support 12 | 13 | Having a constant table in the smart contract could potentially save some code size if the elements in the table are reused. 14 | 15 | ## Metadata export 16 | 17 | We could export more metadata for debugging, analyzing, and so on. 18 | 19 | # Optimizations 20 | 21 | ## Support more than 16 local variables 22 | 23 | EVM can only support retrieval of an element up to a depth of 16 from the stack top using instructions `SWAP1` to `SWAP16` -- resulting in a limitation in the Solidity compiler, which can only support 16 local variables. At this moment, EVM LLVM will also face a `stack too deep` issue if there are more than 16 variables in a single basic block. 24 | 25 | But in LLVM we can totally work around this issue, and do a much better job. With dataflow analysis and a register allocation algorithm, we can have near-optimal variable assignment (on the stack or on the memory stack) in linear time. 26 | 27 | ## Instruction scheduling 28 | 29 | Arranging the order of the opcodes in EVM binary is critical to its performance.
Instructions have to be arranged so that we have minimal stack manipulation overhead (the opcodes that do not do actual computation, but rather reorder stack operands' positions relative to the top of the stack). 30 | 31 | The EVM LLVM backend is designed in such a way that a scheduler before register allocation can be implemented to reduce the stack operation overhead. 32 | 33 | ## Improve EVM calling conventions 34 | 35 | When calling a subroutine, the return address is the first argument and resides at the top of the stack. This is non-optimal because the return address will definitely not be used until the very end of the subroutine, and taking up a visible slot is expensive. We can re-arrange the return address to be at the end of the argument list so it will not have to be reached until we want to return from the subroutine. 36 | 37 | ## Re-materialization of constants 38 | 39 | Usual small constants should not stay on the stack --- they should be rematerialized whenever they are needed. 40 | -------------------------------------------------------------------------------- /legacy/wiki/Handling-EVM-specific-operations.md: -------------------------------------------------------------------------------- 1 | Ethereum Virtual Machine specific operations, such as accessing storage, retrieving block information, etc., are performed through EVM specific instructions. The Solidity language automatically generates necessary EVM-specific instructions under the hood so as to hide the details from Solidity developers. However, as a compiler backend, the input to EVM LLVM is in LLVM IR format, which is unable to hold any language specific semantics that are higher than the C language level. So it is up to compiler frontends to lower language specific semantics onto the LLVM IR level. 2 | 3 | Intrinsic functions are used to represent EVM-specific semantics in the input LLVM IR. Intrinsic functions are usually higher level representations of architecture-specific instructions.
In EVM LLVM, we allow users to leverage EVM-specific instructions that are used to interact with the chain or storage by exposing those EVM instructions in the form of intrinsic functions. 4 | 5 | - This [page](https://github.com/etclabscore/evm_llvm/wiki/Intrinsic-Functions) lists the intrinsic functions that frontend developers can use. 6 | - Intrinsics are defined [here](https://github.com/etclabscore/evm_llvm/blob/6271ae12899b6b9a2bfbcb3a690ec4b5e8652cfa/include/llvm/IR/IntrinsicsEVM.td#L14). 7 | - And here are examples on [how to leverage intrinsics](https://github.com/etclabscore/evm_llvm/blob/6271ae12899b6b9a2bfbcb3a690ec4b5e8652cfa/test/CodeGen/EVM/intrinsics.ll#L1) 8 | -------------------------------------------------------------------------------- /legacy/wiki/Home.md: -------------------------------------------------------------------------------- 1 | ![evm-llvm-green-dragon](https://user-images.githubusercontent.com/450283/63640209-85cb3c00-c66b-11e9-9610-0c339ae66ac7.png) 2 | 3 | Welcome to the `evm_llvm` wiki! This project aims at bringing LLVM infrastructure to the EVM world where smart contracts are widely deployed. 4 | 5 | EVM LLVM is an EVM architecture backend for LLVM. With EVM LLVM you can generate EVM binary code with LLVM-based compilers. The backend does not assume a language frontend, so you should be able to plug in a new smart contract language frontend to generate EVM binary. 6 | 7 | The goal of this project is to make it able to for various of platforms, tools and smart contract programming language projects be able to quickly adapt a high-performance EVM backend. 
8 | -------------------------------------------------------------------------------- /legacy/wiki/Language-Frontend-Integration.md: -------------------------------------------------------------------------------- 1 | ## EVM target specific changes 2 | 3 | ### Frontend is expected to emit 256bit values LLVM IR 4 | 5 | The EVM architecture is the only 256-bit machine out there in the market, and so far it has not yet received support from the LLVM community. We added 256-bit and 160-bit support in the LLVM IR level. 6 | 7 | In order to utilize 256-bit and 160-bit operands, developers are expected to emit `i256` and `i160` data types in their IR code generation. Include the `evm_llvm`'s header files in `include/llvm` folders so that these two pre-defined data types can be properly generated. 8 | 9 | ### Frontend needs to generate compatible LLVM IR 10 | 11 | Notice that development of this backend is based on LLVM 10, which was released in March 2020. We also have an LLVM 8 branch just to support those who create their frontends in LLVM 8. 12 | 13 | We could do back porting to other lower versions such as LLVM 9 at the request of developers for better stability or compatibility. Please let me know if you have such needs. 14 | -------------------------------------------------------------------------------- /legacy/wiki/Running-integrated-tests-in-EVM-environment.md: -------------------------------------------------------------------------------- 1 | EVM is different from other execution platforms in that it runs on a blockchain. The result of the execution of a smart contract will be dependent on the state of the blockchain as well. So, we have to integrate an EVM execution environment (in this early stage, `geth`) into our tests. 2 | 3 | ## Constructor 4 | 5 | Unit tests will only focus on small test functions. But you cannot execute a function independently on a blockchain; we need to have a contract constructor and dispatcher as the first function in the file.
The reason is that EVM will always start its execution from address `0x00` -- where the contract header / constructor / dispatcher resides. The header then tries to set up the contract -- allocating memory/storage or parsing incoming parameters, et cetera. 6 | 7 | Here is the commented constructor code we use for handling unit tests: 8 | 9 | ``` 10 | define void @main() { 11 | entry: 12 | %0 = call i256 @llvm.evm.calldataload(i256 0) ; extract first 32-byte argument 13 | %1 = call i256 @llvm.evm.calldataload(i256 32); extract second 32-byte argument 14 | %2 = call i256 @test(i256 %0, i256 %1) ; execute the unit test function 15 | call void @llvm.evm.mstore(i256 0, i256 %2) ; store the returned value to memory address `0x00` 16 | call void @llvm.evm.return(i256 0, i256 32) ; call "return" to return the value returned by @test 17 | unreachable 18 | } 19 | ``` 20 | 21 | Notice that the `@test` function takes 2 parameters, so we will have two calls to `@llvm.evm.calldataload`. 22 | 23 | The unit test is compiled using `llc` with options: `-mtriple=evm -filetype=obj`. Then the code is executed using `geth`'s `evm` command. 24 | 25 | ## Testing utilities 26 | 27 | A Python script is used to handle the testing: `evm_llvm/tools/evm-test/evm_test.py` is the script we created to test the functionality of the LLVM backend. Here is what it does: 28 | 29 | - call the evm_llvm backend to compile an LLVM IR file (`.ll` file) into an object file (`.o` file). The file should contain the function we are going to verify along with a smart contract constructor header which is used to handle input arguments. The function should be at the beginning of the IR file (the first function). 30 | - extract the contract opcodes from the `.o` file and prepare the input arguments (by padding each argument to be 32 bytes long and concatenating everything into a long string). 31 | - Run the executable binary using geth's `evm`, get the result from the printed output, and compare the result with the expected value.
32 | 33 | ## How to run testings 34 | 35 | 1. Install Python3 36 | 2. Run `evm_llvm/tools/evm-test/evm_test.py` then you should see the results. 37 | 38 | ## How to add new tests 39 | 40 | Please take a look at the `evm_llvm/tools/evm-test/evm_testsuit.py` file. It organizes tests by categorizing them into different `OrderedList`s. Each element of a list contains the following information: 41 | 42 | - the name of the test 43 | - the array of input arguments 44 | - the path of the unit test source code file (in LLVM IR form) 45 | - the expected result value 46 | 47 | When adding new tests, you should: 48 | 49 | - put your test files into the `evm_llvm/test/CodeGen/EVM` folder. 50 | - add the test file path and expected results to the `evm_testsuit.py` file. (We might change it when the file gets too large). 51 | 52 | ## TODO lists 53 | 54 | - add blockchain state related tests 55 | - add re-entrance tests (which are also related to changes of blockchain states) 56 | 57 | Please help improve the test utility! 58 | -------------------------------------------------------------------------------- /legacy/wiki/Stack-and-Memory-management.md: -------------------------------------------------------------------------------- 1 | ## Variables 2 | 3 | In the context of a stack machine, a variable refers to an operand that will be consumed by an opcode. In EVM LLVM, variables are treated as virtual registers until they are _stackified_ (converted from register-based code to stack-based code) right before lowering to machine code. 4 | 5 | In LLVM's internal SSA representation mode, it is fairly easy to compute a register's live range (the range from its assignment to its last use). Variables are treated differently depending on their live ranges. Local variables (variables whose liveness extends only within a single basic block) will live entirely on the stack, while non-local variables (variables that live across basic blocks) will be spilled to a memory slot allocated by the compiler. 
6 | 7 | #### Frame Objects 8 | 9 | Frame objects will be allocated either on the stack or in memory. Since each element is 256 bits wide, we have to ensure that frame objects are 256 bits in length as well. Frame objects of smaller length are not supported. 10 | 11 | It is possible for a frame object to be allocated in memory if we are consuming too much stack space. The stack allocation pass will try to find an efficient way to decide which objects go to memory and which stay on the stack. 12 | 13 | ### Frame Pointer (or Free Memory Pointer) 14 | 15 | [Stack pointers and frame pointers](https://en.wikipedia.org/wiki/Call_stack#Stack_and_frame_pointers) are essential to support subroutine calls. The frame pointer is used to record the structure of stack frames. Because we do not have registers in EVM, we have to store the stack frame pointer in a memory location. Usually, we put the stack frame pointer at location `0x40`, and we follow the Solidity compiler's convention of initializing it to the value `128`. So the stack frame of the first function starts at that location. The value of the frame pointer changes as the contract calls a subroutine or exits from a subroutine. Whenever we need access to the frame pointer, we retrieve its value from that specific location. 16 | 17 | ### Memory stack 18 | 19 | Part of the memory is used as a stack for function calls and variable spills. The structure is described as follows: 20 | 21 | - The stack grows from lower addresses to higher addresses, unlike usual hardware implementations. 22 | - The frame is arranged into 3 parts: 23 | - **frame object locations**. Each frame object has its own frame slot. Frame object `x` will have a 32 byte space starting from `$fp + (x * 32)`, where `$fp` is the frame pointer, and is stored at location `0x40`. 24 | - **spilled variables**. Variables that cannot be fully stackified will reside on the memory stack. 
In codegen, each spilled variable will have an index, and each index refers to a memory slot. A spilled variable that bears index `y`, will reside at location `$fp + (number_of_frame_objects * 32) + (y * 32)`. 25 | - **subroutine context**. Like a regular register machine, the memory stack is used to store subroutine context so as to support function calls. Two slots are allocated at the end of current frame for a) the existing frame pointer, and b) return `PC` address. 26 | 27 | Here is an example showing a stack frame right before we jump into a subroutine: 28 | 29 | ``` 30 | Stack top Higher address 31 | +-----------> +----------------------------+ <--------------+ 32 | | | 33 | | Return Address | 34 | | | 35 | +----------------------------+ 36 | | | 37 | | Function argument | 38 | new FP | | 39 | +-----------> +----------------------------+ 40 | | | 41 | | Saved frame pointer | 42 | | (Start of frame) | 43 | +----------------------------+ 44 | | | 45 | | Stack Object 1 | 46 | | | 47 | +----------------------------+ 48 | | | 49 | | Frame Object 2 | 50 | | | 51 | +----------------------------+ 52 | | | 53 | | Frame Object 1 | 54 | Start of frame | | Lower address 55 | +------------> +----------------------------+ <----------------+ 56 | ``` 57 | -------------------------------------------------------------------------------- /legacy/wiki/The-EVM-Calling-Conventions.md: -------------------------------------------------------------------------------- 1 | The EVM architecture is a simplistic structure, but it has everything we need to do usual program computations. 2 | 3 | ## Types of calls 4 | 5 | There are two types of calls in an EVM smart contract: 6 | 7 | 1. **Internal calls**. Internal calls are referred to function calls within a smart contract. An example is that we have two defined function `A` and `B`, and somewhere in `A` we save our context and change our execution flow to the beginning of `B`. 8 | 2. **External calls**. Or cross-contract calls. 
`A` and `B` are defined in different deployed EVM contracts and `A` calls `B` in its context. 9 | 10 | ## Internal call conventions 11 | 12 | Up to ETH 1.5, there is no link and jump EVM opcode for easy handling of subroutines (even though some [discussions](https://github.com/ethereum/EIPs/issues/2315) are on-going). So we have to manually handle subroutine calls. Here are the calling conventions for internal calls: 13 | 14 | - current subroutine's frame pointer is saved at stack, at memory location `$fp - 32` where `$fp` is the subroutine call's frame pointer. 15 | - arguments are all pushed on stack, along with the return address. An argument with a smaller index number occupies a stack slot on top of an argument with a larger index number. For example, when we want to do a function call: `func abc(x, y, z)`, here is the arrangement of the arguments: 16 | 17 | ``` 18 | +-----------+ 19 | |Return Addr| 20 | +-----------+ 21 | | X | 22 | +-----------+ 23 | | Y | 24 | +-----------+ 25 | Current FP | Z | 26 | +------------> +-----------+ 27 | | Old FP | 28 | +-----------+ 29 | | ..... | 30 | +-----------+ 31 | ``` 32 | 33 | _Note: The return address is put on top of the stack because it is easier to compute the location, but this will result in more stack manipulation overhead for the subroutine calls. We will improve this design in a later version._ 34 | 35 | - A subroutine's return value is stored on stack top. _Note: currently we only support one return value. In the future we will improve it by supporting multiple return values._ 36 | 37 | ## Procedure of a subroutine call 38 | 39 | To illustrate the procedure for a subroutine call, we need to do the following to save the context of current function execution: 40 | 41 | 1. calculate the current frame size. The frame size should be the size sum of: a) slots occupied by frame objects, b) slots occupied by spilled variables, and c) one more slot for storing current frame pointer. 
Let's assume the frame size is calculated to be `%frame_size`. 42 | 2. bump the frame pointer to: `$fp = $fp + %frame_size`. After that, we can easily restore the old frame pointer by looking at location `$fp - 32`. 43 | 3. push all subroutine arguments in order on to stack. 44 | 4. push return address onto stack. (At this moment, the return address is `PC + 6`). 45 | 5. push the beginning address of subroutine and jump. 46 | 47 | Right before we return from a subroutine, the stack should be empty except for the return address, which should be at the top of the stack. When returning from a subroutine call, we should do the following: 48 | 49 | 1. push return value on to top of stack. 50 | 2. Do a `swap1` to move the return address to top of stack 51 | 3. jump to return address and resume the execution in caller function. If the function returns nothing, simply jump to return address. 52 | 53 | After jumping back to caller, we have to resume the execution: 54 | 55 | 1. restore caller's frame pointer by storing the value at location `$fp - 32` to `0x40`. 56 | 57 | ## [EIP2315](https://eips.ethereum.org/EIPS/eip-2315) Support: Subroutine calls 58 | 59 | The support of subroutines inside EVM enables the compiler to generate better-performing code. To be more specific: with EIP2315, it is up to the EVM to maintain the return address stack: 60 | 61 | 1. the return address stack is only accessible to VM 62 | 2. the stack is invisible to users and compilers 63 | 64 | A better calling convention is made with the support of EIP2315: 65 | 66 | ### To generate a call procedure 67 | 68 | 1. calculate the current frame size. The frame size should be the size sum of: a) slots occupied by frame objects, b) slots occupied by spilled variables, and c) one more slot for storing current frame pointer. Let's assume the frame size is calculated to be `%frame_size`. 69 | 2. save existing frame pointer at memory location `$fp + %frame_size - 32`. The frame pointer is maintained at `0x40`. 70 | 3. 
bump the frame pointer to: `$fp = $fp + %frame_size`. After that, we can easily restore the old frame pointer by looking at location `$fp - 32`. 71 | 4. push all subroutine arguments in order on to stack. 72 | 5. push the beginning address of subroutine and call `JUMPSUB` 73 | 74 | ### To generate the return 75 | 76 | 1. push return value on to top of stack. 77 | 2. call `RETURNSUB` to resume execution of caller function. 78 | 79 | ## External calls 80 | 81 | External calls are implemented using intrinsic calls. 82 | -------------------------------------------------------------------------------- /legacy/wiki/Types-and-type-conversions.md: -------------------------------------------------------------------------------- 1 | ## Newly supported Types 2 | 3 | So far the open-source LLVM trunk has not yet implemented support for bit sizes larger than 128 bits. We have implemented 256-bit support in our own backend, and are considering contributing it back to the main trunk. 4 | 5 | Users are allowed to use `i256` and `i160` data types in their generated LLVM IR, which represent 256-bit integer types and 160-bit integer types respectively. 6 | 7 | Even though all EVM data types are 256 bits in length internally, we are still able to offer support for smaller data types. However, users are encouraged to use 256-bit data types internally because it is free. 8 | 9 | ## Contract Input Argument Types -- The Solidity convention 10 | 11 | Contract arguments are passed to EVM via the call data field. The function dispatcher is responsible for extracting input arguments from call data. 12 | 13 | In Solidity's convention, each argument in call data is padded to 32 bytes if its data type is shorter. So, in order to maintain the convention, the function dispatcher needs to truncate the input arguments to the defined size in the function that is going to be called. 14 | 15 | This is undoubtedly inefficient, so users are discouraged from using smaller data types. 
16 | -------------------------------------------------------------------------------- /legacy/wiki/files/Generating_stack_machine_code_using_LLVM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/legacy/wiki/files/Generating_stack_machine_code_using_LLVM.pdf -------------------------------------------------------------------------------- /legacy/wiki/files/LLVM_talk.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/legacy/wiki/files/LLVM_talk.pdf -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openevm", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "main.js", 6 | "directories": { 7 | "doc": "docs" 8 | }, 9 | "scripts": { 10 | "test": "echo \"Error: no test specified\" && exit 1" 11 | }, 12 | "keywords": [], 13 | "author": "", 14 | "license": "ISC", 15 | "devDependencies": { 16 | "gh-pages": "^6.1.1" 17 | } 18 | } 19 | --------------------------------------------------------------------------------