├── .github
    └── workflows
    │   └── static.yml
├── .gitignore
├── .gitmodules
├── .prettierrc.js
├── README.md
├── _build.sh
├── _index.txt
├── _index.yml
├── docs
    ├── .nojekyll
    ├── ethereum.html
    ├── ethereum
    │   ├── arrow_glacier.html
    │   ├── arrow_glacier
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── base_types.html
    │   ├── berlin.html
    │   ├── berlin
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── byzantium.html
    │   ├── byzantium
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── constantinople.html
    │   ├── constantinople
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── crypto.html
    │   ├── crypto
    │   │   ├── alt_bn128.html
    │   │   ├── blake2.html
    │   │   ├── elliptic_curve.html
    │   │   ├── finite_field.html
    │   │   └── hash.html
    │   ├── dao_fork.html
    │   ├── dao_fork
    │   │   ├── bloom.html
    │   │   ├── dao.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── ethash.html
    │   ├── exceptions.html
    │   ├── fork_criteria.html
    │   ├── frontier.html
    │   ├── frontier
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── genesis.html
    │   ├── gray_glacier.html
    │   ├── gray_glacier
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── homestead.html
    │   ├── homestead
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── istanbul.html
    │   ├── istanbul
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── london.html
    │   ├── london
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── muir_glacier.html
    │   ├── muir_glacier
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── paris.html
    │   ├── paris
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── rlp.html
    │   ├── shanghai.html
    │   ├── shanghai
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── spurious_dragon.html
    │   ├── spurious_dragon
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── tangerine_whistle.html
    │   ├── tangerine_whistle
    │   │   ├── bloom.html
    │   │   ├── fork.html
    │   │   ├── fork_types.html
    │   │   ├── state.html
    │   │   ├── trie.html
    │   │   ├── utils.html
    │   │   ├── utils
    │   │   │   ├── address.html
    │   │   │   ├── hexadecimal.html
    │   │   │   └── message.html
    │   │   └── vm.html
    │   ├── trace.html
    │   ├── utils.html
    │   └── utils
    │   │   ├── byte.html
    │   │   ├── ensure.html
    │   │   ├── hexadecimal.html
    │   │   ├── numeric.html
    │   │   └── safe_arithmetic.html
    ├── index.html
    └── search.js
├── legacy
    ├── Building-EVM-LLVM.md
    ├── Compiling-smart-contracts.md
    ├── Emitting-Program-Metadata.md
    ├── Example:-Compiling-using-existing-language-frontend.md
    ├── Function-Layouts.md
    ├── Future-Works.md
    ├── Handling-EVM-specific-operations.md
    ├── Home.md
    ├── Intrinsic-Functions.md
    ├── Language-Frontend-Integration.md
    ├── README.md
    ├── Running-integrated-tests-in-EVM-environment.md
    ├── Stack-and-Memory-management.md
    ├── The-EVM-Calling-Conventions.md
    ├── Types-and-type-conversions.md
    ├── block_construction
    │   └── src
    │   │   └── ProtoBlock
    │   │       └── README.md
    ├── erigion
    │   ├── Choice-of-storage-engine.md
    │   ├── Consensus-Engine-separation.md
    │   ├── Criteria-for-transitioning-from-Alpha-to-Beta.md
    │   ├── EVM-with-abstract-interpretation-and-backtracking.md
    │   ├── Erigon-Beta-1-announcement.md
    │   ├── Erigon2-prototype.md
    │   ├── Header-downloader.md
    │   ├── Home.md
    │   ├── LMDB-freelist-illustrated-guide.md
    │   ├── LMDB-freelist.md
    │   ├── State-sync-design.md
    │   ├── TEVM---Transpiled-EVM:-accelerate-EVM-improvement-R&D,-but-learning-from-eWASM.md
    │   ├── Transaction-Pool-Design.md
    │   └── Using-Postman-to-test-RPC.md
    ├── evm-illustrated
    │   ├── LICENSE
    │   ├── README.md
    │   ├── changelog.md
    │   ├── ethereum_evm_illustrated.pdf
    │   └── src
    │   │   └── ethereum_evm_illustrated.pptx
    ├── evm-modules
    │   ├── LICENSE
    │   ├── README.md
    │   └── gasometer
    │   │   ├── README.md
    │   │   └── sstore
    │   │       ├── net.md
    │   │       └── simple.md
    ├── files
    │   ├── Generating_stack_machine_code_using_LLVM.pdf
    │   └── LLVM_talk.pdf
    ├── gas-estimator
    │   ├── .dockerignore
    │   ├── .gitignore
    │   ├── .gitmodules
    │   ├── Dockerfile.evmone
    │   ├── Dockerfile.geth
    │   ├── Dockerfile.openethereum
    │   ├── Makefile
    │   ├── README.md
    │   ├── docs
    │   │   ├── notes
    │   │   │   ├── execution_comparison.md
    │   │   │   ├── instrumentation_measurement
    │   │   │   │   ├── docker_timer.md
    │   │   │   │   ├── evmone.md
    │   │   │   │   ├── example_bytecode_programs.md
    │   │   │   │   ├── geth.md
    │   │   │   │   ├── openethereum.md
    │   │   │   │   ├── openethereum_ewasm.md
    │   │   │   │   └── other_tools.md
    │   │   │   ├── measurement_standard_ruleset.md
    │   │   │   ├── meetings
    │   │   │   │   ├── 2020-10-30.md
    │   │   │   │   ├── 2020-11-06.md
    │   │   │   │   ├── 2020-11-13.md
    │   │   │   │   ├── 2020-11-20.md
    │   │   │   │   ├── 2020-11-27.md
    │   │   │   │   ├── 2020-12-04.md
    │   │   │   │   ├── 2020-12-11.md
    │   │   │   │   ├── 2020-12-18.md
    │   │   │   │   ├── 2020-12-22.md
    │   │   │   │   ├── 2021-01-08.md
    │   │   │   │   ├── 2021-01-22.md
    │   │   │   │   └── 2021-02-05.md
    │   │   │   ├── papers
    │   │   │   │   ├── adaptive_gas_cost_mechanism.md
    │   │   │   │   ├── bic_to_cpu.md
    │   │   │   │   ├── broken_metre.md
    │   │   │   │   ├── bytecode_monitoring_of_java.md
    │   │   │   │   ├── empirically_analyzing.md
    │   │   │   │   ├── holimans_gist_benchmarks.md
    │   │   │   │   ├── instruction_timing_model_1976.md
    │   │   │   │   ├── opbench.md
    │   │   │   │   ├── other.md
    │   │   │   │   ├── performance_benchmarking.md
    │   │   │   │   ├── timing_of_jvm_instructions.md
    │   │   │   │   └── vm_matters.md
    │   │   │   └── program_generator
    │   │   │   │   └── notes.md
    │   │   ├── report_stage_i.md
    │   │   └── report_stage_i_assets
    │   │   │   └── implementation_relative_all_opcodes.svg
    │   └── src
    │   │   ├── analysis
    │   │       ├── README.md
    │   │       ├── exploration.Rmd
    │   │       ├── exploration_timer_overhead.Rmd
    │   │       └── exploration_timers.Rmd
    │   │   ├── check_clocksource.sh
    │   │   ├── instrumentation_measurement
    │   │       ├── README.md
    │   │       ├── clock_resolution_go
    │   │       │   └── main.go
    │   │       ├── geth
    │   │       │   ├── README.md
    │   │       │   ├── instrumenter
    │   │       │   │   ├── instrumenter.go
    │   │       │   │   └── time.go
    │   │       │   └── main.go
    │   │       └── measurements.py
    │   │   └── program_generator
    │   │       ├── README.md
    │   │       ├── constants.py
    │   │       ├── data
    │   │           ├── README.md
    │   │           ├── opcodes.csv
    │   │           ├── opcodes_ewasm.csv
    │   │           ├── selection.csv
    │   │           ├── selection_ewasm.csv
    │   │           ├── selection_ewasm_first_pass.csv
    │   │           └── selection_ewasm_from_spec.csv
    │   │       ├── program_generator.py
    │   │       └── requirements.txt
    ├── glossary
    │   └── abi.html
    ├── llc
    │   └── opcodes
    │   │   └── chainId+selfbalance.md
    ├── opcodes
    │   └── src
    │   │   ├── README.md
    │   │   └── table.mediawiki
    ├── test_evm
    │   └── contracts
    │   │   └── opodes
    │   │       └── test_all_opcodes.txt
    ├── tools
    │   └── evm
    │   │   └── words
    │   │       └── numWords.java
    └── wiki
    │   ├── Building-EVM-LLVM.md
    │   ├── Compiling-smart-contracts.md
    │   ├── Emitting-Program-Metadata.md
    │   ├── Example:-Compiling-using-existing-language-frontend.md
    │   ├── Function-Layouts.md
    │   ├── Future-Works.md
    │   ├── Handling-EVM-specific-operations.md
    │   ├── Home.md
    │   ├── Intrinsic-Functions.md
    │   ├── Language-Frontend-Integration.md
    │   ├── Running-integrated-tests-in-EVM-environment.md
    │   ├── Stack-and-Memory-management.md
    │   ├── The-EVM-Calling-Conventions.md
    │   ├── Types-and-type-conversions.md
    │   └── files
    │       ├── Generating_stack_machine_code_using_LLVM.pdf
    │       └── LLVM_talk.pdf
├── main.js
├── package-lock.json
└── package.json


/.github/workflows/static.yml:
--------------------------------------------------------------------------------
 1 | # Simple workflow for deploying static content to GitHub Pages
 2 | name: Deploy static content to Pages
 3 | 
 4 | on:
 5 |   # Runs on pushes targeting the default branch
 6 |   push:
 7 |     branches: ["gh-pages"]
 8 | 
 9 |   # Allows you to run this workflow manually from the Actions tab
10 |   workflow_dispatch:
11 | 
12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
13 | permissions:
14 |   contents: read
15 |   pages: write
16 |   id-token: write
17 | 
18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
20 | concurrency:
21 |   group: "pages"
22 |   cancel-in-progress: false
23 | 
24 | jobs:
25 |   # Single deploy job since we're just deploying
26 |   deploy:
27 |     environment:
28 |       name: github-pages
29 |       url: ${{ steps.deployment.outputs.page_url }}
30 |     runs-on: ubuntu-latest
31 |     steps:
32 |       - name: Checkout
33 |         uses: actions/checkout@v4
34 |       - name: Setup Pages
35 |         uses: actions/configure-pages@v5
36 |       - name: Upload artifact
37 |         uses: actions/upload-pages-artifact@v3
38 |         with:
39 |           # Upload entire repository
40 |           path: '.'
41 |       - name: Deploy to GitHub Pages
42 |         id: deployment
43 |         uses: actions/deploy-pages@v4
44 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | node_modules
 2 | gh-pages/   
 3 | *~
 4 | .*sw?
 5 | \#*
 6 | .DS_Store
 7 | 
 8 | *.rej
 9 | *.orig
10 | 
11 | *.pro
12 | /packages/rpm/doxygen.spec
13 | *.idb
14 | *.pdb
15 | 
16 | /doxygen_docs
17 | /doxygen.tag
18 | /build*
19 | /qtools_docs
20 | /warnings.log
21 | 
22 | tags
23 | 
24 | .idea
25 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "evmc"]
2 | 	path = evmc
3 | 	url = https://github.com/ethereum/evmc
4 | 


--------------------------------------------------------------------------------
/.prettierrc.js:
--------------------------------------------------------------------------------
 1 | 'use strict';
 2 | // "schema": "https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/prettierrc.json",
 3 | // "$schema": "http://json.schemastore.org/prettierrc",
 4 | module.exports = {
 5 |   arrowParens: 'always',
 6 |   bracketSpacing: true,
 7 |   endOfLine: 'lf',
 8 |   printWidth: 100,
 9 |   proseWrap: 'never',
10 |   singleQuote: true,
11 |   tabWidth: 2,
12 |   trailingComma: 'all',
13 |   quoteProps: 'as-needed',
14 |   semi: true,
15 |   overrides: [
16 |     {
17 |       files: '*.md',
18 |       options: {
19 |         parser: 'markdown',
20 |         printWidth: 120,
21 |         proseWrap: 'never',
22 |         tabWidth: 4,
23 |         useTabs: true,
24 |         singleQuote: false,
25 |         bracketSpacing: true,
26 |       },
27 |     },
28 |   ],
29 | };
30 | 


--------------------------------------------------------------------------------
/_build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | cat Building-EVM-LLVM.md Compiling-smart-contracts.md Emitting-Program-Metadata.md Example:-Compiling-using-existing-language-frontend.md Function-Layouts.md Future-Works.md Handling-EVM-specific-operations.md Home.md Intrinsic-Functions.md Language-Frontend-Integration.md Running-integrated-tests-in-EVM-environment.md Stack-and-Memory-management.md The-EVM-Calling-Conventions.md Types-and-type-conversions.md > OMNIBUS.md


--------------------------------------------------------------------------------
/_index.txt:
--------------------------------------------------------------------------------
 1 | Home.md
 2 | The-EVM-Calling-Conventions.md
 3 | Building-EVM-LLVM.md
 4 | Function-Layouts.md
 5 | Stack-and-Memory-management.md
 6 | Types-and-type-conversions.md
 7 | Compiling-smart-contracts.md
 8 | Emitting-Program-Metadata.md
 9 | Example:-Compiling-using-existing-language-frontend.md
10 | Handling-EVM-specific-operations.md
11 | Intrinsic-Functions.md
12 | Language-Frontend-Integration.md
13 | Running-integrated-tests-in-EVM-environment.md
14 | Future-Works.md


--------------------------------------------------------------------------------
/_index.yml:
--------------------------------------------------------------------------------
 1 | evm:
 2 |   Home.md
 3 |   The-EVM-Calling-Conventions.md
 4 |   Building-EVM-LLVM.md
 5 |   Function-Layouts.md
 6 |   Stack-and-Memory-management.md
 7 |   Types-and-type-conversions.md
 8 |   Compiling-smart-contracts.md
 9 |   Emitting-Program-Metadata.md
10 |   Example:-Compiling-using-existing-language-frontend.md
11 |   Handling-EVM-specific-operations.md
12 |   Intrinsic-Functions.md
13 |   Language-Frontend-Integration.md
14 |   Running-integrated-tests-in-EVM-environment.md
15 |   Future-Works.md


--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/docs/.nojekyll


--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
1 | <!doctype html>
2 | <html>
3 | <head>
4 |     <meta charset="utf-8">
5 |     <meta http-equiv="refresh" content="0; url=./ethereum.html"/>
6 | </head>
7 | </html>
8 | 


--------------------------------------------------------------------------------
/legacy/Building-EVM-LLVM.md:
--------------------------------------------------------------------------------
 1 | The project compiles like other LLVM projects. The target's name is `EVM`, but since it is not yet finalized, you have to specify `-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=EVM` when you compile it.
 2 | 
 3 | In short, you can use the following to build the backend:
 4 | 
 5 | ```
 6 | git clone git@github.com:etclabscore/evm_llvm.git
 7 | cd evm_llvm
 8 | git checkout EVM
 9 | mkdir build && cd build
10 | cmake -DLLVM_TARGETS_TO_BUILD=EVM -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=EVM ..
11 | make -j8
12 | ```
13 | 


--------------------------------------------------------------------------------
/legacy/Compiling-smart-contracts.md:
--------------------------------------------------------------------------------
 1 | ### The Contract constructor function
 2 | 
 3 | Because EVM's execution always starts from the beginning of the code (`pc = 0`), there must be a way to handle more complicated contract behaviours. In EVM LLVM, we use a function to describe the function handling. It is called the contract constructor function. To implement the function, developers are expected to respect the following contract constructor properties:
 4 | 
 5 | -   The constructor should be the first function in the generated LLVM IR.
 6 | -   The constructor should be named `solidity.main` or `main` (could change in the future). The backend recognizes these specific names and will generate different call codes.
 7 | -   The constructor should not take any arguments.
 8 | -   The constructor should initialize the function's `free memory pointer`, which is located at address `0x40`. The `free memory pointer` is like the usual frame pointer, used to calculate function frames and stack allocations. Because it is located at `0x40`, you cannot initialize it to a smaller number.
 9 | 
10 | ### Skeleton example of a very small constructor function
11 | 
12 | Here is an illustration of the skeleton of a small smart contract:
13 | 
14 | ```
15 | declare i256 @llvm.evm.calldataload(i256)
16 | declare void @llvm.evm.return(i256, i256)
17 | declare void @llvm.evm.mstore(i256, i256)
18 | 
19 | define void @main() {
20 | entry:
21 |   call void @llvm.evm.mstore(i256 64, i256 128)
22 |   %0 = call i256 @llvm.evm.calldataload(i256 0)
23 |   %1 = call i256 @llvm.evm.calldataload(i256 32)
24 |   %2 = call i256 @add(i256 %0, i256 %1)
25 |   call void @llvm.evm.mstore(i256 0, i256 %2)
26 |   call void @llvm.evm.return(i256 0, i256 32)
27 |   unreachable
28 | }
29 | 
30 | define i256 @add(i256, i256) #0 {
31 |   %3 = alloca i256, align 4
32 |   %4 = alloca i256, align 4
33 |   store i256 %0, i256* %3, align 4
34 |   store i256 %1, i256* %4, align 4
35 |   %5 = load i256, i256* %3, align 4
36 |   %6 = load i256, i256* %4, align 4
37 |   %7 = add nsw i256 %5, %6
38 |   ret i256 %7
39 | }
40 | ```
41 | 
42 | **Usually, it is the frontend's responsibility to do the smart contract's plumbing, including the contract's constructor function.** We need the language frontends to generate corresponding LLVM IR code.
43 | 
44 | This smart contract does the following things:
45 | 
46 | -   Initialize the `free memory pointer` to 128
47 | -   parse the first two 32-byte inputs
48 | -   call the `@add` function and supply it with the two parsed arguments
49 | -   In the function `@add`, we simply add the two arguments, and return it
50 | -   In the `@main` function, return the retrieved value using `llvm.evm.return` intrinsic.
51 | 
52 | ### Compiling the smart contract
53 | 
54 | Let's put the above smart contract code into a file named `test.ll`, and we use `llc` to generate EVM binary:
55 | 
56 | ```
57 | llc -mtriple=evm -filetype=obj test.ll -o test.o
58 | ```
59 | 
60 | ### Running the contract
61 | 
62 | A generated `.o` file is in binary format. To see its content in hex, try to use `xxd`, for example:
63 | 
64 | ```
65 | xxd -p -cols 65536 test.o
66 | ```
67 | 
68 | The `xxd` will emit a hex string representation of the binary format. `xxd` will try to break the line if it is too long. Here we specify `-cols 65536` to avoid linebreaking. After calling `xxd`, you should see some output such as:
69 | 
70 | ```
71 | 5b600135600080803561003d909192939091604051806108200152604051610840016040526004580192565b60405160209003516040529052602090f35b80826040519190915260206040510152019056
72 | ```
73 | 
74 | That is what we need to execute using an EVM engine. Let's try to do it using Geth's EVM. Remember that we need to supply two input arguments, so the command line should be like:
75 | 
76 | ```
77 | evm --input 1234567890123456789012345678901234567890123456789012345678901234 --code 5b600135600080803561003d909192939091604051806108200152604051610840016040526004580192565b60405160209003516040529052602090f35b80826040519190915260206040510152019056 run
78 | ```
79 | 
80 | `evm` will emit the result of adding the two input values:
81 | 
82 | ```
83 | 0x468acf08a2468acf08a2468acf08a2468acf08a2468acf08a2468acf08a24634
84 | ```
85 | 


--------------------------------------------------------------------------------
/legacy/Emitting-Program-Metadata.md:
--------------------------------------------------------------------------------
 1 | EVM LLVM provides a way to emit a program's metadata for various purposes. For example, a symbol table that records the jump destinations can be emitted along with the generated binary.
 2 | 
 3 | Developers can use this utility to emit more program information.
 4 | 
 5 | ## Existing implementation
 6 | 
 7 | When compiling a contract, a file named `EVMMeta.txt` will be generated along with the binary code. The file contains the function symbol table in the compiled program, along with the offset of each function. The metadata file can be used for various purposes, such as debugging, manual linking, analysis, and so on.
 8 | 
 9 | To specify a custom metadata file name if you do not want to use the `EVMMeta.txt` filename, option `-evm_md_file` can be used.
10 | 
11 | # Limitation
12 | 
13 | The existing implementation of EVM metadata emission is limited to the `MachineCode` module/level, which means that any transformations at a higher level, such as the IR level, will not be shown in the result.
14 | 


--------------------------------------------------------------------------------
/legacy/Example:-Compiling-using-existing-language-frontend.md:
--------------------------------------------------------------------------------
 1 | Let's try to use a simple C file to test our compiler:
 2 | 
 3 | ```sh
 4 | cat <<EOF > test.c
 5 | unsigned x;
 6 | int abc(unsigned a, unsigned b, unsigned c) {
 7 |   if (c > 0) {
 8 |     return a + x;
 9 |   } else {
10 |     return a + b;
11 |   }
12 | }
13 | EOF
14 | ```
15 | 
16 | Prerequisite: You have to install `clang` and use it to generate LLVM IR first:
17 | 
18 | ```
19 | clang -S -emit-llvm test.c
20 | ```
21 | 
22 | This will generate a `test.ll` file which should be the LLVM IR equivalent of our `test.c` file. Then we can generate EVM binary or assembly from it. In order to use the backend to generate EVM assembly, you have to specify `-mtriple=evm` when calling `llc`. An example is as follows:
23 | 
24 | ```
25 | ./build/bin/llc -mtriple=evm test.ll -o test.s
26 | ```
27 | 
28 | The generated `test.s` file contains the compiled EVM assembly code. Note that the generated code is the function body itself. In order to generate a complete smart contract source code we need to use a smart contract creator function, which we will talk about on another page.
29 | 
30 | Notice that you can also get the binary code of the function body by emitting an object file:
31 | 
32 | ```
33 | ./build/bin/llc -mtriple=evm -filetype=obj test.ll -o test.o
34 | ```
35 | 


--------------------------------------------------------------------------------
/legacy/Function-Layouts.md:
--------------------------------------------------------------------------------
 1 | #### Address layout
 2 | 
 3 | EVM bytecode has a flat structure. It does not have explicit function entries, nor symbol tables. All execution starts from address `0x00`.
 4 | 
 5 | #### Limitations
 6 | 
 7 | Notice that at this moment this backend is limited to generating correct code for a single compilation unit.
 8 | 
 9 | In order to link more than one compilation unit, one shall inline existing compilation units in the frontend so that the frontend can generate correct `main` (the `function dispatcher` function) for the whole smart contract.
10 | 
11 | #### The function dispatcher (meta function)
12 | 
13 | The `function dispatcher` function (usually called `main` function in some contexts) is always placed at the beginning of the generated binary bytecode. The dispatcher is responsible for:
14 | 
15 | 1. parse the call data and find the called function address in the jump table using the hash value provided in the call data.
16 | 2. extract the call arguments, and push them on to stack.
17 | 3. call the function address specified in the jump table.
18 | 
19 | ```
20 |  Start of address
21 | +---------------->  +-------------------------+
22 |                     | Function dispatcher     |
23 |                     |   Jump Table            |
24 |                     |    (Func1,              |
25 |                     |     Func2,              |
26 |                     |     Func3)              |
27 |                     +-------------------------+
28 |                     |                         |
29 |                     |      Func1              |
30 |                     |                         |
31 |                     +-------------------------+
32 |                     |                         |
33 |                     |      Func2              |
34 |                     |                         |
35 |                     +-------------------------+
36 |                     |                         |
37 |                     |      Func3              |
38 |                     |                         |
39 |                     +-------------------------+
40 | ```
41 | 
42 | #### Moving the function dispatcher to front of the LLVM IR function list
43 | 
44 | At this moment it is up to the frontend developer to move the LLVM IR function to the beginning of the function list. You can do something like this when creating function dispatcher:
45 | 
46 | ```
47 | // Let's say you have a dispatcher function named "dispatcher"
48 | 
49 | // You should include "llvm/IR/SymbolTableListTraits.h" here
50 | using FunctionListType = SymbolTableList<Function>;
51 | FunctionListType &FuncList = TheModule->getFunctionList();
52 | FuncList.remove(dispatcher);
53 | FuncList.insert(FuncList.begin(), dispatcher);
54 | ```
55 | 


--------------------------------------------------------------------------------
/legacy/Future-Works.md:
--------------------------------------------------------------------------------
 1 | # Functionalities
 2 | 
 3 | ## Experimental support of landing pad
 4 | 
 5 | Landingpad is used to support exception handling.
 6 | 
 7 | ## Experimental support of simulating heap allocations
 8 | 
 9 | EVM does not have a heap space, so we cannot use heap allocations. We might be able to work around it.
10 | 
11 | ## Constant table support
12 | 
13 | Having a constant table in the smart contract could potentially save some code size if the elements in the table are reused.
14 | 
15 | ## Metadata export
16 | 
17 | We could export more metadata for debugging, analyzing, and so on.
18 | 
19 | # Optimizations
20 | 
21 | ## Support more than 16 local variables
22 | 
23 | EVM can only support retrieval of an element up to a depth of 16 from the stack top using instructions `SWAP1` to `SWAP16` -- resulting in a limitation in the Solidity compiler, which can only support 16 local variables. At this moment, EVM LLVM will also face a `stack too deep` issue if there are more than 16 variables in a single basic block.
24 | 
25 | But in LLVM we can totally work around this issue, and do a much better job. With dataflow analysis and register allocation algorithm, we can have near-optimal variable assignment (on the stack or on memory stack) in linear time.
26 | 
27 | ## Instruction scheduling
28 | 
29 | Arranging the order of the opcodes in EVM binary is critical to its performance. Instructions have to be arranged so that we have minimal stack manipulation overhead (the opcodes that do not do actual computation, but rather reorder stack operands' positions relative to the top of the stack).
30 | 
31 | EVM LLVM backend is designed in such a way that a scheduler before register allocation can be implemented to reduce the stack operation overhead.
32 | 
33 | ## Improve EVM calling conventions
34 | 
35 | When calling a subroutine, the return address is the first argument and resides at the top of the stack. This is non-optimal because the return address will definitely not be used until the very end of the subroutine, and taking up a visible slot is expensive. We can re-arrange the return address to be at the end of the arguments so it will not have to be reached until we want to return from the subroutine.
36 | 
37 | ## Re-materialization of constants
38 | 
39 | Usually, small constants should not stay on the stack --- they should be rematerialized whenever they are needed.
40 | 


--------------------------------------------------------------------------------
/legacy/Handling-EVM-specific-operations.md:
--------------------------------------------------------------------------------
1 | Ethereum Virtual Machine specific operations, such as accessing storage, retrieving block information, etc., are performed through EVM-specific instructions. The Solidity language automatically generates the necessary EVM-specific instructions under the hood so as to hide the details from Solidity developers. However, as a compiler backend, the input to EVM LLVM is in LLVM IR format, which is unable to hold any language-specific semantics that are higher than the C language level. So it is up to compiler frontends to lower language-specific semantics onto the LLVM IR level.
2 | 
3 | Intrinsic functions are used to represent EVM-specific semantics in the input LLVM IR. Intrinsic functions are usually higher level representations of architecture-specific instructions. In EVM LLVM, we allow users to leverage EVM-specific instructions that are used to interact with the chain or storage by exposing those EVM instructions in the form of intrinsic functions.
4 | 
5 | -   This [page](https://github.com/etclabscore/evm_llvm/wiki/Intrinsic-Functions) lists the intrinsic functions that frontend developers can use.
6 | -   Intrinsics are defined [here](https://github.com/etclabscore/evm_llvm/blob/6271ae12899b6b9a2bfbcb3a690ec4b5e8652cfa/include/llvm/IR/IntrinsicsEVM.td#L14).
7 | -   And here are examples on [how to leverage intrinsics](https://github.com/etclabscore/evm_llvm/blob/6271ae12899b6b9a2bfbcb3a690ec4b5e8652cfa/test/CodeGen/EVM/intrinsics.ll#L1)
8 | 


--------------------------------------------------------------------------------
/legacy/Home.md:
--------------------------------------------------------------------------------
1 | ![evm-llvm-green-dragon](https://user-images.githubusercontent.com/450283/63640209-85cb3c00-c66b-11e9-9610-0c339ae66ac7.png)
2 | 
3 | Welcome to the `evm_llvm` wiki! This project aims at bringing LLVM infrastructure to the EVM world where smart contracts are widely deployed.
4 | 
5 | EVM LLVM is an EVM architecture backend for LLVM. With EVM LLVM you can generate EVM binary code with LLVM-based compilers. The backend does not assume a language frontend, so you should be able to plug in a new smart contract language frontend to generate EVM binary.
6 | 
7 | The goal of this project is to enable various platforms, tools, and smart contract programming language projects to quickly adopt a high-performance EVM backend.
8 | 


--------------------------------------------------------------------------------
/legacy/Language-Frontend-Integration.md:
--------------------------------------------------------------------------------
 1 | ## EVM target specific changes
 2 | 
 3 | ### Frontend is expected to emit 256bit values LLVM IR
 4 | 
 5 | The EVM architecture is the only 256-bit machine out there in the market, and so far it has not yet received support from the LLVM community. We added 256-bit and 160-bit support at the LLVM IR level.
 6 | 
 7 | In order to utilize 256-bit and 160-bit operands, developers are expected to emit `i256` and `i160` data types in their IR code generation. Include the `evm_llvm`'s header files in `include/llvm` folders so that these two pre-defined data types can be properly generated.
 8 | 
 9 | ### Frontend needs to generate compatible LLVM IR
10 | 
11 | Notice that development of this backend is based on LLVM 10, which was released in March 2020. We also have an LLVM 8 branch just to support those who create their frontends in LLVM 8.
12 | 
13 | We could do back porting to other lower versions such as LLVM 9 at the request of developers for better stability or compatibility. Please let me know if you have such needs.
14 | 


--------------------------------------------------------------------------------
/legacy/Running-integrated-tests-in-EVM-environment.md:
--------------------------------------------------------------------------------
 1 | EVM is different from other execution platforms in that it runs on a blockchain. The result of executing a smart contract also depends on the state of the blockchain. So, we have to integrate an EVM execution environment (in this early stage, `geth`) into our tests.
 2 | 
 3 | ## Constructor
 4 | 
 5 | Unit tests only focus on small test functions. But you cannot execute a function independently on a blockchain; we need to have a contract constructor and dispatcher as the first function in the file. The reason is that EVM always starts its execution from address `0x00` -- where the contract header / constructor / dispatcher resides. The header then sets up the contract -- allocating memory/storage, parsing incoming parameters, et cetera.
 6 | 
 7 | Here is the commented constructor code we use for handling unit tests:
 8 | 
 9 | ```
10 | define void @main() {
11 | entry:
12 |   %0 = call i256 @llvm.evm.calldataload(i256 0) ; extract first 32-byte argument
13 |   %1 = call i256 @llvm.evm.calldataload(i256 32); extract second 32-byte argument
14 |   %2 = call i256 @test(i256 %0, i256 %1)        ;  execute the unit test function
15 |   call void @llvm.evm.mstore(i256 0, i256 %2)   ; store the returned value to memory address `0x00`
16 |   call void @llvm.evm.return(i256 0, i256 32)   ; call "return" to return the value returned by @test
17 |   unreachable
18 | }
19 | ```
20 | 
21 | Notice that the `@test` function takes 2 parameters, so we will have two calls to `@llvm.evm.calldataload`.
22 | 
23 | The unit test is compiled using `llc` with options: `-mtriple=evm -filetype=obj`. Then the code is executed using `geth`'s `evm` command.
24 | 
25 | ## Testing utilities
26 | 
27 | A Python script is used to handle the testing: `evm_llvm/tools/evm-test/evm_test.py` is the script we created to test the functionality of the LLVM backend. Here is what it does:
28 | 
29 | -   call the evm_llvm backend to compile an LLVM IR file (`.ll` file) into an object file (`.o` file). The file should contain the function we are going to verify along with a smart contract constructor header which is used to handle input arguments. The function should be at the beginning of the IR file (the first function).
30 | -   extract the contract opcodes from the `.o` file and prepare the input arguments (by padding each argument to be 32 bytes long and concatenating everything into a long string).
31 | -   run the executable binary using geth's `evm`, get the result from the printed output, and compare the result with the expected value.
32 | 
33 | ## How to run testings
34 | 
35 | 1. Install Python3
36 | 2. Run `evm_llvm/tools/evm-test/evm_test.py` then you should see the results.
37 | 
38 | ## How to add new tests
39 | 
40 | Please take a look at the `evm_llvm/tools/evm-test/evm_testsuit.py` file, it organizes tests by categorizing them into different `OrderedList`. Each element of the list contains the following information:
41 | 
42 | -   the name of the test
43 | -   the array of input arguments
44 | -   the path of the unit test source code file (in LLVM IR form)
45 | -   the expected result value
46 | 
47 | When adding new tests, you should:
48 | 
49 | -   put your test files into `evm_llvm/test/CodeGen/EVM` folder.
50 | -   add the test file path and expected results to the `evm_testsuit.py` file. (We might change it when the file gets too large).
51 | 
52 | ## TODO lists
53 | 
54 | -   add blockchain state related tests
55 | -   add re-entrance tests (which are also related to changes of blockchain states)
56 | 
57 | Please help improve the test utility!
58 | 


--------------------------------------------------------------------------------
/legacy/Stack-and-Memory-management.md:
--------------------------------------------------------------------------------
 1 | ## Variables
 2 | 
 3 | In the context of a stack machine, a variable refers to an operand that will be consumed by an opcode. In EVM LLVM, variables are treated as virtual registers, until they are _stackified_ (converted from register-based code to stack-based code) right before lowering to machine code.
 4 | 
 5 | In LLVM's internal SSA representation, it is fairly easy to compute a register's live range (the range from its assignment to its last use). Variables are treated differently depending on their live ranges. Local variables (variables whose liveness only extends within a single basic block) will live entirely on the stack, while non-local variables (variables that live across basic blocks) will be spilled to a memory slot allocated by the compiler.
 6 | 
 7 | #### Frame Objects
 8 | 
 9 | Frame objects will be allocated either on the stack or in memory space. Since each element is 256 bits, we have to ensure that frame objects are 256 bits in length as well. Frame objects of smaller length are not supported.
10 | 
11 | It is possible for a frame object to be allocated on to memory space, if we are consuming too much of stack space. The stack allocation pass will try to find an efficient way to decide which goes to the memory and which stays in stack.
12 | 
13 | ### Frame Pointer (or Free Memory Pointer)
14 | 
15 | [Stack pointers and frame pointers](https://en.wikipedia.org/wiki/Call_stack#Stack_and_frame_pointers) are essential to support subroutine calls. Frame pointer is used to record the structure of stack frames. Because we do not have registers in EVM, we will have to store stack frame pointer in memory locations. Usually, we put stack frame pointer at location `0x40`, and we follow Solidity compiler's convention to initialize it to value `128`. So the stack frame of the first function starts at that location. The value of frame pointer changes as the contract calls a subroutine or exits from a subroutine. Whenever we need to have access to frame pointer, we will retrieve its value from that specific location.
16 | 
17 | ### Memory stack
18 | 
19 | Part of the memory is used as a stack for function calls and variable spills. The structure is described as follows:
20 | 
21 | -   The stack goes from lower address to higher address, as different from usual hardware implementations.
22 | -   The frame is arranged into 3 parts:
23 |     -   **frame object locations**. Each frame object has its own frame slot. Frame object `x` will have a 32 byte space starting from `$fp + (x * 32)`, where `$fp` is the frame pointer, and is stored at location `0x40`.
24 |     -   **spilled variables**. Variable that are unable to be fully stackified will reside on the memory stack. In codegen, each spilled variable will have an index, and each index refers to a memory slot. A spilled variable that bears index `y`, will reside at location `$fp + (number_of_frame_objects * 32) + (y * 32)`.
25 |     -   **subroutine context**. Like a regular register machine, the memory stack is used to store subroutine context so as to support function calls. Two slots are allocated at the end of current frame for a) the existing frame pointer, and b) return `PC` address.
26 | 
27 | Here is an example showing a stack frame right before we jump into a subroutine:
28 | 
29 | ```
30 |   Stack top                                    Higher address
31 |  +-----------> +----------------------------+ <--------------+
32 |                |                            |
33 |                |     Return Address         |
34 |                |                            |
35 |                +----------------------------+
36 |                |                            |
37 |                |     Function argument      |
38 |    new FP      |                            |
39 |  +-----------> +----------------------------+
40 |                |                            |
41 |                |    Saved frame pointer     |
42 |                |     (Start of frame)       |
43 |                +----------------------------+
44 |                |                            |
45 |                |     Stack Object 1         |
46 |                |                            |
47 |                +----------------------------+
48 |                |                            |
49 |                |     Frame Object 2         |
50 |                |                            |
51 |                +----------------------------+
52 |                |                            |
53 |                |     Frame Object 1         |
54 | Start of frame |                            |   Lower address
55 | +------------> +----------------------------+ <----------------+
56 | ```
57 | 


--------------------------------------------------------------------------------
/legacy/The-EVM-Calling-Conventions.md:
--------------------------------------------------------------------------------
 1 | The EVM architecture is a simplistic structure, but it has everything we need to do usual program computations.
 2 | 
 3 | ## Types of calls
 4 | 
 5 | There are two types of calls in an EVM smart contract:
 6 | 
 7 | 1. **Internal calls**. Internal calls are referred to function calls within a smart contract. An example is that we have two defined function `A` and `B`, and somewhere in `A` we save our context and change our execution flow to the beginning of `B`.
 8 | 2. **External calls**. Or cross-contract calls. `A` and `B` are defined in different deployed EVM contract and `A` calls `B` in its context.
 9 | 
10 | ## Internal call conventions
11 | 
12 | Up to ETH 1.5, there is no link-and-jump EVM opcode for easy handling of subroutines (even though some [discussions](https://github.com/ethereum/EIPs/issues/2315) are on-going). So we have to manually handle subroutine calls. Here are the calling conventions for internal calls:
13 | 
14 | -   current subroutine's frame pointer is saved at stack, at memory location `$fp - 32` where `$fp` is the subroutine call's frame pointer.
15 | -   arguments are all pushed on stack, along with the return address. Argument with smaller index number occupies a stack slot on top of another argument with a larger index number. For example, when we want to do a function call: `func abc(x, y, z)`, here is the arrangement of the arguments:
16 | 
17 | ```
18 |                +-----------+
19 |                |Return Addr|
20 |                +-----------+
21 |                |     X     |
22 |                +-----------+
23 |                |     Y     |
24 |                +-----------+
25 |  Current FP    |     Z     |
26 | +------------> +-----------+
27 |                |  Old FP   |
28 |                +-----------+
29 |                |   .....   |
30 |                +-----------+
31 | ```
32 | 
33 | _Note: Putting the return address on top of the stack is because it is easier to compute the location, but this will result in more stack manipulation overhead for the subroutine calls. We will improve this design in a later version._
34 | 
35 | -   A subroutine's return value is stored on stack top. _Note: currently we only support one return value. In the future we will improve it by supporting multiple return values._
36 | 
37 | ## Procedure of a subroutine call
38 | 
39 | To illustrate the procedure for a subroutine call, we need to do the following to save the context of current function execution:
40 | 
41 | 1. calculate the current frame size. The frame size should be the size sum of: a) slots occupied by frame objects, b) slots occupied by spilled variables, and c) one more slot for storing current frame pointer. let's assume the frame size is calculated to be `%frame_size`.
42 | 2. bump the frame pointer to: `$fp = $fp + %frame_size`. After that, we can easily restore the old frame pointer by looking at location `$fp - 32`.
43 | 3. push all subroutine arguments in order on to stack.
44 | 4. push return address onto stack. (At this moment, the return address is `PC + 6`).
45 | 5. push the beginning address of subroutine and jump.
46 | 
47 | Right before we return from a subroutine, the stack should be empty and the return address should be at the top of the stack. When returning from a subroutine call, we should do the following:
48 | 
49 | 1. push return value on to top of stack.
50 | 2. Do a `swap1` to move the return address to top of stack
51 | 3. jump to return address and resume the execution in caller function. If the function returns nothing, simply jump to return address.
52 | 
53 | After jumping back to caller, we have to resume the execution:
54 | 
55 | 1. restore caller's frame pointer by storing the value at location `$fp - 32` to `0x40`.
56 | 
57 | ## [EIP2315](https://eips.ethereum.org/EIPS/eip-2315) Support: Subroutine calls
58 | 
59 | The support of subroutines inside EVM enables compilers to generate better-performing code. To be more specific: with EIP2315, it is up to the EVM to maintain the stack:
60 | 
61 | 1. the return address stack is only accessible to VM
62 | 2. the stack is invisible to users and compilers
63 | 
64 | A better calling convention is made with the support of EIP2315:
65 | 
66 | ### To generate a call procedure
67 | 
68 | 1. calculate the current frame size. The frame size should be the size sum of: a) slots occupied by frame objects, b) slots occupied by spilled variables, and c) one more slot for storing current frame pointer. let's assume the frame size is calculated to be `%frame_size`.
69 | 2. save existing frame pointer at memory location `$fp + %frame_size - 32`. The frame pointer is maintained at `0x40`.
70 | 3. bump the frame pointer to: `$fp = $fp + %frame_size`. After that, we can easily restore the old frame pointer by looking at location `$fp - 32`.
71 | 4. push all subroutine arguments in order on to stack.
72 | 5. push the beginning address of subroutine and call `JUMPSUB`
73 | 
74 | ### To generate the return
75 | 
76 | 1. push return value on to top of stack.
77 | 2. call `RETURNSUB` to resume execution of caller function.
78 | 
79 | ## External calls
80 | 
81 | External calls are implemented using intrinsic calls.
82 | 


--------------------------------------------------------------------------------
/legacy/Types-and-type-conversions.md:
--------------------------------------------------------------------------------
 1 | ## Newly supported Types
 2 | 
 3 | So far the open-source LLVM trunk has not yet implemented support for bit sizes larger than 128 bits. We have implemented 256-bit support in our own backend, and are considering contributing it back to the main trunk.
 4 | 
 5 | Users are allowed to use `i256` and `i160` data types in their generated LLVM IR, which represent 256bit integer types and 160bit integer types respectively.
 6 | 
 7 | Even though all EVM data types are 256 bits in length internally, we are still able to offer support for smaller data types. However, users are encouraged to use 256-bit data types because they come at no extra cost.
 8 | 
 9 | ## Contract Input Argument Types -- The Solidity convention
10 | 
11 | Contract arguments are passed to EVM via the call data field. The function dispatcher is responsible to extract input arguments from call data.
12 | 
13 | In Solidity's convention, the arguments in call data are padded to 32 bytes long if its data type's length is shorter. So, in order to maintain the convention, the function dispatcher needs to truncate the input arguments to the defined size in the function that is going to be called.
14 | 
15 | This is undoubtedly inefficient, so users are discouraged from using smaller data types.
16 | 


--------------------------------------------------------------------------------
/legacy/block_construction/src/ProtoBlock/README.md:
--------------------------------------------------------------------------------
  1 | # Block Construction: Proto Block
  2 | 
  3 | A proto-block is a block that has been executed but has not been sealed. The header is missing the nonce and mixhash, and can still accept extra data.
  4 | 
  5 | Proto-blocks are produced when transactions are executed, and can be turned into full valid blocks.
  6 | 
  7 | **SealableHeader**: a block header that has not finished being sealed.
  8 | 
  9 | **toHeader**: Seals the header into a block header
 10 | 
 11 | **proto-block body**: is the representation of the intermediate form of a block body before being sealed.
 12 | 
 13 | ```kotlin
 14 | /* source: https://github.com/apache/incubator-tuweni/blob/main/eth-blockprocessor/src/main/kotlin/org/apache/tuweni/blockprocessor/ProtoBlock.kt */
 15 | 
 16 | /**
 17 |  * A block header that has not finished being sealed.
 18 |  */
 19 | data class SealableHeader(
 20 |   val parentHash: Hash,
 21 |   val stateRoot: Hash,
 22 |   val transactionsRoot: Hash,
 23 |   val receiptsRoot: Hash,
 24 |   val logsBloom: Bytes,
 25 |   val number: UInt256,
 26 |   val gasLimit: Gas,
 27 |   val gasUsed: Gas,
 28 | ) {
 29 | 
 30 |   /**
 31 |    * Seals the header into a block header
 32 |    */
 33 |   fun toHeader(
 34 |     ommersHash: Hash,
 35 |     coinbase: Address,
 36 |     difficulty: UInt256,
 37 |     timestamp: Instant,
 38 |     extraData: Bytes,
 39 |     mixHash: Hash,
 40 |     nonce: UInt64,
 41 |   ): BlockHeader {
 42 |     return BlockHeader(
 43 |       parentHash,
 44 |       ommersHash,
 45 |       coinbase,
 46 |       stateRoot,
 47 |       transactionsRoot,
 48 |       receiptsRoot,
 49 |       logsBloom,
 50 |       difficulty,
 51 |       number,
 52 |       gasLimit,
 53 |       gasUsed,
 54 |       timestamp,
 55 |       extraData,
 56 |       mixHash,
 57 |       nonce
 58 |     )
 59 |   }
 60 | }
 61 | 
 62 | /**
 63 |  * A proto-block body is the representation of the intermediate form of a block body before being sealed.
 64 |  */
 65 | data class ProtoBlockBody(val transactions: List<Transaction>) {
 66 |   /**
 67 |    * Transforms the proto-block body into a valid block body by adding ommers.
 68 |    */
 69 |   fun toBlockBody(ommers: List<BlockHeader>): BlockBody {
 70 |     return BlockBody(transactions, ommers)
 71 |   }
 72 | }
 73 | 
 74 | /**
 75 |  * A proto-block is a block that has been executed but has not been sealed.
 76 |  * The header is missing the nonce and mixhash, and can still accept extra data.
 77 |  *
 78 |  * Proto-blocks are produced when transactions are executed, and can be turned into full valid blocks.
 79 |  */
 80 | class ProtoBlock(
 81 |   val header: SealableHeader,
 82 |   val body: ProtoBlockBody,
 83 |   val transactionReceipts: List<TransactionReceipt>,
 84 |   val stateChanges: TransientStateRepository
 85 | ) {
 86 | 
 87 |   fun toBlock(
 88 |     ommers: List<BlockHeader>,
 89 |     coinbase: Address,
 90 |     difficulty: UInt256,
 91 |     timestamp: Instant,
 92 |     extraData: Bytes,
 93 |     mixHash: Hash,
 94 |     nonce: UInt64,
 95 |   ): Block {
 96 |     val ommersHash = Hash.hash(RLP.encodeList { writer -> ommers.forEach { writer.writeValue(it.hash) } })
 97 |     return Block(
 98 |       header.toHeader(ommersHash, coinbase, difficulty, timestamp, extraData, mixHash, nonce),
 99 |       body.toBlockBody(ommers)
100 |     )
101 |   }
102 | }
103 | ```
104 | 


--------------------------------------------------------------------------------
/legacy/erigion/Choice-of-storage-engine.md:
--------------------------------------------------------------------------------
 1 | We often get asked why we opted for our current storage engine, [MDBX](https://github.com/erthink/libmdbx).
 2 | 
 3 | # Why not LevelDB / RocksDB?
 4 | 
 5 | Answer is pretty simple: no MVCC.
 6 | 
 7 | MVCC allows us to "stitch together" more complex data objects from more normalised form that is stored in the DB, without loss of consistency (if you do it all in a single read-only transaction). This is used quite a lot in the RPC daemon and simplifies the code a lot. You do not need to explicitly link all the data in the application-level code, you just trust that the database will give you consistent snapshot.
 8 | 
 9 | Other than that, Level and Rocks are not ACID. This makes them extremely brittle and prone to corruption on application crash or power failure. Given that node sync from genesis is not cheap or instantaneous, this is a non-starter for us.
10 | 
11 | # Why not BadgerDB?
12 | 
13 | BadgerDB, unlike Level or Rocks, does provide transactions. However, there is a next issue we run into: Badger is based on [Log-structured merge-tree](https://en.wikipedia.org/wiki/Log-structured_merge-tree).
14 | 
15 | Badger (and all LSM-based DBs) has background compaction. It's good for some projects and bad for others.
16 | 
17 | In Erigon we eliminated most of concurrency (goroutines) for many reasons (too many things happening at the same time). We found that modern SSD (and NVMe) are still pretty bad with concurrent writes - they are way better than HDD, but sequential read is still order of magnitude faster than random reads. Meaning 1 thread touching disk vs 2 threads touching disk - can show 10x degradation.
18 | 
19 | _How does this apply to us?_
20 | 
21 | **We removed parallel writes and moved to control all disk touches**. Now we don't really care about "how much WPS database can handle" because now we can fit all writes into 1 write transaction. Doesn't matter if it happens once per 10 minutes or once per 1 second - as long as it's not thousands of parallel WPS. In LMDB 1 write transaction is equal to 1 fsync syscall - all writes during transaction are happening in RAM.
22 | 
23 | LSM databases (Badger, LevelDB) are slower on average for random reads, and their read times are more volatile. B+tree is faster and more predictable for random reads.
24 | 
25 | # Why not BoltDB?
26 | 
27 | Unlike Badger, Bolt is a Go library, providing storage engine based on B+tree. It originally fit well, and we had BoltDB backend available until September 2020.
28 | 
29 | Bolt lacks certain advanced features that we found useful, like LMDB's sorted duplicates (DupSort). It allows to save space without resorting to compression by storing repetitive keys only once.
30 | 
31 | Bolt is not actively maintained anymore, [although there is an active fork by etcd team](https://github.com/etcd-io/bbolt). And finally, it is a Go library, precluding usage and binary compatibility with [Silkworm](https://github.com/torquem-ch/silkworm) and [Akula](https://github.com/rust-ethereum/akula).
32 | 
33 | For all these reasons we switched to LMDB.
34 | 
35 | # Why not LMDB?
36 | 
37 | [See this post.](https://github.com/ledgerwatch/erigon/wiki/Criteria-for-transitioning-from-Alpha-to-Beta#switch-from-lmdb-to-mdbx)
38 | 


--------------------------------------------------------------------------------
/legacy/erigion/Consensus-Engine-separation.md:
--------------------------------------------------------------------------------
 1 | ## Validation of headers
 2 | 
 3 | ![](https://github.com/ledgerwatch/erigon/blob/devel/docs/Consensus-Engine-Page-1.png)
 4 | 
 5 | ## Validation of uncles (EtHash)
 6 | 
 7 | To the best of our knowledge, EtHash is the only algorithm where this functionality is required. But something similar may come up with DAG-based algorithms, where headers have more than just a parent, but also alternative ancestors. In that case, a lot of the interface may need to be generalised accordingly.
 8 | 
 9 | ![](https://github.com/ledgerwatch/erigon/blob/devel/docs/Consensus-Engine-Page-4.png)
10 | 
11 | ## Use of smart contract state for Consensus Engine
12 | 
13 | ![](https://github.com/ledgerwatch/erigon/blob/devel/docs/Consensus-Engine-Page-2.png)
14 | 
15 | In algorithms like AuRa (Authority Round), verification of headers requires access to the state of smart contracts (where, for example, set of validators is stored), as well as emitted events (requests for inducting new validators). In order to accommodate this, we would introduce another message type from Consensus to Core.
16 | 
17 | ![](https://github.com/ledgerwatch/erigon/blob/devel/docs/Consensus-Engine-Page-6.png)
18 | 
19 | ## Solution for Staged sync
20 | 
21 | ![](https://github.com/ledgerwatch/erigon/blob/devel/docs/Consensus-Engine-Page-3.png)
22 | 
23 | ## Fork choice rule
24 | 
25 | Fork choice rule can be thought of as a partial order relationship among the set of possible headers. Being a partial order, the fork choice rule is:
26 | 
27 | 1. reflexive or irreflexive, depending on whether non-strict or strict definition is required. header `A` is either better than itself (non-strict, `<=`), or not better than itself (strict, `<`)
28 | 2. anti-symmetric. if `A` is better than `B`, then `B` is worse than `A`
29 | 3. transitive. if `A` is better than `B` and `B` is better than `C`, then `A` is better than `C`
30 | 
31 | Core is asking the Consensus Engine to infer the relation between given headers, and perform topological sort.
32 | 
33 | ![](https://github.com/ledgerwatch/erigon/blob/devel/docs/Consensus-Engine-Page-5.png)
34 | 
35 | ## Finalisation Code
36 | 
37 | When the verification results in a `Valid` message from the Consensus Engine to the Core, an extra field `Finalisation Code` is attached. This code is expressed in an extension of EVM, which is currently called TEVM, and it needs to be run at the end of processing of the corresponding block (where there is access to the state etc.). For example, for assigning the mining reward, the following very generic finalisation code can be sent:
38 | 
39 | ```
40 | PUSH32 <block reward>
41 | COINBASE
42 | ADDBALANCE
43 | ```
44 | 
45 | Note that EVM does not have `ADDBALANCE` opcode, this would be part of TEVM extension, and this particular opcode would be run in a privileged mode only (meaning that only system parts like Consensus Engine and Transpiler may emit this code, but no user code with such code will be able to run). Similarly, EVM lacks introspection of uncles attached to the block. With extension allowing for that, the code above can be made to also add uncle rewards. In other consensus algorithms, the Finalisation code can be made to issue POS rewards, do slashing, etc., and it may either be very specific for every header or block, or parametrised via using extension opcodes in TEVM.
46 | 
47 | ## Notes from Dragan
48 | 
49 | Hello, for consensus engine in OE there is main abstraction that all Engines implement: https://github.com/openethereum/openethereum/blob/32d8b5487a6fc12c8295ebf9833c74857f5e7354/crates/ethcore/src/engines/mod.rs#L304 And few months ago I wanted to see where and who is using engine and while doing that came up with this document: https://docs.google.com/spreadsheets/d/1gzkq_m7rHZKP7tPDyJiBykDjPgV8BCgiEwSZ35Zx3ho/edit?usp=sharing maybe it can be useful for AuRa
50 | 
51 | ## Implementation
52 | 
53 | 1. There are a few AuRa-based chains: Kovan, Sokol, xDAI (POS DAO)
54 | 2. We take Sokol as the first example, because Kovan has WASM contracts.
55 | 3. Re-add `eth/65` to `--download.v2` (messages with RequestID)
56 | 4. Connect to Sokol network from TG with `--download.v2`
57 | 5. Add `seal` into the header structure to parse header. Once it is done, we will see the signatures.
58 | 6. Implement finality rule using signatures.
59 | 7. ....
60 | 


--------------------------------------------------------------------------------
/legacy/erigion/Criteria-for-transitioning-from-Alpha-to-Beta.md:
--------------------------------------------------------------------------------
 1 | We do not define a specific deadline for transitioning from **Alpha** to **Beta** stage. Instead, we define the criteria that should help us decide when turbo-geth is ready for **Beta**. Here is the list of things that need to be implemented for these criteria to be met.
 2 | 
 3 | ## Mining
 4 | 
 5 | The challenge of implementing efficient mining support in turbo-geth is the fact that there is only one "canonical" state at any given time. Mining, however, requires production of "speculative" blocks, and then "speculative" state, in order to compute the state root hash for the header. Current idea for the "speculative" state is an in-memory cache that can be "cloned". By "cloning", we mean creating a lazy-shallow copy of the cache such that the changes to the cloned state do not affect the canonical state. However, it turns out that the current data model of `HashedState` and `IntermediateHashes` is not well suited for the maintenance of such cache. The work is on-going to correct the data model and implement the clone-able state cache, and subsequently, the mining functionality.
 6 | 
 7 | ## Simplified downloading of block headers and block bodies
 8 | 
 9 | Currently downloading block headers and block bodies are stage 1 and stage 3 of staged sync, respectively. These stages look and feel quite different from the other stages, because they were created on the foundations of the header/body/receipt/state downloading code inherited from go-ethereum. This code does much more than turbo-geth's staged sync requires, and can (and should) be replaced with a simplified version. A working proof-of-concept of this simplified version has been created, and now is the time for tests and documentation.
10 | 
11 | ## Consensus Engine component
12 | 
13 | One may have heard about the concept of "pluggable consensus", meaning that it should be easy to switch from Proof Of Work to Proof of Authority, and also from one variant of POW to another one, and from one variant of POA to another one. In practice, pluggable consensus was some implementation with interfaces, but still always running in the same process, and often deeply intertwined with the rest of the code. We have taken steps to design an interface that would allow running the consensus engine in a separate process. With such an interface, it should still be possible to run it in the same process, but the existence of the interface makes it much more straightforward to keep the consensus engine properly separated. We have a proof-of-concept implementation that works for EtHash POW and Clique POA; now it is time for integration, tests, and documentation.
14 | 
15 | ## Switch from LMDB to MDBX
16 | 
17 | Erigon started off with the BoltDB database backend, then added support for BadgerDB, and eventually migrated exclusively to LMDB. At some point we encountered stability issues that were caused by our usage of LMDB in ways that were not envisaged by its creators. Since then we have been looking at a well-supported derivative of LMDB, called MDBX, hoping to benefit from its stability improvements, and potentially to work more closely with its maintainers in the future. The integration of MDBX is done; now it is time for more testing and documentation.
18 | 
19 | Benefits of transitioning from LMDB to MDBX:
20 | 
21 | 1. Database file growth "geometry" works properly. This is important especially on Windows. In LMDB, one has to specify the memory map size once in advance (currently we use 2Tb by default), and if the database file grows over that limit, one has to restart the process. On Windows, setting memory map size to 2Tb makes database file 2Tb large on the onset, which is not very convenient. With MDBX, memory map size is increased in 2Gb increments. This means occasional remapping, but results in a better user experience.
22 | 2. MDBX has more strict checks on concurrent use of the transaction handles, as well as on overlapping read and write transactions within the same thread of execution. This allowed us to find some non-obvious bugs and make behaviour more predictable.
23 | 3. Over the period of more than 5 years (since it split from LMDB), MDBX has accumulated a lot of fixes for safety issues and heisenbugs that, to the best of our knowledge, are still present in LMDB. Some of them we discovered during our testing, and the MDBX maintainer took them seriously and worked on the fixes promptly.
24 | 4. When it comes to databases that constantly modify data, they generate quite a lot of reclaimable space (also known as "freelist" in LMDB terminology). We had to patch LMDB to fix most serious drawbacks when working with reclaimable space (analysis here: https://github.com/ledgerwatch/erigon/wiki/LMDB-freelist-illustrated-guide). MDBX takes special care of efficient handling of reclaimable space and so far no patches were required.
25 | 5. According to our tests, MDBX performs slightly better on our workloads.
26 | 6. MDBX exposes more internal telemetry - more metrics of what is happening inside the DB. And we have them in Grafana - to make better decisions on app design. For example, after complete transition to MDBX (removing LMDB support) we will implement "commit half-full transactions" strategy to avoid spill/unspill disk touches. This will simplify our code further without affecting performance.
27 | 7. MDBX has support for "Exclusive open" mode - we are using it for DB migrations, to prevent any other reader from accessing the database while a DB migration is in progress.
28 | 


--------------------------------------------------------------------------------
/legacy/erigion/EVM-with-abstract-interpretation-and-backtracking.md:
--------------------------------------------------------------------------------
  1 | # Goal
  2 | 
  3 | Describe what abstract interpretation and backtracking means, and how to implement it efficiently
  4 | 
  5 | # EVM resources
  6 | 
  7 | By resources here we understand things that a programmer of the EVM may use to store and manipulate data, and to perform computations. Some of the EVM resources are accessible directly via opcodes, whereas others are accessible only indirectly, via side-effects of certain operations.
  8 | 
  9 | ## Execution frames (substates)
 10 | 
 11 | When EVM is activated (usually by sending a transaction to a deployed smart contract, with some input), the first execution frame is created. It gets its program counter, gas counter, "read only" flag (whether any mutating operations are allowed), input data in memory, and output data region in memory. Execution frame is mostly segregated from other execution frames, but there are a few ways they can communicate:
 12 | 
 13 | 1. Via input data
 14 | 2. Via gas counter
 15 | 3. Via storage writes (only for execution frames of the same contract)
 16 | 
 17 | ## Stacks
 18 | 
 19 | Each execution frame has its own stack, and it is only accessible from that one execution frame.
 20 | 
 21 | ## Memories
 22 | 
 23 | Each execution frame has its own memory. Memory expands in chunks of 32 bytes, when used, but is accessible with the granularity of a single byte.
 24 | 
 25 | ## State caches
 26 | 
 27 | Whenever an item is read from the state, it potentially modifies state cache, which has an impact on the gas cost of subsequent operations with the same state item. Whenever a state item is created or updated, it also modifies state cache in a different way, which affects the cost of subsequent update operations for the same state item. State caches can be explicitly modelled as EVM resources for better specification and less error-prone implementation, but also for the purpose of implementing the backtracking.
 28 | 
 29 | ## Access lists
 30 | 
 31 | Access lists are related to the state caches in a way that they pre-initialise read caches in a certain way.
 32 | 
 33 | ## Self-destruct lists
 34 | 
 35 | Self-destruct lists record which accounts will be self-destructed and removed from the state at the end of a transaction. Self-destruct lists need to be explicitly modelled for better specification and less error-prone implementation, but also for the purpose of implementing the backtracking.
 36 | 
 37 | ## Block context
 38 | 
 39 | Timestamp, block hash, gas limit, base fee
 40 | 
 41 | ## Transaction context
 42 | 
 43 | ## Extra context
 44 | 
 45 | Recent block hashes
 46 | 
 47 | # Extended domain for stack elements
 48 | 
 49 | Abstract interpretation (as opposed to "concrete" interpretation) allows us to replace some concrete values on the stack (and then perhaps in memory or state caches) with `unknown` values. This effectively extends the domain of possible values from numbers `0`...`2^256-1` to also include `unknown` or potentially multiple types of unknown. For a more rigorous approach, it may make sense to introduce at least two types of unknowns, one meaning that the value "does not exist", and another that the value is "unknown". The reason why we need "does not exist" is to perform the unification of stacks when resolving loops, for example. In order to describe the abstract interpretation a bit more formally, we need to define what a "stack" is (and then perhaps also what other resources are). The stack is the sequence of objects from the domain {`0`...`2^256-1`, `NE`, `NK`}, where `NE` means "does not exist", and `NK` means "not known". In other words, a stack can be thought of as a tuple of a certain "maximum" size, for example, 100. So in our model, all possible stacks will be of fixed size (let's say 100), and to model a smaller stack, we fill the rest of the elements with `NE` objects. For example, the stack
 50 | 
 51 | ```
 52 | 4
 53 | 5
 54 | 6
 55 | NK
 56 | 7
 57 | ```
 58 | 
 59 | will in fact be represented as
 60 | 
 61 | ```
 62 | 4
 63 | 5
 64 | 6
 65 | NK
 66 | 7
 67 | NE
 68 | NE
 69 | ...
 70 | NE
 71 | ```
 72 | 
 73 | where the total length of the stack is 100. Why is it important that all stacks are of the same size? Because then we can define some operations on the domain of all possible stacks and express them in terms of operations on the individual elements.
 74 | 
 75 | Let's create an example of a simple loop:
 76 | 
 77 | ```
 78 | 0: PUSH1 10 # initial value of the loop counter
 79 | 2: JUMPDEST # this is where iteration of the loop returns
 80 | 3: PUSH1 1 # to perform counter--
 81 | 5: SUB
 82 | 6: DUP1 # make sure we don't destroy the only value of the counter by ISZERO
 83 | 7: ISZERO # top of the stack is 1 if counter == 0
 84 | 8: ISZERO # top of the stack is 1 if counter > 0
 85 | 9: PUSH1 2
 86 | 11: SWAP1
 87 | 12: JUMPI # jump if still counter > 0
 88 | ```
 89 | 
 90 | If we perform abstract interpretation of this program, this is how we could go. We start with an empty stack (full of `NE`s). As we go along, we create a mapping `PC (program counter) => stack`, which will help us understand whether we returned to the place we've been before.
 91 | 
 92 | ```
 93 | NE
 94 | NE
 95 | ...
 96 | NE
 97 | ```
 98 | 
 99 | ### 0: PUSH1 10
100 | 
101 | we shift the stack downwards and replace first element with 10 (the last `NE` gets discarded)
102 | 
103 | ```
104 | 10
105 | NE
106 | ...
107 | NE
108 | ```
109 | 
110 | ### 2: JUMPDEST
111 | 
112 | Nothing happens here, it is just a "goto label"
113 | 
114 | ### 3: PUSH1 1
115 | 
116 | ```
117 | 1
118 | 10
119 | NE
120 | ...
121 | NE
122 | ```
123 | 
124 | ### 5: SUB
125 | 
126 | ```
127 | 9
128 | NE
129 | ...
130 | NE
131 | ```
132 | 
133 | ### 6: DUP1
134 | 
135 | ```
136 | 9
137 | 9
138 | NE
139 | ...
140 | NE
141 | ```
142 | 
143 | ### 7: ISZERO
144 | 
145 | ```
146 | 0
147 | 9
148 | NE
149 | ...
150 | NE
151 | ```
152 | 
153 | ### 8: ISZERO
154 | 
155 | ```
156 | 1
157 | 9
158 | NE
159 | ...
160 | NE
161 | ```
162 | 


--------------------------------------------------------------------------------
/legacy/erigion/Home.md:
--------------------------------------------------------------------------------
1 | Welcome to the turbo-geth wiki!
2 | 


--------------------------------------------------------------------------------
/legacy/erigion/Using-Postman-to-test-RPC.md:
--------------------------------------------------------------------------------
 1 | ## Introduction
 2 | 
 3 | In this article, we provide instructions for using [Postman](https://www.postman.com/) to test Erigon's RPC interfaces. We begin with instructions for how to get started, we then explain a few thoughts on using Postman, and finally, we list a few other things we can do with the collection file in the future.
 4 | 
 5 | ## Getting Started
 6 | 
 7 | ### Install Postman and Erigon
 8 | 
 9 | In these instructions, we assume you have Postman installed and are logged in. You may have to create a user account on their website in order to download the software. You do not need to create or join a team.
10 | 
11 | We also assume that you have a copy of the Erigon source code in a folder called `$ERIGON`.
12 | 
13 | ### Import the RPC Testing Collection
14 | 
15 | Once you have Postman running:
16 | 
17 | -   Click on the `Import` button at the top left of the screen. This will open the Import Dialog.
18 | -   Click on the `File` tab and then the `Upload Files` button in the middle of the screen.
19 | -   Navigate to the folder `$ERIGON/cmd/rpcdaemon/postman`.
20 | -   Select the file `RPC_Testing.json` and confirm the import.
21 | -   Click on the `Collections` subtab just below the main menu.
22 | 
23 | You should now have an opened collection called `RPC_Testing`. If something doesn't work, please let us know by creating an issue.
24 | 
25 | ### Create Global and Environment Variables
26 | 
27 | Postman allows the user to specify custom `variables`, which can be used, for example, to run the same test collection against an API at multiple different endpoints. We will use `variables` for exactly this reason.
28 | 
29 | In order for this to work, we need to create both `global` and `environmental` variables. We do that next.
30 | 
31 | #### Globals
32 | 
33 | Near the top right of the screen is a small icon that looks like an eyeball and is labeled `Environment quick look`. Click on that icon and then `Edit` under the **Globals** section. You should be in the `Manage Environment` dialog.
34 | 
35 | Add two variables (you may add more later):
36 | 
37 | | VARIABLE   | INITIAL VALUE                        |
38 | | ---------- | ------------------------------------ |
39 | | ERIGON     | http://localhost:8545                |
40 | | NETHERMIND | http://archive02.archivenode.io:8545 |
41 | 
42 | Click on `Persist All` and then `Save`. Close the `Manage Environment` dialog.
43 | 
44 | #### Environments
45 | 
46 | Now we need to create a testing environment. Do this by clicking on the eyeball icon again. This time, click on the `Add` link next to the **Environment** section. Call your environment `Erigon Testing` and add this variable:
47 | 
48 | | VARIABLE | INITIAL VALUE |
49 | | -------- | ------------- |
50 | | HOST     | {{ERIGON}}    |
51 | 
52 | `Persist All` and click on `Add` to save the environment. Close the dialog to return to the main screen.
53 | 
54 | You should be ready to test. If not, please post an issue.
55 | 
56 | #### Testing Other Endpoints
57 | 
58 | Optionally, you may create a second environment (`Nethermind Testing`) and set the `HOST` variable to `{{NETHERMIND}}`. This will allow you to test other endpoints. We leave that as an exercise.
59 | 
60 | ### Running Tests
61 | 
62 | You are now ready to run the tests. To do that:
63 | 
64 | -   Start your Erigon node.
65 | -   Start your Erigon RPC daemon. (If you're testing all endpoints, start with all namespaces enabled `build/bin/rpcdaemon --private.api.addr=localhost:9090 --http.api=eth,debug,net,web3,trace,db,shh,tg`.)
66 | -   Click on the `Runner` button at the top left of the Postman screen. This will open a new window called `Collection Runner`.
67 | -   Select the `RPC_Testing` collection.
68 | -   Select an environment (`Erigon Testing` for example).
69 | -   Press `Run RPC_Testing`
70 | 
71 | This should run all the currently enabled tests. Note that you may run individual tests directly from the Postman screen.
72 | 
73 | See the notes below for more information.
74 | 
75 | ## Discussion
76 | 
77 | We think Postman is a good choice to create, edit, test, and document Erigon's RPC. The file created by Postman, `RPC_Testing.json`, is a full specification of the API including example usage, test cases, and text that may be used to generate documentation using various tools such as Swagger. Additionally, Postman allows one to create automated monitors that watch your API. And finally, it works with your CI (continuous integration) through a tool called Newman.
78 | 
79 | ## Notes:
80 | 
81 | -   The RPC_Testing file contains tests that are disabled by default. You may enable them by adding a **Global** variable called `TEST_NOT_IMPLEMENTED` and/or `TEST_DEPRECATED` and setting their value to `true`.
82 | -   Many of the tests hard code both the body of the test and the expected results. Eventually, we'd like to use parameterized test data files instead. This will allow us to run multiple different tests against the same API endpoints.
83 | -   The tests run against the Ethereum main net and expect an Ethereum archive node to work (for example, some of the tests query historical account balances). Future versions could be customized for non-archive nodes.
84 | 
85 | ## Other Possible Uses for the Collection
86 | 
87 | -   Test against other RPC endpoints (including other node types)
88 | -   Generate help documentation
89 | -   Verify RPC interfaces
90 | -   Use in CI (continuous integration) pipeline
91 | -   Generate RPC APIs for other languages (such as C++)
92 | 


--------------------------------------------------------------------------------
/legacy/evm-illustrated/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2018, Takenobu Tani
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/legacy/evm-illustrated/README.md:
--------------------------------------------------------------------------------
 1 | # Ethereum EVM illustrated
 2 | 
 3 | This is an illustrated document about the EVM (Ethereum Virtual Machine).
 4 | 
 5 | Here is: [Ethereum EVM illustrated](http://takenobu-hs.github.io/downloads/ethereum_evm_illustrated.pdf) (PDF).
 6 | 
 7 | ## Contents
 8 | 
 9 | 1 Introduction
10 | 
11 | -   Blockchain
12 | -   World state
13 | -   Account
14 | -   Transaction
15 | -   Message
16 | -   Decentralised database
17 | -   Atomicity and order
18 | 
19 | 2 Virtual machine
20 | 
21 | -   Ethereum virtual machine (EVM)
22 | -   Message call
23 | -   Exception
24 | -   Gas and fee
25 | -   Input and output
26 | -   Byte order
27 | -   Instruction set
28 | -   Miscellaneous
29 | 
30 | Appendix A : Implementation
31 | 
32 | -   Source code in Geth
33 | -   EVM developer utility
34 | -   Solidity ABI
35 | 
36 | Appendix B : User interface
37 | 
38 | -   Web3 API
39 | -   Geth, Mist, Solc, Remix, Truffle, ...
40 | 
41 | References
42 | 


--------------------------------------------------------------------------------
/legacy/evm-illustrated/changelog.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | ## 0.1.1 _Mar 2018_
 4 | 
 5 | -   Modify exception page temporarily on page 59
 6 | 
 7 |     -   thanks to ubuntaire and smarx at Reddit
 8 | 
 9 | -   Add reference "[E9] ethereum/wiki Subtleties" on page 113
10 | 
11 | ## 0.1.0 _Mar 2018_
12 | 
13 | -   First release
14 | 


--------------------------------------------------------------------------------
/legacy/evm-illustrated/ethereum_evm_illustrated.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/legacy/evm-illustrated/ethereum_evm_illustrated.pdf


--------------------------------------------------------------------------------
/legacy/evm-illustrated/src/ethereum_evm_illustrated.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/legacy/evm-illustrated/src/ethereum_evm_illustrated.pptx


--------------------------------------------------------------------------------
/legacy/evm-modules/README.md:
--------------------------------------------------------------------------------
 1 | # Core Paper Project of EVM
 2 | 
 3 | The Core Paper Project of EVM aims at providing a modular and general purpose specification for Ethereum Virtual Machine. Ethereum Virtual Machine, or EVM, is a widely used stack-based virtual machine and binary instruction format.
 4 | 
 5 | EVM is initially designed for Ethereum and Ethereum Classic, with VM structures specific to those blockchains. However, it's being adopted in a wide range of other projects, such as [Parity Substrate](https://github.com/paritytech/substrate/pull/3927). Those projects have vastly different requirements compared with Ethereum and Ethereum Classic, and as a result, they would benefit from a standalone specification process.
 6 | 
 7 | We design this specification to be modular, starting from a basic layer called EVM Core, which has minimal assumptions about the environment. Modules are then provided on top of EVM Core, which make additional assumptions about the environment. Many layers, including EVM Core, do not contain the gasometer, which means they are suitable for use in general-purpose environments and are much easier to implement.
 8 | 
 9 | ## Philosophy
10 | 
11 | The current EIP and ECIP process basically consists of "changelogs". We define, as informal specifications, what is changed when an EIP is applied. This works well for simple changes such as gas cost modification and opcode addition, because the change is only at a single point and assumed not to affect the rest of the system.
12 | 
13 | However, totally relying on the changelog format has its expressiveness limit. For the pressing issues we're facing on Ethereum nowadays, many structural and potentially complex changes of the EVM are required. When writing them under the EIP "changelog" format, it's hard both for authors to express themselves and for readers to understand the specification. This has led to confusion and implementation consensus issues in the past. What's more, some changes previously thought to be single-point, such as EIP-1283 and EIP-1884, turned out to affect a larger part of the EVM; relying solely on the changelog format made it harder for readers to review those effects.
14 | 
15 | The Core Paper Project of EVM is an attempt to address those issues. Instead of one-step "changelog" process as in EIP and ECIP, here feature upgrades are defined under a two-step process:
16 | 
17 | -   **Refactoring**: Any new feature upgrade is identified as a "module change". We first refactor the _whole EVM_ specification to get a _functionally equivalent_ specification.
18 | -   **Module change**: We then add the module change, and write the "changelog" simply as the actual module change.
19 | 
20 | As an example, to add new EVM features that require an additional validation step in the beginning, we first refactor the whole EVM specification to have a no-op validation step, which is functionally equivalent to what we have now. After that, the new feature can simply be added as an additional module. This process is much clearer than the changelog process.
21 | 
22 | At the same time, we hope the modular design and specification allow reusability outside of the context of Ethereum and Ethereum Classic, and can encourage better standardization, for EVM features that are not designed for Ethereum or Ethereum Classic mainnet.
23 | 
24 | ## Modules
25 | 
26 | ### EVM Core
27 | 
28 | EVM Core defines the base layer of execution. The VM has access to the following information:
29 | 
30 | -   **Data**: a bytearray defining the input of the VM.
31 | -   **Code**: a bytearray defining the code being executed.
32 | -   **Program Counter**: an integer, pointing to the position of the next instruction being executed.
33 | -   **Jump Validity Map**: a boolean list the same size as the code bytearray. It is generated at the beginning of the program execution, and sets all valid `JUMPDEST` positions to true.
34 | -   **Memory**: A linear memory of bytes, of given limit.
35 | -   **Stack**: A stack, containing values of 256-bit.
36 | 
37 | Valid instructions of EVM Core are:
38 | 
39 | -   **Stop and Arithmetic**: `STOP`, `ADD`, `MUL`, `SUB`, `DIV`, `SDIV`, `MOD`, `SMOD`, `ADDMOD`, `MULMOD`, `EXP`, `SIGNEXTEND`.
40 | -   **Comparison and Bitwise Logic**: `LT`, `GT`, `SLT`, `SGT`, `EQ`, `ISZERO`, `AND`, `OR`, `XOR`, `NOT`, `BYTE`, `SHL`, `SHR`, `SAR`.
41 | -   **Code and Data Access**: `CALLDATALOAD`, `CALLDATASIZE`, `CALLDATACOPY`, `CODESIZE`, `CODECOPY`.
42 | -   **Stack, Memory and Flow Control**: `POP`, `PUSHn`, `DUPn`, `SWAPn`, `MLOAD`, `MSTORE`, `MSTORE8`, `JUMP`, `JUMPI`, `PC`, `MSIZE`, `JUMPDEST`, `RETURN`, `REVERT`, `INVALID`.
43 | 
44 | ### EVM ROM
45 | 
46 | The EVM ROM layer can be built on top of EVM Core to provide access to a range of read-only memory. We define the following structure:
47 | 
48 | -   **Read-only Memory**: A range of read-only memory that can be accessed by specific opcodes.
49 | 
50 | We redefine the following opcodes to be access of read-only memory. Here we define read-only memory to have index every 32 bytes.
51 | 
52 | -   `ADDRESS` (`0x30`): `READROM 0x0` Push index `0` of read-only memory onto stack.
53 | -   `ORIGIN` (`0x32`): `READROM 0x1` Push index `1` of read-only memory onto stack.
54 | -   `CALLER` (`0x33`): `READROM 0x2` Push index `2` of read-only memory onto stack.
55 | -   `CALLVALUE` (`0x34`): `READROM 0x3` Push index `3` of read-only memory onto stack.
56 | -   `GASPRICE` (`0x3a`): `READROM 0x4` Push index `4` of read-only memory onto stack.
57 | -   `COINBASE` (`0x41`): `READROM 0x5` Push index `5` of read-only memory onto stack.
58 | -   `TIMESTAMP` (`0x42`): `READROM 0x6` Push index `6` of read-only memory onto stack.
59 | -   `NUMBER` (`0x43`): `READROM 0x7` Push index `7` of read-only memory onto stack.
60 | -   `DIFFICULTY` (`0x44`): `READROM 0x8` Push index `8` of read-only memory onto stack.
61 | -   `GASLIMIT` (`0x45`): `READROM 0x9` Push index `9` of read-only memory onto stack.
62 | -   `CHAINID` (`0x46`): `READROM 0xa` Push index `10` of read-only memory onto stack.
63 | -   `SELFBALANCE` (`0x47`): `READROM 0xb` Push index `11` of read-only memory onto stack.
64 | 
65 | ### EVM Storage
66 | 
67 | The EVM Storage layer provides opcodes for access of a persistent storage:
68 | 
69 | -   **Storage**: External storage that can be read or written by the contract.
70 | 
71 | Opcodes `SLOAD` and `SSTORE` are defined in this layer.
72 | 
73 | ### EVM Log
74 | 
75 | The EVM Log layer provides opcodes for logging:
76 | 
77 | -   **Log**: Append-only data structure with structure `{ topics: Vec<H256>, data: Vec<u8> }`, where `topics` can at most be length 4.
78 | 
79 | Opcodes `LOGn` are defined in this layer.
80 | 
81 | ### EVM Ethereum
82 | 
83 | We define all Ethereum specific opcodes in this layer. This includes:
84 | 
85 | -   **Sha3**: `SHA3`
86 | -   **Environmental Information**: `BALANCE`, `EXTCODESIZE`, `EXTCODECOPY`
87 | -   **Block Information**: `BLOCKHASH`
88 | -   **Gasometer**: `GAS`
89 | -   **System Operations**: `CREATE`, `CREATE2`, `CALL`, `CALLCODE`, `DELEGATECALL`, `STATICCALL`
90 | 
91 | ## License
92 | 
93 | This work is licensed under [Apache License, Version 2.0](http://www.apache.org/licenses/).
94 | 


--------------------------------------------------------------------------------
/legacy/evm-modules/gasometer/README.md:
--------------------------------------------------------------------------------
 1 | # Gasometer
 2 | 
 3 | This defines the gas cost calculation module for EVM.
 4 | 
 5 | ## Imports
 6 | 
 7 | The gasometer has access to the following information. Note that each opcode cost module may require access to additional information.
 8 | 
 9 | -   **Memory effective length**: The effective length of memory, defined in EVM Core.
10 | 
11 | ## Constants
12 | 
13 | -   `G_MEMORY`: Used to calculate memory gas from memory effective length.
14 | -   **Opcode Cost Modules**: With gasometer in place, each valid opcode is assigned with an opcode cost module. This constant is a mapping of opcode to its opcode cost module.
15 | 
16 | ## Data Structures
17 | 
18 | The gasometer maintains:
19 | 
20 | -   **Status**: Can be two values -- either "okay" or "error". Error indicates that an out of gas error has already happened.
21 | -   **Gas limit**: The current gas limit.
22 | -   **Used gas counter**: Unsigned counter for used gas.
23 | -   **Refund gas counter**: Signed counter for gas refund.
24 | 
25 | ## Methods
26 | 
27 | ### `gasometer.record_used(gas)`
28 | 
29 | Increase the used gas counter by the amount of `gas`. If the increment leads to the condition that used gas counter is greater than gas limit, set used gas counter to gas limit, and set status to error.
30 | 
31 | Returns okay if the status ended up being okay, otherwise return error.
32 | 
33 | ### `gasometer.record_refund(refund)`
34 | 
35 | Increase or decrease the refund gas counter, based on `refund`'s sign.
36 | 
37 | ### `gasometer.total_used_gas()`
38 | 
39 | Calculate the total used gas of a gasometer.
40 | 
41 | Calculate memory gas, with the formula `G_MEMORY * a + a * a // 512`, where `a` is the memory effective length. Return memory gas plus used gas counter.
42 | 
43 | ### `gasometer.gas_left()`
44 | 
45 | Return _gas limit_ minus `gasometer.total_used_gas()`.
46 | 
47 | ### `gasometer.effective_used_gas()`
48 | 
49 | Calculate the effective used gas for a transaction, based on total used gas and gas limit.
50 | 
51 | Calculate the effective refund gas. If refund gas counter is negative, the effective refund gas is 0. Otherwise, cap the refund gas at half of the total used gas.
52 | 
53 | Return total used gas minus effective refund gas.
54 | 
55 | ### `gasometer.record_opcode(opcode)`
56 | 
57 | Use the corresponding opcode cost module of the given opcode to calculate the gas cost and gas refund. Call `gasometer.record_used` with the resulting `gas`. Call `gasometer.record_refund` with the resulting `refund`.
58 | 


--------------------------------------------------------------------------------
/legacy/evm-modules/gasometer/sstore/net.md:
--------------------------------------------------------------------------------
 1 | # Net SSTORE Gas Cost Module
 2 | 
 3 | This defines the gas cost calculation module for SSTORE with net gas metering.
 4 | 
 5 | ## Imports
 6 | 
 7 | This gas cost module has access to the following information.
 8 | 
 9 | -   **Stack**: The EVM stack.
10 | -   **Storage**: EVM storage of the current operating address.
11 | -   **Original storage**: EVM storage state at the beginning of the current transaction.
12 | -   **Gasometer gas left**: The current remaining gas value of the gasometer.
13 | 
14 | ## Constants
15 | 
16 | -   `G_SSTORE_SET`: Gas cost for setting a storage value from zero to non-zero.
17 | -   `G_SSTORE_RESET`: Gas cost for setting a storage value otherwise.
18 | -   `G_SLOAD`: Gas cost for SLOAD operation and SSTORE when a value is unchanged.
19 | -   `R_SSTORE_CLEAR`: Refund for setting a storage value from non-zero to zero.
20 | -   `G_STIPEND`: Stipend paid for CALL opcode with value transfer.
21 | 
22 | ## Calculations
23 | 
24 | Interpret stack item at index `0` as the index, and stack item at index `1` as the _new value_. Fetch from _storage_ at _index_ as the _current value_. Fetch from _original storage_ at _index_ as the _original value_.
25 | 
26 | ### Gas Cost
27 | 
28 | -   If _gasometer gas left_ is less than or equal to `G_STIPEND`, return `G_STIPEND + 1`.
29 | -   If _current value_ equals _new value_, return `G_SLOAD`.
30 | -   If _current value_ does not equal _new value_
31 |     -   If _original value_ equals _current value_
32 |         -   If _original value_ is zero, return `G_SSTORE_SET`.
33 |         -   Otherwise, return `G_SSTORE_RESET`.
34 |     -   Otherwise, return `G_SLOAD`.
35 | 
36 | ### Gas Refund
37 | 
38 | -   If _original value_ equals _current value_, and _new value_ is zero, return `R_SSTORE_CLEAR`.
39 | -   Otherwise, create a local variable `refund`, initialized to `0`.
40 |     -   If _original value_ is not zero
41 |         -   If _current value_ is zero, remove `R_SSTORE_CLEAR` from `refund`.
42 |         -   Otherwise, if _new value_ is zero, add `R_SSTORE_CLEAR` to `refund`.
43 |     -   If _original value_ equals _new value_
44 |         -   If _original value_ is zero, add `G_SSTORE_SET - G_SLOAD` to `refund`.
45 |         -   Otherwise, add `G_SSTORE_RESET - G_SLOAD` to `refund`.
46 |     -   Return `refund`.
47 | 


--------------------------------------------------------------------------------
/legacy/evm-modules/gasometer/sstore/simple.md:
--------------------------------------------------------------------------------
 1 | # Simple SSTORE Gas Cost Module
 2 | 
 3 | This defines the gas cost calculation module for SSTORE without net gas metering.
 4 | 
 5 | ## Imports
 6 | 
 7 | This gas cost module has access to the following information.
 8 | 
 9 | -   **Stack**: The EVM stack.
10 | -   **Storage**: EVM storage of the current operating address.
11 | 
12 | ## Constants
13 | 
14 | -   `G_SSTORE_SET`: Gas cost for setting a storage value from zero to non-zero.
15 | -   `G_SSTORE_RESET`: Gas cost for setting a storage value otherwise.
16 | -   `R_SSTORE_CLEAR`: Refund for setting a storage value from non-zero to zero.
17 | 
18 | ## Calculations
19 | 
20 | Interpret stack item at index `0` as the index, and stack item at index `1` as the _new value_. Fetch from _storage_ at _index_ as the _current value_.
21 | 
22 | ### Gas Cost
23 | 
24 | If _current value_ is zero, and _new value_ is not zero, return `G_SSTORE_SET`. Otherwise, return `G_SSTORE_RESET`.
25 | 
26 | ### Refund
27 | 
28 | If _current value_ is not zero, and _new value_ is zero, return `R_SSTORE_CLEAR`. Otherwise, return `0`.
29 | 


--------------------------------------------------------------------------------
/legacy/files/Generating_stack_machine_code_using_LLVM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/legacy/files/Generating_stack_machine_code_using_LLVM.pdf


--------------------------------------------------------------------------------
/legacy/files/LLVM_talk.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/legacy/files/LLVM_talk.pdf


--------------------------------------------------------------------------------
/legacy/gas-estimator/.dockerignore:
--------------------------------------------------------------------------------
1 | src/instrumentation_measurement/openethereum/ethcore/res/wasm-tests
2 | src/instrumentation_measurement/openethereum/ethcore/res/ethereum/tests
3 | src/instrumentation_measurement/openethereum/target
4 | 
5 | src/instrumentation_measurement/evmone/build
6 | 
7 | /.dockerignore
8 | /Dockerfile*
9 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | src/.RData
3 | 
4 | src/.Rhistory
5 | 
6 | __pycache__
7 | 
8 | *.nb.html
9 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "src/instrumentation_measurement/openethereum"]
2 | 	path = src/instrumentation_measurement/openethereum
3 | 	url = ../openethereum.git
4 | [submodule "src/instrumentation_measurement/evmone"]
5 | 	path = src/instrumentation_measurement/evmone
6 | 	url = ../evmone.git
7 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/Dockerfile.evmone:
--------------------------------------------------------------------------------
 1 | FROM python:3.8-alpine
 2 | 
 3 | RUN apk add --no-cache g++ cmake git make
 4 | 
 5 | WORKDIR /srv/app/
 6 | 
 7 | # base for python (only requirements.txt is copied at this point)
 8 | COPY ./src/program_generator/requirements.txt /srv/app/src/program_generator/requirements.txt
 9 | RUN pip install -r src/program_generator/requirements.txt
10 | 
11 | # base for evmone: clone the fork and do one full build, which is reused further down
12 | WORKDIR /srv/
13 | RUN git clone --recursive https://github.com/imapp-pl/evmone.git temp/evmone_builder
14 | RUN mkdir -p /srv/temp/evmone_builder/build
15 | WORKDIR /srv/temp/evmone_builder/build
16 | 
17 | RUN cmake .. -DEVMONE_TESTING=ON \
18 |   && cmake --build . --
19 | 
20 | # get our files for evmone
21 | # NOTE: we don't do `RUN git submodule update --init`. You should do this in the host
22 | COPY ./src/instrumentation_measurement/evmone /srv/app/src/instrumentation_measurement/evmone
23 | 
24 | # refresh the evmone build from `master` to our branch: reuse the build tree from above, minus its CMake cache
25 | RUN mv /srv/temp/evmone_builder/build /srv/app/src/instrumentation_measurement/evmone
26 | WORKDIR /srv/app/src/instrumentation_measurement/evmone/build
27 | RUN rm /srv/app/src/instrumentation_measurement/evmone/build/CMakeCache.txt
28 | 
29 | RUN cmake .. -DEVMONE_TESTING=ON \
30 |   && cmake --build . --
31 | 
32 | # get the remainder of our files
33 | COPY ./src/ /srv/app/src/
34 | 
35 | WORKDIR /srv/app/
36 | 
37 | # check correct host configuration (the image build fails if the script exits non-zero)
38 | RUN chmod a+x /srv/app/src/check_clocksource.sh
39 | RUN /srv/app/src/check_clocksource.sh
40 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/Dockerfile.geth:
--------------------------------------------------------------------------------
 1 | FROM python:3.8
 2 | 
 3 | ARG MEASUREMENT_MODE=all
 4 | 
 5 | WORKDIR /srv/app/
 6 | 
 7 | # base for python
 8 | COPY ./src/program_generator/requirements.txt /srv/app/src/program_generator/requirements.txt
 9 | RUN pip install -r src/program_generator/requirements.txt
10 | 
11 | # base for golang
12 | # fetch, unpack and delete the tarball in a single layer, so the archive is not kept in the image
13 | RUN wget --no-verbose --show-progress --progress=bar:force:noscroll \
14 |   https://golang.org/dl/go1.17.1.linux-amd64.tar.gz \
15 |   && tar -C /usr/local -xzf ./go1.17.1.linux-amd64.tar.gz \
16 |   && rm go1.17.1.linux-amd64.tar.gz
17 | 
18 | ENV PATH=$PATH:/usr/local/go/bin
19 | ENV GOPATH=/srv/app/.go
20 | ENV GO111MODULE=off
21 | ENV GOBIN=/srv/app/.go/bin
22 | 
23 | # fixed golang dependencies
24 | RUN go get github.com/ethereum/go-ethereum
25 | 
26 | # get our files: switch the go-ethereum checkout to the fork's `wallclock-<MEASUREMENT_MODE>` branch
27 | WORKDIR /srv/app/.go/src/github.com/ethereum/go-ethereum
28 | RUN git remote add imapp-pl https://github.com/imapp-pl/go-ethereum.git
29 | RUN git fetch imapp-pl wallclock-${MEASUREMENT_MODE}
30 | RUN git checkout wallclock-${MEASUREMENT_MODE}
31 | COPY ./src/ /srv/app/src/
32 | 
33 | WORKDIR /srv/app/src/instrumentation_measurement
34 | RUN go get ./geth/...
35 | 
36 | WORKDIR /srv/app/
37 | 
38 | # check correct host configuration (the image build fails if the script exits non-zero)
39 | RUN chmod a+x ./src/check_clocksource.sh
40 | RUN ./src/check_clocksource.sh
41 | 
42 | # our runtime config
43 | ENV GOGC=off
44 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/Dockerfile.openethereum:
--------------------------------------------------------------------------------
 1 | FROM rust:1.55.0
 2 | 
 3 | # RUN apk update && apk add rust cargo yasm cmake
 4 | # refresh the package index and install in the same layer, so a cached stale index is never reused
 5 | RUN apt-get update \
 6 |   && apt-get install -y yasm cmake python3-pip
 7 | # an `alias` is gone once its RUN shell exits; a symlink makes `python` available to later steps
 8 | RUN ln -sf "$(command -v python3)" /usr/local/bin/python
 9 | 
10 | WORKDIR /srv/app/
11 | 
12 | # base for python
13 | COPY ./src/program_generator/requirements.txt /srv/app/src/program_generator/requirements.txt
14 | RUN pip install -r src/program_generator/requirements.txt
15 | 
16 | # get our files for openethereum
17 | # NOTE: we don't do `RUN git submodule update --init`. You should do this in the host
18 | COPY ./src/instrumentation_measurement/openethereum /srv/app/src/instrumentation_measurement/openethereum
19 | WORKDIR /srv/app/src/instrumentation_measurement/openethereum/evmbin/
20 | 
21 | RUN cargo build --release
22 | 
23 | # get the remainder of our files
24 | COPY ./src/ /srv/app/src/
25 | 
26 | WORKDIR /srv/app/
27 | 
28 | # check correct host configuration (the image build fails if the script exits non-zero)
29 | RUN chmod a+x /srv/app/src/check_clocksource.sh
30 | RUN /srv/app/src/check_clocksource.sh
31 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/Makefile:
--------------------------------------------------------------------------------
 1 | # Branch/mode suffix and image tag; both can be overridden from the command line or environment
 2 | MEASUREMENT_MODE ?= all
 3 | IMAGE_VERSION ?= latest
 4 | 
 5 | # None of the targets produce a file of the same name, so always run their recipes
 6 | .PHONY: build build-geth build-evmone build-openethereum measure-geth
 7 | 
 8 | build: build-geth build-evmone build-openethereum
 9 | 
10 | build-geth:
11 | 	docker build -f Dockerfile.geth \
12 | 		--tag  "gas-cost-estimator/geth_${MEASUREMENT_MODE}:${IMAGE_VERSION}" \
13 | 		--build-arg  MEASUREMENT_MODE=${MEASUREMENT_MODE} \
14 | 		.
15 | 
16 | build-evmone:
17 | 	docker build -f Dockerfile.evmone --tag  "gas-cost-estimator/evmone_${MEASUREMENT_MODE}:${IMAGE_VERSION}" .
18 | 
19 | build-openethereum:
20 | 	docker build -f Dockerfile.openethereum --tag  "gas-cost-estimator/openethereum_${MEASUREMENT_MODE}:${IMAGE_VERSION}" .
21 | 
22 | # Generate programs inside the geth image and pipe them straight into the measurement script
23 | measure-geth:
24 | 	docker run --rm \
25 | 		--privileged \
26 | 		--security-opt seccomp=unconfined \
27 | 		-it gas-cost-estimator/geth_${MEASUREMENT_MODE}:${IMAGE_VERSION} \
28 | 		sh -c "cd src && python3 program_generator/program_generator.py generate --fullCsv | python3 instrumentation_measurement/measurements.py measure --mode ${MEASUREMENT_MODE} --sampleSize=5 --nSamples=1"
29 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/README.md:
--------------------------------------------------------------------------------
1 | gas-cost-estimator
2 | 
3 | [Stage I report](https://github.com/imapp-pl/gas-cost-estimator/blob/master/docs/report_stage_i.md)
4 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/execution_comparison.md:
--------------------------------------------------------------------------------
 1 | ### Execution Comparison
 2 | 
 3 | This document will analyze and compare the exact flow of execution of the interpreter loop, and how its computational cost is measured.
 4 | 
 5 | The goal is to know whether the measurements, as compared between various EVM implementations and various OPCODEs, are collected in a "fair" fashion. "Fair" in this context means not only (or not so much) fairness between implementations, but rather _fair relative treatment_ of all OPCODEs in all implementations.
 6 | 
 7 | For now, we focus on the individual OPCODE measurements, which we used in preliminary exploration. **TODO (optional)** repeat this for whole-program measurements, if we do them.
 8 | 
 9 | ### Notes
10 | 
11 | 1. `geth` incorporates a lot of setup which gets measured along with the _first_ instruction. Later this is worked around for programs, where only a single instruction is interesting, by prepending a throw-away `PUSH1`, wherever the interesting instructions would be the first one. `evmone` and `OpenEthereum` don't have this.
12 |     - this should be fixed by moving the `CaptureStart` in a forked `go-ethereum` implementation. It should be placed deeper down the call stack, just before entering the first interpreter loop iteration
13 |     - **EDIT**: this has been solved differently: we modify the interpreter code
14 | 2. In order to ensure standardization and portability, easy and succinct rules of how to measure should be devised, so that such comparisons aren't necessary in the future. See [Measurement standard ruleset](measurement_standard_ruleset.md).
15 | 3. `evmone` does a preprocessing step `analysis.cpp`, which slightly skews measurements - some of the effort to do some OPCODEs will be "put" under "intrinsic OPCODE `BEGINBLOCK`" executing at the end of each code block. `geth` and `OpenEthereum` don't have this.
16 |     - `BEGINBLOCK` (manifesting as `JUMPDEST` in OPCODE tracing) needs special attention in larger programs. We must come up with a way to handle it, since other implementations will not have this "intrinsic instruction". **TODO**
17 | 4. `evmone` perceivably measures _only_ the execution of the OPCODE (as opposed to `geth`), but this is not the case. In `evmone` all logic done in the main interpreter loop in `geth` is done deeper down the call stack.
18 | 5. ~`OpenEthereum` excludes the `while` loop condition used in the interpreter loop (`geth` and `evmone` include it)~
19 | 
20 | -   **EDIT**: done for `evmone` [here](https://github.com/imapp-pl/evmone/pull/2)
21 | -   **EDIT**: done for `openethereum` [here](...)
22 | 
23 | 6. `geth` and `evmone` measurements are written to a pre-allocated array on every instruction, ~while `OpenEthereum` writes the CSV data straight to `stdout`, this might be slightly unfair~
24 | 
25 | -   **EDIT**: done for `openethereum` [here](...)
26 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/instrumentation_measurement/evmone.md:
--------------------------------------------------------------------------------
 1 | ## Evmone
 2 | 
 3 | ### Installation and running
 4 | 
 5 | 1. Building
 6 | 
 7 |     ```
 8 |     mkdir build
 9 |     git submodule update --init
10 |     cd build
11 |     cmake .. -DEVMONE_TESTING=ON
12 |     cmake --build . -- -j $(nproc)
13 |     ```
14 | 
15 |     Changes related to the Gas Cost Estimator are in branch `wallclock` in both `evmone` and `evmc` git submodules.
16 | 
17 |     I got compile errors because of old gcc not supporting C++17
18 | 
19 |     1. https://askubuntu.com/questions/466651/how-do-i-use-the-latest-gcc-on-ubuntu/1163021#1163021
20 |     2. then:
21 | 
22 |     ```
23 |     sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 10
24 |     sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 10
25 |     ```
26 | 
27 | 2. Running
28 | 
29 |     From the `build` directory:
30 | 
31 |     ```
32 |     evmc/bin/evmc run --vm ./lib/libevmone.so [--print-opcodes] [--measure-all] [--measure-total] [--measure-one <instruction number>] [--repeat <number of repetitions>] <bytecode>
33 |     ```
34 | 
35 |     for example:
36 | 
37 |     ```
38 |     evmc/bin/evmc run --vm ./lib/libevmone.so --print-opcodes --measure-all --measure-one 3 --repeat 2 602060070260F053600160F0F3
39 |     ```
40 | 
41 |     To measure timer overheads:
42 | 
43 |     ```
44 |     evmc/bin/evmc measure-overheads
45 |     ```
46 | 
47 | ### Comments
48 | 
49 | -   evmone adds `5B` (`JUMPDEST`) instruction in the beginning if there is none
50 | 
51 | ### Rough notes
52 | 
53 | 1. ~Probably not a good fit to measure, only instrumentation~ EDIT: we'll measure it
54 | 2. EVMC API - these are tools that go with the EVMONE VM implementation.
55 |     1. under `/build/evmc/bin/evmc run --help` one finds help about how to run bytecode
56 |     2. trying `evmc/bin/evmc run 0x60` - this is `PUSH1`, check out https://www.ethervm.io/#60
57 |         1. PUSH 20
58 |         2. PUSH 07
59 |         3. MUL
60 |         4. PUSH F0 (offset)
61 |         5. MSTORE8
62 |         6. PUSH 01 (length)
63 |         7. PUSH F0 (offset)
64 |         8. RETURN
65 |         9. `evmc/bin/evmc run --vm ./lib/libevmone.so 602060070260F053600160F0F3`, nice
66 | 
67 | ### Notes on execution
68 | 
69 | 1. `auto analysis = analyze(rev, code, code_size);` before execution does some preallocations and preprocessing based on static code information, like assembling information about code blocks - I think it's still "fair", but might definitely cause uneven "gas dynamics" if compared to simple interpreters
70 |     - **BUT** - some operations are done per-block, e.g. _static_ gas operations and checks etc. This isn't very fair, it will fatten the perceived cost of
71 | 2. The `JUMPDEST` which appears at the beginning of each program is an intrinsic opcode `BEGINBLOCK`, `evmone` specific
72 |     - "These intrinsic instructions may be injected to the code in the analysis phase"
73 |     - "This instruction is defined as alias for JUMPDEST and replaces all JUMPDEST instructions"
74 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/instrumentation_measurement/example_bytecode_programs.md:
--------------------------------------------------------------------------------
1 | ## Example bytecode programs
2 | 
3 | This is just a quick dump of simple, working programs to smoke test stuff with:
4 | 
5 | -   `6020` - just push
6 | -   `602060070260F053600160F0F3` - push, mul and return
7 | -   `62FFFFFF600020` - some keccak
8 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/instrumentation_measurement/openethereum.md:
--------------------------------------------------------------------------------
 1 | ## Openethereum
 2 | 
 3 | ### Installation and running
 4 | 
 5 | 1. To build, run in the `evmbin` directory of openethereum repository (submodule), at branch `wallclock`
 6 |     ```
 7 |     cargo build --release
 8 |     ```
 9 |     this should produce `openethereum-evm` binary in openethereum's `target/release/` directory.
10 | 2. Running
11 | 
12 |     ```
13 |     ./target/release/openethereum-evm --code <bytecode> [--repeat <number of repetitions>] [--print-opcodes] [--measure-overhead]
14 |     ```
15 | 
16 |     for example:
17 | 
18 |     ```
19 |     ./target/release/openethereum-evm --code 602060070260F053600160F0F3 --repeat 2
20 |     ```
21 | 
22 |     If `--measure-overhead` is passed, bytecode will not be executed. If `--print-opcodes` is passed, only one repetition will be executed (no matter what `--repeat` value is).
23 | 
24 | ### Notes on execution
25 | 
26 | 1. only `let result = self.step(ext);` is included under the measurement. To capture most of "the EVM normally does when executing" we should also capture **TODO**:
27 |     - `loop {`
28 |     - the entire `match result {`
29 |     Proposed solution similar to what [this PR for `evmone` suggests](https://github.com/imapp-pl/evmone/pull/2)
30 | 2. what is in `self.step(ext)` except for the expected normal operation?
31 | 
32 |     - `self.do_trace = self.do_trace && ext.trace_next_instruction(`, with a comment about overhead, but `&&` shortcircuits and I'm assuming `self.do_trace` is false, so this is minor. It also is what normally the node would go through
33 |     - similar comment on the `evm_debug!`
34 | 
35 |     Nothing out of the ordinary there
36 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/instrumentation_measurement/openethereum_ewasm.md:
--------------------------------------------------------------------------------
 1 | # Instrumentation and measurement using `OpenEthereum` with Ewasm
 2 | 
 3 | 1. OpenEthereum currently seems to use a relatively old version of [`wasmi`](https://github.com/paritytech/wasmi) - [`0.3.0`](https://github.com/paritytech/wasmi/tree/0.3), our changes branch off of that (https://github.com/paritytech/wasmi/compare/master...imapp-pl:time_measurement)
 4 | 1. Useful reading about Wasm vs `wasmi`/Ewasm: https://github.com/paritytech/wasmi/blob/0.3/src/isa.rs
 5 |     - the instruction set (e.g. what is printed out in the instrumentation loop) is there, e.g. `I32Add`...
 6 | 1. Wasm bytecode starts with: `WASM_BINARY_MAGIC + WASM_BINARY_VERSION` = `0061736d01000000`
 7 | 1. https://webassembly.github.io/spec/core/appendix/index-instructions.html - another listing of instructions with stack requirements
 8 | 1. Decode Wasm binary format from hex:
 9 |     ```
10 |     cat wasm.example | python3 -c "import sys, binascii; sys.stdout.buffer.write(binascii.unhexlify(input().strip()))" > wasm.example.bin
11 |     ```
12 |     - this can then be loaded to [`wasm2wat`](https://webassembly.github.io/wabt/demo/wasm2wat/) (see below for WABT)
13 | 
14 | ### `chfast` notes
15 | 
16 | ```
17 | (func (export "call"))
18 | ```
19 | 
20 | ```
21 | (module
22 |   (func (export "call")
23 |     i32.const 2
24 |     i32.const 2
25 |     i32.add
26 |     drop
27 |   )
28 | )
29 | ```
30 | 
31 | ```
32 | (module
33 |   (func (export "call")
34 |     (call "useGas" 4)
35 |     i32.const 2
36 |     i32.const 2
37 |     i32.add
38 |     drop
39 |   )
40 | )
41 | ```
42 | 
43 | wabt https://pengowray.github.io/wasm-ops/ https://webassembly.studio
44 | 
45 | https://github.com/ewasm/design/blob/master/metering.md https://github.com/ewasm/design/blob/master/determining_wasm_gas_costs.md
46 | 
47 | ### WABT
48 | 
49 | 1. It installed as documented in gh for me
50 | 
51 | #### Integrate to measurements
52 | 
53 | 1. Execute everything from the dir where you have `wabt` and `openethereum` repos, and `example.wat`
54 | 2. Generate hex bytecode
55 |     ```
56 |     wabt/build/wat2wasm example.wat && cat example.wasm | hexdump -ve '1/1 "%02x"' && echo
57 |     ```
58 | 3. Generate hex bytecode from wat and execute
59 |     ```
60 |     wabt/build/wat2wasm example.wat && \
61 |     cat example.wasm | \
62 |     hexdump -ve '1/1 "%02x"' | \
63 |     xargs -L1 \
64 |     openethereum/target/release/parity-evm \
65 |     --gas 5000 \
66 |     --chain openethereum/ethcore/res/instant_seal.json \
67 |     --code
68 |     ```
69 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/instrumentation_measurement/other_tools.md:
--------------------------------------------------------------------------------
 1 | Notes on other tools for instrumentation & measurement
 2 | 
 3 | ### Takeaways
 4 | 
 5 | Nothing relevant/useful yet
 6 | 
 7 | ### Rough notes
 8 | 
 9 | 1. http://wingtecher.com/themes/WingTecherResearch/assets/papers/saner-evm.pdf - "EVM\*: From Offline Detection to Online Reinforcement for Ethereum Virtual Machine" - Not relevant to us, instrumentation for aborting of dangerous txs in the EVM. Not the kind of instrumentation we need.
10 | 2. https://www.researchgate.net/publication/331789943_Analysis_of_Ethereum_Smart_Contracts_and_Opcodes - "Analysis of Ethereum Smart Contracts and Opcodes"
11 |     - Not relevant to us, just analysis of frequency of opcodes in the verified contracts (static)
12 | 3. https://ethereum.stackexchange.com/questions/4446/instrumenting-evm - "Instrumenting EVM"
13 |     - _Maybe useful_ - "To do this, you need to define a VM log collector, which implements StructLogCollector. This function gets called on every step of the VM, and is provided with copies of the memory, stack, and modified parts of the storage, along with the program counter, current opcode...", this is for `go-ethereum`.
14 |     - follow Nick Johnson's link to etherquery
15 |     - (done) revisit if `go-ethereum` specific measuring needs to be done using this
16 | 4. https://publik.tuwien.ac.at/files/publik_278277.pdf - "A Survey of Tools for Analyzing Ethereum Smart Contracts" - mentions one tool for EVM instrumentation: ContractLarva
17 |     - https://www.researchgate.net/publication/327834131_Monitoring_Smart_Contracts_ContractLarva_and_Open_Challenges_Beyond - ContractLarva
18 |         - not relevant to us, Solidity level
19 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/measurement_standard_ruleset.md:
--------------------------------------------------------------------------------
 1 | ## Measurement standard ruleset
 2 | 
 3 | In order to ensure easy portability and adaptability to various clients and environments, we write down a ruleset for how OPCODE measurements should be conducted.
 4 | 
 5 | ### `measure_all`
 6 | 
 7 | In `measure_all` we measure the individual times of all OPCODEs executed for a given program. We also measure the timer overhead alongside the OPCODE execution measurement.
 8 | 
 9 | It turned out to be much better to measure by applying crude modifications to the EVM interpreter code, than to measure via calling a callback (e.g. `Tracer.CaptureState` for `geth`). To make this easier and as uniform as possible, follow these guidelines:
10 | 
11 | 1. Measure the entire block of code which constitutes a single interpreter iteration. In particular, measure all code which is repeatedly executed as OPCODEs are interpreted.
12 | 2. Leave all preprocessing out.
13 | 3. Make sure all tracing/debugging is off, except what we need to trace.
14 | 4. Gather the measurements consistently. There should be no allocations done by the measurement code.
15 | 5. Measurements should be gathered in a pre-allocated collection.
16 | 6. Don't do IO (`println` etc.) in the loop.
17 | 7. Check whether preprocessing or similar optimizations "move effort" from one instruction to another, like `evmone` does. If so, analyze impact and unfairness.
18 | 8. Use timer with least overhead, the most low-level one available.
19 | 9. When measuring the timer overhead, capture the time in exactly same way as done for the OPCODE measurement.
20 | 
21 | Follow this pseudocode pattern:
22 | 
23 | ```go
24 | // all preparations/allocations of the EVM code
25 | // instrumentation preparations/allocations
26 | start_time = now()
27 | while {
28 |     // EVM code
29 |     // OPCODE code etc...
30 | 
31 |     switch some_end_conditions {
32 |         continue:
33 |             // EVM code
34 |             end_time = now()
35 |             // measure the timer overhead
36 |             end_timer_time = now()
37 |             opcode_duration = end_time - start_time
38 |             timer_duration = end_timer_time - end_time
39 | 
40 |             durations.store_with_no_allocations(opcode_duration, timer_duration)
41 |             start_time = now()
42 |         break:
43 |             // EVM code
44 |             end_time = now()
45 |             // measure the timer overhead
46 |             end_timer_time = now()
47 |             opcode_duration = end_time - start_time
48 |             timer_duration = end_timer_time - end_time
49 | 
50 |             durations.store_with_no_allocations(opcode_duration, timer_duration)
51 |             // let it break normally
52 |     }
53 | }
54 | 
55 | durations.print()
56 | ```
57 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/meetings/2020-10-30.md:
--------------------------------------------------------------------------------
 1 | 1. Let's do notes from everything
 2 | 2. GH project + 1-2 day effort tickets. Cut whenever task extends. Goal: know what we're doing and identify blockers
 3 | 3. Choose boring tech
 4 | 4. How public?
 5 | 
 6 | -   post on ethresearch
 7 |     -   final - yes
 8 |     -   initial - not this week, but when we ramp
 9 | -   how do we describe funding/association?
10 | 
11 | 5. Define and document API's on the 3 domains
12 | 
13 | -   instrumentation & measurement
14 | -   sample programs
15 | -   model
16 | 
17 | 6. Gather prior docs/materials
18 | 
19 | -   RZ will send
20 | -   Radek to send if there has been an announcement of our project
21 | 
22 | 7. Ideas how to generate sample programs:
23 | 
24 | -   random, genetic to optimize throughput
25 | -   chfast has a measurement tool for geth (EVM)
26 | -   we measure
27 | -   we model and find gas coefficients for operations
28 | -   watch for coefficients prone to attacks
29 | -   chfast suggests arithmetics are overpriced. This is an issue given current pressure for statelessness
30 | -   let's be flexible
31 | 
32 | 8. Do a kick-off call with chfast and Marcin Benke
33 | 
34 | -   Radek to let them know
35 | 
36 | 9. 15-min sync up. 10.00am on Fridays
37 | 
38 | -   Piotr to invite
39 | 
40 | 10. use Hangouts, make a group
41 | 11. Radek to introduce to Jake Hudson from EF
42 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/meetings/2020-11-06.md:
--------------------------------------------------------------------------------
 1 | 1. measure instruction resource use in isolation or by measuring indirectly (measuring entire program run)?
 2 |     - insert measurement into the interpreter loop - feasible. Do not write down instructions, do it separately - once for measurement, second for opcodes for minimal
 3 |     - measure entire time for verification
 4 |     - feasible as plan A
 5 |     - sampling and batch measurement as plan B/follow-up/verification
 6 | 2. MB: define our value added. RZ: this is exploratory phase, we propose a set of methods
 7 |     - our focus different than Broken Metre - want to propose a consistent tool for gas pricing
 8 |     - Stage I should allow to build out the necessary tooling
 9 |     - Stage I - hacky implementations to test out feasibility are within scope
10 | 3. which resources are we focusing on: clock time only or RAM footprint as well.
11 |     - RAM-gas-pricing - cost for RAM rises quadratically - so this is not a first-priority
12 |     - PB to read on the takeaways from Broken Metre RAM-gas correlation
13 |     - Let's focus on CPU-intensity / execution time
14 | 4. is CALLDATACOPY an IO operation? we have it in our list but Broken Metre tells us it is IO (should we take out IO-operations?)
15 |     - CALLDATACOPY - should not be considered IO, might be side effect of particular implementation
16 |     - something to be careful about
17 | 5. evmone instrumentation - there is just a general idea how to do this, no ready tool yet. PB to let know where to start
18 | 6. fork repos to `imapp-pl`
19 | 
20 | ### Action items
21 | 
22 | -   PB to read on the takeaways from Broken Metre RAM-gas correlation
23 | -   evmone instrumentation - PB to let know where to start
24 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/meetings/2020-11-13.md:
--------------------------------------------------------------------------------
 1 | 1. PD: pls feedback on strategy.md early PR
 2 | 2. tentatively meet with MB & PB in 2 weeks
 3 | 3. scope of stage 1: proposing iterative approach on program generation + spike to explore 1st iterations PD proposition:
 4 |     - iteration 0 - simplest program to run single OPCODE
 5 |     - iteration 1 - expand to capture impact of input values
 6 |     - iteration 2 - expand to capture impact of "surrounding execution"
 7 |     - etc.
 8 | 4. how far to pursue instrumentation & measurement?
 9 |     - baseline - wallclock time
10 |     - other measurements on top of that, but what about portability across implementations?
11 |     - try to allow for convenient execution of various measurements
12 |     - allow for repetition
13 |     - evmone is our "3rd choice" EVM implementation, could be useful for reference/comparison
14 | 
15 | ### Action items
16 | 
17 | -   PD to ask PB about geth flag to measure overhead
18 | -   MS to focus on evmone instrumentation&measurement spike first, next OpenEthereum instrumentation&measurement spike
19 | -   PD to focus on geth instrumentation&measurement spike, if done spike program generation
20 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/meetings/2020-11-20.md:
--------------------------------------------------------------------------------
1 | 1. Python probably the best bet for scripting of sample program generation
2 | 1. Result from research can be either used to update gas cost for OPCODEs or optimize OPCODEs in implementations where they are underperforming
3 | 
4 | ### Action items
5 | 
6 | 1. RZ to look at repo and PR
7 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/meetings/2020-11-27.md:
--------------------------------------------------------------------------------
 1 | 1. The approach to measure instructions is not final. Measuring whole programs is still on the list. Measuring whole programs introduces a lot of burden on the analysis stage.
 2 | 2. PD plan for today: cleanup scripts, (optional) analyze first results in R, search for prior art on VM instrumentation & analysis.
 3 | 3. MS plan for today: cleanup evmone instrumentation, standardize outputs
 4 | 4. MS 50% time, PD 20% time, aiming for II half of Jan to have the Stage I report.
 5 | 5. Consider measuring only `operation.execute` in `geth`, to fully match instrumentation adopted for `evmone`. For now, both instrumentations measure "the entire interpreter loop", if we neglect the `while(instr is nullptr)` in `evmone`, so measurements are quite compatible.
 6 | 6. Need to be mindful about how we want to measure the EVMs and how they are initialized and what do they do in their interpreter loop. Make notes about geth/evmone/openethereum for now, we'll work on making measurements 100% compatible later.
 7 | 
 8 | ### Action items
 9 | 
10 | 1. PD to check ethereum magicians and eth research for prior art
11 | 2. RZ to review https://github.com/imapp-pl/gas-cost-estimator/pull/5
12 | 3. MS to review https://github.com/imapp-pl/gas-cost-estimator/pull/5
13 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/meetings/2020-12-04.md:
--------------------------------------------------------------------------------
 1 | 1. turbo geth / silkworm / evmone https://ledgerwatch.github.io/turbo_geth_release.html#Integration-with-evmone-via-EVMC - evmone as first-class citizen EVM to measure
 2 | 2. PD: opbench.md and timing_of_jvm_instructions.md, monotonic clocks, nanosecond precision
 3 | 3. 0xfe - invalid opcode, let's keep measuring as we do now, but we'll need to revisit. Same JUMP JUMPI
 4 | 4. PD, RZ: let's push notes to repo, even messy
 5 | 5. PD less available this week
 6 | 6. MS: evmone has +1 instruction vs geth - to investigate
 7 | 7. MS to push/PR to .py scripts as needed
 8 | 8. MS: geth measurements much slower than evmone, and than geth for PD - to investigate
 9 | 9. Plan MS: wrapup evmone, send csv to PD (or run R), openethereum/rust ramp up
10 | 10. Plan PD: papers, nanosecond measurements investigation, other measurements for comparison (?)
11 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/meetings/2020-12-11.md:
--------------------------------------------------------------------------------
1 | 1. eWasm - want to start next week, preferably MS
2 | 2. Reports ToC - paper like. More or less: 1/ related work 2/ our results (non-final) 3/ detailed plan for Stage II
3 | 3. MS: OpenEthereum
4 | 4. PD: wrap up papers, result comparison in R, add collective measurements and compare
5 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/meetings/2020-12-18.md:
--------------------------------------------------------------------------------
1 | 1. PD: importance of measuring and controlling time measurement, see: [here](https://htmlpreview.github.io/?https://github.com/imapp-pl/gas-cost-estimator/blob/master/src/analysis/exploration_timers.nb.html) for details
2 | 2. PD: per-instruction timing would be very good for (3.)
3 | 3. PD: algorithmic generation of most-informative (e.g. highest variance) sets of programs
4 | 4. We'll discuss this with MB & PB next week if possible
5 | 5. MS: we're measuring OpenEthereum instructions, need to do repetition and parameters and output standardization. We'll try to have a csv result to compare td/tmr, for PD to run comparison on (with geth & evmone). Will keep us posted
6 | 6. MS: hurdle with Rust - very slow compilation, looks like a common Rust issue?
7 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/meetings/2020-12-22.md:
--------------------------------------------------------------------------------
 1 | 1. Proposed method makes sense, but need to be careful and do validations
 2 | 1. PB: Deep exploring of cost variability of opcodes is interesting, but may be seen as optional.
 3 | 1. 6 items to look at until Stage I (tentatively end January)
 4 |     - eWasm, at least dip the toe in one, best pick: OpenEthereum's wasm. Enough to tell if our method applies or we suggest alternatives we have
 5 |     - explore timer and measure overhead for Rust
 6 |     - explore timer and measure overhead for C++
 7 |     - look into the differences of measurement implementations in 3 (4) implementations to see if they're fair
 8 |     - write Stage I report
 9 |     - ? can't remember :)
10 | 
11 | ### Action items
12 | 
13 | 1. PB to review evmone measurement 3 PRs
14 | 1. PB to look into OpenEthereum measurement PRs, suggest reviewers
15 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/meetings/2021-01-08.md:
--------------------------------------------------------------------------------
 1 | 1. Aim for 22 or 29th Jan
 2 | 2. Plan in GH project
 3 | 3. eWASM choice, see https://github.com/imapp-pl/gas-cost-estimator/issues/20
 4 | 
 5 | ### Action items
 6 | 
 7 | 1. RZ, MS to review https://github.com/imapp-pl/gas-cost-estimator/pull/19
 8 | 2. MS to prepare list of PRs to review for PB and ping
 9 | 3. PD to ping PB and MB to review https://github.com/imapp-pl/gas-cost-estimator/pull/19
10 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/meetings/2021-01-22.md:
--------------------------------------------------------------------------------
1 | ### Action points
2 | 
3 | 1. RZ to setup a call for MS and PB about OpenEthereum/eWASM
4 | 2. MS to make notes and braindump in the meantime
5 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/meetings/2021-02-05.md:
--------------------------------------------------------------------------------
1 | 1. Let's do Ewasm on par with EVM, to demonstrate feasibility
2 | 2. Generate programs in wat + wat2wasm + measure per instruction + pick measured instruction, ETA 1-2 weeks from now
3 | 
4 | ### Action points
5 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/papers/adaptive_gas_cost_mechanism.md:
--------------------------------------------------------------------------------
 1 | ## An Adaptive Gas Cost Mechanism for Ethereum to Defend Against Under-Priced DoS Attacks
 2 | 
 3 | Ting Chen, Xiaoqi Li, Ying Wang, Jiachi Chen, Zihao Li, Xiapu Luo, Man Ho Au and Xiaosong Zhang
 4 | 
 5 | https://arxiv.org/pdf/1712.06438.pdf
 6 | 
 7 | ### Notes
 8 | 
 9 | 1. "Emulation-based Measurement Framework" - how they measured the underpriced opcodes:
10 |     - extract just `.execute` for a stripped-down execution. "various utility functions for supporting the execution." (it might be relevant in our case to _include_ those utility functions, since we want to measure node EVM implementations)
11 |     - repeat and measure once "we run the interpretation handler in the emulated environment millions of times, because a single run is too short to conduct the measurement"
12 |     - synthesized environment: "If the operation manipulates the stack/memory/storage, we synthesize the stack/memory/storage with random length and generates random numbers as their items"
13 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/papers/bic_to_cpu.md:
--------------------------------------------------------------------------------
 1 | ## Continuous Bytecode Instruction Counting for CPU Consumption Estimation
 2 | 
 3 | Andrea Camesi Jarle Hulaas Walter Binder
 4 | 
 5 | https://www.researchgate.net/publication/37450070_Continuous_Bytecode_Instruction_Counting_for_CPU_Consumption_Estimation
 6 | 
 7 | ### Takeaways
 8 | 
 9 | Some interesting threads to potentially follow, but this approach doesn't differentiate between different JVM instructions, and also focuses on "typical applications", so different from ours.
10 | 
11 | ### Notes
12 | 
13 | 1. Is generally about translating a metric "BIC" (bytecode instruction count) to CPU time (exactly what we like) for JVM. "we show experi-mentally that for each platform there is a stable, application-specific bytecode rate that can be used for translating a BIC value into the cor-responding CPU consumption."
14 | 1. visit Java Resource Accounting Framework, Second Edition(J-RAF2, http://www.jraf2.org)
15 |     - dead link
16 | 1. ! "use the knowledge ofBRexpin various man-agement tasks, like load-balancing or usage-based billing" ! **usage-based billing**. Follow links resulting for searching for this:
17 |     - https://core.ac.uk/download/pdf/82526395.pdf - "Portable Resource Control in Java: Application to Mobile Agent Security" - not relevant
18 |     - https://www.researchgate.net/publication/2848223_Portable_Resource_Control_in_Java/fulltext/0e5fb082f0c41c4932e6fc21/Portable-Resource-Control-in-Java.pdf - "Portable Resource Control in Java" - not relevant
19 |     - https://www.researchgate.net/publication/223604760_Portable_virtual_cycle_accounting_for_large-scale_distributed_cycle_sharing_systems - "Portable virtual cycle accounting for large-scale distributed cycle sharing systems" - **TODO** optionally get this article, no free access
20 | 1. Follow citations "In contrastto related work which takes a low-level approach [11, 15,20]"
21 | 1. J-RAF2 and BRexp: J-RAF2 collects BIC and they add on CPU time measurement to this. Then they subtract the collecting routine execution time.
22 | 1. try finding Ethereum equivalent of the SPEC JVM98 SPEC JBB2005 Java Grande etc. is there anything like this?
23 |     - however: "this benchmark implements a fairly varied set of activities,and that the statistical characteristics of the collected sam-ples, especially the stability ofBRexpare representative ofmany real-world applications" Such a benchmark "many real-world applications" isn't good enough for Ethereum
24 |     - nothing found
25 | 1. Rationale for BRexp: a/ measurement precision b/ platform dependence "The objective of determining the CPU consumption forJava bytecodes is difficult because of the level of precisionthat is required: the time taken to execute any single byte-code on recent hardware is usually far below the measure-ment resolution offered by the JVM or by the OS itself.Another difficulty is that the desired timings are specific toeach{JVM, OS, hardware}platform combination"
26 |     - how much this applies to EVMs?
27 |     - followed: "In previous ex-periments, we used standard APIs (notably the JVMPI [17]profiling API) for measuring elapsed per-thread CPU time,but the inherent lack of resolution"
28 |         - nothing
29 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/papers/broken_metre.md:
--------------------------------------------------------------------------------
 1 | ## Broken Metre: Attacking Resource Metering in EVM
 2 | 
 3 | Daniel Perez Benjamin Livshits
 4 | 
 5 | https://arxiv.org/pdf/1909.07220.pdf
 6 | 
 7 | ### Thoughts
 8 | 
 9 | 1. We originally wanted to separate sample program generation from model, and have them communicate via some form of an "API".
10 |     - But what if we want to generate programs dynamically, based on a result from the model (as Broken Metre does)?
11 | 2. The "references to follow" hold some interesting reading about gas-exploration tools
12 | 
13 | ### Rough dump notes
14 | 
15 | 1. mining contract history to detect outliers, gas cost vs resources (CPU & RAM)
16 | 2. low-throughput contracts, contracts that cost too little gas to execute
17 |     1. throughput = gas/second
18 | 3. references to follow:
19 |     1. (done) and the gas cost has also been reviewedseveral times [11], [40] to increase the cost of the under-pricedinstructions.
20 |     2. (done) our problem resembles other program synthesistasks [33] [Program Synthesis](https://www.microsoft.com/en-us/research/wp-content/uploads/2017/10/program_synthesis_now.pdf) a generic book on auto-generating programs. Irrelevant for now
21 |     3. (done) Chen etal. [18] propose a mechanism where contracts using a singleinstruction in excess would be penalised.
22 |         - aimed at opportunistically punishing the abusing contracts (which do an excessively expensive operation)
23 |         - [`adaptive_gas_cost_mechanism.md`](./adaptive_gas_cost_mechanism.md)
24 |     4. (done) IMPORTANT: Yang et al. [58] have recently empiricallyanalysed the resource usage and gas usage of the EVM in-structions. They provide an in-depth analysis of the time takenfor each instructions both on commodity and professionalhardware.
25 |         - done in [`empirically_analyzing.md`](./empirically_analyzing.md)
26 |     5. (done, irrelevant) Gas Usage Optimisation:Gasper [17] is one of the firstpaper which has focused on finding gas related anti-patterns forsmart contracts
27 |     6. (done) MadMax [32] is a static analysis tool to find gas-focusedvulnerabilities
28 |         - irrelevant: "find patternswhich could cause out-of-gas exceptions and potentially lockthe contract funds, rather than gas-intensive pattern"
29 |     7. (done, irrelevant) Gastap [5] is a static analysis tool which allows to computesound upper bounds for smart contracts
30 | 4. programs where the cache influences exe-cution time by an order of magnitude
31 |     1. This is about page caching for IO-intensive operations - out of our scope
32 | 5. hardware setup:
33 |     1. We run all of the experiments on a Google CloudPlatform (GCP) [31] instance with 4 cores (8 threads) IntelXeon at 2.20GHz, 8 GB of RAM and an SSD with a 400MB/sthroughput. The machine runs Ubuntu 18.04 with the Linuxkernel version 4.15.0.
34 |     2. (Parity bare metal for comparison) more powerful bare-metal machine with 4 cores (8 threads) at 2.7GHZ, 32GB ofRAM and an SSD with 540MB/s throughput
35 | 6. Garbage Collection - watch out for - they decided to use _aleth_
36 | 7. Our measurement framework is open-sourced2and
37 |     1. https://github.com/danhper/aleth/tree/measure-gas
38 |         - found that their instruction benchmarking function uses `clock_gettime(CLOCK_MONOTONIC)`, which worked very bad on golang (**TODO** investigate further?) - [see `OnOpFunc Executive::benchmarkInstructionsOp()`](https://github.com/danhper/aleth/compare/master...measure-gas#diff-e0d85c8989319d0f013c015e07f88792a12ad13af7b8ff8bf75c1954b7adbf53R520)
39 | 8. time and memory measurement:
40 |     1. We use a nanosecond precision clock to measure time and measure both the time taken to execute a single smart contract and the time to execute a single instruction. To measure the memory usage of a single transaction, we override globally the `new` and `delete` operators and record all allocations and deallocations performed by the EVM execution within each transaction. We ensure that this is the only way used by the EVM to perform memory allocation.
41 |     2. measure memory, we computethe difference between the total amount of memory allocatedand the total amount of memory deallocated
42 |     3. For CPU, we use clock time measurements as a proxy for the CPU usage.
43 |     4. Finally, for storage usage, we count the number of EVMwords (256 bits) of storage newly allocated per transactions.
44 |         1. for storage usage comparison they used `iotop`
45 | 9. modelling:
46 |     1. ~millions of data points
47 |     2. Pearson score for correlation, gas vs resource
48 |     3. multivariate correlation, gas vs principal components of resources
49 |     4. capturing large variance is important
50 | 10. sample program generation:
51 |     1. This made it easier: The task we solve is different becausewe need to define “valid” but not “meaningful” programs andoptimise for a well-defined metric: gas throughput
52 |     2. caveat: Second, instructions should not try to access random parts ofthe EVM memory, otherwise the program could run out ofgas
53 |     3. they excluded loops and infinite loops
54 |     4. managing items on the stack is important - never pop too much!
55 | 11. TODO: is CALLDATACOPY an IO operation? we have it in our list but this paper tells us it is IO
56 | 12. Section IV.D and IV.E skipped
57 | 13. "long-term fixes" and how do we fit in?
58 |     1. dynamic pricing from Chen et al. - unsure about feasibility
59 |     2. importance of stateless clients explained, relates to L2 scaling:
60 |         1. The key ideais that instead of forcing clients to store the whole state,entity emitting transactions must send the transaction, the dataneeded by the transactions, and a proof that this data is correct
61 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/papers/bytecode_monitoring_of_java.md:
--------------------------------------------------------------------------------
 1 | ## Bytecode Monitoring of Java Programs
 2 | 
 3 | Wong
 4 | 
 5 | http://www.cs.ox.ac.uk/people/peter.wong/pub/project.pdf
 6 | 
 7 | **TODO** (optional) - read the entire paper, but the interesting part is:
 8 | 
 9 | Timing analysis of Java bytecodes - An initiative that is brought up to investigate the implementation of benchmarking the Java Virtual Machine (JVM) Instruction Set (3.1 Finding methods to calculate running time of bytecodes:)
10 | 
11 | ### Notes
12 | 
13 | 1. The initial idea is to benchmark single bytecode at a time by repetitively executing individual bytecode in multiples of 10s, 100s and 1000s, to enable JVM to monitor these bytecodes a technique so-called Application Response Measurement (ARM) [8] (**TODO** see [8])
14 | 2. Methods for measurement: hard to follow but:
15 |     1. shell out from C and measure in C
16 |     2. System.currentTimeMillis
17 |     3. clock_gettime system call
18 | 3. bytecodes are duplicated, with the stack being prepared beforehand (it is claimed that the stack size doesn't affect the results). Duplication is done: "1,10,100,1000 and 9000 iteration(s) sequence"
19 | 4. For bytecodes leaving values on the stack, there's a technique similar to `measure inferred` from [`strategy.md`](/docs/strategy.md) - they subtract an earlier calculated timing of the `pop` from the measured opcode timing they get
20 | 5. They infer JVM optimisation kicked in and "This could be one of the reason (and the same reason) as to why when individual bytecode was timed, one iteration took more time that an average of multiple iterations (e.g. 1000).". Not a problem for us
21 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/papers/empirically_analyzing.md:
--------------------------------------------------------------------------------
 1 | ## Empirically Analyzing Ethereum’s Gas Mechanism
 2 | 
 3 | Renlord Yang∗§, Toby Murray∗, Paul Rimba§, Udaya Parampalli∗
 4 | 
 5 | https://arxiv.org/pdf/1905.00553.pdf
 6 | 
 7 | ### Thoughts
 8 | 
 9 | 1. Good intro to cite the importance of accurate gas costs (node diversity)
10 | 2. Our work could pivot to focus on easy reproducibility and quick time to obtain results and using synthetic data, as opposed to historical data, which has its advantages.
11 | 3. All estimation work, including ours, balances between estimating intrinsic cost of computation and particular optimizations (or lack thereof) of particular node implementations
12 |     - in other words: resolving gas cost discrepancies might be done by either updating gas cost of OPCODEs and by optimizing (or aligning optimizations between) node implementations
13 | 4. A synthetic approach (ours) is better versed to estimate gas cost under the assumption that transaction execution might be done concurrently on a single machine.
14 | 
15 | ### Rough dump notes
16 | 
17 | 1. predates Broken Metre, done independently, similar conclusions
18 | 2. includes I/O and focuses on those costs
19 | 3. `aleth`-based same as Broken Metre
20 | 4. similar to our current approach, they trace every EVM instruction
21 | 5. approach to resource contention on the test machine: "We elected to use a noisy setup for Machine B as it is representative of the hardware choice used by a consumer user."
22 | 6. `BLOCKHASH` is the main offender, but it seems odd that it hasn't been optimized (maybe it has been in more popular node implementations?)
23 | 7. parallel transaction execution is mentioned in Related Work
24 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/papers/holimans_gist_benchmarks.md:
--------------------------------------------------------------------------------
 1 | ## "gist and PR from `holiman`"
 2 | 
 3 | Martin Holst Swende `holiman`
 4 | 
 5 | https://gist.github.com/holiman/7153e088af8941379cf21c0e4610d51f (and the original [PR with discussion](https://github.com/ethereum/go-ethereum/pull/21207))
 6 | 
 7 | ### ELI5
 8 | 
 9 | This is an estimation of the cost (effort) done to _switch context_ when calling a precompiled contract using STATICCALL. [EIP-2046](https://eips.ethereum.org/EIPS/eip-2046) intends to lower STATICCALL for precompileds from 700 to 40. The gist assesses this change in `geth`.
10 | 
11 | ### How is it measured & instrumented?
12 | 
13 | It's not instrumented.
14 | 
15 | The measurement is done by doing an infinite loop in the program and seeing how much time until it depletes `100MGas` (look at the `... ns/op` - this is the time how long the "depletion" needs - the less, the more overpriced the operation is. Don't look at the preceding integer value, this is just golang benchmark stuff). The "right" gas cost of context switching is found, when this time is equal when you do the context switch (`staticcall-identity`) or not do it (`loop`), but only balance the stack with POPs.
16 | 
17 | It is run by [golang benchmarks](https://golang.org/pkg/testing/#hdr-Benchmarks), which measures the loop. Inside it works similar to `runtime.Execute` which we're using, but could be a useful example of how to strip down the `runtime.Execute` in the future.
18 | 
19 | In the discussion in the [original PR](https://github.com/ethereum/go-ethereum/pull/21207) there's a thread of how the "other" ops (JUMP, PUSH, POP etc) contribute and distort the result.
20 | 
21 | ### Takeaways
22 | 
23 | 1. Entirely different way to measure effort.
24 | 2. Seeking to _balance_ operations - to equate gas spent on equally hard computations. (should cite when explaining motivation)
25 | 3. `holiman`'s approach (compare using gas depletion, rather than numbers of the loop being iterated) couples the measurement with the gas cost and effort for the accompanying ops (JUMP, PUSH, POP). If we do measurements per operation via instrumentation, we're doing something opposite.
26 | 4. [This is linked](https://github.com/matter-labs/openethereum/commit/77471a1d08a0f088dfd3b30802036b3e0fbb38a6) in the discussion. Possibly useful cheatsheet for OpenEthereum
27 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/papers/instruction_timing_model_1976.md:
--------------------------------------------------------------------------------
 1 | ## An Instruction Timing Model of CPU Performance
 2 | 
 3 | Bernard L. Peuto Leonard J. Shustek
 4 | 
 5 | https://inspirehep.net/literature/110758
 6 | 
 7 | **NOTE** this is a really old paper, but it has some inspiring thoughts:
 8 | 
 9 | 1. OpCode pair investigation - on the 70's hardware measurement level opcode pairs were investigated, whether the pairing itself contributes to higher load, warranting distinguishing as a new opcode - we should maybe do a similar exercise?
10 | 2. More generally: we could model and explore _variance_ of computational cost of various OpCodes, not only a static cost estimation. E.g. what if `PUSH` behaves very differently in different circumstances? We could generate programs so that they capture this variation the best.
11 | 3. Taking this further, there could be parameters to each opcode we don't know about, which should modify the gas cost incurred.
12 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/papers/opbench.md:
--------------------------------------------------------------------------------
 1 | ## OpBench: A CPU Performance Benchmark for Ethereum Smart Contract Operation Code
 2 | 
 3 | Amjad Aldweesh, Maher Alharby, Maryam Mehrnezhad, Aad van Moorsel
 4 | 
 5 | https://www.researchgate.net/publication/336007166_OpBench_A_CPU_Performance_Benchmark_for_Ethereum_Smart_Contract_Operation_Code
 6 | 
 7 | ### Takeaways
 8 | 
 9 | 1. The approach is similar to our current
10 | 2. We can expand and provide added value by:
11 |     - doing OpenEthereum, ewasm
12 |     - focus on gas schedule alignment and implementation discrepancies detection
13 |     - providing a more standardized procedure to implement instrumentation, e.g. caveats and requirements for a standardized measurement, generic scripts to conduct analysis on other implementations/environments
14 |     - running without the need for stack balancing and contract deployment, by executing in an artificial EVM setup
15 |     - validating and combining with other kinds of measurements, by conducting a detailed statistical analysis of the data
16 | 
17 | ### Notes
18 | 
19 | 1. Optimize EVM execution by miners, by benchmarking different environments. "As a consequence,a miner would want to choose a platform that optimizes thereward for the used energy. The benchmark presented in thispaper, when carried out for different platforms, willhelp selectthe best platform."
20 | 2. But also allow to choose "fattest" contracts to execute. "Our opcode benchmark would assist in decidingwhich smartcontracts to execute"
21 | 3. Lastly - alignment of reward and cost
22 | 4. How it is measured? repeatedly single opcode: "In particular, since indi-vidual opcodes take very little time to execute, OpBench1 executes opcodes repeatedly, taking care of stack managementchallenges that result from the small size EVM stack". - measuring every opcode (?). "The computation time of each bytecode is recorded" - but execution takes place in a full contract deployed - "set a timer before and after the executionof each opcode on the EVM." - for `PyEVM` they use [timeit](https://docs.python.org/3/library/timeit.html), which: - turns off GC - runs setup - suggest to only use `min` on the timing vector, not mean/stddev - there is a claim, that benchmarking on a higher (not opcode, but entire contract) level is not sufficient, around citation [15] - followed citation [15] in [`performance_benchmarking.md`](./performance_benchmarking.md)
23 | 5. Program generation: "we generate the bytecode for a fully executable smart contract, which contains repeated bytecode instances of the opcode intended to be measured, as well as the required PUSHs and POPs opcodes to successfully manipulate the EVM stack. "
24 |     - for selected opcodes they do different versions for different sizes of the data manipulated
25 |     - for selected opcodes ("Formula-based", 6 of them) they craft custom approaches
26 |     - Stack Management: for example for ADD, they do PUSH, PUSH, ADD, POP repeatedly
27 | 6. Advertises the approach to be portable to other implementations.
28 | 7. "to the best of ourknowledge, there is no prior systematic approach suggestedfor performance benchmarking of Ethereum opcodes"
29 | 8. Paper seems to focus on miner rewards coming from the gas schedule, instead of network security, balanced execution or enabling execution on consumer hardware.
30 | 9. Paper claims that gas schedule from the yellow paper does not provide a basis for it, but the basis was there (maybe it was not cited in YP) - see the old spreadsheet
31 | 10. References to follow:
32 |     - (done irrelevant) GASPER again, as in "Broken Metre"
33 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/papers/other.md:
--------------------------------------------------------------------------------
 1 | ## Other resources which might be useful
 2 | 
 3 | 1. Ethereum yellow paper - worth a quick scan, holds an (up-to-date?) list of opcodes in EVM with descriptions and details
 4 | 2. https://ethereum.org/en/developers/docs/evm/ - just basics to ramp up with
 5 | 3. https://www.ethervm.io/ - reference for EVM opcodes + Decompiler
 6 | 4. https://medium.com/swlh/getting-deep-into-evm-how-ethereum-works-backstage-ab6ad9c0d0bf - nice intro for a broader perspective how EVM works
 7 | 5. https://docs.google.com/spreadsheets/d/1m89CVujrQe5LAFJ8-YAUCcNK950dUzMQPMJBxRtGCqs/edit#gid=0 - "1.0 gas costs" spreadsheet - illustrates the original calculations to get gas costs. Seems to work like this:
 8 |     - calculate how much effort can a single block be (e.g. take x time max, leave y memory footprint, take z storage for block history etc.) - see cells L5-L10. Call this _block effort limit_
 9 |     - set an arbitrary _block gas limit_
10 |     - express the cost of a unit of this effort (e.g. a microsecond of computation) in gas using the block gas limit and block effort limit
11 |     - measure footprint of each opcode (how?) in these dimensions - see columns B-G
12 |     - express this footprint in gas - see column H
13 | 6. https://github.com/wolflo/evm-opcodes - source for `www.ethervm.io`, but with a nice compilation of gas cost formulas from the yellow paper. Not exactly sure about up-to-dateness, e.g. it mentions 700 for STATICCALL, while other sources 40 (after EIP-2046)
14 | 7. https://dave.cheney.net/high-performance-go-workshop/dotgo-paris.html - some materials on go profiling and benchmarks. Not immediately useful but:
15 |     - tips on profiling with garbage collector
16 |     - compiler optimization traps
17 |     - avoiding appending (**TODO** ensure we don't)
18 | 8. https://eips.ethereum.org/EIPS/eip-150 - new gas costs there calculated along the lines of the original model - good to cite for motivations of gas research. Only IO intensive
19 | 9. https://eips.ethereum.org/EIPS/eip-1884 - good to cite for motivations of gas research. Measurements were done on chain history, via ms/MGas metric. Only IO intensive
20 |     - **TODO** - gather and write down rationale towards focusing on non-IO operations
21 | 
22 | ## Other resources scanned, which aren't relevant to us
23 | 
24 | 1. http://bergel.eu/MyPapers/Soto20a-FuzzingSolidity.pdf - "Fuzzing to Estimate Gas Costs of Ethereum Contracts" - irrelevant; is about comparing Solidity static gas cost estimation and estimation using fuzzing testing.
25 | 1. The `evmjit` story (which I stumbled upon [here](https://ethresear.ch/t/evm-performance/2791))- the idea seemed to be to replace patterns of operations with "bulk" operations, e.g. a bunch of static PUSH instructions before a `CALL` to become a single meta-instruction. _It would be of great significance_ to our results, but seems to be discontinued (in `geth` codebase there's no occurrences, similar for `OpenEthereum`, [this is not active](https://github.com/ethereum/evmjit) and then [this](https://github.com/ethereum/go-ethereum/issues/2365#issuecomment-275493369)).
26 | 1. ethresear.ch - search for `evm cpu`, `evm gas`, check tags: https://ethresear.ch/c/evm-ewasm/26, https://ethereum-magicians.org/tag/evm, https://ethereum-magicians.org/tag/evm-evolution, https://ethereum-magicians.org/tag/opcodes
27 |     - nothing relevant in here:
28 |         - https://ethresear.ch/t/running-deep-learning-on-evm/899
29 |         - https://ethresear.ch/t/evm-performance/2791/18 (but interesting read about EVM vs ewasm in general)
30 |         - https://ethresear.ch/t/dynamic-gas-costs/4375/4 (dynamic opcode pricing via consensus)
31 |         - https://ethresear.ch/t/verifiable-precompiled-contracts/7242
32 |         - https://ethresear.ch/t/evm-idea-add-access-to-overflow-carry-sign-and-zero-flags-to-reduce-gas-use/782/5
33 |         - https://ethresear.ch/t/eth2-authenticated-data-structures-and-gas-costs/6487
34 |         - https://ethresear.ch/t/client-side-solidity-evm/4605/5
35 |         - https://github.com/pirapira/awesome-ethereum-virtual-machine - great list of resources, but nothing immediately useful. Revisit
36 |         - https://ethereum-magicians.org/t/eip-1109-remove-call-costs-for-precompiled-contracts/447/14
37 |         - https://ethereum-magicians.org/t/eip-1884-repricing-for-trie-size-dependent-opcodes/3024/38
38 | 1. https://www.codeproject.com/Articles/8672/Virtual-Machine-Opcode-Resolution-Performance-Test - "Virtual Machine Opcode Resolution, Performance Tests"
39 | 1. http://mural.maynoothuniversity.ie/6432/1/JP-Relating-Static.pdf - "Relating Static and Dynamic Measurements for the Java Virtual Machine Instruction Set"
40 | 1. https://www.researchgate.net/publication/3929823_Measurement_and_Analysis_of_Runtime_Profiling_Data_for_Java_Programs - "Measurement and Analysis of Runtime Profiling Data for Java Programs"
41 | 1. https://stackoverflow.com/questions/37740081/bytecode-instruction-cost - "Bytecode instruction cost" - SO thread for Python, nothing useful
42 | 1. https://www.aminer.org/pub/53e9b6cab7602d97042540cd/a-portable-research-framework-for-the-execution-of-java-bytecode - http://www.sable.mcgill.ca/publications/thesis/phd-gagnon/sable-thesis-2002-phd-gagnon.pdf - "A portable research framework for the execution of java bytecode"
43 | 1. https://www.researchgate.net/publication/2649955_The_Jalapeno_Dynamic_Optimizing_Compiler_for_Java - "The Jalapeño Dynamic Optimizing Compiler for Java"
44 | 1. https://www.researchgate.net/publication/2569394_Characterizing_Computer_Systems%27_Workloads - "Characterizing Computer Systems' Workloads"
45 | 1.
46 | 
47 | ## search queries
48 | 
49 | 1. ftp://ftp.cs.wisc.edu/paradyn/technical_papers/paradynJ.pdf - "Performance Measurement of Dynamically Compiled Java Executions"
50 | 
51 | "virtual machine instruction measurement" and variations using: "java" / "clr cil" / "comparison" / "benchmark",
52 | 
53 | measure bytecode instructions performance -"platform independent timing of java"
54 | 
55 | time vs instruction count correlation
56 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/papers/performance_benchmarking.md:
--------------------------------------------------------------------------------
 1 | ## Performance Benchmarking of Smart Contracts to Assess Miner Incentives in Ethereum
 2 | 
 3 | Amjad Aldweesh, Maher Alharby, Ellis Solaiman, Aad van Moorsel
 4 | 
 5 | https://www.researchgate.net/publication/328908738_Performance_Benchmarking_of_Smart_Contracts_to_Assess_Miner_Incentives_in_Ethereum
 6 | 
 7 | Paper focuses on finding real contracts with highest overall Gas/CPU ratio.
 8 | 
 9 | ### Notes
10 | 
11 | 1. Motivation to cite: "Moreover, if certain smart contracts are known not to be attractive, transactions using that smart contract would not be executed by miners" - alignment of gas costs impacts dependability of miner work
12 | 2. Intention similar to `gas-cost-estimator`: "We envisage that such a benchmark could be run periodically, on a variety of software and hardware platforms, to demonstrate to the community if and how well costs and benefits are aligned within Ethereum"
13 | 3. Not sure why contract creation is investigated in the context of CPU time. It isn't surprising that CPU/gas is 6x compared to execution
14 | 4. Environment: PyEthApp on a MacBook
15 | 5. It can be argued, that it would be hard to "prefer" high-yielding contracts at the expense of low-throughput contracts, b/c it's hard to predict accurately, which contracts are called by a tx.
16 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/papers/timing_of_jvm_instructions.md:
--------------------------------------------------------------------------------
 1 | ## Platform Independent Timing of Java Virtual Machine Bytecode Instructions
 2 | 
 3 | Jonathan M. Lambert James F.Power
 4 | 
 5 | http://mural.maynoothuniversity.ie/6382/2/JP-Platform.pdf
 6 | 
 7 | ### Notes
 8 | 
 9 | 1. white-box ((done) follow citations [11,6,7], [25]), where JVM source code is available vs black-box (statistically, on an entire program level). This paper is black-box: "to what extent can we reliably predict the execution timings for JVM bytecode instructions at this kind of platform-independent level?"
10 |     - they do white-box for calibration/validation using RDTSC
11 |     - calibration is linear regression of their method vs RDTSC. 2 outliers (not accounted for). Their method under-predicts by 23%, but what if one takes out the 2 outliers, which are over-predicted?
12 | 2. Problems with white-box sound JVM specific: "Java bytecode instructions execute within nanoseconds. Attempting to measure these instructions with a high degree of precision using standard Java library timing methods such as System.currentTimeMillis or System.nanoTime results in the quantisation errors masking their true execution times."
13 |     - do we have nanosecond accuracy in Rust/C? **TODO** (done for golang: https://github.com/imapp-pl/gas-cost-estimator/issues/14)
14 |     - there is `System.nanoTime` but "System.nanoTime cannot guarantee nanosecond accuracy"
15 | 3. **TODO** (optional) follow citations [18, 4, 9] in case low-resolution timing handling is required, or at least the paragraph that summarizes them
16 | 4. (can't find) follow [13] and follow [26] "present a technique for the measurement of bytecode execution times"
17 | 5. (done) follow [20] "production of an instruction timing model to model CPU performance measurements"
18 | 6. They estimate timer overhead by 2 consecutive calls - same thing on our list
19 | 7. In JVM you have an instruction to invoke `System.currentTimeMillis`. See Code segment 1
20 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/papers/vm_matters.md:
--------------------------------------------------------------------------------
 1 | ## VM Matters: A Comparison of WASM VMs and EVMs in the Performance of Blockchain Smart Contracts
 2 | 
 3 | Shuyu Zheng, Haoyu Wang, Lei Wu, Gang Huang, Xuanzhe Liu
 4 | 
 5 | https://arxiv.org/pdf/2012.01032.pdf
 6 | 
 7 | ### Notes
 8 | 
 9 | 1. "conducts the first measurement study, to measure the performance on WASM VM and EVM for executing smart contracts on blockchain"
10 | 2. "To our surprise, the current WASM VM does not perform in expected performance. The overhead introduced by WASM is really non-trivial. Our results highlight the challenges when deploying WASM in practice, and provide insightful implications for improvement space."
11 | 3. This paper includes a comparison of EVM implementations, but does so at a high level, to seek performance gaps of running smart contracts. We focus on finding differences in patterns of relative computational costs that set implementations apart. "RQ2 A Comparison of EVM Engines. As there are several clients that support the execution of EVM bytecode, we are wondering are there any performance gaps of running smart contracts among them?"
12 | 4. This paper indicates the importance of 256bit/64bit versions of benchmarks for Ewasm. Not entirely sure what this means here, but this might be another dimension of variability for Ewasm
13 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/docs/notes/program_generator/notes.md:
--------------------------------------------------------------------------------
1 | ## Program generator
2 | 
3 | 1. **TODO** clarify `0xfe` `INVALID` - `0xfe` is just an example invalid opcode, among many other bytes that are invalid. How to measure that?
4 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/analysis/README.md:
--------------------------------------------------------------------------------
 1 | ## Analysis of the results
 2 | 
 3 | Use Rstudio to run the notebooks.
 4 | 
 5 | You can also view the preview HTMLs using these links:
 6 | 
 7 | -   [`exploration`](https://htmlpreview.github.io/?https://github.com/imapp-pl/gas-cost-estimator/blob/master/src/analysis/exploration.nb.html)
 8 | -   [`exploration timers`](https://htmlpreview.github.io/?https://github.com/imapp-pl/gas-cost-estimator/blob/master/src/analysis/exploration_timers.nb.html)
 9 | 
10 | (Construct them by prepending `https://htmlpreview.github.io/?` to the full github URL pointing to the HTML file)
11 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/analysis/exploration_timers.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "R Notebook: exploration of various timers"
  3 | output: html_notebook
  4 | ---
  5 | 
  6 | Read in the output of `go run ./src/instrumentation_measurement/clock_resolution_go/main.go` (and other routines for overhead measurements):
  7 | 
  8 | ```{r fig.width=20}
  9 | setwd("~/sources/imapp/gas-cost-estimator/src")
 10 | time_geth = read.csv("../../local/time_geth.csv")
 11 | time_evmone = read.csv("../../local/time_evmone.csv", header = FALSE)
 12 | time_openethereum = read.csv("../../local/time_openethereum.csv", header = FALSE)
 13 | time_all = time_geth[1:200000, ]
 14 | time_all$time_evmone = time_evmone[, 1]
 15 | time_all$time_openethereum = time_openethereum[, 1]
 16 | N = 200000
 17 | time = head(time_all, N)
 18 | head(time)
 19 | ```
 20 | The temporal dynamics of all timers must be accounted for. They all seem to warm up for a long time.
 21 | 
 22 | **NOTE** `purple` line (gotsc) is not in ns but in CPU cycles
 23 | 
 24 | ```{r fig.width=20}
 25 | plot(NULL, xlim=c(1, N), ylim=c(0, 3000))
 26 | 
 27 | geth_color = rgb(0.1,0.1,0.7,0.5)
 28 | evmone_color = rgb(0.8,0.1,0.3,0.6)
 29 | openethereum_color = rgb(0.1,0.7,0.1,0.5)
 30 | 
 31 | lines(time$clock_gettime, type = "l", col = "red")
 32 | lines(time$time, type = "l", col = "blue")
 33 | lines(time$runtime_nano, type = "l", col = geth_color)
 34 | lines(time$gotsc, type = "l", col = "purple")
 35 | lines(time$time_evmone, type = "l", col = evmone_color)
 36 | lines(time$time_openethereum, type = "l", col = openethereum_color)
 37 | ```
 38 | 
 39 | ```{r fig.width=20, fig.height=10}
 40 | plot(NULL, xlim=c(1, N), ylim=c(0, 100))
 41 | 
 42 | ma <- function(x, n = 50){stats::filter(x, rep(1 / n, n), sides = 2)}
 43 | lines(ma(time$runtime_nano), type = "l", col = geth_color)
 44 | ```
 45 | 
 46 | ```{r fig.width=20}
 47 | plot(NULL, xlim=c(1, N), ylim=c(1, 3.5))
 48 | par(ylog=TRUE)
 49 | lines(time$clock_gettime, type = "l", col = "red")
 50 | lines(time$time, type = "l", col = "blue")
 51 | lines(time$runtime_nano, type = "l", col = geth_color)
 52 | lines(time$gotsc, type = "l", col = "purple")
 53 | lines(time$time_evmone, type = "l", col = evmone_color)
 54 | lines(time$time_openethereum, type = "l", col = openethereum_color)
 55 | ```
 56 | A closer look at the same, only relevant wallclock measurements:
 57 | 
 58 | ```{r fig.width=20}
 59 | plot(NULL, xlim=c(1, 500), ylim=c(15, 25.5))
 60 | lines(time$runtime_nano, type = "l", col = geth_color)
 61 | lines(time$time_evmone, type = "l", col = evmone_color)
 62 | lines(time$time_openethereum, type = "l", col = openethereum_color)
 63 | ```
 64 | And over the entire period, smoothed out:
 65 | 
 66 | ```{r fig.width=20}
 67 | min = 16
 68 | max = 70
 69 | # moving average; from https://stackoverflow.com/questions/743812/calculating-moving-average
 70 | ma <- function(x, n = 1000){stats::filter(x, rep(1 / n, n), sides = 2)}
 71 | 
 72 | plot(NULL, xlim=c(1, N), ylim=c((min), (max)))
 73 | lines(ma(time$runtime_nano), col = geth_color)
 74 | lines(ma(time$time_evmone), col = evmone_color)
 75 | lines(ma(time$time_openethereum), col = openethereum_color)
 76 | ```
 77 | 
 78 | It seems `runtimeNano` is the most accurate and stable one. We could perhaps subtract the `Min.` of this from all the measurements
 79 | 
 80 | ```{r fig.width=20}
 81 | summary(time)
 82 | ```
 83 | 
 84 | ```{r fig.width=20}
 85 | boxplot(time)
 86 | ```
 87 | 
 88 | Explore the effect of the overhead increasing for all timers. We're trimming down the data frame to observe correlations sensibly:
 89 | ```{r fig.width=20}
 90 | time_sample = time[sample(nrow(time), 100), ]
 91 | var(time_sample)
 92 | cor(time_sample)
 93 | 
 94 | # cleanup
 95 | rm(time_sample)
 96 | ```
 97 | Deeper analysis of the two best clocks: `runtimeNano` and `gotsc`, plus the `evmone` and `openethereum` wall clocks:
 98 | 
 99 | ```{r fig.width=20}
100 | par(mfrow=c(4,1))
101 | frequencies = sort(table(time$runtime_nano), decreasing=TRUE)
102 | # take all frequencies minus the most outlying ones
103 | n = length(frequencies)/2
104 | plot(frequencies[1:n], col=geth_color)
105 | frequencies = sort(table(time$gotsc), decreasing=TRUE)
106 | n = length(frequencies)/2
107 | plot(frequencies[1:n], col="purple")
108 | frequencies = sort(table(time$time_evmone), decreasing=TRUE)
109 | n = length(frequencies)/2
110 | plot(frequencies[1:n], col=evmone_color)
111 | frequencies = sort(table(time$time_openethereum), decreasing=TRUE)
112 | n = length(frequencies)/2
113 | plot(frequencies[1:n], col=openethereum_color)
114 | 
115 | # cleanup
116 | rm(frequencies)
117 | ```
118 | 
119 | ```{r fig.width=20}
120 | quantile(time$runtime_nano, probs=c(0.85, 0.9, 0.95, 0.99, 0.999, 0.9999, 0.99999))
121 | quantile(time$gotsc, probs=c(0.85, 0.9, 0.95, 0.99, 0.999, 0.9999, 0.99999))
122 | quantile(time$time_evmone, probs=c(0.85, 0.9, 0.95, 0.99, 0.999, 0.9999, 0.99999))
123 | quantile(time$time_openethereum, probs=c(0.85, 0.9, 0.95, 0.99, 0.999, 0.9999, 0.99999))
124 | ```
125 | 
126 | Summary:
127 | 
128 | 1. ~We should discard about 5000 first observations~ EDIT: with our current clocks, and fresh measurements from @magdasta, there doesn't seem to be a need. This was machine specific most likely.
129 | 2. It is probably a good idea to monitor and register the timer overhead during the opcode measurements (**TODO**).
130 | 2. Due to periods of increased overhead, sometimes the measurements might be over-timed for several consecutive measurements. Should we discard all measurements where "just time" measurement is above a threshold? (**TODO**)
131 | 3. `runtimeNano` is clearly the winner, but it still has high values quite often, and is subject to large overhead during warm-up and during the "temporary increase periods"
132 |     - **UPDATE** - it is a winner in wall-clock category, but probably CPU cycles using `gotsc` (based on TSC and in-sync with what `evmone` measurements use - RDTSC) is even better
133 | 4. We can also subtract the minimum (or mean/median) observed timer overhead of `runtimeNano` from all the measurements
134 |     - **UPDATE** - if we go for CPU cycles it's 35. Interestingly though, the `gotsc` library tells us "TSC Overhead: 31"
135 | 5. Next step would be to consider subtracting more, considering it's a justified move (**TODO**)
136 | 6. Alternatively, we could do more in-depth analysis of the behaviors observed and try to normalize the timer readings more (**TODO** optional)
137 | 
138 | **TODO** also another timer, another one tried by `chfast` for evmone https://godoc.org/github.com/lanl/go-papi
139 | **TODO** explore C++ and Rust timers similarly (we have `runtimeNano` counterparts, do others)
140 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/check_clocksource.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Exits with status 1 unless the kernel's current clocksource is `tsc`.
 3 | set -xe
 4 | 
 5 | CLOCKSOURCE_FILE=/sys/devices/system/clocksource/clocksource0/current_clocksource
 6 | 
 7 | # Read the value once, outside of the `if`: `set -e` is not honoured inside a condition, so a
 8 | # failing `cat` there left `[` with a missing operand and the check passed silently.
 9 | current=$(cat "$CLOCKSOURCE_FILE")
10 | 
11 | if [ "$current" != 'tsc' ]; then
12 |   echo "clocksource should be tsc, found:"
13 |   printf '%s\n' "$current"
14 |   echo "see docker_timer.md somewhere in the docses"
15 |   exit 1
16 | fi
17 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/instrumentation_measurement/README.md:
--------------------------------------------------------------------------------
 1 | # Running with program generator
 2 | 
 3 | From the `src` directory (the paths in the commands below are relative to it):
 4 | 
 5 | ```
 6 | python3 program_generator/program_generator.py generate --fullCsv | python3 instrumentation_measurement/measurements.py measure --mode all --sampleSize=50 --nSamples=3 > ../../geth.csv
 7 | ```
 8 | 
 9 | By default programs are executed in geth. To change EVM specify `--evm` parameter:
10 | 
11 | ```
12 | python3 program_generator/program_generator.py generate --fullCsv | python3 instrumentation_measurement/measurements.py measure --mode all --sampleSize=50 --nSamples=3 --evm evmone > ../../evmone.csv
13 | ```
14 | 
15 | ### Running measurements via `docker`
16 | 
17 | From the repo root.
18 | 
19 | Build (pick tag name as desired):
20 | 
21 | ```
22 | sudo docker build -t measurements-geth -f Dockerfile.geth .
23 | ```
24 | 
25 | Run:
26 | 
27 | ```
28 | sudo docker run --rm --privileged --security-opt seccomp:unconfined \
29 |   -it measurements-geth \
30 |   sh -c "cd src && python3 program_generator/program_generator.py generate --fullCsv | python3 instrumentation_measurement/measurements.py measure --mode all --sampleSize=5 --nSamples=1"
31 | ```
32 | 
33 | For other EVMs use respective `Dockerfile`s and use the `--evm` flag on the `measure` command, e.g. `measure --evm openethereum`
34 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/instrumentation_measurement/clock_resolution_go/main.go:
--------------------------------------------------------------------------------
 1 | // from https://stackoverflow.com/questions/14610459/how-precise-is-gos-time-really
 2 | 
 3 | // run this to compare the interval measurement with least overhead
 4 | 
 5 | package main
 6 | 
 7 | import (
 8 | 	"fmt"
 9 | 	"github.com/dterei/gotsc"
10 | 	"golang.org/x/sys/unix"
11 | 	"os"
12 | 	"time"
13 | )
14 | 
15 | import _ "unsafe"
16 | 
17 | // runtimeNano returns the current value of the runtime clock in nanoseconds.
18 | //go:linkname runtimeNano runtime.nanotime
19 | func runtimeNano() int64
20 | 
21 | // main writes the monotonic clock resolution and the TSC overhead to stderr and then
22 | // prints to stdout a CSV with N rows, each holding the difference between two
23 | // back-to-back readings of four timers (i.e. the overhead of each timer).
24 | func main() {
25 | 	res := unix.Timespec{}
26 | 	unix.ClockGetres(unix.CLOCK_MONOTONIC, &res)
27 | 	fmt.Fprintf(os.Stderr, "Monotonic clock resolution is %d nanoseconds\n", res.Nsec)
28 | 
29 | 	tsc := gotsc.TSCOverhead()
30 | 	fmt.Fprintf(os.Stderr, "TSC Overhead: %d\n", tsc)
31 | 
32 | 	const N = 2000000
33 | 	// declared before the loop, presumably to keep declarations out of the timed section
34 | 	res1 := unix.Timespec{}
35 | 	res2 := unix.Timespec{}
36 | 	sinceClockGettime := int64(0)
37 | 	time1 := time.Time{}
38 | 	time2 := time.Time{}
39 | 	sinceTime := time.Duration(0)
40 | 	runtimeNano1 := int64(0)
41 | 	runtimeNano2 := int64(0)
42 | 	sinceRuntimeNano := int64(0)
43 | 	gotsc1 := uint64(0)
44 | 	gotsc2 := uint64(0)
45 | 	sinceGotsc := uint64(0)
46 | 
47 | 	fmt.Println("clock_gettime,time,runtime_nano,gotsc")
48 | 
49 | 	// start at 0 so that exactly N rows are emitted
50 | 	for i := 0; i < N; i++ {
51 | 		unix.ClockGettime(unix.CLOCK_MONOTONIC, &res1)
52 | 		unix.ClockGettime(unix.CLOCK_MONOTONIC, &res2)
53 | 		// include the seconds field: comparing only Nsec goes negative by ~1e9 whenever
54 | 		// the two readings straddle a second boundary
55 | 		sinceClockGettime = int64(res2.Sec-res1.Sec)*1e9 + int64(res2.Nsec-res1.Nsec)
56 | 		time1 = time.Now()
57 | 		time2 = time.Now()
58 | 		sinceTime = time2.Sub(time1)
59 | 		runtimeNano1 = runtimeNano()
60 | 		runtimeNano2 = runtimeNano()
61 | 		sinceRuntimeNano = runtimeNano2 - runtimeNano1
62 | 
63 | 		gotsc1 = gotsc.BenchStart()
64 | 		gotsc2 = gotsc.BenchEnd()
65 | 		sinceGotsc = gotsc2 - gotsc1
66 | 		fmt.Printf("%d,%d,%d,%d\n", sinceClockGettime, sinceTime, sinceRuntimeNano, sinceGotsc)
67 | 	}
68 | }
69 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/instrumentation_measurement/geth/README.md:
--------------------------------------------------------------------------------
1 | ### `geth` instrumentation
2 | 
3 | See [here](/docs/notes/instrumentation_measurement/geth.md) for description and notes.
4 | 
5 | ### Usage
6 | 
7 | 0. Need to use `go-ethereum` with moved `CaptureState` in `github.com/ethereum/go-ethereum/core/vm/interpreter.go`, `CaptureState` must be after `execute`
8 | 1. `GOGC=off go run main.go --bytecode 62FFFFFF60002062FFFFFF600020`
9 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/instrumentation_measurement/geth/instrumenter/instrumenter.go:
--------------------------------------------------------------------------------
  1 | // based on `StructLogger` from `github.com/ethereum/go-ethereum/core/vm/logger.go:123`
  2 | 
  3 | package instrumenter
  4 | 
  5 | import (
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"math/big"
  9 | 	"time"
 10 | 
 11 | 	"github.com/ethereum/go-ethereum/common"
 12 | 	"github.com/ethereum/go-ethereum/core/vm"
 13 | )
 14 | 
 15 | type LogConfig struct {
 16 | }
 17 | 
 18 | //go:generate gencodec -type InstrumenterLog -field-override structLogMarshaling -out gen_structlog.go
 19 | 
 20 | // InstrumenterLog is a single record appended by CaptureState: the program counter and opcode
 21 | // it was called with, the time measured for that step and the overhead of the timer itself.
 22 | type InstrumenterLog struct {
 23 | 	Pc          uint64    `json:"pc"`
 24 | 	Op          vm.OpCode `json:"op"`
 25 | 	TimeNs      int64     `json:"timeNs"`
 26 | 	TimerTimeNs int64     `json:"timerTimeNs"`
 27 | }
 28 | 
 29 | // InstrumenterLogger is a vm.EVM state logger and implements Tracer.
 30 | type InstrumenterLogger struct {
 31 | 	cfg LogConfig
 32 | 	// logs grows by one entry per CaptureState call; startTime is the latest timer start
 33 | 	logs      []InstrumenterLog
 34 | 	startTime int64
 35 | 
 36 | 	// worker fields, kept on the struct just to avoid reallocation of local vars in CaptureState
 37 | 	opCodeDuration int64
 38 | 	timerDuration  int64
 39 | 	log            InstrumenterLog
 40 | }
 41 | 
 42 | // NewInstrumenterLogger creates a logger. A non-nil cfg is copied into the logger;
 43 | // with a nil cfg the zero-value LogConfig is used.
 44 | func NewInstrumenterLogger(cfg *LogConfig) *InstrumenterLogger {
 45 | 	if cfg == nil {
 46 | 		return &InstrumenterLogger{}
 47 | 	}
 48 | 	return &InstrumenterLogger{cfg: *cfg}
 49 | }
 50 | 
 51 | // CaptureStart implements the Tracer interface to initialize the tracing operation.
 52 | func (l *InstrumenterLogger) CaptureStart(from common.Address, to common.Address, create bool, input []byte, gas uint64, value *big.Int) error {
 53 | 	l.startTime = runtimeNano()
 54 | 	return nil
 55 | }
 56 | 
 57 | // CaptureState records the time elapsed since the last timer start for (pc, op) and appends it to l.logs.
 58 | func (l *InstrumenterLogger) CaptureState(env *vm.EVM, pc uint64, op vm.OpCode, gas, cost uint64, memory *vm.Memory, stack *vm.Stack, rStack *vm.ReturnStack, rData []byte, contract *vm.Contract, depth int, err error) error {
 59 | 	// read the timer first thing; this is the end of the current iteration (startTime is deducted below)
 60 | 	l.opCodeDuration = runtimeNano()
 61 | 
 62 | 	// measure the most current timer overhead: take a second reading right away and deduct the
 63 | 	// previous timer reading from it
 64 | 	l.timerDuration = runtimeNano()
 65 | 	l.timerDuration -= l.opCodeDuration
 66 | 	l.opCodeDuration -= l.startTime
 67 | 
 68 | 	// add to log
 69 | 	l.log = InstrumenterLog{pc, op, l.opCodeDuration, l.timerDuration}
 70 | 	l.logs = append(l.logs, l.log)
 71 | 
 72 | 	// start timing the next iteration; done last so that the bookkeeping above is not timed
 73 | 	l.startTime = runtimeNano()
 74 | 	return nil
 75 | }
 76 | 
 77 | // CaptureFault implements the Tracer interface to trace an execution fault
 78 | // while running an opcode.
 79 | func (l *InstrumenterLogger) CaptureFault(env *vm.EVM, pc uint64, op vm.OpCode, gas, cost uint64, memory *vm.Memory, stack *vm.Stack, rStack *vm.ReturnStack, contract *vm.Contract, depth int, err error) error {
 80 | 	return nil
 81 | }
 82 | 
 83 | // CaptureEnd is called after the call finishes to finalize the tracing.
 84 | func (l *InstrumenterLogger) CaptureEnd(output []byte, gasUsed uint64, t time.Duration, err error) error {
 85 | 	return nil
 86 | }
 87 | 
 88 | // InstrumenterLogs returns the captured log entries.
 89 | func (l *InstrumenterLogger) InstrumenterLogs() []InstrumenterLog { return l.logs }
 90 | 
 91 | // WriteTrace prints one human-readable line per log entry to writer.
 92 | func WriteTrace(writer io.Writer, logs []InstrumenterLog) {
 93 | 	for i := range logs {
 94 | 		fmt.Fprintf(writer, "%-16spc=%08d time_ns=%v timer_time_ns=%v", logs[i].Op, logs[i].Pc, logs[i].TimeNs, logs[i].TimerTimeNs)
 95 | 		fmt.Fprintln(writer)
 96 | 	}
 97 | }
 98 | 
 99 | func WriteCSVTrace(writer io.Writer, logs []InstrumenterLog, runId int) {
100 | 	// one row per entry: runId, index of the entry, TimeNs, TimerTimeNs; the matching CSV header lives in measurements.py
101 | 	for idx := range logs {
102 | 		fmt.Fprintf(writer, "%v,%v,%v,%v", runId, idx, logs[idx].TimeNs, logs[idx].TimerTimeNs)
103 | 		fmt.Fprintln(writer)
104 | 	}
105 | }
106 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/instrumentation_measurement/geth/instrumenter/time.go:
--------------------------------------------------------------------------------
 1 | package instrumenter
 2 | 
 3 | // this portion ensures that we have access to the least-overhead timer
 4 | 
 5 | import _ "unsafe"
 6 | 
 7 | // runtimeNano returns the current value of the runtime clock in nanoseconds.
 8 | //go:linkname runtimeNano runtime.nanotime
 9 | func runtimeNano() int64
10 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/instrumentation_measurement/geth/main.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"flag"
  5 | 	"fmt"
  6 | 	"math"
  7 | 	"math/big"
  8 | 	"os"
  9 | 	go_runtime "runtime"
 10 | 	"time"
 11 | 
 12 |   _ "unsafe"
 13 | 
 14 | 	"github.com/ethereum/go-ethereum/common"
 15 | 	"github.com/ethereum/go-ethereum/core/rawdb"
 16 | 	"github.com/ethereum/go-ethereum/core/state"
 17 | 	"github.com/ethereum/go-ethereum/core/vm"
 18 | 	"github.com/ethereum/go-ethereum/core/vm/runtime"
 19 | 	"github.com/ethereum/go-ethereum/crypto"
 20 | 	"github.com/ethereum/go-ethereum/params"
 21 | )
 22 | 
 23 | // main parses the flags, runs one warm-up execution and then sampleSize measured executions.
 24 | func main() {
 25 | 	bytecodePtr := flag.String("bytecode", "", "EVM bytecode to execute and measure")
 26 | 	sampleSizePtr := flag.Int("sampleSize", 1, "Size of the sample - number of measured repetitions of execution")
 27 | 	printEachPtr := flag.Bool("printEach", true, "If false, printing of each execution time is skipped")
 28 | 	printCSVPtr := flag.Bool("printCSV", false, "If true, will print a CSV with standard results to STDOUT")
 29 | 	modePtr := flag.String("mode", "all", "Measurement mode. Available options: all, total")
 30 | 
 31 | 	flag.Parse()
 32 | 
 33 | 	bytecode := common.Hex2Bytes(*bytecodePtr)
 34 | 	sampleSize := *sampleSizePtr
 35 | 	printEach := *printEachPtr
 36 | 	printCSV := *printCSVPtr
 37 | 	mode := *modePtr
 38 | 
 39 | 	if mode != "all" && mode != "total" {
 40 | 		fmt.Fprintln(os.Stderr, "Invalid measurement mode: ", mode)
 41 | 		os.Exit(1)
 42 | 	}
 43 | 
 44 | 	cfg := new(runtime.Config)
 45 | 	setDefaults(cfg)
 46 | 	// from `github.com/ethereum/go-ethereum/core/vm/runtime/runtime.go:109`
 47 | 	cfg.State, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil)
 48 | 
 49 | 	// Warm-up. **NOTE** we're keeping tracing on during warm-up, otherwise measurements are off
 50 | 	cfg.EVMConfig.Debug = false
 51 | 	cfg.EVMConfig.Instrumenter = vm.NewInstrumenterLogger()
 52 | 	retWarmUp, _, errWarmUp := runtime.Execute(bytecode, nil, cfg)
 53 | 	// End warm-up
 54 | 
 55 | 	sampleStart := time.Now()
 56 | 	for i := 0; i < sampleSize; i++ {
 57 | 		if mode == "all" {
 58 | 			MeasureAll(cfg, bytecode, printEach, printCSV, i)
 59 | 		} else {
 60 | 			MeasureTotal(cfg, bytecode, printEach, printCSV, i)
 61 | 		}
 62 | 	}
 63 | 
 64 | 	sampleDuration := time.Since(sampleStart)
 65 | 
 66 | 	if errWarmUp != nil {
 67 | 		fmt.Fprintln(os.Stderr, errWarmUp)
 68 | 	}
 69 | 	fmt.Fprintln(os.Stderr, "Program: ", *bytecodePtr)
 70 | 	fmt.Fprintln(os.Stderr, "Return:", retWarmUp)
 71 | 	fmt.Fprintln(os.Stderr, "Sample duration:", sampleDuration)
 72 | 
 73 | }
 74 | // MeasureTotal runs the bytecode once and times the whole runtime.Execute call (not single opcodes).
 75 | func MeasureTotal(cfg *runtime.Config, bytecode []byte, printEach bool, printCSV bool, sampleId int) {
 76 |   cfg.EVMConfig.Instrumenter = vm.NewInstrumenterLogger()
 77 |   go_runtime.GC()
 78 |   // NOTE(review): printEach is accepted but not used in this mode
 79 |   cfg.EVMConfig.Instrumenter.StartTime =  runtimeNano()
 80 |   _, _, err := runtime.Execute(bytecode, nil, cfg)
 81 |   // Two back-to-back timer reads: the first one closes the execution interval, the second one
 82 |   // minus the first gives the timer overhead; StartTime is deducted only afterwards
 83 |   cfg.EVMConfig.Instrumenter.TotalExecutionDuration = runtimeNano()
 84 |   cfg.EVMConfig.Instrumenter.TimerDuration = runtimeNano()
 85 |   cfg.EVMConfig.Instrumenter.TimerDuration -= cfg.EVMConfig.Instrumenter.TotalExecutionDuration
 86 |   cfg.EVMConfig.Instrumenter.TotalExecutionDuration -=  cfg.EVMConfig.Instrumenter.StartTime
 87 | 
 88 |   if err != nil {
 89 |     fmt.Fprintln(os.Stderr, err)
 90 |   }
 91 | 
 92 |   if printCSV {
 93 |     vm.WriteCSVInstrumentationTotal(os.Stdout, cfg.EVMConfig.Instrumenter, sampleId)
 94 |   }
 95 | }
 96 | 
 97 | func MeasureAll(cfg *runtime.Config, bytecode []byte, printEach bool, printCSV bool, sampleId int) {
 98 | 	// fresh instrumenter and a forced GC before every measured run
 99 | 	cfg.EVMConfig.Instrumenter = vm.NewInstrumenterLogger()
100 | 	go_runtime.GC()
101 | 	begin := time.Now()
102 | 	_, _, runErr := runtime.Execute(bytecode, nil, cfg)
103 | 	elapsed := time.Since(begin)
104 | 	if runErr != nil {
105 | 		fmt.Fprintln(os.Stderr, runErr)
106 | 	}
107 | 
108 | 	// logs gathered by the instrumenter during this run
109 | 	records := cfg.EVMConfig.Instrumenter.Logs
110 | 	if printEach {
111 | 		// human-readable report goes to stderr
112 | 		fmt.Fprintln(os.Stderr, "Run duration:", elapsed)
113 | 		vm.WriteInstrumentation(os.Stderr, records)
114 | 	}
115 | 	if printCSV {
116 | 		vm.WriteCSVInstrumentationAll(os.Stdout, records, sampleId)
117 | 	}
118 | }
119 | 
120 | // setDefaults fills every unset field of cfg with a default value. Copied directly from
121 | // github.com/ethereum/go-ethereum/core/vm/runtime/runtime.go so that we skip this in measured code
122 | func setDefaults(cfg *runtime.Config) {
123 | 	if cfg.ChainConfig == nil {
124 | 		cfg.ChainConfig = &params.ChainConfig{
125 | 			ChainID:             big.NewInt(1),
126 | 			HomesteadBlock:      new(big.Int),
127 | 			DAOForkBlock:        new(big.Int),
128 | 			DAOForkSupport:      false,
129 | 			EIP150Block:         new(big.Int),
130 | 			EIP150Hash:          common.Hash{},
131 | 			EIP155Block:         new(big.Int),
132 | 			EIP158Block:         new(big.Int),
133 | 			ByzantiumBlock:      new(big.Int),
134 | 			ConstantinopleBlock: new(big.Int),
135 | 			PetersburgBlock:     new(big.Int),
136 | 			IstanbulBlock:       new(big.Int),
137 | 			MuirGlacierBlock:    new(big.Int),
138 | 			YoloV2Block:         nil,
139 | 		}
140 | 	}
141 | 
142 | 	if cfg.Difficulty == nil {
143 | 		cfg.Difficulty = new(big.Int)
144 | 	}
145 | 	if cfg.Time == nil {
146 | 		cfg.Time = big.NewInt(time.Now().Unix())
147 | 	}
148 | 	if cfg.GasLimit == 0 {
149 | 		cfg.GasLimit = math.MaxUint64
150 | 	}
151 | 	if cfg.GasPrice == nil {
152 | 		cfg.GasPrice = new(big.Int)
153 | 	}
154 | 	if cfg.Value == nil {
155 | 		cfg.Value = new(big.Int)
156 | 	}
157 | 	if cfg.BlockNumber == nil {
158 | 		cfg.BlockNumber = new(big.Int)
159 | 	}
160 | 	if cfg.GetHashFn == nil {
161 | 		cfg.GetHashFn = func(n uint64) common.Hash {
162 | 			return common.BytesToHash(crypto.Keccak256([]byte(new(big.Int).SetUint64(n).String())))
163 | 		}
164 | 	}
165 | }
166 | 
167 | // runtimeNano returns the current value of the runtime clock in nanoseconds. It has no body here: the go:linkname directive below binds it to runtime.nanotime.
168 | //go:linkname runtimeNano runtime.nanotime
169 | func runtimeNano() int64
170 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/program_generator/README.md:
--------------------------------------------------------------------------------
 1 | ## Program generator
 2 | 
 3 | ### Installation
 4 | 
 5 | ```
 6 | virtualenv --python=python3 ~/.venv/gce
 7 | source ~/.venv/gce/bin/activate
 8 | pip install -r requirements.txt
 9 | ```
10 | 
11 | ### Usage
12 | 
13 | ```
14 | python3 program_generator.py generate --help
15 | ```
16 | 
17 | #### Use together with `instrumenter.go`
18 | 
19 | From `src`
20 | 
21 | ```
22 | export GOPATH=
23 | export GOGC=off
24 | export GO111MODULE=off
25 | python3 program_generator/program_generator.py generate | xargs -L1 go run ./instrumentation_measurement/geth/main.go --bytecode
26 | ```
27 | 
28 | #### (Ewasm) use together with `openethereum-evm`
29 | 
30 | From `src`
31 | 
32 | ```
33 | # ensure `wabt` binaries are in PATH
34 | # ensure `parity-evm` binaries are in PATH
35 | python3 program_generator/program_generator.py generate --ewasm | xargs -L1 parity-evm --gas 5000 --chain ../../openethereum/ethcore/res/instant_seal.json --code
36 | ```
37 | 
38 | #### Use together with `measurements.py`
39 | 
40 | From `src`
41 | 
42 | (`go` exports as above)
43 | 
44 | ```
45 | python3 program_generator/program_generator.py generate --fullCsv | python3 instrumentation_measurement/measurements.py measure --sampleSize=50 --nSamples=4 > ../../result_geth.csv
46 | ```
47 | 
48 | or similar.
49 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/program_generator/constants.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # CONSTANTS: opcode tables and filler snippets (module lives in program_generator/)
 3 | # Each EVM_* table below is a multi-line string with one "<opcode byte> <mnemonic>" pair per line.
 4 | EVM_PUSHES = """
 5 | 0x60 PUSH1
 6 | 0x61 PUSH2
 7 | 0x62 PUSH3
 8 | 0x63 PUSH4
 9 | 0x64 PUSH5
10 | 0x65 PUSH6
11 | 0x66 PUSH7
12 | 0x67 PUSH8
13 | 0x68 PUSH9
14 | 0x69 PUSH10
15 | 0x6a PUSH11
16 | 0x6b PUSH12
17 | 0x6c PUSH13
18 | 0x6d PUSH14
19 | 0x6e PUSH15
20 | 0x6f PUSH16
21 | 0x70 PUSH17
22 | 0x71 PUSH18
23 | 0x72 PUSH19
24 | 0x73 PUSH20
25 | 0x74 PUSH21
26 | 0x75 PUSH22
27 | 0x76 PUSH23
28 | 0x77 PUSH24
29 | 0x78 PUSH25
30 | 0x79 PUSH26
31 | 0x7a PUSH27
32 | 0x7b PUSH28
33 | 0x7c PUSH29
34 | 0x7d PUSH30
35 | 0x7e PUSH31
36 | 0x7f PUSH32
37 | """  # PUSH1..PUSH32 occupy opcodes 0x60-0x7f
38 | EVM_DUPS = """
39 | 0x80 DUP1
40 | 0x81 DUP2
41 | 0x82 DUP3
42 | 0x83 DUP4
43 | 0x84 DUP5
44 | 0x85 DUP6
45 | 0x86 DUP7
46 | 0x87 DUP8
47 | 0x88 DUP9
48 | 0x89 DUP10
49 | 0x8a DUP11
50 | 0x8b DUP12
51 | 0x8c DUP13
52 | 0x8d DUP14
53 | 0x8e DUP15
54 | 0x8f DUP16
55 | """  # DUP1..DUP16 occupy opcodes 0x80-0x8f
56 | EVM_SWAPS = """
57 | 0x90 SWAP1
58 | 0x91 SWAP2
59 | 0x92 SWAP3
60 | 0x93 SWAP4
61 | 0x94 SWAP5
62 | 0x95 SWAP6
63 | 0x96 SWAP7
64 | 0x97 SWAP8
65 | 0x98 SWAP9
66 | 0x99 SWAP10
67 | 0x9a SWAP11
68 | 0x9b SWAP12
69 | 0x9c SWAP13
70 | 0x9d SWAP14
71 | 0x9e SWAP15
72 | 0x9f SWAP16
73 | """  # SWAP1..SWAP16 occupy opcodes 0x90-0x9f
74 | EVM_SOMETHING = '600050'  # hex bytecode: 0x60 (PUSH1) with operand 0x00, then 0x50 (POP)
75 | EVM_SOMETHING_LENGTH = 2  # presumably the instruction count of EVM_SOMETHING (PUSH1 + POP) -- TODO confirm against usage
76 | 
77 | EWASM_PREAMBLE = """
78 | (module
79 |   (func (export "call") (local $x i32)
80 | """  # opens a module and an exported "call" function with one i32 local $x; both parentheses are left open
81 | EWASM_DROP = """
82 |     drop
83 | """  # a single `drop` instruction
84 | EWASM_CLOSING_PARENTHESIS = """
85 | ))
86 | """  # closes the two parentheses opened by EWASM_PREAMBLE
87 | EWASM_SOMETHING = """
88 |     i32.const 1234
89 |     drop
90 | """  # pushes an i32 constant and immediately drops it
91 | EWASM_SOMETHING_LENGTH = 2  # presumably the instruction count of EWASM_SOMETHING (i32.const + drop) -- TODO confirm against usage
92 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/program_generator/data/README.md:
--------------------------------------------------------------------------------
 1 | `opcodes.csv` from https://github.com/djrtwo/evm-opcode-gas-costs/blob/master/opcode-gas-costs_EIP-150_revision-1e18248_2017-04-12.csv
 2 | 
 3 | -   UPDATE: RETURNDATASIZE and RETURNDATACOPY from the EIP
 4 | -   it's still missing PUSH/DUP/SWAP opcodes in standard format, so the script fills this in
 5 | -   UPDATE: REVERT from the EIP
 6 | 
 7 | `selection.csv` from specs of "EVM Gas Cost Estimator.pdf"
 8 | 
 9 | ---
10 | 
11 | `opcodes_ewasm.csv` from specs of "EVM Gas Cost Estimator.pdf" with corrections (dropping a stray `f64` instruction).
12 | 
13 | -   stack requirements taken from [webassembly.github.io page](https://webassembly.github.io/spec/core/appendix/index-instructions.html)
14 | -   parameters added
15 | 
16 | `selection_ewasm_from_spec.csv` from specs of "EVM Gas Cost Estimator.pdf"
17 | 
18 | `selection_ewasm.csv` taken from the above, limited to selection provided by `chfast`, excluding irrelevant flow control meta-instructions.
19 | 
20 | `selection_ewasm_first_pass.csv` taken from the above, excluding memory instructions and `64` bit instructions for a working first draft program generation
21 | 
22 | -   UPDATE: `0xC0 i32.extend8_s`, `0xC1 i32.extend16_s` return `Error: EVM: Internal error: Error deserializing contract code (UnknownOpcode(192))` (and `193` resp.) from `openethereum` Ewasm, dropping them for first pass
23 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/program_generator/data/opcodes.csv:
--------------------------------------------------------------------------------
 1 | Value,Mnemonic,Gas Used,Subset,Removed from stack,Added to stack,Notes,Formula Notes
 2 | 0x00,STOP,0,zero,0,0,Halts execution.,
 3 | 0x01,ADD,3,verylow,2,1,Addition operation,
 4 | 0x02,MUL,5,low,2,1,Multiplication operation.,
 5 | 0x03,SUB,3,verylow,2,1,Subtraction operation.,
 6 | 0x04,DIV,5,low,2,1,Integer division operation.,
 7 | 0x05,SDIV,5,low,2,1,Signed integer division operation (truncated).,
 8 | 0x06,MOD,5,low,2,1,Modulo remainder operation,
 9 | 0x07,SMOD,5,low,2,1,Signed modulo remainder operation.,
10 | 0x08,ADDMOD,8,mid,3,1,Modulo addition operation.,
11 | 0x09,MULMOD,8,mid,3,1,Modulo multiplication operation.,
12 | 0x0a,EXP,(exp == 0) ? 10 : (10 + 10 * (1 + log256(exp))),,2,1,Exponential operation.,"If exponent is 0, gas used is 10. If exponent is greater than 0, gas used is 10 plus 10 times a factor related to how large the log of the exponent is."
13 | 0x0b,SIGNEXTEND,5,low,2,1,Extend length of two’s complement signed integer.,
14 | 0x10,LT,3,verylow,2,1,Less-than comparison.,
15 | 0x11,GT,3,verylow,2,1,Greater-than comparison.,
16 | 0x12,SLT,3,verylow,2,1,Signed less-than comparison.,
17 | 0x13,SGT,3,verylow,2,1,Signed greater-than comparison.,
18 | 0x14,EQ,3,verylow,2,1,Equality comparison.,
19 | 0x15,ISZERO,3,verylow,1,1,Simple not operator.,
20 | 0x16,AND,3,verylow,2,1,Bitwise AND operation.,
21 | 0x17,OR,3,verylow,2,1,Bitwise OR operation,
22 | 0x18,XOR,3,verylow,2,1,Bitwise XOR operation.,
23 | 0x19,NOT,3,verylow,1,1,Bitwise NOT operation.,
24 | 0x1a,BYTE,3,verylow,2,1,Retrieve single byte from word,
25 | 0x20,SHA3,30 + 6 * (size of input in words),,2,1,Compute Keccak-256 hash.,30 is the paid for the operation plus 6 paid for each word (rounded up) for the input data.
26 | 0x30,ADDRESS,2,base,0,1,Get address of currently executing account.,
27 | 0x31,BALANCE,400,,1,1,Get balance of the given account.,
28 | 0x32,ORIGIN,2,base,0,1,Get execution origination address.,
29 | 0x33,CALLER,2,base,0,1,Get caller address.,
30 | 0x34,CALLVALUE,2,base,0,1,Get deposited value by the instruction/transaction responsible for this execution.,
31 | 0x35,CALLDATALOAD,3,verylow,1,1,Get input data of current environment.,
32 | 0x36,CALLDATASIZE,2,base,0,1,Get size of input data in current environment.,
33 | 0x37,CALLDATACOPY,"2 + 3 * (number of words copied, rounded up)",,3,0,Copy input data in current environment to memory.,2 is paid for the operation plus 3 for each word copied (rounded up).
34 | 0x38,CODESIZE,2,base,0,1,Get size of code running in current environment.,
35 | 0x39,CODECOPY,"2 + 3 * (number of words copied, rounded up)",,3,0,Copy code running in current environment to memory.,2 is paid for the operation plus 3 for each word copied (rounded up).
36 | 0x3a,GASPRICE,2,base,0,1,Get price of gas in current environment.,
37 | 0x3b,EXTCODESIZE,700,extcode,1,1,Get size of an account’s code.,
38 | 0x3c,EXTCODECOPY,"700 + 3 * (number of words copied, rounded up)",,4,0,Copy an account’s code to memory.,700 is paid for the operation plus 3 for each word copied (rounded up).
39 | 0x3d,RETURNDATASIZE,2,,0,1,Pushes the size of the return data buffer onto the stack,
40 | 0x3e,RETURNDATACOPY,"3 + 3 * ceil(amount / 32)",,3,0,"This opcode has similar semantics to CALLDATACOPY, but instead of copying data from the call data, it copies data from the return data buffer",
41 | 0x40,BLOCKHASH,20,,1,1,Get the hash of one of the 256 most recent complete blocks.,
42 | 0x41,COINBASE,2,base,0,1,Get the block’s beneficiary address.,
43 | 0x42,TIMESTAMP,2,base,0,1,Get the block’s timestamp.,
44 | 0x43,NUMBER,2,base,0,1,Get the block’s number.,
45 | 0x44,DIFFICULTY,2,base,0,1,Get the block’s difficulty.,
46 | 0x45,GASLIMIT,2,base,0,1,Get the block’s gas limit.,
47 | 0x50,POP,2,base,1,0,Remove item from stack.,
48 | 0x51,MLOAD,3,verylow,1,1,Load word from memory.,
49 | 0x52,MSTORE,3,verylow,2,0,Save word to memory,
50 | 0x53,MSTORE8,3,verylow,2,0,Save byte to memory.,
51 | 0x54,SLOAD,200,,1,1,Load word from storage,
52 | 0x55,SSTORE,((value != 0) && (storage_location == 0)) ? 20000 : 5000,,1,1,Save word to storage.,20000 is paid when storage value is set to non-zero from zero. 5000 is paid when the storage value's zeroness remains unchanged or is set to zero.
53 | 0x56,JUMP,8,mid,1,0,Alter the program counter,
54 | 0x57,JUMPI,10,high,2,0,Conditionally alter the program counter.,
55 | 0x58,PC,2,base,0,1,Get the value of the program counter prior to the increment corresponding to this instruction.,
56 | 0x59,MSIZE,2,base,0,1,Get the size of active memory in bytes.,
57 | 0x5a,GAS,2,base,0,1,"Get the amount of available gas, including the corresponding reduction for the cost of this instruction.",
58 | 0x5b,JUMPDEST,1,,0,0,Mark a valid destination for jumps,
59 | 0x60 -- 0x7f,PUSH*,3,verylow,0,1,Place * byte item on stack. 0 < * <= 32,
60 | 0x80 -- 0x8f,DUP*,3,verylow,*,* + 1,Duplicate *th stack item. 0 < * <= 16,
61 | 0x90 -- 0x9f,SWAP*,3,verylow,* + 1,* + 1,Exchange 1st and (* + 1)th stack items.,
62 | 0xa0,LOG0,375 + 8 * (number of bytes in log data),,2,0,Append log record with no topics.,375 is paid for operation plus 8 for each byte in data to be logged.
63 | 0xa1,LOG1,375 + 8 * (number of bytes in log data) + 375,,3,0,Append log record with one topic.,375 is paid for operation plus 8 for each byte in data to be logged plus 375 for the 1 topic to be logged.
64 | 0xa2,LOG2,375 + 8 * (number of bytes in log data) + 2 * 375,,4,0,Append log record with two topics.,375 is paid for operation plus 8 for each byte in data to be logged plus 2 * 375 for the 2 topics to be logged.
65 | 0xa3,LOG3,375 + 8 * (number of bytes in log data) + 3 * 375,,5,0,Append log record with three topics.,375 is paid for operation plus 8 for each byte in data to be logged plus 3 * 375 for the 3 topics to be logged.
66 | 0xa4,LOG4,375 + 8 * (number of bytes in log data) + 4 * 375,,6,0,Append log record with four topics.,375 is paid for operation plus 8 for each byte in data to be logged plus 4 * 375 for the 4 topics to be logged.
67 | 0xf0,CREATE,32000,,3,1,Create a new account with associated code.,
68 | 0xf1,CALL,Complex -- see yellow paper  Appendix H,,7,1,Message-call into an account.,
69 | 0xf2,CALLCODE,Complex -- see yellow paper  Appendix H,,7,1,Message-call into this account with an alternative account’s code.,
70 | 0xf3,RETURN,0,zero,2,0,Halt execution returning output data.,
71 | 0xf4,DELEGATECALL,Complex -- see yellow paper  Appendix H,,6,1,"Message-call into this account with an alternative account’s code, but persisting the current values for sender and value.",
72 | 0xfd,REVERT,,,2,0,"End execution, revert state changes, return data mem[p…(p+s))",
73 | 0xfe,INVALID,NA,,NA,NA,Designated invalid instruction.,
74 | 0xff,SELFDESTRUCT,5000 + ((create_new_account) ? 25000 : 0),,1,0,Halt execution and register account for later deletion,5000 for the operation plus 25000 if a new account is also created. A refund of 24000 gas is also added to the refund counter for self-destructing the account.
75 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/program_generator/data/opcodes_ewasm.csv:
--------------------------------------------------------------------------------
  1 | Value,Mnemonic,Removed from stack,Added to stack,Notes,Formula Notes
  2 | 0x00,unreachable,1,1,,
  3 | 0x01,nop,0,0,,
  4 | 0x02,block,1,1,,
  5 | 0x03,loop,1,1,,
  6 | 0x04,if,1,1,,
  7 | 0x05,else,0,0,,
  8 | 0x0B,end,0,0,,
  9 | 0x0C,br,2,1,,
 10 | 0x0D,br_if,2,1,,
 11 | 0x0E,br_table,3,1,,
 12 | 0x0F,return,2,1,,
 13 | 0x10,call,1,1,,
 14 | 0x11,call_indirect,2,1,,
 15 | 0x1A,drop,1,0,,
 16 | 0x1B,select,3,1,,
 17 | 0x20,local.get $x,0,1,,
 18 | 0x21,local.set $x,1,0,,
 19 | 0x22,local.tee $x,1,1,,
 20 | 0x23,global.get $x,0,1,,
 21 | 0x24,global.set $x,1,0,,
 22 | 0x28,i32.load,1,1,,
 23 | 0x29,i64.load,1,1,,
 24 | 0x2C,i32.load8_s,1,1,,
 25 | 0x2D,i32.load8_u,1,1,,
 26 | 0x2E,i32.load16_s,1,1,,
 27 | 0x2F,i32.load16_u,1,1,,
 28 | 0x30,i64.load8_s,1,1,,
 29 | 0x31,i64.load8_u,1,1,,
 30 | 0x32,i64.load16_s,1,1,,
 31 | 0x33,i64.load16_u,1,1,,
 32 | 0x34,i64.load32_s,1,1,,
 33 | 0x35,i64.load32_u,1,1,,
 34 | 0x36,i32.store,2,0,,
 35 | 0x37,i64.store,2,0,,
 36 | 0x3A,i32.store8,2,0,,
 37 | 0x3B,i32.store16,2,0,,
 38 | 0x3C,i64.store8,2,0,,
 39 | 0x3D,i64.store16,2,0,,
 40 | 0x3E,i64.store32,2,0,,
 41 | 0x3F,memory.size,0,1,,
 42 | 0x40,memory.grow,1,1,,
 43 | 0x41,i32.const 32,0,1,,
 44 | 0x42,i64.const 32,0,1,,
 45 | 0x45,i32.eqz,1,1,,
 46 | 0x46,i32.eq,2,1,,
 47 | 0x47,i32.ne,2,1,,
 48 | 0x48,i32.lt_s,2,1,,
 49 | 0x49,i32.lt_u,2,1,,
 50 | 0x4A,i32.gt_s,2,1,,
 51 | 0x4B,i32.gt_u,2,1,,
 52 | 0x4C,i32.le_s,2,1,,
 53 | 0x4D,i32.le_u,2,1,,
 54 | 0x4E,i32.ge_s,2,1,,
 55 | 0x4F,i32.ge_u,2,1,,
 56 | 0x50,i64.eqz,1,1,,
 57 | 0x51,i64.eq,2,1,,
 58 | 0x52,i64.ne,2,1,,
 59 | 0x53,i64.lt_s,2,1,,
 60 | 0x54,i64.lt_u,2,1,,
 61 | 0x55,i64.gt_s,2,1,,
 62 | 0x56,i64.gt_u,2,1,,
 63 | 0x57,i64.le_s,2,1,,
 64 | 0x58,i64.le_u,2,1,,
 65 | 0x59,i64.ge_s,2,1,,
 66 | 0x5A,i64.ge_u,2,1,,
 67 | 0x67,i32.clz,1,1,,
 68 | 0x68,i32.ctz,1,1,,
 69 | 0x69,i32.popcnt,1,1,,
 70 | 0x6A,i32.add,2,1,,
 71 | 0x6B,i32.sub,2,1,,
 72 | 0x6C,i32.mul,2,1,,
 73 | 0x6D,i32.div_s,2,1,,
 74 | 0x6E,i32.div_u,2,1,,
 75 | 0x6F,i32.rem_s,2,1,,
 76 | 0x70,i32.rem_u,2,1,,
 77 | 0x71,i32.and,2,1,,
 78 | 0x72,i32.or,2,1,,
 79 | 0x73,i32.xor,2,1,,
 80 | 0x74,i32.shl,2,1,,
 81 | 0x75,i32.shr_s,2,1,,
 82 | 0x76,i32.shr_u,2,1,,
 83 | 0x77,i32.rotl,2,1,,
 84 | 0x78,i32.rotr,2,1,,
 85 | 0x79,i64.clz,1,1,,
 86 | 0x7A,i64.ctz,1,1,,
 87 | 0x7B,i64.popcnt,1,1,,
 88 | 0x7C,i64.add,2,1,,
 89 | 0x7D,i64.sub,2,1,,
 90 | 0x7E,i64.mul,2,1,,
 91 | 0x7F,i64.div_s,2,1,,
 92 | 0x80,i64.div_u,2,1,,
 93 | 0x81,i64.rem_s,2,1,,
 94 | 0x82,i64.rem_u,2,1,,
 95 | 0x83,i64.and,2,1,,
 96 | 0x84,i64.or,2,1,,
 97 | 0x85,i64.xor,2,1,,
 98 | 0x86,i64.shl,2,1,,
 99 | 0x87,i64.shr_s,2,1,,
100 | 0x88,i64.shr_u,2,1,,
101 | 0x89,i64.rotl,2,1,,
102 | 0x8A,i64.rotr,2,1,,
103 | 0xA7,i32.wrap_i64,1,1,,
104 | 0xAC,i64.extend_i32_s,1,1,,
105 | 0xAD,i64.extend_i32_u,1,1,,
106 | 0xC0,i32.extend8_s,1,1,,
107 | 0xC1,i32.extend16_s,1,1,,
108 | 0xC2,i64.extend8_s,1,1,,
109 | 0xC3,i64.extend16_s,1,1,,
110 | 0xC4,i64.extend32_s,1,1,,
111 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/program_generator/data/selection.csv:
--------------------------------------------------------------------------------
  1 | Opcode Name
  2 | 0x00 STOP
  3 | 0x01 ADD
  4 | 0x02 MUL
  5 | 0x03 SUB
  6 | 0x04 DIV
  7 | 0x05 SDIV
  8 | 0x06 MOD
  9 | 0x07 SMOD
 10 | 0x08 ADDMOD
 11 | 0x09 MULMOD
 12 | 0x0a EXP
 13 | 0x0b SIGNEXTEND
 14 | 0x10 LT
 15 | 0x11 GT
 16 | 0x12 SLT
 17 | 0x13 SGT
 18 | 0x14 EQ
 19 | 0x15 ISZERO
 20 | 0x16 AND
 21 | 0x17 OR
 22 | 0x18 XOR
 23 | 0x19 NOT
 24 | 0x1a BYTE
 25 | 0x30 ADDRESS
 26 | 0x32 ORIGIN
 27 | 0x33 CALLER
 28 | 0x34 CALLVALUE
 29 | 0x35 CALLDATALOAD
 30 | 0x36 CALLDATASIZE
 31 | 0x37 CALLDATACOPY
 32 | 0x38 CODESIZE
 33 | 0x39 CODECOPY
 34 | 0x3a GASPRICE
 35 | 0x3d RETURNDATASIZE
 36 | 0x3e RETURNDATACOPY
 37 | 0x41 COINBASE
 38 | 0x42 TIMESTAMP
 39 | 0x43 NUMBER
 40 | 0x44 DIFFICULTY
 41 | 0x45 GASLIMIT
 42 | 0x50 POP
 43 | 0x51 MLOAD
 44 | 0x52 MSTORE
 45 | 0x53 MSTORE8
 46 | 0x56 JUMP
 47 | 0x57 JUMPI
 48 | 0x58 PC
 49 | 0x59 MSIZE
 50 | 0x5a GAS
 51 | 0x5b JUMPDEST
 52 | 0x60 PUSH1
 53 | 0x61 PUSH2
 54 | 0x62 PUSH3
 55 | 0x63 PUSH4
 56 | 0x64 PUSH5
 57 | 0x65 PUSH6
 58 | 0x66 PUSH7
 59 | 0x67 PUSH8
 60 | 0x68 PUSH9
 61 | 0x69 PUSH10
 62 | 0x6a PUSH11
 63 | 0x6b PUSH12
 64 | 0x6c PUSH13
 65 | 0x6d PUSH14
 66 | 0x6e PUSH15
 67 | 0x6f PUSH16
 68 | 0x70 PUSH17
 69 | 0x71 PUSH18
 70 | 0x72 PUSH19
 71 | 0x73 PUSH20
 72 | 0x74 PUSH21
 73 | 0x75 PUSH22
 74 | 0x76 PUSH23
 75 | 0x77 PUSH24
 76 | 0x78 PUSH25
 77 | 0x79 PUSH26
 78 | 0x7a PUSH27
 79 | 0x7b PUSH28
 80 | 0x7c PUSH29
 81 | 0x7d PUSH30
 82 | 0x7e PUSH31
 83 | 0x7f PUSH32
 84 | 0x80 DUP1
 85 | 0x81 DUP2
 86 | 0x82 DUP3
 87 | 0x83 DUP4
 88 | 0x84 DUP5
 89 | 0x85 DUP6
 90 | 0x86 DUP7
 91 | 0x87 DUP8
 92 | 0x88 DUP9
 93 | 0x89 DUP10
 94 | 0x8a DUP11
 95 | 0x8b DUP12
 96 | 0x8c DUP13
 97 | 0x8d DUP14
 98 | 0x8e DUP15
 99 | 0x8f DUP16
100 | 0x90 SWAP1
101 | 0x91 SWAP2
102 | 0x92 SWAP3
103 | 0x93 SWAP4
104 | 0x94 SWAP5
105 | 0x95 SWAP6
106 | 0x96 SWAP7
107 | 0x97 SWAP8
108 | 0x98 SWAP9
109 | 0x99 SWAP10
110 | 0x9a SWAP11
111 | 0x9b SWAP12
112 | 0x9c SWAP13
113 | 0x9d SWAP14
114 | 0x9e SWAP15
115 | 0x9f SWAP16
116 | 0xf3 RETURN
117 | 0xfd REVERT
118 | 0xfe INVALID
119 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/program_generator/data/selection_ewasm.csv:
--------------------------------------------------------------------------------
 1 | Opcode Name
 2 | 0x01 nop
 3 | 0x1A drop
 4 | 0x1B select
 5 | 0x20 local.get
 6 | 0x21 local.set
 7 | 0x22 local.tee
 8 | 0x28 i32.load
 9 | 0x29 i64.load
10 | 0x2C i32.load8_s
11 | 0x2D i32.load8_u
12 | 0x2E i32.load16_s
13 | 0x2F i32.load16_u
14 | 0x30 i64.load8_s
15 | 0x31 i64.load8_u
16 | 0x32 i64.load16_s
17 | 0x33 i64.load16_u
18 | 0x34 i64.load32_s
19 | 0x35 i64.load32_u
20 | 0x36 i32.store
21 | 0x37 i64.store
22 | 0x3A i32.store8
23 | 0x3B i32.store16
24 | 0x3C i64.store8
25 | 0x3D i64.store16
26 | 0x3E i64.store32
27 | 0x3F memory.size
28 | 0x40 memory.grow
29 | 0x41 i32.const
30 | 0x42 i64.const
31 | 0x45 i32.eqz
32 | 0x46 i32.eq
33 | 0x47 i32.ne
34 | 0x48 i32.lt_s
35 | 0x49 i32.lt_u
36 | 0x4A i32.gt_s
37 | 0x4B i32.gt_u
38 | 0x4C i32.le_s
39 | 0x4D i32.le_u
40 | 0x4E i32.ge_s
41 | 0x4F i32.ge_u
42 | 0x50 i64.eqz
43 | 0x51 i64.eq
44 | 0x52 i64.ne
45 | 0x53 i64.lt_s
46 | 0x54 i64.lt_u
47 | 0x55 i64.gt_s
48 | 0x56 i64.gt_u
49 | 0x57 i64.le_s
50 | 0x58 i64.le_u
51 | 0x59 i64.ge_s
52 | 0x5A i64.ge_u
53 | 0x67 i32.clz
54 | 0x68 i32.ctz
55 | 0x69 i32.popcnt
56 | 0x6A i32.add
57 | 0x6B i32.sub
58 | 0x6C i32.mul
59 | 0x6D i32.div_s
60 | 0x6E i32.div_u
61 | 0x6F i32.rem_s
62 | 0x70 i32.rem_u
63 | 0x71 i32.and
64 | 0x72 i32.or
65 | 0x73 i32.xor
66 | 0x74 i32.shl
67 | 0x75 i32.shr_s
68 | 0x76 i32.shr_u
69 | 0x77 i32.rotl
70 | 0x78 i32.rotr
71 | 0x79 i64.clz
72 | 0x7A i64.ctz
73 | 0x7B i64.popcnt
74 | 0x7C i64.add
75 | 0x7D i64.sub
76 | 0x7E i64.mul
77 | 0x7F i64.div_s
78 | 0x80 i64.div_u
79 | 0x81 i64.rem_s
80 | 0x82 i64.rem_u
81 | 0x83 i64.and
82 | 0x84 i64.or
83 | 0x85 i64.xor
84 | 0x86 i64.shl
85 | 0x87 i64.shr_s
86 | 0x88 i64.shr_u
87 | 0x89 i64.rotl
88 | 0x8A i64.rotr
89 | 0xA7 i32.wrap_i64
90 | 0xAC i64.extend_i32_s
91 | 0xAD i64.extend_i32_u
92 | 0xC0 i32.extend8_s
93 | 0xC1 i32.extend16_s
94 | 0xC2 i64.extend8_s
95 | 0xC3 i64.extend16_s
96 | 0xC4 i64.extend32_s
97 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/program_generator/data/selection_ewasm_first_pass.csv:
--------------------------------------------------------------------------------
 1 | Opcode Name
 2 | 0x01 nop
 3 | 0x1A drop
 4 | 0x1B select
 5 | 0x20 local.get
 6 | 0x21 local.set
 7 | 0x22 local.tee
 8 | 0x41 i32.const
 9 | 0x45 i32.eqz
10 | 0x46 i32.eq
11 | 0x47 i32.ne
12 | 0x48 i32.lt_s
13 | 0x49 i32.lt_u
14 | 0x4A i32.gt_s
15 | 0x4B i32.gt_u
16 | 0x4C i32.le_s
17 | 0x4D i32.le_u
18 | 0x4E i32.ge_s
19 | 0x4F i32.ge_u
20 | 0x67 i32.clz
21 | 0x68 i32.ctz
22 | 0x69 i32.popcnt
23 | 0x6A i32.add
24 | 0x6B i32.sub
25 | 0x6C i32.mul
26 | 0x6D i32.div_s
27 | 0x6E i32.div_u
28 | 0x6F i32.rem_s
29 | 0x70 i32.rem_u
30 | 0x71 i32.and
31 | 0x72 i32.or
32 | 0x73 i32.xor
33 | 0x74 i32.shl
34 | 0x75 i32.shr_s
35 | 0x76 i32.shr_u
36 | 0x77 i32.rotl
37 | 0x78 i32.rotr
38 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/program_generator/data/selection_ewasm_from_spec.csv:
--------------------------------------------------------------------------------
  1 | Opcode Name
  2 | 0x00 unreachable
  3 | 0x01 nop
  4 | 0x02 block
  5 | 0x03 loop
  6 | 0x04 if
  7 | 0x05 else
  8 | 0x0B end
  9 | 0x0C br
 10 | 0x0D br_if
 11 | 0x0E br_table
 12 | 0x0F return
 13 | 0x10 call
 14 | 0x11 call_indirect
 15 | 0x1A drop
 16 | 0x1B select
 17 | 0x20 local.get
 18 | 0x21 local.set
 19 | 0x22 local.tee
 20 | 0x23 global.get
 21 | 0x24 global.set
 22 | 0x28 i32.load
 23 | 0x29 i64.load
 24 | 0x2C i32.load8_s
 25 | 0x2D i32.load8_u
 26 | 0x2E i32.load16_s
 27 | 0x2F i32.load16_u
 28 | 0x30 i64.load8_s
 29 | 0x31 i64.load8_u
 30 | 0x32 i64.load16_s
 31 | 0x33 i64.load16_u
 32 | 0x34 i64.load32_s
 33 | 0x35 i64.load32_u
 34 | 0x36 i32.store
 35 | 0x37 i64.store
 36 | 0x3A i32.store8
 37 | 0x3B i32.store16
 38 | 0x3C i64.store8
 39 | 0x3D i64.store16
 40 | 0x3E i64.store32
 41 | 0x3F memory.size
 42 | 0x40 memory.grow
 43 | 0x41 i32.const
 44 | 0x42 i64.const
 45 | 0x45 i32.eqz
 46 | 0x46 i32.eq
 47 | 0x47 i32.ne
 48 | 0x48 i32.lt_s
 49 | 0x49 i32.lt_u
 50 | 0x4A i32.gt_s
 51 | 0x4B i32.gt_u
 52 | 0x4C i32.le_s
 53 | 0x4D i32.le_u
 54 | 0x4E i32.ge_s
 55 | 0x4F i32.ge_u
 56 | 0x50 i64.eqz
 57 | 0x51 i64.eq
 58 | 0x52 i64.ne
 59 | 0x53 i64.lt_s
 60 | 0x54 i64.lt_u
 61 | 0x55 i64.gt_s
 62 | 0x56 i64.gt_u
 63 | 0x57 i64.le_s
 64 | 0x58 i64.le_u
 65 | 0x59 i64.ge_s
 66 | 0x5A i64.ge_u
 67 | 0x67 i32.clz
 68 | 0x68 i32.ctz
 69 | 0x69 i32.popcnt
 70 | 0x6A i32.add
 71 | 0x6B i32.sub
 72 | 0x6C i32.mul
 73 | 0x6D i32.div_s
 74 | 0x6E i32.div_u
 75 | 0x6F i32.rem_s
 76 | 0x70 i32.rem_u
 77 | 0x71 i32.and
 78 | 0x72 i32.or
 79 | 0x73 i32.xor
 80 | 0x74 i32.shl
 81 | 0x75 i32.shr_s
 82 | 0x76 i32.shr_u
 83 | 0x77 i32.rotl
 84 | 0x78 i32.rotr
 85 | 0x79 i64.clz
 86 | 0x7A i64.ctz
 87 | 0x7B i64.popcnt
 88 | 0x7C i64.add
 89 | 0x7D i64.sub
 90 | 0x7E i64.mul
 91 | 0x7F i64.div_s
 92 | 0x80 i64.div_u
 93 | 0x81 i64.rem_s
 94 | 0x82 i64.rem_u
 95 | 0x83 i64.and
 96 | 0x84 i64.or
 97 | 0x85 i64.xor
 98 | 0x86 i64.shl
 99 | 0x87 i64.shr_s
100 | 0x88 i64.shr_u
101 | 0x89 i64.rotl
102 | 0x8A i64.rotr
103 | 0xA7 i32.wrap_i64
104 | 0xAC i64.extend_i32_s
105 | 0xAD i64.extend_i32_u
106 | 0xC0 i32.extend8_s
107 | 0xC1 i32.extend16_s
108 | 0xC2 i64.extend8_s
109 | 0xC3 i64.extend16_s
110 | 0xC4 i64.extend32_s
111 | 


--------------------------------------------------------------------------------
/legacy/gas-estimator/src/program_generator/requirements.txt:
--------------------------------------------------------------------------------
1 | fire
2 | 


--------------------------------------------------------------------------------
/legacy/opcodes/src/table.mediawiki:
--------------------------------------------------------------------------------
  1 | {| class="wikitable" 
  2 | |- style="font-weight:bold;"
  3 | ! OP
  4 | ! Count
  5 | ! %
  6 | ! 
  7 | ! pushdata
  8 | ! % pushdata
  9 | ! % data
 10 | |-
 11 | | JUMPDEST
 12 | | 22,374,953
 13 | | 6.57%
 14 | | 
 15 | | 
 16 | | 
 17 | | 6.57%
 18 | |-
 19 | | JUMPI
 20 | | 14,963,477
 21 | | 4.39%
 22 | | 
 23 | | 
 24 | | 
 25 | | 4.39%
 26 | |-
 27 | | JUMP
 28 | | 11,389,635
 29 | | 3.34%
 30 | | 
 31 | | 
 32 | | 
 33 | | 3.34%
 34 | |-
 35 | | All PUSH
 36 | | 78,137,163
 37 | | 22.94%
 38 | | 
 39 | | 261,092,018
 40 | | 
 41 | | 43.39%
 42 | |-
 43 | | PUSH1
 44 | | 37,886,773
 45 | | 11.12%
 46 | | 1
 47 | | 37,886,773
 48 | | 14.51%
 49 | | 6.30%
 50 | |-
 51 | | PUSH2
 52 | | 28,280,939
 53 | | 8.30%
 54 | | 2
 55 | | 56,561,878
 56 | | 21.66%
 57 | | 9.40%
 58 | |-
 59 | | PUSH3
 60 | | 219,949
 61 | | 0.06%
 62 | | 3
 63 | | 659,847
 64 | | 0.25%
 65 | | 0.11%
 66 | |-
 67 | | PUSH4
 68 | | 5,247,460
 69 | | 1.54%
 70 | | 4
 71 | | 20,989,840
 72 | | 8.04%
 73 | | 3.49%
 74 | |-
 75 | | PUSH5
 76 | | 144,613
 77 | | 0.04%
 78 | | 5
 79 | | 723,065
 80 | | 0.28%
 81 | | 0.12%
 82 | |-
 83 | | PUSH6
 84 | | 17,898
 85 | | 0.01%
 86 | | 6
 87 | | 107,388
 88 | | 0.04%
 89 | | 0.02%
 90 | |-
 91 | | PUSH7
 92 | | 4,916
 93 | | 0.00%
 94 | | 7
 95 | | 34,412
 96 | | 0.01%
 97 | | 0.01%
 98 | |-
 99 | | PUSH8
100 | | 345,608
101 | | 0.10%
102 | | 8
103 | | 2,764,864
104 | | 1.06%
105 | | 0.46%
106 | |-
107 | | PUSH9
108 | | 17,508
109 | | 0.01%
110 | | 9
111 | | 157,572
112 | | 0.06%
113 | | 0.03%
114 | |-
115 | | PUSH10
116 | | 6,124
117 | | 0.00%
118 | | 10
119 | | 61,240
120 | | 0.02%
121 | | 0.01%
122 | |-
123 | | PUSH11
124 | | 4,005
125 | | 0.00%
126 | | 11
127 | | 44,055
128 | | 0.02%
129 | | 0.01%
130 | |-
131 | | PUSH12
132 | | 61,088
133 | | 0.02%
134 | | 12
135 | | 733,056
136 | | 0.28%
137 | | 0.12%
138 | |-
139 | | PUSH13
140 | | 25,980
141 | | 0.01%
142 | | 13
143 | | 337,740
144 | | 0.13%
145 | | 0.06%
146 | |-
147 | | PUSH14
148 | | 378,201
149 | | 0.11%
150 | | 14
151 | | 5,294,814
152 | | 2.03%
153 | | 0.88%
154 | |-
155 | | PUSH15
156 | | 101,959
157 | | 0.03%
158 | | 15
159 | | 1,529,385
160 | | 0.59%
161 | | 0.25%
162 | |-
163 | | PUSH16
164 | | 108,720
165 | | 0.03%
166 | | 16
167 | | 1,739,520
168 | | 0.67%
169 | | 0.29%
170 | |-
171 | | PUSH17
172 | | 42,547
173 | | 0.01%
174 | | 17
175 | | 723,299
176 | | 0.28%
177 | | 0.12%
178 | |-
179 | | PUSH18
180 | | 318
181 | | 0.00%
182 | | 18
183 | | 5,724
184 | | 0.00%
185 | | 0.00%
186 | |-
187 | | PUSH19
188 | | 2,813
189 | | 0.00%
190 | | 19
191 | | 53,447
192 | | 0.02%
193 | | 0.01%
194 | |-
195 | | PUSH20
196 | | 2,921,374
197 | | 0.86%
198 | | 20
199 | | 58,427,480
200 | | 22.38%
201 | | 9.71%
202 | |-
203 | | PUSH21
204 | | 30,857
205 | | 0.01%
206 | | 21
207 | | 647,997
208 | | 0.25%
209 | | 0.11%
210 | |-
211 | | PUSH22
212 | | 1,828
213 | | 0.00%
214 | | 22
215 | | 40,216
216 | | 0.02%
217 | | 0.01%
218 | |-
219 | | PUSH23
220 | | 658
221 | | 0.00%
222 | | 23
223 | | 15,134
224 | | 0.01%
225 | | 0.00%
226 | |-
227 | | PUSH24
228 | | 350
229 | | 0.00%
230 | | 24
231 | | 8,400
232 | | 0.00%
233 | | 0.00%
234 | |-
235 | | PUSH25
236 | | 14,112
237 | | 0.00%
238 | | 25
239 | | 352,800
240 | | 0.14%
241 | | 0.06%
242 | |-
243 | | PUSH26
244 | | 832
245 | | 0.00%
246 | | 26
247 | | 21,632
248 | | 0.01%
249 | | 0.00%
250 | |-
251 | | PUSH27
252 | | 1,157
253 | | 0.00%
254 | | 27
255 | | 31,239
256 | | 0.01%
257 | | 0.01%
258 | |-
259 | | PUSH28
260 | | 160,053
261 | | 0.05%
262 | | 28
263 | | 4,481,484
264 | | 1.72%
265 | | 0.74%
266 | |-
267 | | PUSH29
268 | | 262,631
269 | | 0.08%
270 | | 29
271 | | 7,616,299
272 | | 2.92%
273 | | 1.27%
274 | |-
275 | | PUSH30
276 | | 172
277 | | 0.00%
278 | | 30
279 | | 5,160
280 | | 0.00%
281 | | 0.00%
282 | |-
283 | | PUSH31
284 | | 26,782
285 | | 0.01%
286 | | 31
287 | | 830,242
288 | | 0.32%
289 | | 0.14%
290 | |-
291 | | PUSH32
292 | | 1,818,938
293 | | 0.53%
294 | | 32
295 | | 58,206,016
296 | | 22.29%
297 | | 9.67%
298 | |}
299 | 


--------------------------------------------------------------------------------
/legacy/tools/evm/words/numWords.java:
--------------------------------------------------------------------------------
 1 | package tech.pegasys.poc.witnesscodeanalysis.vm;
 2 | 
 3 | import tech.pegasys.poc.witnesscodeanalysis.vm.Address;
 4 | 
 5 | import org.apache.tuweni.bytes.Bytes;
 6 | import org.apache.tuweni.bytes.Bytes32;
 7 | import org.apache.tuweni.bytes.MutableBytes32;
 8 | 
 9 | /** Static utility methods to work with VM words (that is, {@link Bytes32} values). */
10 | public abstract class Words {
11 |   private Words() {} // private constructor: this class only holds static helpers
12 | 
13 |   /**
14 |    * Creates a new word containing the provided address.
15 |    *
16 |    * @param address The address to convert to a word.
17 |    * @return A VM word containing {@code address} (left-padded as according to the VM specification
18 |    *     (Appendix H. of the Yellow paper)).
19 |    */
20 |   public static Bytes32 fromAddress(final Address address) {
21 |     final MutableBytes32 bytes = MutableBytes32.create();
22 |     address.copyTo(bytes, Bytes32.SIZE - Address.SIZE); // offset leaves the leading bytes of the word untouched
23 |     return bytes;
24 |   }
25 | 
26 |   /**
27 |    * Extracts an address from the provided word.
28 |    *
29 |    * @param bytes The word to extract the address from.
30 |    * @return An address built from the right-most 160-bits of the {@code bytes} (as according to the
31 |    *     VM specification (Appendix H. of the Yellow paper)).
32 |    */
33 |   public static Address toAddress(final Bytes32 bytes) {
34 |     return Address.wrap(bytes.slice(bytes.size() - Address.SIZE, Address.SIZE).copy());
35 |   }
36 | 
37 |   /**
38 |    * The number of words corresponding to the provided input.
39 |    *
40 |    * <p>In other words, this computes {@code input.size() / 32} but rounded up.
41 |    *
42 |    * @param input the input to check.
43 |    * @return the number of (32 bytes) words that {@code input} spans.
44 |    */
45 |   public static int numWords(final Bytes input) {
46 |     // m/n round up == (m + n - 1)/n: http://www.cs.nott.ac.uk/~psarb2/G51MPC/slides/NumberLogic.pdf
47 |     return (input.size() + Bytes32.SIZE - 1) / Bytes32.SIZE;
48 |   }
49 | }
50 | 


--------------------------------------------------------------------------------
/legacy/wiki/Building-EVM-LLVM.md:
--------------------------------------------------------------------------------
 1 | The project compiles like other LLVM projects. The target's name is `EVM`, but since it is not yet finalized, you have to specify `-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=EVM` when you compile it.
 2 | 
 3 | In short, you can use the following to build the backend:
 4 | 
 5 | ```
 6 | git clone git@github.com:etclabscore/evm_llvm.git
 7 | cd evm_llvm
 8 | git checkout EVM
 9 | mkdir build && cd build
10 | cmake -DLLVM_TARGETS_TO_BUILD=EVM -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=EVM ..
11 | make -j8
12 | ```
13 | 


--------------------------------------------------------------------------------
/legacy/wiki/Compiling-smart-contracts.md:
--------------------------------------------------------------------------------
 1 | ### The Contract constructor function
 2 | 
 3 | Because EVM execution always starts from the beginning of the code (`pc = 0`), there must be a way to handle more complicated contract behaviours. In EVM LLVM, we use a dedicated function to handle this; it is called the contract constructor function. To implement the function, developers are expected to respect the following contract constructor properties:
 4 | 
 5 | -   The constructor should be the first function in the generated LLVM IR.
 6 | -   The constructor should be named `solidity.main` or `main` (could change in the future). The backend recognizes these specific names and will generate different call codes.
 7 | -   The constructor should not take any arguments.
 8 | -   The constructor should initialize the function's `free memory pointer`, which is located at address `0x40`. The `free memory pointer` is like the usual frame pointer, used to calculate function frames and stack allocations. Because it is located at `0x40`, you cannot initialize it to a smaller number.
 9 | 
10 | ### Skeleton example of a very small constructor function
11 | 
12 | Here is an illustration of the skeleton of a small smart contract:
13 | 
14 | ```
15 | declare i256 @llvm.evm.calldataload(i256)
16 | declare void @llvm.evm.return(i256, i256)
17 | declare void @llvm.evm.mstore(i256, i256)
18 | 
19 | define void @main() {
20 | entry:
21 |   call void @llvm.evm.mstore(i256 64, i256 128)
22 |   %0 = call i256 @llvm.evm.calldataload(i256 0)
23 |   %1 = call i256 @llvm.evm.calldataload(i256 32)
24 |   %2 = call i256 @add(i256 %0, i256 %1)
25 |   call void @llvm.evm.mstore(i256 0, i256 %2)
26 |   call void @llvm.evm.return(i256 0, i256 32)
27 |   unreachable
28 | }
29 | 
30 | define i256 @add(i256, i256) #0 {
31 |   %3 = alloca i256, align 4
32 |   %4 = alloca i256, align 4
33 |   store i256 %0, i256* %3, align 4
34 |   store i256 %1, i256* %4, align 4
35 |   %5 = load i256, i256* %3, align 4
36 |   %6 = load i256, i256* %4, align 4
37 |   %7 = add nsw i256 %5, %6
38 |   ret i256 %7
39 | }
40 | ```
41 | 
42 | **Usually, it is the frontend's responsibility to do the smart contract's plumbing, including the contract's constructor function.** We need the language frontends to generate the corresponding LLVM IR code.
43 | 
44 | This smart contract does the following things:
45 | 
46 | -   Initialize the `free memory pointer` to 128
47 | -   parse the first two 32-byte inputs
48 | -   call the `@add` function and supply it with the two parsed arguments
49 | -   In the function `@add`, we simply add the two arguments, and return it
50 | -   In the `@main` function, return the retrieved value using `llvm.evm.return` intrinsic.
51 | 
52 | ### Compiling the smart contract
53 | 
54 | Let's put the above smart contract code into a file named `test.ll`, and we use `llc` to generate EVM binary:
55 | 
56 | ```
57 | llc -mtriple=evm -filetype=obj test.ll -o test.o
58 | ```
59 | 
60 | ### Running the contract
61 | 
62 | A generated `.o` file is in binary format. To see its content in hex, try to use `xxd`, for example:
63 | 
64 | ```
65 | xxd -p -cols 65536 test.o
66 | ```
67 | 
68 | The `xxd` will emit a hex string representation of the binary format. `xxd` will try to break the line if it is too long. Here we specify `-cols 65536` to avoid linebreaking. After calling `xxd`, you should see some output such as:
69 | 
70 | ```
71 | 5b600135600080803561003d909192939091604051806108200152604051610840016040526004580192565b60405160209003516040529052602090f35b80826040519190915260206040510152019056
72 | ```
73 | 
74 | That is what we need to execute using an EVM engine. Let's try to do it using Geth's EVM. Remember that we need to supply two input arguments, so the command line should be like:
75 | 
76 | ```
77 | evm --input 1234567890123456789012345678901234567890123456789012345678901234 --code 5b600135600080803561003d909192939091604051806108200152604051610840016040526004580192565b60405160209003516040529052602090f35b80826040519190915260206040510152019056 run
78 | ```
79 | 
80 | `evm` will emit the result of adding the two input values:
81 | 
82 | ```
83 | 0x468acf08a2468acf08a2468acf08a2468acf08a2468acf08a2468acf08a24634
84 | ```
85 | 


--------------------------------------------------------------------------------
/legacy/wiki/Emitting-Program-Metadata.md:
--------------------------------------------------------------------------------
 1 | EVM LLVM provides a way to emit a program's metadata for various purposes. For example, a symbol table that records the jump destinations can be emitted along with the generated binary.
 2 | 
 3 | Developers can use this utility to emit more program information.
 4 | 
 5 | ## Existing implementation
 6 | 
 7 | When compiling a contract, a file named `EVMMeta.txt` will be generated along with the binary code. The file contains the function symbol table in the compiled program, along with the offset of each function. The metadata file can be used for various purposes, such as debugging, manual linking, analysis, and so on.
 8 | 
 9 | To specify a custom metadata file name if you do not want to use the `EVMMeta.txt` filename, option `-evm_md_file` can be used.
10 | 
11 | # Limitation
12 | 
13 | Existing implementation of EVM metadata emitting is limited to `MachineCode` module/level, which means that if there are any transformations at a higher level such as in the IR level, it will not be shown in the result.
14 | 


--------------------------------------------------------------------------------
/legacy/wiki/Example:-Compiling-using-existing-language-frontend.md:
--------------------------------------------------------------------------------
 1 | Let's try to use a simple C file to test our compiler:
 2 | 
 3 | ```sh
 4 | cat <<EOF > test.c
 5 | unsigned x;
 6 | int abc(unsigned a, unsigned b, unsigned c) {
 7 |   if (c > 0) {
 8 |     return a + x;
 9 |   } else {
10 |     return a + b;
11 |   }
12 | }
13 | EOF
14 | ```
15 | 
16 | Prerequisite: You have to install `clang` and use it to generate LLVM IR first:
17 | 
18 | ```
19 | clang -S -emit-llvm test.c
20 | ```
21 | 
22 | This will generate a `test.ll` file which should be the LLVM IR equivalent of our `test.c` file. Then we can generate EVM binary or assembly from it. In order to use the backend to generate EVM assembly, you have to specify `-mtriple=evm` when calling `llc`. An example is as follows:
23 | 
24 | ```
25 | ./build/bin/llc -mtriple=evm test.ll -o test.s
26 | ```
27 | 
28 | The generated `test.s` file contains the compiled EVM assembly code. Note that the generated code is the function body itself. In order to generate a complete smart contract, we need to use a smart contract creator function, which we will discuss on another page.
29 | 
30 | Notice that you can also get the binary code of the function body by emitting an object file:
31 | 
32 | ```
33 | ./build/bin/llc -mtriple=evm -filetype=obj test.ll -o test.o
34 | ```
35 | 


--------------------------------------------------------------------------------
/legacy/wiki/Function-Layouts.md:
--------------------------------------------------------------------------------
 1 | #### Address layout
 2 | 
 3 | EVM bytecode has a flat structure. It has neither explicit function entries nor symbol tables. All execution starts from address `0x00`.
 4 | 
 5 | #### Limitations
 6 | 
 7 | Notice that at this moment this backend is limited to generating correct code for a single compilation unit.
 8 | 
 9 | In order to link more than one compilation unit, one should inline the existing compilation units in the frontend so that the frontend can generate a correct `main` (the `function dispatcher` function) for the whole smart contract.
10 | 
11 | #### The function dispatcher (meta function)
12 | 
13 | The `function dispatcher` function (usually called `main` function in some contexts) is always placed at the beginning of the generated binary bytecode. The dispatcher is responsible for:
14 | 
15 | 1. parse the call data and find the called function address in the jump table using the hash value provided in the call data.
16 | 2. extract the call arguments, and push them on to stack.
17 | 3. call the function address specified in the jump table.
18 | 
19 | ```
20 |  Start of address
21 | +---------------->  +-------------------------+
22 |                     | Function dispatcher     |
23 |                     |   Jump Table            |
24 |                     |    (Func1,              |
25 |                     |     Func2,              |
26 |                     |     Func3)              |
27 |                     +-------------------------+
28 |                     |                         |
29 |                     |      Func1              |
30 |                     |                         |
31 |                     +-------------------------+
32 |                     |                         |
33 |                     |      Func2              |
34 |                     |                         |
35 |                     +-------------------------+
36 |                     |                         |
37 |                     |      Func3              |
38 |                     |                         |
39 |                     +-------------------------+
40 | ```
41 | 
42 | #### Moving the function dispatcher to front of the LLVM IR function list
43 | 
44 | At this moment it is up to the frontend developer to move the LLVM IR function to the beginning of the function list. You can do something like this when creating function dispatcher:
45 | 
46 | ```
47 | // Let's say you have a dispatcher function named "dispatcher"
48 | 
49 | // You should include "llvm/IR/SymbolTableListTraits.h" here
50 | using FunctionListType = SymbolTableList<Function>;
51 | FunctionListType &FuncList = TheModule->getFunctionList();
52 | FuncList.remove(dispatcher);
53 | FuncList.insert(FuncList.begin(), dispatcher);
54 | ```
55 | 


--------------------------------------------------------------------------------
/legacy/wiki/Future-Works.md:
--------------------------------------------------------------------------------
 1 | # Functionalities
 2 | 
 3 | ## Experimental support of landing pad
 4 | 
 5 | Landingpad is used to support exception handling.
 6 | 
 7 | ## Experimental support of simulating heap allocations
 8 | 
 9 | EVM does not have a heap space, so we cannot use heap allocations. We might be able to work around it.
10 | 
11 | ## Constant table support
12 | 
13 | Having a constant table in the smart contract could potentially save some code size if the elements in the table are reused.
14 | 
15 | ## Metadata export
16 | 
17 | We could export more metadata for debugging, analyzing, and so on.
18 | 
19 | # Optimizations
20 | 
21 | ## Support more than 16 local variables
22 | 
23 | EVM can only retrieve an element up to a depth of 16 from the stack top using instructions `SWAP1` to `SWAP16` -- resulting in a limitation that the Solidity compiler can only support 16 local variables. At this moment, EVM LLVM will also face a `stack too deep` issue if there are more than 16 variables in a single basic block.
24 | 
25 | But in LLVM we can totally work around this issue, and do a much better job. With dataflow analysis and register allocation algorithm, we can have near-optimal variable assignment (on the stack or on memory stack) in linear time.
26 | 
27 | ## Instruction scheduling
28 | 
29 | Arranging the order of the opcodes in an EVM binary is critical to its performance. Instructions have to be arranged so that we have minimal stack manipulation overhead (the opcodes that do not do actual computation, but rather reorder stack operands' positions relative to the top of the stack).
30 | 
31 | EVM LLVM backend is designed in such a way that a scheduler before register allocation can be implemented to reduce the stack operation overhead.
32 | 
33 | ## Improve EVM calling conventions
34 | 
35 | When calling a subroutine, the return address is the first argument and resides at the top of the stack. This is non-optimal because the return address will definitely not be used until the very end of the subroutine, and taking up a visible slot is expensive. We can re-arrange the return address to be at the end of the arguments so it will not have to be reached until we want to return from the subroutine.
36 | 
37 | ## Re-materialization of constants
38 | 
39 | Small constants should usually not stay on the stack --- they should be rematerialized whenever they are needed.
40 | 


--------------------------------------------------------------------------------
/legacy/wiki/Handling-EVM-specific-operations.md:
--------------------------------------------------------------------------------
1 | Ethereum Virtual Machine specific operations, such as accessing storage, retrieving block information, etc., are performed through EVM-specific instructions. The Solidity language automatically generates the necessary EVM-specific instructions under the hood so as to hide the details from Solidity developers. However, as a compiler backend, the input to EVM LLVM is in LLVM IR format, which is unable to hold any language-specific semantics higher than the C language level. So it is up to compiler frontends to lower language-specific semantics to the LLVM IR level.
2 | 
3 | Intrinsic functions are used to represent EVM-specific semantics in the input LLVM IR. Intrinsic functions are usually higher level representations of architecture-specific instructions. In EVM LLVM, we allow users to leverage EVM-specific instructions that are used to interact with the chain or storage by exposing those EVM instructions in the form of intrinsic functions.
4 | 
5 | -   This [page](https://github.com/etclabscore/evm_llvm/wiki/Intrinsic-Functions) lists the intrinsic functions that frontend developers can use.
6 | -   Intrinsics are defined [here](https://github.com/etclabscore/evm_llvm/blob/6271ae12899b6b9a2bfbcb3a690ec4b5e8652cfa/include/llvm/IR/IntrinsicsEVM.td#L14).
7 | -   And here are examples on [how to leverage intrinsics](https://github.com/etclabscore/evm_llvm/blob/6271ae12899b6b9a2bfbcb3a690ec4b5e8652cfa/test/CodeGen/EVM/intrinsics.ll#L1)
8 | 


--------------------------------------------------------------------------------
/legacy/wiki/Home.md:
--------------------------------------------------------------------------------
1 | ![evm-llvm-green-dragon](https://user-images.githubusercontent.com/450283/63640209-85cb3c00-c66b-11e9-9610-0c339ae66ac7.png)
2 | 
3 | Welcome to the `evm_llvm` wiki! This project aims at bringing LLVM infrastructure to the EVM world where smart contracts are widely deployed.
4 | 
5 | EVM LLVM is an EVM architecture backend for LLVM. With EVM LLVM you can generate EVM binary code with LLVM-based compilers. The backend does not assume a language frontend, so you should be able to plug in a new smart contract language frontend to generate EVM binary.
6 | 
7 | The goal of this project is to enable various platforms, tools, and smart contract programming language projects to quickly adopt a high-performance EVM backend.
8 | 


--------------------------------------------------------------------------------
/legacy/wiki/Language-Frontend-Integration.md:
--------------------------------------------------------------------------------
 1 | ## EVM target specific changes
 2 | 
 3 | ### Frontend is expected to emit 256bit values LLVM IR
 4 | 
 5 | The EVM architecture is the only 256-bit machine on the market, and so far it has not yet received support from the LLVM community. We added 256-bit and 160-bit support at the LLVM IR level.
 6 | 
 7 | In order to utilize 256-bit and 160-bit operands, developers are expected to emit `i256` and `i160` data types in their IR code generation. Include the `evm_llvm`'s header files in `include/llvm` folders so that these two pre-defined data types can be properly generated.
 8 | 
 9 | ### Frontend needs to generate compatible LLVM IR
10 | 
11 | Notice that development of this backend is based on LLVM 10, which was released in March 2020. We also have an LLVM 8 branch to support those who create their frontends with LLVM 8.
12 | 
13 | We could do back porting to other lower versions such as LLVM 9 at the request of developers for better stability or compatibility. Please let me know if you have such needs.
14 | 


--------------------------------------------------------------------------------
/legacy/wiki/Running-integrated-tests-in-EVM-environment.md:
--------------------------------------------------------------------------------
 1 | EVM is different from other execution platforms in that it runs on a blockchain. The result of executing a smart contract also depends on the state of the blockchain. So, we have to integrate an EVM execution environment (in this early stage, `geth`) into our tests.
 2 | 
 3 | ## Constructor
 4 | 
 5 | Unit tests only focus on small test functions. But you cannot execute a function independently on a blockchain; we need to have a contract constructor and dispatcher as the first function in the file. The reason is that EVM always starts its execution from address `0x00` -- where the contract header / constructor / dispatcher resides. The header then sets up the contract -- allocating memory/storage, parsing incoming parameters, et cetera.
 6 | 
 7 | Here is the commented constructor code we use for handling unit tests:
 8 | 
 9 | ```
10 | define void @main() {
11 | entry:
12 |   %0 = call i256 @llvm.evm.calldataload(i256 0) ; extract first 32-byte argument
13 |   %1 = call i256 @llvm.evm.calldataload(i256 32); extract second 32-byte argument
14 |   %2 = call i256 @test(i256 %0, i256 %1)        ;  execute the unit test function
15 |   call void @llvm.evm.mstore(i256 0, i256 %2)   ; store the returned value to memory address `0x00`
16 |   call void @llvm.evm.return(i256 0, i256 32)   ; call "return" to return the value returned by @test
17 |   unreachable
18 | }
19 | ```
20 | 
21 | Notice that the `@test` function takes 2 parameters, so we will have two calls to `@llvm.evm.calldataload`.
22 | 
23 | The unit test is compiled using `llc` with options: `-mtriple=evm -filetype=obj`. Then the code is executed using `geth`'s `evm` command.
24 | 
25 | ## Testing utilities
26 | 
27 | A Python script is used to handle the testing: `evm_llvm/tools/evm-test/evm_test.py` is the script we created to test the functionality of the LLVM backend. Here is what it does:
28 | 
29 | -   call the evm_llvm backend to compile an LLVM IR file (`.ll` file) into an object file (`.o` file). The file should contain the function we are going to verify along with a smart contract constructor header which is used to handle input arguments. The function should be at the beginning of the IR file (the first function).
30 | -   extract the contract opcodes from the `.o` file and prepare the input arguments (by padding each argument to 32 bytes and concatenating everything into one long string).
31 | -   run the executable binary using geth's `evm`, get the result from the printed output, and compare it with the expected value.
32 | 
33 | ## How to run the tests
34 | 
35 | 1. Install Python3
36 | 2. Run `evm_llvm/tools/evm-test/evm_test.py` then you should see the results.
37 | 
38 | ## How to add new tests
39 | 
40 | Please take a look at the `evm_llvm/tools/evm-test/evm_testsuit.py` file, it organizes tests by categorizing them into different `OrderedList`. Each element of the list contains the following information:
41 | 
42 | -   the name of the test
43 | -   the array of input arguments
44 | -   the path of the unit test source code file (in LLVM IR form)
45 | -   the expected result value
46 | 
47 | When adding new tests, you should:
48 | 
49 | -   put your test files into `evm_llvm/test/CodeGen/EVM` folder.
50 | -   add the test file path and expected results to the `evm_testsuit.py` file. (We might change it when the file gets too large).
51 | 
52 | ## TODO lists
53 | 
54 | -   add blockchain state related tests
55 | -   add re-entrance tests (which are also related to changes of blockchain states)
56 | 
57 | Please help improve the test utility!
58 | 


--------------------------------------------------------------------------------
/legacy/wiki/Stack-and-Memory-management.md:
--------------------------------------------------------------------------------
 1 | ## Variables
 2 | 
 3 | In the context of a stack machine, a variable refers to an operand that will be consumed by an opcode. In EVM LLVM, variables are treated as virtual registers until they are _stackified_ (converting register-based code to stack-based code) right before lowering to machine code.
 4 | 
 5 | In LLVM's internal SSA representation, it is fairly easy to compute a register's live range (the range from its assignment to its last use). Variables are treated differently depending on their live ranges. Local variables (variables whose liveness extends only within a single basic block) will live entirely on the stack, while non-local variables (variables that live across basic blocks) will be spilled to a memory slot allocated by the compiler.
 6 | 
 7 | #### Frame Objects
 8 | 
 9 | Frame objects will be allocated either on the stack or in memory. Since each element is 256 bits, we have to ensure that frame objects are 256 bits in length as well. Frame objects of smaller length are not supported.
10 | 
11 | It is possible for a frame object to be allocated on to memory space, if we are consuming too much of stack space. The stack allocation pass will try to find an efficient way to decide which goes to the memory and which stays in stack.
12 | 
13 | ### Frame Pointer (or Free Memory Pointer)
14 | 
15 | [Stack pointers and frame pointers](https://en.wikipedia.org/wiki/Call_stack#Stack_and_frame_pointers) are essential to support subroutine calls. Frame pointer is used to record the structure of stack frames. Because we do not have registers in EVM, we will have to store stack frame pointer in memory locations. Usually, we put stack frame pointer at location `0x40`, and we follow Solidity compiler's convention to initialize it to value `128`. So the stack frame of the first function starts at that location. The value of frame pointer changes as the contract calls a subroutine or exits from a subroutine. Whenever we need to have access to frame pointer, we will retrieve its value from that specific location.
16 | 
17 | ### Memory stack
18 | 
19 | Part of the memory is used as a stack for function calls and variable spills. The structure is described as follows:
20 | 
21 | -   The stack grows from lower addresses to higher addresses, unlike usual hardware implementations.
22 | -   The frame is arranged into 3 parts:
23 |     -   **frame object locations**. Each frame object has its own frame slot. Frame object `x` will have a 32 byte space starting from `$fp + (x * 32)`, where `$fp` is the frame pointer, and is stored at location `0x40`.
24 |     -   **spilled variables**. Variables that cannot be fully stackified will reside on the memory stack. In codegen, each spilled variable will have an index, and each index refers to a memory slot. A spilled variable that bears index `y` will reside at location `$fp + (number_of_frame_objects * 32) + (y * 32)`.
25 |     -   **subroutine context**. Like a regular register machine, the memory stack is used to store subroutine context so as to support function calls. Two slots are allocated at the end of current frame for a) the existing frame pointer, and b) return `PC` address.
26 | 
27 | Here is an example showing a stack frame right before we jump into a subroutine:
28 | 
29 | ```
30 |   Stack top                                    Higher address
31 |  +-----------> +----------------------------+ <--------------+
32 |                |                            |
33 |                |     Return Address         |
34 |                |                            |
35 |                +----------------------------+
36 |                |                            |
37 |                |     Function argument      |
38 |    new FP      |                            |
39 |  +-----------> +----------------------------+
40 |                |                            |
41 |                |    Saved frame pointer     |
42 |                |     (Start of frame)       |
43 |                +----------------------------+
44 |                |                            |
45 |                |     Stack Object 1         |
46 |                |                            |
47 |                +----------------------------+
48 |                |                            |
49 |                |     Frame Object 2         |
50 |                |                            |
51 |                +----------------------------+
52 |                |                            |
53 |                |     Frame Object 1         |
54 | Start of frame |                            |   Lower address
55 | +------------> +----------------------------+ <----------------+
56 | ```
57 | 


--------------------------------------------------------------------------------
/legacy/wiki/The-EVM-Calling-Conventions.md:
--------------------------------------------------------------------------------
 1 | The EVM architecture is a simplistic structure, but it has everything we need to do usual program computations.
 2 | 
 3 | ## Types of calls
 4 | 
 5 | There are two types of calls in an EVM smart contract:
 6 | 
 7 | 1. **Internal calls**. Internal calls refer to function calls within a smart contract. For example, we have two defined functions `A` and `B`, and somewhere in `A` we save our context and change our execution flow to the beginning of `B`.
 8 | 2. **External calls**. Also called cross-contract calls. `A` and `B` are defined in different deployed EVM contracts and `A` calls `B` in its context.
 9 | 
10 | ## Internal call conventions
11 | 
12 | Up to ETH 1.5, there is no link-and-jump EVM opcode for easy handling of subroutines (even though some [discussions](https://github.com/ethereum/EIPs/issues/2315) are ongoing). So we have to handle subroutine calls manually. Here are the calling conventions for internal calls:
13 | 
14 | -   current subroutine's frame pointer is saved at stack, at memory location `$fp - 32` where `$fp` is the subroutine call's frame pointer.
15 | -   arguments are all pushed on stack, along with the return address. Argument with smaller index number occupies a stack slot on top of another argument with a larger index number. For example, when we want to do a function call: `func abc(x, y, z)`, here is the arrangement of the arguments:
16 | 
17 | ```
18 |                +-----------+
19 |                |Return Addr|
20 |                +-----------+
21 |                |     X     |
22 |                +-----------+
23 |                |     Y     |
24 |                +-----------+
25 |  Current FP    |     Z     |
26 | +------------> +-----------+
27 |                |  Old FP   |
28 |                +-----------+
29 |                |   .....   |
30 |                +-----------+
31 | ```
32 | 
33 | _Note: Putting the return address on top of the stack is because it is easier to compute the location, but this will result in more stack manipulation overhead for the subroutine calls. We will improve this design in a later version._
34 | 
35 | -   A subroutine's return value is stored on stack top. _Note: currently we only support one return value. In the future we will improve it by supporting multiple return values._
36 | 
37 | ## Procedure of a subroutine call
38 | 
39 | To illustrate the procedure for a subroutine call, we need to do the following to save the context of current function execution:
40 | 
41 | 1. calculate the current frame size. The frame size should be the size sum of: a) slots occupied by frame objects, b) slots occupied by spilled variables, and c) one more slot for storing current frame pointer. let's assume the frame size is calculated to be `%frame_size`.
42 | 2. bump the frame pointer to: `$fp = $fp + %frame_size`. After that, we can easily restore the old frame pointer by looking at location `$fp - 32`.
43 | 3. push all subroutine arguments in order on to stack.
44 | 4. push return address onto stack. (At this moment, the return address is `PC + 6`).
45 | 5. push the beginning address of subroutine and jump.
46 | 
47 | Right before we return from a subroutine, the stack should be empty and the return address should be at the top of the stack. When returning from a subroutine call, we should do the following:
48 | 
49 | 1. push return value on to top of stack.
50 | 2. Do a `swap1` to move the return address to top of stack
51 | 3. jump to return address and resume the execution in caller function. If the function returns nothing, simply jump to return address.
52 | 
53 | After jumping back to caller, we have to resume the execution:
54 | 
55 | 1. restore caller's frame pointer by storing the value at location `$fp - 32` to `0x40`.
56 | 
57 | ## [EIP2315](https://eips.ethereum.org/EIPS/eip-2315) Support: Subroutine calls
58 | 
59 | The support of subroutines inside EVM enables the compiler to generate better-performing code. To be more specific: with EIP2315, it is up to the EVM to maintain the stack:
60 | 
61 | 1. the return address stack is only accessible to VM
62 | 2. the stack is invisible to users and compilers
63 | 
64 | A better calling convention is made with the support of EIP2315:
65 | 
66 | ### To generate a call procedure
67 | 
68 | 1. calculate the current frame size. The frame size should be the size sum of: a) slots occupied by frame objects, b) slots occupied by spilled variables, and c) one more slot for storing current frame pointer. let's assume the frame size is calculated to be `%frame_size`.
69 | 2. save existing frame pointer at memory location `$fp + %frame_size - 32`. The frame pointer is maintained at `0x40`.
70 | 3. bump the frame pointer to: `$fp = $fp + %frame_size`. After that, we can easily restore the old frame pointer by looking at location `$fp - 32`.
71 | 4. push all subroutine arguments in order on to stack.
72 | 5. push the beginning address of subroutine and call `JUMPSUB`
73 | 
74 | ### To generate the return
75 | 
76 | 1. push return value on to top of stack.
77 | 2. call `RETURNSUB` to resume execution of caller function.
78 | 
79 | ## External calls
80 | 
81 | External calls are implemented using intrinsic calls.
82 | 


--------------------------------------------------------------------------------
/legacy/wiki/Types-and-type-conversions.md:
--------------------------------------------------------------------------------
 1 | ## Newly supported Types
 2 | 
 3 | So far the open-source LLVM trunk has not yet implemented support for bit sizes larger than 128 bits. We have implemented 256-bit support in our own backend, and are considering contributing it back to the main trunk.
 4 | 
 5 | Users are allowed to use `i256` and `i160` data types in their generated LLVM IR, which represent 256bit integer types and 160bit integer types respectively.
 6 | 
 7 | Even though all EVM data types are 256 bits in length internally, we are still able to offer support for smaller data types. However, users are encouraged to use 256-bit data types because they are free.
 8 | 
 9 | ## Contract Input Argument Types -- The Solidity convention
10 | 
11 | Contract arguments are passed to EVM via the call data field. The function dispatcher is responsible for extracting input arguments from call data.
12 | 
13 | In Solidity's convention, the arguments in call data are padded to 32 bytes long if its data type's length is shorter. So, in order to maintain the convention, the function dispatcher needs to truncate the input arguments to the defined size in the function that is going to be called.
14 | 
15 | This is undoubtedly inefficient, so users are discouraged from using smaller data types.
16 | 


--------------------------------------------------------------------------------
/legacy/wiki/files/Generating_stack_machine_code_using_LLVM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/legacy/wiki/files/Generating_stack_machine_code_using_LLVM.pdf


--------------------------------------------------------------------------------
/legacy/wiki/files/LLVM_talk.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sambacha/openevm/d04c7663719fc16f6ad3db266cbb19f14a4215ce/legacy/wiki/files/LLVM_talk.pdf


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "openevm",
 3 |   "version": "1.0.0",
 4 |   "description": "",
 5 |   "main": "main.js",
 6 |   "directories": {
 7 |     "doc": "docs"
 8 |   },
 9 |   "scripts": {
10 |     "test": "echo \"Error: no test specified\" && exit 1"
11 |   },
12 |   "keywords": [],
13 |   "author": "",
14 |   "license": "ISC",
15 |   "devDependencies": {
16 |     "gh-pages": "^6.1.1"
17 |   }
18 | }
19 | 


--------------------------------------------------------------------------------