├── .gitignore
├── .markdownlint.json
├── README.md
├── book.toml
├── justfile
├── mermaid-init.js
├── mermaid.min.js
├── netlify-build.sh
└── src
    ├── SUMMARY.md
    ├── acceptance-testing
    │   ├── index.md
    │   ├── op-acceptor.png
    │   ├── release-checklist.md
    │   └── release-readiness.md
    ├── audit-post-mortem.md
    ├── audit-request-template.md
    ├── audits.md
    ├── fmas.md
    ├── images
    │   ├── sdlc-flowchart.mermaid
    │   └── sdlc-flowchart.png
    ├── release-calendar.md
    ├── release-process.md
    ├── sdlc.md
    └── security-readiness-template.md

/.gitignore:
--------------------------------------------------------------------------------
1 | book
2 | 
--------------------------------------------------------------------------------
/.markdownlint.json:
--------------------------------------------------------------------------------
1 | {
2 |   "default": true,
3 |   "no-hard-tabs": false,
4 |   "whitespace": false,
5 |   "MD013": { "line_length": 120 },
6 |   "MD033": false,
7 |   "MD036": false
8 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # OP-Stack Project Management Repository
2 | 
3 | This repository documents how the OP Stack is developed.
4 | 
5 | If you want to...
6 | 
7 | - **read our project management documentation**, check out the [website](https://pm.optimism.io).
8 | - **contribute to our project management documentation**, check out the [`src`](./src) directory.
9 | 
10 | ## Development
11 | 
12 | To serve the mdbook locally and view your changes, run `just serve`.
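If you are missing any of the tooling, one possible local setup is sketched below (an assumption, not a prescribed method; any way of installing `mdbook`, `mdbook-mermaid`, and `markdownlint-cli2` works):

```sh
# Install mdbook plus the mermaid preprocessor configured in book.toml
cargo install mdbook mdbook-mermaid

# Install the linter invoked by `just lint`
npm install -g markdownlint-cli2

# Build and serve the book with live reload (mdbook defaults to http://localhost:3000)
just serve
```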
13 | 
14 | ## Deployments
15 | 
16 | The website is updated every 3 hours by a scheduled deployment job with what's in the `main` branch.
--------------------------------------------------------------------------------
/book.toml:
--------------------------------------------------------------------------------
1 | [book]
2 | authors = ["The Optimism Collective"]
3 | language = "en"
4 | multilingual = false
5 | src = "src"
6 | title = "Optimism PM Docs"
7 | 
8 | [preprocessor]
9 | 
10 | [preprocessor.mermaid]
11 | command = "mdbook-mermaid"
12 | 
13 | [output]
14 | 
15 | [output.html]
16 | additional-js = ["mermaid.min.js", "mermaid-init.js"]
17 | git-repository-url = "https://github.com/ethereum-optimism/pm/"
--------------------------------------------------------------------------------
/justfile:
--------------------------------------------------------------------------------
1 | # The default recipe when just is called without arguments
2 | default: serve
3 | 
4 | serve:
5 |     mdbook serve
6 | 
7 | test:
8 |     just lint
9 |     mdbook build
10 | 
11 | lint:
12 |     markdownlint-cli2 "**/*.md"
13 | 
14 | lint-fix:
15 |     markdownlint-cli2 --fix "**/*.md"
--------------------------------------------------------------------------------
/mermaid-init.js:
--------------------------------------------------------------------------------
1 | (() => {
2 |     const darkThemes = ['ayu', 'navy', 'coal'];
3 |     const lightThemes = ['light', 'rust'];
4 | 
5 |     const classList = document.getElementsByTagName('html')[0].classList;
6 | 
7 |     let lastThemeWasLight = true;
8 |     for (const cssClass of classList) {
9 |         if (darkThemes.includes(cssClass)) {
10 |             lastThemeWasLight = false;
11 |             break;
12 |         }
13 |     }
14 | 
15 |     const theme = lastThemeWasLight ? 'default' : 'dark';
16 |     mermaid.initialize({ startOnLoad: true, theme });
17 | 
18 |     // Simplest way to make mermaid re-render the diagrams in the new theme is via refreshing the page
19 | 
20 |     for (const darkTheme of darkThemes) {
21 |         document.getElementById(darkTheme).addEventListener('click', () => {
22 |             if (lastThemeWasLight) {
23 |                 window.location.reload();
24 |             }
25 |         });
26 |     }
27 | 
28 |     for (const lightTheme of lightThemes) {
29 |         document.getElementById(lightTheme).addEventListener('click', () => {
30 |             if (!lastThemeWasLight) {
31 |                 window.location.reload();
32 |             }
33 |         });
34 |     }
35 | })();
--------------------------------------------------------------------------------
/netlify-build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -euo pipefail
4 | 
5 | echo "Installing Rust..."
6 | rustup default stable
7 | 
8 | echo "Installing mdbook..."
9 | curl -L https://github.com/rust-lang/mdBook/releases/download/v0.4.44/mdbook-v0.4.44-x86_64-unknown-linux-gnu.tar.gz | tar xvz
10 | 
11 | echo "Installing mdbook-mermaid..."
12 | cargo install mdbook-mermaid
13 | 
14 | echo "Building book..."
15 | ./mdbook build
--------------------------------------------------------------------------------
/src/SUMMARY.md:
--------------------------------------------------------------------------------
1 | # Summary
2 | 
3 | - [SDLC](./sdlc.md)
4 | - [Release Process](./release-process.md)
5 | - [Release Calendar](./release-calendar.md)
6 | - [Acceptance Testing](./acceptance-testing/index.md)
7 |   - [Release Readiness Process (RRP)](./acceptance-testing/release-readiness.md)
8 |   - [Release Readiness Checklist (RRC)](./acceptance-testing/release-checklist.md)
--------------------------------------------------------------------------------
/src/acceptance-testing/index.md:
--------------------------------------------------------------------------------
1 | # Acceptance Testing
2 | 
3 | Acceptance testing ensures that OP Stack networks are feature-complete and reliable, and that their features are ready for promotion.
4 | 
5 | The Platforms team will compile a Release Readiness Process (RRP) document, which will outline how to acceptance test devnets. This will include a list of tests to run - the Release Readiness Checklist (RRC). These will initially be run manually, but we'll automate them over time.
6 | 
7 | By automating validation and enforcing quality gates, we reduce risk and increase confidence in releases. Much of this is facilitated by a new tool, op-acceptor, which can run standard Go tests against OP Stack networks and track that network's readiness for promotion. Acceptance testing is a prerequisite for networks to promote from Alphanet, to Betanet, to Testnet.
8 | 
9 | This is a shared responsibility between the Platforms team and the feature teams:
10 | 
11 | | What Is It                                    | Who Does It    |
12 | |-----------------------------------------------|----------------|
13 | | Maintains acceptance testing tooling          | Platforms Team |
14 | | Writes acceptance tests for network liveness  | Platforms Team |
15 | | Runs acceptance tests                         | Platforms Team |
16 | | Writes acceptance tests for specific features | Feature Team   |
17 | | Performs upgrades                             | Feature Team   |
18 | 
19 | The Platforms team is responsible for running acceptance tests against each network. To coordinate your feature's
20 | acceptance testing, contact **Stefano** (_stefano_), **Platforms Protocol DevX Pod** (@Protocol DevX Pod) or **Platforms Team** (@Platforms Team) on Discord. 
21 | 22 | ## Tooling 23 | 24 | The acceptance tests themselves are written in Go and are run by **op-acceptor** within the [op-acceptance-tests](https://github.com/ethereum-optimism/optimism/tree/develop/op-acceptance-tests) directory of the optimism monorepo. 25 | [op-acceptor](https://github.com/ethereum-optimism/infra/tree/main/op-acceptor) provides a high-level framework for registering, running and viewing the results of acceptance tests. 26 | 27 |
28 | op-acceptor screenshot 29 |
30 | 31 | 32 | ## Tests 33 | To add new acceptance tests see the [README](https://github.com/ethereum-optimism/optimism/blob/develop/op-acceptance-tests/README.md#adding-new-tests) for instructions on how to do this. -------------------------------------------------------------------------------- /src/acceptance-testing/op-acceptor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethereum-optimism/pm/4d19bd39a07b0989630a9d170c2c0b2032873cb5/src/acceptance-testing/op-acceptor.png -------------------------------------------------------------------------------- /src/acceptance-testing/release-checklist.md: -------------------------------------------------------------------------------- 1 | # Release Readiness Checklist (RRC) 2 | 3 | This document provides a detailed checklist of requirements that devnets must meet to be considered ready for release. These are specific tests, metrics, and criteria that are evaluated as part of the [Release Readiness](./release-readiness.md) process. 4 | The most up-to-date list can be found in the Optimism monorepo's [op-acceptance-tests](https://github.com/ethereum-optimism/optimism/tree/develop/op-acceptance-tests). 5 | 6 | The criteria for the checks below apply to all devnets (alphanet, betanet, testnet, etc.) and should be considered a good minimum standard for acceptance. 7 | 8 | 9 | ## Sanity Check 10 | A new Kubernetes-based network typically requires about 30mins to fully startup and settle. After this, we sanity check the basic network health. 11 | 12 | - Check the [Superchain Health Dashboard](https://optimistic.grafana.net/goto/kdxUF-fHR?orgId=1) 13 | - Setup: 14 | - Select infra_env=dev, infra_network=, security_network1= 15 | - Checks: 16 | - Overall Infra reports "Healthy" 17 | - Overall Chain Progression Health reports "Healthy" 18 | - Dispute Mon Security Health 1 reports "Healthy" 19 | - Faultproof Withdrawals Security Health 1 reports "Healthy" (if applicable) 20 | - OP-Challenger Health reports "Healthy" (if applicable) 21 | - Check the [SLA dashboard](https://optimistic.grafana.net/goto/WGOaGN1NR?orgId=1) 22 | - Setup: 23 | - Select the correct network 24 | - SLO Evaluation Window = "10m", Period = "Last 30 minutes" 25 | - Checks: 26 | - Overall SLA should be >=99% 27 | - Check the [Bedrock Networks dashboard](https://optimistic.grafana.net/d/nUSlc3d4k/bedrock-networks?orgId=1&from=now-30m&to=now&timezone=browser&var-network=mainnet-prod&var-node=$__all&var-layer=$__all&var-safety=$__all&refresh=30s) 28 | - Setup: 29 | - Select the correct network 30 | - Period = "Last 30 minutes" 31 | - Checks: 32 | - Chain heads are increasing as expected (unsafe, safe, l1 heads, etc) 33 | - Divergence < 1 for all nodes 34 | - Peer counts are nominal 35 | - Check the [Batcher Dashboard](https://optimistic.grafana.net/d/SF0r6OBVz/bedrock-batcher) 36 | - Setup: 37 | - Select the correct network 38 | - Period = "Last 30 minutes" 39 | - Checks: 40 | - Block height is stricly increasing 41 | - RPC Errors are 0 42 | - No more than 1 pending transaction at any one time 43 | - Check the [Proposer Dashboard](https://optimistic.grafana.net/d/c543e3f0-4d2e-4b49-ac73-99a9363633ad/proposer) 44 | - Setup: 45 | - Select the correct network 46 | - Cluster = "oplabs-dev-infra-primary", Period = "Last 30 minutes" 47 | - Checks: 48 | - Proposed Block Numbers are increasing 49 | - Publishing error count is zero (no-data) 50 | - Balance (ETH) is non-zero 51 | - Check the [Challenger 
77 | 
78 | ## Feature Verification
79 | 
80 | Note: For testing a flashblocks-enabled network, refer to the [Flashblocks RRC](https://www.notion.so/oplabs/Flashblocks-Release-Readiness-Checklist-1faf153ee16280ac80d8cda0162f2392).
81 | 
82 | ### Automated Testing
83 | Run automated acceptance tests using [op-acceptor](https://github.com/ethereum-optimism/optimism/tree/develop/op-acceptance-tests).
84 | - Use the appropriate feature gate for the target network. This should not be `base`, but it will include it. Use one of the latest forks, such as `interop` or `flashblocks`, matching what the network is deployed with and what is being tested.
85 | 
86 | The command will look something like this:
87 | ```sh
88 | # Navigate to the op-acceptance-tests directory in the optimism monorepo
89 | cd optimism/op-acceptance-tests
90 | 
91 | # Set DEVNET_ENV_URL to point to the absolute path of
92 | # your target network's devnet-env.json
93 | export DEVSTACK_ORCHESTRATOR=sysext
94 | export DEVNET_ENV_URL=/path/to/the/network/devnet-env.json
95 | 
96 | $(mise which op-acceptor) \
97 |     --testdir ../optimism \
98 |     --gate interop \
99 |     --validators ../acceptance-tests.yaml \
100 |     --log.level INFO
101 | ```
102 | 
103 | ### Manual Testing
104 | Manually run any non-automated feature tests.
105 | (Note: This is a temporary step until we automate all of our current tests. Going forward, we aim to have no manual feature tests.)
106 | 
107 | ## Load Testing
108 | Run automated load tests using op-acceptor.
109 | - Use the `load-testing` gate for the network
--------------------------------------------------------------------------------
/src/acceptance-testing/release-readiness.md:
--------------------------------------------------------------------------------
1 | # Release Readiness Process (RRP)
2 | 
3 | ## Overview
4 | 
5 | This document defines the process and expectations for devnet releases in the OP Stack. It establishes a consistent framework for determining when a devnet is ready for release and how pass/fail determinations are made. By following these procedures, we can ensure that devnets meet quality standards before release.
6 | 
7 | While the Platforms team serves as the primary custodian of this release readiness process, its success relies on collaborative ownership between Platforms and Protocol, as well as contributions from across the organization. 
8 | 9 | ## Roles and Responsibilities 10 | 11 | | Role | Responsibilities | 12 | |------|-----------------| 13 | | **Platforms Team** | • Maintain the Release Readiness Process
• Run acceptance tests
• Make final pass/fail determinations | 14 | | **Feature Teams** | • Write and run feature-specific tests
• Fix identified issues in their features |
15 | 
16 | ## Objectives
17 | 
18 | The primary objectives of the Devnet Release Readiness process are:
19 | 
20 | 1. Release production networks without critical bugs
21 | 2. Ensure feature coverage through comprehensive testing
22 | 3. Establish a clear process for devnet promotion decisions
23 | 
24 | ## Release Readiness Process
25 | 
26 | ### Prerequisites
27 | 
28 | Before a devnet can be considered for release, the following prerequisites must be met:
29 | 
30 | 1. All new features must have acceptance testing coverage in [op-acceptance-tests](https://github.com/ethereum-optimism/optimism/tree/main/op-acceptance-tests)
31 | 2. The acceptance tests, as defined by the Release Readiness Checklist, should be passing on a local kurtosis-based devnet
32 | 3. The [FMA](../fmas.md) for the in-scope features should have been started
33 | 
34 | ### Readiness Phases
35 | 
36 | The devnets are expected to be live for short periods of time. For example, alphanets will be decommissioned after three weeks.
37 | 
38 | #### 1. Deployment
39 | 
40 | - A devnet is deployed according to the [standard process](https://github.com/ethereum-optimism/devnets/blob/main/README.md)
41 | - Basic infrastructure checks ensure the network is operational (manually for now; to be automated)
42 | 
43 | 
44 | #### 2. Acceptance Testing
45 | We work through the [Release Readiness Checklist](./release-checklist.md), which includes:
46 | - Automated acceptance tests (using op-acceptor)
47 | - Manual acceptance tests
48 | - Feature teams run specific feature tests
49 | - Platforms runs security and load tests
50 | - Exploratory testing is run by all teams (probing of the system looking for things that we previously missed)
51 | 
52 | #### 3. Results Analysis
53 | 
54 | - Each of the test results is categorized, in line with our internal incident severity matrix, by its potential impact had it occurred on mainnet:
55 |   - **Catastrophic (SEV 0)**: Critical to catastrophic issue that would warrant public notification, leadership awareness (and potential involvement), and potential consultation with legal. A large number of users are impacted by complete or severe loss of functionality, and SLAs have been broken
56 |   - **Critical (SEV 1)**: Critical issue that would warrant public notification. A large number of users are impacted by severe loss of functionality, and SLAs may have been broken
57 |   - **Major (SEV 2)**: A functionality issue that would actively impact many users' ability to transact, or a critical issue impacting a subset of users
58 |   - **Minor (SEV 3)**: Stability or minor customer-impacting issues that would require immediate attention from service owners
59 | 
60 | #### 4. Release Determination
61 | 
62 | - The Platforms team makes the final pass/fail determination
63 | - A devnet must have ZERO catastrophic or critical issues (SEV 0 or 1) to be considered for promotion
64 | - Major issues must have mitigation plans before promotion
65 | - Minor issues are documented but don't block promotion
66 | 
67 | #### 5. Release
68 | When ready, the devnet is made live and public.
69 | 
70 | ## Integration with Existing Release Process
71 | 
72 | The Devnet Release Readiness process integrates with the existing [Release Process](../release-process.md) as follows:
73 | 
74 | 1. **Alphanet**: Before promotion to Betanet, it must pass the Release Readiness process
75 | 2. **Betanet**: Before promotion to Testnet, it must pass the Release Readiness process with stricter criteria
76 | 3. **Testnet**: All features must have successfully passed through Alphanet and Betanet before deployment
77 | 
78 | ## Enforcement
79 | 
80 | Devnets shall not be released or promoted without following the release process described in this document. The Platforms team serves as the custodian of this document and guardian of the releases, with authority to block promotion of devnets that do not meet the release readiness criteria.
81 | 
82 | ## Tools and Resources
83 | 
84 | - [op-acceptor](https://github.com/ethereum-optimism/infra/tree/main/op-acceptor) - The acceptance testing framework
85 | - [op-acceptance-tests](https://github.com/ethereum-optimism/optimism/tree/develop/op-acceptance-tests) - Repository of acceptance tests
86 | - [devnets](https://devnets.optimism.io/) - The Optimism devnet environment
87 | - [Acceptance Testing](./index.md) - Additional context on the acceptance testing process
88 | 
89 | ## Future Considerations and Improvements
90 | 
91 | Here are some ideas for future iterations of this process:
92 | 
93 | * After each release, a retrospective to identify process improvements
94 | * A Release Coordinator role to coordinate the overall release process, track progress, facilitate communication, and document decisions
95 | * A per-devnet Release Readiness Checklist (RRC) to define specific requirements for each devnet
96 | * A public usage phase to collect feedback from the general public
97 | * Injection testing to see how we can break the network and test incident response runbooks
98 | * Communication through dashboards and weekly calls
99 | * Detailed Release Decision Documentation including a summary of test results, list of issues, mitigation plans, and recommendations
100 | * Test Results Reporting through the op-acceptor dashboard and Release Readiness Reports
101 | * Test results comms, including:
102 |   1. The op-acceptor dashboard, showing test status and results
103 |   2. A Release Readiness Report documenting all tests, issues, and recommendations
104 |   3. Updates in the weekly Protocol Upgrades Call
105 | * Security
106 |   1. We should explicitly define what we're checking for and how we do it
107 |   2. For checking smart contracts, we could utilise a tool such as [solcurity](https://github.com/transmissions11/solcurity)
108 |   3. [Audit](https://github.com/ethereum-optimism/pm/pull/49) procurement and execution
--------------------------------------------------------------------------------
/src/audit-post-mortem.md:
--------------------------------------------------------------------------------
1 | # Audit Post-Mortem
2 | It is not realistic to ask anyone either to build code completely free of bugs, or to catch all bugs in code that has already been written. However, we can and should demand that no severities of a certain category are found after applying a number of security measures. In particular, we want to ensure that SEV1+ bugs are never found in the last pre-production layer or during production.
3 | 
4 | This is a process to apply when this expectation is not met, and it is based on **reasonable expectations** from all involved, with no one expected to have extraordinary capabilities.
5 | 
6 | A piece of code is made progressively bug-free by applying layers of security. Unit testing, end-to-end testing, invariant testing, formal verification, peer reviews, internal audits, external audits and bug bounties are all layers of security.
7 | 
8 | If a SEV1+ bug is found too close to production, it can only be for two reasons:
9 | 
10 | 1. At least one security layer underperformed, probably more than one.
11 | 2. Not enough security layers were applied.
12 | 
13 | By comparing the bug found and the security layers, it should be obvious whether any of them underperformed, by assessing the kinds of bugs that should *reasonably* be caught by each.
14 | 
15 | - Did the bug pass through some code that should have been covered by unit testing?
16 | - Maybe the bug depended on the interaction between several components; is this a known scenario that is not covered by end-to-end testing?
17 | - We do invariant testing, but we didn’t test the invariant that would have revealed the bug?
18 | - Is the bug known to the security researcher community at large, but the audits missed it?
19 | 
20 | If a security layer is found to have underperformed, then the solution should be to strengthen it.
21 | 
22 | However, maybe our existing layers performed reasonably well, but we just didn’t apply enough of them. Maybe the bug was of the kind that would have been caught in an audit, only that we didn’t do one. Maybe the codebase was too complex for all issues to surface in a single audit or contest.
23 | 
24 | In that case, it might be that we misclassified the risk or complexity of the code. The process should be strengthened so that risk and complexity are correctly identified.
25 | 
26 | Finally, it might just be that risk and complexity were correctly identified, all security layers performed reasonably well, and we still got a bug. That still means that we need more layers, so the only thing that is left is that the table that tells you how many audits you need is not demanding enough. In that case we shift the requirements to the left, so that the same risk and complexity get more security layers than before.
--------------------------------------------------------------------------------
/src/audit-request-template.md:
--------------------------------------------------------------------------------
1 | # Audit Request Template
2 | *Use this template to communicate estimates and get approval for an audit. Please fill out the relevant sections and get approval from the folks listed below. Once you have received approval, you can engage with audit firms on details of the audit and request quotes. A Zip request can then be filed to get spend approval.*
3 | 
4 | # **Overview**
5 | 
6 | Link to Security Readiness Document
7 | 
8 | ## **Timeline and key stakeholders:**
9 | 
10 | ## **When?**
11 | 
12 | Audit Dates:
13 | 
14 | Planned Release:
15 | 
16 | ## **Who?**
17 | 
18 | Auditors:
19 | 
20 | OP Labs Facilitators:
21 | 
22 | ## **Costs**
23 | 
24 | Anticipated Number of Weeks:
25 | 
26 | -
27 | 
28 | Expected Cost: ???
29 | 
30 | -
31 | 
32 | ## **Approved**
33 | 
34 | Not started [Karl Floersch](mailto:karl@oplabs.co)
35 | 
36 | Action item:
37 | 
38 | - Create a formal Zip request once this document has been reviewed and approved.
39 | 
40 | 
41 | ## References
42 | 
43 | This template supersedes the [**Audit Request**](https://www.notion.so/Audit-Request-1a8f153ee1628045b467c262fae21975) template. 
--------------------------------------------------------------------------------
/src/audits.md:
--------------------------------------------------------------------------------
1 | # Audit Process
2 | 
3 | ## Context
4 | 
5 | [P1: Audit Process](https://www.notion.so/P1-Audit-Process-153f153ee16280cc8aacc75b955986b3?pvs=21)
6 | 
7 | > When teams need an audit, there should be a clear process with owners for all required steps: defining requirements and invariants, getting internal approvals, working with program management, talking to auditors, determining how many audits to get, what kinds of audits, negotiating audit prices, scheduling the audit, determining if a fix review is needed, and what to do with the results of an audit.
8 | >
9 | 
10 | This document describes the use of software audits at OP Labs. It includes:
11 | 
12 | - [An itemized step-by-step guide.](#summary)
13 | - [Choosing a provider and preparing for the audit.](#audit-procurement)
14 | - [Executing the audit.](#audit-execution)
15 | - [Reacting to the results of the audit.](#after-each-audit)
16 | - [Updating this process according to results.](#updating-this-process)
17 | 
18 | The resulting process integrates with the SDLC and enlists PgM and EVM Safety to help the Tech Lead execute the steps that are common to all audits, so that effort and uncertainty are minimised.
19 | 
20 | For further context on this process you can read [this companion document](https://www.notion.so/About-the-Audit-Process-1b9f153ee162805e8adcd2d50237c622?pvs=21) and the [references](#references).
21 | 
22 | ## Summary
23 | 
24 | - [ ] 1. The need for audits is determined during the FMAs in the Design Phase of the SDLC.
25 | - [ ] 2. During the [Design Review](./sdlc.md#step-1-design-review-if-applicable), [start a Security Readiness Review document](./security-readiness-template.md), which will be continuously updated.
26 | - [ ] 3. Once the design is reviewed, [use PgM’s help to engage an auditor](#audit-procurement) and obtain a price estimate.
27 | - [ ] 4. In parallel with the implementation, forward the Security Readiness Review and price estimate to Karl for approval.
28 | - [ ] 5. Once approved, use PgM’s help to complete the operational aspects of the audit. This process can be executed in parallel with all the remaining steps.
29 | - [ ] 6. As implementation and testing approach the release date, [decide on an audit start date](#audit-procurement).
30 | - [ ] 7. [Execute the audit](#audit-execution).
31 | - [ ] 8. [Make all required fixes](#audit-execution) and have them reviewed.
32 | - [ ] 9. [Publish the deliverables.](#after-each-audit)
33 | - [ ] 10. [If any audit findings are high severity](#after-each-audit) and this is the last scheduled audit:
34 |     - [ ] 10.1. [Perform a retro](./audit-post-mortem.md).
35 |     - [ ] 10.2. Perform another audit; [go back to 2](#audit-procurement).
36 | 
37 | ## Audit Procurement
38 | 
39 | The audit requirements are established during the project FMAs in the [Design Review phase of the SDLC](./sdlc.md#step-1-design-review-if-applicable). Both the audit procurement and the feature implementation can start in parallel once the design is reviewed.
40 | 
41 | The [Security Readiness Document](./security-readiness-template.md) is one of the deliverables from the design review, which will be updated as necessary during the delivery lifecycle. This document contains:
42 | - A summary of the project (or a link to a suitable summary if it already exists). 
43 | - All relevant links to the project documentation, including specs and FMAs. 44 | - The scope for the audit. 45 | 46 | We use Spearbit as our preferred auditing services provider. However, the feature team can choose a different provider from [this list](https://www.notion.so/How-to-Select-an-Audit-Firm-b0dee471e23f4712bb8ddc1fb51938f9?pvs=21), from [past engagements](https://www.notion.so/Security-Audits-e56b4226b9db4f2ca48db42d7d439a98?pvs=21), or from any other source if they have a desirable skillset. Program Management (PgM) is available in the #pmo slack channel for assistance with anything related to engaging auditor services. 47 | 48 | We don’t want to agree audit dates too early, as that will compromise the quality of the software delivery. Instead, we will agree with auditors to keep them tightly informed of the implementation timeline and process, choosing an exact audit date close to the release date. Auditors not wishing to agree to this process should not be selected. 49 | 50 | Auditors must agree to review the fixes to the vulnerabilities reported. Auditors not wishing to agree to this step should not be selected. 51 | 52 | Once an auditor has been selected by the feature team, EVM Safety will assist in estimating the required duration of the audit according to the FMAs. Auditors won't generally be able to provide a quote until the code is complete or nearly complete. Instead, PgM will take the duration estimate and produce a cost estimate based on recent rates from the selected auditor or similarly placed auditors. The Tech Lead and PgM will use these estimates to prepare an [audit request](./audit-request-template.md) which will be forwarded to the CEO for approval. 53 | 54 | Once approved by the CEO, the Tech Lead and PgM will [request the audit on Zip](https://oplabs.ziphq.com/create-workflow-request) as: 55 | - Choose "Request a Purchase/Vendor Onboarding/Purchase Renewal". 56 | - Under "What are you looking to purchase?" select "Other". 57 | - If the auditors have not been engaged in the past they will need to supply legal agreements, which will be also included in the Zip request. 58 | 59 | ## Audit Execution 60 | 61 | A devnet deployment is a [requirement for the audit execution](./sdlc.md#step-2b-security-audit-procurement-if-needed). As the date for the alphanet deployment is known with certainty, a date for the audit can be agreed so that the audit can be executed in parallel with the alphanet and betanet deployments and acceptance testing, and concluded before the testnet deployment. 62 | 63 | We prefer to communicate with auditors over Slack during the audit. Questions from auditors should be answered promptly and carefully. These questions reveal gaps in the [specifications or the scope](./security-readiness-template.md), which should be amended accordingly. 64 | 65 | Each vulnerability disclosed will be considered separately, fixed on an individual commit, and reviewed again by the auditors on the repo. 66 | 67 | For all audit findings that we will fix as part of a later feature, create an issue for each finding in the monorepo. The issue title should be the finding title, the description links to the audit report, and apply the TBD label. 68 | 69 | ## After Each Audit 70 | 71 | Once all the fixes are applied and reviewed, upload the audit report [to our repo](https://github.com/ethereum-optimism/optimism/tree/develop/docs/security-reviews). 
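For the per-finding monorepo issues described under Audit Execution above, the GitHub CLI is one convenient option (a sketch; the finding title and report link are placeholders):

```sh
# One issue per finding: title = the finding title, body links to the report, TBD label applied
gh issue create \
  --repo ethereum-optimism/optimism \
  --title "<finding title>" \
  --body "Tracking a finding from <link to the audit report>." \
  --label TBD
```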
72 | 
73 | If a valid high severity vulnerability was found, and this is the last expected audit for the project, **a [post-mortem](./audit-post-mortem.md) must be conducted and another audit of the same type must be scheduled**. These new audits follow the same process as any other audit.
74 | 
75 | ## Emergency Process
76 | 
77 | The audit process is tied to the SDLC process. A fast-track audit process would only be needed if we find out that we need audits later in the SDLC process, most likely as a result of updates to the [risk modelling](./fmas.md) or [excessive vulnerabilities in the last scheduled audit](#after-each-audit). The process described above is still applicable in these cases.
78 | 
79 | If the audit process is started in later stages of the SDLC, the documentation will be ready and can be put together as the [Security Readiness Document](./security-readiness-template.md) by including a summary of the project, if that didn’t exist yet.
80 | 
81 | We already know that we need an audit, and we can safely assume that an external audit by Spearbit will fulfil the requirements. Time and cost estimates can be provided with the help of PgM.
82 | 
83 | The documentation and estimates still need to be approved by the CEO. After approval by the CEO, the audit process doesn’t block delivery unless serious vulnerabilities are found.
84 | 
85 | ## Updating This Process
86 | 
87 | This process will be reviewed if SEV0 or SEV1 incidents are revealed during **production**, reported through a **bug bounty**, or caught in the **last audit** before production. The [post-mortem](./audit-post-mortem.md) might recommend updating this process.
88 | 
89 | Conversely, this process can also be reviewed with the goal of relaxing its requirements if no SEV1 or SEV0 bugs or incidents have happened in production, the bug bounty, or any last audit for at least six months.
90 | 
91 | ## References
92 | 
93 | - [Additional context on creating this process](https://www.notion.so/About-the-Audit-Process-1b9f153ee162805e8adcd2d50237c622?pvs=21)
94 | - [Calibration of this process against past audits](https://www.notion.so/Calibration-1bbf153ee16280d0a17adebee7f797e3?pvs=21)
95 | - [Repository with all audit reports](https://github.com/ethereum-optimism/optimism/tree/develop/docs/security-reviews)
96 | - Our current framework for audits: https://gov.optimism.io/t/op-labs-audit-framework-when-to-get-external-security-review-and-how-to-prepare-for-it/6864
97 | - An attempt to put an audit process in place: https://github.com/ethereum-optimism/wip-private-pm/blob/main/.github/ISSUE_TEMPLATE/audit.md
98 | - EVM Safety docs on managing audits: [Security Audits](https://www.notion.so/Security-Audits-e56b4226b9db4f2ca48db42d7d439a98?pvs=21), [Audit FAQs](https://www.notion.so/Audit-FAQs-61950fe7ca7c4b2e86b86142d8138d3b?pvs=21), [How to Select an Audit Firm](https://www.notion.so/How-to-Select-an-Audit-Firm-b0dee471e23f4712bb8ddc1fb51938f9?pvs=21)
99 | - [Audit Requirements for Fault Proof Contracts](https://www.notion.so/Audit-Requirements-for-Fault-Proof-Contracts-11cf153ee162803f84fed5d811206333?pvs=21)
100 | - [Audits and shipping secure code](https://www.notion.so/Audits-and-shipping-secure-code-198f153ee162802e8fcae67e7cd15981?pvs=21) from @Paul Dowman, summarizing Proofs' informal audit framework and adding some ideas.
101 | 
102 | ## Next Steps
103 | 
104 | - Update the [Failure Mode Analyses (FMAs)](https://www.notion.so/Failure-Mode-Analyses-FMAs-1fb9f65a13e542e5b48af6c850763494?pvs=21)
105 | - Update the [Security <> Developer Interface](https://www.notion.so/Security-Developer-Interface-232f2c43e8474a2a90e07d3cbe0b33bc?pvs=21)
106 | - Ask Karl about the preferred content of the [review template](https://docs.google.com/document/d/1dtUrBOl47sVs-Hw_2fxnPHx5JCg7qOU4nomh8KEHONU/edit?tab=t.0) that he will receive.
107 | - Refactor the docs so that they point to the GitHub repo with the reports, instead of pointing at individual reports.
108 | - The success of this initiative depends partially on the SDLC process being adopted and respected.
109 | - Include in the [SDLC](https://www.notion.so/Engineering-SDLC-v1-0-150f153ee16280d1b021c477957fac2f?pvs=21) process that other feature teams and EVM Safety should review specs and scope.
--------------------------------------------------------------------------------
/src/fmas.md:
--------------------------------------------------------------------------------
1 | # Failure Mode Analyses (FMAs)
2 | 
3 | # Overview
4 | Our Failure Mode Analyses are loosely based on [FME(C)As from aerospace engineering](https://en.wikipedia.org/wiki/Failure_mode_and_effects_analysis) and are intended to shift left the process of thinking through the risks associated with a project, so that mitigations can be planned for and implemented earlier rather than later.
5 | 
6 | # Writing a Failure Mode Analysis
7 | As part of the effort towards working in the open, we have open-sourced both the FMA process and the FMA template so protocol developers from the whole collective can adopt this process.
8 | 
9 | To write the FMA for your project, follow the [FMA template](https://github.com/ethereum-optimism/design-docs/blob/main/assets/fma-template.md). You can use the many [existing FMAs](https://github.com/ethereum-optimism/design-docs/tree/main/security) as examples to understand how to write yours.
10 | 
11 | FMAs live in the [design-docs](https://github.com/ethereum-optimism/design-docs/blob/main/security/failure-modes-analysis.md) or [design-docs-private](https://github.com/ethereum-optimism/design-docs-private) repo.
12 | 
13 | # Determine Audit Requirements
14 | The knowledge obtained in writing the FMA will help you determine the [audit](./audits.md) requirements for your project. EVM Safety is available if you need advice on this step.
15 | 
16 | 1. Broadly determine the risk of the change. To do that, consider the FMA and the [Liveness vs. Safety and Reputational vs. Existential matrix](https://gov.optimism.io/t/op-labs-audit-framework-when-to-get-external-security-review-and-how-to-prepare-for-it/6864#what-code-should-be-audited-3) to find out the [maximum severity incident](https://www.notion.so/Incident-Management-17268107b18d4c7492cab3d319d30533?pvs=21) that can be caused by the software to be audited. Then determine a subjective risk category by comparing your code to the descriptions below:
17 | <br>
**Low**: The feature doesn’t involve any components that can cause a significant incident. 18 |
**Medium**: A bug in the feature could lead to a temporary denial of service, a small loss of value across all users, or a large loss of value across a small number of users.
19 | <br>
**High**: Bugs in the feature could lead to denials of service lasting days or more, or a significant loss of assets. 20 | 21 | 2. Determine a subjective complexity category by comparing your code to the examples below: 22 |
**Low:** Any code that is easy to explain to a non-technical person, and easy to reason about as a whole.
23 | <br>
**Medium:** Code with several components that are each easy to reason about, or with a single feature that is complex. 24 |
**High:** Large codebase with several components, which are complex in their own right. Use of math, algorithms, architectural patterns, integration patterns, or features that are novel or difficult to explain to a non-technical person.
25 | 
26 | 3. Find the required audits for your risk and complexity in the table below. Read more about [audit types](https://www.notion.so/About-the-Audit-Process-1b9f153ee162805e8adcd2d50237c622?pvs=21).
27 | <br>
The table was [calibrated to past audits](https://www.notion.so/Calibration-1bbf153ee16280d0a17adebee7f797e3?pvs=21), but it is a statement of minimums and you always have the option to execute more audits than specified. 28 |
Internal audits should be executed before external audits. 29 |
For internal audits we currently use the Coinbase Protocol Security Team. We also have a Spearbit retainer that can be used for internal audits. EVM Safety doesn’t do internal audits. 30 |
Instead of an internal audit, you can also upgrade to a solo external audit or an external audit by an up-and-coming team. It is also possible to replace an internal audit with an external auditor who advises during design and implementation.
31 | <br>
For high complexity features, contests are also an option. 32 | 33 | | | **Low Risk** | **Medium Risk** | **High Risk** | 34 | | --- | --- | --- | --- | 35 | | **Low Complexity** | Peer Review | Internal Audit | External Audit | 36 | | **Medium Complexity** | Internal Audit | External Audit | Internal Audit + External Audit | 37 | | **High Complexity** | External Audit | Internal Audit + External Audit | External Audit x2 | 38 | 39 | It is in the interest of the Tech Lead to accurately estimate the complexity and risk of the feature, with all the help provided. A major update to the FMAs in later stages of the SDLC or a High vulnerability found in the last audit will impact delivery times more than preparing for an audit or two from early stages. 40 | 41 | # Table of Failure Modes Analyses 42 | 43 | [FMAs on Notion](https://www.notion.so/oplabs/Failure-Mode-Analyses-FMAs-1fb9f65a13e542e5b48af6c850763494?pvs=4#7279d87ce31644e4a725f837096bb24c) 44 | -------------------------------------------------------------------------------- /src/images/sdlc-flowchart.mermaid: -------------------------------------------------------------------------------- 1 | flowchart TD 2 | Step0[Step 0: Ideation and Planning] 3 | Step0 --> Step1[Step 1: Design Review] 4 | 5 | Step1 --> Step2[Step 2: Implement] 6 | Step1 -->|"If needed"| Step2b[Step 2b: Security Audit Procurement] 7 | 8 | Step2 --> Step3["Step 3: Create Superchain Ops Tasks
(L1 Upgrades Only)"]
9 | Step2b --> Step3b[Step 3b: Security Audit Execution]
10 | Step2 -->|"If needed"| Step3b
11 | 
12 | Step3 --> Step4[Step 4: Alphanet/Betanet Devnet Rollout]
13 | Step3b --> Step5
14 | 
15 | Step4 --> Step5[Step 5: Testnet Rollout]
16 | 
17 | Step5 --> Step7[Step 7: Mainnet Rollout]
18 | Step5 -->|"If needed"| Step6[Step 6: Governance Proposal]
19 | 
20 | Step6 --> Step7
--------------------------------------------------------------------------------
/src/images/sdlc-flowchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ethereum-optimism/pm/4d19bd39a07b0989630a9d170c2c0b2032873cb5/src/images/sdlc-flowchart.png
--------------------------------------------------------------------------------
/src/release-calendar.md:
--------------------------------------------------------------------------------
1 | # Release Calendar
2 | 
3 | The calendar below shows our planned governance cycles, Alphanets, and Betanets. Each event links out to the GitHub
4 | issue describing it in more detail.
5 | 
7 | -------------------------------------------------------------------------------- /src/release-process.md: -------------------------------------------------------------------------------- 1 | # Release Process 2 | 3 | 4 | 5 | Protocol upgrades run on a regular schedule. This helps resolve some of the challenges we've faced in the past: 6 | 7 | - No more waiting 3-4 months between hard forks 8 | - Teams don't need to rush features to "catch" an upgrade 9 | - Everyone knows when the next release is coming 10 | - We have the opportunity to find integration bugs earlier 11 | - Missing a train isn't a big deal - there's always another one coming 12 | 13 | The thousand-yard view of our release process is as follows: 14 | 15 | 1. Features are developed according to a stable-trunk development model. 16 | 2. Features are deployed to an Alphanet for initial acceptance testing. 17 | 3. If the feature works on Alphanet, it gets deployed to a Betanet for additional testing and upgrade process 18 | validation. 19 | 4. If the feature works on Betanet, it gets deployed to the Sepolia Testnet for governance review. 20 | 5. If governance passes, the feature is deployed to mainnet. 21 | 22 | You will need to budget roughly 6 weeks from the time your feature is code-complete to the time it is deployed on 23 | mainnet, exclusive of audit time. Working backwards from mainnet deployment, the rough timeline is as follows: 24 | 25 | | Time | Activity | 26 | |-----------|---------------------------------------------------| 27 | | T | Mainnet Activation | 28 | | T-1 week | Governance veto starts | 29 | | T-2 weeks | Cut mainnet release, distribute to node operators | 30 | | T-3 weeks | Governance vote starts | 31 | | T-4 weeks | Governance review starts | 32 | | T-4 weeks | Betanet deployment and acceptance testing | 33 | | T-5 weeks | Alphanet deployment and acceptance testing | 34 | | T-6 weeks | Feature is code-complete | 35 | 36 | Check out the [release calendar](./release-calendar.md) for more information on the schedule. 37 | 38 | ## Alphanets 39 | 40 | The Alphanet is the initial integration environment for protocol upgrades. Its primary purpose is to validate 41 | that new features work correctly on a deployed network running real infrastructure before moving on to broader 42 | integration and upgrade testing. 43 | 44 | The Alphanet can contain any combination of L1 and L2 upgrades. It is entirely acceptable to have an Alphanet with 45 | only L1 upgrades, and vice versa. By decoupling these two types of upgrades, we can increase our throughput and 46 | deployment flexibility. 47 | 48 | The scope of each Alphanet is finalized during the weekly Protocol Upgrades Call on Tuesdays. To put in a request 49 | for an Alphanet once the scope has been finalized, create a new issue on the 50 | [devnets repo](https://github.com/ethereum-optimism/devnets/issues/new?template=devnet-request.yml). 51 | 52 | ## Betanets 53 | 54 | The Betanet validates a complete upgrade that will be deployed to a production networks. Unlike the Alphanet, the 55 | Betanet performs the actual upgrade process and confirms that all features work together as intended. 56 | 57 | Betanets are deployed every three weeks, and contain the features that passed the Alphanet. If there are no passing 58 | features, the Betanet will be cancelled. 59 | 60 | The scope of each Betanet is finalized during the weekly Protocol Upgrades Call on Tuesdays. 
To put in a request
61 | for a Betanet once the scope has been finalized, create a new issue on the
62 | [devnets repo](https://github.com/ethereum-optimism/devnets/issues/new?template=devnet-request.yml).
63 | 
64 | ## Acceptance Testing
65 | 
66 | Promoting a feature from Alphanet to Betanet and beyond is contingent upon the feature passing automated acceptance
67 | tests. See the [Acceptance Testing](./acceptance-testing.md) document for more information.
68 | 
69 | ## Testnet
70 | 
71 | The Sepolia Testnet is the first public deployment of protocol upgrades. This allows ecosystem partners to test the
72 | upgrades in a stable environment and runs in parallel with the governance process. Unlike Alphanets and Betanets,
73 | the Testnet directly impacts external users and applications and is considered "production."
74 | 
75 | To provide sufficient time for infrastructure providers to upgrade their systems, Testnet releases must be cut at
76 | least 1 week in advance of any hardfork activation.
77 | 
78 | **All features must go through an Alphanet and a Betanet before being deployed on Testnet.** This means that you
79 | should target having your features deployed to the Alphanet and Betanet right before the gov cycle at the very latest.
80 | For example:
81 | 
82 | | Governance Cycle           | Latest Alphanet  | Latest Betanet  |
83 | |----------------------------|------------------|-----------------|
84 | | Cycle 34 (Feb 27 - Mar 13) | Badger (Feb 17)  | Balrog (Feb 24) |
85 | | Cycle 35 (Mar 20 - Apr 2)  | Cheetah (Mar 10) | Cupid (Mar 17)  |
86 | 
87 | See the [release calendar](./release-calendar.md) for the most up-to-date information on the release schedule.
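To back out your own deadlines from a planned activation date, the T-minus table above is simple date arithmetic. A quick sketch using GNU `date` (the activation date is a placeholder; BSD/macOS `date` uses `-v` instead of `-d`):

```sh
ACTIVATION="2025-06-17"  # placeholder mainnet activation date

# Walk the timeline backwards from T: T-1w veto starts, ..., T-6w code-complete
for weeks in 1 2 3 4 5 6; do
  echo "T-${weeks}w: $(date -d "$ACTIVATION - $weeks weeks" +%Y-%m-%d)"
done
```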
--------------------------------------------------------------------------------
/src/sdlc.md:
--------------------------------------------------------------------------------
1 | # Engineering SDLC v1.0
2 | 
3 | # Overview
4 | 
5 | This document describes our Software Development Lifecycle (SDLC). Put simply, it walks you through how we ship software.
6 | 
7 | # Using This Guide
8 | 
9 | Treat this guide like a flowchart. Not every step will be relevant to every project.
10 | 
11 | ![SDLC Flowchart](./images/sdlc-flowchart.png)
12 | 
13 | # SDLC
14 | 
15 | ## Step 0: Ideation and Planning
16 | 
17 | While beyond the scope of this document, it’s imperative that you know what you’re building and why before development starts. Work with your Product and Engineering Managers if you’re unsure about what you’re building. Typically, there’s a PRD or Problem/Proposed Solution doc that describes the business rationale for our major projects.
18 | 
19 | 1. Start a cross-functional discussion with Product, Engineering, DevRel, and PMO to align on a preliminary scope of work or hypothesis of the work that needs to be delivered.
20 | 2. Typical outputs during Ideation & Planning in which the team builds together:
21 |     1. A PRD or some form of documentation that elaborates on requirements
22 |         1. Acceptance criteria and definition of done
23 |         2. Spike and/or Prototyping: Help elaborate on the requirements to have a better idea of the scope/resource/complexity of the solution
24 |     2. Resourcing & Capacity Planning - Do we have the right people to accomplish the work?
25 |     3. Identify risks and dependencies, and achieve alignment/buy-in
26 | 3. Other outputs from this phase that will accelerate the subsequent steps:
27 |     1. Program Dashboard in GitHub
28 |     2. Sequencing and timeline
29 |     3. Communication Plan (Slack, Discord, Meetings)
30 | 
31 | ## Step 1: Design Review
32 | 
33 | Changes that affect multiple teams, deviate from existing practices, introduce new technologies, or change the protocol need to go through Design Review. The goal is to achieve alignment between engineering teams, document what is known about the project at this stage, and keep the bar for technical rigor high.
34 | 
35 | It works like this:
36 | 
37 | 1. Create a design doc and post it as a PR in either the [`design-docs`](https://github.com/ethereum-optimism/design-docs) or [`design-docs-private`](https://github.com/ethereum-optimism/design-docs-private) repository. Use the templates in the repositories.
38 | 2. Loop in the leads of each team that might be impacted by your change. Please be especially cognizant of any downstream impact of your proposed changes and loop in those leads as well. These leads are:
39 |     1. **Product:** @Sam McIngvale
40 |     2. **Ecosystem:** @Faina Shalts or @Hamdi Allam
41 |     3. **DevRel:** @Matthew Cruz (soyboy)
42 |     4. **Protocol:** @Mark Tyneway or @Proto
43 |     5. **Proofs:** @Paul Dowman or @Adrian Sutton
44 |     6. **Infrastructure:** @Zach Howard or @Alfonso Munoz de Laborde
45 |     7. **EVM Safety:** @Kelvin Fichter or @Matt Solomon
46 |     8. **Cloud Security:** @Raffaele Mazzitelli or @Alfonso Munoz de Laborde
47 | 3. Announce the PR in the [#pm](https://discord.com/channels/1244729134312198194/1244729134848938033) channel on Discord. Make sure to tag the required domain reviewers to allow for asynchronous review. Leads may tag in additional people as necessary.
48 | 4. Complete the specs, risk modelling, and governance impact analysis substeps as detailed in the subsections below.
49 | 5. Schedule a synchronous design review meeting to ratify the design doc and merge the PR. “Ratified” in this case means there’s no more blocking feedback on the design and the PR is merged into the main branch of the design docs repo. It’s up to the design doc’s author to decide when this is. If your design doc doesn’t get consensus, close the PR instead. **The goal of the discussion is to move towards closure, where closure is either ratifying or rejecting the design doc under review.** Don’t leave a design review hanging without clear actions to move it towards either ratification or rejection.
50 | 
51 | Keep in mind that the more complex your design is, the longer it will take to review. Use the following SLA as a rule of thumb for how much time to give your reviewers:
52 | 
53 | - Short document (e.g., 1-pager): share a minimum of 2 working days prior
54 | - Medium-length document (2-5 pages): share a minimum of 3-5 working days prior
55 | - Long document (6+ pages): share at least 1 week prior
56 | 
57 | For more information about the design review process, see [this doc](https://docs.google.com/document/d/1AvPZMUK5aQjdpw8xAFLkQ7d5sYTgaajkG1IEqgXRcu0/edit?tab=t.0).
58 | 
59 | ### Write Specs
60 | 
61 | Changes that modify smart contracts, consensus, or could otherwise be incorporated into alternative client implementations need to be specified in the [`specs`](https://github.com/ethereum-optimism/specs) repo before being rolled out. This is started during the design review, and regularly updated in parallel with writing the implementation, since we often find ways to improve the spec while writing the code.
62 | 
63 | Write your spec by creating a PR against the specs repo and requesting review from one of the specs reviewers identified during the design review. 
**Loop in the same set of leads as you would for the design doc to review the spec.** They can assign reviewers based on who has the most knowledge of the area of the spec being modified. 64 | 65 | ### Determine Governance Impact 66 | 67 | As you develop a design for your change, you’ll need to determine if the change requires governance. Changes that affect consensus, touch smart contracts on L1, modify predeploys, or impact transaction ordering will generally require governance approval. If you’re unsure, consult @Ben Jones. 68 | 69 | **If YES (Governance Needed):** 70 | 71 | - Follow the governance path below. The risks must be fully detailed in the risk modelling. Writing a governance proposal will be required. 72 | 73 | **If NO (Governance Not Needed):** 74 | 75 | - Implement and test, then ship to testnet and mainnet following our rollout procedures. You still need to execute risk modelling normally. 76 | 77 | The detailed criteria for what does/does not require governance is described below. 78 | 79 | #### Detailed Governance Criteria 80 | 81 | 84 | 85 | The threshold for which changes require a governance vote is based on the User Protections clause of the Law of Chains. In summary, these protections are: 86 | 87 | 1. **State Transition and Messaging Validity:** OP Chain state transitions or cross-chain messages sent to or from OP Chains must follow the rules of the latest governance-approved release of the OP Stack. This means that changes to the block derivation function or messenger contracts are always subject to a governance vote. 88 | 2. **Security, Uptime, and Liveness:** Block production, sequencing, and bridging must satisfy uniform standards for security, uptime, and liveness across all OP Chains. This means that changes that could cause users to be unable to transact (e.g., changing the gas limit to something untenable) are subject to a governance vote. 89 | 3. **Universal, Governance-Approved Upgrades:** OP Chains must upgrade together under OP Stack releases that are approved by governance. Any upgrades that aren’t backwards compatible are therefore subject to a governance vote. 90 | 91 | Using this framework, we can define the following rough upgrade types and whether or not each upgrade type needs a governance vote. If you are uncertain if an upgrade requires governance approval, please request delegate feedback on the forum. 92 | 93 | - **Consensus Changes** 94 | 95 | **Vote required:** Yes 96 | 97 | Consensus changes modify the state transition function or messaging validity. As such, they must be approved by governance to satisfy protection one above. 98 | 99 | For example: 100 | 101 | - Bedrock 102 | - EIP-4844 103 | - Shanghai 104 | - Any L1 upgrade that modifies a contract under the control of the Security Council. The Security Council cannot make any changes to L1 unless they are approved by governance *or* the result of an active or impending security issue. 105 | - **Predeploy Updates** 106 | 107 | **Vote required:** Yes 108 | 109 | Predeploy updates must be approved by governance in order to satisfy protection three above. More specifically, changes to predeploys must be rolled out across all OP Chains in order to prevent functionality on one chain from diverging from all the others. 110 | 111 | - **Cross-Chain Contracts** 112 | 113 | **Vote required:** No 114 | 115 | “Cross-chain contracts” refers to smart contracts like Gnosis SAFE or `create2deployer` which are deployed at the same address across multiple chains. 
These contracts do not require a governance vote because anyone can deploy them at any time on any chain. This is true even if we decide to add these contracts to the genesis state, since someone could always deploy them after the chain comes online.
116 | 
117 |     Note that any changes to the `0x42...` namespace *do* need to go through governance, as do any contract deployments that require irregular state transitions.
118 | 
119 | - **Parameter Updates**
120 | 
121 |     **Vote required:** Change Dependent
122 | 
123 |     Parameter updates that impact protections one or two above will need to be approved by governance. For example, setting the gas limit or changing the EIP-1559 parameters will require governance approval since modifying these parameters can prevent users from transacting.
124 | 
125 |     Examples:
126 | 
127 |     - Updating the ProxyAdmin/challenger/guardian addresses requires a governance vote.
128 |     - Updating gas parameters requires a governance vote until they’re explicitly configurable by the Chain Governor
129 |     - Updating the batcher/proposer addresses (among addresses already on the allowlist) does not require a governance vote as long as they are within the set of governance-approved addresses
130 | - **Non-Consensus Client Features**
131 | 
132 |     **Vote required:** No
133 | 
134 |     Network-wide features introduce functionality that may require coordination with alt-client developers, but without risk of a chain split. As such, these changes satisfy all three user protections above as long as they are backwards-compatible and meet our bar for engineering rigor.
135 | 
136 |     Examples:
137 | 
138 |     - Snap sync
139 | - **Changes Affecting Transaction Inclusion/Ordering**
140 | 
141 |     **Vote required:** Yes
142 | 
143 |     Even though the mempool is technically not part of consensus, it affects the way in which transactions get included into the chain and can negatively affect user experience. As a result, unilateral changes that affect transaction ordering violate protection two above and therefore need a vote. If the community detects that nonstandard ordering software is being run, it is grounds for removal from the sequencer allowlist.
144 | 
145 |     Examples:
146 | 
147 |     - Moving to a public mempool
148 |     - Running custom PBS/transaction pool software
149 | - **Non-Consensus, No-Coordination, Non-Ordering Changes**
150 | 
151 |     **Vote required:** No
152 | 
153 |     These changes are a catch-all for any change that doesn’t modify consensus or require coordination. These changes can be rolled out unilaterally without input from governance since they do not impact any of the protections described above.
154 | 
155 | 
156 | *Note: The above sets are not always mutually exclusive. If a given change falls into multiple buckets and any one of them requires a vote, then the change requires a vote. If you are unsure whether something requires a governance vote, ask @Bobby Dresser or @Ben Jones.*
157 | 
158 | ### Risk Modelling
159 | 
160 | Engineering owns architecture decisions, and Risk Modelling is the primary way in which it identifies possible risks in the launch and their mitigations. Risk modelling works best when it is started early in the development process, and updated as work progresses and more is known about the project.
161 | 
162 | Risk modelling is always required. In its first iteration, it will identify the risk level of the project. For projects on the higher half of the risk spectrum, a second iteration must provide full detail of the risks and suggested mitigations. 

### Risk Modelling

Engineering owns architecture decisions, and Risk Modelling is the primary way in which it identifies possible risks in the launch and their mitigations. Risk modelling works best when it is started early in the development process, and updated as work progresses and more is known about the project.

Risk modelling is always required. In its first iteration, it will identify the risk level of the project. For projects on the higher half of the risk spectrum, a second iteration must provide full detail of the risks and suggested mitigations.

Product must always be consulted when designing risk mitigations, as they can substantially alter the end product.

Risk modelling will be reviewed by Security before the design documentation is merged. For higher-risk projects, Security will require that the risks are exhaustive, their mitigations are acceptable, and overall the risk tolerance is consistent across all projects within the collective.

The security team signs off on the risk modelling, or provides feedback to engineering on how they can achieve that sign-off. The risk modelling author is responsible for program managing and driving the risk modelling process to completion, and Engineering must factor the timeline of the Security review into its estimates.

It is acknowledged that not everything is known about the project when risk modelling is done in the design step. Still, the knowledge available at that stage is expected to be enough to obtain an accurate rough risk classification, and to identify a substantial portion of the existing risks and their mitigations.

The risk modelling will [determine the level of auditing](./audits.md) required for releasing the feature. Follow the linked process to procure and execute the audit.

The risk modelling may produce one or more mitigation actions. These actions should be executed during implementation and are reviewed by security, with one security person dedicated per project (throughout the project). Until the feature is released onto mainnet, teams can introduce new or updated failure modes into the risk modelling. Those updates and corresponding mitigation actions are also reviewed by security.

**Updated Risk Modelling with executed mitigation actions is required prior to devnet deployments.**

We currently use FMAs as our risk modelling framework, but project teams are encouraged to use other risk modelling frameworks if they are more appropriate and EVM Safety verifies that they provide a similar level of detail.

To write an FMA, follow the [FMA process](./fmas.md). More details are available in the [Security <> Developer Interface](https://www.notion.so/Security-Developer-Interface-232f2c43e8474a2a90e07d3cbe0b33bc?pvs=21).

## Step 2: Implement

At this stage, you can start writing your code. Make sure you follow these standards:

- All consensus code must be behind a hardfork feature flag (see the sketch after this list).
- All changes must go through code review and have automated tests. Use CodeCov to determine how much of your code is tested and to identify testing gaps.
- For new features, add [acceptance tests](./acceptance-testing/index.md).
- All smart contract changes must meet the following minimum standards:
  - Follow the UX and Safety guidelines described [here](https://github.com/ethereum-optimism/design-docs/pull/177).
  - When upgrading existing contracts, follow the spec [here](https://github.com/ethereum-optimism/design-docs/blob/main/protocol/l1-upgrades.md).
  - Have near 100% test coverage along with invariant tests.
  - When useful, changes should be formally verified with Kontrol.
- All actions resulting from the FMA must be completed during implementation and will be reviewed by EVM Safety before the code can be deployed to a devnet.
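
As an illustration of the hardfork feature flag requirement, here is a minimal sketch in Go. The type and fork names below are hypothetical, not the monorepo's actual APIs, but they mirror a common pattern for timestamp-activated hardforks: gate new consensus behavior on an optional activation timestamp in the rollup config.

```go
package main

import "fmt"

// RollupConfig is a hypothetical stand-in for a chain's rollup configuration.
// A nil activation time means the fork is not scheduled on this chain.
type RollupConfig struct {
	ExampleForkTime *uint64 // Unix timestamp at which the fork activates
}

// IsExampleFork reports whether the fork is active at a given block timestamp.
func (c *RollupConfig) IsExampleFork(timestamp uint64) bool {
	return c.ExampleForkTime != nil && timestamp >= *c.ExampleForkTime
}

// processBlock gates the new consensus rule behind the feature flag, so the
// same binary behaves identically to old releases until the fork activates.
func processBlock(cfg *RollupConfig, timestamp uint64) string {
	if cfg.IsExampleFork(timestamp) {
		return "new consensus rules"
	}
	return "legacy consensus rules"
}

func main() {
	activation := uint64(1_700_000_000)
	cfg := &RollupConfig{ExampleForkTime: &activation}
	fmt.Println(processBlock(cfg, activation-1)) // legacy consensus rules
	fmt.Println(processBlock(cfg, activation))   // new consensus rules
}
```

Gating on a configured timestamp rather than a build-time flag lets every node ship the code well ahead of the upgrade and switch over in lockstep at the activation time.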

## Step 2b: Security Audit Procurement (If Needed)

The risk modelling results will be used to determine the audit requirements for the change. If an audit is needed, the [process to procure and execute it](./audits.md) should be started in parallel with the implementation.

You should factor the time required for both the audit and any necessary fix review into delivery timelines.

## Step 3: Create Superchain Ops Tasks (L1 Upgrades Only)

If your change modifies L1 smart contracts, you’ll need a `superchain-ops` playbook to execute the multisig transactions with the Security Council.

## Step 3b: Audit Execution

Once the code is complete, the [audit can be executed](./audits.md). The Tech Lead can decide to delay the execution of the audit if they have reason to think it will be more efficient to do so, but it must be completed before the testnet rollout can begin.

## Step 4: Alphanet/Betanet Devnet Rollout

**All the steps initiated in the design step must be completed before the devnet rollout can begin. That includes specs, risk modelling, and governance impact analysis.**

Next, it’s time to roll out to the Alphanet, then the Betanet. See the [release process](release-process.md) for more details.

## Step 5: Testnet Rollout

**If an audit is required, it must be completed, including fixes and fix review, before the testnet rollout can begin.**

Next, it’s time to roll out to the official testnet. These networks upgrade multiple chains at once, so they require coordination with DevRel and external partners. These networks are also considered production, so a high degree of stability is expected.

Testnets should be kept in sync with Mainnet as much as possible, including maintaining the same ownership structure, contract versions, and upgrade paths. Therefore, a testnet should not be upgraded until there is a high degree of confidence that the same upgrade will be deployed to Mainnet.

The process to upgrade these networks is:

1. (As applicable) Update the Superchain Registry with an updated hardfork time.
2. (As applicable) Update OPCM/op-deployer to upgrade smart contracts.
3. Cut an `rc` build as you normally would.
4. Loop in @Matthew Cruz to schedule your upgrade. Partners typically need at least a week of notice.
5. Use `op-workbench` to deploy onto our infrastructure.
6. Use `op-deployer` to upgrade L1 smart contracts.

*Note: The governance proposal is not necessarily blocked by the testnet upgrade. However, the Mainnet upgrade should not be executed until at least one week has passed since the testnet upgrade. The sketch below illustrates these timing constraints.*
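
As a worked illustration of the two timing rules above (at least a week of partner notice before a testnet upgrade, and at least a week between the testnet and Mainnet upgrades), here is a small hypothetical Go helper. It is not a real tool in our repos; it simply expresses the constraints from this section as code:

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// minNotice encodes the one-week minimum used by both rules in this section.
const minNotice = 7 * 24 * time.Hour

// validateSchedule checks a proposed upgrade schedule against the two
// timing constraints described above.
func validateSchedule(now, testnetUpgrade, mainnetUpgrade time.Time) error {
	if testnetUpgrade.Sub(now) < minNotice {
		return errors.New("partners need at least one week of notice before the testnet upgrade")
	}
	if mainnetUpgrade.Sub(testnetUpgrade) < minNotice {
		return errors.New("mainnet upgrade must be at least one week after the testnet upgrade")
	}
	return nil
}

func main() {
	now := time.Now()
	testnet := now.Add(10 * 24 * time.Hour)
	mainnet := testnet.Add(5 * 24 * time.Hour) // too soon after testnet
	if err := validateSchedule(now, testnet, mainnet); err != nil {
		fmt.Println("schedule rejected:", err)
	}
}
```

In practice this scheduling is coordinated manually with the people listed above; the helper only makes the constraints explicit.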

## Step 6: Governance Proposal (If Governance Is Needed)

1. **Prepare Proposal:**
   - Reference a stable commit/tag.
   - Include risk modelling results, audits, testnet performance, and activation schedule.
   - Use the standard governance [template](https://gov.optimism.io/t/season-6-charter-aware-upgrade-proposal-template/8134).
2. **Review & Post:**
   - Obtain Foundation (FND) and Legal approval.
   - Loop in @Ben Jones and @Bobby Dresser from the FND.
   - Loop in @Trevor Dodge and @Eric Van Wart from Legal.
   - Post the proposal on the governance forums.
   - Loop in @Lavande to coordinate delegate approval.
3. **Approval & Veto:**
   - Wait for the vote and veto period to complete.

## Step 7: Mainnet Rollout

1. Remove the `rc` suffixes from your releases.
2. Schedule the mainnet upgrade after the veto period expires.
3. Coordinate with @Maurelian or @Blaine Malone to schedule the mainnet multisig upgrade.
4. The Product/Dev/PMO Leads go through their checklists to ensure all stakeholder needs, documentation, and communications are in place. This includes working with Marketing, DevRel, Data, Finance, Foundation, etc.

--------------------------------------------------------------------------------
/src/security-readiness-template.md:
--------------------------------------------------------------------------------

# Security Readiness Document Template

Use this template as a guide to create a Security Readiness Document. Note that such a document will only contain links to documentation elsewhere in the OP Labs documentation repositories.

## Summary of the Project, Feature, or Fix

- *Brief description of the systems, applications, or data to be audited.*
- *Specific areas of concern or risk to be addressed.*

## Design

Link to the design docs in either the [`design-docs`](https://github.com/ethereum-optimism/design-docs) or [`design-docs-private`](https://github.com/ethereum-optimism/design-docs-private) repositories.

## FMAs

Link to the FMAs in either the [`design-docs`](https://github.com/ethereum-optimism/design-docs) or [`design-docs-private`](https://github.com/ethereum-optimism/design-docs-private) repositories.

## Feature Specifications

Link to the relevant files in the [`specs`](https://github.com/ethereum-optimism/specs) repository.

## Audit Scope

In addition to the specs, the change being audited should be defined as a state change, ideally as a diff in the monorepo between two commits, filtered by the files involved. Other scope descriptions are acceptable as long as they list every code change that is part of the feature to be released.

## References

This template supersedes the [**Audit Request**](https://www.notion.so/Audit-Request-1a8f153ee1628045b467c262fae21975) template.

--------------------------------------------------------------------------------