├── .github
│   ├── ISSUE_TEMPLATE
│   │   └── project-proposal.md
│   └── workflows
│       ├── add-to-pipeline.yml
│       └── assign-reviews.yml
├── .gitignore
├── README.md
├── notebooks
│   ├── ecosystem-dashboard-url-generation-for-maintenance-triage.ipynb
│   ├── ecosystem-github-activity-analysis-202209-ipfscamp2022.ipynb
│   ├── ecosystem_dashboard_utils.py
│   ├── github-event-processing.ipynb
│   ├── lotus-pr-bash-2021Q3.ipynb
│   ├── pl-repositories-dump.ipynb
│   └── spec-github-activity-analysis-202210.ipynb
└── proposals
    ├── 112-car-v2.md
    ├── 116-car-native-dag-store.md
    ├── 15-message-sending-ux-in-lotus.md
    ├── 42-human-readable-mutable-names.md
    ├── 62-nft-storage-for-nft-hack.md
    ├── 79-typescript-definitions.md
    ├── 80-lotus-api-audit.md
    ├── LONG-PITCH-TEMPLATE.md
    ├── MINIMAL-PITCH-TEMPLATE.md
    ├── bot-controllers.md
    ├── daggregator-cli-tool.md
    ├── digitalocean-droplet.md
    ├── images
    │   └── bot-arch.png
    ├── ipfs-blog-enhancements.md
    ├── ipfs-content-providing.md
    ├── ipfs-nft-tutorial.md
    ├── lotus-dynamic-retrieval-pricing.md
    ├── lotus-miner-runtime-segregation.md
    ├── migrate-over-ipfs.md
    ├── nft.storage-chunked-car-uploads.md
    ├── sketches
    │   ├── dynamic-retrieval-pricing.png
    │   └── lotus-miner-runtime-segregation.png
    └── storage-and-retrieval-bots.md
/.github/ISSUE_TEMPLATE/project-proposal.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Project proposal
3 | about: A proposal/brief/pitch for a significant project to be undertaken by a team
4 | title: ''
5 | labels: project
6 | assignees: ''
7 |
8 | ---
9 |
10 |
15 |
16 | Project name: [insert name]
17 | Proposal: [link to markdown file in this repo]
18 | Initial PR: [reference PR number with the first proposal draft]
19 |
20 | This issue is for project management of a proposal: deciding, scheduling, etc. Discussion about the relative merits of the proposal is welcome here, but discussion about the content of the proposal is better handled in a PR to the proposal document.
21 |
--------------------------------------------------------------------------------
/.github/workflows/add-to-pipeline.yml:
--------------------------------------------------------------------------------
1 | name: Add new pull requests to Project Pipeline
2 |
3 | on:
4 | pull_request:
5 |     types: [opened]
6 |
7 | jobs:
8 | automate-project-columns:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: alex-page/github-project-automation-plus@v0.3.0
12 | with:
13 | project: Project Pipeline
14 | column: Needs Review
15 | repo-token: ${{ secrets.GITHUB_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows/assign-reviews.yml:
--------------------------------------------------------------------------------
1 | on:
2 | pull_request:
3 | types: [opened, reopened]
4 | name: Assign Reviewers
5 | jobs:
6 | assign:
7 | runs-on: ubuntu-latest
8 | steps:
9 | - name: Mikeal Rogers
10 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
11 | if: contains(fromJSON('["alanshaw", "olizilla", "gozala", "terichadbourne"]'), github.event.pull_request.user.login)
12 | with:
13 | reviewers: "mikeal"
14 | token: ${{ secrets.GITHUB_TOKEN }}
15 | - name: Rod Vagg
16 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
17 | if: contains(fromJSON('["achingbrain","vasco-santos","dirkmc"]'), github.event.pull_request.user.login)
18 | with:
19 | reviewers: "rvagg"
20 | token: ${{ secrets.GITHUB_TOKEN }}
21 | - name: Molly Mackinlay
22 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
23 | if: contains(fromJSON('[]'), github.event.pull_request.user.login)
24 | with:
25 | reviewers: "momack2"
26 | token: ${{ secrets.GITHUB_TOKEN }}
27 | - name: Alex North
28 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
29 | if: contains(fromJSON('[]'), github.event.pull_request.user.login)
30 | with:
31 | reviewers: "anorth"
32 | token: ${{ secrets.GITHUB_TOKEN }}
33 | - name: Pooja Shah
34 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
35 | if: contains(fromJSON('[]'), github.event.pull_request.user.login)
36 | with:
37 | reviewers: "pooja"
38 | token: ${{ secrets.GITHUB_TOKEN }}
39 | - name: Oli Evans
40 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
41 | if: contains(fromJSON('[]'), github.event.pull_request.user.login)
42 | with:
43 | reviewers: "olizilla"
44 | token: ${{ secrets.GITHUB_TOKEN }}
45 | - name: Steve Loeppky
46 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
47 | if: contains(fromJSON('["arajasek ","aschmahmann ","lidel","magic6k","marten-seemann","schomatis ","stebalien","warpfork","ZenGround0"]'), github.event.pull_request.user.login)
48 | with:
49 | reviewers: "BigLep"
50 | token: ${{ secrets.GITHUB_TOKEN }}
51 | - name: Alan Shaw
52 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
53 | if: contains(fromJSON('["hugomrdias","gozala","ribasushi","jnthnvctr"]'), github.event.pull_request.user.login)
54 | with:
55 | reviewers: "alanshaw"
56 | token: ${{ secrets.GITHUB_TOKEN }}
57 | - name: Mike Goelzer
58 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
59 | if: contains(fromJSON('[]'), github.event.pull_request.user.login)
60 | with:
61 | reviewers: "mgoelzer"
62 | token: ${{ secrets.GITHUB_TOKEN }}
63 | - name: Steven Allen
64 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
65 | if: contains(fromJSON('[]'), github.event.pull_request.user.login)
66 | with:
67 | reviewers: "stebalien"
68 | token: ${{ secrets.GITHUB_TOKEN }}
69 | - name: Will Scott
70 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
71 | if: contains(fromJSON('["acruikshank","hannahhoward","gammazero","mvdan"]'), github.event.pull_request.user.login)
72 | with:
73 | reviewers: "willscott"
74 | token: ${{ secrets.GITHUB_TOKEN }}
75 | - name: Kadir Topal
76 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
77 | if: contains(fromJSON('[]'), github.event.pull_request.user.login)
78 | with:
79 | reviewers: "atopal"
80 | token: ${{ secrets.GITHUB_TOKEN }}
81 | - name: Jacob Heun
82 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
83 | if: contains(fromJSON('["achingbrain","vasco-santos","dirkmc","rvagg","acruikshank","hannahhoward","gammazero","mvdan","willscott"]'), github.event.pull_request.user.login)
84 | with:
85 | reviewers: "jacobheun"
86 | token: ${{ secrets.GITHUB_TOKEN }}
87 | - name: David Choi
88 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
89 | if: contains(fromJSON('[]'), github.event.pull_request.user.login)
90 | with:
91 | reviewers: "dchoi27"
92 | token: ${{ secrets.GITHUB_TOKEN }}
93 | - name: Jonathan Victor
94 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
95 | if: contains(fromJSON('[]'), github.event.pull_request.user.login)
96 | with:
97 | reviewers: "jnthnvctr"
98 | token: ${{ secrets.GITHUB_TOKEN }}
99 | - name: Raul Kripalani
100 | uses: AveryCameronUofR/add-reviewer-gh-action@1.0.3
101 | if: contains(fromJSON('["nonsense", "kubuxu", "dirkmc", "vyzo", "aarshkshah1992"]'), github.event.pull_request.user.login)
102 | with:
103 | reviewers: "raulk"
104 | token: ${{ secrets.GITHUB_TOKEN }}
105 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Specifics to this repo
2 | notebooks/data
3 |
4 | # Items for interacting with the Google Cloud Platform APIs
5 | credentials.json
6 | token.pickle
7 | token.json
8 |
9 | # Operating System files
10 | .DS_Store
11 |
12 | # Stock Python .gitignore below
13 |
14 | # Byte-compiled / optimized / DLL files
15 | __pycache__/
16 | *.py[cod]
17 | *$py.class
18 |
19 | # C extensions
20 | *.so
21 |
22 | # Distribution / packaging
23 | .Python
24 | build/
25 | develop-eggs/
26 | dist/
27 | downloads/
28 | eggs/
29 | .eggs/
30 | lib/
31 | lib64/
32 | parts/
33 | sdist/
34 | var/
35 | wheels/
36 | pip-wheel-metadata/
37 | share/python-wheels/
38 | *.egg-info/
39 | .installed.cfg
40 | *.egg
41 | MANIFEST
42 |
43 | # PyInstaller
44 | # Usually these files are written by a python script from a template
45 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
46 | *.manifest
47 | *.spec
48 |
49 | # Installer logs
50 | pip-log.txt
51 | pip-delete-this-directory.txt
52 |
53 | # Unit test / coverage reports
54 | htmlcov/
55 | .tox/
56 | .nox/
57 | .coverage
58 | .coverage.*
59 | .cache
60 | nosetests.xml
61 | coverage.xml
62 | *.cover
63 | *.py,cover
64 | .hypothesis/
65 | .pytest_cache/
66 |
67 | # Translations
68 | *.mo
69 | *.pot
70 |
71 | # Django stuff:
72 | *.log
73 | local_settings.py
74 | db.sqlite3
75 | db.sqlite3-journal
76 |
77 | # Flask stuff:
78 | instance/
79 | .webassets-cache
80 |
81 | # Scrapy stuff:
82 | .scrapy
83 |
84 | # Sphinx documentation
85 | docs/_build/
86 |
87 | # PyBuilder
88 | target/
89 |
90 | # Jupyter Notebook
91 | .ipynb_checkpoints
92 |
93 | # IPython
94 | profile_default/
95 | ipython_config.py
96 |
97 | # pyenv
98 | .python-version
99 |
100 | # pipenv
101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
104 | # install all needed dependencies.
105 | #Pipfile.lock
106 |
107 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
108 | __pypackages__/
109 |
110 | # Celery stuff
111 | celerybeat-schedule
112 | celerybeat.pid
113 |
114 | # SageMath parsed files
115 | *.sage.py
116 |
117 | # Environments
118 | .env
119 | .venv
120 | env/
121 | venv/
122 | ENV/
123 | env.bak/
124 | venv.bak/
125 |
126 | # Spyder project settings
127 | .spyderproject
128 | .spyproject
129 |
130 | # Rope project settings
131 | .ropeproject
132 |
133 | # mkdocs documentation
134 | /site
135 |
136 | # mypy
137 | .mypy_cache/
138 | .dmypy.json
139 | dmypy.json
140 |
141 | # Pyre type checker
142 | .pyre/
143 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Web3 Dev Team
2 |
3 | This repo is the project management hub for PL's efforts contributing to the Web3 ecosystem through improvements to libp2p, IPFS, IPLD, Filecoin and other projects.
4 |
5 | The primary planning mechanism is discussion of proposals for significant projects to be undertaken by PL-sponsored project teams. Check them out on the [projects board](https://github.com/protocol/web3-dev-team/projects/3). Pitch a new project by [filing an issue](https://github.com/protocol/web3-dev-team/issues/new/choose) using the project proposal template.
6 |
7 | # Projects
8 |
9 | Projects are 2-8 week chunks of work (though some may be larger, and you should feel free to suggest those too) that directly contribute to improving usability and adoption of the InterPlanetary dev stack of protocols (IPFS+IPLD+libp2p+Filecoin).
10 |
11 | Project teams should aim to take on no more than 2 projects at once to encourage focus on landing work before picking up new responsibilities.
12 |
13 | # Project Principles
14 |
15 | - We don’t know all the potential projects up front - create space to define and prioritize new project ideas as we go
16 | - Optimize for leverage - choose projects that help us learn and target our work better over time
17 | - Great ideas come from everywhere - make it fast to capture these ideas and evaluate for action
18 | - Enable any contributor to scope & act on scoped projects (by forcing ourselves to write down what should happen and why)
19 | - There are multiple ways to solve a problem - multiple projects can be scoped from a single idea
20 | - When there are technical debates on how to execute on a project, keep discussion in the relevant protocol repo (ex _go-ipfs_) & synthesize updates/insights in the project proposal
21 | - The best project proposals synthesize data/input/expertise from many sources (protocol stewards/PMs/leads/domain experts/etc)
22 |
23 | # Pipelines
24 |
25 | There are two overlapping pipelines for triaging projects. The first is the "Project Pipeline", where all new proposals are reviewed and triaged. A proposal will either be closed, approved and assigned to a team, or moved to the "Grant Pipeline."
26 |
27 | Proposals are managed in a Project board with the following columns.
28 |
29 | * Needs Review
30 | * Needs Owner
31 | * In Progress
32 | * Grants
33 | * Completed
34 |
35 | As new proposals are created, they are assigned reviewers. Once the required number of reviewers have approved, the proposal will automatically move from the "Needs Review" column to the "Needs Owner" column. Every Monday, the project leads review pending proposals and decide whether each should be:
36 |
37 | * **Closed**. Any proposal that is not something we can assign resources to or fund via a grant/contract.
38 | * **Approved**. The card on the project board will be moved to "In Progress" and the PR will be assigned to the Project Team Lead and Engineering Manager. When the project is finished the PR will be merged.
39 | * **Moved** to the "Grant Pipeline".
40 |
41 | Proposals in the "Grant Pipeline" will be separately triaged / scoped / RFP'd and assigned eventual owners when a match is found.
42 |
43 | # Project selection criteria
44 |
45 | - Solves an urgent need or risk in the ecosystem/stack today (ex security risk)
46 | - Increases our execution velocity, making subsequent work easier / faster / more impactful
47 | - Unlocks leveraged ecosystem experimentation / contribution / impact through our stack
48 | - Blocker for critical high-value user journeys (directly or indirectly)
49 | - Confidence in this project improving our usability & adoption (supported by evidence)
50 | - Ease of completing this project (eng weeks)
51 |
52 |
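53 | The automatic move from "Needs Review" to "Needs Owner" described above could, for example, be wired up with the same action this repo already uses in `.github/workflows/add-to-pipeline.yml`. The sketch below is illustrative only and is not a workflow in this repo: it assumes that action also accepts `pull_request_review` triggers, and it moves the card on the first approval rather than waiting for the required number of approvals.
54 | 
55 | ```yaml
56 | # Hypothetical sketch, not an actual workflow in this repo
57 | name: Move approved proposals to Needs Owner
58 | on:
59 |   pull_request_review:
60 |     types: [submitted]
61 | jobs:
62 |   move-card:
63 |     runs-on: ubuntu-latest
64 |     # Only act when the submitted review is an approval
65 |     if: github.event.review.state == 'approved'
66 |     steps:
67 |       - uses: alex-page/github-project-automation-plus@v0.3.0
68 |         with:
69 |           project: Project Pipeline
70 |           column: Needs Owner
71 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
72 | ```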
--------------------------------------------------------------------------------
/notebooks/ecosystem-dashboard-url-generation-for-maintenance-triage.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# About\n",
8 | "This document is used to create URLs for views into the [Ecosystem Dashboard](https://github.com/ipfs-shipyard/ecosystem-dashboard) that help with weekly maintenance triage.\n",
9 | "\n",
10 | "This was originally part of the [202103 Proposal for PL Public GitHub Repository Maintenance](https://www.notion.so/protocollabs/202103-Proposal-for-PL-Public-GitHub-Repository-Maintenance-fed8bb8bf73b418d9ebe9dd30c854d74).\n",
11 | "\n",
12 | "The links generated by this notebook are usually copy/pasted into:\n",
13 | "* [Helia Issue Triage Notes](https://www.notion.so/pl-strflt/Helia-Issue-Triage-Notes-09c8e86b5d744c74a88c61da43899655)\n",
14 | "* [js-libp2p Issue Triage Notes](https://www.notion.so/pl-strflt/js-libp2p-Issue-Triage-Notes-475313c1a8a54a37b66a4855b37d21a3)\n",
15 | "* [Kubo Issue Triage Notes](https://www.notion.so/pl-strflt/Kubo-Issue-Triage-Notes-7d4983e8cf294e07b3cc51b0c60ede9a)\n",
16 | "* [go-libp2p Issue Triage Notes](https://www.notion.so/pl-strflt/go-libp2p-Issue-Triage-Notes-74ee24dd3a15462e8a01e87dcf136706)\n",
17 | "* [IPLD Issue Triage Notes](https://www.notion.so/pl-strflt/IPLD-Triage-Notes-e19f3f592dc9470ebce62b50691a3db5)\n",
18 | "\n",
19 | "Note: there is a backlog item to move this to another environment that would be quicker to edit like Observable: https://github.com/protocol/web3-dev-team/issues/135"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "# Helper Functions\n",
27 | "These are helper functions for creating the triage hyperlinks."
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "from urllib.parse import urlparse, parse_qs, urlunparse, urlencode\n",
37 | "from IPython.display import HTML\n",
38 | "\n",
39 | "# Wrapper around create_triage_hyperlink that outputs HTML hyperlink in the cell output.\n",
40 | "def display_triage_hyperlink(query_string_descriptions_to_parts_dict, domain=\"ecosystem-research.herokuapp.com\"):\n",
41 | " display(HTML(create_triage_hyperlink(query_string_descriptions_to_parts_dict, domain)))\n",
42 | "\n",
43 | "# Create HTML anchor tag with a friendly description based on the provided map.\n",
44 | "# See below for usage examples.\n",
45 | "def create_triage_hyperlink(query_string_descriptions_to_parts_dict, domain=\"ecosystem-research.herokuapp.com\"):\n",
46 | " # Merge in all the maps that are provided in query_string_descriptions_to_parts_dict\n",
47 | " # https://stackoverflow.com/a/37358304/16318\n",
48 | " query_string_parts = {key:val for d in query_string_descriptions_to_parts_dict.values() for key,val in d.items()}\n",
49 | " triage_url = create_triage_url(query_string_parts, domain)\n",
50 | " description = \" \".join(query_string_descriptions_to_parts_dict.keys())\n",
51 | " return f'{description}'\n",
52 | "\n",
53 | "# Create a URL to the ecosystem dashboard.\n",
54 | "# Domain is overrideable since Filecoin dashboard has different domain.\n",
55 | "def create_triage_url(query_string_parts, domain=\"ecosystem-research.herokuapp.com\", path=\"/all\"):\n",
56 | " url_parts = {\n",
57 | " \"scheme\" : 'https', \n",
58 | " \"netloc\" : domain, \n",
59 | " \"path\" : path, \n",
60 | " \"params\" : '', \n",
61 | " \"query\" : urlencode(query_string_parts, doseq=True), \n",
62 | " \"fragment\" : ''\n",
63 | " }\n",
64 | " return urlunparse(url_parts.values())\n",
65 | "\n",
66 | "# Gets a map of the query string parts from a URL. \n",
67 | "# Useful for going from an ecosystem dashboard URL to code.\n",
68 | "def get_query_string_parts(url):\n",
69 | " return parse_qs(urlparse(url).query)"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "# Ecosystem Dashboard Query Parameters\n",
77 | "Below are various maps of Ecosystem Dashboard query parameters that can beused to create different URLs."
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": [
86 | "collabs = {\n",
87 | " 'only_collabs': 'true',\n",
88 | " \"exclude_core\" : \"true\",\n",
89 | " 'range': '365',\n",
90 | "}\n",
91 | "community = {\n",
92 | " 'community': 'true',\n",
93 | " \"exclude_core\" : \"true\",\n",
94 | "}\n",
95 | "core = { # PL employees\n",
96 | " \"only_core\" : \"true\",\n",
97 | "}\n",
98 | "no_reponse = {'no_response': 'true'}\n",
99 | "pull_requests = {'type': 'pull_requests'}\n",
100 | "issues = {'type': 'issues'}\n",
101 | "exclude_labels_where_already_triaged_list = [\n",
102 | " \"status/ready\", \n",
103 | " \"ready\", \n",
104 | " \"status/in-progress\", \n",
105 | " \"status/blocked\", \n",
106 | " \"need/analysis\", \n",
107 | " 'kind/question', \n",
108 | " \"question\", \n",
109 | " \"exploration\", \n",
110 | " \"kind/discussion\", \n",
111 | " \"need/author-input\",\n",
112 | " \"P0\",\n",
113 | " \"P1\",\n",
114 | " \"P2\",\n",
115 | " \"P3\",\n",
116 | " \"P4\",\n",
117 | "]\n",
118 | "exclude_labels_where_already_triaged = {\n",
119 | " 'exclude_label[]': exclude_labels_where_already_triaged_list,\n",
120 | "}\n",
121 | "need_author_input = {\n",
122 | " 'label[]': [\"need/author-input\"],\n",
123 | " 'range': '365', # specify a range so that a default value isn't used\n",
124 | "}\n",
125 | "exclude_need_author_input = {\n",
126 | " 'exclude_label[]': [\"need/author-input\"],\n",
127 | "}\n",
128 | "need_triage_label = {'label[]': [\"need/triage\"],}\n",
129 | "blocked_label = {'label[]': [\"status/blocked\"],}\n",
130 | "in_progress_label = {'label[]': [\"status/in-progress\"],}\n",
131 | "ready_label = {'label[]': [\"status/ready\"],}\n",
132 | "exclude_labels_where_already_triaged_and_need_triage = {\n",
133 | " 'exclude_label[]': exclude_labels_where_already_triaged_list + [\"need/triage\"]\n",
134 | "}\n",
135 | "gui_repos = [\n",
136 | " \"ipfs/ipfs-companion\", \n",
137 | " \"ipfs/ipfs-desktop\", \n",
138 | " \"ipfs/ipfs-webui\",\n",
139 | " \"ipfs/public-gateway-checker\",\n",
140 | " \"ipfs/ipfs-update\",\n",
141 | " \"ipfs/ipld-explorer-components\"\n",
142 | "]\n",
143 | "ipfs_community_repos = [\n",
144 | " \"ipfs/awesome-ipfs\",\n",
145 | " \"ipfs/community\",\n",
146 | " \"ipfs/devgrants\",\n",
147 | " \"ipfs/ecosystem-directory\",\n",
148 | "]\n",
149 | "ipfs_website_repos = [\n",
150 | " \"ipfs/ipfs-website\",\n",
151 | " \"ipfs/ipfs-blog\",\n",
152 | "]\n",
153 | "ipfs_shared_implementation_repos = ipfs_website_repos + [\n",
154 | " \"ipfs/ipfs\",\n",
155 | " \"ipfs/ipfs-docs\",\n",
156 | " \"ipfs/specs\",\n",
157 | " \"ipfs/notes\",\n",
158 | " \"ipfs/infra\",\n",
159 | "]\n",
160 | "\n"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "# Go IPFS"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": [
176 | "go_ipfs = {\n",
177 | " \"org\" : \"ipfs\",\n",
178 | " \"exclude_language[]\" : [\"JavaScript\", \"TypeScript\"],\n",
179 | " \"exclude_repo_full_name[]\" : gui_repos + ipfs_community_repos + ipfs_website_repos + [\n",
180 | " \"ipfs/ipfs-cluster\",\n",
181 | " \"ipfs/go-graphsync\", \n",
182 | " \"ipfs/pinning-services-api-spec\", \n",
183 | " \"ipfs/distributions\", \n",
184 | " \"ipfs/distributed-wikipedia-mirror\",\n",
185 | " \"ipfs/in-web-browsers\",\n",
186 | " \"ipfs/js-ipfs\",\n",
187 | " \"ipfs/aegir\",\n",
188 | " ],\n",
189 | " 'range': '360',\n",
190 | " 'state': 'open',\n",
191 | " \"per_page\" : 100,\n",
192 | " 'sort': 'updated_at',\n",
193 | " 'order': 'desc',\n",
194 | " \"no_boards\" : \"true\",\n",
195 | "}\n",
196 | "go_ipfs_with_boards = go_ipfs | {}\n",
197 | "go_ipfs_with_boards.pop(\"no_boards\")\n",
198 | "# Have to remove the \"no_boards\" key. Setting it to \"false\" doesn't help.\n",
199 | "\n",
200 | "# One-off: need author input\n",
201 | "display_triage_hyperlink({\n",
202 | " \"go-ipfs\" : go_ipfs_with_boards,\n",
203 | " \"need/author-input\" : need_author_input,\n",
204 | " \"\" : {\"order\" : \"asc\"}, # see oldest need author input first so we can potentially close\n",
205 | "})\n",
206 | "\n",
207 | "# Collabs\n",
208 | "display_triage_hyperlink({\n",
209 | " \"go-ipfs\" : go_ipfs,\n",
210 | " \"collabs\" : collabs,\n",
211 | " \"PRs\" : pull_requests,\n",
212 | "})\n",
213 | "display_triage_hyperlink({\n",
214 | " \"go-ipfs\" : go_ipfs,\n",
215 | " \"collabs\" : collabs,\n",
216 | " \"issues\" : issues,\n",
217 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
218 | "})\n",
219 | "display_triage_hyperlink({\n",
220 | " \"go-ipfs\" : go_ipfs,\n",
221 | " \"collabs\" : collabs,\n",
222 | " \"issues\" : issues,\n",
223 | " \"status/blocked\" : blocked_label,\n",
224 | "})\n",
225 | "# Community\n",
226 | "display_triage_hyperlink({\n",
227 | " \"go-ipfs\" : go_ipfs,\n",
228 | " \"community\" : community,\n",
229 | " \"PRs\" : pull_requests,\n",
230 | "})\n",
231 | "display_triage_hyperlink({\n",
232 | " \"go-ipfs\" : go_ipfs,\n",
233 | " \"community\" : community,\n",
234 | " \"issues\" : issues,\n",
235 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
236 | "})\n",
237 | "display_triage_hyperlink({\n",
238 | " \"go-ipfs\" : go_ipfs,\n",
239 | " \"community\" : community,\n",
240 | " \"issues\" : issues,\n",
241 | " \"status/blocked\" : blocked_label,\n",
242 | "})\n",
243 | "# Core\n",
244 | "display_triage_hyperlink({\n",
245 | " \"go-ipfs\" : go_ipfs,\n",
246 | " \"core\" : core,\n",
247 | " \"PRs\" : pull_requests,\n",
248 | "})\n",
249 | "display_triage_hyperlink({\n",
250 | " \"go-ipfs\" : go_ipfs,\n",
251 | " \"core\" : core,\n",
252 | " \"issues\" : issues,\n",
253 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
254 | "})\n",
255 | "display_triage_hyperlink({\n",
256 | " \"go-ipfs\" : go_ipfs,\n",
257 | " \"core\" : core,\n",
258 | " \"issues\" : issues,\n",
259 | " \"status/blocked\" : blocked_label,\n",
260 | "})\n",
261 | "\n",
262 | "# Other repos that care about\n",
263 | "libp2p_repos_that_go_ipfs_owns = [\n",
264 | " \"libp2p/hydra-booster\",\n",
265 | " \"libp2p/hydra-booster-infra\",\n",
266 | " \"libp2p/go-libp2p-kad-dht\",\n",
267 | "]\n",
268 | "other_repos = {\n",
269 | " \"repo_full_name[]\" : libp2p_repos_that_go_ipfs_owns,\n",
270 | " 'range': '360',\n",
271 | " 'state': 'open',\n",
272 | " \"per_page\" : 100,\n",
273 | " 'sort': 'updated_at',\n",
274 | " 'order': 'desc',\n",
275 | " \"no_boards\" : \"true\",\n",
276 | "}\n",
277 | "display_triage_hyperlink({\n",
278 | " \"other_repos\" : other_repos,\n",
279 | " \"PRs\" : pull_requests,\n",
280 | "})\n",
281 | "display_triage_hyperlink({\n",
282 | " \"other_repos\" : other_repos,\n",
283 | " \"issues\" : issues,\n",
284 | "})"
285 | ]
286 | },
287 | {
288 | "cell_type": "markdown",
289 | "metadata": {},
290 | "source": [
291 | "# go-libp2p"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": null,
297 | "metadata": {},
298 | "outputs": [],
299 | "source": [
300 | "go_libp2p_pubsub_repos = [\n",
301 | " \"libp2p/go-libp2p-pubsub\",\n",
302 | " \"libp2p/go-libp2p-pubsub-router\",\n",
303 | " \"libp2p/go-libp2p-pubsub-tracer\",\n",
304 | "]\n",
305 | "go_libp2p = {\n",
306 | " \"org\" : \"libp2p\",\n",
307 | " \"exclude_language[]\" : [\"JavaScript\", \"TypeScript\", \"Rust\", \"C++\" , \"Kotlin\"],\n",
308 | " \"exclude_repo_full_name[]\" : libp2p_repos_that_go_ipfs_owns + go_libp2p_pubsub_repos + [\n",
309 | " \"libp2p/go-libp2p-routing-helpers\", \"libp2p/go-cidranger\", \"libp2p/py-libp2p\", \"libp2p/punchr\",\n",
310 | " ],\n",
311 | " 'range': '360',\n",
312 | " 'state': 'open',\n",
313 | " \"per_page\" : 100,\n",
314 | " 'sort': 'updated_at',\n",
315 | " 'order': 'desc',\n",
316 | " \"no_boards\" : \"true\",\n",
317 | "}\n",
318 | "go_libp2p_multiformats_repos = [\n",
319 | " \"multiformats/multiaddr\",\n",
320 | " \"multiformats/go-multiaddr\",\n",
321 | " \"multiformats/go-multiaddr-fmt\",\n",
322 | " \"multiformats/go-multistream\",\n",
323 | "]\n",
324 | "go_libp2p_with_boards = go_libp2p | {}\n",
325 | "go_libp2p_with_boards.pop(\"no_boards\")\n",
326 | "# Have to remove the \"no_boards\" key. Setting it to \"false\" doesn't help.\n",
327 | "\n",
328 | "# One-off: need author input\n",
329 | "display_triage_hyperlink({\n",
330 | " \"go-libp2p\" : go_libp2p_with_boards,\n",
331 | " \"need/author-input\" : need_author_input,\n",
332 | "})\n",
333 | "\n",
334 | "# Collabs\n",
335 | "display_triage_hyperlink({\n",
336 | " \"go-libp2p\" : go_libp2p,\n",
337 | " \"collabs\" : collabs,\n",
338 | " \"PRs\" : pull_requests,\n",
339 | " \"\" : exclude_need_author_input,\n",
340 | "})\n",
341 | "display_triage_hyperlink({\n",
342 | " \"go-libp2p\" : go_libp2p,\n",
343 | " \"collabs\" : collabs,\n",
344 | " \"issues\" : issues,\n",
345 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
346 | "})\n",
347 | "# Community\n",
348 | "display_triage_hyperlink({\n",
349 | " \"go-libp2p\" : go_libp2p,\n",
350 | " \"community\" : community,\n",
351 | " \"PRs\" : pull_requests,\n",
352 | " \"\" : exclude_need_author_input,\n",
353 | "})\n",
354 | "display_triage_hyperlink({\n",
355 | " \"go-libp2p\" : go_libp2p,\n",
356 | " \"community\" : community,\n",
357 | " \"issues\" : issues,\n",
358 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
359 | "})\n",
360 | "# Core\n",
361 | "display_triage_hyperlink({\n",
362 | " \"go-libp2p\" : go_libp2p,\n",
363 | " \"core\" : core,\n",
364 | " \"PRs\" : pull_requests,\n",
365 | " \"\" : exclude_need_author_input,\n",
366 | "})\n",
367 | "display_triage_hyperlink({\n",
368 | " \"go-libp2p\" : go_libp2p,\n",
369 | " \"core\" : core,\n",
370 | " \"issues\" : issues,\n",
371 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
372 | "})\n",
373 | "# Multiaddr\n",
374 | "go_libp2p_multiformats = {\n",
375 | " \"repo_full_name[]\" : go_libp2p_multiformats_repos,\n",
376 | " 'range': '360',\n",
377 | " 'state': 'open',\n",
378 | " \"per_page\" : 100,\n",
379 | " 'sort': 'updated_at',\n",
380 | " 'order': 'desc',\n",
381 | " \"no_boards\" : \"true\",\n",
382 | "}\n",
383 | "display_triage_hyperlink({\n",
384 | " \"go-libp2p-multiformats\" : go_libp2p_multiformats,\n",
385 | " \"PRs\" : pull_requests,\n",
386 | "})\n",
387 | "display_triage_hyperlink({\n",
388 | " \"go-libp2p-multiformats\" : go_libp2p_multiformats,\n",
389 | " \"issues\" : issues,\n",
390 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
391 | "})\n",
392 | "# Pubsub\n",
393 | "go_libp2p_pubsub = {\n",
394 | " \"repo_full_name[]\" : go_libp2p_pubsub_repos,\n",
395 | " 'range': '360',\n",
396 | " 'state': 'open',\n",
397 | " \"per_page\" : 100,\n",
398 | " 'sort': 'updated_at',\n",
399 | " 'order': 'desc',\n",
400 | " \"no_boards\" : \"true\",\n",
401 | "}\n",
402 | "display_triage_hyperlink({\n",
403 | " \"go-libp2p-pubsub\" : go_libp2p_pubsub,\n",
404 | " \"PRs\" : pull_requests,\n",
405 | "})\n",
406 | "display_triage_hyperlink({\n",
407 | " \"go-libp2p-pubsub\" : go_libp2p_pubsub,\n",
408 | " \"issues\" : issues,\n",
409 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
410 | "})"
411 | ]
412 | },
413 | {
414 | "cell_type": "markdown",
415 | "metadata": {},
416 | "source": [
417 | "# JS IPFS"
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "execution_count": null,
423 | "metadata": {},
424 | "outputs": [],
425 | "source": [
426 | "js_ipfs = {\n",
427 | " \"org\" : \"ipfs\",\n",
428 | " \"exclude_language[]\" : [\"Go\", \"Shell\", \"Makefile\"],\n",
429 | " \"exclude_repo_full_name[]\" : gui_repos + ipfs_community_repos + ipfs_shared_implementation_repos + [\n",
430 | " \"ipfs/ipfs-cluster\", \n",
431 | " \"ipfs/pinning-services-api-spec\", \n",
432 | " \"ipfs/distributions\", \n",
433 | " \"ipfs/distributed-wikipedia-mirror\",\n",
434 | " \"ipfs/in-web-browsers\",\n",
435 | " \"ipfs/fs-repo-migrations\",\n",
436 | " \"ipfs/bbloom\",\n",
437 | " \"ipfs/ipget\",\n",
438 | " \"ipfs/ipfs-ds-postgres\",\n",
439 | " \"ipfs/hang-fds\",\n",
440 | " \"ipfs/ipfs-ds-convert\",\n",
441 | " \"ipfs/github-mgmt\",\n",
442 | " \"ifps/npm-go-ipfs\",\n",
443 | " ],\n",
444 | " 'range': '360',\n",
445 | " 'state': 'open',\n",
446 | " \"per_page\" : 100,\n",
447 | " 'sort': 'updated_at',\n",
448 | " 'order': 'desc',\n",
449 | " \"no_boards\" : \"true\",\n",
450 | "}\n",
451 | "js_ipfs_with_boards = js_ipfs | {}\n",
452 | "js_ipfs_with_boards.pop(\"no_boards\")\n",
453 | "# Have to remove the \"no_boards\" key. Setting it to \"false\" doesn't help.\n",
454 | "\n",
455 | "# One-off: need author input\n",
456 | "display_triage_hyperlink({\n",
457 | " \"js-ipfs\" : js_ipfs_with_boards,\n",
458 | " \"need/author-input\" : need_author_input,\n",
459 | "})\n",
460 | "\n",
461 | "# Collabs\n",
462 | "display_triage_hyperlink({\n",
463 | " \"js-ipfs\" : js_ipfs,\n",
464 | " \"collabs\" : collabs,\n",
465 | " \"PRs\" : pull_requests,\n",
466 | "})\n",
467 | "display_triage_hyperlink({\n",
468 | " \"js-ipfs\" : js_ipfs,\n",
469 | " \"collabs\" : collabs,\n",
470 | " \"issues\" : issues,\n",
471 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
472 | "})\n",
473 | "# Community\n",
474 | "display_triage_hyperlink({\n",
475 | " \"js-ipfs\" : js_ipfs,\n",
476 | " \"community\" : community,\n",
477 | " \"PRs\" : pull_requests,\n",
478 | "})\n",
479 | "display_triage_hyperlink({\n",
480 | " \"js-ipfs\" : js_ipfs,\n",
481 | " \"community\" : community,\n",
482 | " \"issues\" : issues,\n",
483 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
484 | "})\n",
485 | "# Core\n",
486 | "display_triage_hyperlink({\n",
487 | " \"js-ipfs\" : js_ipfs,\n",
488 | " \"core\" : core,\n",
489 | " \"PRs\" : pull_requests,\n",
490 | "})\n",
491 | "display_triage_hyperlink({\n",
492 | " \"js-ipfs\" : js_ipfs,\n",
493 | " \"core\" : core,\n",
494 | " \"issues\" : issues,\n",
495 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
496 | "})"
497 | ]
498 | },
499 | {
500 | "cell_type": "markdown",
501 | "metadata": {},
502 | "source": [
503 | "# JS libp2p"
504 | ]
505 | },
506 | {
507 | "cell_type": "code",
508 | "execution_count": null,
509 | "metadata": {},
510 | "outputs": [],
511 | "source": [
512 | "js_libp2p = {\n",
513 | " \"language[]\" : [\"JavaScript\", \"TypeScript\"],\n",
514 | " \"exclude_repo_full_name[]\" : [\n",
515 | " \"libp2p/github-mgmt\",\n",
516 | " ],\n",
517 | " 'range': '360',\n",
518 | " 'state': 'open',\n",
519 | " \"per_page\" : 100,\n",
520 | " 'sort': 'updated_at',\n",
521 | " 'order': 'desc',\n",
522 | " \"org\" : \"libp2p\",\n",
523 | " \"no_boards\" : \"true\",\n",
524 | "}\n",
525 | "js_libp2p_multiformats_repos = [\n",
526 | " \"multiformats/js-multiaddr\",\n",
527 | " \"multiformats/js-mafmt\",\n",
528 | " \"multiformats/js-multiaddr-to-uri\",\n",
529 | " \"multiformats/js-uri-to-multiaddr\",\n",
530 | " \"multiformats/js-multiformats\", # per https://github.com/multiformats/js-multiformats/issues/273\n",
531 | "]\n",
532 | "\n",
533 | "js_libp2p_with_boards = js_libp2p | {}\n",
534 | "js_libp2p_with_boards.pop(\"no_boards\")\n",
535 | "# Have to remove the \"no_boards\" key. Setting it to \"false\" doesn't help.\n",
536 | "\n",
537 | "# One-off: need author input\n",
538 | "display_triage_hyperlink({\n",
539 | " \"js-libp2p\" : js_libp2p_with_boards,\n",
540 | " \"need/author-input\" : need_author_input,\n",
541 | "})\n",
542 | "\n",
543 | "# One-off: docs\n",
544 | "display_triage_hyperlink({\n",
545 | " \"libp2p/docs\" : {\n",
546 | " \"org\" : \"libp2p\",\n",
547 | " \"repo_full_name[]\" : [\"libp2p/docs\"],\n",
548 | " 'range': '360',\n",
549 | " 'state': 'open',\n",
550 | " \"per_page\" : 100,\n",
551 | " 'sort': 'updated_at',\n",
552 | " 'order': 'desc',\n",
553 | " \"no_boards\" : \"true\",\n",
554 | " },\n",
555 | " \"PRs\" : pull_requests,\n",
556 | "})\n",
557 | "\n",
558 | "# Collabs\n",
559 | "display_triage_hyperlink({\n",
560 | " \"js-libp2p\" : js_libp2p,\n",
561 | " \"collabs\" : collabs,\n",
562 | " \"PRs\" : pull_requests\n",
563 | "})\n",
564 | "display_triage_hyperlink({\n",
565 | " \"js-libp2p\" : js_libp2p,\n",
566 | " \"collabs\" : collabs,\n",
567 | " \"issues\" : issues,\n",
568 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
569 | "})\n",
570 | "# Community\n",
571 | "display_triage_hyperlink({\n",
572 | " \"js-libp2p\" : js_libp2p,\n",
573 | " \"community\" : community,\n",
574 | " \"PRs\" : pull_requests\n",
575 | "})\n",
576 | "display_triage_hyperlink({\n",
577 | " \"js-libp2p\" : js_libp2p,\n",
578 | " \"community\" : community,\n",
579 | " \"issues\" : issues,\n",
580 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
581 | "})\n",
582 | "# Core\n",
583 | "display_triage_hyperlink({\n",
584 | " \"js-libp2p\" : js_libp2p,\n",
585 | " \"core\" : core,\n",
586 | " \"PRs\" : pull_requests\n",
587 | "})\n",
588 | "display_triage_hyperlink({\n",
589 | " \"js-libp2p\" : js_libp2p,\n",
590 | " \"core\" : core,\n",
591 | " \"issues\" : issues,\n",
592 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
593 | "})\n",
594 | "# Multiformats\n",
595 | "js_libp2p_multiformats = {\n",
596 | " \"repo_full_name[]\" : js_libp2p_multiformats_repos,\n",
597 | " 'range': '360',\n",
598 | " 'state': 'open',\n",
599 | " \"per_page\" : 100,\n",
600 | " 'sort': 'updated_at',\n",
601 | " 'order': 'desc',\n",
602 | " \"no_boards\" : \"true\",\n",
603 | "}\n",
604 | "display_triage_hyperlink({\n",
605 | " \"js-libp2p-multiformats\" : js_libp2p_multiformats,\n",
606 | " \"PRs\" : pull_requests,\n",
607 | "})\n",
608 | "display_triage_hyperlink({\n",
609 | " \"js-libp2p-multiformats\" : js_libp2p_multiformats,\n",
610 | " \"issues\" : issues,\n",
611 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
612 | "})"
613 | ]
614 | },
615 | {
616 | "cell_type": "markdown",
617 | "metadata": {},
618 | "source": [
619 | "# IPLD and Multiformats"
620 | ]
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": null,
625 | "metadata": {},
626 | "outputs": [],
627 | "source": [
628 | "ipld = {\n",
629 | " \"org[]\" : [\"ipld\", \"multiformats\"],\n",
630 | " \"exclude_language[]\" : [\"Rust\"],\n",
631 | " \"exclude_repo_full_name[]\" : [\n",
632 | " # owned by Bedrock\n",
633 | " \"ipld/go-storethehash\",\n",
634 | " # owned by Nitro/DagHouse\n",
635 | " \"ipld/js-dag-ucan\",\n",
636 | " \"ipld/js-unixfs\",\n",
637 | " # edelweiss\n",
638 | " \"ipld/edelweiss\",\n",
639 | " # personal projects\n",
640 | " \"ipld/go-ipld-graphql\",\n",
641 | " \"ipld/go-ipldtool\",\n",
642 | " \"ipld/go-datalark\",\n",
643 | " # java\n",
644 | " \"ipld/libipld\",\n",
645 | " \"ipld/java-ipld-cbor\",\n",
646 | " \"ipld/java-cid\",\n",
647 | " # other libp2p multiaddr\n",
648 | " \"multiformats/py-multiaddr\",\n",
649 | " # ignite projects\n",
650 | " \"ipld/explore.ipld.io\",\n",
651 | " \"multiformats/cid-utils-website\",\n",
652 | " ] + go_libp2p_multiformats_repos + js_libp2p_multiformats_repos,\n",
653 | " 'range': '360',\n",
654 | " 'state': 'open',\n",
655 | " \"per_page\" : 100,\n",
656 | " 'sort': 'updated_at',\n",
657 | " 'order': 'desc',\n",
658 | " \"no_boards\" : \"true\",\n",
659 | "}\n",
660 | "ipld_with_boards = ipld | {}\n",
661 | "ipld_with_boards.pop(\"no_boards\")\n",
662 | "\n",
663 | "# One-off: need author input\n",
664 | "display_triage_hyperlink({\n",
665 | " \"ipld\" : ipld_with_boards,\n",
666 | " \"need/author-input\" : need_author_input,\n",
667 | "})\n",
668 | "\n",
669 | "# Collabs\n",
670 | "display_triage_hyperlink({\n",
671 | " \"ipld\" : ipld,\n",
672 | " \"collabs\" : collabs,\n",
673 | " \"PRs\" : pull_requests,\n",
674 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
675 | "})\n",
676 | "display_triage_hyperlink({\n",
677 | " \"ipld\" : ipld,\n",
678 | " \"collabs\" : collabs,\n",
679 | " \"issues\" : issues,\n",
680 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
681 | "})\n",
682 | "# Community\n",
683 | "display_triage_hyperlink({\n",
684 | " \"ipld\" : ipld,\n",
685 | " \"community\" : community,\n",
686 | " \"PRs\" : pull_requests,\n",
687 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
688 | "})\n",
689 | "display_triage_hyperlink({\n",
690 | " \"ipld\" : ipld,\n",
691 | " \"community\" : community,\n",
692 | " \"issues\" : issues,\n",
693 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
694 | "})\n",
695 | "# Core\n",
696 | "display_triage_hyperlink({\n",
697 | " \"ipld\" : ipld,\n",
698 | " \"core\" : core,\n",
699 | " \"PRs\" : pull_requests,\n",
700 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
701 | "})\n",
702 | "display_triage_hyperlink({\n",
703 | " \"ipld\" : ipld,\n",
704 | " \"core\" : core,\n",
705 | " \"issues\" : issues,\n",
706 | " \"non-triaged\" : exclude_labels_where_already_triaged,\n",
707 | "})"
708 | ]
709 | },
710 | {
711 | "cell_type": "markdown",
712 | "metadata": {},
713 | "source": [
714 | "# Bootcamp URLs\n",
715 | "Note: this should get moved into its own file but is here currently for easy reuse."
716 | ]
717 | },
718 | {
719 | "cell_type": "code",
720 | "execution_count": null,
721 | "metadata": {},
722 | "outputs": [],
723 | "source": [
724 | "one_off_base_base_qs_parts = {\n",
725 | " 'state': 'open',\n",
726 | " \"per_page\" : 100,\n",
727 | " 'sort': 'updated_at',\n",
728 | " 'order': 'asc',\n",
729 | " 'range': '1080',\n",
730 | " 'label[]': [\"good first issue\", \"topic/bootcamp\", \"help wanted\", \"exp/beginner\"],\n",
731 | "}\n",
732 | "for language in [\"Go\", \"JavaScript\", \"TypeScript\", \"Rust\"]:\n",
733 | " for label in [\"good first issue\", \"topic/bootcamp\", \"exp/beginner\", \"help wanted\"]:\n",
734 | " display_triage_hyperlink({\n",
735 | " \"\" : one_off_base_base_qs_parts,\n",
736 | " language : {\"language\" : language},\n",
737 | " label : {'label[]' : [label]},\n",
738 | " \"issues\" : issues,\n",
739 | " })"
740 | ]
741 | }
742 | ],
743 | "metadata": {
744 | "interpreter": {
745 | "hash": "c6e4e9f98eb68ad3b7c296f83d20e6de614cb42e90992a65aa266555a3137d0d"
746 | },
747 | "kernelspec": {
748 | "display_name": "Python 3.9.2 ('base')",
749 | "language": "python",
750 | "name": "python3"
751 | },
752 | "language_info": {
753 | "codemirror_mode": {
754 | "name": "ipython",
755 | "version": 3
756 | },
757 | "file_extension": ".py",
758 | "mimetype": "text/x-python",
759 | "name": "python",
760 | "nbconvert_exporter": "python",
761 | "pygments_lexer": "ipython3",
762 | "version": "3.9.2"
763 | },
764 | "orig_nbformat": 2
765 | },
766 | "nbformat": 4,
767 | "nbformat_minor": 2
768 | }
769 |
--------------------------------------------------------------------------------
/notebooks/ecosystem-github-activity-analysis-202209-ipfscamp2022.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Purpose\n",
8 | "This is a sample notebook for grabbing all the Github event data that the ecosystem dashboard has.\n",
9 | "\n",
10 | "This is useful if you want to analyze GitHub actions beyond opening PRs and issues. It will also show comments, PR/issue closing, etc.\n",
11 | "\n",
12 | "This was originally put together to help with identifying top contributors on GitHub that should likely be invited to IPFS Camp 2022."
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": null,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "%load_ext autoreload\n",
22 | "%autoreload 2\n",
23 | "\n",
24 | "import pandas as pd\n",
25 | "import datetime\n",
26 | "import ecosystem_dashboard_utils"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "report_date = datetime.date.today()\n",
36 | "report_date_str = report_date.strftime(\"%Y-%m-%d\")\n",
37 | "\n",
38 | "from datetime import date\n",
39 | "analysis_start_date = date(2022, 9, 11) # Adjust for how far back you want to look\n",
40 | "number_of_days = (report_date - analysis_start_date).days"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "# The \"payload\" field add a bunch of data that we don't need so we strip it out to make the json more wieldly to consume\n",
50 | "def payload_filter(x): \n",
51 | " del x[\"payload\"]\n",
52 | " return x\n",
53 | "for ecosystem in [\"ipfs\"]: # You could add \"filecoin\"\n",
54 | " for org in [\"ipfs\", \"ipfs-shipyard\"]:\n",
55 | " events_path = f\"{ecosystem}-{org}-events-{report_date_str}.json\"\n",
56 | " ecosystem_dashboard_utils.dump_api(unpaginated_url=f\"https://{ecosystem}.ecosystem-dashboard.com/events.json?range={number_of_days}&org={org}&\", output_path=events_path, filter=payload_filter, page_size=500)"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": null,
62 | "metadata": {},
63 | "outputs": [],
64 | "source": [
65 | "# report_date_str = \"2022-05-19\"\n",
66 | "df = pd.DataFrame()\n",
67 | "for ecosystem in [\"ipfs\"]:\n",
68 | " for org in [\"ipfs\", \"ipfs-shipyard\"]:\n",
69 | " events_path = f\"{ecosystem}-{org}-events-{report_date_str}.json\"\n",
70 | " ecosystem_df = pd.read_json(events_path, orient='records')\n",
71 | " if ecosystem_df.size == 0:\n",
72 | " continue\n",
73 | " ecosystem_df = ecosystem_df.set_index(\"github_id\")\n",
74 | " df = pd.concat([df, ecosystem_df])\n",
75 | "\n",
76 | "# Remove duplciate event.\n",
77 | "# This is needed since there are duplciate repositories in filecoin and ipfs ecosystem dashboards.\n",
78 | "# https://stackoverflow.com/questions/13035764/remove-pandas-rows-with-duplicate-indices\n",
79 | "df = df[~df.index.duplicated(keep='first')]\n",
80 | "df"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {},
87 | "outputs": [],
88 | "source": [
89 | "df[\"event/action\"] = df.apply(lambda x: x[\"event_type\"] + \"/\" + x[\"action\"] if x[\"action\"] else x[\"event_type\"], axis=1)\n",
90 | "df"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": null,
96 | "metadata": {},
97 | "outputs": [],
98 | "source": [
99 | "p1_col_names = [\n",
100 | " 'org',\n",
101 | " 'repository_full_name',\n",
102 | " 'actor',\n",
103 | " 'event/action',\n",
104 | "]\n",
105 | "p2_col_names = df.columns.to_list()\n",
106 | "for p1_col_name in p1_col_names:\n",
107 | " p2_col_names.remove(p1_col_name)\n",
108 | "\n",
109 | "ordered_col_names = []\n",
110 | "ordered_col_names.extend(p1_col_names)\n",
111 | "ordered_col_names.extend(p2_col_names)\n",
112 | "ordered_col_names"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": null,
118 | "metadata": {},
119 | "outputs": [],
120 | "source": [
121 | "df = df[ordered_col_names]\n",
122 | "df"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "df.to_csv(f\"events-combined-cleaned-{report_date_str}.csv\")"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {},
137 | "source": [
138 | "# Output\n",
139 | "You now have tabular data for each event, which makes it easy to create pivot tables to summarize how many actions a given user took."
140 | ]
141 | }
142 | ],
143 | "metadata": {
144 | "interpreter": {
145 | "hash": "c6e4e9f98eb68ad3b7c296f83d20e6de614cb42e90992a65aa266555a3137d0d"
146 | },
147 | "kernelspec": {
148 | "display_name": "Python 3.9.2 ('base')",
149 | "language": "python",
150 | "name": "python3"
151 | },
152 | "language_info": {
153 | "codemirror_mode": {
154 | "name": "ipython",
155 | "version": 3
156 | },
157 | "file_extension": ".py",
158 | "mimetype": "text/x-python",
159 | "name": "python",
160 | "nbconvert_exporter": "python",
161 | "pygments_lexer": "ipython3",
162 | "version": "3.9.2"
163 | },
164 | "orig_nbformat": 2
165 | },
166 | "nbformat": 4,
167 | "nbformat_minor": 2
168 | }
169 |
--------------------------------------------------------------------------------
/notebooks/ecosystem_dashboard_utils.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 |
4 | # Dumps the JSON results of the provided ecosystem URL to disk
5 | # by paging through until reaching the last page.
6 | # The unpaginated_url is expected to:
7 | # 1. Be a .json URL
8 | # 2. End with a ? or a & so that the paging query parameters can be appended.
9 | # If you are getting timeouts, you can adjust the page_size.
10 | # If you want to transform the data before writing it to disk, you can apply a filter.
11 | # A common filter is to remove the "payload" field from the "/events" API result.
12 | def dump_api(unpaginated_url, output_path, filter=lambda x: x, page_size=1000):
13 | page = 1
14 | last_page_json_array_filtered = []
15 | cumulative_json_array = []
16 |
17 | while True:
18 | url = f"{unpaginated_url}per_page={page_size}&page={page}"
19 | print(f"Fetching {url}")
20 | r = requests.get(url)
21 | page_json_array = r.json()
22 | fetched_page_size = len(page_json_array)
23 | print(f"Fetched {fetched_page_size} items")
24 | page_json_array_filtered = []
25 | for obj in page_json_array:
26 | page_json_array_filtered.append(filter(obj))
27 | if page_size != fetched_page_size:
28 | cumulative_json_array.extend(page_json_array_filtered)
29 | break
30 | if last_page_json_array_filtered == page_json_array_filtered: # the array comparison method hasn't always been reliable so it's our second level check
31 | break
32 | cumulative_json_array.extend(page_json_array_filtered)
33 | page = page + 1
34 | last_page_json_array_filtered = page_json_array_filtered
35 |
36 |     with open(output_path, "w") as output_file:
37 |         json.dump(cumulative_json_array, output_file, indent=2)
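38 | 
39 | # Example usage (an illustrative sketch only): the endpoint below mirrors the calls made
40 | # in this repo's notebooks, while the output filename is just a placeholder.
41 | if __name__ == "__main__":
42 |     dump_api(
43 |         unpaginated_url="https://ipfs.ecosystem-dashboard.com/repositories.json?",
44 |         output_path="ipfs-repos.json",
45 |     )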
--------------------------------------------------------------------------------
/notebooks/github-event-processing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "# Purpose\n",
9 | "\n",
10 | "This notebook takes Ecosystem Dashbaord dumps of Github event data and:\n",
11 | "1. Merges them (as may want to query different data ranges or different database instances as we have IPFS, Filecoin, and libp2p instances)\n",
12 | "2. Deduplicates them (as there is often depulicate events across the IPFS, Filecoin, and libp2p instances)\n",
13 | "3. Cleans them up (as the github event types and actions aren't the most intuitive or concicse)\n",
14 | "4. Adds additional data (tagging data with YYYYMM or YearQuarter is helpful for summary)\n",
15 | "5. Exports to a more useful form of showing a monthly rollup of how many github actions a given actor took in a given month/repo.\n",
16 | "\n",
17 | "This allows for easy import to do further analysis/summary in places like Google Sheets. \n",
18 | "@biglep has been publishing to https://docs.google.com/spreadsheets/d/1jR6ueqrcdg6CYUvV3ibVWMjkGKj5WlU8ysuCO0TrHvo/edit?usp=sharing\n",
19 | "\n",
20 | "This data can be useful for getting insight into github activity in our repos. @biglep has found this useful for:\n",
21 | "1. understanding at a high level who some of our contributors are and how they're changing\n",
22 | "2. getting a pulse at performance review time on where various team members have been contributing"
23 | ]
24 | },
25 | {
26 | "attachments": {},
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "# SQL Query\n",
31 | "Ideally this notebook should do the notebook queries directly, but at least as of 2023-06-19, biglep@ used his previous Postgres connections setup for `pgAdmin` and took dumps from there.\n",
32 | "\n",
33 | "The query being run was:\n",
34 | "\n",
35 | "```sql\n",
36 | "SELECT\n",
37 | " github_id,\n",
38 | " actor,\n",
39 | " event_type,\n",
40 | " action,\n",
41 | " org,\n",
42 | " repository_full_name,\n",
43 | " created_at,\n",
44 | " core,\n",
45 | " bot,\n",
46 | " pmf\n",
47 | "FROM\n",
48 | " events\n",
49 | "WHERE\n",
50 | " -- adjust the dates as needed\n",
51 | " created_at >= DATE '2023-01-01'\n",
52 | " AND created_at < DATE '2023-06-15'\n",
53 | " AND org IN (\n",
54 | " -- These are PL's \"core\" orgs\n",
55 | " 'multiformats',\n",
56 | " 'ipld',\n",
57 | " 'libp2p',\n",
58 | " 'ipfs',\n",
59 | " 'ipfs-examples',\n",
60 | " 'ipfs-shipyard',\n",
61 | " 'ipfs-inactive',\n",
62 | " 'ipfs-cluster',\n",
63 | " 'ipni',\n",
64 | " 'protocol',\n",
65 | " 'web3-storage',\n",
66 | " 'nftstorage',\n",
67 | " 'ProtoSchool',\n",
68 | " 'pl-strflt',\n",
69 | " 'plprobelab',\n",
70 | " 'application-research',\n",
71 | " 'filecoin-project',\n",
72 | " 'filecoin-shipyard',\n",
73 | " 'testground'\n",
74 | " )\n",
75 | " AND event_type IN (\n",
76 | " -- https://docs.github.com/en/developers/webhooks-and-events/events/github-event-types\n",
77 | " 'IssueCommentEvent',\n",
78 | " 'IssuesEvent',\n",
79 | " 'PullRequestEvent',\n",
80 | " 'PullRequestReviewEvent',\n",
81 | " 'PullRequestReviewCommentEvent',\n",
82 | " 'ReleaseEvent'\n",
83 | " )\n",
84 | " AND actor NOT LIKE '%bot%'\n",
85 | " AND actor NOT LIKE '%codecov%';\n",
86 | "```"
87 | ]
88 | },
89 | {
90 | "attachments": {},
91 | "cell_type": "markdown",
92 | "metadata": {},
93 | "source": [
94 | "# Code\n",
95 | "The skeleton of this code was generated from ChatGPT and modified from there.\n",
96 | "It assumes the .csv files from SQL dumps all live in an input directory."
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": null,
102 | "metadata": {},
103 | "outputs": [],
104 | "source": [
105 | "import pandas as pd\n",
106 | "import os\n",
107 | "import datetime\n",
108 | "report_date = datetime.date.today()\n",
109 | "report_date_str = report_date.strftime(\"%Y-%m-%d\")\n",
110 | "\n",
111 | "# Initialize an empty DataFrame\n",
112 | "df = pd.DataFrame()\n",
113 | "\n",
114 | "# Specify the directory paths for input and output\n",
115 | "input_directory = 'data/ecosystem-dashboard-github-event-dumps-from-sql'\n",
116 | "output_directory = f\"{input_directory}/output\"\n",
117 | "\n",
118 | "# List all files in the directory\n",
119 | "files = os.listdir(input_directory)\n",
120 | "\n",
121 | "# Iterate over each file\n",
122 | "for file_name in files:\n",
123 | " # Check if the item is a CSV file\n",
124 | " file_path = os.path.join(input_directory, file_name)\n",
125 | " if os.path.isfile(file_path) and file_path.endswith(\".csv\"):\n",
126 | " # Open the file\n",
127 | " print(f\"Reading {file_name}\")\n",
128 | " temp_df = pd.read_csv(file_path, index_col='github_id')\n",
129 | " print(f\"Read {file_name} with {len(temp_df)} rows\")\n",
130 | " df = pd.concat([df, temp_df])\n",
131 | "\n",
132 | "# Deduplicate the rows based on the index (github_id)\n",
133 | "print(f\"Concatenated size: {len(df)} rows\")\n",
134 | "df = df[~df.index.duplicated(keep='first')]\n",
135 | "print(f\"Deduplicated size: {len(df)} rows\")\n",
136 | "\n",
137 | "# Convert 'created_at' column to datetime format\n",
138 | "df['created_at'] = pd.to_datetime(df['created_at'])\n",
139 | "\n",
140 | "# Add 'Year Month' column in 'YYYYMM' format\n",
141 | "df['year_month'] = df['created_at'].dt.strftime('%Y%m')\n",
142 | "\n",
143 | "# Add 'Year Quarter' column\n",
144 | "df['year_quarter'] = df['created_at'].dt.to_period('Q')\n",
145 | "\n",
146 | "# Create 'repository_name' column by extracting repository name\n",
147 | "df['repository_name'] = df['repository_full_name'].str.split('/').str.get(1)\n",
148 | "\n",
149 | "mapping_to_friendly_event_name = {\n",
150 | " \"IssueCommentEvent-created\" : \"Issue Comment\",\n",
151 | "\t\"IssuesEvent-closed\" : \"Issue Close\",\n",
152 | "\t\"IssuesEvent-opened\" : \"Issue Open\",\n",
153 | "\t\"IssuesEvent-reopened\" : \"Issue Reopen\",\n",
154 | "\t\"PullRequestEvent-closed\" : \"PR Close\",\n",
155 | "\t\"PullRequestEvent-opened\" : \"PR Open\",\n",
156 | "\t\"PullRequestEvent-reopened\" : \"PR Reopen\",\n",
157 | "\t\"PullRequestReviewCommentEvent-created\" : \"PR Comment\",\n",
158 | "\t\"PullRequestReviewEvent-created\" : \"PR Review\",\n",
159 | "\t\"ReleaseEvent-published\" : \"Release Publish\",\n",
160 | "}\n",
161 | "\n",
162 | "# Define a function to add a new column based on row values\n",
163 | "# This is so get human friendly \"event_type\" + \"action\" strings\n",
164 | "def get_friendly_event_name(row):\n",
165 | " # Access values of specific columns in the row\n",
166 | " friendly_name = row['event_type'] + \"-\" + row['action']\n",
167 | " return mapping_to_friendly_event_name.get(friendly_name, friendly_name)\n",
168 | "\n",
169 | "# Apply the function to each row and assign the result to a new column\n",
170 | "df['friendly_event_name'] = df.apply(lambda row: get_friendly_event_name(row), axis=1)\n",
171 | "\n",
172 | "df.to_csv(f\"{output_directory}/github-event-data-cleaned-{report_date_str}.csv\")\n",
173 | "\n",
174 | "# Print the resulting DataFrame\n",
175 | "df"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "summary_table = pd.pivot_table(df.reset_index(), \n",
185 | " index=['org', 'repository_name', 'actor', 'year_quarter', 'year_month', 'friendly_event_name'], \n",
186 | " values='github_id', \n",
187 | " aggfunc='count',\n",
188 | " fill_value=0)\n",
189 | "\n",
190 | "summary_table.rename(columns={'github_id': 'count'}, inplace=True)\n",
191 | "\n",
192 | "summary_table"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": null,
198 | "metadata": {},
199 | "outputs": [],
200 | "source": [
201 | "summary_table.to_csv(f\"{output_directory}/github-event-monthly-summary-{report_date_str}.csv\")"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": null,
207 | "metadata": {},
208 | "outputs": [],
209 | "source": [
210 | "audit_table = pd.pivot_table(df.reset_index(), \n",
211 | " index=['year_month'], \n",
212 | " columns=['org'],\n",
213 | " values='github_id', \n",
214 | " aggfunc='count',\n",
215 | " fill_value=0)\n",
216 | "\n",
217 | "audit_table"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": null,
223 | "metadata": {},
224 | "outputs": [],
225 | "source": [
226 | "audit_table.to_csv(f\"{output_directory}/github-event-monthly-summary-audit-{report_date_str}.csv\")"
227 | ]
228 | }
229 | ],
230 | "metadata": {
231 | "kernelspec": {
232 | "display_name": "base",
233 | "language": "python",
234 | "name": "python3"
235 | },
236 | "language_info": {
237 | "codemirror_mode": {
238 | "name": "ipython",
239 | "version": 3
240 | },
241 | "file_extension": ".py",
242 | "mimetype": "text/x-python",
243 | "name": "python",
244 | "nbconvert_exporter": "python",
245 | "pygments_lexer": "ipython3",
246 | "version": "3.9.2"
247 | },
248 | "orig_nbformat": 4
249 | },
250 | "nbformat": 4,
251 | "nbformat_minor": 2
252 | }
253 |
--------------------------------------------------------------------------------
/notebooks/pl-repositories-dump.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "metadata": {
3 | "language_info": {
4 | "codemirror_mode": {
5 | "name": "ipython",
6 | "version": 3
7 | },
8 | "file_extension": ".py",
9 | "mimetype": "text/x-python",
10 | "name": "python",
11 | "nbconvert_exporter": "python",
12 | "pygments_lexer": "ipython3",
13 | "version": "3.9.2"
14 | },
15 | "orig_nbformat": 2,
16 | "kernelspec": {
17 | "name": "python3",
18 | "display_name": "Python 3.9.2 64-bit ('base': conda)"
19 | },
20 | "interpreter": {
21 | "hash": "95ec9ec1504d83f612128e0fb229072f90bbb4cb09d9d5d93b5dd26e0ca2cfd1"
22 | }
23 | },
24 | "nbformat": 4,
25 | "nbformat_minor": 2,
26 | "cells": [
27 | {
28 | "cell_type": "markdown",
29 | "source": [
30 | "# About\n",
31 | "This notebook queries the Ecosystem Dashboard to get a dump of repositories under the various PL GitHub organizations.\n",
32 | "\n",
33 | "The Ecosystem Dashboard is queried because:\n",
34 | "1. Avoids getting throttled by GitHub \n",
35 | "2. Doesn't require any API token setup\n",
36 | "3. Has additional metadata about our repos"
37 | ],
38 | "metadata": {}
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "source": [
44 | "%load_ext autoreload\n",
45 | "%autoreload 2\n",
46 | "\n",
47 | "import pandas as pd\n",
48 | "import datetime\n",
49 | "import ecosystem_dashboard_utils"
50 | ],
51 | "outputs": [],
52 | "metadata": {}
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "source": [
58 | "report_date = datetime.date.today()\n",
59 | "report_date_str = report_date.strftime(\"%Y-%m-%d\")"
60 | ],
61 | "outputs": [],
62 | "metadata": {}
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": null,
67 | "source": [
68 | "# Fetch repo data from the Ecosystem Dasbhaord\n",
69 | "ipfs_repos_path = f\"ipfs-repos-{report_date_str}.json\"\n",
70 | "ecosystem_dashboard_utils.dump_api(unpaginated_url=\"https://ipfs.ecosystem-dashboard.com/repositories.json?\", output_path=ipfs_repos_path)\n",
71 | "filecoin_repos_path = f\"filecoin-repos-{report_date_str}.json\"\n",
72 | "ecosystem_dashboard_utils.dump_api(unpaginated_url=\"https://filecoin.ecosystem-dashboard.com/repositories.json?\", output_path=filecoin_repos_path)"
73 | ],
74 | "outputs": [],
75 | "metadata": {}
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": null,
80 | "source": [
81 | "# Read the data into Pandas\n",
82 | "df = pd.read_json(ipfs_repos_path, orient='records').append(pd.read_json(filecoin_repos_path, orient='records'), ignore_index=True)\n",
83 | "df = df.set_index(\"github_id\")\n",
84 | "df"
85 | ],
86 | "outputs": [],
87 | "metadata": {}
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "source": [
93 | "# Put the most import columns first\n",
94 | "p1_col_names = [\n",
95 | " 'org',\n",
96 | " 'full_name',\n",
97 | " 'language',\n",
98 | " 'score',\n",
99 | " 'stargazers_count',\n",
100 | " 'forks_count',\n",
101 | " 'subscribers_count',\n",
102 | " 'open_issues_count',\n",
103 | " 'archived',\n",
104 | " 'description',\n",
105 | "]\n",
106 | "p2_col_names = df.columns.to_list()\n",
107 | "for p1_col_name in p1_col_names:\n",
108 | " p2_col_names.remove(p1_col_name)\n",
109 | "\n",
110 | "ordered_col_names = []\n",
111 | "ordered_col_names.extend(p1_col_names)\n",
112 | "ordered_col_names.extend(p2_col_names)\n",
113 | "ordered_col_names"
114 | ],
115 | "outputs": [],
116 | "metadata": {}
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": null,
121 | "source": [
122 | "df = df[ordered_col_names]\n",
123 | "df"
124 | ],
125 | "outputs": [],
126 | "metadata": {}
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "source": [
132 | "df = df.sort_values(by=[\"org\", \"language\", \"score\", \"full_name\"], ascending=[True, True, False, True])\n",
133 | "df"
134 | ],
135 | "outputs": [],
136 | "metadata": {}
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "source": [
142 | "df.to_csv(f\"pl-repos-cleaned-{report_date_str}.csv\")"
143 | ],
144 | "outputs": [],
145 | "metadata": {}
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "source": [
150 | "# Additional analsysis example: filter to active JS repos\n",
151 | "Filter down to the list of repos that are JS-based"
152 | ],
153 | "metadata": {}
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": null,
158 | "source": [
159 | "js_df = df[((df['language'] == \"JavaScript\") | (df['language'] == \"TypeScript\")) & (df['archived'] != True)]\n",
160 | "js_df"
161 | ],
162 | "outputs": [],
163 | "metadata": {}
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": null,
168 | "source": [
169 | "js_df.to_csv(f\"pl-js-active-repos-cleaned-{report_date_str}.csv\")"
170 | ],
171 | "outputs": [],
172 | "metadata": {}
173 | }
174 | ]
175 | }
--------------------------------------------------------------------------------
/notebooks/spec-github-activity-analysis-202210.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Purpose\n",
8 | "This is a notebook for grabbing all the Github event data that the ecosystem dashboard has for our \"spec and improvement proposal\" repos.\n",
9 | "\n",
10 | "This is useful if you want to analyze GitHub actions beyond opening PRs and issues. It will also show comments, PR/issue closing, etc.\n",
11 | "\n",
12 | "It was used as part of the PL EngRes summit to populate the \"Network Native Development\" slide: https://docs.google.com/presentation/d/1dRgEgEpR2htMgyIVXG0fwhBMVwnAsEtXNfvrmzHTqfI/edit#slide=id.g14b7a7f445c_0_476"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": null,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "%load_ext autoreload\n",
22 | "%autoreload 2\n",
23 | "\n",
24 | "import pandas as pd\n",
25 | "import datetime\n",
26 | "import ecosystem_dashboard_utils"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "report_date = datetime.date.today()\n",
36 | "report_date_str = report_date.strftime(\"%Y-%m-%d\")\n",
37 | "\n",
38 | "from datetime import date\n",
39 | "analysis_start_date = date(2022, 1, 1) # Adjust for how far back you want to look\n",
40 | "number_of_days = (report_date - analysis_start_date).days"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "# Ecosystem dashboard URLs are generated based on off of these\n",
50 | "repo_configs = [\n",
51 | " {\n",
52 | " \"ecosystem\" : \"ipfs\",\n",
53 | " \"org\" : \"ipfs\",\n",
54 | " \"repo\" : \"specs\"\n",
55 | " }, \n",
56 | " {\n",
57 | " \"ecosystem\" : \"ipfs\",\n",
58 | " \"org\" : \"libp2p\",\n",
59 | " \"repo\" : \"specs\"\n",
60 | " },\n",
61 | " {\n",
62 | " \"ecosystem\" : \"filecoin\",\n",
63 | " \"org\" : \"filecoin-project\",\n",
64 | " \"repo\" : \"FIPs\"\n",
65 | " },\n",
66 | "]"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": null,
72 | "metadata": {},
73 | "outputs": [],
74 | "source": [
75 | "# The \"payload\" field add a bunch of data that we don't need so we strip it out to make the json more wieldly to consume\n",
76 | "def payload_filter(x): \n",
77 | " del x[\"payload\"]\n",
78 | " return x\n",
79 | "\n",
80 | "for repo_config in repo_configs:\n",
81 | " ecosystem = repo_config[\"ecosystem\"]\n",
82 | " org = repo_config[\"org\"]\n",
83 | " repo = repo_config[\"repo\"]\n",
84 | " events_path = f\"{org}-{repo}-events-{report_date_str}.json\"\n",
85 | " ecosystem_dashboard_utils.dump_api(unpaginated_url=f\"https://{ecosystem}.ecosystem-dashboard.com/events.json?range={number_of_days}&repo_full_name={org}%2F{repo}&\", output_path=events_path, filter=payload_filter, page_size=200)"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "# Read everything back in that was persisted to disk\n",
95 | "\n",
96 | "# report_date_str = \"2022-05-19\"\n",
97 | "df = pd.DataFrame()\n",
98 | "\n",
99 | "for repo_config in repo_configs:\n",
100 | " ecosystem = repo_config[\"ecosystem\"]\n",
101 | " org = repo_config[\"org\"]\n",
102 | " repo = repo_config[\"repo\"]\n",
103 | " events_path = f\"{org}-{repo}-events-{report_date_str}.json\"\n",
104 | " ecosystem_df = pd.read_json(events_path, orient='records')\n",
105 | " ecosystem_df = ecosystem_df.set_index(\"github_id\")\n",
106 | " df = pd.concat([df, ecosystem_df])"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": null,
112 | "metadata": {},
113 | "outputs": [],
114 | "source": [
115 | "# Combine the event and action columns for analysis later.\n",
116 | "df[\"event/action\"] = df.apply(lambda x: x[\"event_type\"] + \"/\" + x[\"action\"] if x[\"action\"] else x[\"event_type\"], axis=1)\n",
117 | "df"
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": null,
123 | "metadata": {},
124 | "outputs": [],
125 | "source": [
126 | "# Classify the event/action based on what time of behavior it signals.\n",
127 | "\n",
128 | "event_action_classifications = {\n",
129 | " \"IssuesEvent/opened\" : \"issue_engagement\",\n",
130 | " \"IssueCommentEvent/created\" : \"issue_engagement\",\n",
131 | " \"PullRequestReviewCommentEvent/created\" : \"code_review_engagement\",\n",
132 | " \"PullRequestReviewEvent/created\" : \"code_review_engagement\",\n",
133 | " \"PullRequestEvent/opened\" : \"code_creation\",\n",
134 | " \"PushEvent\" : \"code_creation\",\n",
135 | "}\n",
136 | "df[\"event_action_classification\"] = df[\"event/action\"].map(event_action_classifications)\n",
137 | "df"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "# Sort the column names.\n",
147 | "p1_col_names = [\n",
148 | " 'org',\n",
149 | " 'repository_full_name',\n",
150 | " 'actor',\n",
151 | " 'event/action',\n",
152 | " \"event_action_classification\"\n",
153 | "]\n",
154 | "p2_col_names = df.columns.to_list()\n",
155 | "for p1_col_name in p1_col_names:\n",
156 | " p2_col_names.remove(p1_col_name)\n",
157 | "\n",
158 | "ordered_col_names = []\n",
159 | "ordered_col_names.extend(p1_col_names)\n",
160 | "ordered_col_names.extend(p2_col_names)\n",
161 | "ordered_col_names"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "metadata": {},
168 | "outputs": [],
169 | "source": [
170 | "df = df[ordered_col_names]\n",
171 | "df"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": null,
177 | "metadata": {},
178 | "outputs": [],
179 | "source": [
180 | "# Get a summary of the number of \"actors\" for a given type of activity.\n",
181 | "\n",
182 | "# https://stackoverflow.com/questions/12860421/how-to-aggregate-unique-count-with-pandas-pivot-table\n",
183 | "table = pd.pivot_table(df, values='actor', index=[\"repository_full_name\", \"event_action_classification\"], aggfunc=pd.Series.nunique, fill_value=0)\n",
184 | "table"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": null,
190 | "metadata": {},
191 | "outputs": [],
192 | "source": [
193 | "# Collect stats on PRs opened and closed\n",
194 | "\n",
195 | "table = pd.pivot_table(df.loc[df['event_type'] == \"PullRequestEvent\"], values='id', index=[\"repository_full_name\", \"event/action\"], aggfunc=\"count\", fill_value=0)\n",
196 | "table"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": null,
202 | "metadata": {},
203 | "outputs": [],
204 | "source": [
205 | "# Useful debugging for what kind of event/actions are most popular.\n",
206 | "\n",
207 | "table = pd.pivot_table(df, values='id', index=['event/action'], aggfunc=\"count\", fill_value=0)\n",
208 | "table"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {},
215 | "outputs": [],
216 | "source": [
217 | "# Inspect how much activity indvidauls are having.\n",
218 | "table = pd.pivot_table(df, values='id', index=['actor'], aggfunc=\"count\", fill_value=0)\n",
219 | "table.sort_values(\"id\")"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "metadata": {},
226 | "outputs": [],
227 | "source": [
228 | "# Dump the event data for additional analysis\n",
229 | "df.to_csv(f\"spec-github-activity-events-combined-cleaned-{report_date_str}.csv\")"
230 | ]
231 | },
232 | {
233 | "cell_type": "markdown",
234 | "metadata": {},
235 | "source": [
236 | "# Output\n",
237 | "You now have tabular data for each event, which makes it easy to create pivot tables to summarize how many actions a given user took."
238 | ]
239 | }
240 | ],
241 | "metadata": {
242 | "interpreter": {
243 | "hash": "c6e4e9f98eb68ad3b7c296f83d20e6de614cb42e90992a65aa266555a3137d0d"
244 | },
245 | "kernelspec": {
246 | "display_name": "Python 3.9.2 ('base')",
247 | "language": "python",
248 | "name": "python3"
249 | },
250 | "language_info": {
251 | "codemirror_mode": {
252 | "name": "ipython",
253 | "version": 3
254 | },
255 | "file_extension": ".py",
256 | "mimetype": "text/x-python",
257 | "name": "python",
258 | "nbconvert_exporter": "python",
259 | "pygments_lexer": "ipython3",
260 | "version": "3.9.2"
261 | },
262 | "orig_nbformat": 2
263 | },
264 | "nbformat": 4,
265 | "nbformat_minor": 2
266 | }
267 |
--------------------------------------------------------------------------------
/proposals/112-car-v2.md:
--------------------------------------------------------------------------------
1 | # CAR v2
2 |
3 | Authors: @willscott
4 |
5 | ## What is the problem this project solves?
6 | _Describe the status quo, including any relevant context on the problem you're seeing that this project should solve. Who is the user you're solving for, and why do they care about this problem? Wherever possible, include pain points or problems that you've seen users experience to help motivate why solving this problem works towards top-line objectives._
7 |
8 | This project implements a second version of the Content addressed data ARchive format.
9 | The goal is to provide a standardized way to take a collection of content addressed data, as currently exists in the car format, and be able to efficiently provide random access reads and appends to it.
10 | We know through the carbs and carbon experiments that both of these are possible.
11 |
12 | With this functionality we can avoid the need to take an existing car file and import it back into a blockstore like badger before being able to perform random access reads over it.
13 |
14 | ## Impact
15 | _What goals/OKRs are being addressed (for w3dt, a specific program, etc.)? Why is this project important? What do we get with this project that we can't get without it?_
16 |
17 | This project aims to impact the goals of:
18 | - low-latency retrieval from Filecoin
19 |
20 | These are core goals that the Bedrock program aims to address.
21 |
22 | ## The idea
23 | _Describe the proposed project solution, at a very high level. Stay at the level of the high-level requirements. Diagrams and interface descriptions can be useful, if you have any that help clarify and explain the idea._
24 |
25 | As described in the [sharded dag store](https://docs.google.com/document/d/118fJdf8HGK8tHYOCQfMVURW9gTF738D0weX-hbG-BvY/edit?ts=60bf961f) design document we want a minimal format for carv2 consisting of
26 | - a magic byte sequence to cause previous car1 libraries to properly warn they aren't using a compatible version
27 | - a fixed length header indicating features and offsets
28 | - the exact bytes currently in a carv1
29 | - an index compatible with carbs
30 |
31 | A more complete spec is described [here](https://github.com/ipld/specs/pull/248#issuecomment-833141588)
32 |
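To make the layout concrete, here is a minimal Python sketch of reading such a file. It is purely illustrative: the field names, sizes, and magic value are assumptions for this proposal, and the linked spec discussion is the source of truth.

```python
import struct
from dataclasses import dataclass

# Hypothetical fixed-length header (illustration only, not the finalized spec):
# a features bitfield plus offsets/sizes locating the inner CARv1 payload and the index.
HEADER = struct.Struct("<QQQQ")  # features, data_offset, data_size, index_offset

@dataclass
class CarV2Header:
    features: int
    data_offset: int   # where the embedded CARv1 bytes start
    data_size: int     # length of the embedded CARv1 bytes
    index_offset: int  # where the index starts (0 = no index)

def read_carv2(path: str, magic: bytes):
    """Check the magic prefix, parse the fixed-length header, return (header, inner CARv1 bytes)."""
    with open(path, "rb") as f:
        if f.read(len(magic)) != magic:
            raise ValueError("not a CARv2 file (magic prefix mismatch)")
        header = CarV2Header(*HEADER.unpack(f.read(HEADER.size)))
        f.seek(header.data_offset)
        return header, f.read(header.data_size)
```

With a header like this, random-access reads can seek directly into the embedded CARv1 payload or the index, and the CARv1 bytes can be extracted without re-importing the data into a blockstore.
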
33 | ## Success/acceptance criteria (optional)
34 | _How do we know we're done with this project? How do we know we're successful? This field is OPTIONAL for the first draft of an MPP. Sometimes this field needs to be filled out once we have more detail on the shape of the actual solution._
35 |
36 | Initial success/acceptance criteria:
37 | - Can read and write carv2 bytes on disk as described
38 | - provides an efficient read/write blockstore interface
39 | - Can export the index of a carv2 to bytes for separate storage
40 | - Can extract the carv1 bytes from a carv2
41 | - Can combine a carv1 and a separate index to get carv2 equivalent behavior (and to write to a carv2 if desired)
42 |
43 |
44 | ## Detailed plans (optional)
45 | _Link to more detailed project plans, e.g. product requirements documents (PRDs) and technical design docs, once they have been created for this project._
46 |
47 | Ongoing work is in the [wip/v2](https://github.com/ipld/go-car/tree/wip/v2) branch of go-car.
48 |
49 | ## Program (optional)
50 | _If this project is part of a program, please indicate the relevant program here._
51 |
52 | This project is part of the golden path. It most directly affects **Bedrock** program OKRs.
53 |
--------------------------------------------------------------------------------
/proposals/116-car-native-dag-store.md:
--------------------------------------------------------------------------------
1 | # CAR-native DAG store
2 |
3 | Authors: @raulk
4 |
5 | ## What is the problem this project solves?
6 |
7 | _Describe the status quo, including any relevant context on the problem you're seeing that this project should solve. Who is the user you're solving for, and why do they care about this problem? Wherever possible, include pain points or problems that you've seen users experience to help motivate why solving this problem works towards top-line objectives._
8 |
9 | In Lotus, the monolithic Badger blockstore is a point of contention during deal making. Inbound data transfers are placed in the Badger store, and outbound data transfers are staged in the Badger store from their unsealed copies. Other processes such as commP calculation also feed off the Badger store. In reality, the only reason that Badger is used is to provide random access to IPLD DAGs. Unfortunately, Badger does not scale well for this use case beyond hundreds of GiB, thus becoming a bottleneck and a fragile element in the deal-making process.
10 |
11 | The purpose of the CARv2 + DAG store endeavour is to eliminate overhead from the deal-making processes with the mission of unlocking scalability, performance, and resource frugality on both miner and client side within the Filecoin network.
12 |
13 | ## Impact
14 | _What goals/OKRs are being addressed (for w3dt, a specific program, etc.)? Why is this project important? What do we get with this project that we can't get without it?_
15 |
16 | As the volume and frequency of deals increase in the network, miners need to be able to handle those volumes; otherwise the foundation is shaky. Badger is currently a weak link that introduces enormous overhead. Eliminating this buffering will make deal-making more efficient, stable, and performant.
17 |
18 | ## The idea
19 | _Describe the proposed project solution, at a very high level. Stay at the level of the high-level requirements. Diagrams and interface descriptions can be useful, if you have any that help clarify and explain the idea._
20 |
21 | Refer to https://github.com/filecoin-project/dagstore/pull/2.
22 |
23 | ## Success/acceptance criteria (optional)
24 | _How do we know we're done with this project? How do we know we're successful? This field is OPTIONAL for the first draft of an MPP. Sometimes this field needs to be filled out once we have more detail on the shape of the actual solution._
25 |
26 | DAG store is implemented and integrated in Lotus.
27 |
28 | ## Detailed plans (optional)
29 | _Link to more detailed project plans, e.g. product requirements documents (PRDs) and technical design docs, once they have been created for this project._
30 |
31 | Technical architecture doc: https://docs.google.com/document/d/118fJdf8HGK8tHYOCQfMVURW9gTF738D0weX-hbG-BvY/edit#.
32 | Technical design: https://github.com/filecoin-project/dagstore/pull/2.
33 | Technical plan: https://linear.app/protocol/issue/W3D-6/
34 |
35 | ## Program (optional)
36 |
37 | Bedrock M1.
38 |
--------------------------------------------------------------------------------
/proposals/15-message-sending-ux-in-lotus.md:
--------------------------------------------------------------------------------
1 | # Message Sending User Experience in Lotus
2 |
3 | Authors: @Kubuxu
4 |
5 | Initial PR: https://github.com/protocol/web3-dev-team/pull/15
6 |
7 |
8 | ## Purpose & impact
9 | #### Background & intent
10 |
11 |
14 |
15 | A wide range of users interacting with the Filecoin network through Lotus stumble, again and again, over the current experience of sending messages. There are multiple caveats to actively interacting with the chain.
16 |
17 | Some of them are:
18 | - sending messages with unsynced node
19 | - sending messages with too low fees given current network conditions
20 | - having too little funds to send given message
21 | - head of the line blocking on previous messages
22 |
23 | Many of the issues that users hit while trying to interact with the chain stem from the fact that the Lotus CLI lacks interactive feedback. The CLI does not warn the user when these situations happen, and when it does, the only resolution available right now is to fail (and expose a flag like `--force` for advanced use cases). This approach only increases user confusion, as it is not clear to users how to proceed.
24 |
25 | The aim of the project is to make actively interacting with Filecoin network through Lotus a delightful experience by removing the multitude of sharp corners awaiting the user.
26 |
27 |
28 | #### Assumptions & hypotheses
29 |
30 |
31 | - Lotus is used directly to interact with the chain. This will be the case until applications that fulfill the roles of wallets, storage clients, and retrieval clients are built. Even then, application developers will likely use Lotus directly.
32 | - Sharp edges of Lotus in this area are causing users to abandon/reduce their involvement.
33 |
34 | #### User workflow example
35 |
36 |
37 |
38 | _All of the below are reported issues with message sending UX._
39 |
40 | - Sending a message from a (partially) unsynced node, resulting in wrongly estimated gas fees.
41 | - **Solution**: warn the user that the node is not in sync and ask for confirmation before sending the message
42 | - The default `MaxFee` configuration option results in message fees being too low, so the message does not land on chain:
43 | - **Solution**: for interactive usage, use `MaxFee` as a guideline. If the message wouldn't immediately land on chain at that fee (or would be at risk of being squeezed out in a short time), suggest an alternative fee to the user. The alternative fee can be accepted, refused, or modified. If a low fee is specified, estimate the time the message will need to land.
44 | - Previous messages are blocking new messages due to changing network conditions or insufficient funds.
45 | - **Solution part 1**: Warn users when new messages are submitted and ask for confirmation that the user wants to proceed.
46 | - **Solution part 2**: Build an interactive tool for observing the state of messages, allowing the user to reprice or discard them and showing the exact issue with them (head-of-the-line blocking, insufficient funds, too low a fee)
47 |
48 | #### Impact
49 |
50 |
51 | 🔥🔥
52 | This project has the potential to decrease attrition among new users who are starting to experiment with Filecoin, while at the same time improving the experience of existing users.
53 |
54 |
58 |
59 | #### Leverage
60 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
61 |
62 | 🎯
63 |
64 | This project develops features needed by other functionalities.
65 |
66 |
67 |
69 |
70 | #### Confidence
71 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
72 |
73 |
74 |
75 | ## Project definition
76 | #### Brief plan of attack
77 |
78 |
79 | - Assemble a prioritized list of message-sending issues users have encountered, from the Lotus issue tracker, user reports, and Slack.
80 | - Fix issues that can be resolved with detect and warn/fail approach.
81 | - Design and implement interactive message sending UI.
82 | - Design and implement an interactive message watching UI - it should fulfill the following roles for the user.
83 | - Clear indication of whether the message will land on chain in a short time
84 | - Clear indication of past messages, number of confirmations, exit status.
85 | - Provide a way to resolve "stuck" messages, i.e. interactive functionality of `mpool replace`.
86 |
87 | #### What does done look like?
88 |
89 | Issues identified in the first step are resolved, and users are protected against the issues mentioned in this document.
90 |
91 | #### What does success look like?
92 |
93 | The number of issues and complaints from users and end users regarding message-related interactions with Filecoin is reduced. Message-sending problems are no longer mentioned when onboarding new users.
94 |
97 |
98 | #### Counterpoints & pre-mortem
99 |
100 | - The Lotus CLI is an interface for interactive use but also for scripting; reconciling both of these worlds can be problematic, but it can be done.
101 | - The Lotus CLI may stop being the primary way developers/users actively interact with Filecoin. That is almost certain in the long term, but in the short to medium term it is very unlikely.
102 |
103 | #### Alternatives
104 |
105 | - Building a separate Wallet + Storage and Retrieval client interface. This will happen in the medium term, but it will not stop the Lotus CLI from being used by both new and old users.
106 |
107 |
108 | #### Dependencies/prerequisites
109 |
110 | None.
111 |
112 | #### Future opportunities
113 |
114 | - "Bring your own signature" workflow in Louts - it needs similar facilities
115 | - Message Pricing Policies - allow users to specify fee settings per message type or user defined tag instead of one global setting.
116 | - Split of Porcelain and Plumbing APIs in Lotus
117 |
118 | ## Required resources
119 |
120 | #### Effort estimate
121 |
129 | Medium, with some variance depending on the semi-unknown complexity of modifying Lotus to allow user interactions for messages sent internally in the API.
130 |
131 |
132 | #### Roles / skills needed
133 |
134 | - 2 x engineers with knowledge of Lotus
135 | - 0.25x PM
136 |
--------------------------------------------------------------------------------
/proposals/42-human-readable-mutable-names.md:
--------------------------------------------------------------------------------
1 | # Configurable DNS resolvers for human-readable mutable names
2 |
3 | Authors: @lidel
4 |
5 | Initial PR: https://github.com/protocol/web3-dev-team/pull/42
6 | ## Purpose & impact
7 |
8 | Improve the way mutable names work in our stack.
9 |
10 | - Empower developers with reliable primitive for publishing updates on human-readable names
11 | - Future-proof the way we do human readable names via DNS interop
12 | - Reduce privacy risks related to DNS leaks in plaintext
13 |
14 | #### Background & intent
15 | _Describe the desired state of the world after this project? Why does that matter?_
16 |
19 |
20 | We see CID, IPNS record, and DNSLink being de-facto standard for cryptographically-verifiable content-addressing and human-readable names on the distributed web.
21 |
22 | There are three distinct types of addresses:
23 | - immutable content: `/ipfs/{cid}`
24 | - mutable content
25 | - cryptographic, IPNS record signed with a specific libp2p key: `/ipns/{libp2p-key-cid}`
26 | - human-readable, [DNSLink](https://dnslink.io): `/ipns/{dns-name}`
27 | (which in turn, resolves to either an immutable or mutable content path from DNS TXT record)
28 |
29 |
30 | **There are known gaps in the mutable story:**
31 | - IPNS records are only published by the original author, which severely limits the utility of our mutable names
32 | - It is not possible for other peers to pin/republish `/ipns/{libp2p-key-cid}` names, so when the original publisher disappears, IPNS links no longer resolve and we see "IPNS rot".
33 | - **Due to IPNS rot people simply don't use it:**
34 | - Instead of publishing updates via `ipfs name publish {cid}`, website operators choose to constantly update the DNSLink DNS TXT record
35 | - They point at reliable immutable paths (`dnslink=/ipfs/{cid}`) (no IPNS record means it resolves fast and never breaks).
36 | - It works around the need for running a service responsible for constant republishing of IPNS record.
37 | - This is a pragmatic choice, however:
38 | - adds administrative overhead related to DNS updates (one needs to learn the DNS provider's API to automate this, which introduces soft vendor lock-in)
39 | - most likely not the best choice for security-sensitive use cases where human-readable names are not required: it means DNS is used in contexts where cryptographic addressing should be used instead.
40 | - **OS-level DNS resolver is used for all DNS TXT record lookups.**
41 | - The IPFS node is unable to control which DNS resolver is used for a specific TLD; all queries go to the global DNS server configured in the operating system
42 | - This harms competition and slows down innovation in the decentralized naming space. For example, if a user changes their DNS resolver to one that is capable of resolving all TLDs over ENS, they are unable to resolve TLDs from OpenNIC or UnstoppableDomains.
43 | - Given how DNS is implemented in operating systems and at ISPs, queries are most likely sent in plaintext and can be spoofed on the LAN or MITMed at the ISP level. At the time of writing this proposal DNS over HTTPS is not supported, and it is not trivial for a regular user to set up a custom DNS server to change this behavior.
44 |
45 | **TLDR:** **We need to fix and future-proof the way mutable names work in our stack:**
46 | - Make IPNS useful on its own, and inside of DNSLink records.
47 | - DNS TXT record should be set only once, updates should be published over IPNS.
48 | - This is out of the scope of this proposal, for IPNS improvements see [Proposal #19: Proposal: Reliable Mutability Primitive](https://github.com/protocol/web3-dev-team/pull/19)
49 | - Future-proof the way we do human readable names by
50 | - leveraging DNS protocol (not the DNS network run by ICANN) for interop with existing and future user agents and naming systems
51 | - allowing flexible configuration to improve security (DoH) and interop (DNSLink)
52 |
53 |
54 |
55 |
56 | #### Assumptions & hypotheses
57 | _What must be true for this project to matter?_
58 |
59 |
60 | - **NFTs require dedicated, user-friendly dapps.**
61 | - Browser is the main distribution channel for dapps and NFTs.
62 | - **Dapps are simply a subset of all static websites loaded from IPFS.**
63 | - Most of websites hosted on IPFS have DNSLink set up for human-readable name.
64 | - Mainstream browsers like Opera and [Brave](https://brave.com/ipfs-support/) ship support for `ipfs://` and `ipns://` URIs.
65 | - Human readable names like `ipns://en.wikipedia-on-ipfs.org` work thanks to DNSLink
66 | - This removes huge UX friction for dapp developers.
67 | - Vendors like Brave are committed to [surfacing DNSLink support in the UI](https://github.com/brave/brave-browser/issues/13609), which will improve onboarding even further.
68 | - Alternatives to [ICANN](https://en.wikipedia.org/wiki/ICANN) exist, and we want them to work with our stack to decrease dependency on a single organization.
69 | - [ENS](https://ens.domains) provides custom TLDs (`.eth`) and leverages DNSLink for interop with IPFS and delegated lookups for clients that are unable to run their own blockchain resolver.
70 | - Updating `ipfs-ns` `contenthash` is a chore and costs extra (gas etc).
71 | - Using `ipns-ns` means setting `contenthash` only once, which introduces cost savings and simplifies publishing.
72 | - We see multiple actors in the space providing either their own TLDs or attempting to replace ICANN as the top-level authority: https://unstoppabledomains.com , http://opennic.org, https://handshake.org etc.
73 | - Brave is looking into using DNS over HTTPS to resolve DNSLink for non-ICANN TLDs like ENS (resolve `*.eth` via `https://eth.link/dns-query` etc)
74 | - We want to enable innovation in the decentralized naming space. This means removing ourselves as gatekeepers of what a valid domain name is.
75 | - When embedded in user agent (Brave) we want to follow user choices regarding DNS resolution.
76 |
77 |
78 |
79 |
80 | #### User workflow example
81 | _How would a developer or user use this new capability?_
82 |
83 |
84 | - Opening `ipns://mydapp.tld` "just works" in Brave
85 | - go-ipfs ships with implicit resolvers for non-ICANN naming systems (where feasible)
86 | - OS-level resolver as the default
87 | - https://eth.link/dns-query for `*.eth`
88 | - TBD for UnstoppableDomains
89 | - User or user agent is able to override implicit resolver for all or specific TLDs
90 | - use encrypted DoH resolver for all DNS lookups
91 | (eg. for privacy reasons, or lack of trust for OS-level resolver from ISP)
92 | - run local Ethereum client and set up IPNS node to resolve ENS natively via localhost resolver (removing the need for trusting eth.link)
93 |
94 |
95 |
96 |
97 | #### Impact
98 | _How would this directly contribute to web3 dev stack product-market fit?_
99 |
100 |
104 |
105 | - Human-readable naming removes huge DX/UX friction for multiple stakeholders:
106 | - Dapp developers can focus on dev instead of onboarding story
107 | - Dapp marketing/onboarding collapses to:
108 | > Install Brave or Opera and open ipns://mydapp.eth
109 | - Alternative naming systems have a much lower barrier to entry
110 | - Interop story with the old web and the entire IPFS ecosystem collapses to "repeat https://eth.link story":
111 | > Expose DNS endpoint that returns A (gateway) and TXT (DNSLink) records
112 | - One-liner for setting up a custom DNSLink resolver for specific TLD removes adoption barriers and improves onboarding for new naming systems, as those are no longer blocked by the lack of IPFS stewards blessing.
113 | - Browser vendors have more confidence in running IPFS node
114 | - Vendors like Brave are able to provide unified UI for changing name resolution settings in a single place, without fear that IPFS node will ignore user's choice and be responsible for any privacy leaks
115 | - IPFS involvement is no longer needed when browser vendor wants to support new naming system
116 |
117 |
118 |
119 |
120 | #### Leverage
121 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
122 |
123 |
126 |
127 | - NFT dapp developers need marketable names and a clear user onboarding story.
128 | - Brave wants to support alternatives to ICANN out of the box.
129 | - By having the resolver configurable per TLD namespace, we will be able to integrate with their resolution logic, providing a seamless experience on `ipns://` out of the box and acting as a template for other vendors who want to follow.
130 | - Every new project can experiment with IPFS via DNSLink integration without our involvement.
131 | - We no longer need to say "no" just to keep our code base small or worry about optics of "picking winners and losers".
132 | - DNSLink provides interop for human-readable names and enables independence from PKI and ICANN.
133 | - Just Works (TM)
134 | - No vendor-specific client is included.
135 | - No proprietary APIs or formats. RFC-compliant DNS only.
136 | - We promote competition, letting the best solution win.
137 | - User agency is respected.
138 |
139 |
140 |
141 | #### Confidence
142 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
143 |
144 | Medium.
145 |
146 |
147 |
148 | - After five years we see DNSLink being the only viable solution for human-readable names.
149 | - ENS works out of the box on every IPFS Gateway and in Brave thanks to DNSLink interop.
150 | - Cloudflare embraced DNSlink years ago, and they took over [the eth.link ENS resolver/gateway](https://blog.cloudflare.com/cloudflare-distributed-web-resolver/).
151 | - `ipns://` in address bar of Brave is an extremely powerful visual and a confidence booster about entire stack
152 | - We see alternatives to ENS popping up, but enabling them requires changes to our code bases and is not a trivial task.
153 | - Brave confirmed they will use DNS over HTTPS and expect go-ipfs to provide a configuration option per TLD.
154 |
155 | ## Project definition
156 | #### Brief plan of attack
157 |
158 |
159 |
160 | - Add DNS over HTTPS support to the entire stack
161 | - Make it possible to configure a different DNS resolver per TLD
162 | - Decide on a short list of mature systems that don't collide with ICANN TLDs and can be included as implicit defaults
163 |
164 | #### What does done look like?
165 | _What specific deliverables should completed to consider this project done?_
166 |
167 | - DNS over HTTPS is supported in js/go-ipfs/libp2p
168 | - User agency around DNS is respected: every code path doing a DNS lookup (resolving DNSLink, `/dnsaddr`, `/dns*` multiaddrs) should use either explicit or implicit DNS resolvers defined in configuration file / constructor params.
169 | - By default the OS-level DNS resolver is used as a catch-all, but the user can delegate resolution of all or specific TLDs to specific DoH resolvers.
170 | - The node tries to resolve with the most specific resolver (if present for the TLD) and then falls back to the global one (see the illustrative sketch after this list).
171 | - Visual Aid: a config mock-up for go-ipfs
172 | ```json
173 | "DNS": {
174 | "Resolvers": {
175 | "*.eth": "https://eth.link-or-cloudflare/path/to/dns-query",
176 | "*.crypto": "https://unstoppablesomething.example.com/path/to/dns-query",
177 | "*.libre": "https://www.opennic.org/path/to/their/dns-query",
178 | "*": "https://mozilla.cloudflare-dns.com/dns-query"
179 | }
180 | }
181 | ```
182 | - DNSLink provides interop for human-readable names and enables independence from ICANN.
183 | - Anyone can set up and run their own naming system without our involvement, blessing or endorsement.
184 | - No vendor-specific client is included.
185 |
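As an illustration of the resolver-selection behavior above, here is a minimal Python sketch. It is purely illustrative: go-ipfs is written in Go, and the function and structure here are assumptions, not the actual implementation; the mapping mirrors the `DNS.Resolvers` mock-up above.

```python
# Purely illustrative sketch of per-TLD resolver selection (assumed logic, not go-ipfs code).
resolvers = {
    "*.eth": "https://eth.link-or-cloudflare/path/to/dns-query",
    "*.crypto": "https://unstoppablesomething.example.com/path/to/dns-query",
    "*": "https://mozilla.cloudflare-dns.com/dns-query",  # catch-all / OS-level default
}

def pick_resolver(name: str, resolvers: dict) -> str:
    """Return the DoH endpoint of the most specific matching suffix, else the catch-all."""
    labels = name.lower().rstrip(".").split(".")
    # Longest matching suffix first: "a.b.eth" -> "*.b.eth", then "*.eth".
    for i in range(1, len(labels)):
        candidate = "*." + ".".join(labels[i:])
        if candidate in resolvers:
            return resolvers[candidate]
    return resolvers["*"]

assert pick_resolver("mydapp.eth", resolvers) == resolvers["*.eth"]
assert pick_resolver("en.wikipedia-on-ipfs.org", resolvers) == resolvers["*"]
```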
186 |
187 | #### What does success look like?
188 | _Success means impact. How will we know we did the right thing?_
189 |
190 |
193 |
194 | - Brave ships support for more than one alternative TLD backed by local IPFS node
195 | - We see % of requests to non-ICANN TLDs going up (Brave / dweb.link)
196 | - We no longer see requests for supporting new naming systems
197 | - We no longer see issues filed about our stack leaking DNS names in plaintext
198 |
199 | #### Counterpoints & pre-mortem
200 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
201 |
202 | - Mainstream browser vendors may decide supporting alternative TLDs is not worth the future headache when ICANN decides to sell the same name.
203 | - A per-TLD [DNS over HTTPS](https://en.wikipedia.org/wiki/DNS_over_HTTPS) resolver may not be enough to mitigate concerns around the centralized nature of ICANN's DNS, namely most users or user agents pointing their nodes at a DoH endpoint from Google or Cloudflare.
204 | - This is necessary gruntwork, but we may not see DNSLink adoption going up until we provide a truly decentralized solution (see Future opportunities).
205 |
206 |
207 | #### Alternatives
208 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
209 |
210 | - Deprecate IPNS.
211 | - Announce ENS being the official solution for DNS names and replace every DNS lookup to go via ENS.
212 | - Do nothing and pay the opportunity cost when a better alternative to ENS appears but can't be used with DNSLink due to non-ICANN TLDs.
213 |
214 | #### Dependencies/prerequisites
215 |
216 |
217 | - Remove hardcoding of `.eth` → `.eth.link` from go-ipfs
218 | - DNS over HTTPS support in go/js-ipfs and go-/js-libp2p
219 |
220 |
221 | #### Future opportunities
222 |
223 |
224 | - User agents like Brave looking into deeper integration of IPFS into their UI and network stack
225 | - Every Dapp with DNSLink could get [Open from IPFS](https://github.com/brave/brave-browser/issues/13609) badge etc
226 | - Collaborations with various naming projects to provide DNSLink gateway (similar to https://eth.link) and DNS over HTTPS endpoint for resolving them securely in user agents and JS in browsers.
227 | - Supporting competition, providing venue for rapid adoption and user onboarding, seeing what sticks.
228 | - Implement [content routing hint via DNS records](https://github.com/ipfs/go-ipfs/issues/6516) and act on it with more confidence
229 | - Look into replacing semi-centralized DoH endpoints with P2P swarm resolvers and quorum acceptance criteria
230 | - We should look into a way to harden DNS resolution without introducing any dependency on any complex public PKI (DNSLink should not require DNSSEC).
231 | - Instead of delegating the DNS lookup to some DoH endpoint, leverage a swarm of peers to increase trust in DNS lookup results.
232 | - Ask a subset of peers (from different networks, jurisdictions etc.) to resolve the name for you, and pick the record passing some quorum criteria (see the sketch at the end of this list).
233 | - End-to-end website publishing and persistence with our stack
234 | - Pinning Services support for DNSLink update as part of pinning
235 | - Updating CID for a pin named `mysite.example.eth` triggers DNSlink update (or IPNS update)
236 | - Leveraging DNSLink for petnames in private swarms
237 |
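A minimal sketch of the quorum idea above. It is purely illustrative: the resolver set, threshold, and the `resolve_via` helper are assumptions, not a design.

```python
from collections import Counter

def quorum_resolve(name, resolver_endpoints, resolve_via, quorum=2):
    """Ask several independent resolvers and accept an answer only if enough of them agree.

    `resolve_via(endpoint, name)` is a hypothetical helper that performs one DNSLink/TXT
    lookup through the given resolver endpoint and returns the resolved content path.
    """
    answers = Counter()
    for endpoint in resolver_endpoints:
        try:
            answers[resolve_via(endpoint, name)] += 1
        except Exception:
            continue  # a failing or unreachable resolver simply contributes no vote
    if not answers:
        raise RuntimeError(f"no resolver returned an answer for {name}")
    answer, votes = answers.most_common(1)[0]
    if votes < quorum:
        raise RuntimeError(f"no quorum for {name}: best answer had only {votes} vote(s)")
    return answer
```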
238 |
239 | ## Required resources
240 |
241 | #### Effort estimate
242 |
250 |
251 | - Medium: 1-3 weeks design/prototyping, 1-2 weeks implementation
252 |
253 | #### Roles / skills needed
254 |
255 |
256 | - js-ipfs/libp2p dev
257 | - go-ipfs/libp2p dev
258 | - DNSlink / DNS / DoH expertise
259 |
--------------------------------------------------------------------------------
/proposals/62-nft-storage-for-nft-hack.md:
--------------------------------------------------------------------------------
1 | # `nft.storage` for [NFTHack](https://nfthack.ethglobal.co/)
2 |
3 | Authors: [@alanshaw](https://github.com/alanshaw)
4 |
5 | Initial PR: https://github.com/protocol/web3-dev-team/pull/62
6 |
7 |
11 |
19 |
22 |
23 | This is a proposal for time limited storage of NFT data on IPFS, backed by Filecoin and provided **free** to NFTHack participants during the hackathon (March 19 2021).
24 |
25 | * Register `nft.storage` (or other cool domain name) and expose a [pinning service API](https://blog.ipfs.io/2021-02-19-go-ipfs-0-8-0/#remote-pinning-services).
26 |
27 | This will either be a proxy to [Pinata](https://pinata.cloud/) (or other pinning service) or use PL's own [pinbot](https://twitter.com/ipfspin).
28 |
29 | This gives us space to experiment. Data storage and retrieval is de-risked by the service we pin the data to. It affords us a playground with "_real_" data that we can store on Filecoin.
30 |
31 | Here's some of the AWESOME things we could build and deploy in this space:
32 |
33 | * Create a library that automatically makes deals on Filecoin for the pinned data (use/adapt the dealbot?). This is mentioned in the [AWS Facade proposal](https://github.com/protocol/web3-dev-team/pull/34) and could be re-used there.
34 |
35 | * Implement a [deal batching service](https://github.com/protocol/web3-dev-team/pull/60) prototype for increased successful deal probability.
36 |
37 | * Run a "[Free Retrieval via IPFS](https://github.com/protocol/web3-dev-team/pull/52) Lotus node" that `nft.storage` can make deals with and that will expose the NFT data to IPFS.
38 |
39 | * Build and run a "[Retrieval from Filecoin](https://github.com/protocol/web3-dev-team/pull/57) IPFS node" that will allow NFT data we store in deals to be pulled directly from Filecoin and available via IPFS.
40 |
41 | * Create a single page website at `nft.storage` explaining how to use the service with registration for API keys.
42 |
43 | * Implement the [remote pinning service in js-ipfs](https://github.com/protocol/web3-dev-team/pull/58) so that it can be used in web based NFT hacks.
44 |
45 | If this is not ready on time, folks can always use the Pinata API directly. Obviously, for non-web based hacks users can run a go-ipfs node, configured to use `nft.storage` as their remote pinning service.
46 |
47 | Essentially, everything after building a simple pinning service API and website is a bonus and won't negatively affect QoS for NFTHack participants.
48 |
49 | It gives us purpose, an (albeit soft) deadline and a safe area for building out and deploying project proposals _in production_ that directly address PMF issues like deal flow and Filecoin ↔️ IPFS interop.
50 |
51 | As an added bonus, we'll be dogfooding our own tech and ideas for greater understanding and appreciation of any difficulties. It also will help to validate assumptions made in our project proposals.
52 |
53 | ## Purpose & impact
54 | #### Background & intent
55 | _Describe the desired state of the world after this project? Why does that matter?_
56 |
59 |
60 | A remote pinning service API and a simple website for information and registration will exist for use by NFTHack participants to store NFT data for free for a limited time.
61 |
62 | This will cement IPFS as the primary means of off-chain NFT storage and will raise awareness of Filecoin as a storage provider.
63 |
64 | Currently a lot of NFT data is stored on IPFS with no clear story for permanence. Leveraging the remote pinning service API and providing guarantees of availability for the duration of the hack will prompt developers building NFT-related software to think about permanence and who provides it. It will present Filecoin as an option for consideration and will likely drive some traffic to pinning services like Pinata.
65 |
66 | #### Assumptions & hypotheses
67 | _What must be true for this project to matter?_
68 |
69 |
70 | * NFT users want to store NFT data on IPFS.
71 | * NFT users want guarantees of data availability.
72 | * NFT platforms care about their end users being able to take control of their data.
73 | * e.g. An artist would be able to retrieve their data even if X app went away tomorrow.
74 |
75 | #### User workflow example
76 | _How would a developer or user use this new capability?_
77 |
78 |
79 | * Register an API token on `nft.storage`.
80 | * Run IPFS with remote pinning service configured as `nft.storage`.
81 | * Pin data to IPFS node: it is stored for e.g. 1 year on `nft.storage`.
82 |
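For illustration, a minimal Python sketch of what the last step could look like against a standard IPFS pinning service API. The endpoint URL and token are placeholders, and the real `nft.storage` API may differ.

```python
import requests

# Hypothetical endpoint and token; the actual nft.storage pinning endpoint may differ.
PINNING_SERVICE = "https://nft.storage/api"
API_TOKEN = "<token from the nft.storage registration page>"

def pin_cid(cid, name):
    """Request a pin via the IPFS Pinning Service API (POST /pins)."""
    resp = requests.post(
        f"{PINNING_SERVICE}/pins",
        headers={"Authorization": f"Bearer {API_TOKEN}"},
        json={"cid": cid, "name": name},
    )
    resp.raise_for_status()
    return resp.json()  # includes the pin status, e.g. "queued" or "pinned"
```
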
83 | #### Impact
84 | _How would this directly contribute to web3 dev stack product-market fit?_
85 |
86 |
90 |
91 | * This primarily exposes and builds on the status quo of using IPFS to store NFT data. It roughly equates to advertising for IPFS and Filecoin
92 | * Secondarily, it gives us purpose, an (albeit soft) deadline and a safe area for building out and deploying project proposals _in production_ that directly address PMF issues like deal flow and Filecoin ↔️ IPFS interop.
93 |
94 | #### Leverage
95 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
96 |
97 |
100 |
101 | We may see an uptick in the number of developers working with NFTs that use IPFS for storage.
102 |
103 | The component part of actually storing the NFT data on Filecoin has invaluable potential to inform us about real-world usage of the APIs involved in making a deal. It should enable us to more confidently build solutions knowing they are the right thing to build.
104 |
105 | One other potential future for this project is to extend its lifetime so that we continue to receive NFT data for storage; this would allow us to measure our ability to make deals on the Filecoin network over time and make optimizations for the size and structure of NFT data.
106 |
107 | #### Confidence
108 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
109 |
110 |
111 |
112 | Med-Low
113 |
114 | We do not know if any NFTHack participants will even use the service.
115 |
116 | ## Project definition
117 | #### Brief plan of attack
118 |
119 |
120 |
121 | See overview at top of page.
122 |
123 | #### What does done look like?
124 | _What specific deliverables should completed to consider this project done?_
125 |
126 | * A remote pinning API is available at `nft.storage` that pins to the chosen pinning service.
127 | * It exposes a feed of pinned CIDs (to be used for persisting NFT data to Filecoin).
128 | * A service runs that consumes the pinned CIDs and attempts to store them on Filecoin.
129 | * A beautiful and engaging website exists.
130 | * Has information on how to configure go-ipfs/js-ipfs to use `nft.storage` as the remote pinning service.
131 | * Has functionality for registration, login and API key generation.
132 | * Draws attention to data being backed by Filecoin.
133 | * Explicitly outlines period for free storage and other rules.
134 |
135 | #### What does success look like?
136 | _Success means impact. How will we know we did the right thing?_
137 |
138 |
141 |
142 | * \>25% of NFTHack participants use the service for persisting their NFT data.
143 | * \>50% of NFT data submitted during the hackathon is also stored on Filecoin.
144 | * Increased adoption of the remote pinning API in developer applications.
145 |
146 | #### Counterpoints & pre-mortem
147 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
148 |
149 | There is not a lot of time before the hackathon 😬.
150 |
151 | #### Alternatives
152 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
153 |
154 | #### Dependencies/prerequisites
155 |
156 |
157 | #### Future opportunities
158 |
159 |
160 | * Extend it as a paid for service and gift it to a pinning service to run/maintain?
161 | * Add community funding to keep a CID alive. Any member of the community can choose to contribute to the hosting cost for a given CID, and you can see how many years it has on the clock.
162 |
163 | ## Required resources
164 |
165 | #### Effort estimate
166 |
174 |
175 | Medium
176 |
177 | #### Roles / skills needed
178 |
179 |
180 | * Frontend Engineer
181 | * Go Engineer x2
182 | * Web Designer
183 |
184 | Resource from other w3dt teams to implement project proposals described above:
185 |
186 | * Sudo
187 | * Datasystems
188 |
--------------------------------------------------------------------------------
/proposals/79-typescript-definitions.md:
--------------------------------------------------------------------------------
1 | # TypeScript Definitions for core libraries
2 |
3 | Authors: @rvagg
4 |
5 | Initial PR: https://github.com/protocol/web3-dev-team/pull/79
6 |
7 | ## Purpose & impact
8 |
9 | Consumers of our JavaScript libraries & components should have sufficient TypeScript definitions, available through standard means, to write fully typed TypeScript code, or use that code to drive tooling that consumes these definitions - such as VS Code editing enhancements, documentation production pipelines, type checking tooling for test & CI pipelines.
10 |
11 | This work is high-value and high-impact for the JavaScript ecosystem and those of us working on open source JavaScript libraries can all provide anecdotal evidence for the frequency with which developers request better TypeScript annotations for our libraries. The rate of TypeScript adoption, particularly within larger-scale projects, is increasing, but TypeScript definitions are increasingly useful as they are included in general JavaScript linting, checking and documentation tooling.
12 |
13 | This work provides high-leverage within our suite of JavaScript tools as we mature, refactor, modularize and create. We are already establishing a suite of practices and tooling that are used in varying ways across PL JavaScript projects that use definitions, even though we have very few TypeScript projects throughout our GitHub orgs ( being a rare example, which was initially contributed by Textile). It is reasonable to expect that the majority of new JavaScript code produced by Protocol Labs into the future will make use of TypeScript annotations in some way.
14 |
15 | Work on this effort has been largely completed, thanks primarily to @hugomrdias, @Gozala, and @achingbrain with assistance from the entire JavaScript team in various ways. The bulk of the activity has been tracked as a roll-up into js-ipfs @ .
16 |
17 | Remaining work to integrate the current set of js-ipfs dependencies into js-ipfs and address the typing for code directly in js-ipfs is happening here: https://github.com/ipfs/js-ipfs/pull/3550
18 |
19 | Aside from completing the remaining js-ipfs integration work, the scope of this project includes some additional libraries that are not currently part of the js-ipfs dependency tree, including:
20 |
21 | * Next-generation IPLD codec libraries (using the js-multiformats pattern)
22 | * [js-multiformats legacy interface](https://github.com/multiformats/js-multiformats/issues/67) needs updating to match the newly exported js-ipfs/js-ipld types.
23 | * [js-multiaddr](https://github.com/multiformats/js-multiaddr/pull/159) is mostly done, but needs to be a non-breaking change to land
24 | * js-libp2p core types had a first iteration, but there are a few gaps that should be addressed, especially in the configuration, [as follow up](https://github.com/libp2p/js-libp2p/issues/830).
25 | * _Scope:_ the priority for js-libp2p is in the generally exported API, so direct users of js-libp2p have types for that interface.
26 | * _Out of scope for this project:_ there is also a general libp2p typescript [tracking](https://github.com/libp2p/js-libp2p/issues/659) with all the libp2p modules, but these do not appear to be high priority at the moment, as most users typically only interact with the core API.
27 |
28 | Currently the next-generation IPLD codecs and js-multiformats do not contribute directly to other parts of the JS stack (js-ipfs most notably), however:
29 | * There is ongoing [work to integrate these into the js-ipfs stack](https://github.com/ipfs/js-ipfs-unixfs/pull/116) due to a desire to retire old components. Incomplete typing on these new components will be a blocker for easier integration.
30 | * Our ecosystem is already consuming these newer components - in particular we advise all new codec development to consume the js-multiformats pattern ([e.g.](https://github.com/ceramicnetwork/js-dag-jose/)), much of which uses TypeScript.
31 |
32 | During execution of this project, where questions of scope arise that are not covered above, library-specific decisions will be made regarding the depth of TypeScript definitional work using the following criteria:
33 | * Projects with greater expected future usage should include full type checking in CI and will therefore require basic inline TypeScript annotations.
34 | * Projects that are dependencies but are not expected to be actively maintained or developed further into the future may just include basic API type definitions so that dependents can make use of those.
35 | * Any work estimated to consist of more than 2 days for 1 FTE will be either scoped as a separate project (or bundled into another, existing project, collecting future work), or be brought back to [the PR for this proposal](https://github.com/protocol/web3-dev-team/pull/79) for further discussion of expansion of scope.
36 |
37 | #### Background & intent
38 | _Describe the desired state of the world after this project? Why does that matter?_
39 | #### Assumptions & hypotheses
40 | _What must be true for this project to matter?_
41 | #### User workflow example
42 | _How would a developer or user use this new capability?_
43 | #### Impact
44 | _How would this directly contribute to web3 dev stack product-market fit?_
45 |
46 | Roughly half of the projects that depend on our core JavaScript stack are using TypeScript in some way, according to the metrics we have. This share is expected to grow over time given the adoption rate of TypeScript and TypeScript annotations.
47 |
48 | Anecdotally, developers find type annotations useful in the development process even if they don't use TypeScript. Annotations also allow for additional checking in the test/CI process for dependents of our libraries.
49 |
50 | #### Leverage
51 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
52 |
53 | We have some existing TypeScript in the PL suite of JavaScript libraries. This may expand, as some of our developers prefer working with strongly typed, or at least type-checked, code. Annotations on our core libraries allow us to add tighter type checking to our CI process for any existing and new libraries that consume that code.
54 |
55 | We already have [one example](https://github.com/ipfs/js-dag-service) of code being contributed to the PL stack from a third party that uses TypeScript and could be improved by typing in the rest of our stack.
56 |
57 | Work on typing in the js-ipfs stack has already surfaced a large number of bugs. Typing a complex codebase is a proven method of increasing code quality, both within the codebase itself and for consumers of it.
58 |
59 | #### Confidence
60 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
61 |
62 | ## Project definition
63 |
64 | #### Brief plan of attack
65 |
66 | #### What does done look like?
67 | _What specific deliverables should be completed to consider this project done?_
68 |
69 | By identifying a small number of key downstream projects that consume our libraries and also use TypeScript, and by counting the `@ts-ignore` annotations they carry to work around the lack of exported typing from our libraries, we have a metric for the degree of success (a minimal counting sketch follows the list below).
70 |
71 | * https://github.com/ceramicnetwork/js-ceramic - contains a considerable number of `@ts-ignore` statements that relate to js-ipfs
72 | * https://github.com/ceramicnetwork/js-dag-jose - is currently unable to consume js-multiformats to properly execute its test suite
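The counting itself can be trivially automated. The helper below is a hypothetical sketch (script name, extensions and invocation are illustrative, not part of this proposal) that walks a checked-out downstream repository and counts `@ts-ignore` occurrences so the metric can be sampled before and after this work lands.

```ts
// count-ts-ignore.ts: a hypothetical metric helper; run with ts-node, or
// compile with tsc and run the emitted JavaScript with node.
import { readdirSync, readFileSync, statSync } from 'node:fs'
import { join, extname } from 'node:path'

const EXTENSIONS = new Set(['.ts', '.tsx', '.js', '.mjs', '.cjs'])
const SKIP = new Set(['node_modules', '.git', 'dist', 'build'])

function countTsIgnores (dir: string): number {
  let count = 0
  for (const entry of readdirSync(dir)) {
    if (SKIP.has(entry)) continue
    const path = join(dir, entry)
    if (statSync(path).isDirectory()) {
      count += countTsIgnores(path)
    } else if (EXTENSIONS.has(extname(entry))) {
      count += (readFileSync(path, 'utf8').match(/@ts-ignore/g) ?? []).length
    }
  }
  return count
}

// e.g. point it at a local checkout of js-ceramic
console.log(countTsIgnores(process.argv[2] ?? '.'))
```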
73 |
74 | Also see Scope description above.
75 |
76 | #### What does success look like?
77 | _Success means impact. How will we know we did the right thing?_
78 |
79 | #### Counterpoints & pre-mortem
80 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
81 |
82 | Historically it has been very difficult to estimate and scope the TypeScript work in the PL stack. This work has been underway for approximately 1 year and there is a risk of scope creep and a strong risk of estimation error. It will be important to track scope and be able to cut losses when sufficient value has been achieved _or_ alternative, higher-value opportunities for our developer-time investment are identified.
83 |
84 | #### Alternatives
85 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
86 |
87 | #### Dependencies/prerequisites
88 |
89 | #### Future opportunities
90 |
91 | * Deeper typing in our stack, such as in the dependencies of js-libp2p, which are not in scope for this project.
92 | * More sophisticated test/CI integration across our stack to test against the typing data.
93 |
94 | ## Required resources
95 |
96 | #### Effort estimate
97 |
98 | S _(with some risk of M depending on discoveries along the way, see notes regarding Scope above)_
99 |
100 | #### Roles / skills needed
101 |
102 | * JavaScript
103 | * TypeScript
104 | * js-ipfs/ipld/libp2p and related stack expertise
105 |
--------------------------------------------------------------------------------
/proposals/80-lotus-api-audit.md:
--------------------------------------------------------------------------------
1 | # Lotus / Filecoin RPC and Library Audit
2 |
3 | Authors: @rvagg
4 |
5 | Initial PR: https://github.com/protocol/web3-dev-team/pull/80
6 |
7 | ## Purpose & impact
8 |
9 | #### Background & intent
10 |
11 | _Describe the desired state of the world after this project? Why does that matter?_
12 |
13 | The Lotus RPC and its associated JavaScript libraries would be catalogued, their feature set and deficiencies would be well understood, and this information would be available for the production of better documentation and other educational materials for users.
14 |
15 | Production of documentation and other educational materials is not specifically part of the scope of this project, but the scope may be expanded to include it as a collaboration with the individuals who own / specialise in this area (i.e. this likely turns into a collaboration with the Dev Adoption & Onboarding team).
16 |
17 | There currently exists a small, but sprawling and poorly documented, ecosystem of JavaScript libraries that interact with the Lotus RPC API. There are also some known deficiencies with the API (e.g. the external wallet problem) that are not documented, just known by people who have bumped into the particular problem(s).
18 |
19 | This project aims to do a mini audit of the landscape of libraries and the abilities offered by the Lotus RPC API to developers interacting with Lotus from the outside—for any purpose, including simple wallet/send actions, general message crafting and submitting, deal-making, etc.
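For context, interacting with Lotus "from the outside" today typically means JSON-RPC 2.0 calls against the node's API endpoint, either directly or via one of the libraries to be catalogued. A minimal sketch (using the default local endpoint and a token from `lotus auth create-token`; the wrapper function and environment variable are illustrative, not an existing library API):

```ts
// Minimal direct JSON-RPC call to a local Lotus node, without any wrapper library.
async function lotusRpc<T> (method: string, params: unknown[] = []): Promise<T> {
  const res = await fetch('http://127.0.0.1:1234/rpc/v0', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${process.env.LOTUS_TOKEN ?? ''}`
    },
    body: JSON.stringify({ jsonrpc: '2.0', id: 1, method, params })
  })
  const { result, error } = await res.json()
  if (error) throw new Error(`${method} failed: ${error.message}`)
  return result as T
}

// e.g. read the current chain head
const head = await lotusRpc<{ Height: number }>('Filecoin.ChainHead')
console.log('chain height:', head.Height)
```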
20 |
21 | Outputs of this project include:
22 |
23 | * A catalog of existing libraries used to interact with the Lotus RPC API, including (currently known to be roughly within this category):
24 | * Those in [filecoin-shipyard](https://github.com/filecoin-shipyard)
25 | * Those currently published under the [@glif npm namespace](https://github.com/glifio/modules/tree/primary/packages/)
26 | * Those maintained for/by the [Truffle suite](https://github.com/trufflesuite/ganache-filecoin-alpha-cli)
27 | * An indication of quality or status and a recommendation for action; smaller-scope actions may be taken as part of this project. Such actions may include:
28 | * Further documentation in specific areas
29 | * Deprecation / archival
30 | * Improvements and feature additions
31 | * Key information required to create/improve higher-level documentation containing recommendations for users seeking to interact with the Lotus RPC (production of / contribution to such documentation is not a necessary part of this project, but generating the resources and/or knowledge to do so is part of this project).
32 | * A catalog of areas for high-value improvement, including the creation of new project proposals or the provision of more details to existing project proposals, for:
33 | * The Lotus RPC
34 | * Relevant JS libraries
35 |
36 | #### Assumptions & hypotheses
37 |
38 | _What must be true for this project to matter?_
39 |
40 | That developers want to be able to interact with Lotus, and Filecoin more broadly, via JavaScript and that the path to doing this is currently via RPC APIs.
41 |
42 | #### User workflow example
43 |
44 | _How would a developer or user use this new capability?_
45 |
46 | #### Impact
47 |
48 | _How would this directly contribute to web3 dev stack product-market fit?_
49 |
50 | Maturity of Filecoin depends on a developer ecosystem building layer 1 (and beyond) technologies to serve users. As with IPFS, it is expected that a large portion (possibly a majority) will be leaning on JavaScript application stacks, and dapp developers will be leaning on the browser JavaScript environment. It is critical that we provide a solid foundation for building libraries to interact with Filecoin and the current technical focus for this is Lotus and its RPC. This will evolve over time but the current state suggests high-value will be derived from basic investment in the landscape of JavaScript libraries that interact with (and above) Lotus, and the Lotus RPC itself.
51 |
52 | #### Leverage
53 |
54 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
55 |
56 | This project will serve as a foundation for additional investment in the JS (and general API) layer above Filecoin to unlock additional web3 developer opportunities. Knowing _what we have_ and what its status is will be critical to understanding where to invest.
57 |
58 | #### Confidence
59 |
60 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
61 |
62 | ## Project definition
63 |
64 | #### Brief plan of attack
65 |
66 | Executing this project would include:
67 |
68 | * Seeking input on the current status and future plans of individuals / teams involved in maintaining or authoring relevant libraries, including (but not limited to):
69 | * Jim Pick (ecosystem / grantee, ex-PL)
70 | * Alan Shaw (PL)
71 | * Glif.io / Infinite Scroll
72 | * Truffle / Ganache contributors (initially via Jim Pick)
73 | * Other PL staff who may have an interest in, or experience with these libraries and the RPC itself
74 | * Attempting to use the libraries as they currently exist, and possibly the Lotus RPC directly, and documenting the process to contribute to the project outputs listed above
75 |
76 | #### What does done look like?
77 |
78 | _What specific deliverables should be completed to consider this project done?_
79 |
80 | This project will produce a brief report covering:
81 |
82 | * A map of the currently available JavaScript libraries available for interacting with Filecoin (RPC API and other)
83 | * A pre-documentation catalog of the functionality available:
84 | * in those JavaScript libraries
85 | * via the Lotus RPC
86 | * _Note that producing actual documentation is a further extension of this project to be scoped accordingly._
87 |
88 | The report will aim to provide a clear, shared understanding of the quality and state of these libraries and the scope of their utility for achieving basic tasks with Filecoin (wallet transactions, state inspection, deal making, etc.) (i.e. "shared" because it is critical that this is communicated to and understood by relevant parties). An ideal outcome of this would be further project proposals to undertake improvements.
89 |
90 | The report should provide sufficient clarity to be able to rate (and priority-sort) proposals for further work on relevant libraries (or not-yet-existing libraries) to interact with Filecoin, and on the Lotus RPC itself with regard to its affordances for external users.
91 |
92 | #### What does success look like?
93 |
94 | _Success means impact. How will we know we did the right thing?_
95 |
96 | A clearer path to the improvement of external Lotus interaction
97 |
98 | #### Counterpoints & pre-mortem
99 |
100 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
101 |
102 | #### Alternatives
103 |
104 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
105 |
106 | #### Dependencies/prerequisites
107 |
108 | #### Future opportunities
109 |
110 | ## Required resources
111 |
112 | #### Effort estimate
113 |
114 |
122 |
123 | Small for 3-5 FTEs (i.e. lower for smaller investment, as available to Sudo at the time of writing)
124 |
125 | #### Roles / skills needed
126 |
127 |
128 |
129 | * JavaScript
130 | * TypeScript
131 | * Lotus (some Go)
132 | * Basic Filecoin understanding
133 |
--------------------------------------------------------------------------------
/proposals/LONG-PITCH-TEMPLATE.md:
--------------------------------------------------------------------------------
1 | # [outcome or objective here]
2 |
3 | Authors:
4 |
5 | Initial PR: TBD
6 |
7 |
11 |
19 |
22 |
23 | ## Purpose & impact
24 | #### Background & intent
25 | _Describe the desired state of the world after this project? Why does that matter?_
26 |
29 |
30 | #### Assumptions & hypotheses
31 | _What must be true for this project to matter?_
32 |
33 |
34 | #### User workflow example
35 | _How would a developer or user use this new capability?_
36 |
37 |
38 | #### Impact
39 | _How would this directly contribute to web3 dev stack product-market fit?_
40 |
41 |
45 |
46 | #### Internal leverage
47 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
48 |
49 |
52 |
53 | #### Confidence
54 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
55 |
56 |
57 |
58 |
59 | ## Project definition
60 | #### Brief plan of attack
61 |
62 |
63 |
64 | #### What does done look like?
65 | _What specific deliverables should be completed to consider this project done?_
66 |
67 | #### What does success look like?
68 | _Success means impact. How will we know we did the right thing?_
69 |
70 |
73 |
74 | #### Counterpoints & pre-mortem
75 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
76 |
77 | #### Alternatives
78 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
79 |
80 | #### Dependencies/prerequisites
81 |
82 |
83 | #### Future opportunities
84 |
85 |
86 | ## Required resources
87 |
88 | #### Effort estimate
89 |
97 |
98 | #### Roles / skills needed
99 |
100 |
--------------------------------------------------------------------------------
/proposals/MINIMAL-PITCH-TEMPLATE.md:
--------------------------------------------------------------------------------
1 | # [project name here]
2 |
3 | Authors:
4 |
5 |
9 |
19 |
22 |
23 | ## What is the problem this project solves?
24 | _Describe the status quo, including any relevant context on the problem you're seeing that this project should solve. Who is the user you're solving for, and why do they care about this problem? Wherever possible, include pain points or problems that you've seen users experience to help motivate why solving this problem works towards top-line objectives._
25 |
26 | ## Impact
27 | _What goals/OKRs are being addressed (for w3dt, a specific program, etc.)? Why is this project important? What do we get with this project that we can't get without it?_
28 |
29 | ## The idea
30 | _Describe the proposed project solution, at a very high level. Stay at the level of the high-level requirements. Diagrams and interface descriptions can be useful, if you have any that help clarify and explain the idea._
31 |
32 | ## Success/acceptance criteria (optional)
33 | _How do we know we're done with this project? How do we know we're successful? This field is OPTIONAL for the first draft of an MPP. Sometimes this field needs to be filled out once we have more detail on the shape of the actual solution._
34 |
35 | ## Detailed plans (optional)
36 | _Link to more detailed project plans, e.g. product requirements documents (PRDs) and technical design docs, once they have been created for this project._
37 |
38 | ## Program (optional)
39 | _If this project is part of a program, please indicate the relevant program here._
--------------------------------------------------------------------------------
/proposals/bot-controllers.md:
--------------------------------------------------------------------------------
1 | # Dealbot Controller for Deal Testing and Metrics
2 |
3 | Authors: @mgoelzer
4 |
5 | Initial PR: [#87](https://github.com/protocol/web3-dev-team/pull/87)
6 |
7 |
11 |
19 |
22 |
23 | ## Purpose & impact
24 | #### Background & intent
25 | _Describe the desired state of the world after this project? Why does that matter?_
26 |
29 |
30 | Let's start with the state of the world *before* this project is completed.
31 |
32 | Currently, we are about to start a project to build [storage and retrieval dealbots](https://github.com/protocol/web3-dev-team/pull/84). These bots will attempt storage and retrieval deals on the Filecoin mainnet based on input provided to them on stdin (or a command-line-supplied input file). They will output their results to stdout, with that output then piped to a log aggregator for visualization (e.g., Kibana, Observable, etc.).
33 |
34 | For most use cases of these bots, we don't just want to invoke them manually on the command line. Instead, we need a higher-level orchestration system that will carry out a series of storage and retrieval attempts to generate the data we want for that use case.
35 |
36 | One such controller will be the one for deal success testing. This PR covers only that controller. Future controllers (in future PRs) will include those for reputation systems, scraping data from other sources and storing it in Filecoin, etc.
37 |
38 | The desired state of the world after building this deal success controller is:
39 |
40 | - We will have a long-running daemon program that repeatedly attempts storage and retrieval deals on mainnet
41 | - This collection of results from many storage and retrieval deals will get aggregated into the log aggregator that the bots output to.
42 | - These aggregate results will be visualized via the [deal success dashboards project](https://github.com/protocol/web3-dev-team/pull/85).
43 |
44 |
45 | #### Assumptions & hypotheses
46 | _What must be true for this project to matter?_
47 |
48 |
49 | - The storage and retrieval bots are built and able to output test results to a dashboard.
50 | - The bots are capable of accepting instructions on stdin or a unix socket or some other kind of command issuance interface.
51 |
52 | #### User workflow example
53 | _How would a developer or user use this new capability?_
54 |
55 |
56 | ```
57 | $ ./dsr-bot-controller --input my-tests.json start
58 | ```
59 |
60 | The input file `my-tests.json` might look something like this:
61 |
62 | ```
63 | [
64 | // storing and then retrieving some time interval later is the most common use case
65 | {
66 | "dealType":"storeThenRetrieve",
67 | "storeThenRetrieveParameters":
68 | {
69 | "retrievalDelayHours":"72", // store the file, wait 72 hrs, then try to retrieve it
70 | },
71 | "miners":["f0xxxx","f0yyyyy"],
72 | "dataToStore":"random", // this could alternatively be a file path
73 | "randomDataParameters":
74 | {
75 | "sizeBytes":"1073741824", // 1 GiB
76 | },
77 | "schedule":
78 | {
79 | "startDateTime":"yyyy-mm-dd_hh:mm:ss",
80 | "endDateTime":"yyyy-mm-dd_hh:mm:ss",
81 | "repeatIntervalDays":"7", // means the test is re-run every 7 days
82 | },
83 | },
84 |
85 | // retrieval-only is another use-case (eg, Slingshot retrievals)
86 | {
87 | "dealType":"retrievalOnly",
88 | "retrievalOnlyParameters":
89 | {
90 | "cid":"baf...xyz",
91 | },
92 | "miner":["f0xxxx","f0yyyyy"],
93 | "schedule":
94 | {
95 | "startDateTime":"yyyy-mm-dd_hh:mm:ss",
96 | "endDateTime":"yyyy-mm-dd_hh:mm:ss",
97 | "repeatIntervalDays":"7", // means the test is re-run every 7 days
98 | },
99 | },
100 |
101 | // storage-only is a P2 since right now there is no specific use case for this.
102 | {
103 | "dealType":"storageOnly",
104 | "miner":["f0xxxx","f0yyyyy"],
105 | "dataToStore":"random", // this could alternatively be a file path
106 | "randomDataParameters":
107 | {
108 | "sizeBytes":"1073741824", // 1 GiB
109 | },
110 | "schedule":
111 | {
112 | "startDateTime":"yyyy-mm-dd_hh:mm:ss",
113 | "endDateTime":"yyyy-mm-dd_hh:mm:ss",
114 | "repeatIntervalDays":"7", // means the test is re-run every 7 days
115 | },
116 | },
117 |
118 | ]
119 | ```
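To make the orchestration role concrete, a hypothetical sketch of the controller's core loop follows. Everything here is illustrative: the `./dealbot` binary, the stdin handoff, and the assumption that the input has been normalised to plain JSON (without the comments used in the example above) are placeholders for whatever interface the dealbot project actually settles on.

```ts
// Hypothetical controller loop: read the test specs and re-run each one on
// its configured interval, handing individual attempts to a dealbot process.
import { readFileSync } from 'node:fs'
import { spawn } from 'node:child_process'

interface TestSpec {
  dealType: 'storeThenRetrieve' | 'retrievalOnly' | 'storageOnly'
  schedule: { repeatIntervalDays: string }
  [key: string]: unknown
}

const specs: TestSpec[] = JSON.parse(readFileSync(process.argv[2], 'utf8'))

for (const spec of specs) {
  const intervalMs = Number(spec.schedule.repeatIntervalDays) * 24 * 60 * 60 * 1000
  const runOnce = () => {
    // One deal attempt; the bot's stdout (results) flows through to the log
    // aggregator described above.
    const bot = spawn('./dealbot', [], { stdio: ['pipe', 'inherit', 'inherit'] })
    bot.stdin?.write(JSON.stringify(spec))
    bot.stdin?.end()
  }
  runOnce()
  setInterval(runOnce, intervalMs)
}
```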
120 |
121 | #### Impact
122 | _How would this directly contribute to web3 dev stack product-market fit?_
123 |
124 |
128 |
129 | - To achieve PMF, we need storage and retrieval deals to be highly reliable (success >= 99% on first attempt)
130 | - Regardless of the type of user, reliability is critical
131 |
132 | #### Leverage
133 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
134 |
135 |
138 |
139 | - We need a controller like this to make the dealbots useful for deal success testing
140 | - The combination of dealbots and this controller will give us data on deal success rates
141 | - Knowledge of deal success rates will help us know where to focus our debugging efforts as we improve the Filecoin network
142 |
143 | #### Confidence
144 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
145 |
146 |
147 |
148 | Confidence = 8
149 |
150 | The reasons why this impact might not be achieved are covered in the Pre-mortem section below.
151 |
152 |
153 | ## Project definition
154 | #### Brief plan of attack
155 |
156 |
157 |
158 | #### What does done look like?
159 | _What specific deliverables should be completed to consider this project done?_
160 |
161 | - An orchestration program to drive the dealbots exists
162 | - The orchestration program follows the general usage design described in User Workflow Example above
163 | - CI unit tests demonstrate the controller does enqueue deals and does follow up on them
164 | - The controller process schedules across multiple storage/retrieval bots
165 |
166 | #### What does success look like?
167 | _Success means impact. How will we know we did the right thing?_
168 |
169 |
172 |
173 | - We are getting a continuous feed of storage+retrieval tests in our dashboard visualization system
174 | - Stakeholders and decision makers are using these dashboards:
175 | - To make decisions about where to focus debugging efforts
176 | - To understand how reliable storage and retrieval deals are on the network
177 |
178 | #### Counterpoints & pre-mortem
179 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
180 |
181 | - The controller, which is a long-running daemon program, crashes and no one is available to restart it and debug the cause of the crash
182 | - The controller tests artificial conditions that do not represent real world usage on the network
183 | - The controller is not robust enough to handle complex tests like multiple retrievals, storage of a wide range of file sizes -- AND testing these turns out to be important/relevant
184 |
185 | #### Alternatives
186 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
187 |
188 | - We could build a more informal controller, like a simple bash script that we run on a cron schedule
189 |
190 | #### Dependencies/prerequisites
191 |
192 |
193 | - [Storage and retrieval bots project](https://github.com/protocol/web3-dev-team/pull/84) is completed
194 |
195 | #### Future opportunities
196 |
197 |
198 | This project will serve as a prototype for how to write other controllers that utilize the dealbots in different ways, such as:
199 |
200 | - Reputation systems
201 | - New kinds of KPIs we want to track
202 | - Scraping data and persisting it on Filecoin
203 |
204 | This project will also provide open source example code that community members can use as a starting point to build their own controllers.
205 |
206 | ## Required resources
207 |
208 | #### Effort estimate
209 |
217 |
218 | Medium
219 |
220 | #### Roles / skills needed
221 |
222 |
--------------------------------------------------------------------------------
/proposals/daggregator-cli-tool.md:
--------------------------------------------------------------------------------
1 | # go-daggregator CLI tool
2 |
3 | Authors: @dchoi27
4 |
5 |
9 |
19 |
22 |
23 | ## What is the problem this project solves?
24 | Currently, clients making storage deals themselves for files smaller than a sector generally have to decide either to put each file into its own deal, or to aggregate multiple files into a compressed file and put that into a deal. The former introduces complexity to the dealmaking process and increases the costs to the miner, but the latter often forces them to retrieve more than just the files they need.
25 |
26 | By allowing clients to aggregate files into CARs that can take advantage of CARv2 / partial retrieval, they can get the best of both worlds.
27 |
28 | ## Impact
29 | This will increase the efficiency of users making deals directly with miners, especially those with large datasets. It also tells a better story around retrievability of their data, which usually isn't a top-level concern for them (since these are often archival use cases), but can be. For instance, in the next few months Internet Archive is looking to build an interactive use case on top of Filecoin, and is interested in partial retrieval.
30 |
31 | ## The idea
32 | Package [go-daggregator](https://pkg.go.dev/github.com/filecoin-project/go-dagaggregator-unixfs) into a CLI tool that can read from an IPFS blockstore and aggregate DAGs specified by their CIDs into a Filecoin-compatible .car file with a manifest file compatible with CARv2. Riba estimated this would take a few hours of work for a Go dev.
33 | 
34 |
35 | ## Success/acceptance criteria (optional)
36 | _How do we know we're done with this project? How do we know we're successful? This field is OPTIONAL for the first draft of an MPP. Sometimes this field needs to be filled out once we have more detail on the shape of the actual solution._
37 |
38 | ## Detailed plans (optional)
39 | _Link to more detailed project plans, e.g. product requirements documents (PRDs) and technical design docs, once they have been created for this project._
40 |
41 | ## Program (optional)
42 | _If this project is part of a program, please indicate the relevant program here._
43 |
--------------------------------------------------------------------------------
/proposals/digitalocean-droplet.md:
--------------------------------------------------------------------------------
1 | # "One-Click" for running Lotus on DigitalOcean
2 |
3 | Authors: johndmulhausen
4 |
5 | Initial PR: #63
6 |
7 | ## Purpose & impact
8 | #### Background & intent
9 |
10 | Ship one-click machine image for Lotus nodes so that it is easy to get started, and bake-in some state downloading so we can jumpstart the sync process.
11 |
12 | #### Assumptions & hypotheses
13 |
14 | We believe being able to deploy Lotus nodes with a single click will significantly reduce startup friction. The proposed image will download a `.car` file from an S3 bucket with a usable chain state, which should complete about 20 minutes after the one-click is fired up, rather than taking days.
15 |
16 | #### User workflow example
17 |
18 | The user is linked to a marketplace.digitalocean.com URL from the docs, then clicks "Create" to fire up a Lotus node on their DigitalOcean account. After making a couple of selections about the machine they'd like to run the software on, it is created with Lotus installed, and a script fires to download the .car file. Shortly afterwards (ETA 20 minutes) the node is live.
19 |
20 | #### Impact
21 |
22 | 🔥🔥🔥
23 |
24 | This is a huge pain point today: we're reducing the number of steps significantly, and the startup time drops to a fraction of the original.
25 |
26 | #### Confidence
27 |
28 | High
29 |
30 | ## Project definition
31 | #### Brief plan of attack
32 |
33 | - Create Image
34 | - Create Vendor account w/DigitalOcean
35 | - Work w/Marketplace team to make sure image scans are clean
36 | - Ship!
37 |
38 | #### What does done look like?
39 |
40 | Marketplace URL is live.
41 |
42 | #### What does success look like?
43 |
44 | We can point to a startup process that takes less than a half hour rather than days.
45 |
46 | #### Counterpoints & pre-mortem
47 |
48 | TBD
49 |
50 | #### Alternatives
51 |
52 | TBD
53 |
54 | #### Dependencies/prerequisites
55 |
56 | TBD
57 |
58 | #### Future opportunities
59 |
60 | TBD
61 |
62 | ## Required resources
63 |
64 | #### Effort estimate
65 |
66 | - Medium: Days
67 |
68 | #### Roles / skills needed
69 |
70 | - Ops/dev (image creation)
71 | - Tech Writer (documentation, image submission)
72 |
--------------------------------------------------------------------------------
/proposals/images/bot-arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/protocol/web3-dev-team/c9297fe220b846bcae9e80642186f70cdd87c9fd/proposals/images/bot-arch.png
--------------------------------------------------------------------------------
/proposals/ipfs-blog-enhancements.md:
--------------------------------------------------------------------------------
1 | # IPFS blog enhancements
2 |
3 | Author: @jessicaschilling
4 |
5 | Initial PR: https://github.com/protocol/web3-dev-team/pull/74
6 |
7 | Larger-scale proposal for overall ipfs.io work, including this effort: https://github.com/protocol/web3-dev-team/blob/ipfsio-modular-rework/proposals/ipfsio-modular-rework.md
8 |
9 | Project tracking board for overall ipfs.io work: https://github.com/orgs/ipfs/projects/11
10 | Repo for the new blog: http://github.com/ipfs/ipfs-blog
11 |
12 | ## Purpose & impact
13 | #### Background & intent
14 | _Describe the desired state of the world after this project? Why does that matter?_
15 |
16 | Prospective and current IPFS and w3dt developers have a “one-stop shop” for up-to-date IPFS info that can be easily filtered and searched to meet the specific needs of a wide variety of audiences (technical users, newcomers, etc) as well as to optimize cross-item discovery, metrics collection, and inbound SEO. Primary content focus is on our (already frequently updated) blog posts, but includes a mix of other relevant content in a manner that surfaces relevant info without creating an ongoing maintenance burden on our core team. This front-end presentation is matched on the back end by a Markdown-driven content rendering engine that significantly reduces friction for post authors/editors, as well as a content submission form accessible by the public and easily managed by administrators.
17 |
18 | #### Assumptions & hypotheses
19 | _What must be true for this project to matter?_
20 |
21 | ✓ Our blog is a valuable source of content for prospective and current devs
22 |
23 | ✓ Those audiences would also benefit from single-source access to other IPFS-related content: news stories, release notes, tutorials, videos, academic papers, event listings, etc
24 |
25 | ✓ Not having search or tagging on blog posts is a missed opportunity, both for our readers and for our metrics collection
26 |
27 | ✓ We can use readership metrics to better understand our audience and PMF in general
28 |
29 | ✓ Labbers would significantly benefit from an easier post-authorship process and smoother deployments
30 |
31 | #### User workflow example
32 | _How would a developer or user use this new capability?_
33 |
34 | Reader:
35 | - Visits the blog, either directly or via shared link, search, etc
36 | - Explores additional content beyond the page they originally visited thanks to text search, content type filter, tag cross-discovery, author cross-discovery, etc
37 | - Engages with content (comments on a blog post, shares an item organically or through social-share links, etc)
38 | - Submits content (news link, event, etc) through submission form
39 | - Subscribes to newsletter through direct submission fields in site footer
40 |
41 | Post author/editor:
42 | - Drafts content in easy-to-use, WYSIWYG-enabled Forestry CMS
43 | - Uploads images in correctly sized dimensions through Forestry and our custom image scale/crop tool
44 | - Sees instant previews of their work within Forestry
45 | - PRs the Forestry staging environment to prod for approval and easy Fleek-based deployment
46 |
47 | #### Impact
48 | _How would this directly contribute to web3 dev stack product-market fit?_
49 |
50 | Improving our front-of-the-funnel comms and educational resources, such as the IPFS blog, directly enables early-stage education, exploration and onboarding. Additionally, providing a "one-stop shop" for news, tutorials, videos, events and similar resources ensures that our websites are positioned as a complete, reputable source of information — furthering our trust profile, increasing SEO and site traffic, and generally increasing the size of the funnel as a whole.
51 |
52 | Reducing friction for blog authors and maintainers also enables us to increase our overall comms velocity and efficiency, enabling us to create faster, better content — and avoid wasting resources that could better be repurposed to other PMF efforts.
53 |
54 | #### Leverage
55 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
56 |
57 | This work lays the foundation for future improvements to ipfs.io as a whole, particularly in terms of overall IA of header, footer, nav and other "furniture" — a lot of work will already have been done! Replatforming the blog also brings us the opportunity to collect more complete metrics on front-of-the-funnel visitor priorities and patterns, giving us a baseline for further iterative improvements to our overall website portfolio.
58 |
59 | #### Confidence
60 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
61 |
62 | - **Impact = 8** (opportunity to replace neglected ipfs.io/media with something easy to maintain; making posting significantly easier speeds blog post delivery; tagging and search significantly improve user experience AND add metrics collection points)
63 | - **Confidence = 10** (effort will directly resolve known pain points, including deployment woes, image size, difficulty of posting, no search functionality, etc)
64 | - **Ease = 5** (lots of moving parts, but few outright technical challenges in the work itself)
65 |
66 |
67 | ## Project definition
68 | #### Brief plan of attack
69 |
70 | 1. Migrate existing Hugo blog content to new VuePress platform; test for successful migration, image display, broken links, etc
71 | 2. Augment functionality: search, filter, tags, new header, new footer
72 | 3. Augment content to match augmented functionality (ensure all posts have appropriate tags, correctly sized images, etc)
73 | 4. Overlay Forestry CMS, including documentation for post authors/editors
74 | 5. Set up Fleek deployment process, including workflow between staging and prod
75 | 6. Set up Countly-based metrics collection
76 | 7. Ensure that all net-new posts on old blog are being duplicated on new blog during this time
77 | 8. Train authors/editors/maintainers
78 | 9. Promote via all appropriate comms channels
79 |
80 | #### What does done look like?
81 | - Existing content on blog.ipfs.io is migrated to new platform and deployed via Fleek with no disruption of service, broken links, etc
82 | - Workflow and DRI exist for submitting, adding and maintaining non-blog content types
83 | - Any follow-up work is documented, issue-ized, and assigned
84 | - Metrics collection implemented via Countly
85 |
86 | #### What does success look like?
87 | - Uptick in overall visitors vs existing blog
88 | - Public-submitted or labber-submitted links are at a "good" number (TBD - will need to establish baseline since this is new functionality)
89 | - Internal satisfaction with ease of posting/publishing/deploying
90 | - Fewer internal fire drills at publishing/revision time; blogging plays a smooth role in overall marketing workflow
91 |
92 | #### Counterpoints & pre-mortem
93 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
94 |
95 | - Failing to keep content up to date and/or properly tagged
96 | - Failing to make the most of added content types (e.g. only adding blog posts)
97 |
98 | #### Alternatives
99 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
100 |
101 | - Training all blog writers/editors on Markdown practices
102 | - Remedying flaws in our existing CI/CD pipeline
103 |
104 | #### Dependencies/prerequisites
105 | None
106 |
107 | #### Future opportunities
108 | - More blog authors or more frequent posts due to ease of publishing
109 | - Establishment as a central "IPFS news clearinghouse" and IPFS event directory if those content types are consistently updated
110 |
111 | ## Required resources
112 |
113 | #### Effort estimate
114 | Shirt size: Large
115 |
116 | _Note that there's been some slight scope creep (performance instrumentation, CSS cleanup) that may expand this effort if we don't push it into post-launch followup._
117 |
118 | #### Roles / skills needed
119 | - Project lead/PM (Jessica Schilling): Coordinate tasks, manage schedule/dependencies, ensure adherence to spec, etc
120 | - Build/test developer (João Peixoto): Replatform, add metrics, add additional functionality, pre-launch testing
121 | - Metrics/continuity dev help (Zé Bateira): Help with adding metrics and other functionality in a manner consistent with other PL sites
122 |
--------------------------------------------------------------------------------
/proposals/ipfs-content-providing.md:
--------------------------------------------------------------------------------
1 | # Improve IPFS Content Providing
2 |
3 | Authors: @aschmahmann
4 |
5 | Initial PR: https://github.com/protocol/web3-dev-team/pull/31
6 |
7 |
11 |
19 |
22 |
23 | ## Purpose & impact
24 | #### Background & intent
25 |
26 | _Describe the desired state of the world after this project? Why does that matter?_
27 |
28 |
31 |
32 | Currently go-ipfs users are able to utilize the public IPFS DHT to find who has advertised they have some CID in under 1.5s in 95+% of cases. However, the process of putting those advertisements into the DHT is slow (e.g. 1 minute) and is a bottleneck for users trying to make their content discoverable. Users who have moderate amounts of content on their nodes complain about their content being hard to find in the DHT as a result of their nodes' inability to advertise. Additionally, some of the measures users can take to reduce the number of provider records they emit by taking actions like only reproviding the roots of graphs (see [reprovider strategies](https://github.com/ipfs/go-ipfs/blob/09178aa717689a0ef9fd2042ad355320a16ffb35/docs/config.md#reproviderstrategy)) are not generally recommended due to some outstanding issues such as the inability to resume downloads of a DAG.
33 |
34 | While R&D work on larger scale improvements to content routing is ongoing we can still take the opportunity now to make our existing system more usable and alleviate much of our users' existing pain with content routing.
35 |
36 | After completion of this project the state should be that go-ipfs users with lots of data are able to set up nodes that can put at least 100M records in the DHT per day. Additionally, users should be empowered to not have to advertise data that is not likely to be accessed independently (e.g. blocks that are part of a compressed file).
37 |
38 |
39 | #### Assumptions & hypotheses
40 | _What must be true for this project to matter?_
41 |
42 | - The IPFS public DHT content provider subsystem is insufficient for important users
43 | - The work is useful even though a more comprehensive solution will eventually be put forward, meaning either:
44 | - Users are not willing to wait, or ecosystem growth is throttled, until we build a more comprehensive content routing solution
45 | - The changes made here are either useful independent of major content routing changes, or the changes are able to inform or build towards a more comprehensive routing solution
46 |
47 | #### User workflow example
48 | _How would a developer or user use this new capability?_
49 |
50 |
51 | Users who use go-ipfs would be able to tell what percentage of their provider records have made it out to the network in a given interval and would notice more of their content being discoverable via the IPFS public DHT. Additionally, users would have a number of configurable options available to them to both modify the throughput of their provider record advertisements and to advertise fewer provider records (e.g. only advertising pin roots)
52 |
53 | #### Impact
54 | _How directly important is the outcome to web3 dev stack product-market fit?_
55 |
56 | 🔥🔥🔥 = 0-3 emoji rating
57 |
58 |
62 |
63 | Probably the most visible primitive in the web3 dev stack is content addressing which allows someone to retrieve data via its CID no matter who has it. However, while content addressing allows a user to retrieve data from **anyone** it is still critical that there are systems in place that allow a user to find **someone** who has the data (i.e. content routing).
64 |
65 | Executing well here would make it easier for users to utilize the IPFS public DHT, the most widely visible content routing solution in the IPFS space. This would dramatically improve usability and the onboarding experience for new users, as well as the experience of existing users, likely leading to ecosystem growth.
66 |
67 | #### Leverage
68 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
69 |
70 | 🎯🎯🎯 = 0-3 emoji rating
71 |
72 |
74 |
75 | Many of the components of this proposal increase development velocity by either exposing more precise tooling for debugging or working with users, or by directly enabling future work.
76 |
77 | #### Confidence
78 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
79 |
80 |
81 | 2. We don't have direct market research demonstrating that improving the resiliency of content routing will definitely lead to more people choosing IPFS or choosing to work with the stack. However, this is a pain point for many of our users (as noted on the IPFS Matrix, Discuss and GitHub) and something we have encountered as an issue experienced by various major ecosystem members (Protocol Labs infra, Pinata, Infura, etc.).
82 |
83 | ## Project definition
84 | #### Brief plan of attack
85 |
86 |
87 |
88 | - Enable downloading sub-DAGs when a user already has the root node, but is only advertising the root node
89 | - e.g. have Bitswap sessions know about the graph structure and walk up the graph to find providers when low on peers
90 | - Add a new command to `go-ipfs` (e.g. `ipfs provide`) that at minimum allows users to see how many of their total provider records have been published (or failed) in the last 24 hours
91 | - Add an option to go-libp2p-kad-dht for very large routing tables that are stored on disk and are periodically updated by scanning the network
92 | - Make IPFS public DHT `put`s take <3 seconds (i.e. come close to `get` performance)
93 | - Some techniques available include:
94 | - Decreasing DHT message timeouts to more reasonable levels
95 | - [Not requiring](https://github.com/libp2p/go-libp2p-kad-dht/issues/532) the "followup" phase for puts
96 | - Not requiring responses from all 20 peers before returning to the user
97 | - Not requiring responses from the 3 closest peers before aborting the query (e.g. perhaps 5 of the closest 10)
98 | - Add a function to the DHT for batch providing (and putting) and utilize it in go-ipfs
99 | - Tests with https://github.com/libp2p/go-libp2p-kad-dht/pull/709 showed tremendous speedups even in a single threaded provide loop if the provider records were sorted in XOR space
100 |
101 | #### What does done look like?
102 | _What specific deliverables should be completed to consider this project done?_
103 |
104 | The project is done when users can see how much of their provide queue is complete, are able to allocate resources to increase their provide throughput until satisfied, and allocating resources is either not prohibitively expensive, or it is deemed too much work to decrease the resource allocation.
105 |
106 | #### What does success look like?
107 | _Success means impact. How will we know we did the right thing?_
108 |
109 |
112 |
113 | Success means that far fewer users report issues finding content; instead, things either work for them or they file issues or ask questions about how to decrease their resource usage for providing. Things should just work for users who have 10-100k provider records and leave their nodes on continuously.
114 |
115 | #### Counterpoints & pre-mortem
116 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
117 |
118 | - People have other issues that the DHT put performance is just masking, which means we will not immediately be able to see the impact from this project alone
119 | - Users will not want to spend the raw bandwidth of emitting their records even if lookups are instant
120 | - Decreasing the query `put` time is much harder than anticipated
121 | - Technical work required is harder than anticipated
122 |
123 | #### Alternatives
124 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
125 |
126 | These alternatives are not exclusive with the proposal
127 |
128 | 1. Focus on decreasing the number of provider records
129 | - e.g. Add more options for data reproviding such as for UnixFS files only advertising Files and Directories
130 | - might be tricky UX and plumbing, but is something we likely will need to tackle eventually
131 | 2. Focus on decreasing the frequency of reproviding records
132 | - e.g. Build a second routing layer where nodes are encouraged or required to have high availability (e.g. a federated routing layer or opt-in second DHT that tracks peer availability more rigorously)
133 | - has possibility for high payoff, although has more risk associated with it
134 |
135 | #### Dependencies/prerequisites
136 |
137 |
138 | - None
139 |
140 | #### Future opportunities
141 |
142 |
143 | - Making it easier to implement alternative #1 above (enabled by `ipfs provide` and being able to download sub-DAGs when only the root node is provided)
144 | - Vastly improved lookup performance of the delegated routers that can be used in js-ipfs (enabled by allowing users to have large routing tables)
145 |
146 | ## Required resources
147 |
148 | #### Effort estimate
149 |
157 |
158 | L. There is some uncertainty in how much work will be required to increase `put` performance. However, all of the changes are client side which make them relatively easy to test. This estimate could be an overestimate as some of the changes have some uncertainty which is currently being estimated at the higher end (i.e. the work in go-ipfs and go-bitswap)
159 |
160 | #### Roles / skills needed
161 |
162 |
163 | - 3-4x go-engineers
164 | - 1-2x go-ipfs experience
165 | - 1-2x go-libp2p (ideally go-libp2p-kad-dht) experience
166 | - Some input and support may be required from research
167 |
--------------------------------------------------------------------------------
/proposals/ipfs-nft-tutorial.md:
--------------------------------------------------------------------------------
1 | # Proposal: A tutorial for building NFTs with Ethereum and IPFS
2 |
3 | Authors: @yusefnapora
4 |
5 | Initial PR: [#11](https://github.com/protocol/web3-dev-team/pull/11)
6 |
7 |
11 |
19 |
22 |
23 | ## Purpose & impact
24 | #### Background & intent
25 | _Describe the desired state of the world after this project? Why does that matter?_
26 |
29 |
30 | A lot of current interest in our stack is driven by decentralized finance applications, especially "non-fungible tokens" (NFTs).
31 | NFTs are tokens with a unique identity, which can have associated metadata. This [OpenSea NFT Bible](https://opensea.io/blog/guides/non-fungible-tokens/)
32 | has a good overview of what NFTs are and the history of their development over the past few years.
33 |
34 | We don't currently offer advice or guidance to users interested in creating NFTs and storing their metadata on IPFS. As a result, we have limited "empathy" for their specific needs, are largely unaware of their unique pain points, and users building NFTs need to rely on third-party resources.
35 |
36 | This proposal is to build a guided tutorial that creates an NFT token on ethereum that links to assets stored in IPFS using IPFS CIDs in the token metadata.
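For orientation, the linking pattern the tutorial would teach looks roughly like the sketch below. The metadata shape follows the common ERC-721 metadata convention; the CID strings are placeholders, not real content.

```ts
// Illustrative only: ERC-721-style token metadata pointing at IPFS content by CID.
const metadata = {
  name: 'Example token',
  description: 'An asset whose image and metadata both live on IPFS',
  image: 'ipfs://<image CID>'
}

// The metadata object itself is also added to IPFS, and the contract's
// tokenURI() for this token would then return 'ipfs://<metadata CID>'.
```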
37 |
38 | #### Assumptions & hypotheses
39 | _What must be true for this project to matter?_
40 |
41 |
42 | - Users want to store NFT assets on IPFS
43 | - Seems to be supported by evidence, e.g. Pinata fireside chat
44 | - Users are confused about how IPFS can be leveraged when developing NFTs.
45 | - Uncertain. Perhaps existing resources, e.g. from Pinata are enough?
46 | - We at Protocol Labs would gain a better understanding of a key user demographic by producing this tutorial
47 | - The existence of this tutorial would be a positive signal to ethereum devs that PL is invested in things that matter to them
48 |
49 | #### User workflow example
50 | _How would a developer or user use this new capability?_
51 |
52 |
53 | A developer interested in building NFTs googles "IPFS NFT" and finds our tutorial. At a glance, they can tell that:
54 |
55 | - IPFS is a great fit for NFTs
56 | - A proof-of-concept is achievable in less than a day
57 |
58 | They decide to spend an afternoon following along. At the end, they have a working token that runs on a local ethereum devnet, with content persisted to their local IPFS daemon.
59 |
60 | To wrap up the tutorial, we talk about the "path to production," which would require pinning IPFS content, etc. This gives devs
61 | a clear sense of what they'll need to do to get from toy example to raking in that sweet crypto kitty cash.
62 |
63 | #### Impact
64 | _How directly important is the outcome to web3 dev stack product-market fit?_
65 |
66 | 🔥 = 0-3 emoji rating
67 |
68 | I think this tutorial would be valuable, but it's not a "make or break" proposition, hence the one 🔥 rating.
69 |
70 | Other tutorials already exist, for example, [this very fine guide by our friends at Pinata](https://medium.com/pinata/how-to-build-erc-721-nfts-with-ipfs-e76a21d8f914),
71 | and these may already serve the needs of NFT developers well enough.
72 |
73 | The main impacts from having our own tutorial are:
74 |
75 | - NFT developers know that we are aware of and support them
76 | - We control the messaging around how IPFS fits into the NFT picture
77 | - I think this is important because we're not the only option. You can build NFTs backed by an S3 bucket, etc.
78 | - We can set the stage for a future IPFS / Filecoin NFT pinning service (e.g. as described in https://github.com/protocol/web3-dev-team/pull/3) in our "path to production".
79 |
80 |
84 |
85 | #### Leverage
86 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
87 |
88 | 🎯🎯 = 0-3 emoji rating
89 |
90 | The benefits of developing this tutorial would be as much for us at Protocol Labs as for our end users. If NFT developers are a key part of our audience / userbase, we should be working to empathize with and understand them as much as possible. As a prerequisite for writing the tutorial, we'll need to understand exactly how NFT devs are currently using IPFS. This will give us insight into things we need to improve and will let us speak more confidently as an organization about the needs of this highly specialized community.
91 |
92 |
93 | #### Confidence
94 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
95 |
96 | 0.1
97 |
98 |
99 | I have no data to support my assertions about impact, just "self conviction" and general hunches.
100 |
101 | ## Project definition
102 | #### Brief plan of attack
103 |
104 |
105 | - Do a brief review of existing tutorials & resources. Follow along with at least one other guide (ideally 2+) to completion.
106 | - Define the scope and functionality of the example project
107 | - e.g., should our toy tokens try to "do something" like crypto kitties?
108 | - do we assume readers already know solidity & are familiar with eth dev?
109 | - Build the example and prove it works
110 | - Write the tutorial to guide users through building it themselves
111 |
112 |
113 | #### What does done look like?
114 | _What specific deliverables should be completed to consider this project done?_
115 |
116 | The tutorial exists and has been advertised via our social media channels, etc. A user with no prior experience building NFTs with IPFS can follow the tutorial through to completion.
117 |
118 | #### What does success look like?
119 | _Success means impact. How will we know we did the right thing?_
120 |
121 | - We get traffic to our tutorial & engagement on social media when announced
122 | - We solicit feedback at the end of the tutorial. Hopefully captures positive sentiment.
123 |
124 |
127 |
128 | #### Counterpoints & pre-mortem
129 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
130 |
131 | - If NFT developer needs are already being served, e.g. by Pinata tutorials, they may not care much about ours.
132 | - Our internal learning / empathy building benefits may be less than expected or hard to scale within PL. In other words, the team building the tutorial may learn something, but that may not translate to greater understanding org-wide.
133 | - We may not be well-equipped to define "best practices" for NFT + IPFS development, because we're not yet intimately familiar with the domain.
134 | - The NFT "landscape" is rapidly evolving, which risks this tutorial going stale. If major changes occur in e.g. the ethereum tooling we recommend, we need to keep the tutorial updated or risk losing credibility.
135 |
136 | #### Alternatives
137 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
138 |
139 | - Education:
140 | - Prominently link to and promote external resources (e.g. pinata tutorial)
141 |
142 | - Community building:
143 | - Establish a category on discuss.ipfs.io to discuss NFT development and engage with the community. Have someone at PL monitor and engage with the forum.
144 | - Invite NFT developers to have a sync "fireside chat" style discussion with PL devs, to solicit feedback
145 |
146 | - "leveling up" internally:
147 | - have an "NFT workshop" event that people at PL can join to build an NFT on IPFS, to get our hands dirty with the tech & build empathy
148 |
149 | #### Dependencies/prerequisites
150 |
151 |
152 | We need a place to put the tutorial (docs.ipfs.io, or elsewhere?).
153 |
154 | #### Future opportunities
155 |
156 |
157 | If we develop tooling / new capabilities that are tailored for NFTs (e.g. https://github.com/protocol/web3-dev-team/pull/3), we can use the tutorial as a starting point to advertise and explain the new stuff.
158 |
159 | ## Required resources
160 |
161 | #### Effort estimate
162 |
170 |
171 | Medium, ~3-5 weeks for a team of 2 or 3 people. Less if the team has prior experience building NFTs with IPFS.
172 |
173 | #### Roles / skills needed
174 |
175 |
176 | - "solution builder" who can design and implement the example token
177 | - a dev with prior experience building on ethereum would be a plus and could shave 1-2 weeks off the time estimate.
178 | - technical writer to develop the tutorial.
179 | - ideally, at least one other technical writer to review and revise (part time)
180 | - devs with little / no prior experience to beta test and validate that the tutorial is possible to complete
181 |
--------------------------------------------------------------------------------
/proposals/lotus-dynamic-retrieval-pricing.md:
--------------------------------------------------------------------------------
1 | # Dynamic retrieval pricing in Lotus
2 |
3 | Authors: @raulk
4 |
5 | Initial PR: https://github.com/protocol/web3-dev-team/pull/97.
6 |
7 | ## Purpose & impact
8 | #### Background & intent
9 | _Describe the desired state of the world after this project? Why does that matter?_
10 |
11 | A retrieval client asks the miner for a quote before proceeding to retrieve a
12 | piece or a part thereof. The miner quotes:
13 |
14 | - unseal price (the price the client must pay if the miner needs to unseal data)
15 | - minimum price per byte (the price the client must pay for each byte of data)
16 | - maximum payment interval (the number of bytes that the provider will send
17 | before pausing the transfer to wait for payment)
18 | - maximum payment interval increase (the increase in the payment interval each
19 | time payment is received)
20 |
21 | The quoted values become part of the deal proposal. Unlike with storage deals,
22 | retrieval deal proposals don’t go on chain.
23 |
24 | On the provider side, these values are filled from the “stored ask”.
25 | The “stored ask” can be changed through JSON-RPC, but it’s statically set.
26 | Lotus doesn’t have the ability to change the quote based on the piece that’s
27 | being requested.
28 |
29 | This means that fast retrieval and non-fast retrieval deals get the same
30 | treatment with regards to pricing. In other words, anything that’s set as the
31 | "stored ask" will be a blanket policy for all retrievals, fast or ordinary,
32 | verified or unverified.
33 |
34 | Impact of the blanket policy on the unsealing price:
35 | - If the unsealing price is set to zero, the miner commits to serving all deals
36 | without requiring an unseal payment from the client, even those that require
37 | unsealing. Unsealing is a heavy and expensive operation, one that's irrational
38 | for miners to offer for free today.
39 | - If the unsealing price is set to non-zero, fast retrievals will require the
40 | creation of a payment channel (even if price per byte is zero), which
41 | introduces significant latency and requires chain interaction.
42 |
43 | Impact of the blanket policy on the price per byte:
44 | - Miners wanting to provide free retrieval for verified deals would set this
45 | value to zero. This has the side-effect of skipping payment channel creation
46 | (if unseal price is also zero).
47 | - However, that same policy would apply to unverified deals, which the miner is
48 | likely not keen to offer for free.
49 |
50 | go-fil-markets doesn’t have access to the unseal status of a given piece,
51 | so it’s unable to vary the quoted price accordingly.
52 |
53 |
54 | #### Assumptions & hypotheses
55 | _What must be true for this project to matter?_
56 |
57 | For incentivisation circuits not to break down midway, storage deals
58 | performed with miner incentives (FIL+ datacap) must be retrievable under
59 | preferential (e.g. free) terms.
60 |
61 | To enable this, retrieval quoting should be dynamic, empowering miners to
62 | configure custom policies based on the attributes of the original storage deal,
63 | and other factors.
64 |
65 | #### User workflow example
66 | _How would a developer or user use this new capability?_
67 |
68 | Miners should be able to configure a flexible built-in policy (which enables the
69 | Bedrock golden path), as well as provide an external script that receives the
70 | decision factors wrapped in a structured message, and returns the pricing
71 | decisions. This could be similar to the existing deal filter mechanism.
72 |
73 | #### Impact
74 | _How would this directly contribute to web3 dev stack product-market fit?_
75 |
76 | It's essential.
77 |
78 | #### Internal leverage
79 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
80 |
81 | Low. This is a chore, not a novelty-seeking or risky project.
82 |
83 | #### Confidence
84 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
85 |
86 | 10. This solves an existing shortcoming in our technology that we have "launch
87 | data" for.
88 |
89 | ## Project definition
90 |
91 | 
92 |
93 | #### Brief plan of attack
94 |
95 | 1. Introduce the ability to query the unsealing status of a piece from the
96 | storage subsystem (unseal_status).
97 | 2. Introduce the ability to query whether the storage deal was a verified deal
98 | (verified_deal).
99 | 3. Introduce the ability to query whether the storage deal had fast retrieval
100 | enabled (fast_retrieval).
101 | 4. Add the ability to set a price function:
102 | - Built-in function (shipping with Lotus), which supports file
103 | configuration. By default, it prices verified, fast-retrieval, unsealed
104 | pieces at 0 (using the fields above).
105 | - Via an externally invoked function, so that miners can inject their own
106 | logic to quote dynamic prices -- similar to deal filter. This requires
107 | defining a schema and an intermediate data format (likely JSON); see the sketch below.
108 |
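As a rough illustration of the externally invoked function in step 4, the sketch below shows the kind of structured message it might receive and the pricing decision it might return. All field names and the overall JSON shape are assumptions for illustration only; the actual schema would be defined as part of this work.

```json
{
  "input": {
    "PayloadCID": "bafy...",
    "PieceCID": "baga...",
    "VerifiedDeal": true,
    "FastRetrieval": true,
    "Unsealed": true
  },
  "decision": {
    "UnsealPrice": "0",
    "PricePerByte": "0",
    "PaymentInterval": 1048576,
    "PaymentIntervalIncrease": 1048576
  }
}
```

The `VerifiedDeal`, `FastRetrieval` and `Unsealed` fields correspond to the verified_deal, fast_retrieval and unseal_status queries introduced in steps 1-3; the built-in function would apply its default "price at 0" rule to exactly this kind of input.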
109 | #### What does done look like?
110 | _What specific deliverables should be completed to consider this project done?_
111 |
112 | Merged pull requests on go-fil-markets and lotus + user documentation.
113 |
114 | #### What does success look like?
115 | _Success means impact. How will we know we did the right thing?_
116 |
117 | If the goals in the project brief are satisfied, we know we did the right thing.
118 |
119 | #### Counterpoints & pre-mortem
120 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
121 |
122 | N/A.
123 |
124 | #### Alternatives
125 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
126 |
127 | The pricing function could be hardcoded, but that's too short-sighted.
128 |
129 | #### Dependencies/prerequisites
130 |
131 |
132 | F3 retrieval stabilisation is not a hard technical dependency, but work needs to
133 | be coordinated as that project is also heavily touching the codebases in scope.
134 |
135 | #### Future opportunities
136 |
137 | Introducing dynamic pricing enables miners to create more sophisticated pricing
138 | models for retrieval deals.
139 |
140 | ## Required resources
141 |
142 | #### Effort estimate
143 |
144 | Small, 1-2 weeks.
145 |
146 | #### Roles / skills needed
147 |
148 | 1 engineer.
149 |
--------------------------------------------------------------------------------
/proposals/lotus-miner-runtime-segregation.md:
--------------------------------------------------------------------------------
1 | # Lotus miner runtime segregation
2 |
3 | Authors: @raulk
4 |
5 | Initial PR: TBD
6 |
7 | ## Purpose & impact
8 |
9 | #### Background & intent
10 |
11 | _Describe the desired state of the world after this project? Why does that matter?_
12 |
13 | ##### Brief description of goal
14 |
15 | This project aims to conduct a minimal segregation of the monolithic lotus-miner
16 | process through clean boundaries in order to: (1) increase the robustness and
17 | resilience of the system, (2) reduce attack surface, (3) lessen the operational
18 | risk that miners bear, (4) enable new miner-side features to be built without
19 | bloating, and (5) enable horizontal scaling, clustering, failover, firewalling,
20 | and threat prevention.
21 |
22 | The minimal segregation consists of keeping the sealing, proving and mining
23 | activity inside the `lotus-miner` binary, and spinning off the markets-related
24 | processes (deal making, retrieval) into a `lotus-markets` binary.
25 |
26 | By separating processes that inherently have different operational criticality,
27 | we increase the overall reliability of miner deployments.
28 |
29 | ##### Context
30 |
31 | lotus-miner is a monolithic process. Amongst other concerns, it is responsible
32 | for:
33 |
34 | - participating in storage mining by onboarding storage through CC, ordinary
35 | deals, or FIL+ deals.
36 | - coordinating the sealing activity/pipeline with workers.
37 | - continuous sector proving.
38 | - block mining.
39 | - evaluating and accepting storage deals.
40 | - conducting the inbound data transfer for accepted storage deals.
41 | - assigning deals to sectors, and managing the related on-chain interactions.
42 | - evaluating and accepting retrieval deals.
43 | - managing the monolith Badger store for unsealed deal data.
44 | - if the deal is sealed, requesting the unsealing by the appropriate worker, and
45 | importing the unsealed payload into the monolith Badger blockstore.
46 | - conducting the outbound data transfer, coordinating the relevant actions
47 | on the payment channel layers.
48 | - for all data transfer actions, watching the data flow and restarting the
49 | transfer when it fails.
50 |
51 | Currently all these concerns are coupled and folded within the same physical
52 | process. The lack of isolation and process boundaries results in:
53 |
54 | 1. Fragility: bugs in one area of the codebase have the power to affect
55 | the entire process, potentially crashing it, despite those areas being
56 | relatively independent of one another (e.g. bugs in retrieval can affect the
57 | sealing activity).
58 | 2. Increased attack surface and threat of network-based attacks: some processes
59 | require exposing public endpoints to the world (e.g. deal making, retrieval).
60 | Legitimate traffic spikes, or spurious activity, can deschedule, displace,
61 | or otherwise impact other processes (potentially more critical ones,
62 | like sealing).
63 | 3. Apprehension to add more miner-side features for fear of bloating the miner
64 | process.
65 | 4. Lack of scalability: it's not possible to scale the retrieval and storage
66 | capacities of a miner to respond to increased loads.
67 | 5. Lack of deployment flexibility: inability to partition the miner's network in
68 | single-purpose subnets (e.g. retrieval subnet, internal subnet, etc.),
69 | inability to introduce use-case/endpoint-specific firewalls, proxies, or
70 | middlemen, inability to use orchestrators like Kubernetes to manage and
71 | elastically scale specific operations.
72 |
73 | It is also worth noting that miners assign different priorities to different
74 | operations. Their profit-making activities revolve around sealing, and if
75 | enabling other features entails a non-negligible amount of operational risk
76 | without an offsetting ROI, they will choose to opt out of those features. A
77 | significant number of miners consequently choose to disable deal-making
78 | features, which goes against the utility of the Filecoin network.
79 |
80 | See [concept of a miner
81 | DMZ](https://github.com/filecoin-project/lotus/issues/5149) for additional
82 | background.
83 |
84 | #### Assumptions & hypotheses
85 |
86 | _What must be true for this project to matter?_
87 |
88 | - Miners favour profit-making activities and will find ways to
89 | short-circuit/disable activities that do not attract as much revenue, if
90 | running those activities entails significant operational risk that is not
91 | appropriately rewarded.
92 | - New features must be introduced miner-side that would otherwise bloat the
93 | miner process (e.g. deal indexing), thus making miners reluctant to run them.
94 | - Miners should have operational pain points removed to encourage them to accept
95 | deals and deliver excellent QoS on retrieval.
96 | - Miners seek runtime flexibility to mature and harden their deployment to
97 | withstand increasing loads and security threats.
98 | - As upcoming incentivisation circuits put extra pressure on miners to run
99 | additional services (indexers, free retrieval, etc.), miners may revolt if the
100 | software doesn't allow doing so in a reliable manner.
101 |
102 | #### User workflow example
103 |
104 | _How would a developer or user use this new capability?_
105 |
106 | - `lotus-miner run`: starts the storage miner process.
107 | - `lotus-markets run`: starts the markets process, running all services.
108 | - `lotus-markets run --services=indexing,retrieval`: starts the markets process,
109 | running only a subset of services.
110 |
111 | #### Impact
112 |
113 | _How would this directly contribute to web3 dev stack product-market fit?_
114 |
115 | High. This project is an enabler for new miner-side expectations (indexing, free
116 | retrieval) that are being introduced in the pursuit of product-market fit. Plus,
117 | it restructures the system for higher all-around robustness and willingness to
118 | run the full Filecoin stack of services.
119 |
120 | #### Internal leverage
121 |
122 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
123 |
124 | High.
125 |
126 | Separating these concerns into well-defined domains with clear boundaries
127 | and interfaces makes the existing `lotus-miner` code more approachable, which in
128 | turn makes it possible to gain more agility in development, bugfixing and
129 | feature building.
130 |
131 | Spinning off `lotus-markets` also allows us to evolve markets code in a more
132 | agile manner, without risking mining activities.
133 |
134 | #### Confidence
135 |
136 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
137 |
138 | 7.
139 |
140 | ## Project definition
141 |
142 | 
143 |
144 | #### Brief plan of attack
145 |
146 | Transitioning from a monolithic process to a segregated process architecture brings
147 | many benefits, but if done inappropriately, those benefits can be overshadowed
148 | by increased operational complexity, cognitive load, and lack of
149 | visibility/observability.
150 |
151 | For example, one could break up a single process into many microservices that
152 | communicate with one another in a potential full mesh -- thus leading to a
153 | brittle spaghetti architecture, and nasty house of cards effects in the event of
154 | failures and unavailability.
155 |
156 | We specifically refrain from such design patterns. This proposal suggests
157 | a hub-and-spoke model. A natural "hub" would be the miner process, which is
158 | where the miner's activity originates, and everything else is downstream from
159 | that. For example:
160 |
161 | 1. The ability to accept storage deals emerges from the ability to seal sectors.
162 | 2. The ability to serve retrievals emerges from the ability to accept storage
163 | deals, which in turn emerges from the ability to seal and prove sectors.
164 | 3. The ability to index deals emerges from the ability to accept storage deals,
165 | which in turn emerges from the ability to seal and prove sectors.
166 |
167 | This work cannot be taken lightly, and cannot be performed without input and
168 | buy-in from the miner community. Thus, we need to follow a structured and well
169 | documented process. This pitch proposes to:
170 |
171 | 1. Analyse and document all the current lotus-miner concerns and processes.
172 | 2. Perform exploratory PoCs to identify and define clean boundaries between
173 | processes. [Here's one.](https://github.com/filecoin-project/lotus/pull/5983/files)
174 | 3. Decide on the right process and service granularity (`lotus-miner` /
175 | `lotus-markets` seems like a good starting point, but we'll want to break
176 | things further going forward, so our implementation should support that).
177 | 4. Determine the right process hierarchy (is lotus-miner the right hub?),
178 | decide on process lifecycle/runtime management (what if certain processes
179 | die? do we need to start processes in a specific order? etc.)
180 | 5. Capture miner input throughout the design process.
181 | 6. Implement the new runtime segregation.
182 | 7. Document the new runtime.
183 |
184 | #### What does done look like?
185 |
186 | _What specific deliverables should be completed to consider this project done?_
187 |
188 | - Design discussions with the miner community.
189 | - Several exploratory PoCs.
190 | - Merged Lotus PR.
191 | - Documentation.
192 | - Updated infrastructure playbooks.
193 |
194 | #### What does success look like?
195 |
196 | _Success means impact. How will we know we did the right thing?_
197 |
198 | TBD.
199 |
200 | #### Counterpoints & pre-mortem
201 |
202 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
203 |
204 | TBD.
205 |
206 | #### Alternatives
207 |
208 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
209 |
210 | N/A. There is room for different opinions on the final process
211 | layout/architecture, but those will be debated during the project's execution.
212 |
213 | #### Dependencies/prerequisites
214 |
215 | There are no upstream dependencies, but there are downstream dependents:
216 | indexing, the Carbs/DAR migration, and free retrieval, among others.
217 |
218 | #### Future opportunities
219 |
220 | A segregated miner architecture introduces the possibility of deploying new
221 | miner-side features that plug into well-defined hooks to enrich the
222 | functionality of the Filecoin network, making these features opt-in, and
223 | avoiding added operational risk for miners.
224 |
225 | ## Required resources
226 |
227 | #### Effort estimate
228 |
229 | Medium, 3-5 weeks.
230 |
231 | #### Roles / skills needed
232 |
233 | 3 engineers, 1 TPM.
--------------------------------------------------------------------------------
/proposals/migrate-over-ipfs.md:
--------------------------------------------------------------------------------
1 | # Download migrations over IPFS
2 |
3 | Authors: Stebalien
4 |
5 | Initial PR: TBD
6 |
7 | ## Purpose & impact
8 | #### Background & intent
9 | _Describe the desired state of the world after this project? Why does that matter?_
10 |
11 | Currently, when the on-disk layout of the datastore changes, go-ipfs will download a "migration"
12 | tool as needed to "upgrade" the datastore. Unfortunately, this download happens over HTTPS, so:
13 |
14 | 1. We're not dogfooding our own tech.
15 | 2. It doesn't work in places where https://dist.ipfs.io is blocked (e.g., China).
16 |
17 | At the moment, we're unable to ask Brave to update go-ipfs to the latest version because the latest
18 | version will need to download one of these migrations and many of Brave's users are in China.
19 |
20 | #### Assumptions & hypotheses
21 | _What must be true for this project to matter?_
22 |
23 | - IPFS in Brave needs to matter.
24 | - A significant number of Brave users need to be in China.
25 | - Fetching migrations over IPFS needs to be reliable.
26 |
27 | #### User workflow example
28 | _How would a developer or user use this new capability?_
29 |
30 | - When starting the go-ipfs daemon with the `--migrate` flag, the migrations would be fetched over IPFS instead of HTTPS.
31 | - When starting the go-ipfs daemon in a bundled application like IPFS Desktop or Brave, the user shouldn't notice anything except that we make no connections to https://ipfs.io when starting go-ipfs the first time after upgrading.
32 |
33 | #### Impact
34 | _How would this directly contribute to web3 dev stack product-market fit?_
35 |
36 | - This would unblock updating go-ipfs in Brave, allowing us to ship bug fixes and new features to Brave users.
37 | - This would prove out IPFS as a way to ship code/updates to users.
38 |
39 | #### Leverage
40 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
41 |
42 | This is one step towards our goal of updating go-ipfs itself over IPFS. In terms of knowledge, not much.
43 |
44 | #### Confidence
45 | _How sure are we that this is the right problem to tackle? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
46 |
47 | 3?
48 |
49 | - It's a frequent source of upgrade problems: https://github.com/ipfs/ipfs-desktop/issues?q=is%3Aissue+fs-repo-migrations
50 | - It's preventing us from shipping new go-ipfs releases (with important fixes) to brave users.
51 | - It hurts adoption in China.
52 |
53 | ## Project definition
54 |
55 | #### Brief plan of attack
56 |
57 | **Design sketch:**
58 |
59 | Given a config section like:
60 |
61 | ```js
62 | {
63 | "Migration": {
64 | // Sources in order of preference where "HTTPS" means our gateways and "IPFS" means over IPFS.
65 | // (an empty list means "do the default thing")
66 | "DownloadPolicy": ["HTTPS", "custom-gateway.io", "IPFS"],
67 | // Whether or not to keep the migration after downloading it.
68 | "Keep": "pin"|"cache"|"discard"
69 | }
70 | }
71 | ```
72 |
73 | For the initial version, the default download policy should be `["HTTPS", "IPFS"]` unless the local
74 | node uses private networks (has a swarm key) in which case it should just be `["HTTPS"]` for safety.
75 | As we gain confidence in our ability to reliably download migrations over IPFS, we should make it
76 | the default.
77 |
78 | When migrating a repo, try each source in order until one succeeds (see the sketch after this list).
79 |
80 | 1. If the next source is HTTPS, download from `ipfs.io`.
81 | 2. If the source is IPFS:
82 | 1. Create a new _temporary_ repo (go-ipfs can't read the current repo because it's an older version).
83 | 2. Start a new _temporary_ go-ipfs node in the temporary repo.
84 | a. This node should not listen for inbound connections as it has no way to know which ports/transports should be configured (can't read the config).
85 | b. This node should not expose an API/gateway.
86 | 3. Download the required migration binaries using the temporary go-ipfs node.
87 | 4. Migrate the main go-ipfs node's repo.
88 | 5. Start the main go-ipfs node.
89 | 6. Maybe store a copy of the migration:
90 | 1. If `Keep` is not "discard", import the migration binary into the main go-ipfs node's repo.
91 | 2. If `Keep` is "pin", pin the migration binary.
92 | 7. If running, shut down any temporary nodes and delete the temporary repo.
93 |
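For illustration only, here is a minimal Go sketch of the fallback loop above. The helper signatures (`viaGateway`, `viaIPFS`) are hypothetical and do not correspond to existing go-ipfs functions; the real implementation would live in the go-ipfs / fs-repo-migrations codebase.

```go
package migrations

import "fmt"

// fetchMigration tries each configured source in order until one succeeds.
// sources comes from the proposed Migration.DownloadPolicy config section.
// viaGateway and viaIPFS are hypothetical helpers: the former downloads the
// migration binary from an HTTPS gateway, the latter fetches it over bitswap
// using a temporary, non-listening go-ipfs node.
func fetchMigration(sources []string, migration string,
	viaGateway func(gateway, migration string) error,
	viaIPFS func(migration string) error) error {
	var lastErr error
	for _, src := range sources {
		switch src {
		case "HTTPS":
			lastErr = viaGateway("dist.ipfs.io", migration)
		case "IPFS":
			lastErr = viaIPFS(migration)
		default:
			// any other entry is treated as a custom gateway host
			lastErr = viaGateway(src, migration)
		}
		if lastErr == nil {
			return nil
		}
	}
	return fmt.Errorf("all sources failed for %s: %w", migration, lastErr)
}
```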
94 | **Implementation steps:**
95 |
96 | 1. Implement everything but step 6.
97 | 2. Implement step 6. Unless nodes keep around a copy of the migration, this feature isn't going to be all that useful.
98 | 3. Eventually, switch over to using IPFS by default instead of the gateway as the first option.
99 |
100 | NOTE: the only reason we're proposing using the gateway first is that the migration blocks startup so it needs to be fast and reliable.
101 |
102 | #### What does done look like?
103 | _What specific deliverables should be completed to consider this project done?_
104 |
105 | 1. go-ipfs can download migrations over IPFS (bitswap) without having to contact a centralized server over HTTP.
106 | 2. This feature has been tested by multiple labbers (possibly even tested by a subset of ipfs-desktop users).
107 |
108 | #### What does success look like?
109 | _Success means impact. How will we know we did the right thing?_
110 |
111 | Users can start a new version of go-ipfs that switches to a new repo version without downloading anything from a centralized (easy to block) server.
112 |
113 | #### Counterpoints & pre-mortem
114 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
115 |
116 | 1. We may not be able to download/find the migration on IPFS. Trying the gateway first (or racing) should help mitigate this.
117 | 2. The "temporary" node may be missing important parts of the configuration. For example, it may need alternative bootstrap nodes as our nodes may not be reachable. We'll need to think about this carefully and may need to "reach" into the old repo's config file a bit.
118 |
119 | #### Alternatives
120 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
121 |
122 | ##### Domain Fronting
123 |
124 | A very simple alternative is to use domain fronting. If we registered, e.g., ipfs-dist.com and made it an alias for dist.ipfs.io, it _might_ work in China.
125 |
126 | However, we'd still be using HTTPS instead of IPFS to download migrations, which seems kind of silly.
127 |
128 | ##### Bundle the migrations
129 |
130 | We could bundle all migrations with go-ipfs. But that would add 100s of megabytes to the go-ipfs distribution so we really don't want to do this.
131 |
132 | #### Dependencies/prerequisites
133 |
134 | We need to land https://github.com/ipfs/fs-repo-migrations/issues/98 first (in progress).
135 |
136 | #### Future opportunities
137 |
138 |
139 | This is a step towards self-bootstrapped, decentralized IPFS. The remaining parts are:
140 |
141 | 1. Updating go-ipfs itself over IPFS.
142 | 2. Decentralized bootstrapping (reducing reliance on our central bootstrap nodes).
143 |
144 | ## Required resources
145 |
146 | #### Effort estimate
147 |
155 |
156 | In terms of implementation, Small. But the testing/validation could take a variable amount of time.
157 |
158 | #### Roles / skills needed
159 |
160 | * Familiarity with go-ipfs, and the go-ipfs repo.
161 | * Ideally, familiarity with go-ipfs repo migrations.
162 |
163 | The datasystems team would be the best fit.
164 |
--------------------------------------------------------------------------------
/proposals/nft.storage-chunked-car-uploads.md:
--------------------------------------------------------------------------------
1 | # nft.storage chunked CAR uploads
2 |
3 | Authors: [@alanshaw](https://github.com/alanshaw) [@vasco-santos](https://github.com/vasco-santos) [@olizilla](https://github.com/olizilla)
4 |
5 | ## What is the problem this project solves?
6 | _Describe the status quo, including any relevant context on the problem you're seeing that this project should solve. Who is the user you're solving for, and why do they care about this problem? Wherever possible, include pain points or problems that you've seen users experience to help motivate why solving this problem works towards top-line objectives._
7 |
8 | NFT.storage has a hard limit of 100MB for uploads - a limit imposed by Cloudflare Workers (the server side technology that runs the site).
9 |
10 | This project proposes a way to work around this limit by sending _multiple_ requests of <100MB.
11 |
12 | ## Impact
13 | _What goals/OKRs are being addressed (for w3dt, a specific program, etc.)? Why is this project important? What do we get with this project that we can't get without it?_
14 |
15 | Allows NFT assets over 100MB to be reliably uploaded to nft.storage.
16 |
17 | This has been requested by multiple marketplaces and is necessary for NFTs that contain video content.
18 |
19 | ## The idea
20 | _Describe the proposed project solution, at a very high level. Stay at the level of the high-level requirements. Diagrams and interface descriptions can be useful, if you have any that help clarify and explain the idea._
21 |
22 | 1. Generate a CAR file for the content.
23 |
24 | This has the added benefit of letting the creator know the CID up front, allowing them to mint an NFT on a blockchain and upload the content to nft.storage in parallel.
25 |
26 | Note: the CAR file must be generated by the NFT creator using a tool such as [ipfs-car](https://github.com/vasco-santos/ipfs-car).
27 |
28 | 2. Split the CAR file into chunks of <100MB.
29 |
30 | Walk the DAG to pack sub-graphs into each CAR file that is output. Every CAR has the same root CID, but contains a different portion of the DAG.
31 |
32 | There is a JS module that will perform the chunking: https://github.com/alanshaw/carbites
33 |
34 | 3. Upload the chunks.
35 |
36 | As nft.storage receives the CAR chunks, it adds them to IPFS Cluster, pinning the root. (A rough client-side sketch of this flow follows below.)
37 |
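A rough client-side sketch of the upload step, under stated assumptions: the endpoint path, auth header, and chunk file names are illustrative only, not the final nft.storage API, and the CAR is assumed to have already been generated (e.g. with ipfs-car) and split into <100MB pieces (e.g. with carbites).

```sh
# Sketch only: endpoint, auth header, and file names are assumptions, not the final API.
# asset-0.car, asset-1.car, ... are the <100MB chunks produced by splitting the CAR.
for chunk in asset-*.car; do
  curl -X POST "https://api.nft.storage/upload" \
    -H "Authorization: Bearer $NFT_STORAGE_TOKEN" \
    -H "Content-Type: application/car" \
    --data-binary @"$chunk"
done
```

Because every chunk shares the same root CID, the service can pin that root once and simply add blocks as the remaining chunks arrive.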
38 | ## Success/acceptance criteria (optional)
39 | _How do we know we're done with this project? How do we know we're successful? This field is OPTIONAL for the first draft of an MPP. Sometimes this field needs to be filled out once we have more detail on the shape of the actual solution._
40 |
41 | * Assets larger than 100MB can be uploaded to nft.storage.
42 | * A user uploads a 1GB asset to nft.storage.
43 |
44 | ## Detailed plans (optional)
45 | _Link to more detailed project plans, e.g. product requirements documents (PRDs) and technical design docs, once they have been created for this project._
46 |
47 | Contrived example:
48 |
49 | 
50 |
51 | DAG in CAR 0:
52 |
53 | 
54 |
55 | DAG in CAR 1:
56 |
57 | 
58 |
59 | DAG in CAR 2:
60 |
61 | 
62 |
63 | ## Program (optional)
64 | _If this project is part of a program, please indicate the relevant program here._
65 |
66 | /Nitro/NFT
67 |
--------------------------------------------------------------------------------
/proposals/sketches/dynamic-retrieval-pricing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/protocol/web3-dev-team/c9297fe220b846bcae9e80642186f70cdd87c9fd/proposals/sketches/dynamic-retrieval-pricing.png
--------------------------------------------------------------------------------
/proposals/sketches/lotus-miner-runtime-segregation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/protocol/web3-dev-team/c9297fe220b846bcae9e80642186f70cdd87c9fd/proposals/sketches/lotus-miner-runtime-segregation.png
--------------------------------------------------------------------------------
/proposals/storage-and-retrieval-bots.md:
--------------------------------------------------------------------------------
1 | # Storage and Retrieval Dealbots
2 |
3 | Authors: @mgoelzer
4 |
5 | Initial PR: #84
6 |
7 |
11 |
19 |
22 |
23 | ## Purpose & impact
24 | #### Background & intent
25 | In some cases, storage and retrieval deals on Filecoin mainnet fail. We do not currently have a good handle on how often this happens, what the causes are, whether it is specific to certain miners, whether miners refuse deals intentionally or because of software bugs, etc.
26 |
27 | The Dealbots proposed here aim to address these problems by randomly selecting miners and making deals with them. For instance, the pair of bots can make a storage deal and later attempt to retrieve that same data to understand end-to-end reliability on mainnet.
28 |
29 | The Retrieval Bot (r-bot) can also consume a list of {CID,miner} tuples and attempt retrieval on each one.
30 |
31 | In all cases, we log the success or failure of each storage or retrieval attempt, along with diagnostic information such as where in the sequence the failure occurred, what the error message was and what the Lotus log tail contained.
32 |
33 | #### Assumptions & hypotheses
34 | _What must be true for this project to matter?_
35 |
36 | - Some storage and retrieval deals fail on mainnet
37 | - This is happening for multiple reasons: code bugs that prevent storage or retrieval from running to successful completion, miners intentionally refusing certain types of deals (certain sizes, or an asymmetry between servicing storage vs retrieval deals).
38 | - Understanding the different types of failure and their frequencies will help us find bugs in Lotus.
39 | - Understanding the same will help us understand if miner economic incentives are suboptimal.
40 | - Providing a tool that can aggregate data across many miners will provide a foundation for third parties to run miner reputation systems.
41 |
42 | #### User workflow example
43 |
44 | ```
45 | $ ./dealbot --input path/to/deals/to/try.json
46 | {
47 | "status":"failure",
48 | "failedAt":"ClientEventProviderCanceled", // failure event
49 | "eventList":
50 | [
51 | "Recv: 0 B, Paid 0 FIL, ClientEventOpen (DealStatusNew)",
52 | "Recv: 0 B, Paid 0 FIL, ClientEventDealProposed (DealStatusWaitForAcceptance)".
53 | "Recv: 0 B, Paid 0 FIL, ClientEventDealAccepted (DealStatusAccepted)".
54 | "Recv: 0 B, Paid 0 FIL, ClientEventPaymentChannelAddingFunds (DealStatusPaymentChannelAllocatingLane)".
55 | "Recv: 0 B, Paid 0 FIL, ClientEventLaneAllocated (DealStatusOngoing)".
56 | "Recv: 0 B, Paid 0 FIL, ClientEventProviderCancelled (DealStatusCancelling)".
57 | "Recv: 0 B, Paid 0 FIL, ClientEventDataTransferError (DealStatusErrored)".
58 | "Recv: 0 B, Paid 0 FIL, ClientEventOpen (DealStatusNew)".
59 | ],
60 | "errorMessage":"ERROR: retrieval failed: Retrieve: Retrieval Error: error generated by data transfer: unable to send cancel to channel FSM: normal shutdown of state machine",
61 | "tailLog":"....", // Multiline, from `tail` of `lotus daemon`
62 | "storageDealParameters": // Given to RetrievalBot as input
63 | {
64 | "CID":"Qm...",
65 | "sha256":"73cb385...", // independent checksum of data file
66 | "sizeInBytes":"12345678".
67 | "minderId":"f01924",
68 | "verified":true,
69 | "fastRetrievalFlag":true,
70 | "dealId":"...",
71 | },
72 | "lotusVersion":"1.5.3-rc2+mainnet+git.9afb5ff94",
73 | // Call API `Filecoin.Version`
74 | "datetime":"YYYY-MM-DD_HH:MM:SS", // when attempt started
75 | },
76 | {
77 | // ...next deal attempt json blob...
78 | }
79 | ```
80 |
81 | Stdout will contain the results, in JSON, of each deal attempt. It is intended to be piped into a log search service like those provided by AWS/GCP. (An example of the corresponding input file is sketched below.)
82 |
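For completeness, a hedged sketch of what the `--input` file above might contain: a list of {CID, miner} tuples for the r-bot to attempt. The field names and values are placeholders to be settled with the team.

```json
[
  { "cid": "QmExampleCid1", "miner": "f01234" },
  { "cid": "QmExampleCid2", "miner": "f05678" }
]
```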
83 |
84 | #### Impact
85 | _How would this directly contribute to web3 dev stack product-market fit?_
86 |
87 | - Improve reliability of the network
88 | - Enable an ecosystem of miner reputation and ranking systems
89 | - Perform the retrieval verification in Slingshot 2.3
90 |
91 | #### Leverage
92 | _How much would nailing this project improve our knowledge and ability to execute future projects?_
93 |
94 | **Immensely!**
95 |
96 | - We don't currently have enough information about why deals fail to allocate our debugging time and resources correctly.
97 |
98 | - Miner reputation systems enabled by this tool would complement the protocol-level incentives for miners to "do the right thing" (provide reliable retrieval of previously stored data, successfully complete all storage deals, etc.)
99 |
100 | #### Confidence
101 | _How sure are we that this impact would be realized? Label from [this scale](https://medium.com/@nimay/inside-product-introduction-to-feature-priority-using-ice-impact-confidence-ease-and-gist-5180434e5b15)_.
102 |
103 | C = 8
104 |
105 | Nothing is certain, but it is very likely that building this tool will at a minimum enable the Filecoin Project to better understand the frequency and causes of deal failures.
106 |
107 | And the ability of this tool to support miner reputation systems can only help increase the share of deals that get routed to reliable miners.
108 |
109 |
110 | ## Project definition
111 | #### Brief plan of attack
112 |
113 |
114 | - **Phase 1: Retrieval Bot.** Reads stdin describing a CID to attempt to retrieve, writes outcome of retrieval attempt to stdout.
115 | - **Phase 2: Storage Bot.** Same idea but for storage deals.
116 |
117 | In a subsequent project (out of scope for this PR; see [#87](https://github.com/protocol/web3-dev-team/pull/87)), we will create a set of bot orchestrators that invoke the r-bot and s-bot programs with inputs suitable for different use cases. For example, a queue of CIDs to test-retrieve in order to verify the Slingshot 2.3 competition.
118 |
119 | #### What does done look like?
120 | _What specific deliverables should be completed to consider this project done?_
121 |
122 | 
123 |
124 | #### What does success look like?
125 | _Success means impact. How will we know we did the right thing?_
126 |
127 | - We have a metrics dashboard (in Observable, Grafana, etc) that continuously shows the most recent deal failures, how frequently they are happening, which miners fail most/least, and similar metrics. The impact of this should be obvious: a clearer understanding of why and how often deals are failing on mainnet.
128 | - Reputation systems emerge from ecosystem partners that use the data generated by running these bots to rank miners. This would give Filecoin users reliable, real-time miner ranking, which does not currently exist in the ecosystem.
129 |
130 | #### Counterpoints & pre-mortem
131 | _Why might this project be lower impact than expected? How could this project fail to complete, or fail to be successful?_
132 |
133 | - The metrics fail to give us actionable debugging ideas
134 | - Reputation systems develop their own code to capture the same miner statistics (duplication of effort)
135 |
136 | #### Alternatives
137 | _How might this project’s intent be realized in other ways (other than this project proposal)? What other potential solutions can address the same need?_
138 |
139 | - [@whyrusleeping](https://github.com/whyrusleeping/)'s [Estuary](https://github.com/whyrusleeping/estuary) tool
140 |
141 | #### Dependencies/prerequisites
142 |
143 |
144 | - [filecoin-project/lotus/pull/5833/
145 | ](https://github.com/filecoin-project/lotus/pull/5833/)
146 |
147 | #### Future opportunities
148 |
149 |
150 | - Miner reputation systems as discussed
151 | - IPFS<>Filecoin CID indexing
152 |
153 | ## Required resources
154 |
155 | #### Effort estimate
156 |
164 |
165 | TBD with team
166 |
167 | #### Roles / skills needed
168 |
169 |
170 | TBD with team
171 |
--------------------------------------------------------------------------------