├── .github
├── ISSUE_TEMPLATE
│ └── example_request.yaml
├── PULL_REQUEST_TEMPLATE.md
└── workflows
│ └── test.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── assets
├── explore_high_dimensional_data.gif
└── youtube_summary.png
├── explore_high_dimensional_data
├── README.md
└── explore_high_dimensional_data.py
├── nlp_span_comparison
├── README.md
└── nlp_span_comparison.py
├── pyproject.toml
├── tests
└── test_examples_without_error.py
└── youtube_summary
├── README.md
└── youtube_summary.py
/.github/ISSUE_TEMPLATE/example_request.yaml:
--------------------------------------------------------------------------------
1 | # yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/refs/heads/master/src/schemas/json/github-issue-forms.json
2 | name: '🚀 Example request'
3 | description: Request a new example
4 | type: Example
5 | body:
6 | - type: markdown
7 | attributes:
8 | value: |
9 | Let us know what example you'd like to see added!
10 | - type: textarea
11 | id: example-description
12 | attributes:
13 | label: Description
14 | description: 'Description of the example to add. Please make the reason and use cases as detailed as possible. If you intend to submit a PR for this issue, tell us in the description. Thanks!'
15 | placeholder: I would like an example notebook on ...
16 | validations:
17 | required: true
18 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## 📝 Summary
2 |
3 |
8 |
9 | ## 🔍 Description of example
10 |
11 |
12 |
13 |
14 | ## 📋 Checklist
15 |
16 | - [ ] I have included package dependencies in the notebook file [using `--sandbox`](https://docs.marimo.io/guides/package_reproducibility/)
17 | - [ ] I have included a short README.md describing my example and how to run it.
18 | - [ ] I have included instructions on how to adapt the notebook to custom data and models
19 | - [ ] I have included an ["Open in marimo" badge](https://docs.marimo.io/guides/publishing/playground/#open-notebooks-hosted-on-github) in the README if my example works in [WASM](https://docs.marimo.io/guides/wasm/).
20 | - [ ] If my notebook accesses local files, I used [`mo.notebook_directory()`](https://docs.marimo.io/api/miscellaneous/?h=notebook_directory#marimo.notebook_directory) to construct notebook paths
21 |
--------------------------------------------------------------------------------
/.github/workflows/test.yaml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | pull_request: {}
7 |
8 | jobs:
9 | test:
10 | name: Test notebooks
11 | runs-on: ubuntu-latest
12 | steps:
13 | - name: 🛑 Cancel Previous Runs
14 | uses: styfle/cancel-workflow-action@0.12.1
15 | - name: Checkout the repository
16 | uses: actions/checkout@main
17 | - name: 🐍 Setup uv
18 | uses: astral-sh/setup-uv@v5
19 | - name: Run tests
20 | run: |
21 | uvx --with marimo pytest -v
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | fail_fast: false
2 |
3 | repos:
4 | - repo: https://github.com/pre-commit/pre-commit-hooks
5 | rev: v5.0.0
6 | hooks:
7 | - id: check-yaml
8 |
9 | - repo: https://github.com/rhysd/actionlint
10 | rev: v1.7.7
11 | hooks:
12 | - id: actionlint
13 | args: [-ignore, SC]
14 |
15 | - repo: https://github.com/igorshubovych/markdownlint-cli
16 | rev: v0.44.0
17 | hooks:
18 | - id: markdownlint-fix
19 | args: [-c, configs/.markdownlint.yaml, --fix, --disable, MD028]
20 |
21 | - repo: https://github.com/crate-ci/typos
22 | rev: typos-dict-v0.12.4
23 | hooks:
24 | - id: typos
25 |
26 | - repo: https://github.com/astral-sh/ruff-pre-commit
27 | rev: v0.9.6
28 | hooks:
29 | # Run the formatter
30 | - id: ruff-format
31 |
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 marimo
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | A curated collection of example marimo notebooks.
7 |
8 |
9 | This repository contains a curated collection of example
10 | [marimo](https://github.com/marimo-team/marimo) notebooks. Use these notebooks
11 | as starting points for your own data exploration, experimentation, workflows,
12 | and apps.
13 |
14 | ## Running examples
15 |
16 | Each example has a README that tells you how to run it locally. Most examples
17 | can also be opened in marimo's [online
18 | playground](https://docs.marimo.io/guides/publishing/playground/) by clicking
19 | the "open in marimo" badge in its README.
20 |
21 | ## Adding new examples
22 |
23 | We welcome community contributions of examples!
24 |
25 | Open a pull request to contribute a new example. Ideally, examples are easy for
26 | others to adapt to their own data and models. Here's a contribution checklist:
27 |
28 | - [ ] Place the example in its own folder
29 | - [ ] Include package dependencies in notebook files [using
30 | `--sandbox`](https://docs.marimo.io/guides/package_reproducibility/)
31 | - [ ] Include a short README.md describing your example and how to run it.
32 | - [ ] Include instructions on how to adapt the notebook to custom data and models
33 |
34 | If you aren't comfortable adding a new example, you can also request new
35 | examples by filing an issue.
36 |
37 | ## Community
38 |
39 | We're building a community. Come hang out with us!
40 |
41 | - 🌟 [Star us on GitHub](https://github.com/marimo-team/examples)
42 | - 💬 [Chat with us on Discord](https://marimo.io/discord?ref=readme)
43 | - 📧 [Subscribe to our Newsletter](https://marimo.io/newsletter)
44 | - ☁️ [Join our Cloud Waitlist](https://marimo.io/cloud)
45 | - ✏️ [Start a GitHub Discussion](https://github.com/marimo-team/marimo/discussions)
46 | - 🦋 [Follow us on Bluesky](https://bsky.app/profile/marimo.io)
47 | - 🐦 [Follow us on Twitter](https://twitter.com/marimo_io)
48 | - 🕴️ [Follow us on LinkedIn](https://www.linkedin.com/company/marimo-io)
49 |
50 |
51 |
52 |
53 |
54 |
--------------------------------------------------------------------------------
/assets/explore_high_dimensional_data.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marimo-team/examples/dc02cc00df77d06fb1d5581c47759be8b6ace50b/assets/explore_high_dimensional_data.gif
--------------------------------------------------------------------------------
/assets/youtube_summary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marimo-team/examples/dc02cc00df77d06fb1d5581c47759be8b6ace50b/assets/youtube_summary.png
--------------------------------------------------------------------------------
/explore_high_dimensional_data/README.md:
--------------------------------------------------------------------------------
1 | # Explore high dimensional data
2 |
3 | [](https://marimo.app/github.com/marimo-team/examples/blob/main/explore_high_dimensional_data/explore_high_dimensional_data.py)
4 |
5 | **This template lets you visualize and interactively explore high dimensional
6 | data.** The starter code uses PCA to embed and plot numerical digits, seeing
7 | how they cluster together — when you select points in the plot, the notebook
8 | shows you the underlying images!
9 |
10 | To use this notebook on your own data, just replace the implementations
11 | of the following four functions:
12 |
13 | * `load_data`
14 | * `embed_data`
15 | * `scatter_data`
16 | * `show_selection`
17 |
18 |
19 |
20 | ## Running this notebook
21 |
22 | Open this notebook in [our online
23 | playground](https://marimo.app/github.com/marimo-team/examples/blob/main/explore_high_dimensional_data/explore_high_dimensional_data.py)
24 | or run it locally.
25 |
26 | ### Running locally
27 |
28 | The requirements of each notebook are serialized in them as a top-level
29 | comment. Here are the steps to run the notebook:
30 |
31 | 1. [Install `uv`](https://github.com/astral-sh/uv/?tab=readme-ov-file#installation)
32 | 2. Open an example with `uvx marimo edit --sandbox `
33 |
34 | > [!TIP]
35 | > The [`--sandbox`
36 | > flag](https://docs.marimo.io/guides/package_reproducibility/) opens the
37 | > notebook in an isolated virtual environment, automatically installing the
38 | > notebook's dependencies 📦
39 |
40 | You can also open notebooks without `uv`, in which case you'll need to
41 | manually [install marimo](https://docs.marimo.io/getting_started/index.html#installation)
42 | first. Then run `marimo edit `; however, you'll also need to
43 | install the requirements yourself.
44 |
--------------------------------------------------------------------------------
/explore_high_dimensional_data/explore_high_dimensional_data.py:
--------------------------------------------------------------------------------
1 | # /// script
2 | # requires-python = ">=3.12"
3 | # dependencies = [
4 | # "altair==5.5.0",
5 | # "marimo",
6 | # "matplotlib==3.10.0",
7 | # "pandas==2.2.3",
8 | # "polars==1.20.0",
9 | # "scikit-learn==1.6.1",
10 | # ]
11 | # ///
12 |
13 | import marimo
14 |
15 | __generated_with = "0.10.16"
16 | app = marimo.App(width="columns")
17 |
18 |
19 | @app.cell(column=0, hide_code=True)
20 | def _(mo):
21 | mo.md(
22 | """
23 | **This template lets you visualize and interactively explore high dimensional data.** The starter code uses PCA to embed and plot numerical
24 | digits, seeing how they cluster together — when you select points in the plot, the notebook shows you the underlying images.
25 |
26 | The left-hand column implements the core logic; the right-hand column executes the dimensionality reduction and shows the outputs.
27 |
28 | **To customize this template to your own data, just implement the functions in
29 | this column.**
30 | """
31 | )
32 | return
33 |
34 |
@app.cell
def _():
    def load_data():
        """
        Return a tuple of your data:

        * The dataset, with each row a different item in the dataset
        * A label for each data point.

        If your data doesn't have labels, just return a list of all ones
        with the same length as the number of items in your dataset.
        """
        # Imported lazily so the sklearn import cost is only paid when the
        # function is actually called.
        import sklearn.datasets

        # load_digits with return_X_y=True yields (pixel_data, integer_labels).
        data, labels = sklearn.datasets.load_digits(return_X_y=True)
        return data, labels

    return (load_data,)
53 |
54 |
@app.cell
def _():
    def embed_data(data):
        """
        Embed the data into two dimensions. The default implementation
        uses PCA, but you can also use UMAP, tSNE, or any other dimensionality
        reduction algorithm you like.

        The starter implementation here uses PCA, and assumes the data is a NumPy
        array.
        """
        # Importing the submodule also binds the parent `sklearn` name, so a
        # separate `import sklearn` is redundant and has been removed.
        import sklearn.decomposition

        # whiten=True rescales each component to unit variance.
        return sklearn.decomposition.PCA(n_components=2, whiten=True).fit_transform(
            data
        )

    return (embed_data,)
74 |
75 |
@app.cell
def _(pl):
    # NOTE: the return annotation must be a string — `alt` is imported inside
    # the function body, so a bare `alt.Chart` annotation would raise
    # NameError the moment this cell defines the function.
    def scatter_data(df: pl.DataFrame) -> "alt.Chart":
        """
        Visualize the embedded data using an Altair scatterplot.

        - df is a Polars dataframe with the following columns:
          * x: the first coordinate of the embedding
          * y: the second coordinate of the embedding
          * label: a label identifying each item, for coloring

        Modify the starter implementation to suit your needs, but make sure
        to return an altair chart.
        """
        import altair as alt

        return (
            alt.Chart(df)
            .mark_circle()
            .encode(
                # The fixed domain assumes roughly unit-scale (whitened)
                # embedding coordinates — adjust it for your own data.
                x=alt.X("x:Q").scale(domain=(-2.5, 2.5)),
                y=alt.Y("y:Q").scale(domain=(-2.5, 2.5)),
                color=alt.Color("label:N"),
            )
            .properties(width=500, height=500)
        )

    return (scatter_data,)
104 |
105 |
@app.cell
def _():
    def show_selection(data, rows, max_rows=10):
        """
        Visualize selected rows of the data.

        - `data` is the data returned from `load_data`
        - `rows` is a list or array of row indices
        - `max_rows` is the maximum number of rows to display
        """
        import matplotlib.pyplot as plt

        # show 10 images: either the first 10 from the selection, or the first ten
        # selected in the table
        rows = rows[:max_rows]
        # Reshape flat pixel rows into 8x8 images (the digits dataset layout);
        # adapt this reshape when swapping in your own data.
        images = data.reshape((-1, 8, 8))[rows]
        fig, axes = plt.subplots(1, len(rows))
        fig.set_size_inches(12.5, 1.5)
        # plt.subplots returns a single Axes (not an array) when only one
        # subplot is requested, hence the separate single-row branch below.
        if len(rows) > 1:
            for im, ax in zip(images, axes.flat):
                ax.imshow(im, cmap="gray")
                ax.set_yticks([])
                ax.set_xticks([])
        else:
            axes.imshow(images[0], cmap="gray")
            axes.set_yticks([])
            axes.set_xticks([])
        plt.tight_layout()
        return fig

    return (show_selection,)
137 |
138 |
139 | @app.cell(column=1, hide_code=True)
140 | def _(mo):
141 | mo.md("""# Explore high dimensional data""")
142 | return
143 |
144 |
145 | @app.cell(hide_code=True)
146 | def _(mo):
147 | mo.md(
148 | """
149 | Here's an **embedding** of your data, with similar points close to each other.
150 |
151 | This notebook will automatically drill down into points you **select with
152 | your mouse**; try it!
153 | """
154 | )
155 | return
156 |
157 |
158 | @app.cell
159 | def _(load_data):
160 | data, labels = load_data()
161 | return data, labels
162 |
163 |
164 | @app.cell
165 | def _():
166 | import polars as pl
167 |
168 | return (pl,)
169 |
170 |
@app.cell
def _(data, embed_data, labels, pl):
    # Run the dimensionality reduction, then gather the 2D coordinates,
    # labels, and original row positions into one dataframe for plotting.
    X_embedded = embed_data(data)

    embedding = pl.DataFrame(
        {
            "x": X_embedded[:, 0],
            "y": X_embedded[:, 1],
            "label": labels,
            # Original row position, used to drill back into `data` when
            # points are selected in the chart.
            "index": list(range(X_embedded.shape[0])),
        }
    )
    return X_embedded, embedding
184 |
185 |
186 | @app.cell
187 | def _(embedding, mo, scatter_data):
188 | chart = mo.ui.altair_chart(scatter_data(embedding))
189 | chart
190 | return (chart,)
191 |
192 |
193 | @app.cell
194 | def _(chart, mo):
195 | table = mo.ui.table(chart.value)
196 | return (table,)
197 |
198 |
199 | @app.cell
200 | def _(chart, data, mo, show_selection, table):
201 | mo.stop(not len(chart.value))
202 |
203 | selected_rows = (
204 | show_selection(data, list(chart.value["index"]))
205 | if table.value.is_empty()
206 | else show_selection(data, list(table.value["index"]))
207 | )
208 |
209 | mo.md(
210 | f"""
211 | **Here's a preview of the items you've selected**:
212 |
213 | {mo.as_html(selected_rows)}
214 |
215 | Here's all the data you've selected.
216 |
217 | {table}
218 | """
219 | )
220 | return (selected_rows,)
221 |
222 |
223 | @app.cell
224 | def _():
225 | import marimo as mo
226 |
227 | return (mo,)
228 |
229 |
230 | if __name__ == "__main__":
231 | app.run()
232 |
--------------------------------------------------------------------------------
/nlp_span_comparison/README.md:
--------------------------------------------------------------------------------
1 | # NLP Span Comparison
2 |
3 | [](https://marimo.app/github.com/marimo-team/examples/blob/main/nlp_span_comparison/nlp_span_comparison.py)
4 |
5 | This notebook can be used as a template for comparing NLP models that predict
6 | spans. Given two models and a sequence of text examples from which to extract
7 | spans, the notebook presents the model predictions on each example and
8 | lets you indicate which model yielded the better prediction. Your preferences
9 | are saved (and loaded) from storage, letting you use this as a real tool.
10 |
11 | To use this notebook for your own data, just replace the implementations
12 | of the following three functions:
13 |
14 | * `load_examples`: Load your own examples (strings) from a file or database.
15 | * `model_a_predictor`: Predict a span for a given example using model A.
16 | * `model_b_predictor`: Predict a span for a given example using model B.
17 |
18 | The notebook keeps track of your preferences in a JSON file. To track
19 | preferences in a different way, such as in a database, replace the implementations
20 | of the following two functions:
21 |
22 | * `load_choices`
23 | * `save_choices`
24 |
25 |
26 |
27 | ## Running this notebook
28 |
29 | Open this notebook in [our online
30 | playground](https://marimo.app/github.com/marimo-team/examples/blob/main/nlp_span_comparison/nlp_span_comparison.py)
31 | or run it locally.
32 |
33 | ### Running locally
34 |
35 | The requirements of each notebook are serialized in them as a top-level
36 | comment. Here are the steps to run the notebook:
37 |
38 | 1. [Install `uv`](https://github.com/astral-sh/uv/?tab=readme-ov-file#installation)
39 | 2. Open an example with `uvx marimo edit --sandbox `
40 |
41 | > [!TIP]
42 | > The [`--sandbox`
43 | > flag](https://docs.marimo.io/guides/package_reproducibility/) opens the
44 | > notebook in an isolated virtual environment, automatically installing the
45 | > notebook's dependencies 📦
46 |
47 | You can also open notebooks without `uv`, in which case you'll need to
48 | manually [install marimo](https://docs.marimo.io/getting_started/index.html#installation)
49 | first. Then run `marimo edit `; however, you'll also need to
50 | install the requirements yourself.
51 |
--------------------------------------------------------------------------------
/nlp_span_comparison/nlp_span_comparison.py:
--------------------------------------------------------------------------------
1 | # /// script
2 | # requires-python = ">=3.9"
3 | # dependencies = [
4 | # "marimo",
5 | # ]
6 | # ///
7 |
8 | import marimo
9 |
10 | __generated_with = "0.10.13"
11 | app = marimo.App()
12 |
13 |
14 | @app.cell(hide_code=True)
15 | def _(mo):
16 | mo.md("""# Span Comparison""")
17 | return
18 |
19 |
@app.cell
def _(textwrap, urllib):
    # Modify this function to load your own examples
    def load_examples():
        """Fetch the text of Hamlet and split it into example snippets.

        Downloads the play over the network, then returns every non-empty
        paragraph, dedented and truncated to 1000 characters.
        """
        hamlet_url = "https://gist.githubusercontent.com/provpup/2fc41686eab7400b796b/raw/b575bd01a58494dfddc1d6429ef0167e709abf9b/hamlet.txt"

        with urllib.request.urlopen(hamlet_url) as f:
            HAMLET = f.read().decode("utf-8")

        # Paragraphs are separated by blank lines; skip empty blocks.
        return [
            textwrap.dedent(block).strip()[:1000]
            for block in HAMLET.split("\n\n")
            if block
        ]

    return (load_examples,)
36 |
37 |
@app.cell
def _(random):
    # Replace with your predictor for model A
    def model_a_predictor(text: str) -> tuple[int, int]:
        """Return a placeholder (start, end) span for `text`.

        Seeding on the text length makes the prediction deterministic per
        example; the bounds guarantee 0 <= start < end < len(text).
        """
        random.seed(len(text))
        start = random.randint(0, len(text) - 2)
        end = random.randint(start + 1, len(text) - 1)
        return start, end

    return (model_a_predictor,)
48 |
49 |
@app.cell
def _(random):
    # Replace with your predictor for model B
    def model_b_predictor(text: str) -> tuple[int, int]:
        """Return a placeholder (start, end) span for `text`.

        Deterministic like model A, but seeded with half the text length so
        the two models usually produce different spans for the same example.
        """
        random.seed(len(text) / 2)
        start = random.randint(0, len(text) - 2)
        end = random.randint(start + 1, len(text) - 1)
        return start, end

    return (model_b_predictor,)
60 |
61 |
62 | @app.cell(hide_code=True)
63 | def _(mo):
64 | mo.md(
65 | r"""
66 | !!! tip "This notebook is best viewed as an app."
67 | Hit `Cmd/Ctrl+.` or click the "app view" button in the bottom right.
68 | """
69 | )
70 | return
71 |
72 |
73 | @app.cell
74 | def _(load_examples):
75 | EXAMPLES = load_examples()
76 | return (EXAMPLES,)
77 |
78 |
79 | @app.cell
80 | def _(NUMBER_OF_EXAMPLES, mo):
81 | index = mo.ui.number(
82 | 0,
83 | NUMBER_OF_EXAMPLES - 1,
84 | value=0,
85 | step=1,
86 | debounce=True,
87 | label="example number",
88 | )
89 | return (index,)
90 |
91 |
92 | @app.cell(hide_code=True)
93 | def _(mo):
94 | mo.md("""_Models A and B both predict spans. Which do you prefer?_""")
95 | return
96 |
97 |
98 | @app.cell
99 | def _(NUMBER_OF_EXAMPLES, mo, num_a_preferred, num_b_preferred):
100 | mo.ui.table(
101 | [
102 | {"Model": "A", "Score": f"{num_a_preferred}/{NUMBER_OF_EXAMPLES}"},
103 | {"Model": "B", "Score": f"{num_b_preferred}/{NUMBER_OF_EXAMPLES}"},
104 | ],
105 | selection=None,
106 | )
107 | return
108 |
109 |
110 | @app.cell
111 | def _(get_choices, mo):
112 | mo.accordion({"All preferences": mo.ui.table(get_choices(), selection=None)})
113 | return
114 |
115 |
116 | @app.cell
117 | def _(index):
118 | index.center()
119 | return
120 |
121 |
122 | @app.cell
123 | def _(CHOICES_PATH, get_choices, index, mo, write_choices):
124 | def _():
125 | preference = get_choices()[index.value]["model"]
126 | mo.stop(preference is None, mo.md("**Choose the better model**.").center())
127 |
128 | write_choices(get_choices(), CHOICES_PATH)
129 | return mo.md(f"You prefer **model {preference}**.").center()
130 |
131 | _()
132 | return
133 |
134 |
135 | @app.cell
136 | def _(annotate, mo):
137 | mo.hstack(
138 | [
139 | annotate("Model A", [0, len("Model A")], "yellow"),
140 | annotate("Model B", [0, len("Model B")], "lightblue"),
141 | ],
142 | justify="space-around",
143 | )
144 | return
145 |
146 |
147 | @app.cell
148 | def _(CHOICES_PATH, EXAMPLES, load_choices, mo):
149 | get_choices, set_choices = mo.state(load_choices(CHOICES_PATH, len(EXAMPLES)))
150 | return get_choices, set_choices
151 |
152 |
153 | @app.cell
154 | def _(index, mo, set_choices):
155 | model_A = mo.ui.button(
156 | label="Model A",
157 | on_change=lambda _: set_choices(
158 | lambda v: v[: index.value]
159 | + [{"index": index.value, "model": "A"}]
160 | + v[index.value + 1 :]
161 | ),
162 | )
163 |
164 | model_B = mo.ui.button(
165 | label="Model B",
166 | on_change=lambda _: set_choices(
167 | lambda v: v[: index.value]
168 | + [{"index": index.value, "model": "B"}]
169 | + v[index.value + 1 :]
170 | ),
171 | )
172 | mo.hstack([model_A, model_B], justify="space-around")
173 | return model_A, model_B
174 |
175 |
176 | @app.cell
177 | def _(EXAMPLES, annotate, index, model_a_predictor, model_b_predictor):
178 | _example = EXAMPLES[index.value]
179 |
180 | model_A_prediction = annotate(_example, model_a_predictor(_example), color="yellow")
181 |
182 | model_B_prediction = annotate(
183 | _example, model_b_predictor(_example), color="lightblue"
184 | )
185 | return model_A_prediction, model_B_prediction
186 |
187 |
188 | @app.cell
189 | def _(mo, model_A_prediction, model_B_prediction):
190 | mo.hstack([model_A_prediction, model_B_prediction], gap=2, justify="space-around")
191 | return
192 |
193 |
194 | @app.cell
195 | def _(get_choices):
196 | num_a_preferred = sum(1 for c in get_choices() if c["model"] == "A")
197 | num_b_preferred = sum(1 for c in get_choices() if c["model"] == "B")
198 | return num_a_preferred, num_b_preferred
199 |
200 |
201 | @app.cell
202 | def _(mo):
203 | CHOICES_PATH = str(mo.notebook_dir() / "choices.json")
204 | return (CHOICES_PATH,)
205 |
206 |
@app.cell
def _(json, os):
    # This template gets and saves labels to a local JSON file, but you
    # can readily change this to point to a database or anything else.
    def load_choices(path, number_of_examples):
        """Load saved preferences, or build a blank list if none exist yet."""
        if not os.path.exists(path):
            # One entry per example; `model` is None until the user chooses.
            return [{"index": i, "model": None} for i in range(number_of_examples)]

        with open(path, "r") as f:
            choices = json.loads(f.read())
        # A previously saved file must match the current example set one-to-one.
        assert len(choices) == number_of_examples
        return choices

    def write_choices(choices, path):
        """Serialize all preferences to `path`, replacing any existing file."""
        # Truncate and rewrite the whole file on every save.
        with open(path, "w") as f:
            f.write(json.dumps(choices))

    return load_choices, write_choices
226 |
227 |
@app.cell
def _(mo):
    def annotate(text, span, color):
        """Render `text` with the half-open span [start, end) highlighted.

        - `text`: the string to display
        - `span`: a (start, end) pair of character offsets into `text`
        - `color`: a CSS background color for the highlight
        """
        # `color` was previously unused because the <mark> tag strings were
        # empty; reinstate them so the highlight is actually rendered.
        mark_start = f'<mark style="background-color: {color}">'
        return mo.md(
            text[: span[0]]
            + mark_start
            + text[span[0] : span[1]]
            + "</mark>"
            + text[span[1] :]
        )

    return (annotate,)
241 |
242 |
243 | @app.cell
244 | def _(EXAMPLES):
245 | NUMBER_OF_EXAMPLES = len(EXAMPLES)
246 | return (NUMBER_OF_EXAMPLES,)
247 |
248 |
249 | @app.cell
250 | def _():
251 | import marimo as mo
252 |
253 | return (mo,)
254 |
255 |
@app.cell
def _():
    # Stdlib modules shared across cells. `urllib.request` must be imported
    # explicitly: `import urllib` alone does not load the `request`
    # submodule, so `urllib.request.urlopen` in `load_examples` would raise
    # AttributeError. Importing the submodule still binds the `urllib` name.
    import json
    import os
    import random
    import textwrap
    import urllib.request

    return json, os, random, textwrap, urllib
265 |
266 |
267 | if __name__ == "__main__":
268 | app.run()
269 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.pytest.ini_options]
2 | pythonpath = ["."]
3 |
--------------------------------------------------------------------------------
/tests/test_examples_without_error.py:
--------------------------------------------------------------------------------
1 | """Smoke tests that check notebooks don't have syntax errors."""
2 |
3 |
4 | def test_nlp_span_comparison() -> None:
5 | from nlp_span_comparison import nlp_span_comparison
6 |
7 | assert not nlp_span_comparison.app._unparsable
8 |
9 |
10 | def test_explore_high_dimensional_data() -> None:
11 | from explore_high_dimensional_data import explore_high_dimensional_data
12 |
13 | assert not explore_high_dimensional_data.app._unparsable
14 |
--------------------------------------------------------------------------------
/youtube_summary/README.md:
--------------------------------------------------------------------------------
1 | # YouTube Summary
2 |
3 | This notebook can be used as a template to summarise YouTube videos. It was originally
4 | used to fetch summaries for some [keyboard reviews](https://www.youtube.com/playlist?list=PLGj5nRqy15j93TD0iReqfLL9lU1lZFEs6) but the notebook itself can be adapted
5 | for many other use cases too.
6 |
7 | 
8 |
9 | ## Running this notebook
10 |
11 | The only way to run this notebook is locally. This demo uses Claude as
12 | an LLM backend, which requires an `ANTHROPIC_API_KEY` set in a `.env` file. Finally,
13 | this notebook also assumes that `ffmpeg` is available on your system ([details](https://github.com/openai/whisper/blob/main/README.md#setup)).
14 |
15 | Once that's taken care of you can run this notebook in a sandbox. The requirements of each notebook are serialized in them as a top-level
16 | comment. Here are the steps to run the notebook:
17 |
18 | 1. [Install `uv`](https://github.com/astral-sh/uv/?tab=readme-ov-file#installation)
19 | 2. Open an example with `uvx marimo edit --sandbox `
20 |
21 | > [!TIP]
22 | > The [`--sandbox`
23 | > flag](https://docs.marimo.io/guides/package_reproducibility/) opens the
24 | > notebook in an isolated virtual environment, automatically installing the
25 | > notebook's dependencies 📦
26 |
27 | You can also open notebooks without `uv`, in which case you'll need to
28 | manually [install marimo](https://docs.marimo.io/getting_started/index.html#installation)
first. Then run `marimo edit <path/to/notebook.py>`; however, you'll also need to
30 | install the requirements yourself.
31 |
--------------------------------------------------------------------------------
/youtube_summary/youtube_summary.py:
--------------------------------------------------------------------------------
1 | # /// script
2 | # requires-python = ">=3.12"
3 | # dependencies = [
4 | # "anthropic==0.45.2",
5 | # "instructor==1.7.2",
6 | # "jinja2==3.1.5",
7 | # "marimo",
8 | # "matplotlib==3.10.0",
9 | # "mohtml==0.1.2",
10 | # "openai-whisper",
11 | # "opencv-python==4.11.0.86",
12 | # "pydantic==2.10.6",
13 | # "python-dotenv==1.0.1",
14 | # "wigglystuff==0.1.9",
15 | # "yt-dlp==2025.1.26",
16 | # ]
17 | # ///
18 |
import marimo

# Version of marimo this notebook was generated with; kept for reproducibility.
__generated_with = "0.10.19"
# The marimo application object; cells below register themselves on it.
app = marimo.App()
23 |
24 |
@app.cell
def _():
    import matplotlib.pylab as plt
    import cv2
    from yt_dlp import YoutubeDL
    from pathlib import Path

    def download_yt(yt_url: str):
        """Download the audio track of a YouTube video to `<video-id>.m4a`.

        The video id is taken as the last 11 characters of the URL. If the
        target file already exists the download is skipped.
        """
        video_id = yt_url[-11:]
        target_path = f"{video_id}.m4a"

        if Path(target_path).exists():
            print("Video has been downloaded already")
            return

        download_opts = {
            "format": "m4a/bestaudio/best",
            "postprocessors": [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "m4a",
                }
            ],
        }
        with YoutubeDL(download_opts) as downloader:
            downloader.download([yt_url])
        # yt-dlp names the file after the video title; rename the matching
        # file so later cells can find it by video id.
        for candidate in Path().glob("*.m4a"):
            if video_id in str(candidate):
                candidate.rename(target_path)

    return Path, YoutubeDL, cv2, download_yt, plt
57 |
58 |
@app.cell
def _():
    # marimo is imported in its own cell so every other cell can take `mo`
    # as a dependency.
    import marimo as mo

    return (mo,)
64 |
65 |
@app.cell
def _(mo):
    # Form asking for the video to summarise. The cell's last expression
    # (the batched form) is what marimo renders as the cell output.
    text_input = mo.ui.text(label="YouTube URL")

    # Fixed typo in the displayed text: "sofar" -> "so far".
    mo.md(f"""
    Fill in the YouTube URL or pass the video id here:

    {text_input}

    In our experience so far it can help to make sure that you are downloading a video that is set to "public". Unlisted videos caused download errors in the past.
    """).batch(text_input=text_input).form()
    return (text_input,)
78 |
79 |
@app.cell
def _(download_yt, mo, text_input):
    # Only show the spinner once the form actually has a value; previously a
    # "Downloading ..." spinner flashed even before any URL was submitted.
    if text_input.value:
        with mo.status.spinner(subtitle="Downloading ...") as _spinner:
            download_yt(text_input.value)
    return
86 |
87 |
@app.cell
def _(mo, text_input):
    # Imported inside the cell so the heavy whisper dependency only loads
    # when this cell runs.
    import whisper

    with mo.status.spinner(subtitle="Running Whisper ...") as _spinner:
        # Load the "base" Whisper checkpoint (downloaded on first use).
        model = whisper.load_model("base")
        # Transcribe the audio saved by the download cell. Assumes the last
        # 11 characters of the URL are the video id -- TODO confirm this
        # holds for short/shared URL formats.
        result = model.transcribe(f"{text_input.value[-11:]}.m4a")
    return model, result, whisper
96 |
97 |
@app.cell
def _(YoutubeDL, text_input):
    # Fetch video metadata (title, thumbnail, ...) without downloading media.
    with YoutubeDL() as ydl:
        info = ydl.extract_info(text_input.value, download=False)
    return info, ydl
103 |
104 |
@app.cell
def _():
    from typing import List
    import instructor
    from pydantic import BaseModel

    # NOTE: the class docstring below is not ordinary documentation --
    # pydantic exposes it as the schema description, which instructor sends
    # to the LLM, so its wording steers the generated summary. Do not edit
    # it casually.
    class YouTubeOutput(BaseModel):
        """
        Output of a YouTube video that reviews ergonomic keyboards.

        Make sure that you have a clear summary that highlights some of the findings. Refer to the reviewer as "me" and write as if it was written by the reviewer. But not in the present tense, it needs to be past tense. Avoid a formal style, write as if it was written on an informal tech-blog. Also make sure that you create a sequences of pros and cons of the keyboard. No more than 4 pros and 4 cons. Also add a oneliner tldr for the review, typically you can just copy what is in the title. The name of the keyboard should also include the brand if there is one.
        """

        summary: str  # past-tense, informal review summary
        pros: List[str]  # at most 4, per the prompt in the docstring
        cons: List[str]  # at most 4, per the prompt in the docstring
        tldr: str  # one-liner, typically copied from the video title
        keyboard_name: str  # includes the brand when there is one

    return BaseModel, List, YouTubeOutput, instructor
125 |
126 |
@app.cell
def _(instructor):
    from instructor import Instructor, Mode, patch
    from anthropic import Anthropic
    from dotenv import load_dotenv
    import os

    # Read ANTHROPIC_API_KEY from a local .env file (see the README).
    load_dotenv(".env")

    # os.environ[...] raises KeyError if the key is missing, which surfaces
    # the misconfiguration immediately in this cell.
    client = instructor.from_anthropic(
        Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]),
    )
    return Anthropic, Instructor, Mode, client, load_dotenv, os, patch
140 |
141 |
@app.cell
def _(mo):
    # Static markdown note rendered as this cell's output.
    mo.md(
        "Once the downloading/parsing/generating is done, you can see the results below together with a 'copy to clipboard' button."
    )
    return
148 |
149 |
@app.cell
def _(
    CopyToClipboard,
    YouTubeOutput,
    client,
    info,
    mo,
    result,
    text_input,
):
    from mohtml import pre, p, code, div
    from jinja2 import Template

    # MkDocs-style markdown page template for the rendered review. The
    # content is flush-left: jinja2 does not dedent, and leading spaces
    # would break the YAML front-matter.
    # NOTE(review): the blank lines after {{tldr}} look like they once held
    # HTML (probably an <img>/embed using {{thumbnail}} and {{video_idx}},
    # which are passed to render() but unused below) -- confirm against the
    # original file.
    template = Template("""
---
hide:
  - toc
  - navigation
title: {{keyboard_name}}
---

## {{tldr}}





{{summary}}

## Pros
{% for pro in pros %}
- {{ pro }}
{% endfor %}

## Cons
{% for con in cons %}
- {{ con }}
{% endfor %}

""")

    # Ask Claude (via instructor) for a structured YouTubeOutput, then
    # render it into the markdown template.
    with mo.status.spinner(subtitle="Running LLM ...") as _spinner:
        response = client.chat.completions.create(
            model="claude-3-5-sonnet-20241022",
            messages=[
                {
                    "role": "user",
                    "content": f"Create a proper summary of the following keyboard review. This is the title: {info['title']}. This is the text for the full review: {result['text']}",
                }
            ],
            max_tokens=1500,
            response_model=YouTubeOutput,
        )
        rendered = template.render(
            summary=response.summary,
            pros=response.pros,
            cons=response.cons,
            title=info["title"],
            thumbnail=info["thumbnail"],
            keyboard_name=response.keyboard_name,
            tldr=response.tldr,
            video_idx=f"{text_input.value[-11:]}",
        )
        clipboard_btn = CopyToClipboard(rendered)

    # Last expression: marimo displays the rendered markdown as cell output.
    rendered
    return (
        Template,
        clipboard_btn,
        code,
        div,
        p,
        pre,
        rendered,
        response,
        template,
    )
227 |
228 |
@app.cell
def _():
    # Copy-to-clipboard widget from the wigglystuff package; imported in its
    # own cell so other cells can depend on it.
    from wigglystuff import CopyToClipboard

    return (CopyToClipboard,)
234 |
235 |
@app.cell
def _(clipboard_btn):
    # Render the copy-to-clipboard button as this cell's output.
    clipboard_btn
    return
240 |
241 |
# Allows running the notebook as a plain script (`python youtube_summary.py`)
# in addition to opening it with `marimo edit`.
if __name__ == "__main__":
    app.run()
244 |
--------------------------------------------------------------------------------