├── .github └── workflows │ └── cargo.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── Makefile.toml ├── README.md ├── analysis ├── accuracy │ ├── data │ │ ├── sine-microflow.csv │ │ ├── sine-test-set.csv │ │ └── sine-tflite.csv │ ├── plots │ │ ├── sine-accuracy-comparison.pdf │ │ ├── sine-accuracy.pdf │ │ └── sine-test-set.pdf │ └── sine.ipynb ├── memory │ ├── person_detect.ipynb │ ├── plots │ │ ├── person-detect-memory.pdf │ │ ├── sine-memory.pdf │ │ └── speech-memory.pdf │ ├── sine.ipynb │ └── speech.ipynb └── performance │ ├── data │ ├── person-detect-esp32-microflow.csv │ ├── person-detect-esp32-tflm.csv │ ├── person-detect-nrf52840-microflow.csv │ ├── person-detect-nrf52840-tflm.csv │ ├── sine-esp32-microflow.csv │ ├── sine-esp32-tflm.csv │ ├── sine-nrf52840-microflow.csv │ ├── sine-nrf52840-tflm.csv │ ├── speech-esp32-microflow.csv │ ├── speech-esp32-tflm.csv │ ├── speech-nrf52840-microflow.csv │ └── speech-nrf52840-tflm.csv │ ├── person_detect.ipynb │ ├── plots │ ├── person-detect-esp32-performance.pdf │ ├── person-detect-nrf52840-performance.pdf │ ├── person-detect-performance-comparison.pdf │ ├── sine-esp32-performance.pdf │ ├── sine-nRF52840-performance.pdf │ ├── sine-performance-comparison.pdf │ ├── speech-esp32-performance.pdf │ ├── speech-nrf52840-performance.pdf │ └── speech-performance-comparison.pdf │ ├── sine.ipynb │ └── speech.ipynb ├── assets ├── microflow-logo.png └── microflow-logo.pxd ├── benches ├── person_detect.rs ├── sine.rs └── speech.rs ├── examples ├── arduino-nano33ble │ ├── .cargo │ │ └── config.toml │ ├── Cargo.toml │ ├── Makefile.toml │ ├── examples │ │ ├── person_detect.rs │ │ ├── sine.rs │ │ └── speech.rs │ └── memory.x ├── arduino-uno │ ├── .cargo │ │ └── config.toml │ ├── Cargo.toml │ ├── Makefile.toml │ ├── examples │ │ └── sine.rs │ └── rust-toolchain.toml ├── atsamx7x │ ├── .cargo │ │ └── config.toml │ ├── Cargo.toml │ ├── Makefile.toml │ ├── examples │ │ ├── person_detect.rs │ │ ├── 
sine.rs │ │ └── speech.rs │ └── memory.x ├── esp32 │ ├── .cargo │ │ └── config.toml │ ├── Cargo.toml │ ├── Makefile.toml │ ├── examples │ │ ├── person_detect.rs │ │ ├── sine.rs │ │ └── speech.rs │ └── rust-toolchain.toml ├── person_detect.rs ├── qemu │ ├── .cargo │ │ └── config.toml │ ├── Cargo.toml │ ├── Makefile.toml │ ├── examples │ │ ├── sine.rs │ │ └── speech.rs │ └── memory.x ├── sine.rs └── speech.rs ├── microflow-macros ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── flatbuffers │ ├── tflite.fbs │ └── tflite_generated.rs └── src │ ├── activation.rs │ ├── buffer.rs │ ├── lib.rs │ ├── ops │ ├── average_pool_2d.rs │ ├── conv_2d.rs │ ├── depthwise_conv_2d.rs │ ├── fully_connected.rs │ ├── mod.rs │ ├── reshape.rs │ └── softmax.rs │ ├── quantize.rs │ └── tensor.rs ├── models ├── person_detect.tflite ├── sine.tflite └── speech.tflite ├── samples ├── features │ ├── person_detect.rs │ └── speech.rs ├── no.wav ├── no_person.bmp ├── person.bmp └── yes.wav ├── src ├── activation.rs ├── buffer.rs ├── lib.rs ├── ops │ ├── average_pool_2d.rs │ ├── conv_2d.rs │ ├── depthwise_conv_2d.rs │ ├── fully_connected.rs │ ├── mod.rs │ ├── reshape.rs │ └── softmax.rs ├── quantize.rs └── tensor.rs └── tests ├── person_detect.rs ├── sine.rs └── speech.rs /.github/workflows/cargo.yml: -------------------------------------------------------------------------------- 1 | name: Cargo 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | CLICOLOR_FORCE: 1 11 | CARGO_TERM_COLOR: always 12 | 13 | jobs: 14 | make: 15 | 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Install Crates 21 | run: cargo install cargo-make flip-link espup 22 | - name: Install Nightly Toolchain 23 | run: rustup toolchain install nightly --component rust-src 24 | - name: Install ESP Toolchain 25 | run: espup install 26 | - name: Install AVR Toolchain 27 | run: sudo apt install -y avr-libc gcc-avr pkg-config 28 | - name: 
Add Rust Targets 29 | run: rustup target add thumbv7em-none-eabihf thumbv7m-none-eabi 30 | - name: Make All 31 | run: | 32 | . ~/export-esp.sh 33 | cargo make all 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/macos,windows,linux,clion+all,visualstudiocode,rust,python,jupyternotebooks 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos,windows,linux,clion+all,visualstudiocode,rust,python,jupyternotebooks 3 | 4 | ### CLion+all ### 5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 7 | 8 | # User-specific stuff 9 | .idea/**/workspace.xml 10 | .idea/**/tasks.xml 11 | .idea/**/usage.statistics.xml 12 | .idea/**/dictionaries 13 | .idea/**/shelf 14 | 15 | # AWS User-specific 16 | .idea/**/aws.xml 17 | 18 | # Generated files 19 | .idea/**/contentModel.xml 20 | 21 | # Sensitive or high-churn files 22 | .idea/**/dataSources/ 23 | .idea/**/dataSources.ids 24 | .idea/**/dataSources.local.xml 25 | .idea/**/sqlDataSources.xml 26 | .idea/**/dynamic.xml 27 | .idea/**/uiDesigner.xml 28 | .idea/**/dbnavigator.xml 29 | 30 | # Gradle 31 | .idea/**/gradle.xml 32 | .idea/**/libraries 33 | 34 | # Gradle and Maven with auto-import 35 | # When using Gradle or Maven with auto-import, you should exclude module files, 36 | # since they will be recreated, and may cause churn. Uncomment if using 37 | # auto-import. 
38 | # .idea/artifacts 39 | # .idea/compiler.xml 40 | # .idea/jarRepositories.xml 41 | # .idea/modules.xml 42 | # .idea/*.iml 43 | # .idea/modules 44 | # *.iml 45 | # *.ipr 46 | 47 | # CMake 48 | cmake-build-*/ 49 | 50 | # Mongo Explorer plugin 51 | .idea/**/mongoSettings.xml 52 | 53 | # File-based project format 54 | *.iws 55 | 56 | # IntelliJ 57 | out/ 58 | 59 | # mpeltonen/sbt-idea plugin 60 | .idea_modules/ 61 | 62 | # JIRA plugin 63 | atlassian-ide-plugin.xml 64 | 65 | # Cursive Clojure plugin 66 | .idea/replstate.xml 67 | 68 | # SonarLint plugin 69 | .idea/sonarlint/ 70 | 71 | # Crashlytics plugin (for Android Studio and IntelliJ) 72 | com_crashlytics_export_strings.xml 73 | crashlytics.properties 74 | crashlytics-build.properties 75 | fabric.properties 76 | 77 | # Editor-based Rest Client 78 | .idea/httpRequests 79 | 80 | # Android studio 3.1+ serialized cache file 81 | .idea/caches/build_file_checksums.ser 82 | 83 | ### CLion+all Patch ### 84 | # Ignore everything but code style settings and run configurations 85 | # that are supposed to be shared within teams. 
86 | 87 | .idea/* 88 | 89 | !.idea/codeStyles 90 | !.idea/runConfigurations 91 | 92 | ### JupyterNotebooks ### 93 | # gitignore template for Jupyter Notebooks 94 | # website: http://jupyter.org/ 95 | 96 | .ipynb_checkpoints 97 | */.ipynb_checkpoints/* 98 | 99 | # IPython 100 | profile_default/ 101 | ipython_config.py 102 | 103 | # Remove previous ipynb_checkpoints 104 | # git rm -r .ipynb_checkpoints/ 105 | 106 | ### Linux ### 107 | *~ 108 | 109 | # temporary files which can be created if a process still has a handle open of a deleted file 110 | .fuse_hidden* 111 | 112 | # KDE directory preferences 113 | .directory 114 | 115 | # Linux trash folder which might appear on any partition or disk 116 | .Trash-* 117 | 118 | # .nfs files are created when an open file is removed but is still being accessed 119 | .nfs* 120 | 121 | ### macOS ### 122 | # General 123 | .DS_Store 124 | .AppleDouble 125 | .LSOverride 126 | 127 | # Icon must end with two \r 128 | Icon 129 | 130 | 131 | # Thumbnails 132 | ._* 133 | 134 | # Files that might appear in the root of a volume 135 | .DocumentRevisions-V100 136 | .fseventsd 137 | .Spotlight-V100 138 | .TemporaryItems 139 | .Trashes 140 | .VolumeIcon.icns 141 | .com.apple.timemachine.donotpresent 142 | 143 | # Directories potentially created on remote AFP share 144 | .AppleDB 145 | .AppleDesktop 146 | Network Trash Folder 147 | Temporary Items 148 | .apdisk 149 | 150 | ### macOS Patch ### 151 | # iCloud generated files 152 | *.icloud 153 | 154 | ### Python ### 155 | # Byte-compiled / optimized / DLL files 156 | __pycache__/ 157 | *.py[cod] 158 | *$py.class 159 | 160 | # C extensions 161 | *.so 162 | 163 | # Distribution / packaging 164 | .Python 165 | build/ 166 | develop-eggs/ 167 | dist/ 168 | downloads/ 169 | eggs/ 170 | .eggs/ 171 | lib/ 172 | lib64/ 173 | parts/ 174 | sdist/ 175 | var/ 176 | wheels/ 177 | share/python-wheels/ 178 | *.egg-info/ 179 | .installed.cfg 180 | *.egg 181 | MANIFEST 182 | 183 | # PyInstaller 184 | # Usually 
these files are written by a python script from a template 185 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 186 | *.manifest 187 | *.spec 188 | 189 | # Installer logs 190 | pip-log.txt 191 | pip-delete-this-directory.txt 192 | 193 | # Unit test / coverage reports 194 | htmlcov/ 195 | .tox/ 196 | .nox/ 197 | .coverage 198 | .coverage.* 199 | .cache 200 | nosetests.xml 201 | coverage.xml 202 | *.cover 203 | *.py,cover 204 | .hypothesis/ 205 | .pytest_cache/ 206 | cover/ 207 | 208 | # Translations 209 | *.mo 210 | *.pot 211 | 212 | # Django stuff: 213 | *.log 214 | local_settings.py 215 | db.sqlite3 216 | db.sqlite3-journal 217 | 218 | # Flask stuff: 219 | instance/ 220 | .webassets-cache 221 | 222 | # Scrapy stuff: 223 | .scrapy 224 | 225 | # Sphinx documentation 226 | docs/_build/ 227 | 228 | # PyBuilder 229 | .pybuilder/ 230 | target/ 231 | 232 | # Jupyter Notebook 233 | 234 | # IPython 235 | 236 | # pyenv 237 | # For a library or package, you might want to ignore these files since the code is 238 | # intended to run in multiple environments; otherwise, check them in: 239 | # .python-version 240 | 241 | # pipenv 242 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 243 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 244 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 245 | # install all needed dependencies. 246 | #Pipfile.lock 247 | 248 | # poetry 249 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 250 | # This is especially recommended for binary packages to ensure reproducibility, and is more 251 | # commonly ignored for libraries. 
252 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 253 | #poetry.lock 254 | 255 | # pdm 256 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 257 | #pdm.lock 258 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 259 | # in version control. 260 | # https://pdm.fming.dev/#use-with-ide 261 | .pdm.toml 262 | 263 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 264 | __pypackages__/ 265 | 266 | # Celery stuff 267 | celerybeat-schedule 268 | celerybeat.pid 269 | 270 | # SageMath parsed files 271 | *.sage.py 272 | 273 | # Environments 274 | .env 275 | .venv 276 | env/ 277 | venv/ 278 | ENV/ 279 | env.bak/ 280 | venv.bak/ 281 | 282 | # Spyder project settings 283 | .spyderproject 284 | .spyproject 285 | 286 | # Rope project settings 287 | .ropeproject 288 | 289 | # mkdocs documentation 290 | /site 291 | 292 | # mypy 293 | .mypy_cache/ 294 | .dmypy.json 295 | dmypy.json 296 | 297 | # Pyre type checker 298 | .pyre/ 299 | 300 | # pytype static type analyzer 301 | .pytype/ 302 | 303 | # Cython debug symbols 304 | cython_debug/ 305 | 306 | # PyCharm 307 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 308 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 309 | # and can be added to the global gitignore or merged into this file. For a more nuclear 310 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
311 | #.idea/ 312 | 313 | ### Python Patch ### 314 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 315 | poetry.toml 316 | 317 | # ruff 318 | .ruff_cache/ 319 | 320 | ### Rust ### 321 | # Generated by Cargo 322 | # will have compiled files and executables 323 | debug/ 324 | 325 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 326 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 327 | Cargo.lock 328 | 329 | # These are backup files generated by rustfmt 330 | **/*.rs.bk 331 | 332 | # MSVC Windows builds of rustc generate these, which store debugging information 333 | *.pdb 334 | 335 | ### VisualStudioCode ### 336 | .vscode/* 337 | !.vscode/settings.json 338 | !.vscode/tasks.json 339 | !.vscode/launch.json 340 | !.vscode/extensions.json 341 | !.vscode/*.code-snippets 342 | 343 | # Local History for Visual Studio Code 344 | .history/ 345 | 346 | # Built Visual Studio Code Extensions 347 | *.vsix 348 | 349 | ### VisualStudioCode Patch ### 350 | # Ignore all local history of files 351 | .history 352 | .ionide 353 | 354 | ### Windows ### 355 | # Windows thumbnail cache files 356 | Thumbs.db 357 | Thumbs.db:encryptable 358 | ehthumbs.db 359 | ehthumbs_vista.db 360 | 361 | # Dump file 362 | *.stackdump 363 | 364 | # Folder config file 365 | [Dd]esktop.ini 366 | 367 | # Recycle Bin used on file shares 368 | $RECYCLE.BIN/ 369 | 370 | # Windows Installer files 371 | *.cab 372 | *.msi 373 | *.msix 374 | *.msm 375 | *.msp 376 | 377 | # Windows shortcuts 378 | *.lnk 379 | 380 | # End of https://www.toptal.com/developers/gitignore/api/macos,windows,linux,clion+all,visualstudiocode,rust,python,jupyternotebooks 381 | 382 | # JupyterLab files 383 | .jupyter 384 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 
Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ## [0.1.3] - 2024-06-01 11 | 12 | ### Fixed 13 | 14 | - Update to new `esp-hal` version 15 | 16 | ## [0.1.2] - 2024-02-09 17 | 18 | ### Added 19 | 20 | - MicroFlow is now available on [crates.io](https://crates.io/crates/microflow) 🎉 21 | 22 | ### Fixed 23 | 24 | - Fix Arduino Uno example 25 | - Update to new `esp-hal` version 26 | 27 | ### Changed 28 | 29 | - Minor documentation changes 30 | 31 | ## [0.1.0] - 2023-07-02 32 | 33 | Initial release. 34 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "microflow" 3 | description = "A robust and efficient TinyML inference engine" 4 | authors = ["Matteo Carnelos "] 5 | documentation = "https://docs.rs/microflow" 6 | repository = "https://github.com/matteocarnelos/microflow-rs" 7 | categories = ["embedded", "no-std", "science"] 8 | keywords = ["tinyml"] 9 | license = "MIT OR Apache-2.0" 10 | version = "0.1.3" 11 | edition = "2021" 12 | include = [ 13 | "/src/**", 14 | "/benches/**", 15 | "/tests/**", 16 | "/models/**", 17 | "/Cargo.toml", 18 | "/LICENSE-MIT", 19 | "/LICENSE-APACHE", 20 | "/README.md", 21 | ] 22 | 23 | [dependencies] 24 | microflow-macros = { version = "0.1", path = "microflow-macros" } 25 | nalgebra = { version = "0.32", default-features = false, features = ["macros"] } 26 | simba = { version = "0.8", default-features = false } 27 | libm = "0.2" 28 | 29 | [dev-dependencies] 30 | csv = "1.2" 31 | criterion = "0.5" 32 | 33 | [[bench]] 34 | name = "sine" 35 | harness = false 36 | 37 | [[bench]] 38 | name = "speech" 39 | harness = false 40 | 41 | [[bench]] 42 | 
name = "person_detect" 43 | harness = false 44 | 45 | [workspace] 46 | members = ["microflow-macros"] 47 | exclude = ["examples"] 48 | 49 | [patch.crates-io] 50 | nalgebra = { git = "https://github.com/matteocarnelos/nalgebra" } 51 | 52 | [profile.release] 53 | lto = true 54 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Matteo Carnelos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile.toml: -------------------------------------------------------------------------------- 1 | [env] 2 | CARGO_MAKE_EXTEND_WORKSPACE_MAKEFILE = true 3 | 4 | [config] 5 | default_to_workspace = false 6 | 7 | [tasks.default] 8 | alias = "microflow" 9 | 10 | [tasks.all] 11 | dependencies = [ 12 | "examples-arduino-nano33ble", 13 | "examples-arduino-uno", 14 | "examples-atsamx7x", 15 | "examples-esp32", 16 | "examples-qemu", 17 | ] 18 | run_task = { name = "microflow", fork = true } 19 | 20 | [tasks.microflow] 21 | workspace = true 22 | dependencies = [ 23 | "build", 24 | "examples-compile", 25 | "test", 26 | "clippy", 27 | "check-format", 28 | "docs", 29 | ] 30 | 31 | [tasks.examples-arduino-nano33ble] 32 | cwd = "examples/arduino-nano33ble" 33 | command = "cargo" 34 | args = ["make"] 35 | 36 | [tasks.examples-arduino-uno] 37 | cwd = "examples/arduino-uno" 38 | command = "cargo" 39 | args = ["make"] 40 | 41 | [tasks.examples-atsamx7x] 42 | cwd = "examples/atsamx7x" 43 | command = "cargo" 44 | args = ["make"] 45 | 46 | [tasks.examples-esp32] 47 | cwd = "examples/esp32" 48 | command = "cargo" 49 | args = ["+esp", "make"] 50 | install_crate = false 51 | 52 | [tasks.examples-qemu] 53 | cwd = "examples/qemu" 54 | command = "cargo" 55 | args = ["make"] 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 |

MicroFlow

6 |

A robust and efficient TinyML inference engine

7 |

8 | 9 | 10 | 11 |

12 | 13 |
14 | 15 | MicroFlow is a robust and efficient TinyML inference engine designed for deploying machine learning models on embedded systems. 16 | It was developed by Matteo Carnelos as part of his master's thesis project at the [University of Padova](https://www.unipd.it/en/) in collaboration with [Grepit AB](https://github.com/GrepitAB). 17 | 18 | MicroFlow uses a compiler-based approach, resulting in the following engine structure: 19 | 20 | ```mermaid 21 | graph LR 22 | subgraph host[Host] 23 | model(Neural Network Model) --> compiler(MicroFlow Compiler) 24 | end 25 | subgraph target[Target] 26 | code(Generated Source Code) --- weights[(Weights)] 27 | code --- runtime(MicroFlow Runtime) 28 | end 29 | compiler --> code 30 | compiler --> weights 31 | ``` 32 | 33 | MicroFlow consists of two primary components: the compiler, represented by the `microflow-macros` crate, and the runtime, represented by the `microflow` crate. 34 | The compiler, which runs prior to the Rust compiler, is responsible for parsing and pre-processing the model. 35 | It generates the necessary source code to enable inference on the model. 36 | On the other hand, the runtime is a `[no_std]` component designed to run on the target MCU. 37 | It encompasses the implementation of operators, activation functions, and quantization procedures. 38 | 39 | ## Usage 40 | 41 | MicroFlow utilizes Rust [Procedural Macros](https://doc.rust-lang.org/reference/procedural-macros.html) as its user interface. 42 | By applying the `model` macro to a `struct` and providing the model's path, the MicroFlow compiler generates a `predict()` method. 43 | This method can be called to perform inference on the given model. 44 | Currently, MicroFlow only supports models in the TensorFlow Lite format (`.tflite`). 
45 | 46 | Here is a minimal example showcasing the usage of MicroFlow: 47 | 48 | ```rust ignore 49 | use microflow::model; 50 | 51 | #[model("path/to/model.tflite")] 52 | struct MyModel; 53 | 54 | fn main() { 55 | let prediction = MyModel::predict(input_data); 56 | } 57 | ``` 58 | 59 | **[Documentation](https://docs.rs/microflow)** 60 | 61 | ## Examples 62 | 63 | The examples provided with MicroFlow can be found in the `examples` folder. 64 | To run an example on a target board, `cd` into the board directory for the example (e.g. `examples/arduino-uno`) and run the command: 65 | ```bash ignore 66 | cargo run --example 67 | ``` 68 | Otherwise, to run the example locally, just run the above command in the root directory. 69 | 70 | > [!NOTE] 71 | > For board examples, you might need to install additional tools and configure the runner to make the example work for your setup. 72 | 73 | ## Supported Operators 74 | 75 | Currently, MicroFlow supports the following operators and activation functions: 76 | 77 | | Operator | Quantized | Tensor Type | 78 | |-------------------|-----------|------------------------| 79 | | `FullyConnected` | ✓ | `Tensor2D` | 80 | | `Conv2D` | ✓ | `Tensor4D` | 81 | | `DepthwiseConv2D` | ✓ | `Tensor4D` | 82 | | `AveragePool2D` | ✓ | `Tensor4D` | 83 | | `Reshape` | ✓ | `Tensor2D`, `Tensor4D` | 84 | 85 | | Activation Function | Quantized | 86 | |---------------------|-----------| 87 | | `ReLU` | ✓ | 88 | | `ReLU6` | ✓ | 89 | | `Softmax` | ✓ | 90 | 91 | These operators and activation functions cover common building blocks for neural networks and enable efficient inference with reduced memory and computational requirements. 92 | However, MicroFlow's development roadmap includes plans for implementing additional operators and activation functions to expand the range of supported models. 
93 | 94 | ## Tested Models and MCUs 95 | 96 | The `examples` folder contains the code used to test MicroFlow on different MCUs, including: 97 | 98 | - ESP32 (32-bit Xtensa) 99 | - ATSAMV71 (32-bit Cortex-M7F) 100 | - nRF52840 (32-bit Cortex-M4F) 101 | - LM3S6965 (32-bit Cortex-M3) 102 | - ATmega328 (8-bit AVR) 103 | 104 | The models used to test the inference engines can be found in the `models` directory. 105 | These models include: 106 | 107 | - A sine predictor 108 | - A speech command recognizer (TinyConv) 109 | - A person detector (MobileNet v1) 110 | 111 | ## Contributing 112 | 113 | Contributors are welcome. 114 | For major changes, please open an issue first to discuss what you would like to change. 115 | Please make sure to update tests as appropriate. 116 | 117 | ## Citation 118 | 119 | The MicroFlow paper has been published in Elsevier's [Internet of Things](https://www.sciencedirect.com/science/article/pii/S2542660525000113) journal and can be cited as follows: 120 | 121 | ```bibtex 122 | @article{CARNELOS2025101498, 123 | title = {MicroFlow: An Efficient Rust-Based Inference Engine for TinyML}, 124 | journal = {Internet of Things}, 125 | volume = {30}, 126 | pages = {101498}, 127 | year = {2025}, 128 | issn = {2542-6605}, 129 | doi = {https://doi.org/10.1016/j.iot.2025.101498}, 130 | url = {https://www.sciencedirect.com/science/article/pii/S2542660525000113}, 131 | author = {Matteo Carnelos and Francesco Pasti and Nicola Bellotto}, 132 | keywords = {TinyML, Rust, Neural networks, Embedded systems, IoT} 133 | } 134 | ``` 135 | 136 | ## License 137 | 138 | Licensed under either of 139 | 140 | * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or ) 141 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or ) 142 | 143 | at your option. 
144 | 145 | Copyright © 2025, [Matteo Carnelos](https://github.com/matteocarnelos) 146 | -------------------------------------------------------------------------------- /analysis/accuracy/plots/sine-accuracy-comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/accuracy/plots/sine-accuracy-comparison.pdf -------------------------------------------------------------------------------- /analysis/accuracy/plots/sine-accuracy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/accuracy/plots/sine-accuracy.pdf -------------------------------------------------------------------------------- /analysis/accuracy/plots/sine-test-set.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/accuracy/plots/sine-test-set.pdf -------------------------------------------------------------------------------- /analysis/accuracy/sine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1865db15-5f71-4bcb-a0c9-7a28e114a5b8", 6 | "metadata": {}, 7 | "source": [ 8 | "# Sine Accuracy" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "b0af9742-26ba-45cb-a72a-2803b4087175", 14 | "metadata": { 15 | "tags": [] 16 | }, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "cc1141ab-13e8-466d-9b79-dc41e3d2d267", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt" 32 | ] 33 | }, 34 | { 
35 | "cell_type": "markdown", 36 | "id": "3ba07279-a4e1-4c33-91c3-ff0ec9df4b36", 37 | "metadata": {}, 38 | "source": [ 39 | "## Test Set" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "2cec1e83-ce72-40db-ab3e-e5375debf9b0", 46 | "metadata": { 47 | "tags": [] 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "test_set = pd.read_csv('data/sine-test-set.csv')\n", 52 | "\n", 53 | "plt.figure(figsize=(4, 3), layout='constrained')\n", 54 | "plt.plot(test_set.x, test_set.y, '.')\n", 55 | "plt.savefig('plots/sine-test-set.pdf')\n", 56 | "plt.show()" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "id": "535f621d-7360-4d6e-b48d-3281819653a6", 62 | "metadata": {}, 63 | "source": [ 64 | "## Predictions" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "baeda57c-d802-4675-ade6-6f9a707ce246", 71 | "metadata": { 72 | "tags": [] 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "tflite = pd.read_csv('data/sine-tflite.csv')\n", 77 | "microflow = pd.read_csv('data/sine-microflow.csv')\n", 78 | "\n", 79 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 80 | "\n", 81 | "ax[0].plot(test_set.x, test_set.y, '.', label='Test values')\n", 82 | "ax[0].plot(tflite.x, tflite.y, '.', label='TFLM predictions')\n", 83 | "ax[0].set_title('TFLM')\n", 84 | "ax[0].legend()\n", 85 | "\n", 86 | "ax[1].plot(test_set.x, test_set.y, '.', label='Test values')\n", 87 | "ax[1].plot(microflow.x, microflow.y, '.', label='MicroFlow predictions')\n", 88 | "ax[1].set_title('MicroFlow')\n", 89 | "ax[1].legend()\n", 90 | "\n", 91 | "plt.savefig('plots/sine-accuracy.pdf')\n", 92 | "plt.show()" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "id": "d5c5adab-ce31-4fcd-9274-79ab15d68d8e", 98 | "metadata": {}, 99 | "source": [ 100 | "## Comparison" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "id": "bed0f6a0-ca18-4fc1-9010-b200fd6546f8", 107 | 
"metadata": { 108 | "tags": [] 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "plt.figure(figsize=(4, 3), layout='constrained')\n", 113 | "plt.plot(test_set.x, test_set.y, '.', label='Test values')\n", 114 | "plt.plot(tflite.x, tflite.y, '.', label='TFLM predictions')\n", 115 | "plt.plot(microflow.x, microflow.y, '.', label='MicroFlow predictions')\n", 116 | "plt.legend()\n", 117 | "plt.savefig('plots/sine-accuracy-comparison.pdf')\n", 118 | "plt.show()" 119 | ] 120 | } 121 | ], 122 | "metadata": { 123 | "kernelspec": { 124 | "display_name": "microflow", 125 | "language": "python", 126 | "name": "microflow" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.10.9" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 5 143 | } 144 | -------------------------------------------------------------------------------- /analysis/memory/person_detect.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8bbab007-c69c-4eb3-8c3f-18ba2739c7f7", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Person Detect Memory" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "4f9d44c9-8dde-4c26-8001-93880f738447", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "eec11aa8-547f-4fed-a4c8-18eb09030a29", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "import seaborn as sns" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "9845fea7-cee7-42c2-9c59-5679b06ed89f", 38 | "metadata": {}, 39 | 
"source": [ 40 | "## Flash & RAM Usage" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "e894c794-bfae-4301-917a-fcfb3f140415", 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "flash = pd.DataFrame({\n", 53 | " 'MCU': ['ESP32', 'nRF52840', 'ATSAMV71'] * 2,\n", 54 | " 'Inference Engine': [*['TFLM'] * 3, *['MicroFlow'] * 3],\n", 55 | " 'Usage (kB)': [*[642.589, 459.648, 0], *[452.512, 391.700, 403.548]]\n", 56 | "})\n", 57 | "ram = pd.DataFrame({\n", 58 | " 'MCU': ['ESP32', 'nRF52840', 'ATSAMV71'] * 2,\n", 59 | " 'Inference Engine': [*['TFLM'] * 3, *['MicroFlow'] * 3],\n", 60 | " 'Usage (kB)': [*[122.428, 143.728, 0], *[96.048, 95.984, 96.512]]\n", 61 | "})\n", 62 | "\n", 63 | "fig, ax = plt.subplots(2, layout='constrained')\n", 64 | "fig.get_layout_engine().set(hspace=.1)\n", 65 | "\n", 66 | "sns.barplot(flash, ax=ax[0], x='MCU', y='Usage (kB)', hue='Inference Engine', width=.5)\n", 67 | "ax[0].set_xlabel('')\n", 68 | "ax[0].set_title('Flash')\n", 69 | "ax[0].legend()\n", 70 | "\n", 71 | "sns.barplot(ram, ax=ax[1], x='MCU', y='Usage (kB)', hue='Inference Engine', width=.5)\n", 72 | "ax[1].set_xlabel('')\n", 73 | "ax[1].set_title('RAM')\n", 74 | "ax[1].legend()\n", 75 | "\n", 76 | "plt.savefig('plots/person-detect-memory.pdf')\n", 77 | "plt.show()" 78 | ] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "microflow", 84 | "language": "python", 85 | "name": "microflow" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.10.9" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 5 102 | } 103 | -------------------------------------------------------------------------------- /analysis/memory/plots/person-detect-memory.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/memory/plots/person-detect-memory.pdf -------------------------------------------------------------------------------- /analysis/memory/plots/sine-memory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/memory/plots/sine-memory.pdf -------------------------------------------------------------------------------- /analysis/memory/plots/speech-memory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/memory/plots/speech-memory.pdf -------------------------------------------------------------------------------- /analysis/memory/sine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8bbab007-c69c-4eb3-8c3f-18ba2739c7f7", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Sine Memory" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "4f9d44c9-8dde-4c26-8001-93880f738447", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "eec11aa8-547f-4fed-a4c8-18eb09030a29", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "import seaborn as sns" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "9845fea7-cee7-42c2-9c59-5679b06ed89f", 38 | "metadata": {}, 39 | "source": [ 40 | "## Flash & RAM Usage" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | 
"execution_count": null, 46 | "id": "e894c794-bfae-4301-917a-fcfb3f140415", 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "flash = pd.DataFrame({\n", 53 | " 'MCU': ['ESP32', 'nRF52840', 'ATSAMV71', 'LM3S6965', 'ATmega328'] * 2,\n", 54 | " 'Inference Engine': [*['TFLM'] * 5, *['MicroFlow'] * 5],\n", 55 | " 'Usage (kB)': [*[306.817, 116.352, 0, 0, 0], *[100.56, 34.292, 48.0, 34.296, 13.619]]\n", 56 | "})\n", 57 | "ram = pd.DataFrame({\n", 58 | " 'MCU': ['ESP32', 'nRF52840', 'ATSAMV71', 'LM3S6965', 'ATmega328'] * 2,\n", 59 | " 'Inference Engine': [*['TFLM'] * 5, *['MicroFlow'] * 5],\n", 60 | " 'Usage (kB)': [*[24.424, 45.728, 0, 0, 0], *[10.688, 5.296, 6.584, 4.620, 1.706]]\n", 61 | "})\n", 62 | "\n", 63 | "fig, ax = plt.subplots(2, layout='constrained')\n", 64 | "fig.get_layout_engine().set(hspace=.1)\n", 65 | "\n", 66 | "sns.barplot(flash, ax=ax[0], x='MCU', y='Usage (kB)', hue='Inference Engine')\n", 67 | "ax[0].set_xlabel('')\n", 68 | "ax[0].set_title('Flash')\n", 69 | "ax[0].legend()\n", 70 | "\n", 71 | "sns.barplot(ram, ax=ax[1], x='MCU', y='Usage (kB)', hue='Inference Engine')\n", 72 | "ax[1].set_xlabel('')\n", 73 | "ax[1].set_title('RAM')\n", 74 | "ax[1].legend()\n", 75 | "\n", 76 | "plt.savefig('plots/sine-memory.pdf')\n", 77 | "plt.show()" 78 | ] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "microflow", 84 | "language": "python", 85 | "name": "microflow" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.10.9" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 5 102 | } 103 | -------------------------------------------------------------------------------- /analysis/memory/speech.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8bbab007-c69c-4eb3-8c3f-18ba2739c7f7", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Speech Memory" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "4f9d44c9-8dde-4c26-8001-93880f738447", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "eec11aa8-547f-4fed-a4c8-18eb09030a29", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "import seaborn as sns" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "9845fea7-cee7-42c2-9c59-5679b06ed89f", 38 | "metadata": {}, 39 | "source": [ 40 | "## Flash & RAM Usage" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "e894c794-bfae-4301-917a-fcfb3f140415", 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "flash = pd.DataFrame({\n", 53 | " 'MCU': ['ESP32', 'nRF52840', 'ATSAMV71', 'LM3S6965'] * 2,\n", 54 | " 'Inference Engine': [*['TFLM'] * 4, *['MicroFlow'] * 4],\n", 55 | " 'Usage (kB)': [*[341.281, 157.464, 0, 0], *[110.416, 61.804, 73.544, 61.860]]\n", 56 | "})\n", 57 | "ram = pd.DataFrame({\n", 58 | " 'MCU': ['ESP32', 'nRF52840', 'ATSAMV71', 'LM3S6965'] * 2,\n", 59 | " 'Inference Engine': [*['TFLM'] * 4, *['MicroFlow'] * 4],\n", 60 | " 'Usage (kB)': [*[42.428, 63.728, 0, 0], *[25.488, 19.552, 21.168, 19.220]]\n", 61 | "})\n", 62 | "\n", 63 | "fig, ax = plt.subplots(2, layout='constrained')\n", 64 | "fig.get_layout_engine().set(hspace=.1)\n", 65 | "\n", 66 | "sns.barplot(flash, ax=ax[0], x='MCU', y='Usage (kB)', hue='Inference Engine', width=.7)\n", 67 | "ax[0].set_xlabel('')\n", 68 | "ax[0].set_title('Flash')\n", 69 | "ax[0].legend()\n", 70 | "\n", 71 | 
"sns.barplot(ram, ax=ax[1], x='MCU', y='Usage (kB)', hue='Inference Engine', width=.7)\n", 72 | "ax[1].set_xlabel('')\n", 73 | "ax[1].set_title('RAM')\n", 74 | "ax[1].legend()\n", 75 | "\n", 76 | "plt.savefig('plots/speech-memory.pdf')\n", 77 | "plt.show()" 78 | ] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "microflow", 84 | "language": "python", 85 | "name": "microflow" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.10.9" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 5 102 | } 103 | -------------------------------------------------------------------------------- /analysis/performance/data/person-detect-esp32-microflow.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,5178533 3 | 2,5177907 4 | 3,5176453 5 | 4,5174880 6 | 5,5172640 7 | 6,5172253 8 | 7,5170106 9 | 8,5168120 10 | 9,5166480 11 | 10,5165466 12 | 11,5163853 13 | 12,5160934 14 | 13,5161160 15 | 14,5159854 16 | 15,5159733 17 | 16,5157733 18 | 17,5155907 19 | 18,5155494 20 | 19,5154693 21 | 20,5153920 22 | 21,5151413 23 | 22,5151827 24 | 23,5150307 25 | 24,5149520 26 | 25,5150013 27 | 26,5147667 28 | 27,5146947 29 | 28,5146200 30 | 29,5144467 31 | 30,5140654 32 | 31,5140973 33 | 32,5140506 34 | 33,5141186 35 | 34,5140614 36 | 35,5139600 37 | 36,5139053 38 | 37,5139374 39 | 38,5140106 40 | 39,5139306 41 | 40,5138960 42 | 41,5135893 43 | 42,5134374 44 | 43,5135747 45 | 44,5134946 46 | 45,5134067 47 | 46,5132560 48 | 47,5132453 49 | 48,5132826 50 | 49,5132986 51 | 50,5131760 52 | 51,5131600 53 | 52,5131000 54 | 53,5129787 55 | 54,5129840 56 | 55,5129160 57 | 56,5128773 58 | 57,5127093 59 | 58,5128200 60 | 59,5128053 61 | 60,5125747 62 | 61,5126333 63 | 62,5125614 64 | 
63,5125120 65 | 64,5124787 66 | 65,5124067 67 | 66,5124027 68 | 67,5123893 69 | 68,5124254 70 | 69,5122253 71 | 70,5122640 72 | 71,5122613 73 | 72,5123480 74 | 73,5122627 75 | 74,5122480 76 | 75,5122080 77 | 76,5120307 78 | 77,5120427 79 | 78,5120427 80 | 79,5121814 81 | 80,5121026 82 | 81,5120587 83 | 82,5120293 84 | 83,5120854 85 | 84,5120760 86 | 85,5120547 87 | 86,5119747 88 | 87,5120454 89 | 88,5120013 90 | 89,5119933 91 | 90,5120374 92 | 91,5118960 93 | 92,5118240 94 | 93,5118920 95 | 94,5118426 96 | 95,5117266 97 | 96,5117813 98 | 97,5117293 99 | 98,5116360 100 | 99,5115146 101 | 100,5116454 102 | -------------------------------------------------------------------------------- /analysis/performance/data/person-detect-esp32-tflm.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,4976777 3 | 2,4973647 4 | 3,4965866 5 | 4,4987112 6 | 5,4968241 7 | 6,4961614 8 | 7,4968636 9 | 8,4980608 10 | 9,4959691 11 | 10,4956314 12 | 11,4978935 13 | 12,4989492 14 | 13,4994797 15 | 14,4994547 16 | 15,4991909 17 | 16,4971436 18 | 17,4973493 19 | 18,4980157 20 | 19,4984550 21 | 20,4994818 22 | 21,4976856 23 | 22,4979372 24 | 23,4984338 25 | 24,4975640 26 | 25,4989934 27 | 26,4972401 28 | 27,4954191 29 | 28,4982271 30 | 29,4983667 31 | 30,4984591 32 | 31,4975391 33 | 32,4990218 34 | 33,4985984 35 | 34,4969550 36 | 35,4980291 37 | 36,4969550 38 | 37,4963173 39 | 38,4973671 40 | 39,4988187 41 | 40,4970671 42 | 41,4972941 43 | 42,4958986 44 | 43,4954015 45 | 44,4958804 46 | 45,4986634 47 | 46,4975788 48 | 47,4973233 49 | 48,4976888 50 | 49,4962191 51 | 50,4965803 52 | 51,4980030 53 | 52,4984812 54 | 53,4996485 55 | 54,4963287 56 | 55,4976826 57 | 56,4979057 58 | 57,4978546 59 | 58,4965346 60 | 59,4964471 61 | 60,4972621 62 | 61,4966431 63 | 62,4972952 64 | 63,4976290 65 | 64,4984216 66 | 65,4976033 67 | 66,4965671 68 | 67,4969560 69 | 68,4968895 70 | 69,4988962 71 | 70,4977032 72 | 71,4990184 73 | 72,4976633 74 | 73,4980054 75 
| 74,4993730 76 | 75,4967757 77 | 76,4978817 78 | 77,4978906 79 | 78,4974667 80 | 79,4970640 81 | 80,4993393 82 | 81,4979067 83 | 82,4975820 84 | 83,4957030 85 | 84,4963796 86 | 85,4983313 87 | 86,4976169 88 | 87,4980770 89 | 88,4972035 90 | 89,4958763 91 | 90,4974336 92 | 91,4977621 93 | 92,4990391 94 | 93,4977313 95 | 94,4984372 96 | 95,4972481 97 | 96,4990489 98 | 97,4978620 99 | 98,4982835 100 | 99,4975165 101 | 100,4971415 102 | -------------------------------------------------------------------------------- /analysis/performance/data/person-detect-nrf52840-microflow.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,896319 3 | 2,895625 4 | 3,895902 5 | 4,895890 6 | 5,895083 7 | 6,897362 8 | 7,896600 9 | 8,897278 10 | 9,895444 11 | 10,895598 12 | 11,896848 13 | 12,896566 14 | 13,895844 15 | 14,895854 16 | 15,896199 17 | 16,896513 18 | 17,896911 19 | 18,896065 20 | 19,895692 21 | 20,895944 22 | 21,895725 23 | 22,896710 24 | 23,895216 25 | 24,896941 26 | 25,896979 27 | 26,894942 28 | 27,897510 29 | 28,894557 30 | 29,896651 31 | 30,896589 32 | 31,896749 33 | 32,896783 34 | 33,895892 35 | 34,897367 36 | 35,896075 37 | 36,897546 38 | 37,895796 39 | 38,897728 40 | 39,895848 41 | 40,895738 42 | 41,895788 43 | 42,896554 44 | 43,896515 45 | 44,895985 46 | 45,896173 47 | 46,895393 48 | 47,896877 49 | 48,895902 50 | 49,895952 51 | 50,896920 52 | 51,894377 53 | 52,897090 54 | 53,894996 55 | 54,895008 56 | 55,894612 57 | 56,897870 58 | 57,895078 59 | 58,895686 60 | 59,896116 61 | 60,897262 62 | 61,897086 63 | 62,893840 64 | 63,896336 65 | 64,895806 66 | 65,896329 67 | 66,896699 68 | 67,897456 69 | 68,896990 70 | 69,895907 71 | 70,895511 72 | 71,896889 73 | 72,896823 74 | 73,895526 75 | 74,895633 76 | 75,897727 77 | 76,898178 78 | 77,899029 79 | 78,895875 80 | 79,897048 81 | 80,893451 82 | 81,896058 83 | 82,896576 84 | 83,895714 85 | 84,893485 86 | 85,896823 87 | 86,895600 88 | 87,896603 89 | 88,895611 90 | 89,894846 91 
| 90,896740 92 | 91,897314 93 | 92,896534 94 | 93,896458 95 | 94,896443 96 | 95,898251 97 | 96,894487 98 | 97,896952 99 | 98,895222 100 | 99,896614 101 | 100,896130 102 | -------------------------------------------------------------------------------- /analysis/performance/data/person-detect-nrf52840-tflm.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,839260 3 | 2,839150 4 | 3,839116 5 | 4,838858 6 | 5,838490 7 | 6,838269 8 | 7,837915 9 | 8,837975 10 | 9,838559 11 | 10,838753 12 | 11,839012 13 | 12,839235 14 | 13,839372 15 | 14,839504 16 | 15,839573 17 | 16,839643 18 | 17,839888 19 | 18,839914 20 | 19,839926 21 | 20,839831 22 | 21,839763 23 | 22,839576 24 | 23,839566 25 | 24,839524 26 | 25,839448 27 | 26,839265 28 | 27,839001 29 | 28,838760 30 | 29,838541 31 | 30,838255 32 | 31,837788 33 | 32,838250 34 | 33,838699 35 | 34,838911 36 | 35,839053 37 | 36,839275 38 | 37,839434 39 | 38,839565 40 | 39,839710 41 | 40,839663 42 | 41,839861 43 | 42,839973 44 | 43,839915 45 | 44,839825 46 | 45,839758 47 | 46,839644 48 | 47,839544 49 | 48,839475 50 | 49,839289 51 | 50,839119 52 | 51,838933 53 | 52,838548 54 | 53,838429 55 | 54,838088 56 | 55,837881 57 | 56,838520 58 | 57,838765 59 | 58,838970 60 | 59,839240 61 | 60,839425 62 | 61,839571 63 | 62,839636 64 | 63,839726 65 | 64,839753 66 | 65,839866 67 | 66,839824 68 | 67,839918 69 | 68,839830 70 | 69,839778 71 | 70,839605 72 | 71,839568 73 | 72,839419 74 | 73,839246 75 | 74,839007 76 | 75,838733 77 | 76,838530 78 | 77,838347 79 | 78,837837 80 | 79,838169 81 | 80,838623 82 | 81,838802 83 | 82,839072 84 | 83,839286 85 | 84,839409 86 | 85,839596 87 | 86,839651 88 | 87,839726 89 | 88,839711 90 | 89,839831 91 | 90,839934 92 | 91,839842 93 | 92,839777 94 | 93,839742 95 | 94,839567 96 | 95,839473 97 | 96,839394 98 | 97,839058 99 | 98,838864 100 | 99,838669 101 | 100,838426 102 | -------------------------------------------------------------------------------- 
/analysis/performance/data/sine-esp32-microflow.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,269 3 | 2,106 4 | 3,154 5 | 4,137 6 | 5,154 7 | 6,86 8 | 7,143 9 | 8,112 10 | 9,115 11 | 10,141 12 | 11,109 13 | 12,139 14 | 13,123 15 | 14,118 16 | 15,112 17 | 16,119 18 | 17,125 19 | 18,126 20 | 19,151 21 | 20,145 22 | 21,113 23 | 22,127 24 | 23,139 25 | 24,128 26 | 25,95 27 | 26,129 28 | 27,107 29 | 28,173 30 | 29,103 31 | 30,166 32 | 31,133 33 | 32,104 34 | 33,140 35 | 34,131 36 | 35,86 37 | 36,133 38 | 37,109 39 | 38,97 40 | 39,140 41 | 40,164 42 | 41,144 43 | 42,117 44 | 43,136 45 | 44,143 46 | 45,113 47 | 46,145 48 | 47,178 49 | 48,148 50 | 49,191 51 | 50,119 52 | 51,137 53 | 52,102 54 | 53,116 55 | 54,123 56 | 55,136 57 | 56,132 58 | 57,145 59 | 58,160 60 | 59,180 61 | 60,193 62 | 61,172 63 | 62,151 64 | 63,151 65 | 64,144 66 | 65,171 67 | 66,153 68 | 67,120 69 | 68,145 70 | 69,156 71 | 70,148 72 | 71,137 73 | 72,115 74 | 73,152 75 | 74,125 76 | 75,132 77 | 76,160 78 | 77,122 79 | 78,147 80 | 79,155 81 | 80,140 82 | 81,145 83 | 82,176 84 | 83,131 85 | 84,123 86 | 85,117 87 | 86,141 88 | 87,113 89 | 88,165 90 | 89,111 91 | 90,117 92 | 91,104 93 | 92,134 94 | 93,142 95 | 94,154 96 | 95,162 97 | 96,142 98 | 97,167 99 | 98,152 100 | 99,185 101 | 100,168 102 | -------------------------------------------------------------------------------- /analysis/performance/data/sine-esp32-tflm.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,1895 3 | 2,1783 4 | 3,1771 5 | 4,1772 6 | 5,1773 7 | 6,1777 8 | 7,1765 9 | 8,1760 10 | 9,1778 11 | 10,1790 12 | 11,1770 13 | 12,1763 14 | 13,1811 15 | 14,1827 16 | 15,1803 17 | 16,1785 18 | 17,1791 19 | 18,1769 20 | 19,1758 21 | 20,1774 22 | 21,1806 23 | 22,1764 24 | 23,1786 25 | 24,1782 26 | 25,1797 27 | 26,1794 28 | 27,1795 29 | 28,1802 30 | 29,1783 31 | 30,1806 32 | 31,1801 33 | 32,1791 34 | 33,1787 35 | 34,1779 36 | 35,1802 
37 | 36,1827 38 | 37,1778 39 | 38,1735 40 | 39,1827 41 | 40,1775 42 | 41,1806 43 | 42,1780 44 | 43,1784 45 | 44,1811 46 | 45,1801 47 | 46,1783 48 | 47,1774 49 | 48,1782 50 | 49,1790 51 | 50,1775 52 | 51,1796 53 | 52,1805 54 | 53,1796 55 | 54,1770 56 | 55,1795 57 | 56,1806 58 | 57,1803 59 | 58,1806 60 | 59,1806 61 | 60,1804 62 | 61,1779 63 | 62,1797 64 | 63,1769 65 | 64,1800 66 | 65,1795 67 | 66,1765 68 | 67,1773 69 | 68,1806 70 | 69,1808 71 | 70,1810 72 | 71,1799 73 | 72,1814 74 | 73,1797 75 | 74,1780 76 | 75,1792 77 | 76,1785 78 | 77,1775 79 | 78,1783 80 | 79,1815 81 | 80,1759 82 | 81,1784 83 | 82,1789 84 | 83,1781 85 | 84,1771 86 | 85,1799 87 | 86,1814 88 | 87,1793 89 | 88,1783 90 | 89,1756 91 | 90,1809 92 | 91,1805 93 | 92,1779 94 | 93,1768 95 | 94,1771 96 | 95,1828 97 | 96,1786 98 | 97,1764 99 | 98,1776 100 | 99,1827 101 | 100,1776 102 | -------------------------------------------------------------------------------- /analysis/performance/data/sine-nrf52840-microflow.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,186 3 | 2,189 4 | 3,218 5 | 4,220 6 | 5,209 7 | 6,184 8 | 7,182 9 | 8,210 10 | 9,218 11 | 10,203 12 | 11,186 13 | 12,188 14 | 13,183 15 | 14,190 16 | 15,196 17 | 16,181 18 | 17,176 19 | 18,177 20 | 19,192 21 | 20,179 22 | 21,184 23 | 22,184 24 | 23,183 25 | 24,214 26 | 25,220 27 | 26,217 28 | 27,219 29 | 28,218 30 | 29,206 31 | 30,205 32 | 31,208 33 | 32,207 34 | 33,180 35 | 34,181 36 | 35,181 37 | 36,189 38 | 37,187 39 | 38,180 40 | 39,177 41 | 40,182 42 | 41,175 43 | 42,179 44 | 43,184 45 | 44,187 46 | 45,188 47 | 46,212 48 | 47,211 49 | 48,210 50 | 49,209 51 | 50,210 52 | 51,201 53 | 52,214 54 | 53,211 55 | 54,212 56 | 55,212 57 | 56,179 58 | 57,185 59 | 58,177 60 | 59,182 61 | 60,175 62 | 61,174 63 | 62,183 64 | 63,181 65 | 64,189 66 | 65,173 67 | 66,172 68 | 67,178 69 | 68,180 70 | 69,207 71 | 70,211 72 | 71,209 73 | 72,217 74 | 73,220 75 | 74,210 76 | 75,219 77 | 76,215 78 | 77,210 79 | 
78,188 80 | 79,176 81 | 80,178 82 | 81,185 83 | 82,180 84 | 83,185 85 | 84,171 86 | 85,176 87 | 86,191 88 | 87,171 89 | 88,181 90 | 89,180 91 | 90,183 92 | 91,219 93 | 92,218 94 | 93,215 95 | 94,208 96 | 95,213 97 | 96,213 98 | 97,215 99 | 98,210 100 | 99,218 101 | 100,175 102 | -------------------------------------------------------------------------------- /analysis/performance/data/sine-nrf52840-tflm.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,2638 3 | 2,2621 4 | 3,2620 5 | 4,2592 6 | 5,2593 7 | 6,2593 8 | 7,2592 9 | 8,2593 10 | 9,2593 11 | 10,2593 12 | 11,2594 13 | 12,2594 14 | 13,2594 15 | 14,2595 16 | 15,2594 17 | 16,2612 18 | 17,2639 19 | 18,2640 20 | 19,2594 21 | 20,2595 22 | 21,2596 23 | 22,2595 24 | 23,2595 25 | 24,2595 26 | 25,2612 27 | 26,2640 28 | 27,2640 29 | 28,2594 30 | 29,2594 31 | 30,2596 32 | 31,2595 33 | 32,2595 34 | 33,2595 35 | 34,2613 36 | 35,2636 37 | 36,2641 38 | 37,2622 39 | 38,2594 40 | 39,2597 41 | 40,2596 42 | 41,2596 43 | 42,2596 44 | 43,2614 45 | 44,2640 46 | 45,2641 47 | 46,2622 48 | 47,2595 49 | 48,2597 50 | 49,2596 51 | 50,2596 52 | 51,2596 53 | 52,2613 54 | 53,2641 55 | 54,2641 56 | 55,2623 57 | 56,2595 58 | 57,2597 59 | 58,2595 60 | 59,2597 61 | 60,2596 62 | 61,2613 63 | 62,2641 64 | 63,2641 65 | 64,2622 66 | 65,2595 67 | 66,2597 68 | 67,2595 69 | 68,2596 70 | 69,2595 71 | 70,2612 72 | 71,2641 73 | 72,2640 74 | 73,2595 75 | 74,2594 76 | 75,2597 77 | 76,2595 78 | 77,2597 79 | 78,2596 80 | 79,2614 81 | 80,2640 82 | 81,2640 83 | 82,2623 84 | 83,2596 85 | 84,2596 86 | 85,2596 87 | 86,2597 88 | 87,2596 89 | 88,2613 90 | 89,2641 91 | 90,2641 92 | 91,2595 93 | 92,2595 94 | 93,2596 95 | 94,2595 96 | 95,2597 97 | 96,2596 98 | 97,2614 99 | 98,2641 100 | 99,2640 101 | 100,2595 102 | -------------------------------------------------------------------------------- /analysis/performance/data/speech-esp32-microflow.csv: 
-------------------------------------------------------------------------------- 1 | iter,time 2 | 1,190920 3 | 2,190080 4 | 3,190013 5 | 4,189960 6 | 5,189920 7 | 6,189880 8 | 7,189773 9 | 8,189747 10 | 9,189654 11 | 10,189653 12 | 11,189653 13 | 12,189680 14 | 13,189613 15 | 14,189666 16 | 15,189600 17 | 16,189627 18 | 17,189573 19 | 18,189627 20 | 19,189614 21 | 20,189627 22 | 21,189614 23 | 22,189600 24 | 23,189533 25 | 24,189534 26 | 25,189547 27 | 26,189586 28 | 27,189533 29 | 28,189573 30 | 29,189533 31 | 30,189506 32 | 31,189480 33 | 32,189506 34 | 33,189480 35 | 34,189480 36 | 35,189453 37 | 36,189413 38 | 37,189373 39 | 38,189413 40 | 39,189293 41 | 40,189400 42 | 41,189400 43 | 42,189440 44 | 43,189373 45 | 44,189386 46 | 45,189387 47 | 46,189360 48 | 47,189320 49 | 48,189333 50 | 49,189306 51 | 50,189360 52 | 51,189347 53 | 52,189427 54 | 53,189360 55 | 54,189414 56 | 55,189333 57 | 56,189333 58 | 57,189293 59 | 58,189226 60 | 59,189187 61 | 60,189267 62 | 61,189187 63 | 62,189280 64 | 63,189266 65 | 64,189333 66 | 65,189293 67 | 66,189254 68 | 67,189267 69 | 68,189280 70 | 69,189267 71 | 70,189280 72 | 71,189266 73 | 72,189266 74 | 73,189266 75 | 74,189306 76 | 75,189253 77 | 76,189253 78 | 77,189093 79 | 78,189174 80 | 79,189134 81 | 80,189240 82 | 81,189147 83 | 82,189147 84 | 83,189147 85 | 84,189160 86 | 85,189120 87 | 86,189173 88 | 87,189160 89 | 88,189240 90 | 89,189173 91 | 90,189187 92 | 91,189160 93 | 92,189214 94 | 93,189160 95 | 94,189174 96 | 95,189107 97 | 96,189106 98 | 97,189080 99 | 98,189107 100 | 99,189053 101 | 100,189160 102 | -------------------------------------------------------------------------------- /analysis/performance/data/speech-esp32-tflm.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,207618 3 | 2,207398 4 | 3,207403 5 | 4,207397 6 | 5,207436 7 | 6,207426 8 | 7,207456 9 | 8,207452 10 | 9,207439 11 | 10,207423 12 | 11,207430 13 | 12,207427 14 | 13,207469 15 | 
14,207449 16 | 15,207439 17 | 16,207454 18 | 17,207454 19 | 18,207431 20 | 19,207463 21 | 20,207400 22 | 21,207410 23 | 22,207433 24 | 23,207428 25 | 24,207434 26 | 25,207499 27 | 26,207430 28 | 27,207448 29 | 28,207405 30 | 29,207405 31 | 30,207425 32 | 31,207433 33 | 32,207404 34 | 33,207457 35 | 34,207395 36 | 35,207439 37 | 36,207422 38 | 37,207452 39 | 38,207394 40 | 39,207434 41 | 40,207426 42 | 41,207406 43 | 42,207424 44 | 43,207438 45 | 44,207404 46 | 45,207465 47 | 46,207440 48 | 47,207433 49 | 48,207411 50 | 49,207464 51 | 50,207400 52 | 51,207406 53 | 52,207407 54 | 53,207450 55 | 54,207404 56 | 55,207454 57 | 56,207393 58 | 57,207447 59 | 58,207464 60 | 59,207438 61 | 60,207398 62 | 61,207434 63 | 62,207399 64 | 63,207441 65 | 64,207457 66 | 65,207395 67 | 66,207410 68 | 67,207396 69 | 68,207397 70 | 69,207452 71 | 70,207449 72 | 71,207434 73 | 72,207446 74 | 73,207446 75 | 74,207369 76 | 75,207428 77 | 76,207446 78 | 77,207435 79 | 78,207420 80 | 79,207433 81 | 80,207408 82 | 81,207423 83 | 82,207395 84 | 83,207470 85 | 84,207427 86 | 85,207434 87 | 86,207439 88 | 87,207465 89 | 88,207398 90 | 89,207424 91 | 90,207420 92 | 91,207441 93 | 92,207433 94 | 93,207444 95 | 94,207398 96 | 95,207472 97 | 96,207418 98 | 97,207448 99 | 98,207414 100 | 99,207436 101 | 100,207414 102 | -------------------------------------------------------------------------------- /analysis/performance/data/speech-nrf52840-microflow.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,50598 3 | 2,50586 4 | 3,50604 5 | 4,50585 6 | 5,50609 7 | 6,50607 8 | 7,50604 9 | 8,50583 10 | 9,50581 11 | 10,50604 12 | 11,50603 13 | 12,50586 14 | 13,50584 15 | 14,50603 16 | 15,50602 17 | 16,50603 18 | 17,50596 19 | 18,50602 20 | 19,50603 21 | 20,50597 22 | 21,50585 23 | 22,50586 24 | 23,50591 25 | 24,50598 26 | 25,50586 27 | 26,50589 28 | 27,50608 29 | 28,50596 30 | 29,50602 31 | 30,50622 32 | 31,50605 33 | 32,50604 34 | 33,50585 35 | 
34,50602 36 | 35,50601 37 | 36,50607 38 | 37,50589 39 | 38,50586 40 | 39,50592 41 | 40,50593 42 | 41,50604 43 | 42,50598 44 | 43,50597 45 | 44,50591 46 | 45,50591 47 | 46,50593 48 | 47,50598 49 | 48,50580 50 | 49,50597 51 | 50,50604 52 | 51,50581 53 | 52,50633 54 | 53,50581 55 | 54,50596 56 | 55,50609 57 | 56,50602 58 | 57,50607 59 | 58,50598 60 | 59,50592 61 | 60,50595 62 | 61,50605 63 | 62,50591 64 | 63,50589 65 | 64,50603 66 | 65,50587 67 | 66,50597 68 | 67,50585 69 | 68,50592 70 | 69,50597 71 | 70,50586 72 | 71,50584 73 | 72,50604 74 | 73,50598 75 | 74,50626 76 | 75,50592 77 | 76,50602 78 | 77,50586 79 | 78,50601 80 | 79,50598 81 | 80,50584 82 | 81,50595 83 | 82,50586 84 | 83,50581 85 | 84,50601 86 | 85,50592 87 | 86,50604 88 | 87,50609 89 | 88,50589 90 | 89,50595 91 | 90,50590 92 | 91,50587 93 | 92,50580 94 | 93,50586 95 | 94,50608 96 | 95,50604 97 | 96,50615 98 | 97,50598 99 | 98,50580 100 | 99,50598 101 | 100,50609 102 | -------------------------------------------------------------------------------- /analysis/performance/data/speech-nrf52840-tflm.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,59173 3 | 2,59186 4 | 3,59266 5 | 4,59229 6 | 5,59185 7 | 6,59234 8 | 7,59232 9 | 8,59244 10 | 9,59285 11 | 10,59240 12 | 11,59279 13 | 12,59242 14 | 13,59264 15 | 14,59256 16 | 15,59262 17 | 16,59300 18 | 17,59289 19 | 18,59286 20 | 19,59271 21 | 20,59291 22 | 21,59283 23 | 22,59274 24 | 23,59292 25 | 24,59245 26 | 25,59251 27 | 26,59245 28 | 27,59287 29 | 28,59294 30 | 29,59283 31 | 30,59243 32 | 31,59246 33 | 32,59245 34 | 33,59275 35 | 34,59244 36 | 35,59245 37 | 36,59270 38 | 37,59231 39 | 38,59243 40 | 39,59255 41 | 40,59276 42 | 41,59250 43 | 42,59267 44 | 43,59272 45 | 44,59252 46 | 45,59287 47 | 46,59256 48 | 47,59280 49 | 48,59277 50 | 49,59242 51 | 50,59241 52 | 51,59277 53 | 52,59242 54 | 53,59239 55 | 54,59227 56 | 55,59224 57 | 56,59294 58 | 57,59224 59 | 58,59248 60 | 59,59253 61 | 60,59235 62 | 
61,59232 63 | 62,59246 64 | 63,59272 65 | 64,59247 66 | 65,59261 67 | 66,59243 68 | 67,59264 69 | 68,59255 70 | 69,59251 71 | 70,59237 72 | 71,59243 73 | 72,59240 74 | 73,59256 75 | 74,59240 76 | 75,59240 77 | 76,59236 78 | 77,59275 79 | 78,59261 80 | 79,59227 81 | 80,59259 82 | 81,59274 83 | 82,59327 84 | 83,59256 85 | 84,59234 86 | 85,59238 87 | 86,59284 88 | 87,59236 89 | 88,59239 90 | 89,59317 91 | 90,59272 92 | 91,59232 93 | 92,59254 94 | 93,59211 95 | 94,59229 96 | 95,59267 97 | 96,59226 98 | 97,59277 99 | 98,59293 100 | 99,59232 101 | 100,59289 102 | -------------------------------------------------------------------------------- /analysis/performance/person_detect.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6e987910-eeb0-46fe-9650-e962701f51a6", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Person Detect Performance" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "812a1605-f1c6-4e55-a19d-e334f708d4c2", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "a0250ec6-017e-4f68-bd88-b5b02dc1e16c", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "from matplotlib.ticker import FormatStrFormatter\n", 33 | "import seaborn as sns" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "14b66780-b555-48fb-82cb-0f4b26ac68d2", 39 | "metadata": {}, 40 | "source": [ 41 | "## ESP32" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "afc17494-4359-4500-a4b6-22da30757064", 48 | "metadata": { 49 | "tags": [] 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "tflm_esp32 = pd.read_csv('data/person-detect-esp32-tflm.csv')\n", 54 | "microflow_esp32 = 
pd.read_csv('data/person-detect-esp32-microflow.csv')\n", 55 | "tflm_esp32.time /= 1000\n", 56 | "microflow_esp32.time /= 1000\n", 57 | "\n", 58 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 59 | "fig.suptitle(\"Person Detector Model on the ESP32\")\n", 60 | "top_limit = 35\n", 61 | "\n", 62 | "sns.histplot(tflm_esp32.time, ax=ax[0])\n", 63 | "start, end = ax[0].get_xlim()\n", 64 | "median = tflm_esp32.time.median()\n", 65 | "ax[0].axvline(median, color='red', label='Median')\n", 66 | "ax[0].set_xlabel('Execution Time (ms)')\n", 67 | "ax[0].set_xticks((start, median, end))\n", 68 | "ax[0].set_ylim(top=top_limit)\n", 69 | "ax[0].set_title('TFLM')\n", 70 | "ax[0].legend()\n", 71 | "\n", 72 | "sns.histplot(microflow_esp32.time, ax=ax[1])\n", 73 | "start, end = ax[1].get_xlim()\n", 74 | "median = microflow_esp32.time.median()\n", 75 | "ax[1].axvline(median, color='red', label='Median')\n", 76 | "ax[1].set_xlabel('Execution Time (ms)')\n", 77 | "ax[1].set_xticks((start, median, end))\n", 78 | "ax[1].set_ylim(top=top_limit)\n", 79 | "ax[1].set_title('MicroFlow')\n", 80 | "ax[1].legend()\n", 81 | "\n", 82 | "plt.savefig('plots/person-detect-esp32-performance.pdf')\n", 83 | "plt.show()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "0dafe385-ab7b-4488-9eb3-b23202c4787b", 89 | "metadata": {}, 90 | "source": [ 91 | "## nRF52840" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "id": "58db7c12-52e2-4a71-b22c-78e46c88d8ab", 98 | "metadata": { 99 | "tags": [] 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "tflm_nrf52840 = pd.read_csv('data/person-detect-nrf52840-tflm.csv')\n", 104 | "microflow_nrf52840 = pd.read_csv('data/person-detect-nrf52840-microflow.csv')\n", 105 | "tflm_nrf52840.time /= 1000\n", 106 | "microflow_nrf52840.time /= 1000\n", 107 | "\n", 108 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 109 | "fig.suptitle(\"Person Detector Model on the 
nRF52840\")\n", 110 | "top_limit = 30\n", 111 | "\n", 112 | "sns.histplot(tflm_nrf52840.time, ax=ax[0])\n", 113 | "start, end = ax[0].get_xlim()\n", 114 | "median = tflm_nrf52840.time.median()\n", 115 | "ax[0].axvline(median, color='red', label='Median')\n", 116 | "ax[0].set_xlabel('Execution Time (ms)')\n", 117 | "ax[0].set_xticks((start, median, end))\n", 118 | "ax[0].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))\n", 119 | "ax[0].set_ylim(top=top_limit)\n", 120 | "ax[0].set_title('TFLM')\n", 121 | "ax[0].legend(loc='upper left')\n", 122 | "\n", 123 | "sns.histplot(microflow_nrf52840.time, ax=ax[1])\n", 124 | "start, end = ax[1].get_xlim()\n", 125 | "median = microflow_nrf52840.time.median()\n", 126 | "ax[1].axvline(median, color='red', label='Median')\n", 127 | "ax[1].set_xlabel('Execution Time (ms)')\n", 128 | "ax[1].set_xticks((start, median, end))\n", 129 | "ax[1].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))\n", 130 | "ax[1].set_ylim(top=top_limit)\n", 131 | "ax[1].set_title('MicroFlow')\n", 132 | "ax[1].legend()\n", 133 | "\n", 134 | "plt.savefig('plots/person-detect-nrf52840-performance.pdf')\n", 135 | "plt.show()" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "id": "9c3958ca-4c89-483b-be63-200453c9b0f0", 141 | "metadata": {}, 142 | "source": [ 143 | "## Comparison" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "id": "5eb9b8ed-7018-4031-9011-7aa4ceb05822", 150 | "metadata": { 151 | "tags": [] 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "data = pd.DataFrame({\n", 156 | " 'MCU': [*['ESP32'] * 200, *['nRF52840'] * 200],\n", 157 | " 'Inference Engine': [*['TFLM'] * 100, *['MicroFlow'] * 100] * 2,\n", 158 | " 'Execution Time (ms)': [*tflm_esp32.time, *microflow_esp32.time, *tflm_nrf52840.time, *microflow_nrf52840.time]\n", 159 | "})\n", 160 | "\n", 161 | "plt.figure(figsize=(8, 3), layout='constrained')\n", 162 | "sns.barplot(\n", 163 | " data=data,\n", 164 | " x='Execution Time 
(ms)',\n", 165 | " y='MCU',\n", 166 | " hue='Inference Engine',\n", 167 | " estimator='median',\n", 168 | " errorbar='pi',\n", 169 | " capsize=.1,\n", 170 | " errwidth=2,\n", 171 | " width=.7\n", 172 | ")\n", 173 | "plt.ylabel('')\n", 174 | "plt.legend()\n", 175 | "plt.savefig('plots/person-detect-performance-comparison.pdf')\n", 176 | "plt.show()" 177 | ] 178 | } 179 | ], 180 | "metadata": { 181 | "kernelspec": { 182 | "display_name": "microflow", 183 | "language": "python", 184 | "name": "microflow" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": { 188 | "name": "ipython", 189 | "version": 3 190 | }, 191 | "file_extension": ".py", 192 | "mimetype": "text/x-python", 193 | "name": "python", 194 | "nbconvert_exporter": "python", 195 | "pygments_lexer": "ipython3", 196 | "version": "3.10.9" 197 | } 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 5 201 | } 202 | -------------------------------------------------------------------------------- /analysis/performance/plots/person-detect-esp32-performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/person-detect-esp32-performance.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/person-detect-nrf52840-performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/person-detect-nrf52840-performance.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/person-detect-performance-comparison.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/person-detect-performance-comparison.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/sine-esp32-performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/sine-esp32-performance.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/sine-nRF52840-performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/sine-nRF52840-performance.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/sine-performance-comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/sine-performance-comparison.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/speech-esp32-performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/speech-esp32-performance.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/speech-nrf52840-performance.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/speech-nrf52840-performance.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/speech-performance-comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/speech-performance-comparison.pdf -------------------------------------------------------------------------------- /analysis/performance/sine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6e987910-eeb0-46fe-9650-e962701f51a6", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Sine Performance" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "812a1605-f1c6-4e55-a19d-e334f708d4c2", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "a0250ec6-017e-4f68-bd88-b5b02dc1e16c", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "import seaborn as sns" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "b12f59e8-8cca-453d-b933-7563ec316ea5", 38 | "metadata": {}, 39 | "source": [ 40 | "## ESP32" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "afc17494-4359-4500-a4b6-22da30757064", 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "tflm_esp32 = pd.read_csv('data/sine-esp32-tflm.csv')\n", 53 | "microflow_esp32 = pd.read_csv('data/sine-esp32-microflow.csv')\n", 54 | "\n", 55 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 56 | 
"fig.suptitle(\"Sine Predictor Model on the ESP32\")\n", 57 | "top_limit = 30\n", 58 | "\n", 59 | "sns.histplot(tflm_esp32.time, ax=ax[0])\n", 60 | "start, end = ax[0].get_xlim()\n", 61 | "median = tflm_esp32.time.median()\n", 62 | "ax[0].axvline(median, color='red', label='Median')\n", 63 | "ax[0].set_xlabel('Execution Time (µs)')\n", 64 | "ax[0].set_xticks((start, median, end))\n", 65 | "ax[0].set_ylim(top=top_limit)\n", 66 | "ax[0].set_title('TFLM')\n", 67 | "ax[0].legend()\n", 68 | "\n", 69 | "sns.histplot(microflow_esp32.time, ax=ax[1])\n", 70 | "start, end = ax[1].get_xlim()\n", 71 | "median = microflow_esp32.time.median()\n", 72 | "ax[1].axvline(median, color='red', label='Median')\n", 73 | "ax[1].set_xlabel('Execution Time (µs)')\n", 74 | "ax[1].set_xticks((start, median, end))\n", 75 | "ax[1].set_ylim(top=top_limit)\n", 76 | "ax[1].set_title('MicroFlow')\n", 77 | "ax[1].legend()\n", 78 | "\n", 79 | "plt.savefig('plots/sine-esp32-performance.pdf')\n", 80 | "plt.show()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "id": "0dafe385-ab7b-4488-9eb3-b23202c4787b", 86 | "metadata": {}, 87 | "source": [ 88 | "## nRF52840" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "id": "58db7c12-52e2-4a71-b22c-78e46c88d8ab", 95 | "metadata": { 96 | "tags": [] 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "tflm_nrf52840 = pd.read_csv('data/sine-nrf52840-tflm.csv')\n", 101 | "microflow_nrf52840 = pd.read_csv('data/sine-nrf52840-microflow.csv')\n", 102 | "\n", 103 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 104 | "fig.suptitle(\"Sine Predictor Model on the nRF52840\")\n", 105 | "top_limit = 70\n", 106 | "\n", 107 | "sns.histplot(tflm_nrf52840.time, ax=ax[0])\n", 108 | "start, end = ax[0].get_xlim()\n", 109 | "median = tflm_nrf52840.time.median()\n", 110 | "ax[0].axvline(median, color='red', label='Median')\n", 111 | "ax[0].set_xlabel('Execution Time (µs)')\n", 112 | "ax[0].set_xticks((start-10, 
median, end))\n", 113 | "ax[0].set_ylim(top=top_limit)\n", 114 | "ax[0].set_title('TFLM')\n", 115 | "ax[0].legend()\n", 116 | "\n", 117 | "sns.histplot(microflow_nrf52840.time, ax=ax[1])\n", 118 | "start, end = ax[1].get_xlim()\n", 119 | "median = microflow_nrf52840.time.median()\n", 120 | "ax[1].axvline(median, color='red', label='Median')\n", 121 | "ax[1].set_xlabel('Execution Time (µs)')\n", 122 | "ax[1].set_xticks((start, median, end))\n", 123 | "ax[1].set_ylim(top=top_limit)\n", 124 | "ax[1].set_title('MicroFlow')\n", 125 | "ax[1].legend()\n", 126 | "\n", 127 | "plt.savefig('plots/sine-nrf52840-performance.pdf')\n", 128 | "plt.show()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "aa69dde0-822f-41f4-92a4-90dfcdb19453", 134 | "metadata": {}, 135 | "source": [ 136 | "## Comparison" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "id": "96a49dd9-e2c1-4970-8212-7485afd6bd49", 143 | "metadata": { 144 | "tags": [] 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "data = pd.DataFrame({\n", 149 | " 'MCU': [*['ESP32'] * 200, *['nRF52840'] * 200],\n", 150 | " 'Inference Engine': [*['TFLM'] * 100, *['MicroFlow'] * 100] * 2,\n", 151 | " 'Execution Time (µs)': [*tflm_esp32.time, *microflow_esp32.time, *tflm_nrf52840.time, *microflow_nrf52840.time]\n", 152 | "})\n", 153 | "\n", 154 | "plt.figure(figsize=(8, 3), layout='constrained')\n", 155 | "sns.barplot(\n", 156 | " data=data,\n", 157 | " x='Execution Time (µs)',\n", 158 | " y='MCU',\n", 159 | " hue='Inference Engine',\n", 160 | " estimator='median',\n", 161 | " errorbar='pi',\n", 162 | " capsize=.1,\n", 163 | " errwidth=2,\n", 164 | " width=.7\n", 165 | ")\n", 166 | "plt.ylabel('')\n", 167 | "plt.legend()\n", 168 | "plt.savefig('plots/sine-performance-comparison.pdf')\n", 169 | "plt.show()" 170 | ] 171 | } 172 | ], 173 | "metadata": { 174 | "kernelspec": { 175 | "display_name": "microflow", 176 | "language": "python", 177 | "name": "microflow" 178 | }, 
179 | "language_info": { 180 | "codemirror_mode": { 181 | "name": "ipython", 182 | "version": 3 183 | }, 184 | "file_extension": ".py", 185 | "mimetype": "text/x-python", 186 | "name": "python", 187 | "nbconvert_exporter": "python", 188 | "pygments_lexer": "ipython3", 189 | "version": "3.10.9" 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 5 194 | } 195 | -------------------------------------------------------------------------------- /analysis/performance/speech.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6e987910-eeb0-46fe-9650-e962701f51a6", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Speech Performance" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "812a1605-f1c6-4e55-a19d-e334f708d4c2", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "a0250ec6-017e-4f68-bd88-b5b02dc1e16c", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "from matplotlib.ticker import FormatStrFormatter\n", 33 | "import seaborn as sns" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "14b66780-b555-48fb-82cb-0f4b26ac68d2", 39 | "metadata": {}, 40 | "source": [ 41 | "## ESP32" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "afc17494-4359-4500-a4b6-22da30757064", 48 | "metadata": { 49 | "tags": [] 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "tflm_esp32 = pd.read_csv('data/speech-esp32-tflm.csv')\n", 54 | "microflow_esp32 = pd.read_csv('data/speech-esp32-microflow.csv')\n", 55 | "tflm_esp32.time /= 1000\n", 56 | "microflow_esp32.time /= 1000\n", 57 | "\n", 58 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 59 | "fig.suptitle(\"Speech Command 
Recognizer Model on the ESP32\")\n", 60 | "top_limit = 30\n", 61 | "\n", 62 | "sns.histplot(tflm_esp32.time, ax=ax[0])\n", 63 | "start, end = ax[0].get_xlim()\n", 64 | "median = tflm_esp32.time.median()\n", 65 | "ax[0].axvline(median, color='red', label='Median')\n", 66 | "ax[0].set_xlabel('Execution Time (ms)')\n", 67 | "ax[0].set_xticks((start, median, end))\n", 68 | "ax[0].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))\n", 69 | "ax[0].set_ylim(top=top_limit)\n", 70 | "ax[0].set_title('TFLM')\n", 71 | "ax[0].legend()\n", 72 | "\n", 73 | "sns.histplot(microflow_esp32.time, ax=ax[1])\n", 74 | "start, end = ax[1].get_xlim()\n", 75 | "median = microflow_esp32.time.median()\n", 76 | "ax[1].axvline(median, color='red', label='Median')\n", 77 | "ax[1].set_xlabel('Execution Time (ms)')\n", 78 | "ax[1].set_xticks((start, median, end))\n", 79 | "ax[1].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))\n", 80 | "ax[1].set_ylim(top=top_limit)\n", 81 | "ax[1].set_title('MicroFlow')\n", 82 | "ax[1].legend()\n", 83 | "\n", 84 | "plt.savefig('plots/speech-esp32-performance.pdf')\n", 85 | "plt.show()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "id": "0dafe385-ab7b-4488-9eb3-b23202c4787b", 91 | "metadata": {}, 92 | "source": [ 93 | "## nRF52840" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "58db7c12-52e2-4a71-b22c-78e46c88d8ab", 100 | "metadata": { 101 | "tags": [] 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "tflm_nrf52840 = pd.read_csv('data/speech-nrf52840-tflm.csv')\n", 106 | "microflow_nrf52840 = pd.read_csv('data/speech-nrf52840-microflow.csv')\n", 107 | "tflm_nrf52840.time /= 1000\n", 108 | "microflow_nrf52840.time /= 1000\n", 109 | "\n", 110 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 111 | "fig.suptitle(\"Speech Command Recognizer Model on the nRF52840\")\n", 112 | "top_limit = 30\n", 113 | "\n", 114 | "sns.histplot(tflm_nrf52840.time, ax=ax[0])\n", 115 | "start, end 
= ax[0].get_xlim()\n", 116 | "median = tflm_nrf52840.time.median()\n", 117 | "ax[0].axvline(median, color='red', label='Median')\n", 118 | "ax[0].set_xlabel('Execution Time (ms)')\n", 119 | "ax[0].set_xticks((start, median, end))\n", 120 | "ax[0].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))\n", 121 | "ax[0].set_ylim(top=top_limit)\n", 122 | "ax[0].set_title('TFLM')\n", 123 | "ax[0].legend()\n", 124 | "\n", 125 | "sns.histplot(microflow_nrf52840.time, ax=ax[1])\n", 126 | "start, end = ax[1].get_xlim()\n", 127 | "median = microflow_nrf52840.time.median()\n", 128 | "ax[1].axvline(median, color='red', label='Median')\n", 129 | "ax[1].set_xlabel('Execution Time (ms)')\n", 130 | "ax[1].set_xticks((start, median, end))\n", 131 | "ax[1].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))\n", 132 | "ax[1].set_ylim(top=top_limit)\n", 133 | "ax[1].set_title('MicroFlow')\n", 134 | "ax[1].legend()\n", 135 | "\n", 136 | "plt.savefig('plots/speech-nrf52840-performance.pdf')\n", 137 | "plt.show()" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "id": "c3ec165c-6a8e-481c-8a0e-c21031fdfad1", 143 | "metadata": {}, 144 | "source": [ 145 | "## Comparison" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "id": "b122ffe9-32d7-431d-95d5-62f119330f31", 152 | "metadata": { 153 | "tags": [] 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "data = pd.DataFrame({\n", 158 | " 'MCU': [*['ESP32'] * 200, *['nRF52840'] * 200],\n", 159 | " 'Inference Engine': [*['TFLM'] * 100, *['MicroFlow'] * 100] * 2,\n", 160 | " 'Execution Time (ms)': [*tflm_esp32.time, *microflow_esp32.time, *tflm_nrf52840.time, *microflow_nrf52840.time]\n", 161 | "})\n", 162 | "\n", 163 | "plt.figure(figsize=(8, 3), layout='constrained')\n", 164 | "sns.barplot(\n", 165 | " data=data,\n", 166 | " x='Execution Time (ms)',\n", 167 | " y='MCU',\n", 168 | " hue='Inference Engine',\n", 169 | " estimator='median',\n", 170 | " errorbar='pi',\n", 171 | " 
capsize=.1,\n", 172 | " errwidth=2,\n", 173 | " width=.7\n", 174 | ")\n", 175 | "plt.ylabel('')\n", 176 | "plt.legend()\n", 177 | "plt.savefig('plots/speech-performance-comparison.pdf')\n", 178 | "plt.show()" 179 | ] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "microflow", 185 | "language": "python", 186 | "name": "microflow" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.10.9" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 5 203 | } 204 | -------------------------------------------------------------------------------- /assets/microflow-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/assets/microflow-logo.png -------------------------------------------------------------------------------- /assets/microflow-logo.pxd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/assets/microflow-logo.pxd -------------------------------------------------------------------------------- /benches/person_detect.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use microflow::buffer::Buffer2D; 3 | use microflow_macros::model; 4 | 5 | #[model("models/person_detect.tflite")] 6 | struct PersonDetect; 7 | 8 | fn person_detect_model(c: &mut Criterion) { 9 | let input = [Buffer2D::from_element([0.5])]; 10 | 11 | c.bench_function("person_detect_model", |b| { 12 | b.iter(|| PersonDetect::predict(input)) 13 | }); 14 | } 
15 | 16 | criterion_group!(benches, person_detect_model); 17 | criterion_main!(benches); 18 | -------------------------------------------------------------------------------- /benches/sine.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use microflow_macros::model; 3 | use nalgebra::matrix; 4 | 5 | #[model("models/sine.tflite")] 6 | struct Sine; 7 | 8 | fn sine_model(c: &mut Criterion) { 9 | let input = matrix![0.5]; 10 | 11 | c.bench_function("sine_model", |b| b.iter(|| Sine::predict(input))); 12 | } 13 | 14 | criterion_group!(benches, sine_model); 15 | criterion_main!(benches); 16 | -------------------------------------------------------------------------------- /benches/speech.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use microflow::buffer::Buffer2D; 3 | use microflow_macros::model; 4 | 5 | #[model("models/speech.tflite")] 6 | struct Speech; 7 | 8 | fn speech_model(c: &mut Criterion) { 9 | let input = Buffer2D::from_element(0.5); 10 | 11 | c.bench_function("speech_model", |b| b.iter(|| Speech::predict(input))); 12 | } 13 | 14 | criterion_group!(benches, speech_model); 15 | criterion_main!(benches); 16 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(all(target_arch = "arm", target_os = "none"))'] 2 | runner = "arduino-run -b arduino:mbed_nano:nano33ble" 3 | rustflags = [ 4 | "-C", "linker=flip-link", 5 | "-C", "link-arg=-Tlink.x", 6 | "-C", "link-arg=--nmagic", 7 | ] 8 | 9 | [build] 10 | target = "thumbv7em-none-eabihf" 11 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "examples-arduino-nano33ble" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | cortex-m = { version = "0.7", features = ["critical-section-single-core"] } 8 | cortex-m-rt = "0.7" 9 | panic-halt = "0.2" 10 | hal = { package = "nrf52840-hal", version = "0.16", features = ["rt"] } 11 | microflow = { path = "../.." } 12 | nalgebra = { version = "0.32", default-features = false, features = ["macros"] } 13 | libm = "0.2" 14 | 15 | [profile.release] 16 | lto = true 17 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/Makefile.toml: -------------------------------------------------------------------------------- 1 | [tasks.default] 2 | clear = true 3 | dependencies = [ 4 | "examples-compile", 5 | "clippy", 6 | "check-format", 7 | ] 8 | 9 | [tasks.examples-compile] 10 | args = ["build", "--examples", "--release"] 11 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/examples/person_detect.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use panic_halt as _; 5 | 6 | use core::fmt::Write; 7 | use cortex_m::asm::nop; 8 | use cortex_m_rt::entry; 9 | use hal::gpio::Level; 10 | use hal::uarte::{Baudrate, Parity}; 11 | use hal::{gpio, uarte, Clocks, Rtc, Uarte}; 12 | use microflow::buffer::Buffer2D; 13 | use microflow::model; 14 | 15 | const RTC_FREQ_MHZ: f32 = 0.032_768; 16 | 17 | #[path = "../../../samples/features/person_detect.rs"] 18 | mod features; 19 | 20 | #[model("../../models/person_detect.tflite")] 21 | struct PersonDetect; 22 | 23 | fn print_prediction(serial: &mut impl Write, prediction: Buffer2D) { 24 | writeln!( 25 | serial, 26 | "Prediction: {:.1}% no person, {:.1}% person", 27 | prediction[0] * 100., 28 | prediction[1] * 100., 29 | ) 30 | 
.unwrap(); 31 | writeln!( 32 | serial, 33 | "Outcome: {}", 34 | match prediction.iamax_full().1 { 35 | 0 => "NO PERSON", 36 | 1 => "PERSON", 37 | _ => unreachable!(), 38 | } 39 | ) 40 | .unwrap(); 41 | } 42 | 43 | #[entry] 44 | fn main() -> ! { 45 | let p = hal::pac::Peripherals::take().unwrap(); 46 | let _clocks = Clocks::new(p.CLOCK).enable_ext_hfosc(); 47 | let port1 = gpio::p1::Parts::new(p.P1); 48 | let rtc = Rtc::new(p.RTC0, 0).unwrap(); 49 | rtc.enable_counter(); 50 | 51 | let mut serial = Uarte::new( 52 | p.UARTE0, 53 | uarte::Pins { 54 | rxd: port1.p1_10.into_floating_input().degrade(), 55 | txd: port1.p1_03.into_push_pull_output(Level::High).degrade(), 56 | cts: None, 57 | rts: None, 58 | }, 59 | Parity::EXCLUDED, 60 | Baudrate::BAUD115200, 61 | ); 62 | 63 | let start = rtc.get_counter(); 64 | let person_predicted = PersonDetect::predict_quantized(features::PERSON); 65 | let end = rtc.get_counter(); 66 | writeln!(serial).unwrap(); 67 | writeln!(serial, "Input sample: 'person.bmp'").unwrap(); 68 | print_prediction(&mut serial, person_predicted); 69 | writeln!( 70 | serial, 71 | "Execution time: {:.0} us", 72 | (end - start) as f32 / RTC_FREQ_MHZ 73 | ) 74 | .unwrap(); 75 | 76 | let start = rtc.get_counter(); 77 | let no_person_predicted = PersonDetect::predict_quantized(features::NO_PERSON); 78 | let end = rtc.get_counter(); 79 | writeln!(serial).unwrap(); 80 | writeln!(serial, "Input sample: 'no_person.bmp'").unwrap(); 81 | print_prediction(&mut serial, no_person_predicted); 82 | writeln!( 83 | serial, 84 | "Execution time: {:.0} us", 85 | (end - start) as f32 / RTC_FREQ_MHZ 86 | ) 87 | .unwrap(); 88 | 89 | loop { 90 | nop(); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/examples/sine.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use panic_halt as _; 5 | 6 | use core::fmt::Write; 7 | use 
cortex_m::asm::nop; 8 | use cortex_m_rt::entry; 9 | use hal::gpio::Level; 10 | use hal::uarte::{Baudrate, Parity}; 11 | use hal::{gpio, Rtc}; 12 | use hal::{uarte, Clocks, Uarte}; 13 | use libm::sinf; 14 | use microflow::model; 15 | use nalgebra::matrix; 16 | 17 | const RTC_FREQ_MHZ: f32 = 0.032_768; 18 | 19 | #[model("../../models/sine.tflite")] 20 | struct Sine; 21 | 22 | #[entry] 23 | fn main() -> ! { 24 | let p = hal::pac::Peripherals::take().unwrap(); 25 | let _clocks = Clocks::new(p.CLOCK).enable_ext_hfosc(); 26 | let port1 = gpio::p1::Parts::new(p.P1); 27 | 28 | let rtc = Rtc::new(p.RTC0, 0).unwrap(); 29 | rtc.enable_counter(); 30 | 31 | let mut serial = Uarte::new( 32 | p.UARTE0, 33 | uarte::Pins { 34 | rxd: port1.p1_10.into_floating_input().degrade(), 35 | txd: port1.p1_03.into_push_pull_output(Level::High).degrade(), 36 | cts: None, 37 | rts: None, 38 | }, 39 | Parity::EXCLUDED, 40 | Baudrate::BAUD115200, 41 | ); 42 | 43 | let x = 0.5; 44 | let start = rtc.get_counter(); 45 | let y_predicted = Sine::predict(matrix![x])[0]; 46 | let end = rtc.get_counter(); 47 | let y_exact = sinf(x); 48 | 49 | writeln!(serial).unwrap(); 50 | writeln!(serial, "Predicted sin({}): {}", x, y_predicted).unwrap(); 51 | writeln!(serial, "Exact sin({}): {}", x, y_exact).unwrap(); 52 | writeln!(serial, "Error: {}", y_exact - y_predicted).unwrap(); 53 | writeln!( 54 | serial, 55 | "Execution time: {:.0} us", 56 | (end - start) as f32 / RTC_FREQ_MHZ 57 | ) 58 | .unwrap(); 59 | 60 | loop { 61 | nop(); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/examples/speech.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use panic_halt as _; 5 | 6 | use core::fmt::Write; 7 | use cortex_m::asm::nop; 8 | use cortex_m_rt::entry; 9 | use hal::gpio::Level; 10 | use hal::uarte::{Baudrate, Parity}; 11 | use hal::{gpio, Rtc}; 12 | use hal::{uarte, 
Clocks, Uarte}; 13 | use microflow::buffer::Buffer2D; 14 | use microflow::model; 15 | 16 | const RTC_FREQ_MHZ: f32 = 0.032_768; 17 | 18 | #[path = "../../../samples/features/speech.rs"] 19 | mod features; 20 | 21 | #[model("../../models/speech.tflite")] 22 | struct Speech; 23 | 24 | fn print_prediction(serial: &mut impl Write, prediction: Buffer2D) { 25 | writeln!( 26 | serial, 27 | "Prediction: {:.1}% silence, {:.1}% unknown, {:.1}% yes, {:.1}% no", 28 | prediction[0] * 100., 29 | prediction[1] * 100., 30 | prediction[2] * 100., 31 | prediction[3] * 100., 32 | ) 33 | .unwrap(); 34 | writeln!( 35 | serial, 36 | "Outcome: {}", 37 | match prediction.iamax_full().1 { 38 | 0 => "SILENCE", 39 | 1 => "UNKNOWN", 40 | 2 => "YES", 41 | 3 => "NO", 42 | _ => unreachable!(), 43 | } 44 | ) 45 | .unwrap(); 46 | } 47 | 48 | #[entry] 49 | fn main() -> ! { 50 | let p = hal::pac::Peripherals::take().unwrap(); 51 | let _clocks = Clocks::new(p.CLOCK).enable_ext_hfosc(); 52 | let port1 = gpio::p1::Parts::new(p.P1); 53 | let rtc = Rtc::new(p.RTC0, 0).unwrap(); 54 | rtc.enable_counter(); 55 | 56 | let mut serial = Uarte::new( 57 | p.UARTE0, 58 | uarte::Pins { 59 | rxd: port1.p1_10.into_floating_input().degrade(), 60 | txd: port1.p1_03.into_push_pull_output(Level::High).degrade(), 61 | cts: None, 62 | rts: None, 63 | }, 64 | Parity::EXCLUDED, 65 | Baudrate::BAUD115200, 66 | ); 67 | 68 | let start = rtc.get_counter(); 69 | let yes_predicted = Speech::predict_quantized(features::YES); 70 | let end = rtc.get_counter(); 71 | writeln!(serial).unwrap(); 72 | writeln!(serial, "Input sample: 'yes.wav'").unwrap(); 73 | print_prediction(&mut serial, yes_predicted); 74 | writeln!( 75 | serial, 76 | "Execution time: {:.0} us", 77 | (end - start) as f32 / RTC_FREQ_MHZ 78 | ) 79 | .unwrap(); 80 | 81 | let start = rtc.get_counter(); 82 | let no_predicted = Speech::predict_quantized(features::NO); 83 | let end = rtc.get_counter(); 84 | writeln!(serial).unwrap(); 85 | writeln!(serial, "Input sample: 
'no.wav'").unwrap(); 86 | print_prediction(&mut serial, no_predicted); 87 | writeln!( 88 | serial, 89 | "Execution time: {:.0} us", 90 | (end - start) as f32 / RTC_FREQ_MHZ 91 | ) 92 | .unwrap(); 93 | 94 | loop { 95 | nop(); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/memory.x: -------------------------------------------------------------------------------- 1 | MEMORY 2 | { 3 | FLASH : ORIGIN = 0x000000 + 0x10000, LENGTH = 1M - 0x10000 4 | RAM : ORIGIN = 0x20000000, LENGTH = 256K 5 | } 6 | -------------------------------------------------------------------------------- /examples/arduino-uno/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(target_arch = "avr")'] 2 | runner = "arduino-run -b arduino:avr:uno" 3 | rustflags = [ 4 | "-C", "default-linker-libraries", 5 | "-C", "link-arg=-Wl,--allow-multiple-definition", 6 | ] 7 | 8 | [build] 9 | target = "avr-unknown-gnu-atmega328" 10 | 11 | [unstable] 12 | build-std = ["core"] 13 | -------------------------------------------------------------------------------- /examples/arduino-uno/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "examples-arduino-uno" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | avr-device = { version = "0.7", features = ["atmega328p"] } 8 | ufmt = "0.2" 9 | ufmt_float = { git = "https://github.com/tl8roy/ufmt_float" } 10 | panic-halt = "0.2" 11 | hal = { package = "arduino-hal", git = "https://github.com/rahix/avr-hal", features = ["arduino-uno"] } 12 | microflow = { path = "../.." 
} 13 | nalgebra = { version = "0.32", default-features = false, features = ["macros"] } 14 | libm = "0.2" 15 | 16 | [profile.dev] 17 | opt-level = "z" 18 | panic = "abort" 19 | lto = true 20 | codegen-units = 1 21 | 22 | [profile.release] 23 | opt-level = "z" 24 | panic = "abort" 25 | lto = true 26 | codegen-units = 1 27 | -------------------------------------------------------------------------------- /examples/arduino-uno/Makefile.toml: -------------------------------------------------------------------------------- 1 | [tasks.default] 2 | clear = true 3 | dependencies = [ 4 | "examples-compile", 5 | "clippy", 6 | "check-format", 7 | ] 8 | 9 | [tasks.examples-compile] 10 | toolchain = "nightly" 11 | args = ["build", "--examples", "--release"] 12 | -------------------------------------------------------------------------------- /examples/arduino-uno/examples/sine.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use avr_device::asm::nop; 5 | use hal::prelude::*; 6 | use libm::sinf; 7 | use microflow::model; 8 | use nalgebra::matrix; 9 | use ufmt::uwriteln; 10 | use ufmt_float::uFmt_f32; 11 | 12 | use panic_halt as _; 13 | 14 | #[model("../../models/sine.tflite")] 15 | struct Sine; 16 | 17 | #[hal::entry] 18 | fn main() -> ! 
{ 19 | let dp = hal::Peripherals::take().unwrap(); 20 | let pins = hal::pins!(dp); 21 | let mut serial = hal::default_serial!(dp, pins, 57600); 22 | 23 | let x = 0.5; 24 | let y_predicted = Sine::predict(matrix![x])[0]; 25 | let y_exact = sinf(x); 26 | let x_display = uFmt_f32::One(x); 27 | uwriteln!(&mut serial, "").unwrap_infallible(); 28 | uwriteln!( 29 | &mut serial, 30 | "Predicted sin({}): {}", 31 | x_display, 32 | uFmt_f32::Five(y_predicted) 33 | ) 34 | .unwrap_infallible(); 35 | uwriteln!( 36 | &mut serial, 37 | "Exact sin({}): {}", 38 | x_display, 39 | uFmt_f32::Five(y_exact) 40 | ) 41 | .unwrap_infallible(); 42 | uwriteln!( 43 | &mut serial, 44 | "Error: {}", 45 | uFmt_f32::Five(y_exact - y_predicted) 46 | ) 47 | .unwrap_infallible(); 48 | 49 | loop { 50 | nop(); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /examples/arduino-uno/rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "nightly" 3 | components = ["rust-src"] 4 | profile = "minimal" 5 | -------------------------------------------------------------------------------- /examples/atsamx7x/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(all(target_arch = "arm", target_os = "none"))'] 2 | runner = "probe-rs run --chip ATSAME70Q21B" 3 | rustflags = [ 4 | "-C", "linker=flip-link", 5 | "-C", "link-arg=-Tlink.x", 6 | "-C", "link-arg=--nmagic", 7 | ] 8 | 9 | [build] 10 | target = "thumbv7em-none-eabihf" 11 | -------------------------------------------------------------------------------- /examples/atsamx7x/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "examples-atsamx7x" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | cortex-m = { version = "0.7", features = ["critical-section-single-core"] } 8 | cortex-m-rtic = 
"1.1" 9 | rtt-target = { version = "0.3", features = ["cortex-m"] } 10 | panic-probe = { version = "0.3", features = ["print-rtt"] } 11 | hal = { package = "atsamx7x-hal", version = "0.4", features = ["samv71q21b-rt", "unproven"] } 12 | microflow = { path = "../.." } 13 | nalgebra = { version = "0.32", default-features = false, features = ["macros"] } 14 | libm = "0.2" 15 | 16 | [profile.release] 17 | lto = true 18 | -------------------------------------------------------------------------------- /examples/atsamx7x/Makefile.toml: -------------------------------------------------------------------------------- 1 | [tasks.default] 2 | clear = true 3 | dependencies = [ 4 | "examples-compile", 5 | "clippy", 6 | "check-format", 7 | ] 8 | 9 | [tasks.examples-compile] 10 | args = ["build", "--examples", "--release"] 11 | -------------------------------------------------------------------------------- /examples/atsamx7x/examples/person_detect.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use panic_probe as _; 5 | 6 | #[path = "../../../samples/features/person_detect.rs"] 7 | mod features; 8 | 9 | #[rtic::app(device = hal::pac, peripherals = true, dispatchers = [I2SC0])] 10 | mod app { 11 | use hal::clocks::*; 12 | use hal::efc::*; 13 | use hal::fugit::RateExtU32; 14 | use microflow::buffer::Buffer2D; 15 | use microflow::model; 16 | use rtt_target::{rprintln, rtt_init_print}; 17 | 18 | #[model("../../models/person_detect.tflite")] 19 | struct PersonDetect; 20 | 21 | #[shared] 22 | struct Shared {} 23 | 24 | #[local] 25 | struct Local {} 26 | 27 | fn print_prediction(prediction: Buffer2D) { 28 | rprintln!( 29 | "Prediction: {:.1}% no person, {:.1}% person", 30 | prediction[0] * 100., 31 | prediction[1] * 100., 32 | ); 33 | rprintln!( 34 | "Outcome: {}", 35 | match prediction.iamax_full().1 { 36 | 0 => "NO PERSON", 37 | 1 => "PERSON", 38 | _ => unreachable!(), 39 | } 40 | ); 41 | } 42 | 43 | #[init] 
fn init(cx: init::Context) -> (Shared, Local, init::Monotonics) {
    // RTIC init task: bring up RTT logging and the clock tree, then run
    // the person-detection model once on each bundled feature sample.
    rtt_init_print!();

    // Acquire the one-time clock-configuration tokens; the WDT
    // peripheral is consumed (via `.into()`) as part of the hand-over
    // to the HAL.
    let clocks = Tokens::new(
        (cx.device.PMC, cx.device.SUPC, cx.device.UTMI),
        &cx.device.WDT.into(),
    );

    // External slow clock plus a 12 MHz external main oscillator.
    clocks.slck.configure_external_normal();
    let mainck = clocks.mainck.configure_external_normal(12.MHz()).unwrap();

    // PLLA sourced from MAINCK with div 1 / mult 12 — presumably
    // 12 MHz x 12 = 144 MHz; confirm against the atsamx7x-hal docs.
    let pllack = clocks
        .pllack
        .configure(&mainck, PllaConfig { div: 1, mult: 12 })
        .unwrap();

    // Switch the host (core) and master clocks over to PLLA, undivided.
    // The EFC handle is passed so flash wait states can be adjusted for
    // the new frequency.
    HostClockController::new(clocks.hclk, clocks.mck)
        .configure(
            &pllack,
            &mut Efc::new(cx.device.EFC, VddioLevel::V3),
            HostClockConfig {
                pres: HccPrescaler::Div1,
                div: MckDivider::Div1,
            },
        )
        .unwrap();

    // Run inference on both pre-quantized samples and report the
    // results over RTT.
    let person_predicted = PersonDetect::predict_quantized(super::features::PERSON);
    let no_person_predicted = PersonDetect::predict_quantized(super::features::NO_PERSON);
    rprintln!();
    rprintln!("Input sample: 'person.bmp'");
    print_prediction(person_predicted);
    rprintln!();
    rprintln!("Input sample: 'no_person.bmp'");
    print_prediction(no_person_predicted);

    // No shared/local resources and no monotonic timers are used.
    (Shared {}, Local {}, init::Monotonics())
}
| let x = 0.5; 28 | let y_predicted = Sine::predict(matrix![x])[0]; 29 | let y_exact = sinf(x); 30 | rprintln!(); 31 | rprintln!("Predicted sin({}): {}", x, y_predicted); 32 | rprintln!("Exact sin({}): {}", x, y_exact); 33 | rprintln!("Error: {}", y_exact - y_predicted); 34 | 35 | (Shared {}, Local {}, init::Monotonics()) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /examples/atsamx7x/examples/speech.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use panic_probe as _; 5 | 6 | #[path = "../../../samples/features/speech.rs"] 7 | mod features; 8 | 9 | #[rtic::app(device = hal::pac, peripherals = true, dispatchers = [I2SC0])] 10 | mod app { 11 | use hal::clocks::*; 12 | use hal::efc::*; 13 | use hal::fugit::RateExtU32; 14 | use microflow::buffer::Buffer2D; 15 | use microflow::model; 16 | use rtt_target::{rprintln, rtt_init_print}; 17 | 18 | #[model("../../models/speech.tflite")] 19 | struct Speech; 20 | 21 | #[shared] 22 | struct Shared {} 23 | 24 | #[local] 25 | struct Local {} 26 | 27 | fn print_prediction(prediction: Buffer2D) { 28 | rprintln!( 29 | "Prediction: {:.1}% silence, {:.1}% unknown, {:.1}% yes, {:.1}% no", 30 | prediction[0] * 100., 31 | prediction[1] * 100., 32 | prediction[2] * 100., 33 | prediction[3] * 100., 34 | ); 35 | rprintln!( 36 | "Outcome: {}", 37 | match prediction.iamax_full().1 { 38 | 0 => "SILENCE", 39 | 1 => "UNKNOWN", 40 | 2 => "YES", 41 | 3 => "NO", 42 | _ => unreachable!(), 43 | } 44 | ); 45 | } 46 | 47 | #[init] 48 | fn init(cx: init::Context) -> (Shared, Local, init::Monotonics) { 49 | rtt_init_print!(); 50 | 51 | let clocks = Tokens::new( 52 | (cx.device.PMC, cx.device.SUPC, cx.device.UTMI), 53 | &cx.device.WDT.into(), 54 | ); 55 | 56 | clocks.slck.configure_external_normal(); 57 | let mainck = clocks.mainck.configure_external_normal(12.MHz()).unwrap(); 58 | 59 | let pllack = clocks 60 | .pllack 61 | 
.configure(&mainck, PllaConfig { div: 1, mult: 12 }) 62 | .unwrap(); 63 | 64 | HostClockController::new(clocks.hclk, clocks.mck) 65 | .configure( 66 | &pllack, 67 | &mut Efc::new(cx.device.EFC, VddioLevel::V3), 68 | HostClockConfig { 69 | pres: HccPrescaler::Div1, 70 | div: MckDivider::Div1, 71 | }, 72 | ) 73 | .unwrap(); 74 | 75 | let yes_predicted = Speech::predict_quantized(super::features::YES); 76 | let no_predicted = Speech::predict_quantized(super::features::NO); 77 | rprintln!(); 78 | rprintln!("Input sample: 'yes.wav'"); 79 | print_prediction(yes_predicted); 80 | rprintln!(); 81 | rprintln!("Input sample: 'no.wav'"); 82 | print_prediction(no_predicted); 83 | 84 | (Shared {}, Local {}, init::Monotonics()) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /examples/atsamx7x/memory.x: -------------------------------------------------------------------------------- 1 | MEMORY 2 | { 3 | FLASH : ORIGIN = 0x400000, LENGTH = 2M 4 | RAM : ORIGIN = 0x20400000, LENGTH = 384K 5 | } 6 | -------------------------------------------------------------------------------- /examples/esp32/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(target_arch = "xtensa")'] 2 | runner = "espflash flash --monitor" 3 | rustflags = [ 4 | "-C", "link-arg=-Tlinkall.x", 5 | "-C", "link-arg=-nostartfiles", 6 | ] 7 | 8 | [build] 9 | target = "xtensa-esp32-none-elf" 10 | 11 | [unstable] 12 | build-std = ["core"] 13 | -------------------------------------------------------------------------------- /examples/esp32/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "examples-esp32" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | esp-println = { version = "0.13", features = ["esp32"] } 8 | esp-backtrace = { version = "0.15", features = ["esp32", "panic-handler", "println"] } 9 | esp_hal = { 
package = "esp-hal", version = "0.23", features = ["esp32"] } 10 | microflow = { path = "../.." } 11 | nalgebra = { version = "0.32", default-features = false, features = ["macros"] } 12 | libm = "0.2" 13 | 14 | [profile.release] 15 | lto = true 16 | -------------------------------------------------------------------------------- /examples/esp32/Makefile.toml: -------------------------------------------------------------------------------- 1 | [tasks.default] 2 | clear = true 3 | dependencies = [ 4 | "examples-compile", 5 | "clippy", 6 | "check-format" 7 | ] 8 | 9 | [tasks.examples-compile] 10 | args = ["build", "--examples", "--release"] 11 | -------------------------------------------------------------------------------- /examples/esp32/examples/person_detect.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use esp_hal::{clock::CpuClock, main, rtc_cntl::Rtc}; 5 | use esp_println::println; 6 | use microflow::buffer::Buffer2D; 7 | use microflow::model; 8 | 9 | use esp_backtrace as _; 10 | 11 | #[path = "../../../samples/features/person_detect.rs"] 12 | mod features; 13 | 14 | #[model("../../models/person_detect.tflite")] 15 | struct PersonDetect; 16 | 17 | fn print_prediction(prediction: Buffer2D) { 18 | println!( 19 | "Prediction: {:.1}% no person, {:.1}% person", 20 | prediction[0] * 100., 21 | prediction[1] * 100., 22 | ); 23 | println!( 24 | "Outcome: {}", 25 | match prediction.iamax_full().1 { 26 | 0 => "NO PERSON", 27 | 1 => "PERSON", 28 | _ => unreachable!(), 29 | } 30 | ); 31 | } 32 | 33 | #[main] 34 | fn main() -> ! 
{ 35 | let config = esp_hal::Config::default().with_cpu_clock(CpuClock::max()); 36 | let peripherals = esp_hal::init(config); 37 | let rtc = Rtc::new(peripherals.LPWR); 38 | 39 | let start = rtc.time_since_boot(); 40 | let person_predicted = PersonDetect::predict_quantized(features::PERSON); 41 | let end = rtc.time_since_boot(); 42 | println!(" "); 43 | println!("Input sample: 'person.bmp'"); 44 | print_prediction(person_predicted); 45 | println!("Execution time: {}", end - start); 46 | 47 | let start = rtc.time_since_boot(); 48 | let no_person_predicted = PersonDetect::predict_quantized(features::NO_PERSON); 49 | let end = rtc.time_since_boot(); 50 | println!(" "); 51 | println!("Input sample: 'no_person.bmp'"); 52 | print_prediction(no_person_predicted); 53 | println!("Execution time: {}", end - start); 54 | 55 | loop {} 56 | } 57 | -------------------------------------------------------------------------------- /examples/esp32/examples/sine.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use esp_hal::{clock::CpuClock, main, rtc_cntl::Rtc}; 5 | use esp_println::println; 6 | use libm::sinf; 7 | use microflow::model; 8 | use nalgebra::matrix; 9 | 10 | use esp_backtrace as _; 11 | 12 | #[model("../../models/sine.tflite")] 13 | struct Sine; 14 | 15 | #[main] 16 | fn main() -> ! 
{ 17 | let config = esp_hal::Config::default().with_cpu_clock(CpuClock::max()); 18 | let peripherals = esp_hal::init(config); 19 | let rtc = Rtc::new(peripherals.LPWR); 20 | 21 | let x = 0.5; 22 | let start = rtc.time_since_boot(); 23 | let y_predicted = Sine::predict(matrix![x])[0]; 24 | let end = rtc.time_since_boot(); 25 | let y_exact = sinf(x); 26 | println!(" "); 27 | println!("Predicted sin({}): {}", x, y_predicted); 28 | println!("Exact sin({}): {}", x, y_exact); 29 | println!("Error: {}", y_exact - y_predicted); 30 | println!("Execution time: {}", end - start); 31 | 32 | loop {} 33 | } 34 | -------------------------------------------------------------------------------- /examples/esp32/examples/speech.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use esp_hal::{clock::CpuClock, main, rtc_cntl::Rtc}; 5 | use esp_println::println; 6 | use microflow::buffer::Buffer2D; 7 | use microflow::model; 8 | 9 | use esp_backtrace as _; 10 | 11 | #[path = "../../../samples/features/speech.rs"] 12 | mod features; 13 | 14 | #[model("../../models/speech.tflite")] 15 | struct Speech; 16 | 17 | fn print_prediction(prediction: Buffer2D) { 18 | println!( 19 | "Prediction: {:.1}% silence, {:.1}% unknown, {:.1}% yes, {:.1}% no", 20 | prediction[0] * 100., 21 | prediction[1] * 100., 22 | prediction[2] * 100., 23 | prediction[3] * 100., 24 | ); 25 | println!( 26 | "Outcome: {}", 27 | match prediction.iamax_full().1 { 28 | 0 => "SILENCE", 29 | 1 => "UNKNOWN", 30 | 2 => "YES", 31 | 3 => "NO", 32 | _ => unreachable!(), 33 | } 34 | ); 35 | } 36 | 37 | #[main] 38 | fn main() -> ! 
{ 39 | let config = esp_hal::Config::default().with_cpu_clock(CpuClock::max()); 40 | let peripherals = esp_hal::init(config); 41 | let rtc = Rtc::new(peripherals.LPWR); 42 | 43 | let start = rtc.time_since_boot(); 44 | let yes_predicted = Speech::predict_quantized(features::YES); 45 | let end = rtc.time_since_boot(); 46 | println!(" "); 47 | println!("Input sample: 'yes.wav'"); 48 | print_prediction(yes_predicted); 49 | println!("Execution time: {}", end - start); 50 | 51 | let start = rtc.time_since_boot(); 52 | let no_predicted = Speech::predict_quantized(features::NO); 53 | let end = rtc.time_since_boot(); 54 | println!(" "); 55 | println!("Input sample: 'no.wav'"); 56 | print_prediction(no_predicted); 57 | println!("Execution time: {}", end - start); 58 | 59 | loop {} 60 | } 61 | -------------------------------------------------------------------------------- /examples/esp32/rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "esp" 3 | -------------------------------------------------------------------------------- /examples/person_detect.rs: -------------------------------------------------------------------------------- 1 | use microflow::buffer::Buffer2D; 2 | use microflow_macros::model; 3 | 4 | #[path = "../samples/features/person_detect.rs"] 5 | mod features; 6 | 7 | #[model("models/person_detect.tflite")] 8 | struct PersonDetect; 9 | 10 | fn print_prediction(prediction: Buffer2D) { 11 | println!( 12 | "Prediction: {:.1}% no person, {:.1}% person", 13 | prediction[0] * 100., 14 | prediction[1] * 100., 15 | ); 16 | println!( 17 | "Outcome: {}", 18 | match prediction.iamax_full().1 { 19 | 0 => "NO PERSON", 20 | 1 => "PERSON", 21 | _ => unreachable!(), 22 | } 23 | ); 24 | } 25 | 26 | fn main() { 27 | let person_predicted = PersonDetect::predict_quantized(features::PERSON); 28 | let no_person_predicted = PersonDetect::predict_quantized(features::NO_PERSON); 29 | println!(); 30 | 
println!("Input sample: 'person.bmp'"); 31 | print_prediction(person_predicted); 32 | println!(); 33 | println!("Input sample: 'no_person.bmp'"); 34 | print_prediction(no_person_predicted); 35 | } 36 | -------------------------------------------------------------------------------- /examples/qemu/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(all(target_arch = "arm", target_os = "none"))'] 2 | runner = "qemu-system-arm -cpu cortex-m3 -machine lm3s6965evb -nographic -semihosting-config enable=on,target=native -kernel" 3 | rustflags = [ 4 | "-C", "linker=flip-link", 5 | "-C", "link-arg=-Tlink.x", 6 | "-C", "link-arg=--nmagic", 7 | ] 8 | 9 | [build] 10 | target = "thumbv7m-none-eabi" 11 | -------------------------------------------------------------------------------- /examples/qemu/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "examples-qemu" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | cortex-m = "0.7" 8 | cortex-m-rt = "0.7" 9 | cortex-m-semihosting = "0.5" 10 | panic-halt = "0.2" 11 | microflow = { path = "../.." 
} 12 | nalgebra = { version = "0.32", default-features = false, features = ["macros"] } 13 | libm = "0.2" 14 | 15 | [profile.release] 16 | lto = true 17 | -------------------------------------------------------------------------------- /examples/qemu/Makefile.toml: -------------------------------------------------------------------------------- 1 | [tasks.default] 2 | clear = true 3 | dependencies = [ 4 | "examples-compile", 5 | "clippy", 6 | "check-format", 7 | ] 8 | 9 | [tasks.examples-compile] 10 | args = ["build", "--examples", "--release"] 11 | -------------------------------------------------------------------------------- /examples/qemu/examples/sine.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use cortex_m::asm::nop; 5 | use cortex_m_rt::entry; 6 | use cortex_m_semihosting::debug::{exit, EXIT_SUCCESS}; 7 | use cortex_m_semihosting::hprintln; 8 | use libm::sinf; 9 | use nalgebra::matrix; 10 | 11 | use microflow::model; 12 | use panic_halt as _; 13 | 14 | #[model("../../models/sine.tflite")] 15 | struct Sine; 16 | 17 | #[entry] 18 | fn main() -> ! 
{ 19 | let x = 0.5; 20 | let y_predicted = Sine::predict(matrix![x])[0]; 21 | let y_exact = sinf(x); 22 | hprintln!(); 23 | hprintln!("Predicted sin({}): {}", x, y_predicted); 24 | hprintln!("Exact sin({}): {}", x, y_exact); 25 | hprintln!("Error: {}", y_exact - y_predicted); 26 | 27 | exit(EXIT_SUCCESS); 28 | loop { 29 | nop() 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /examples/qemu/examples/speech.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use cortex_m::asm::nop; 5 | use cortex_m_rt::entry; 6 | use cortex_m_semihosting::debug::{exit, EXIT_SUCCESS}; 7 | use cortex_m_semihosting::hprintln; 8 | use microflow::buffer::Buffer2D; 9 | use microflow::model; 10 | use panic_halt as _; 11 | 12 | #[path = "../../../samples/features/speech.rs"] 13 | mod features; 14 | 15 | #[model("../../models/speech.tflite")] 16 | struct Speech; 17 | 18 | fn print_prediction(prediction: Buffer2D) { 19 | hprintln!( 20 | "Prediction: {:.1}% silence, {:.1}% unknown, {:.1}% yes, {:.1}% no", 21 | prediction[0] * 100., 22 | prediction[1] * 100., 23 | prediction[2] * 100., 24 | prediction[3] * 100., 25 | ); 26 | hprintln!( 27 | "Outcome: {}", 28 | match prediction.iamax_full().1 { 29 | 0 => "SILENCE", 30 | 1 => "UNKNOWN", 31 | 2 => "YES", 32 | 3 => "NO", 33 | _ => unreachable!(), 34 | } 35 | ); 36 | } 37 | 38 | #[entry] 39 | fn main() -> ! 
{ 40 | let yes_predicted = Speech::predict_quantized(features::YES); 41 | let no_predicted = Speech::predict_quantized(features::NO); 42 | hprintln!(); 43 | hprintln!("Input sample: 'yes.wav'"); 44 | print_prediction(yes_predicted); 45 | hprintln!(); 46 | hprintln!("Input sample: 'no.wav'"); 47 | print_prediction(no_predicted); 48 | 49 | exit(EXIT_SUCCESS); 50 | loop { 51 | nop() 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /examples/qemu/memory.x: -------------------------------------------------------------------------------- 1 | MEMORY 2 | { 3 | FLASH : ORIGIN = 0x00000000, LENGTH = 256K 4 | RAM : ORIGIN = 0x20000000, LENGTH = 64K 5 | } 6 | -------------------------------------------------------------------------------- /examples/sine.rs: -------------------------------------------------------------------------------- 1 | use libm::sinf; 2 | use microflow::model; 3 | use nalgebra::matrix; 4 | 5 | #[model("models/sine.tflite")] 6 | struct Sine; 7 | 8 | fn main() { 9 | let x = 0.5; 10 | let y_predicted = Sine::predict(matrix![x])[0]; 11 | let y_exact = sinf(x); 12 | println!(); 13 | println!("Predicted sin({}): {}", x, y_predicted); 14 | println!("Exact sin({}): {}", x, y_exact); 15 | println!("Error: {}", y_exact - y_predicted); 16 | } 17 | -------------------------------------------------------------------------------- /examples/speech.rs: -------------------------------------------------------------------------------- 1 | use microflow::buffer::Buffer2D; 2 | use microflow_macros::model; 3 | 4 | #[path = "../samples/features/speech.rs"] 5 | mod features; 6 | 7 | #[model("models/speech.tflite")] 8 | struct Speech; 9 | 10 | fn print_prediction(prediction: Buffer2D) { 11 | println!( 12 | "Prediction: {:.1}% silence, {:.1}% unknown, {:.1}% yes, {:.1}% no", 13 | prediction[0] * 100., 14 | prediction[1] * 100., 15 | prediction[2] * 100., 16 | prediction[3] * 100., 17 | ); 18 | println!( 19 | "Outcome: {}", 20 | 
match prediction.iamax_full().1 { 21 | 0 => "SILENCE", 22 | 1 => "UNKNOWN", 23 | 2 => "YES", 24 | 3 => "NO", 25 | _ => unreachable!(), 26 | } 27 | ); 28 | } 29 | 30 | fn main() { 31 | let yes_predicted = Speech::predict_quantized(features::YES); 32 | let no_predicted = Speech::predict_quantized(features::NO); 33 | println!(); 34 | println!("Input sample: 'yes.wav'"); 35 | print_prediction(yes_predicted); 36 | println!(); 37 | println!("Input sample: 'no.wav'"); 38 | print_prediction(no_predicted); 39 | } 40 | -------------------------------------------------------------------------------- /microflow-macros/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "microflow-macros" 3 | description = "Macro crate of the MicroFlow inference engine, namely, the MicroFlow compiler" 4 | authors = ["Matteo Carnelos "] 5 | documentation = "https://docs.rs/microflow-macros" 6 | repository = "https://github.com/matteocarnelos/microflow-rs" 7 | readme = "../README.md" 8 | categories = ["embedded", "no-std", "science"] 9 | keywords = ["tinyml"] 10 | license = "MIT OR Apache-2.0" 11 | version = "0.1.1" 12 | edition = "2021" 13 | include = [ 14 | "/src/**", 15 | "flatbuffers/**", 16 | "/Cargo.toml", 17 | "/LICENSE-MIT", 18 | "/LICENSE-APACHE", 19 | ] 20 | 21 | [lib] 22 | proc-macro = true 23 | 24 | [dependencies] 25 | syn = { version = "2.0", features = ["full"] } 26 | quote = "1.0" 27 | proc-macro2 = "1.0" 28 | proc-macro-error = "1.0" 29 | flatbuffers = "23.1" 30 | nalgebra = "0.32" 31 | simba = "0.8" 32 | byterepr = "0.1" 33 | structmeta = "0.3" 34 | -------------------------------------------------------------------------------- /microflow-macros/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /microflow-macros/LICENSE-MIT: 
-------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /microflow-macros/src/activation.rs: -------------------------------------------------------------------------------- 1 | use crate::tflite_flatbuffers::tflite::ActivationFunctionType; 2 | use proc_macro2::TokenStream as TokenStream2; 3 | use quote::{quote, ToTokens}; 4 | 5 | /// Represents the tokenized version of the [`FusedActivation`]. 6 | #[derive(Copy, Clone)] 7 | pub(crate) enum TokenFusedActivation { 8 | None, 9 | Relu, 10 | Relu6, 11 | } 12 | 13 | impl ToTokens for TokenFusedActivation { 14 | fn to_tokens(&self, tokens: &mut TokenStream2) { 15 | let ts = match self { 16 | TokenFusedActivation::None => quote!(microflow::activation::FusedActivation::None), 17 | TokenFusedActivation::Relu => quote!(microflow::activation::FusedActivation::Relu), 18 | TokenFusedActivation::Relu6 => quote!(microflow::activation::FusedActivation::Relu6), 19 | }; 20 | ts.to_tokens(tokens); 21 | } 22 | } 23 | 24 | impl From for TokenFusedActivation { 25 | fn from(activation: ActivationFunctionType) -> Self { 26 | match activation { 27 | ActivationFunctionType::NONE => Self::None, 28 | ActivationFunctionType::RELU => Self::Relu, 29 | ActivationFunctionType::RELU6 => Self::Relu6, 30 | _ => unimplemented!(), 31 | } 32 | } 33 | } 34 | 35 | #[cfg(test)] 36 | mod tests { 37 | use super::*; 38 | 39 | #[test] 40 | fn fused_activation_to_tokens() { 41 | let activation = TokenFusedActivation::from(ActivationFunctionType::RELU); 42 | assert_eq!( 43 | activation.to_token_stream().to_string(), 44 | quote!(microflow::activation::FusedActivation::Relu).to_string() 45 | ); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /microflow-macros/src/buffer.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Deref; 2 | 3 | use 
nalgebra::DMatrix; 4 | use proc_macro2::TokenStream as TokenStream2; 5 | use quote::{quote, ToTokens}; 6 | 7 | /// Represents the tokenized version of the `Buffer2D`. 8 | #[derive(Debug)] 9 | pub(crate) struct TokenBuffer2D(pub(crate) Option>); 10 | 11 | /// Represents the tokenized version of the `Buffer4D`. 12 | #[derive(Debug)] 13 | pub(crate) struct TokenBuffer4D(pub(crate) Option>>>); 14 | 15 | impl TokenBuffer2D { 16 | /// Builds an empty [`TokenBuffer2D`]. 17 | pub(crate) fn new() -> Self { 18 | Self(None) 19 | } 20 | } 21 | 22 | impl From> for TokenBuffer2D { 23 | fn from(matrix: DMatrix) -> Self { 24 | Self(Some(matrix)) 25 | } 26 | } 27 | 28 | impl ToTokens for TokenBuffer2D { 29 | fn to_tokens(&self, tokens: &mut TokenStream2) { 30 | let mut rows: Vec = Vec::new(); 31 | for row in self.row_iter() { 32 | let iter = row.iter(); 33 | rows.push(quote!(#(#iter),*)); 34 | } 35 | 36 | let ts = quote!(nalgebra::matrix![#(#rows);*]); 37 | ts.to_tokens(tokens); 38 | } 39 | } 40 | 41 | impl Deref for TokenBuffer2D { 42 | type Target = DMatrix; 43 | fn deref(&self) -> &Self::Target { 44 | self.0.as_ref().unwrap() 45 | } 46 | } 47 | 48 | impl TokenBuffer4D { 49 | /// Builds an empty [`TokenBuffer4D`]. 
50 | pub(crate) fn new() -> Self { 51 | Self(None) 52 | } 53 | } 54 | 55 | impl From>>> for TokenBuffer4D { 56 | fn from(data: Vec>>) -> Self { 57 | Self(Some(data)) 58 | } 59 | } 60 | 61 | impl ToTokens for TokenBuffer4D { 62 | fn to_tokens(&self, tokens: &mut TokenStream2) { 63 | let mut batches: Vec = Vec::new(); 64 | for batch in self.iter() { 65 | let mut rows: Vec = Vec::new(); 66 | for row in batch.row_iter() { 67 | let mut elements: Vec = Vec::new(); 68 | for element in row.iter() { 69 | let iter = element.iter(); 70 | elements.push(quote!([#(#iter),*])); 71 | } 72 | rows.push(quote!(#(#elements),*)); 73 | } 74 | batches.push(quote!(nalgebra::matrix![#(#rows);*])); 75 | } 76 | 77 | let ts = quote!([#(#batches),*]); 78 | ts.to_tokens(tokens); 79 | } 80 | } 81 | 82 | impl Deref for TokenBuffer4D { 83 | type Target = Vec>>; 84 | fn deref(&self) -> &Self::Target { 85 | self.0.as_ref().unwrap() 86 | } 87 | } 88 | 89 | #[cfg(test)] 90 | mod tests { 91 | use nalgebra::dmatrix; 92 | 93 | use super::*; 94 | 95 | fn setup_2d() -> TokenBuffer2D { 96 | TokenBuffer2D::from(dmatrix![ 97 | 1, 2, 3; 98 | 4, 5, 6 99 | ]) 100 | } 101 | 102 | fn setup_4d() -> TokenBuffer4D { 103 | TokenBuffer4D::from(vec![ 104 | dmatrix![ 105 | vec![7, 8], vec![9, 10], vec![11, 12]; 106 | vec![13, 14], vec![15, 16], vec![17, 18] 107 | ], 108 | dmatrix![ 109 | vec![19, 20], vec![21, 22], vec![23, 24]; 110 | vec![25, 26], vec![27, 28], vec![29, 30] 111 | ], 112 | ]) 113 | } 114 | 115 | #[test] 116 | fn buffer_2d_new() { 117 | assert_eq!(TokenBuffer2D::::new().0, None); 118 | } 119 | 120 | #[test] 121 | fn buffer_2d_from_matrix() { 122 | let matrix = dmatrix![1, 2, 3]; 123 | assert_eq!(TokenBuffer2D::::from(matrix.clone()).0, Some(matrix)); 124 | } 125 | 126 | #[test] 127 | fn buffer_2d_to_tokens() { 128 | let buffer = setup_2d(); 129 | assert_eq!( 130 | buffer.to_token_stream().to_string(), 131 | quote! 
{ 132 | nalgebra::matrix![ 133 | 1i8, 2i8, 3i8; 134 | 4i8, 5i8, 6i8 135 | ] 136 | } 137 | .to_string() 138 | ); 139 | } 140 | 141 | #[test] 142 | fn buffer_4d_new() { 143 | assert_eq!(TokenBuffer4D::::new().0, None); 144 | } 145 | 146 | #[test] 147 | fn buffer_4d_from_data() { 148 | let data = vec![dmatrix![vec![1], vec![2], vec![3]]]; 149 | assert_eq!(TokenBuffer4D::::from(data.clone()).0, Some(data)); 150 | } 151 | 152 | #[test] 153 | fn buffer_4d_to_tokens() { 154 | let buffer = setup_4d(); 155 | assert_eq!( 156 | buffer.to_token_stream().to_string(), 157 | quote! { 158 | [ 159 | nalgebra::matrix![ 160 | [7i8, 8i8], [9i8, 10i8], [11i8, 12i8]; 161 | [13i8, 14i8], [15i8, 16i8], [17i8, 18i8] 162 | ], 163 | nalgebra::matrix![ 164 | [19i8, 20i8], [21i8, 22i8], [23i8, 24i8]; 165 | [25i8, 26i8], [27i8, 28i8], [29i8, 30i8] 166 | ] 167 | ] 168 | } 169 | .to_string() 170 | ); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /microflow-macros/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! [![crates.io](https://img.shields.io/crates/v/microflow-macros)](https://crates.io/crates/microflow-macros) 2 | //! [![docs.rs](https://img.shields.io/docsrs/microflow-macros)](https://docs.rs/microflow-macros) 3 | //! [![github](https://img.shields.io/github/actions/workflow/status/matteocarnelos/microflow-rs/cargo.yml?branch=main)](https://github.com/matteocarnelos/microflow-rs/actions/workflows/cargo.yml) 4 | //! 5 | //! Macro crate of the [MicroFlow](https://github.com/matteocarnelos/microflow-rs) inference engine, namely, the MicroFlow compiler. 
6 | 7 | extern crate proc_macro; 8 | 9 | use proc_macro::TokenStream; 10 | use proc_macro_error::{abort_call_site, proc_macro_error}; 11 | use std::fs; 12 | 13 | use proc_macro2::TokenStream as TokenStream2; 14 | use quote::{quote, ToTokens}; 15 | use syn::{parse_macro_input, ItemStruct}; 16 | 17 | use crate::tflite_flatbuffers::tflite::TensorType; 18 | use ops::*; 19 | use structmeta::StructMeta; 20 | use syn::LitStr; 21 | use tflite_flatbuffers::tflite::{root_as_model, BuiltinOperator}; 22 | 23 | mod activation; 24 | mod buffer; 25 | mod ops; 26 | mod quantize; 27 | mod tensor; 28 | #[path = "../flatbuffers/tflite_generated.rs"] 29 | #[allow(unused_imports)] 30 | #[allow(clippy::all)] 31 | mod tflite_flatbuffers; 32 | 33 | #[derive(StructMeta)] 34 | struct Args { 35 | #[struct_meta(unnamed)] 36 | path: LitStr, 37 | } 38 | 39 | /// The entry point of MicroFlow. 40 | /// This attribute-like procedural macro can be placed on `structs` to implement the `predict()` 41 | /// function based on the given model. 42 | /// The macro takes as input the path of the model, which must be in the TensorFlow Lite format 43 | /// (`.tflite`). 
#[proc_macro_error]
#[proc_macro_attribute]
pub fn model(args: TokenStream, item: TokenStream) -> TokenStream {
    // Parse the macro input: the model path argument and the struct the
    // attribute is attached to.
    let args = parse_macro_input!(args as Args);
    let item = parse_macro_input!(item as ItemStruct);

    // Read the .tflite file at macro-expansion time; both failures abort
    // with a compile error at the call site.
    let buf = fs::read(args.path.value()).unwrap_or_else(|_| {
        abort_call_site!(
            "couldn't find '{}', please provide a valid path",
            &args.path.value()
        )
    });
    let model = root_as_model(&buf).unwrap_or_else(|_| {
        abort_call_site!("invalid model, please provide a valid TensorFlow Lite model")
    });

    let ident = &item.ident;

    // Only the first subgraph of the FlatBuffers model is compiled.
    let subgraph = model.subgraphs().unwrap().get(0);
    let tensors = subgraph.tensors().unwrap();
    let buffers = model.buffers().unwrap();

    // --- Input tensor: shape, element type, and quantization params ---
    let input = tensors.get(subgraph.inputs().unwrap().get(0) as usize);
    let mut input_shape: Vec<_> = input.shape().unwrap().iter().map(|e| e as usize).collect();
    // Promote a rank-1 shape to a 1xN row so it fits the 2-D tensor types.
    if input_shape.len() == 1 {
        input_shape.insert(0, 1);
    }
    // Only INT8/UINT8 quantized models are supported.
    let input_type = match input.type_() {
        TensorType::INT8 => quote!(i8),
        TensorType::UINT8 => quote!(u8),
        _ => unimplemented!(),
    };
    // Rank selects the tensor/buffer type names used in generated code.
    let input_tensor = match input_shape.len() {
        2 => quote!(Tensor2D),
        4 => quote!(Tensor4D),
        _ => unimplemented!(),
    };
    let input_buffer = match input_shape.len() {
        2 => quote!(Buffer2D),
        4 => quote!(Buffer4D),
        _ => unimplemented!(),
    };
    // Tokenize the quantization scale(s) and zero point(s); zero points
    // are cast to the tensor's element type first.
    let input_scale: Vec<_> = input
        .quantization()
        .unwrap()
        .scale()
        .unwrap()
        .iter()
        .map(|e| e.to_token_stream())
        .collect();
    let input_zero_point: Vec<_> = match input.type_() {
        TensorType::INT8 => input
            .quantization()
            .unwrap()
            .zero_point()
            .unwrap()
            .iter()
            .map(|e| (e as i8).to_token_stream())
            .collect(),
        TensorType::UINT8 => input
            .quantization()
            .unwrap()
            .zero_point()
            .unwrap()
            .iter()
            .map(|e| (e as u8).to_token_stream())
            .collect(),
        _ => unimplemented!(),
    };

    // --- Compile each operator into a token stream, in graph order ---
    let operators = subgraph.operators().unwrap();
    let mut layers = TokenStream2::new();
    for (index, operator) in operators.iter().enumerate() {
        // Dispatch on the deprecated builtin code of the operator.
        // NOTE(review): the `Box` annotation below appears to have lost
        // its generic argument (likely `Box<dyn ToTokens>`) in
        // extraction — confirm against the repository.
        let layer: Box = match BuiltinOperator(
            model
                .operator_codes()
                .unwrap()
                .get(operator.opcode_index() as usize)
                .deprecated_builtin_code() as i32,
        ) {
            BuiltinOperator::FULLY_CONNECTED => {
                fully_connected::parse(operator, tensors, buffers, index)
            }
            BuiltinOperator::DEPTHWISE_CONV_2D => {
                depthwise_conv_2d::parse(operator, tensors, buffers, index)
            }
            BuiltinOperator::CONV_2D => conv_2d::parse(operator, tensors, buffers, index),
            BuiltinOperator::AVERAGE_POOL_2D => average_pool_2d::parse(operator, tensors),
            BuiltinOperator::SOFTMAX => softmax::parse(operator, tensors),
            BuiltinOperator::RESHAPE => Box::new(reshape::parse(operator, tensors)),
            unsupported_op => abort_call_site!("unsupported operator: {:?}", unsupported_op),
        };
        layer.to_tokens(&mut layers)
    }

    // --- Output tensor: mirrors the input handling above ---
    let output = tensors.get(subgraph.outputs().unwrap().get(0) as usize);
    let mut output_shape: Vec<_> = output.shape().unwrap().iter().map(|e| e as usize).collect();
    if output_shape.len() == 1 {
        output_shape.insert(0, 1);
    }
    let output_type = match output.type_() {
        TensorType::INT8 => quote!(i8),
        TensorType::UINT8 => quote!(u8),
        _ => unimplemented!(),
    };
    let output_tensor = match output_shape.len() {
        2 => quote!(Tensor2D),
        4 => quote!(Tensor4D),
        _ => unimplemented!(),
    };
    let output_buffer = match output_shape.len() {
        2 => quote!(Buffer2D),
        4 => quote!(Buffer4D),
        _ => unimplemented!(),
    };

    // Emit the original struct plus an impl with:
    //   - predict():            quantizes a float input, runs the layers,
    //                           dequantizes the output
    //   - predict_quantized():  same, but takes an already-quantized input
    //   - predict_inner():      the compiled layer pipeline; each layer in
    //                           #layers rebinds `input`, so the final
    //                           `input` is the model output
    let ts = quote! {
        #item
        impl #ident {
            pub fn predict(input: microflow::buffer::#input_buffer) -> microflow::buffer::#output_buffer {
                let input = microflow::tensor::#input_tensor::quantize(input, [#(#input_scale),*], [#(#input_zero_point),*]);
                Self::predict_inner(input).dequantize()
            }

            pub fn predict_quantized(input: microflow::buffer::#input_buffer<#input_type, #(#input_shape),*>) -> microflow::buffer::#output_buffer {
                let input = microflow::tensor::#input_tensor::new(input, [#(#input_scale),*], [#(#input_zero_point),*]);
                Self::predict_inner(input).dequantize()
            }

            fn predict_inner(input: microflow::tensor::#input_tensor<#input_type, #(#input_shape),*, 1usize>) -> microflow::tensor::#output_tensor<#output_type, #(#output_shape),*, 1usize> {
                #layers
                input
            }
        }
    };

    // Best-effort debug dump of the generated code; failure (e.g. no
    // `target/` directory) is deliberately ignored.
    fs::write("target/microflow-expansion.rs", ts.to_string()).ok();

    ts.into()
}
11 | pub(crate) struct TokenAveragePool2D { 12 | pub(crate) filter_shape: (usize, usize), 13 | pub(crate) output: TokenTensor4D, 14 | pub(crate) fused_activation: TokenFusedActivation, 15 | pub(crate) view_padding: TokenTensorViewPadding, 16 | pub(crate) strides: (usize, usize), 17 | pub(crate) constants: (f32, f32), 18 | } 19 | 20 | /// Parses the [`TokenAveragePool2D`] struct from the given operator. 21 | /// 22 | /// # Arguments 23 | /// * `operator` - The model operator as an [`Operator`] 24 | /// * `tensors` - The model tensors as a [`Vector>`] 25 | /// 26 | pub(crate) fn parse( 27 | operator: Operator, 28 | tensors: Vector>, 29 | ) -> Box { 30 | let inputs = operator.inputs().unwrap(); 31 | let input_type = tensors.get(inputs.get(0) as usize).type_(); 32 | match input_type { 33 | TensorType::INT8 => Box::new(TokenAveragePool2D::::new(operator, tensors)), 34 | TensorType::UINT8 => Box::new(TokenAveragePool2D::::new(operator, tensors)), 35 | _ => unimplemented!(), 36 | } 37 | } 38 | 39 | impl TokenAveragePool2D { 40 | /// Builds the [`TokenAveragePool2D`] operator from the given model operator and tensors. 
41 | /// 42 | /// # Arguments 43 | /// * `operator` - The model operator as an [`Operator`] 44 | /// * `tensors` - The model tensors as a [`Vector>`] 45 | /// 46 | pub(crate) fn new(operator: Operator, tensors: Vector>) -> Self { 47 | let inputs = operator.inputs().unwrap(); 48 | let input = TokenTensor4D::from_empty_tensor(tensors.get(inputs.get(0) as usize)); 49 | let output = TokenTensor4D::from_empty_tensor( 50 | tensors.get(operator.outputs().unwrap().get(0) as usize), 51 | ); 52 | let options = operator.builtin_options_as_pool_2_doptions().unwrap(); 53 | let constants = Self::preprocess(&input, &output); 54 | Self { 55 | filter_shape: ( 56 | options.filter_height() as usize, 57 | options.filter_width() as usize, 58 | ), 59 | output, 60 | fused_activation: options.fused_activation_function().into(), 61 | view_padding: options.padding().into(), 62 | strides: (options.stride_h() as usize, options.stride_w() as usize), 63 | constants, 64 | } 65 | } 66 | 67 | /// Pre-processes the operator, returning the tuple of constants. 
68 | /// 69 | /// # Arguments 70 | /// * `input` - The input of the operator as a [`TokenTensor2D`] 71 | /// * `output` - The output of the operator as a [`TokenTensor2D`] 72 | /// 73 | fn preprocess(input: &TokenTensor4D, output: &TokenTensor4D) -> (f32, f32) { 74 | ( 75 | input.scale[0] / output.scale[0], 76 | f32::from_subset(&output.zero_point[0]) 77 | - (input.scale[0] * f32::from_subset(&input.zero_point[0])) / output.scale[0], 78 | ) 79 | } 80 | } 81 | 82 | impl ToTokens for TokenAveragePool2D { 83 | fn to_tokens(&self, tokens: &mut TokenStream2) { 84 | let (filter_shape_0, filter_shape_1) = self.filter_shape; 85 | let output_shape = &self.output.shape; 86 | let output_scale = &self.output.scale; 87 | let output_zero_point = &self.output.zero_point; 88 | let fused_activation = self.fused_activation; 89 | let view_padding = self.view_padding; 90 | let (strides_0, strides_1) = self.strides; 91 | let (constants_0, constants_1) = self.constants; 92 | 93 | let ts = quote! { 94 | let input: microflow::tensor::Tensor4D<_, #(#output_shape),*, 1usize> = 95 | microflow::ops::average_pool_2d( 96 | input, 97 | (nalgebra::Const::<#filter_shape_0>, nalgebra::Const::<#filter_shape_1>), 98 | [#(#output_scale),*], 99 | [#(#output_zero_point),*], 100 | microflow::ops::AveragePool2DOptions { 101 | fused_activation: #fused_activation, 102 | view_padding: #view_padding, 103 | strides: (#strides_0, #strides_1), 104 | }, 105 | (#constants_0, #constants_1) 106 | ); 107 | }; 108 | ts.to_tokens(tokens); 109 | } 110 | } 111 | 112 | #[cfg(test)] 113 | mod tests { 114 | use super::*; 115 | use crate::buffer::TokenBuffer4D; 116 | 117 | fn setup() -> TokenAveragePool2D { 118 | TokenAveragePool2D { 119 | filter_shape: (2, 3), 120 | output: TokenTensor4D { 121 | buffer: TokenBuffer4D::new(), 122 | shape: vec![1, 2, 3, 2], 123 | scale: vec![0.1], 124 | zero_point: vec![2], 125 | }, 126 | fused_activation: TokenFusedActivation::None, 127 | view_padding: TokenTensorViewPadding::Same, 128 | 
strides: (1, 1), 129 | constants: (3., 4.), 130 | } 131 | } 132 | 133 | #[test] 134 | fn average_pool_2d_preprocess() { 135 | let layer = setup(); 136 | let input = TokenTensor4D { 137 | buffer: TokenBuffer4D::new(), 138 | shape: vec![1, 2, 3, 2], 139 | scale: vec![0.5], 140 | zero_point: vec![6], 141 | }; 142 | let constants = TokenAveragePool2D::preprocess(&input, &layer.output); 143 | assert_eq!(constants.0, 5.); 144 | assert_eq!(constants.1, -28.); 145 | } 146 | 147 | #[test] 148 | fn average_pool_2d_to_tokens() { 149 | let layer = setup(); 150 | let fused_activation = layer.fused_activation; 151 | let view_padding = layer.view_padding; 152 | assert_eq!( 153 | layer.to_token_stream().to_string(), 154 | quote! { 155 | let input: microflow::tensor::Tensor4D<_, 1usize, 2usize, 3usize, 2usize, 1usize> = 156 | microflow::ops::average_pool_2d( 157 | input, 158 | (nalgebra::Const::<2usize>, nalgebra::Const::<3usize>), 159 | [0.1f32], 160 | [2i8], 161 | microflow::ops::AveragePool2DOptions { 162 | fused_activation: #fused_activation, 163 | view_padding: #view_padding, 164 | strides: (1usize, 1usize), 165 | }, 166 | (3f32, 4f32) 167 | ); 168 | } 169 | .to_string() 170 | ); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /microflow-macros/src/ops/conv_2d.rs: -------------------------------------------------------------------------------- 1 | use crate::activation::TokenFusedActivation; 2 | use crate::buffer::TokenBuffer2D; 3 | use crate::quantize::TokenQuantized; 4 | use crate::tensor::{TokenTensor2D, TokenTensor4D, TokenTensorViewPadding}; 5 | use crate::tflite_flatbuffers::tflite::{Buffer, Operator, Tensor, TensorType}; 6 | use flatbuffers::{ForwardsUOffset, Vector}; 7 | use nalgebra::DMatrix; 8 | use proc_macro2::TokenStream as TokenStream2; 9 | use quote::{format_ident, quote, ToTokens}; 10 | 11 | /// Represents the tokenized version of the `Conv2D` operator. 
12 | pub(crate) struct TokenConv2D { 13 | pub(crate) filters: TokenTensor4D, 14 | pub(crate) output: TokenTensor4D, 15 | pub(crate) fused_activation: TokenFusedActivation, 16 | pub(crate) view_padding: TokenTensorViewPadding, 17 | pub(crate) strides: (usize, usize), 18 | pub(crate) constants: (TokenBuffer2D, TokenBuffer2D), 19 | pub(crate) index: usize, 20 | } 21 | 22 | /// Parses the [`TokenConv2D`] struct from the given operator. 23 | /// 24 | /// # Arguments 25 | /// * `operator` - The model operator as an [`Operator`] 26 | /// * `tensors` - The model tensors as a [`Vector>`] 27 | /// * `buffers` - The model buffers as a [`Vector>`] 28 | /// * `index` - The operator index 29 | /// 30 | pub(crate) fn parse( 31 | operator: Operator, 32 | tensors: Vector>, 33 | buffers: Vector>, 34 | index: usize, 35 | ) -> Box { 36 | let inputs = operator.inputs().unwrap(); 37 | let input_type = tensors.get(inputs.get(0) as usize).type_(); 38 | match input_type { 39 | TensorType::INT8 => Box::new(TokenConv2D::::new(operator, tensors, buffers, index)), 40 | TensorType::UINT8 => Box::new(TokenConv2D::::new(operator, tensors, buffers, index)), 41 | _ => unimplemented!(), 42 | } 43 | } 44 | 45 | impl TokenConv2D { 46 | /// Builds the [`TokenConv2D`] operator from the given model operator and tensors. 
47 | /// 48 | /// # Arguments 49 | /// * `operator` - The model operator as an [`Operator`] 50 | /// * `tensors` - The model tensors as a [`Vector>`] 51 | /// * `buffers` - The model buffers as a [`Vector>`] 52 | /// * `index` - The operator index 53 | /// 54 | pub(crate) fn new( 55 | operator: Operator, 56 | tensors: Vector>, 57 | buffers: Vector>, 58 | index: usize, 59 | ) -> Self { 60 | let inputs = operator.inputs().unwrap(); 61 | let input = TokenTensor4D::from_empty_tensor(tensors.get(inputs.get(0) as usize)); 62 | let filters = 63 | TokenTensor4D::from_buffered_tensor(tensors.get(inputs.get(1) as usize), buffers); 64 | let biases = 65 | TokenTensor2D::from_buffered_tensor(tensors.get(inputs.get(2) as usize), buffers); 66 | let output = TokenTensor4D::from_empty_tensor( 67 | tensors.get(operator.outputs().unwrap().get(0) as usize), 68 | ); 69 | let options = operator.builtin_options_as_conv_2_doptions().unwrap(); 70 | let constants = Self::preprocess(&input, &filters, &biases, &output); 71 | Self { 72 | filters, 73 | output, 74 | fused_activation: options.fused_activation_function().into(), 75 | view_padding: options.padding().into(), 76 | strides: (options.stride_h() as usize, options.stride_w() as usize), 77 | constants, 78 | index, 79 | } 80 | } 81 | 82 | /// Pre-processes the operator, returning the tuple of constants. 
83 | /// 84 | /// # Arguments 85 | /// * `input` - The input of the operator as a [`TokenTensor2D`] 86 | /// * `filters` - The filters of the operator as a [`TokenTensor2D`] 87 | /// * `biases` - The biases of the operator as a [`TokenTensor2D`] 88 | /// * `output` - The output of the operator as a [`TokenTensor2D`] 89 | /// 90 | fn preprocess( 91 | input: &TokenTensor4D, 92 | filters: &TokenTensor4D, 93 | biases: &TokenTensor2D, 94 | output: &TokenTensor4D, 95 | ) -> (TokenBuffer2D, TokenBuffer2D) { 96 | ( 97 | TokenBuffer2D::from(DMatrix::from_fn(filters.shape[0], 1, |b, _| { 98 | biases.scale.get(b).copied().unwrap_or(biases.scale[0]) / output.scale[0] 99 | * (biases.buffer[b] 100 | - biases 101 | .zero_point 102 | .get(b) 103 | .copied() 104 | .unwrap_or(biases.zero_point[0])) as f32 105 | })), 106 | TokenBuffer2D::from(DMatrix::from_fn(filters.scale.len(), 1, |b, _| { 107 | input.scale[0] * filters.scale[b] / output.scale[0] 108 | })), 109 | ) 110 | } 111 | } 112 | 113 | impl ToTokens for TokenConv2D { 114 | fn to_tokens(&self, tokens: &mut TokenStream2) { 115 | let filters_ident = format_ident!("filters_{}", self.index); 116 | let filters_type = self.filters.type_tokens(); 117 | let filters = &self.filters; 118 | let output_shape = &self.output.shape; 119 | let output_scale = &self.output.scale; 120 | let output_zero_point = &self.output.zero_point; 121 | let fused_activation = self.fused_activation; 122 | let view_padding = self.view_padding; 123 | let (strides_0, strides_1) = self.strides; 124 | let (constants_0, constants_1) = &self.constants; 125 | 126 | let ts = quote! 
{ 127 | const #filters_ident: #filters_type = #filters; 128 | let input: microflow::tensor::Tensor4D<_, #(#output_shape),*, 1usize> = 129 | microflow::ops::conv_2d( 130 | input, 131 | &#filters_ident, 132 | [#(#output_scale),*], 133 | [#(#output_zero_point),*], 134 | microflow::ops::Conv2DOptions { 135 | fused_activation: #fused_activation, 136 | view_padding: #view_padding, 137 | strides: (#strides_0, #strides_1), 138 | }, 139 | (#constants_0, #constants_1) 140 | ); 141 | }; 142 | ts.to_tokens(tokens); 143 | } 144 | } 145 | 146 | #[cfg(test)] 147 | mod tests { 148 | use super::*; 149 | use crate::buffer::{TokenBuffer2D, TokenBuffer4D}; 150 | use nalgebra::dmatrix; 151 | 152 | fn setup() -> TokenConv2D { 153 | TokenConv2D { 154 | filters: TokenTensor4D { 155 | buffer: TokenBuffer4D::from(vec![ 156 | dmatrix![ 157 | vec![1, 2], vec![3, 4], vec![5, 6]; 158 | vec![7, 8], vec![9, 10], vec![11, 12] 159 | ], 160 | dmatrix![ 161 | vec![13, 14], vec![15, 16], vec![17, 18]; 162 | vec![19, 20], vec![21, 22], vec![23, 24] 163 | ], 164 | ]), 165 | shape: vec![2, 2, 3, 2], 166 | scale: vec![0.25, 0.26], 167 | zero_point: vec![27, 28], 168 | }, 169 | output: TokenTensor4D { 170 | buffer: TokenBuffer4D::new(), 171 | shape: vec![1, 2, 3, 2], 172 | scale: vec![0.29], 173 | zero_point: vec![30], 174 | }, 175 | fused_activation: TokenFusedActivation::Relu6, 176 | view_padding: TokenTensorViewPadding::Same, 177 | strides: (1, 1), 178 | constants: ( 179 | TokenBuffer2D::from(dmatrix![31., 32.]), 180 | TokenBuffer2D::from(dmatrix![33., 34.]), 181 | ), 182 | index: 0, 183 | } 184 | } 185 | 186 | #[test] 187 | fn conv_2d_preprocess() { 188 | let layer = setup(); 189 | let input = TokenTensor4D { 190 | buffer: TokenBuffer4D::new(), 191 | shape: vec![1, 2, 3, 2], 192 | scale: vec![0.35], 193 | zero_point: vec![36], 194 | }; 195 | let biases = TokenTensor2D { 196 | buffer: TokenBuffer2D::from(dmatrix![ 197 | 37; 198 | 38 199 | ]), 200 | shape: vec![2, 1], 201 | scale: vec![0.39, 0.40], 202 | 
zero_point: vec![41, 42], 203 | }; 204 | let constants = TokenConv2D::preprocess(&input, &layer.filters, &biases, &layer.output); 205 | assert_eq!(constants.0 .0, Some(dmatrix![-5.37931; -5.5172415])); 206 | assert_eq!(constants.1 .0, Some(dmatrix![0.30172414; 0.3137931])); 207 | } 208 | 209 | #[test] 210 | fn conv_2d_to_tokens() { 211 | let layer = setup(); 212 | let filters = &layer.filters; 213 | let fused_activation = layer.fused_activation; 214 | let view_padding = layer.view_padding; 215 | let (constants_0, constants_1) = &layer.constants; 216 | assert_eq!( 217 | layer.to_token_stream().to_string(), 218 | quote! { 219 | const filters_0: microflow::tensor::Tensor4D = #filters; 220 | let input: microflow::tensor::Tensor4D<_, 1usize, 2usize, 3usize, 2usize, 1usize> = 221 | microflow::ops::conv_2d( 222 | input, 223 | &filters_0, 224 | [0.29f32], 225 | [30i8], 226 | microflow::ops::Conv2DOptions { 227 | fused_activation: #fused_activation, 228 | view_padding: #view_padding, 229 | strides: (1usize, 1usize), 230 | }, 231 | (#constants_0, #constants_1) 232 | ); 233 | }.to_string() 234 | ); 235 | } 236 | } 237 | -------------------------------------------------------------------------------- /microflow-macros/src/ops/depthwise_conv_2d.rs: -------------------------------------------------------------------------------- 1 | use crate::activation::TokenFusedActivation; 2 | use crate::buffer::TokenBuffer2D; 3 | use crate::quantize::TokenQuantized; 4 | use crate::tensor::{TokenTensor2D, TokenTensor4D, TokenTensorViewPadding}; 5 | use crate::tflite_flatbuffers::tflite::{Buffer, Operator, Tensor, TensorType}; 6 | use flatbuffers::{ForwardsUOffset, Vector}; 7 | use nalgebra::DMatrix; 8 | use proc_macro2::TokenStream as TokenStream2; 9 | use quote::{format_ident, quote, ToTokens}; 10 | 11 | /// Represents the tokenized version of the `DepthwiseConv2D` operator. 
12 | pub(crate) struct TokenDepthwiseConv2D { 13 | pub(crate) weights: TokenTensor4D, 14 | pub(crate) output: TokenTensor4D, 15 | pub(crate) fused_activation: TokenFusedActivation, 16 | pub(crate) view_padding: TokenTensorViewPadding, 17 | pub(crate) strides: (usize, usize), 18 | pub(crate) constants: (TokenBuffer2D, TokenBuffer2D), 19 | pub(crate) index: usize, 20 | } 21 | 22 | /// Parses the [`TokenDepthwiseConv2D`] struct from the given operator. 23 | /// 24 | /// # Arguments 25 | /// * `operator` - The model operator as an [`Operator`] 26 | /// * `tensors` - The model tensors as a [`Vector>`] 27 | /// * `buffers` - The model buffers as a [`Vector>`] 28 | /// * `index` - The operator index 29 | /// 30 | pub(crate) fn parse( 31 | operator: Operator, 32 | tensors: Vector>, 33 | buffers: Vector>, 34 | index: usize, 35 | ) -> Box { 36 | let inputs = operator.inputs().unwrap(); 37 | let input_type = tensors.get(inputs.get(0) as usize).type_(); 38 | match input_type { 39 | TensorType::INT8 => Box::new(TokenDepthwiseConv2D::::new( 40 | operator, tensors, buffers, index, 41 | )), 42 | TensorType::UINT8 => Box::new(TokenDepthwiseConv2D::::new( 43 | operator, tensors, buffers, index, 44 | )), 45 | _ => unimplemented!(), 46 | } 47 | } 48 | 49 | impl TokenDepthwiseConv2D { 50 | /// Builds the [`TokenDepthwiseConv2D`] operator from the given model operator and tensors. 
51 | /// 52 | /// # Arguments 53 | /// * `operator` - The model operator as an [`Operator`] 54 | /// * `tensors` - The model tensors as a [`Vector>`] 55 | /// * `buffers` - The model buffers as a [`Vector>`] 56 | /// * `index` - The operator index 57 | /// 58 | pub(crate) fn new( 59 | operator: Operator, 60 | tensors: Vector>, 61 | buffers: Vector>, 62 | index: usize, 63 | ) -> Self { 64 | let inputs = operator.inputs().unwrap(); 65 | let input = TokenTensor4D::from_empty_tensor(tensors.get(inputs.get(0) as usize)); 66 | let weights = 67 | TokenTensor4D::from_buffered_tensor(tensors.get(inputs.get(1) as usize), buffers); 68 | let biases = 69 | TokenTensor2D::from_buffered_tensor(tensors.get(inputs.get(2) as usize), buffers); 70 | let output = TokenTensor4D::from_empty_tensor( 71 | tensors.get(operator.outputs().unwrap().get(0) as usize), 72 | ); 73 | let options = operator 74 | .builtin_options_as_depthwise_conv_2_doptions() 75 | .unwrap(); 76 | let constants = Self::preprocess(&input, &weights, &biases, &output); 77 | Self { 78 | weights, 79 | output, 80 | fused_activation: options.fused_activation_function().into(), 81 | view_padding: options.padding().into(), 82 | strides: (options.stride_h() as usize, options.stride_w() as usize), 83 | constants, 84 | index, 85 | } 86 | } 87 | 88 | /// Pre-processes the operator, returning the tuple of constants. 
89 | /// 90 | /// # Arguments 91 | /// * `input` - The input of the operator as a [`TokenTensor2D`] 92 | /// * `weights` - The weights of the operator as a [`TokenTensor2D`] 93 | /// * `biases` - The biases of the operator as a [`TokenTensor2D`] 94 | /// * `output` - The output of the operator as a [`TokenTensor2D`] 95 | /// 96 | fn preprocess( 97 | input: &TokenTensor4D, 98 | weights: &TokenTensor4D, 99 | biases: &TokenTensor2D, 100 | output: &TokenTensor4D, 101 | ) -> (TokenBuffer2D, TokenBuffer2D) { 102 | ( 103 | TokenBuffer2D::from(DMatrix::from_fn(weights.shape[3], 1, |c, _| { 104 | biases.scale.get(c).copied().unwrap_or(biases.scale[0]) / output.scale[0] 105 | * (biases.buffer[c] 106 | - biases 107 | .zero_point 108 | .get(c) 109 | .copied() 110 | .unwrap_or(biases.zero_point[0])) as f32 111 | })), 112 | TokenBuffer2D::from(DMatrix::from_fn(weights.scale.len(), 1, |c, _| { 113 | input.scale[0] * weights.scale[c] / output.scale[0] 114 | })), 115 | ) 116 | } 117 | } 118 | 119 | impl ToTokens for TokenDepthwiseConv2D { 120 | fn to_tokens(&self, tokens: &mut TokenStream2) { 121 | let weights_ident = format_ident!("weights_{}", self.index); 122 | let weights_type = self.weights.type_tokens(); 123 | let weights = &self.weights; 124 | let output_shape = &self.output.shape; 125 | let output_scale = &self.output.scale; 126 | let output_zero_point = &self.output.zero_point; 127 | let fused_activation = self.fused_activation; 128 | let view_padding = self.view_padding; 129 | let (strides_0, strides_1) = self.strides; 130 | let (constants_0, constants_1) = &self.constants; 131 | 132 | let ts = quote! 
{ 133 | const #weights_ident: #weights_type = #weights; 134 | let input: microflow::tensor::Tensor4D<_, #(#output_shape),*, 1usize> = 135 | microflow::ops::depthwise_conv_2d( 136 | input, 137 | &#weights_ident, 138 | [#(#output_scale),*], 139 | [#(#output_zero_point),*], 140 | microflow::ops::DepthwiseConv2DOptions { 141 | fused_activation: #fused_activation, 142 | view_padding: #view_padding, 143 | strides: (#strides_0, #strides_1), 144 | }, 145 | (#constants_0, #constants_1) 146 | ); 147 | }; 148 | ts.to_tokens(tokens); 149 | } 150 | } 151 | 152 | #[cfg(test)] 153 | mod tests { 154 | use super::*; 155 | use crate::buffer::{TokenBuffer2D, TokenBuffer4D}; 156 | use nalgebra::dmatrix; 157 | 158 | fn setup() -> TokenDepthwiseConv2D { 159 | TokenDepthwiseConv2D { 160 | weights: TokenTensor4D { 161 | buffer: TokenBuffer4D::from(vec![dmatrix![ 162 | vec![1, 2], vec![3, 4], vec![5, 6]; 163 | vec![7, 8], vec![9, 10], vec![11, 12] 164 | ]]), 165 | shape: vec![1, 2, 3, 2], 166 | scale: vec![0.13, 0.14], 167 | zero_point: vec![15, 16], 168 | }, 169 | output: TokenTensor4D { 170 | buffer: TokenBuffer4D::new(), 171 | shape: vec![1, 2, 3, 2], 172 | scale: vec![0.17], 173 | zero_point: vec![18], 174 | }, 175 | fused_activation: TokenFusedActivation::Relu6, 176 | view_padding: TokenTensorViewPadding::Same, 177 | strides: (1, 1), 178 | constants: ( 179 | TokenBuffer2D::from(dmatrix![19., 20.]), 180 | TokenBuffer2D::from(dmatrix![21., 22.]), 181 | ), 182 | index: 0, 183 | } 184 | } 185 | 186 | #[test] 187 | fn depthwise_conv_2d_preprocess() { 188 | let layer = setup(); 189 | let input = TokenTensor4D { 190 | buffer: TokenBuffer4D::new(), 191 | shape: vec![1, 2, 3, 2], 192 | scale: vec![0.23], 193 | zero_point: vec![24], 194 | }; 195 | let biases = TokenTensor2D { 196 | buffer: TokenBuffer2D::from(dmatrix![ 197 | 25; 198 | 26 199 | ]), 200 | shape: vec![2, 1], 201 | scale: vec![0.27, 0.28], 202 | zero_point: vec![29, 30], 203 | }; 204 | let constants = 205 | 
TokenDepthwiseConv2D::preprocess(&input, &layer.weights, &biases, &layer.output); 206 | assert_eq!(constants.0 .0, Some(dmatrix![-6.3529415; -6.5882354])); 207 | assert_eq!(constants.1 .0, Some(dmatrix![0.17588235; 0.18941177])) 208 | } 209 | 210 | #[test] 211 | fn depthwise_conv_2d_to_tokens() { 212 | let layer = setup(); 213 | let weights = &layer.weights; 214 | let fused_activation = layer.fused_activation; 215 | let view_padding = layer.view_padding; 216 | let (constants_0, constants_1) = &layer.constants; 217 | assert_eq!( 218 | layer.to_token_stream().to_string(), 219 | quote! { 220 | const weights_0: microflow::tensor::Tensor4D = #weights; 221 | let input: microflow::tensor::Tensor4D<_, 1usize, 2usize, 3usize, 2usize, 1usize> = 222 | microflow::ops::depthwise_conv_2d( 223 | input, 224 | &weights_0, 225 | [0.17f32], 226 | [18i8], 227 | microflow::ops::DepthwiseConv2DOptions { 228 | fused_activation: #fused_activation, 229 | view_padding: #view_padding, 230 | strides: (1usize, 1usize), 231 | }, 232 | (#constants_0, #constants_1) 233 | ); 234 | }.to_string() 235 | ); 236 | } 237 | } 238 | -------------------------------------------------------------------------------- /microflow-macros/src/ops/fully_connected.rs: -------------------------------------------------------------------------------- 1 | use flatbuffers::{ForwardsUOffset, Vector}; 2 | use nalgebra::{convert_ref, DMatrix}; 3 | use proc_macro2::TokenStream as TokenStream2; 4 | use quote::{format_ident, quote, ToTokens}; 5 | use simba::scalar::SupersetOf; 6 | 7 | use crate::activation::TokenFusedActivation; 8 | use crate::buffer::TokenBuffer2D; 9 | use crate::quantize::TokenQuantized; 10 | use crate::tensor::TokenTensor2D; 11 | use crate::tflite_flatbuffers::tflite::{Buffer, Operator, Tensor, TensorType}; 12 | 13 | /// Represents the tokenized version of the `FullyConnected` operator. 
14 | pub(crate) struct TokenFullyConnected { 15 | pub(crate) weights: TokenTensor2D, 16 | pub(crate) output: TokenTensor2D, 17 | pub(crate) fused_activation: TokenFusedActivation, 18 | pub(crate) constants: (TokenBuffer2D, f32, TokenBuffer2D, i32), 19 | pub(crate) index: usize, 20 | pub(crate) reshape: bool, 21 | } 22 | 23 | /// Parses the [`TokenFullyConnected`] struct from the given operator. 24 | /// 25 | /// # Arguments 26 | /// * `operator` - The model operator as an [`Operator`] 27 | /// * `tensors` - The model tensors as a [`Vector>`] 28 | /// * `buffers` - The model buffers as a [`Vector>`] 29 | /// * `index` - The operator index 30 | /// 31 | pub(crate) fn parse( 32 | operator: Operator, 33 | tensors: Vector>, 34 | buffers: Vector>, 35 | index: usize, 36 | ) -> Box { 37 | let inputs = operator.inputs().unwrap(); 38 | let input_type = tensors.get(inputs.get(0) as usize).type_(); 39 | match input_type { 40 | TensorType::INT8 => Box::new(TokenFullyConnected::::new( 41 | operator, tensors, buffers, index, 42 | )), 43 | TensorType::UINT8 => Box::new(TokenFullyConnected::::new( 44 | operator, tensors, buffers, index, 45 | )), 46 | _ => unimplemented!(), 47 | } 48 | } 49 | 50 | impl TokenFullyConnected { 51 | /// Builds the [`TokenFullyConnected`] operator from the given model operator and tensors. 
52 | /// 53 | /// # Arguments 54 | /// * `operator` - The model operator as an [`Operator`] 55 | /// * `tensors` - The model tensors as a [`Vector>`] 56 | /// * `buffers` - The model buffers as a [`Vector>`] 57 | /// * `index` - The operator index 58 | /// 59 | pub(crate) fn new( 60 | operator: Operator, 61 | tensors: Vector>, 62 | buffers: Vector>, 63 | index: usize, 64 | ) -> Self { 65 | let inputs = operator.inputs().unwrap(); 66 | let input = TokenTensor2D::from_empty_tensor(tensors.get(inputs.get(0) as usize)); 67 | let weights = 68 | TokenTensor2D::from_buffered_tensor(tensors.get(inputs.get(1) as usize), buffers); 69 | let biases = 70 | TokenTensor2D::from_buffered_tensor(tensors.get(inputs.get(2) as usize), buffers); 71 | let output = TokenTensor2D::from_empty_tensor( 72 | tensors.get(operator.outputs().unwrap().get(0) as usize), 73 | ); 74 | let options = operator 75 | .builtin_options_as_fully_connected_options() 76 | .unwrap(); 77 | let constants = Self::preprocess(&input, &weights, &biases, &output); 78 | Self { 79 | weights, 80 | output, 81 | fused_activation: options.fused_activation_function().into(), 82 | reshape: input.shape.len() != 2, 83 | constants, 84 | index, 85 | } 86 | } 87 | 88 | /// Pre-processes the operator, returning the tuple of constants. 
89 | /// 90 | /// # Arguments 91 | /// * `input` - The input of the operator as a [`TokenTensor2D`] 92 | /// * `weights` - The weights of the operator as a [`TokenTensor2D`] 93 | /// * `biases` - The biases of the operator as a [`TokenTensor2D`] 94 | /// * `output` - The output of the operator as a [`TokenTensor2D`] 95 | /// 96 | fn preprocess( 97 | input: &TokenTensor2D, 98 | weights: &TokenTensor2D, 99 | biases: &TokenTensor2D, 100 | output: &TokenTensor2D, 101 | ) -> (TokenBuffer2D, f32, TokenBuffer2D, i32) { 102 | ( 103 | TokenBuffer2D::from( 104 | biases.scale[0] / output.scale[0] 105 | * biases 106 | .buffer 107 | .add_scalar(-biases.zero_point[0]) 108 | .cast::(), 109 | ), 110 | input.scale[0] * weights.scale[0] / output.scale[0], 111 | TokenBuffer2D::from(DMatrix::from_rows(&[ 112 | convert_ref::, DMatrix>(&weights.buffer).row_sum() 113 | * i32::from_subset(&input.zero_point[0]), 114 | ])), 115 | input.shape[1] as i32 116 | * i32::from_subset(&input.zero_point[0]) 117 | * i32::from_subset(&weights.zero_point[0]), 118 | ) 119 | } 120 | } 121 | 122 | impl ToTokens for TokenFullyConnected { 123 | fn to_tokens(&self, tokens: &mut TokenStream2) { 124 | let reshape = if self.reshape { 125 | quote!(.into()) 126 | } else { 127 | quote!() 128 | }; 129 | let weights_ident = format_ident!("weights_{}", self.index); 130 | let weights_type = self.weights.type_tokens(); 131 | let weights = &self.weights; 132 | let output_shape = &self.output.shape; 133 | let output_scale = self.output.scale[0]; 134 | let output_zero_point = self.output.zero_point[0]; 135 | let fused_activation = self.fused_activation; 136 | let (constants_0, constants_1, constants_2, constants_3) = &self.constants; 137 | 138 | let ts = quote! 
{ 139 | const #weights_ident: #weights_type = #weights; 140 | let input: microflow::tensor::Tensor2D<_, #(#output_shape),*, 1usize> = 141 | microflow::ops::fully_connected( 142 | input #reshape, 143 | &#weights_ident, 144 | [#output_scale], 145 | [#output_zero_point], 146 | microflow::ops::FullyConnectedOptions { 147 | fused_activation: #fused_activation, 148 | }, 149 | (#constants_0, #constants_1, #constants_2, #constants_3) 150 | ); 151 | }; 152 | ts.to_tokens(tokens); 153 | } 154 | } 155 | 156 | #[cfg(test)] 157 | mod tests { 158 | use nalgebra::dmatrix; 159 | 160 | use super::*; 161 | 162 | fn setup() -> TokenFullyConnected { 163 | TokenFullyConnected { 164 | weights: TokenTensor2D { 165 | buffer: TokenBuffer2D::from(dmatrix![ 166 | 1, 2, 3; 167 | 4, 5, 6 168 | ]), 169 | shape: vec![2, 3], 170 | scale: vec![0.7], 171 | zero_point: vec![8], 172 | }, 173 | output: TokenTensor2D { 174 | buffer: TokenBuffer2D::new(), 175 | shape: vec![1, 3], 176 | scale: vec![0.9], 177 | zero_point: vec![10], 178 | }, 179 | fused_activation: TokenFusedActivation::Relu, 180 | constants: ( 181 | TokenBuffer2D::from(dmatrix![11., 12.]), 182 | 13., 183 | TokenBuffer2D::from(dmatrix![14, 15]), 184 | 16, 185 | ), 186 | index: 0, 187 | reshape: false, 188 | } 189 | } 190 | 191 | #[test] 192 | fn fully_connected_preprocess() { 193 | let layer = setup(); 194 | let input = TokenTensor2D { 195 | buffer: TokenBuffer2D::new(), 196 | shape: vec![1, 2], 197 | scale: vec![0.17], 198 | zero_point: vec![18], 199 | }; 200 | let biases = TokenTensor2D { 201 | buffer: TokenBuffer2D::from(dmatrix![ 202 | 19; 203 | 20; 204 | 21 205 | ]), 206 | shape: vec![3, 1], 207 | scale: vec![0.22], 208 | zero_point: vec![23], 209 | }; 210 | let constants = 211 | TokenFullyConnected::preprocess(&input, &layer.weights, &biases, &layer.output); 212 | assert_eq!( 213 | constants.0 .0, 214 | Some(dmatrix![-0.9777778; -0.73333335; -0.4888889]) 215 | ); 216 | assert_eq!(constants.1, 0.13222224); 217 | 
assert_eq!(constants.2 .0, Some(dmatrix![90, 126, 162])); 218 | assert_eq!(constants.3, 288); 219 | } 220 | 221 | #[test] 222 | fn fully_connected_to_tokens() { 223 | let layer = setup(); 224 | let weights = &layer.weights; 225 | let fused_activation = layer.fused_activation; 226 | let constants_0 = &layer.constants.0; 227 | let constants_2 = &layer.constants.2; 228 | assert_eq!( 229 | layer.to_token_stream().to_string(), 230 | quote! { 231 | const weights_0: microflow::tensor::Tensor2D = #weights; 232 | let input: microflow::tensor::Tensor2D<_, 1usize, 3usize, 1usize> = 233 | microflow::ops::fully_connected( 234 | input, 235 | &weights_0, 236 | [0.9f32], 237 | [10i8], 238 | microflow::ops::FullyConnectedOptions { 239 | fused_activation: #fused_activation, 240 | }, 241 | (#constants_0, 13f32, #constants_2, 16i32) 242 | ); 243 | } 244 | .to_string() 245 | ); 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /microflow-macros/src/ops/mod.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod average_pool_2d; 2 | pub(crate) mod conv_2d; 3 | pub(crate) mod depthwise_conv_2d; 4 | pub(crate) mod fully_connected; 5 | pub(crate) mod reshape; 6 | pub(crate) mod softmax; 7 | -------------------------------------------------------------------------------- /microflow-macros/src/ops/reshape.rs: -------------------------------------------------------------------------------- 1 | use crate::tflite_flatbuffers::tflite::{Operator, Tensor}; 2 | use flatbuffers::{ForwardsUOffset, Vector}; 3 | use proc_macro2::TokenStream as TokenStream2; 4 | use quote::{quote, ToTokens}; 5 | 6 | /// Represents the tokenized version of the `Reshape` operator. 7 | pub(crate) struct TokenReshape { 8 | pub(crate) output_shape: Vec, 9 | } 10 | 11 | /// Parses the [`TokenReshape`] struct from the given operator. 
12 | /// 13 | /// # Arguments 14 | /// * `operator` - The model operator as an [`Operator`] 15 | /// * `tensors` - The model tensors as a [`Vector>`] 16 | /// 17 | pub(crate) fn parse( 18 | operator: Operator, 19 | tensors: Vector>, 20 | ) -> Box { 21 | Box::new(TokenReshape::new(operator, tensors)) 22 | } 23 | 24 | impl TokenReshape { 25 | /// Builds the [`TokenReshape`] operator from the given model operator and tensors. 26 | /// 27 | /// # Arguments 28 | /// * `operator` - The model operator as an [`Operator`] 29 | /// * `tensors` - The model tensors as a [`Vector>`] 30 | /// 31 | pub(crate) fn new(operator: Operator, tensors: Vector>) -> Self { 32 | let output_shape: Vec<_> = tensors 33 | .get(operator.outputs().unwrap().get(0) as usize) 34 | .shape() 35 | .unwrap() 36 | .iter() 37 | .map(|e| e as usize) 38 | .collect(); 39 | Self { output_shape } 40 | } 41 | } 42 | 43 | impl ToTokens for TokenReshape { 44 | fn to_tokens(&self, tokens: &mut TokenStream2) { 45 | let output_shape = &self.output_shape; 46 | let output_tensor = match output_shape.len() { 47 | 2 => quote!(Tensor2D), 48 | 4 => quote!(Tensor4D), 49 | _ => unimplemented!(), 50 | }; 51 | 52 | let ts = quote! { 53 | let input: microflow::tensor::#output_tensor<_, #(#output_shape),*, 1usize> = 54 | microflow::ops::reshape(input); 55 | }; 56 | ts.to_tokens(tokens) 57 | } 58 | } 59 | 60 | #[cfg(test)] 61 | mod tests { 62 | use super::*; 63 | 64 | fn setup() -> TokenReshape { 65 | TokenReshape { 66 | output_shape: vec![2, 3], 67 | } 68 | } 69 | 70 | #[test] 71 | fn reshape_to_tokens() { 72 | let layer = setup(); 73 | assert_eq!( 74 | layer.to_token_stream().to_string(), 75 | quote! 
{ 76 | let input: microflow::tensor::Tensor2D<_, 2usize, 3usize, 1usize> = 77 | microflow::ops::reshape(input); 78 | } 79 | .to_string() 80 | ) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /microflow-macros/src/ops/softmax.rs: -------------------------------------------------------------------------------- 1 | use crate::quantize::TokenQuantized; 2 | use crate::tensor::TokenTensor2D; 3 | use crate::tflite_flatbuffers::tflite::{Operator, Tensor, TensorType}; 4 | use flatbuffers::{ForwardsUOffset, Vector}; 5 | use proc_macro2::TokenStream as TokenStream2; 6 | use quote::{quote, ToTokens}; 7 | 8 | /// Represents the tokenized version of the `Softmax` operator. 9 | pub(crate) struct TokenSoftmax { 10 | pub(crate) output: TokenTensor2D, 11 | } 12 | 13 | /// Parses the [`TokenSoftmax`] struct from the given operator. 14 | /// 15 | /// # Arguments 16 | /// * `operator` - The model operator as an [`Operator`] 17 | /// * `tensors` - The model tensors as a [`Vector>`] 18 | /// 19 | pub(crate) fn parse( 20 | operator: Operator, 21 | tensors: Vector>, 22 | ) -> Box { 23 | let inputs = operator.inputs().unwrap(); 24 | let input_type = tensors.get(inputs.get(0) as usize).type_(); 25 | match input_type { 26 | TensorType::INT8 => Box::new(TokenSoftmax::::new(operator, tensors)), 27 | TensorType::UINT8 => Box::new(TokenSoftmax::::new(operator, tensors)), 28 | _ => unimplemented!(), 29 | } 30 | } 31 | 32 | impl TokenSoftmax { 33 | /// Builds the [`TokenSoftmax`] operator from the given model operator and tensors. 
34 | /// 35 | /// # Arguments 36 | /// * `operator` - The model operator as an [`Operator`] 37 | /// * `tensors` - The model tensors as a [`Vector>`] 38 | /// 39 | pub(crate) fn new(operator: Operator, tensors: Vector>) -> Self { 40 | let output = TokenTensor2D::from_empty_tensor( 41 | tensors.get(operator.outputs().unwrap().get(0) as usize), 42 | ); 43 | Self { output } 44 | } 45 | } 46 | 47 | impl ToTokens for TokenSoftmax { 48 | fn to_tokens(&self, tokens: &mut TokenStream2) { 49 | let output_shape = &self.output.shape; 50 | let output_scale = &self.output.scale; 51 | let output_zero_point = &self.output.zero_point; 52 | 53 | let ts = quote! { 54 | let input: microflow::tensor::Tensor2D<_, #(#output_shape),*, 1usize> = 55 | microflow::ops::softmax(input, [#(#output_scale),*], [#(#output_zero_point),*]); 56 | }; 57 | ts.to_tokens(tokens); 58 | } 59 | } 60 | 61 | #[cfg(test)] 62 | mod tests { 63 | use super::*; 64 | use crate::buffer::TokenBuffer2D; 65 | 66 | fn setup() -> TokenSoftmax { 67 | TokenSoftmax { 68 | output: TokenTensor2D { 69 | buffer: TokenBuffer2D::new(), 70 | shape: vec![2, 3], 71 | scale: vec![0.3], 72 | zero_point: vec![4], 73 | }, 74 | } 75 | } 76 | 77 | #[test] 78 | fn softmax_to_tokens() { 79 | let layer = setup(); 80 | assert_eq!( 81 | layer.to_token_stream().to_string(), 82 | quote! { 83 | let input: microflow::tensor::Tensor2D<_, 2usize, 3usize, 1usize> = 84 | microflow::ops::softmax(input, [0.3f32], [4i8]); 85 | } 86 | .to_string() 87 | ) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /microflow-macros/src/quantize.rs: -------------------------------------------------------------------------------- 1 | use byterepr::ByteReprNum; 2 | use nalgebra::Scalar; 3 | use quote::ToTokens; 4 | use simba::scalar::SubsetOf; 5 | 6 | /// Represents the trait to constrain a type to be quantized and tokenized. 
7 | pub(crate) trait TokenQuantized: 8 | Scalar + ByteReprNum + ToTokens + SubsetOf + SubsetOf + SubsetOf 9 | { 10 | } 11 | 12 | impl + SubsetOf + SubsetOf> 13 | TokenQuantized for T 14 | { 15 | } 16 | -------------------------------------------------------------------------------- /microflow-macros/src/tensor.rs: -------------------------------------------------------------------------------- 1 | use std::any::type_name; 2 | use std::mem::size_of; 3 | 4 | use flatbuffers::{ForwardsUOffset, Vector}; 5 | use nalgebra::DMatrix; 6 | use proc_macro2::TokenStream as TokenStream2; 7 | use quote::{quote, ToTokens}; 8 | use simba::scalar::SupersetOf; 9 | use syn::{parse_str, Type}; 10 | 11 | use crate::buffer::{TokenBuffer2D, TokenBuffer4D}; 12 | use crate::quantize::TokenQuantized; 13 | use crate::tflite_flatbuffers::tflite::{Buffer, Padding, Tensor}; 14 | 15 | /// Represents the tokenized version of the `TensorViewPadding`. 16 | #[derive(Copy, Clone)] 17 | pub(crate) enum TokenTensorViewPadding { 18 | Same, 19 | Valid, 20 | } 21 | 22 | /// Represents the tokenized version of the `Tensor2D`. 23 | #[derive(Debug)] 24 | pub(crate) struct TokenTensor2D { 25 | pub(crate) buffer: TokenBuffer2D, 26 | pub(crate) shape: Vec, 27 | pub(crate) scale: Vec, 28 | pub(crate) zero_point: Vec, 29 | } 30 | 31 | /// Represents the tokenized version of the `Tensor4D`. 
32 | #[derive(Debug)] 33 | pub(crate) struct TokenTensor4D { 34 | pub(crate) buffer: TokenBuffer4D, 35 | pub(crate) shape: Vec, 36 | pub(crate) scale: Vec, 37 | pub(crate) zero_point: Vec, 38 | } 39 | 40 | impl ToTokens for TokenTensorViewPadding { 41 | fn to_tokens(&self, tokens: &mut TokenStream2) { 42 | match self { 43 | Self::Same => quote!(microflow::tensor::TensorViewPadding::Same), 44 | Self::Valid => quote!(microflow::tensor::TensorViewPadding::Valid), 45 | } 46 | .to_tokens(tokens); 47 | } 48 | } 49 | 50 | impl From for TokenTensorViewPadding { 51 | fn from(padding: Padding) -> Self { 52 | match padding { 53 | Padding::SAME => Self::Same, 54 | Padding::VALID => Self::Valid, 55 | _ => unreachable!(), 56 | } 57 | } 58 | } 59 | 60 | impl TokenTensor2D { 61 | /// Builds a [`TokenTensor2D`] from an empty [`Tensor`]. 62 | /// 63 | /// # Arguments 64 | /// * `tensor` - The empty model tensor as a [`Tensor`] 65 | /// 66 | pub fn from_empty_tensor(tensor: Tensor) -> Self { 67 | let mut shape: Vec<_> = tensor.shape().unwrap().iter().map(|e| e as usize).collect(); 68 | if shape.len() == 1 { 69 | shape.insert(0, 1); 70 | } 71 | Self { 72 | buffer: TokenBuffer2D::new(), 73 | shape, 74 | scale: tensor 75 | .quantization() 76 | .unwrap() 77 | .scale() 78 | .unwrap() 79 | .iter() 80 | .collect(), 81 | zero_point: tensor 82 | .quantization() 83 | .unwrap() 84 | .zero_point() 85 | .unwrap() 86 | .iter() 87 | .map(|e| i64::to_subset_unchecked(&e)) 88 | .collect(), 89 | } 90 | } 91 | 92 | /// Builds a [`TokenTensor2D`] from a [`Tensor`] with a buffer. 
93 | /// 94 | /// # Arguments 95 | /// * `tensor` - The model tensor as a [`Tensor`] 96 | /// * `buffer` - The model buffers as a [`Vector>`] 97 | /// 98 | pub fn from_buffered_tensor(tensor: Tensor, buffers: Vector>) -> Self { 99 | let mut token_tensor = Self::from_empty_tensor(tensor); 100 | let matrix = DMatrix::from_iterator( 101 | token_tensor.shape[1], 102 | token_tensor.shape[0], 103 | buffers 104 | .get(tensor.buffer() as usize) 105 | .data() 106 | .unwrap() 107 | .bytes() 108 | .chunks_exact(size_of::()) 109 | .map(|e| T::from_le_bytes(e)), 110 | ); 111 | token_tensor.buffer = TokenBuffer2D::from(matrix); 112 | token_tensor.shape.swap(0, 1); 113 | token_tensor 114 | } 115 | 116 | /// Returns the tokens of the [`Self`] type. 117 | pub fn type_tokens(&self) -> TokenStream2 { 118 | let ty = parse_str::(type_name::()).unwrap(); 119 | let shape = &self.shape; 120 | let quants = self.scale.len(); 121 | quote!(microflow::tensor::Tensor2D<#ty, #(#shape),*, #quants>) 122 | } 123 | } 124 | 125 | impl ToTokens for TokenTensor2D { 126 | fn to_tokens(&self, tokens: &mut TokenStream2) { 127 | let buffer = &self.buffer; 128 | let scale = &self.scale; 129 | let zero_point = &self.zero_point; 130 | 131 | let ts = quote! { 132 | microflow::tensor::Tensor2D::new( 133 | #buffer, 134 | [#(#scale),*], 135 | [#(#zero_point),*] 136 | ) 137 | }; 138 | ts.to_tokens(tokens); 139 | } 140 | } 141 | 142 | impl TokenTensor4D { 143 | /// Builds a [`TokenTensor4D`] from an empty [`Tensor`]. 
144 | /// 145 | /// # Arguments 146 | /// * `tensor` - The empty model tensor as a [`Tensor`] 147 | /// 148 | pub fn from_empty_tensor(tensor: Tensor) -> Self { 149 | Self { 150 | buffer: TokenBuffer4D::new(), 151 | shape: tensor.shape().unwrap().iter().map(|e| e as usize).collect(), 152 | scale: tensor 153 | .quantization() 154 | .unwrap() 155 | .scale() 156 | .unwrap() 157 | .iter() 158 | .collect(), 159 | zero_point: tensor 160 | .quantization() 161 | .unwrap() 162 | .zero_point() 163 | .unwrap() 164 | .iter() 165 | .map(|e| i64::to_subset_unchecked(&e)) 166 | .collect(), 167 | } 168 | } 169 | 170 | /// Builds a [`TokenTensor4D`] from a [`Tensor`] with a buffer. 171 | /// 172 | /// # Arguments 173 | /// * `tensor` - The model tensor as a [`Tensor`] 174 | /// * `buffer` - The model buffers as a [`Vector>`] 175 | /// 176 | pub fn from_buffered_tensor(tensor: Tensor, buffers: Vector>) -> Self { 177 | let mut t = Self::from_empty_tensor(tensor); 178 | let len = t.shape.iter().product::() * size_of::(); 179 | let data = buffers 180 | .get(tensor.buffer() as usize) 181 | .data() 182 | .unwrap() 183 | .bytes() 184 | .chunks_exact(len / t.shape[0]) 185 | .map(|m| { 186 | DMatrix::from_row_iterator( 187 | t.shape[1], 188 | t.shape[2], 189 | m.chunks_exact(len / (t.shape[0] * t.shape[1] * t.shape[2])) 190 | .map(|v| { 191 | v.chunks_exact(size_of::()) 192 | .map(|e| T::from_le_bytes(e)) 193 | .collect::>() 194 | }), 195 | ) 196 | }) 197 | .collect::>(); 198 | t.buffer = TokenBuffer4D::from(data); 199 | t 200 | } 201 | 202 | /// Returns the tokens of the [`Self`] type. 
203 | pub fn type_tokens(&self) -> TokenStream2 { 204 | let ty = parse_str::(type_name::()).unwrap(); 205 | let shape = &self.shape; 206 | let quants = self.scale.len(); 207 | quote!(microflow::tensor::Tensor4D<#ty, #(#shape),*, #quants>) 208 | } 209 | } 210 | 211 | impl ToTokens for TokenTensor4D { 212 | fn to_tokens(&self, tokens: &mut TokenStream2) { 213 | let buffer = &self.buffer; 214 | let scale = &self.scale; 215 | let zero_point = &self.zero_point; 216 | 217 | let ts = quote! { 218 | microflow::tensor::Tensor4D::new( 219 | #buffer, 220 | [#(#scale),*], 221 | [#(#zero_point),*] 222 | ) 223 | }; 224 | ts.to_tokens(tokens); 225 | } 226 | } 227 | 228 | #[cfg(test)] 229 | mod tests { 230 | use nalgebra::dmatrix; 231 | 232 | use super::*; 233 | 234 | fn setup_2d() -> TokenTensor2D { 235 | TokenTensor2D { 236 | buffer: TokenBuffer2D::from(dmatrix![ 237 | 1, 2, 3; 238 | 4, 5, 6 239 | ]), 240 | shape: vec![2, 3], 241 | scale: vec![0.7], 242 | zero_point: vec![8], 243 | } 244 | } 245 | 246 | fn setup_4d() -> TokenTensor4D { 247 | TokenTensor4D { 248 | buffer: TokenBuffer4D::from(vec![ 249 | dmatrix![ 250 | vec![9, 10], vec![11, 12], vec![13, 14]; 251 | vec![15, 16], vec![17, 18], vec![19, 20] 252 | ], 253 | dmatrix![ 254 | vec![21, 22], vec![23, 24], vec![25, 26]; 255 | vec![27, 28], vec![29, 30], vec![31, 32] 256 | ], 257 | ]), 258 | shape: vec![2, 2, 3, 2], 259 | scale: vec![0.33, 0.34], 260 | zero_point: vec![35, 36], 261 | } 262 | } 263 | 264 | #[test] 265 | fn view_padding_to_tokens() { 266 | let padding = TokenTensorViewPadding::from(Padding::VALID); 267 | assert_eq!( 268 | padding.to_token_stream().to_string(), 269 | quote!(microflow::tensor::TensorViewPadding::Valid).to_string() 270 | ); 271 | } 272 | 273 | #[test] 274 | fn tensor_2d_type_tokens() { 275 | let tensor = setup_2d(); 276 | assert_eq!( 277 | tensor.type_tokens().to_string(), 278 | quote!(microflow::tensor::Tensor2D).to_string(), 279 | ) 280 | } 281 | 282 | #[test] 283 | fn tensor_2d_to_tokens() { 
284 | let tensor = setup_2d(); 285 | let buffer = &tensor.buffer; 286 | assert_eq!( 287 | tensor.to_token_stream().to_string(), 288 | quote! { 289 | microflow::tensor::Tensor2D::new( 290 | #buffer, 291 | [0.7f32], 292 | [8i8] 293 | ) 294 | } 295 | .to_string() 296 | ); 297 | } 298 | 299 | #[test] 300 | fn tensor_4d_type_tokens() { 301 | let tensor = setup_4d(); 302 | assert_eq!( 303 | tensor.type_tokens().to_string(), 304 | quote!(microflow::tensor::Tensor4D) 305 | .to_string(), 306 | ) 307 | } 308 | 309 | #[test] 310 | fn tensor_4d_to_tokens() { 311 | let tensor = setup_4d(); 312 | let buffer = &tensor.buffer; 313 | assert_eq!( 314 | tensor.to_token_stream().to_string(), 315 | quote! { 316 | microflow::tensor::Tensor4D::new( 317 | #buffer, 318 | [0.33f32, 0.34f32], 319 | [35i8, 36i8] 320 | ) 321 | } 322 | .to_string() 323 | ); 324 | } 325 | } 326 | -------------------------------------------------------------------------------- /models/person_detect.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/models/person_detect.tflite -------------------------------------------------------------------------------- /models/sine.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/models/sine.tflite -------------------------------------------------------------------------------- /models/speech.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/models/speech.tflite -------------------------------------------------------------------------------- /samples/no.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/samples/no.wav
--------------------------------------------------------------------------------
/samples/no_person.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/samples/no_person.bmp
--------------------------------------------------------------------------------
/samples/person.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/samples/person.bmp
--------------------------------------------------------------------------------
/samples/yes.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/samples/yes.wav
--------------------------------------------------------------------------------
/src/activation.rs:
--------------------------------------------------------------------------------
use crate::quantize::{quantize, Quantized};
use core::cmp::{max, min};
use libm::expf;

/// Represents the supported activation functions.
pub enum FusedActivation {
    /// The identity activation function.
    None,
    /// The Rectified Linear Unit (ReLU) function.
    Relu,
    /// The Rectified Linear Unit 6 (ReLU6) function.
    Relu6,
}

/// Performs the Rectified Linear Unit (ReLU) activation function.
///
/// # Arguments
/// * `input` - The input value of type `T`
/// * `zero_point` - The quantization zero point
///
pub fn relu<T: Quantized>(input: T, zero_point: T) -> T {
    // In the quantized domain the real value 0 maps to `zero_point`,
    // so clamping from below at the zero point implements max(x, 0).
    max(input, zero_point)
}

/// Performs the Rectified Linear Unit 6 (ReLU6) activation function.
26 | /// 27 | /// # Arguments 28 | /// * `input` - The input value of type `T` 29 | /// * `scale` - The quantization scale 30 | /// * `zero_point` - The quantization zero point 31 | /// 32 | pub fn relu6(input: T, scale: f32, zero_point: T) -> T { 33 | min(relu(input, zero_point), quantize(6., scale, zero_point)) 34 | } 35 | 36 | /// Performs the Softmax activation function. 37 | /// 38 | /// # Arguments 39 | /// * `input` - The floating-point input value 40 | /// * `sum` - The sum of the outcomes 41 | /// * `scale` - The quantization scale 42 | /// * `zero_point` - The quantization zero point 43 | /// 44 | pub fn softmax(input: f32, sum: f32, scale: f32, zero_point: T) -> T { 45 | quantize(expf(input) / sum, scale, zero_point) 46 | } 47 | 48 | #[cfg(test)] 49 | mod tests { 50 | use super::*; 51 | 52 | const SCALE: f32 = 0.1; 53 | const ZERO_POINT: i8 = 2; 54 | 55 | const RELU_INACTIVE_INPUT: i8 = 1; 56 | const RELU_ACTIVE_INPUT: i8 = 3; 57 | 58 | const RELU6_SATURATED_INPUT: i8 = 63; 59 | const RELU6_SATURATION_POINT: i8 = 62; 60 | 61 | const SOFTMAX_INPUT_1: f32 = 1.; 62 | const SOFTMAX_INPUT_2: f32 = 2.; 63 | const SOFTMAX_INPUT_3: f32 = 3.; 64 | const SOFTMAX_SUM: f32 = 30.192_875; 65 | const SOFTMAX_OUTPUT_1: i8 = 3; 66 | const SOFTMAX_TOTAL_PROBABILITY: i8 = 16; 67 | 68 | #[test] 69 | fn relu_inactive() { 70 | assert_eq!(relu(RELU_INACTIVE_INPUT, ZERO_POINT), ZERO_POINT); 71 | } 72 | 73 | #[test] 74 | fn relu_active() { 75 | assert_eq!(relu(RELU_ACTIVE_INPUT, ZERO_POINT), RELU_ACTIVE_INPUT); 76 | } 77 | 78 | #[test] 79 | fn relu6_saturated() { 80 | assert_eq!( 81 | relu6(RELU6_SATURATED_INPUT, SCALE, ZERO_POINT), 82 | RELU6_SATURATION_POINT 83 | ); 84 | } 85 | 86 | #[test] 87 | fn softmax_active() { 88 | assert_eq!( 89 | softmax(SOFTMAX_INPUT_1, SOFTMAX_SUM, SCALE, ZERO_POINT), 90 | SOFTMAX_OUTPUT_1 91 | ); 92 | } 93 | 94 | #[test] 95 | fn softmax_total() { 96 | let total = softmax(SOFTMAX_INPUT_1, SOFTMAX_SUM, SCALE, ZERO_POINT) 97 | + 
softmax(SOFTMAX_INPUT_2, SOFTMAX_SUM, SCALE, ZERO_POINT) 98 | + softmax(SOFTMAX_INPUT_3, SOFTMAX_SUM, SCALE, ZERO_POINT); 99 | assert_eq!(total, SOFTMAX_TOTAL_PROBABILITY); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/buffer.rs: -------------------------------------------------------------------------------- 1 | use nalgebra::SMatrix; 2 | 3 | /// Represents a 2-dimensional buffer. 4 | /// A 2-dimensional buffer is composed by a [`SMatrix`] of values `T`. 5 | pub type Buffer2D = SMatrix; 6 | 7 | /// Represents a 4-dimensional buffer. 8 | /// A 4-dimensional buffer is composed by an array of [`Buffer2D`] containing an array of values 9 | /// `T`. 10 | pub type Buffer4D< 11 | T, 12 | const BATCHES: usize, 13 | const ROWS: usize, 14 | const COLUMNS: usize, 15 | const CHANNELS: usize, 16 | > = [Buffer2D<[T; CHANNELS], ROWS, COLUMNS>; BATCHES]; 17 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! [![crates.io](https://img.shields.io/crates/v/microflow)](https://crates.io/crates/microflow) 2 | //! [![docs.rs](https://img.shields.io/docsrs/microflow)](https://docs.rs/microflow) 3 | //! [![github](https://img.shields.io/github/actions/workflow/status/matteocarnelos/microflow-rs/cargo.yml?branch=main)](https://github.com/matteocarnelos/microflow-rs/actions/workflows/cargo.yml) 4 | //! 5 | //! A robust and efficient TinyML inference engine for embedded systems. 
6 | 7 | #![no_std] 8 | 9 | pub use microflow_macros::*; 10 | 11 | pub mod activation; 12 | pub mod buffer; 13 | pub mod ops; 14 | pub mod quantize; 15 | pub mod tensor; 16 | -------------------------------------------------------------------------------- /src/ops/average_pool_2d.rs: -------------------------------------------------------------------------------- 1 | use core::array; 2 | use libm::roundf; 3 | 4 | use nalgebra::Const; 5 | use simba::scalar::SupersetOf; 6 | 7 | use crate::activation::{relu, relu6, FusedActivation}; 8 | use crate::buffer::Buffer2D; 9 | use crate::quantize::Quantized; 10 | use crate::tensor::{Tensor4D, TensorView, TensorViewPadding}; 11 | 12 | pub struct AveragePool2DOptions { 13 | pub fused_activation: FusedActivation, 14 | pub view_padding: TensorViewPadding, 15 | pub strides: (usize, usize), 16 | } 17 | 18 | /// Performs the AveragePool2D operation. 19 | /// Returns a 4-dimensional output tensor containing the result of the operation. 20 | /// 21 | /// # Arguments 22 | /// * `input` - The 4-dimensional input tensor 23 | /// * `_filter_shape` - The phantom shape of the filter 24 | /// * `output_scale` - The scale of the resulting output tensor 25 | /// * `output_zero_point` - The zero point of the resulting output tensor 26 | /// * `options` - Operator's options as an [`AveragePool2DOptions`] struct 27 | /// * `constants` - Constant values coming from the pre-processing phase 28 | /// 29 | pub fn average_pool_2d< 30 | T: Quantized, 31 | const INPUT_ROWS: usize, 32 | const INPUT_COLS: usize, 33 | const INPUT_CHANS: usize, 34 | const FILTER_ROWS: usize, 35 | const FILTER_COLS: usize, 36 | const OUTPUT_ROWS: usize, 37 | const OUTPUT_COLS: usize, 38 | >( 39 | input: Tensor4D, 40 | _filter_shape: (Const, Const), 41 | output_scale: [f32; 1], 42 | output_zero_point: [T; 1], 43 | options: AveragePool2DOptions, 44 | constants: (f32, f32), 45 | ) -> Tensor4D { 46 | let output = [Buffer2D::from_fn(|i, j| { 47 | // Extract the view using the view 
extraction algorithm 48 | let view: TensorView = 49 | input.view((i, j), 0, options.view_padding, options.strides); 50 | // Compute the average pooling for each channel 51 | array::from_fn(|c| { 52 | let x = 1. / view.len as f32 53 | * view 54 | .buffer 55 | .fold(0i32, |acc, a| acc + i32::from_subset(&a[c])) as f32; 56 | let y = T::from_superset_unchecked(&roundf(constants.0 * x + constants.1)); 57 | // Apply the fused activation function (if any) 58 | match options.fused_activation { 59 | FusedActivation::None => y, 60 | FusedActivation::Relu => relu(y, output_zero_point[0]), 61 | FusedActivation::Relu6 => relu6(y, output_scale[0], output_zero_point[0]), 62 | } 63 | }) 64 | })]; 65 | Tensor4D::new(output, output_scale, output_zero_point) 66 | } 67 | 68 | #[cfg(test)] 69 | mod tests { 70 | use nalgebra::matrix; 71 | 72 | use super::*; 73 | 74 | const INPUT: Tensor4D = Tensor4D { 75 | buffer: [matrix![ 76 | [1, 2], [3, 4], [5, 6]; 77 | [7, 8], [9, 10], [11, 12] 78 | ]], 79 | scale: [0.13], 80 | zero_point: [14], 81 | }; 82 | const FILTER_SHAPE: (Const<2>, Const<3>) = (Const, Const); 83 | const OUTPUT_SCALE: [f32; 1] = [0.15]; 84 | const OUTPUT_ZERO_POINT: [i8; 1] = [16]; 85 | const OPTIONS: AveragePool2DOptions = AveragePool2DOptions { 86 | fused_activation: FusedActivation::None, 87 | view_padding: TensorViewPadding::Same, 88 | strides: (1, 1), 89 | }; 90 | const CONSTANTS: (f32, f32) = (0.866_666_7, 3.866_666_6); 91 | const OUTPUT: Tensor4D = Tensor4D { 92 | buffer: [matrix![ 93 | [8, 9], [9, 10], [10, 11]; 94 | [11, 12], [12, 13], [13, 13] 95 | ]], 96 | scale: [0.15], 97 | zero_point: [16], 98 | }; 99 | 100 | #[test] 101 | fn average_pool_2d_layer() { 102 | assert_eq!( 103 | average_pool_2d( 104 | INPUT, 105 | FILTER_SHAPE, 106 | OUTPUT_SCALE, 107 | OUTPUT_ZERO_POINT, 108 | OPTIONS, 109 | CONSTANTS, 110 | ), 111 | OUTPUT 112 | ); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/ops/conv_2d.rs: 
-------------------------------------------------------------------------------- 1 | use core::array; 2 | use libm::roundf; 3 | 4 | use simba::scalar::SupersetOf; 5 | 6 | use crate::activation::{relu, relu6, FusedActivation}; 7 | use crate::buffer::Buffer2D; 8 | use crate::quantize::Quantized; 9 | use crate::tensor::{Tensor4D, TensorView, TensorViewPadding}; 10 | 11 | pub struct Conv2DOptions { 12 | pub fused_activation: FusedActivation, 13 | pub view_padding: TensorViewPadding, 14 | pub strides: (usize, usize), 15 | } 16 | 17 | /// Performs the Conv2D operation. 18 | /// Returns a 4-dimensional output tensor containing the result of the operation. 19 | /// 20 | /// # Arguments 21 | /// * `input` - The 4-dimensional input tensor 22 | /// * `filters` - The 4-dimensional tensor representing the filters of the operator 23 | /// * `output_scale` - The scale of the resulting output tensor 24 | /// * `output_zero_point` - The zero point of the resulting output tensor 25 | /// * `options` - Operator's options as an [`Conv2DOptions`] struct 26 | /// * `constants` - Constant values coming from the pre-processing phase 27 | /// 28 | pub fn conv_2d< 29 | T: Quantized, 30 | const INPUT_ROWS: usize, 31 | const INPUT_COLS: usize, 32 | const INPUT_CHANS: usize, 33 | const FILTERS_BATCHES: usize, 34 | const FILTERS_ROWS: usize, 35 | const FILTERS_COLS: usize, 36 | const FILTERS_QUANTS: usize, 37 | const OUTPUT_ROWS: usize, 38 | const OUTPUT_COLS: usize, 39 | >( 40 | input: Tensor4D, 41 | filters: &Tensor4D, 42 | output_scale: [f32; 1], 43 | output_zero_point: [T; 1], 44 | options: Conv2DOptions, 45 | constants: ( 46 | Buffer2D, 47 | Buffer2D, 48 | ), 49 | ) -> Tensor4D { 50 | let output = [Buffer2D::from_fn(|i, j| { 51 | // Extract the view using the view extraction algorithm 52 | let view: TensorView = 53 | input.view((i, j), 0, options.view_padding, options.strides); 54 | // Perform the convolution for each filter batch 55 | array::from_fn(|b| { 56 | let input_zero_point = 
i32::from_subset(&input.zero_point[0]); 57 | let filters_zero_point = i32::from_subset( 58 | &filters 59 | .zero_point 60 | .get(b) 61 | .copied() 62 | .unwrap_or(filters.zero_point[0]), 63 | ); 64 | let x = ( 65 | // Perform the dot product between the input region and the filter 66 | view.buffer.zip_fold(&filters.buffer[b], 0i32, |acc, v, f| { 67 | acc + v 68 | .iter() 69 | .zip(f.iter()) 70 | .map(|(e1, e2)| i32::from_subset(e1) * i32::from_subset(e2)) 71 | .sum::() 72 | }), 73 | // Perform the 3-dimensional component-sum of the view 74 | view.buffer.fold(0i32, |acc, a| { 75 | acc + a.iter().fold(0i32, |acc, e| acc + i32::from_subset(e)) 76 | }) * filters_zero_point, 77 | ); 78 | // Elaborate the constants 79 | let constants = ( 80 | constants.0, 81 | constants.1, 82 | input_zero_point 83 | * filters.buffer[b].zip_fold(&view.mask, 0i32, |acc, f, m| { 84 | if m { 85 | acc + f.iter().fold(0i32, |acc, e| acc + i32::from_subset(e)) 86 | } else { 87 | acc 88 | } 89 | }), 90 | view.len as i32 * INPUT_CHANS as i32 * input_zero_point * filters_zero_point, 91 | ); 92 | // Combine the constant values and the variants to obtain the output 93 | let y = T::from_superset_unchecked(&roundf( 94 | f32::from_subset(&output_zero_point[0]) 95 | + constants.0[b] 96 | + constants.1.get(b).copied().unwrap_or(constants.1[0]) 97 | * f32::from_subset(&(x.0 - x.1 - constants.2 + constants.3)), 98 | )); 99 | // Apply the fused activation function (if any) 100 | match options.fused_activation { 101 | FusedActivation::None => y, 102 | FusedActivation::Relu => relu(y, output_zero_point[0]), 103 | FusedActivation::Relu6 => relu6(y, output_scale[0], output_zero_point[0]), 104 | } 105 | }) 106 | })]; 107 | Tensor4D::new(output, output_scale, output_zero_point) 108 | } 109 | 110 | #[cfg(test)] 111 | mod tests { 112 | use nalgebra::matrix; 113 | 114 | use crate::tensor::Tensor2D; 115 | 116 | use super::*; 117 | 118 | const INPUT: Tensor4D = Tensor4D { 119 | buffer: [matrix![ 120 | [1, 2], [3, 4], 
[5, 6];
            [7, 8], [9, 10], [11, 12]
        ]],
        scale: [0.13],
        zero_point: [14],
    };
    // Two filter batches with per-channel quantization (QUANTS = 2).
    const FILTERS: Tensor4D<i8, 2, 2, 3, 2, 2> = Tensor4D {
        buffer: [
            matrix![
                [15, 16], [17, 18], [19, 20];
                [21, 22], [23, 24], [25, 26]
            ],
            matrix![
                [27, 28], [29, 30], [31, 32];
                [33, 34], [35, 36], [37, 38]
            ],
        ],
        scale: [0.39, 0.40],
        zero_point: [41, 42],
    };
    // Biases are folded into CONSTANTS during pre-processing; kept for reference only.
    const _BIASES: Tensor2D<i32, 2, 1, 2> = Tensor2D {
        buffer: matrix![
            43;
            44
        ],
        scale: [0.45, 0.46],
        zero_point: [47, 48],
    };
    const OUTPUT_SCALE: [f32; 1] = [0.49];
    const OUTPUT_ZERO_POINT: [i8; 1] = [50];
    const OPTIONS: Conv2DOptions = Conv2DOptions {
        fused_activation: FusedActivation::None,
        view_padding: TensorViewPadding::Same,
        strides: (1, 1),
    };
    // Pre-computed per-output-channel constants (see the operator's pre-processing phase).
    const CONSTANTS: (Buffer2D<f32, 2, 1>, Buffer2D<f32, 2, 1>) = (
        matrix![-3.673_469_4; -3.755_102],
        matrix![0.103_469_39; 0.106_122_45],
    );
    const OUTPUT: Tensor4D<i8, 1, 2, 3, 2, 1> = Tensor4D {
        buffer: [matrix![
            [127, 116], [127, 127], [127, 113];
            [98, 74], [114, 84], [82, 67]
        ]],
        scale: [0.49],
        zero_point: [50],
    };

    #[test]
    fn conv_2d_layer() {
        assert_eq!(
            conv_2d(
                INPUT,
                &FILTERS,
                OUTPUT_SCALE,
                OUTPUT_ZERO_POINT,
                OPTIONS,
                CONSTANTS,
            ),
            OUTPUT
        );
    }
}
--------------------------------------------------------------------------------
/src/ops/depthwise_conv_2d.rs:
--------------------------------------------------------------------------------
use core::array;
use libm::roundf;

use simba::scalar::SupersetOf;

use crate::activation::{relu, relu6, FusedActivation};
use crate::buffer::Buffer2D;
use crate::quantize::Quantized;
use crate::tensor::{Tensor4D, TensorView, TensorViewPadding};

/// Options for the [`depthwise_conv_2d`] operator.
pub struct DepthwiseConv2DOptions {
    /// Activation function fused into the operator, applied to each output element.
    pub fused_activation: FusedActivation,
    /// Padding strategy used when extracting input views.
    pub view_padding: TensorViewPadding,
    /// Strides of the convolution as (rows, columns).
    pub strides: (usize, usize),
}

/// Performs the DepthwiseConv2D operation.
/// Returns a 4-dimensional output tensor containing the result of the operation.
///
/// # Arguments
/// * `input` - The 4-dimensional input tensor
/// * `weights` - The 4-dimensional tensor representing the weights of the operator
/// * `output_scale` - The scale of the resulting output tensor
/// * `output_zero_point` - The zero point of the resulting output tensor
/// * `options` - Operator's options as a [`DepthwiseConv2DOptions`] struct
/// * `constants` - Constant values coming from the pre-processing phase
///
// NOTE(review): generic argument lists below were reconstructed from usage
// (extraction stripped single-line `<...>` spans) — confirm against upstream.
pub fn depthwise_conv_2d<
    T: Quantized,
    const INPUT_ROWS: usize,
    const INPUT_COLS: usize,
    const INPUT_CHANS: usize,
    const WEIGHTS_ROWS: usize,
    const WEIGHTS_COLS: usize,
    const WEIGHTS_CHANS: usize,
    const WEIGHTS_QUANTS: usize,
    const OUTPUT_ROWS: usize,
    const OUTPUT_COLS: usize,
>(
    input: Tensor4D<T, 1, INPUT_ROWS, INPUT_COLS, INPUT_CHANS, 1>,
    weights: &Tensor4D<T, 1, WEIGHTS_ROWS, WEIGHTS_COLS, WEIGHTS_CHANS, WEIGHTS_QUANTS>,
    output_scale: [f32; 1],
    output_zero_point: [T; 1],
    options: DepthwiseConv2DOptions,
    constants: (
        Buffer2D<f32, WEIGHTS_CHANS, 1>,
        Buffer2D<f32, WEIGHTS_CHANS, 1>,
    ),
) -> Tensor4D<T, 1, OUTPUT_ROWS, OUTPUT_COLS, WEIGHTS_CHANS, 1> {
    let output = [Buffer2D::from_fn(|i, j| {
        // Extract the view using the view extraction algorithm
        let view: TensorView<T, WEIGHTS_ROWS, WEIGHTS_COLS, INPUT_CHANS> =
            input.view((i, j), 0, options.view_padding, options.strides);
        // Perform the convolution for each output channel
        array::from_fn(|c| {
            let input_zero_point = i32::from_subset(&input.zero_point[0]);
            // Per-channel quantization: fall back to quant 0 when only one is present
            let weights_zero_point = i32::from_subset(
                &weights
                    .zero_point
                    .get(c)
                    .copied()
                    .unwrap_or(weights.zero_point[0]),
            );
            let x = (
                // Perform the dot product between the input region and the weights
                view.buffer.zip_fold(&weights.buffer[0], 0i32, |acc, v, w| {
                    acc + i32::from_subset(&v.get(c).copied().unwrap_or(v[0]))
                        * i32::from_subset(&w[c])
                }),
                // Perform the 2-dimensional component-sum of the view for the given channel
                view.buffer.fold(0i32, |acc, a| {
                    acc + i32::from_subset(&a.get(c).copied().unwrap_or(a[0]))
                }) * weights_zero_point,
            );
            // Elaborate the constants (the mask restricts the weight-sum to valid,
            // non-padded positions of the extracted view)
            let constants = (
                constants.0,
                constants.1,
                input_zero_point
                    * weights.buffer[0].zip_fold(&view.mask, 0i32, |acc, w, m| {
                        if m {
                            acc + i32::from_subset(&w[c])
                        } else {
                            acc
                        }
                    }),
                view.len as i32 * input_zero_point * weights_zero_point,
            );
            // Combine the constant values and the variants to obtain the output
            let y = T::from_superset_unchecked(&roundf(
                f32::from_subset(&output_zero_point[0])
                    + constants.0[c]
                    + constants.1.get(c).copied().unwrap_or(constants.1[0])
                        * f32::from_subset(&(x.0 - x.1 - constants.2 + constants.3)),
            ));
            // Apply the fused activation function (if any)
            match options.fused_activation {
                FusedActivation::None => y,
                FusedActivation::Relu => relu(y, output_zero_point[0]),
                FusedActivation::Relu6 => relu6(y, output_scale[0], output_zero_point[0]),
            }
        })
    })];
    Tensor4D::new(output, output_scale, output_zero_point)
}

#[cfg(test)]
mod tests {
    use nalgebra::matrix;

    use crate::tensor::Tensor2D;

    use super::*;

    const INPUT: Tensor4D<i8, 1, 2, 3, 2, 1> = Tensor4D {
        buffer: [matrix![
            [1, 2], [3, 4], [5, 6];
            [7, 8], [9, 10], [11, 12]
        ]],
        scale: [0.13],
        zero_point: [14],
    };
    const WEIGHTS: Tensor4D<i8, 1, 2, 3, 2, 2> = Tensor4D {
        buffer: [matrix![
            [15, 16], [17, 18], [19, 20];
            [21, 22], [23, 24], [25, 26]
        ]],
        scale: [0.27, 0.28],
        zero_point: [29, 30],
    };
    // Biases are folded into CONSTANTS during pre-processing; kept for reference only.
    const _BIASES: Tensor2D<i32, 2, 1, 2> = Tensor2D {
        buffer: matrix![
            31;
            32
        ],
        scale: [0.33, 0.34],
        zero_point: [35, 36],
    };
    const OUTPUT_SCALE: [f32; 1] = [0.37];
    const OUTPUT_ZERO_POINT: [i8; 1] = [38];
    const OPTIONS: DepthwiseConv2DOptions = DepthwiseConv2DOptions {
        fused_activation: FusedActivation::None,
        view_padding: TensorViewPadding::Same,
        strides: (1, 1),
    };
    const CONSTANTS: (Buffer2D<f32, 2, 1>, Buffer2D<f32, 2, 1>) = (
        matrix![-3.567_567_6; -3.675_675_7],
        matrix![0.094_864_86; 0.098_378_378],
    );
    const OUTPUT: Tensor4D<i8, 1, 2, 3, 2, 1> = Tensor4D {
        buffer: [matrix![
            [66, 63], [82, 78], [65, 62];
            [47, 45], [52, 49], [44, 42]
        ]],
        scale: [0.37],
        zero_point: [38],
    };

    #[test]
    fn depthwise_conv_2d_layer() {
        assert_eq!(
            depthwise_conv_2d(
                INPUT,
                &WEIGHTS,
                OUTPUT_SCALE,
                OUTPUT_ZERO_POINT,
                OPTIONS,
                CONSTANTS,
            ),
            OUTPUT
        );
    }
}
--------------------------------------------------------------------------------
/src/ops/fully_connected.rs:
--------------------------------------------------------------------------------
use libm::roundf;
use simba::scalar::SupersetOf;

use crate::activation::{relu, relu6, FusedActivation};
use crate::buffer::Buffer2D;
use crate::quantize::Quantized;
use crate::tensor::Tensor2D;

/// Options for the [`fully_connected`] operator.
pub struct FullyConnectedOptions {
    /// Activation function fused into the operator, applied to each output element.
    pub fused_activation: FusedActivation,
}

/// Performs the FullyConnected operation.
/// Returns a 2-dimensional output tensor containing the result of the operation.
15 | /// 16 | /// # Arguments 17 | /// * `input` - The 2-dimensional input tensor 18 | /// * `weights` - The 2-dimensional tensor representing the weights of the operator 19 | /// * `output_scale` - The scale of the resulting output tensor 20 | /// * `output_zero_point` - The zero point of the resulting output tensor 21 | /// * `options` - Operator's options as an [`FullyConnectedOptions`] struct 22 | /// * `constants` - Constant values coming from the pre-processing phase 23 | /// 24 | pub fn fully_connected< 25 | T: Quantized, 26 | const INPUT_ROWS: usize, 27 | const INPUT_COLS: usize, 28 | const WEIGHTS_COLS: usize, 29 | >( 30 | input: Tensor2D, 31 | weights: &Tensor2D, 32 | output_scale: [f32; 1], 33 | output_zero_point: [T; 1], 34 | options: FullyConnectedOptions, 35 | constants: ( 36 | Buffer2D, 37 | f32, 38 | Buffer2D, 39 | i32, 40 | ), 41 | ) -> Tensor2D { 42 | let x: ( 43 | Buffer2D, 44 | Buffer2D, 45 | ) = ( 46 | // Perform the dot product between the input and the weights 47 | Buffer2D::from_fn(|i, j| { 48 | input 49 | .buffer 50 | .row(i) 51 | .iter() 52 | .zip(weights.buffer.column(j).iter()) 53 | .fold(0i32, |acc, (i, w)| { 54 | acc + i32::from_subset(i) * i32::from_subset(w) 55 | }) 56 | }), 57 | // Perform the row-sum of the weights 58 | Buffer2D::from_fn(|i, _| { 59 | input 60 | .buffer 61 | .row(i) 62 | .fold(0i32, |acc, e| acc + i32::from_subset(&e)) 63 | * i32::from_subset(&weights.zero_point[0]) 64 | }), 65 | ); 66 | // Combine the constant values and the variants to obtain the output 67 | let output = Buffer2D::from_fn(|i, j| { 68 | let y = T::from_superset_unchecked(&roundf( 69 | f32::from_subset(&output_zero_point[0]) 70 | + constants.0[j] 71 | + constants.1 72 | * f32::from_subset(&(x.0[(i, j)] - x.1[i] - constants.2[j] + constants.3)), 73 | )); 74 | // Apply the fused activation function (if any) 75 | match options.fused_activation { 76 | FusedActivation::None => y, 77 | FusedActivation::Relu => relu(y, output_zero_point[0]), 78 | 
FusedActivation::Relu6 => relu6(y, output_scale[0], output_zero_point[0]), 79 | } 80 | }); 81 | Tensor2D::new(output, output_scale, output_zero_point) 82 | } 83 | 84 | #[cfg(test)] 85 | mod tests { 86 | use nalgebra::matrix; 87 | 88 | use super::*; 89 | 90 | const INPUT: Tensor2D = Tensor2D { 91 | buffer: matrix![ 92 | 1, 2, 3; 93 | 4, 5, 6 94 | ], 95 | scale: [0.7], 96 | zero_point: [8], 97 | }; 98 | const WEIGHTS: Tensor2D = Tensor2D { 99 | buffer: matrix![ 100 | 9, 10, 11, 12; 101 | 13, 14, 15, 16; 102 | 17, 18, 19, 20 103 | ], 104 | scale: [0.21], 105 | zero_point: [22], 106 | }; 107 | const _BIASES: Tensor2D = Tensor2D { 108 | buffer: matrix![ 109 | 23; 24; 25; 26 110 | ], 111 | scale: [0.27], 112 | zero_point: [28], 113 | }; 114 | const OUTPUT_SCALE: [f32; 1] = [0.29]; 115 | const OUTPUT_ZERO_POINT: [i8; 1] = [30]; 116 | const OPTIONS: FullyConnectedOptions = FullyConnectedOptions { 117 | fused_activation: FusedActivation::Relu, 118 | }; 119 | const CONSTANTS: (Buffer2D, f32, Buffer2D, i32) = ( 120 | matrix![-4.655_172_3; -3.724_138; -2.793_103_5; -1.862_069], 121 | 0.506_896_56, 122 | matrix![312, 336, 360, 384], 123 | 528, 124 | ); 125 | const OUTPUT: Tensor2D = Tensor2D { 126 | buffer: matrix![ 127 | 112, 103, 95, 87; 128 | 70, 67, 63, 60 129 | ], 130 | scale: [0.29], 131 | zero_point: [30], 132 | }; 133 | 134 | #[test] 135 | fn fully_connected_layer() { 136 | assert_eq!( 137 | fully_connected( 138 | INPUT, 139 | &WEIGHTS, 140 | OUTPUT_SCALE, 141 | OUTPUT_ZERO_POINT, 142 | OPTIONS, 143 | CONSTANTS 144 | ), 145 | OUTPUT 146 | ) 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/ops/mod.rs: -------------------------------------------------------------------------------- 1 | mod average_pool_2d; 2 | mod conv_2d; 3 | mod depthwise_conv_2d; 4 | mod fully_connected; 5 | mod reshape; 6 | mod softmax; 7 | 8 | pub use average_pool_2d::*; 9 | pub use conv_2d::*; 10 | pub use depthwise_conv_2d::*; 11 | pub use 
fully_connected::*;
pub use reshape::*;
pub use softmax::*;
--------------------------------------------------------------------------------
/src/ops/reshape.rs:
--------------------------------------------------------------------------------
/// Performs the Reshape operator.
/// Returns the corresponding output tensor.
///
/// Reshaping is a pure conversion: the target tensor type is produced through
/// the input type's `Into` implementation, so no arithmetic is performed here.
pub fn reshape<InputT, OutputT>(input: InputT) -> OutputT
where
    InputT: Into<OutputT>,
{
    input.into()
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::tensor::{Tensor2D, Tensor4D};
    use nalgebra::matrix;

    const INPUT: Tensor2D<i8, 2, 3, 1> = Tensor2D {
        buffer: matrix![
            1, 2, 3;
            4, 5, 6
        ],
        scale: [0.7],
        zero_point: [8],
    };
    // The 2x3 matrix reshaped into 2 batches of 1x3 single-channel data;
    // scale and zero point are carried over unchanged.
    const OUTPUT: Tensor4D<i8, 2, 1, 3, 1, 1> = Tensor4D {
        buffer: [matrix![[1], [2], [3]], matrix![[4], [5], [6]]],
        scale: [0.7],
        zero_point: [8],
    };

    #[test]
    fn reshape_layer() {
        let output: Tensor4D<i8, 2, 1, 3, 1, 1> = reshape(INPUT);
        assert_eq!(output, OUTPUT);
    }
}
--------------------------------------------------------------------------------
/src/ops/softmax.rs:
--------------------------------------------------------------------------------
use crate::activation;
use crate::quantize::Quantized;
use crate::tensor::Tensor2D;
use libm::expf;
use simba::scalar::SupersetOf;

/// Performs the Softmax activation function as an operator.
/// Returns a 2-dimensional output tensor containing the result of the operation.
9 | /// 10 | /// # Arguments 11 | /// * `input` - The 2-dimensional input tensor 12 | /// * `output_scale` - The scale of the resulting output tensor 13 | /// * `output_zero_point` - The zero point of the resulting output tensor 14 | /// 15 | pub fn softmax( 16 | input: Tensor2D, 17 | output_scale: [f32; 1], 18 | output_zero_point: [T; 1], 19 | ) -> Tensor2D { 20 | let exp = input.buffer.map(|e| f32::from_subset(&e) * input.scale[0]); 21 | let sum = exp.map(expf).sum(); 22 | Tensor2D::new( 23 | exp.map(|e| activation::softmax(e, sum, output_scale[0], output_zero_point[0])), 24 | output_scale, 25 | output_zero_point, 26 | ) 27 | } 28 | 29 | #[cfg(test)] 30 | mod tests { 31 | use super::*; 32 | use nalgebra::matrix; 33 | 34 | const INPUT: Tensor2D = Tensor2D { 35 | buffer: matrix![ 36 | 1, 2, 3; 37 | 4, 5, 6 38 | ], 39 | scale: [0.7], 40 | zero_point: [8], 41 | }; 42 | const OUTPUT_SCALE: [f32; 1] = [0.9]; 43 | const OUTPUT_ZERO_POINT: [i8; 1] = [10]; 44 | const OUTPUT: Tensor2D = Tensor2D { 45 | buffer: matrix![ 46 | 10, 10, 10; 47 | 10, 10, 11 48 | ], 49 | scale: OUTPUT_SCALE, 50 | zero_point: OUTPUT_ZERO_POINT, 51 | }; 52 | 53 | #[test] 54 | fn softmax_layer() { 55 | assert_eq!(softmax(INPUT, OUTPUT_SCALE, OUTPUT_ZERO_POINT), OUTPUT); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/quantize.rs: -------------------------------------------------------------------------------- 1 | use libm::roundf; 2 | use nalgebra::Scalar; 3 | use simba::scalar::{SubsetOf, SupersetOf}; 4 | 5 | /// Represents the trait to constrain a type to be quantized. 6 | pub trait Quantized: Scalar + Copy + Ord + SubsetOf + SubsetOf {} 7 | impl + SubsetOf> Quantized for T {} 8 | 9 | /// Performs quantization on the given floating-point input. 
10 | /// 11 | /// # Arguments 12 | /// * `input` - The input value to quantize 13 | /// * `scale` - The quantization scale 14 | /// * `zero_point` - The quantization zero point 15 | /// 16 | pub fn quantize(input: f32, scale: f32, zero_point: T) -> T { 17 | roundf(input / scale + f32::from_subset(&zero_point)).to_subset_unchecked() 18 | } 19 | 20 | /// Performs dequantization on the given integer input. 21 | /// 22 | /// # Arguments 23 | /// * `input` - The input value to dequantize 24 | /// * `scale` - The quantization scale 25 | /// * `zero_point` - The quantization zero point 26 | /// 27 | pub fn dequantize(input: T, scale: f32, zero_point: T) -> f32 { 28 | scale * (f32::from_subset(&input) - f32::from_subset(&zero_point)) 29 | } 30 | 31 | #[cfg(test)] 32 | mod tests { 33 | use super::*; 34 | 35 | const VALUE: f32 = 1.; 36 | const SCALE: f32 = 0.2; 37 | const ZERO_POINT: i8 = 3; 38 | const VALUE_QUANTIZED: i8 = 8; 39 | const VALUE_DEQUANTIZED: f32 = 1.; 40 | 41 | #[test] 42 | fn quantize_value() { 43 | assert_eq!(quantize(VALUE, SCALE, ZERO_POINT), VALUE_QUANTIZED); 44 | } 45 | 46 | #[test] 47 | fn dequantize_value() { 48 | assert_eq!( 49 | dequantize(VALUE_QUANTIZED, SCALE, ZERO_POINT), 50 | VALUE_DEQUANTIZED 51 | ); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /tests/person_detect.rs: -------------------------------------------------------------------------------- 1 | use microflow::buffer::Buffer2D; 2 | use microflow_macros::model; 3 | use nalgebra::matrix; 4 | 5 | #[model("models/person_detect.tflite")] 6 | struct PersonDetect; 7 | 8 | #[test] 9 | fn person_detect_model() { 10 | let input = [Buffer2D::from_element([0.5])]; 11 | let output = matrix![0.8046875, 0.1953125]; 12 | assert_eq!(PersonDetect::predict(input), output); 13 | } 14 | -------------------------------------------------------------------------------- /tests/sine.rs: 
-------------------------------------------------------------------------------- 1 | use microflow_macros::model; 2 | use nalgebra::matrix; 3 | 4 | #[model("models/sine.tflite")] 5 | struct Sine; 6 | 7 | #[test] 8 | fn sine_model() { 9 | let input = matrix![0.5]; 10 | let output = matrix![0.41348344]; 11 | assert_eq!(Sine::predict(input), output); 12 | } 13 | -------------------------------------------------------------------------------- /tests/speech.rs: -------------------------------------------------------------------------------- 1 | use microflow::buffer::Buffer2D; 2 | use microflow_macros::model; 3 | use nalgebra::matrix; 4 | 5 | #[model("models/speech.tflite")] 6 | struct Speech; 7 | 8 | #[test] 9 | fn speech_model() { 10 | let input = Buffer2D::from_element(0.5); 11 | let output = matrix![0.15625, 0.2734375, 0.2734375, 0.296875]; 12 | assert_eq!(Speech::predict(input), output); 13 | } 14 | --------------------------------------------------------------------------------