├── .github └── workflows │ └── cargo.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── Makefile.toml ├── README.md ├── analysis ├── accuracy │ ├── data │ │ ├── sine-microflow.csv │ │ ├── sine-test-set.csv │ │ └── sine-tflite.csv │ ├── plots │ │ ├── sine-accuracy-comparison.pdf │ │ ├── sine-accuracy.pdf │ │ └── sine-test-set.pdf │ └── sine.ipynb ├── memory │ ├── person_detect.ipynb │ ├── plots │ │ ├── person-detect-memory.pdf │ │ ├── sine-memory.pdf │ │ └── speech-memory.pdf │ ├── sine.ipynb │ └── speech.ipynb └── performance │ ├── data │ ├── person-detect-esp32-microflow.csv │ ├── person-detect-esp32-tflm.csv │ ├── person-detect-nrf52840-microflow.csv │ ├── person-detect-nrf52840-tflm.csv │ ├── sine-esp32-microflow.csv │ ├── sine-esp32-tflm.csv │ ├── sine-nrf52840-microflow.csv │ ├── sine-nrf52840-tflm.csv │ ├── speech-esp32-microflow.csv │ ├── speech-esp32-tflm.csv │ ├── speech-nrf52840-microflow.csv │ └── speech-nrf52840-tflm.csv │ ├── person_detect.ipynb │ ├── plots │ ├── person-detect-esp32-performance.pdf │ ├── person-detect-nrf52840-performance.pdf │ ├── person-detect-performance-comparison.pdf │ ├── sine-esp32-performance.pdf │ ├── sine-nRF52840-performance.pdf │ ├── sine-performance-comparison.pdf │ ├── speech-esp32-performance.pdf │ ├── speech-nrf52840-performance.pdf │ └── speech-performance-comparison.pdf │ ├── sine.ipynb │ └── speech.ipynb ├── assets ├── microflow-logo.png └── microflow-logo.pxd ├── benches ├── person_detect.rs ├── sine.rs └── speech.rs ├── examples ├── arduino-nano33ble │ ├── .cargo │ │ └── config.toml │ ├── Cargo.toml │ ├── Makefile.toml │ ├── examples │ │ ├── person_detect.rs │ │ ├── sine.rs │ │ └── speech.rs │ └── memory.x ├── arduino-uno │ ├── .cargo │ │ └── config.toml │ ├── Cargo.toml │ ├── Makefile.toml │ ├── examples │ │ └── sine.rs │ └── rust-toolchain.toml ├── atsamx7x │ ├── .cargo │ │ └── config.toml │ ├── Cargo.toml │ ├── Makefile.toml │ ├── examples │ │ ├── person_detect.rs │ │ ├── 
sine.rs │ │ └── speech.rs │ └── memory.x ├── esp32 │ ├── .cargo │ │ └── config.toml │ ├── Cargo.toml │ ├── Makefile.toml │ ├── examples │ │ ├── person_detect.rs │ │ ├── sine.rs │ │ └── speech.rs │ └── rust-toolchain.toml ├── person_detect.rs ├── qemu │ ├── .cargo │ │ └── config.toml │ ├── Cargo.toml │ ├── Makefile.toml │ ├── examples │ │ ├── sine.rs │ │ └── speech.rs │ └── memory.x ├── sine.rs └── speech.rs ├── microflow-macros ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── flatbuffers │ ├── tflite.fbs │ └── tflite_generated.rs └── src │ ├── activation.rs │ ├── buffer.rs │ ├── lib.rs │ ├── ops │ ├── average_pool_2d.rs │ ├── conv_2d.rs │ ├── depthwise_conv_2d.rs │ ├── fully_connected.rs │ ├── mod.rs │ ├── reshape.rs │ └── softmax.rs │ ├── quantize.rs │ └── tensor.rs ├── models ├── person_detect.tflite ├── sine.tflite └── speech.tflite ├── samples ├── features │ ├── person_detect.rs │ └── speech.rs ├── no.wav ├── no_person.bmp ├── person.bmp └── yes.wav ├── src ├── activation.rs ├── buffer.rs ├── lib.rs ├── ops │ ├── average_pool_2d.rs │ ├── conv_2d.rs │ ├── depthwise_conv_2d.rs │ ├── fully_connected.rs │ ├── mod.rs │ ├── reshape.rs │ └── softmax.rs ├── quantize.rs └── tensor.rs └── tests ├── person_detect.rs ├── sine.rs └── speech.rs /.github/workflows/cargo.yml: -------------------------------------------------------------------------------- 1 | name: Cargo 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | CLICOLOR_FORCE: 1 11 | CARGO_TERM_COLOR: always 12 | 13 | jobs: 14 | make: 15 | 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Install Crates 21 | run: cargo install cargo-make flip-link espup 22 | - name: Install Nightly Toolchain 23 | run: rustup toolchain install nightly --component rust-src 24 | - name: Install ESP Toolchain 25 | run: espup install 26 | - name: Install AVR Toolchain 27 | run: sudo apt install -y avr-libc gcc-avr pkg-config 28 | - name: 
Add Rust Targets 29 | run: rustup target add thumbv7em-none-eabihf thumbv7m-none-eabi 30 | - name: Make All 31 | run: | 32 | . ~/export-esp.sh 33 | cargo make all 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/macos,windows,linux,clion+all,visualstudiocode,rust,python,jupyternotebooks 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos,windows,linux,clion+all,visualstudiocode,rust,python,jupyternotebooks 3 | 4 | ### CLion+all ### 5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 7 | 8 | # User-specific stuff 9 | .idea/**/workspace.xml 10 | .idea/**/tasks.xml 11 | .idea/**/usage.statistics.xml 12 | .idea/**/dictionaries 13 | .idea/**/shelf 14 | 15 | # AWS User-specific 16 | .idea/**/aws.xml 17 | 18 | # Generated files 19 | .idea/**/contentModel.xml 20 | 21 | # Sensitive or high-churn files 22 | .idea/**/dataSources/ 23 | .idea/**/dataSources.ids 24 | .idea/**/dataSources.local.xml 25 | .idea/**/sqlDataSources.xml 26 | .idea/**/dynamic.xml 27 | .idea/**/uiDesigner.xml 28 | .idea/**/dbnavigator.xml 29 | 30 | # Gradle 31 | .idea/**/gradle.xml 32 | .idea/**/libraries 33 | 34 | # Gradle and Maven with auto-import 35 | # When using Gradle or Maven with auto-import, you should exclude module files, 36 | # since they will be recreated, and may cause churn. Uncomment if using 37 | # auto-import. 
38 | # .idea/artifacts 39 | # .idea/compiler.xml 40 | # .idea/jarRepositories.xml 41 | # .idea/modules.xml 42 | # .idea/*.iml 43 | # .idea/modules 44 | # *.iml 45 | # *.ipr 46 | 47 | # CMake 48 | cmake-build-*/ 49 | 50 | # Mongo Explorer plugin 51 | .idea/**/mongoSettings.xml 52 | 53 | # File-based project format 54 | *.iws 55 | 56 | # IntelliJ 57 | out/ 58 | 59 | # mpeltonen/sbt-idea plugin 60 | .idea_modules/ 61 | 62 | # JIRA plugin 63 | atlassian-ide-plugin.xml 64 | 65 | # Cursive Clojure plugin 66 | .idea/replstate.xml 67 | 68 | # SonarLint plugin 69 | .idea/sonarlint/ 70 | 71 | # Crashlytics plugin (for Android Studio and IntelliJ) 72 | com_crashlytics_export_strings.xml 73 | crashlytics.properties 74 | crashlytics-build.properties 75 | fabric.properties 76 | 77 | # Editor-based Rest Client 78 | .idea/httpRequests 79 | 80 | # Android studio 3.1+ serialized cache file 81 | .idea/caches/build_file_checksums.ser 82 | 83 | ### CLion+all Patch ### 84 | # Ignore everything but code style settings and run configurations 85 | # that are supposed to be shared within teams. 
86 | 87 | .idea/* 88 | 89 | !.idea/codeStyles 90 | !.idea/runConfigurations 91 | 92 | ### JupyterNotebooks ### 93 | # gitignore template for Jupyter Notebooks 94 | # website: http://jupyter.org/ 95 | 96 | .ipynb_checkpoints 97 | */.ipynb_checkpoints/* 98 | 99 | # IPython 100 | profile_default/ 101 | ipython_config.py 102 | 103 | # Remove previous ipynb_checkpoints 104 | # git rm -r .ipynb_checkpoints/ 105 | 106 | ### Linux ### 107 | *~ 108 | 109 | # temporary files which can be created if a process still has a handle open of a deleted file 110 | .fuse_hidden* 111 | 112 | # KDE directory preferences 113 | .directory 114 | 115 | # Linux trash folder which might appear on any partition or disk 116 | .Trash-* 117 | 118 | # .nfs files are created when an open file is removed but is still being accessed 119 | .nfs* 120 | 121 | ### macOS ### 122 | # General 123 | .DS_Store 124 | .AppleDouble 125 | .LSOverride 126 | 127 | # Icon must end with two \r 128 | Icon 129 | 130 | 131 | # Thumbnails 132 | ._* 133 | 134 | # Files that might appear in the root of a volume 135 | .DocumentRevisions-V100 136 | .fseventsd 137 | .Spotlight-V100 138 | .TemporaryItems 139 | .Trashes 140 | .VolumeIcon.icns 141 | .com.apple.timemachine.donotpresent 142 | 143 | # Directories potentially created on remote AFP share 144 | .AppleDB 145 | .AppleDesktop 146 | Network Trash Folder 147 | Temporary Items 148 | .apdisk 149 | 150 | ### macOS Patch ### 151 | # iCloud generated files 152 | *.icloud 153 | 154 | ### Python ### 155 | # Byte-compiled / optimized / DLL files 156 | __pycache__/ 157 | *.py[cod] 158 | *$py.class 159 | 160 | # C extensions 161 | *.so 162 | 163 | # Distribution / packaging 164 | .Python 165 | build/ 166 | develop-eggs/ 167 | dist/ 168 | downloads/ 169 | eggs/ 170 | .eggs/ 171 | lib/ 172 | lib64/ 173 | parts/ 174 | sdist/ 175 | var/ 176 | wheels/ 177 | share/python-wheels/ 178 | *.egg-info/ 179 | .installed.cfg 180 | *.egg 181 | MANIFEST 182 | 183 | # PyInstaller 184 | # Usually 
these files are written by a python script from a template 185 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 186 | *.manifest 187 | *.spec 188 | 189 | # Installer logs 190 | pip-log.txt 191 | pip-delete-this-directory.txt 192 | 193 | # Unit test / coverage reports 194 | htmlcov/ 195 | .tox/ 196 | .nox/ 197 | .coverage 198 | .coverage.* 199 | .cache 200 | nosetests.xml 201 | coverage.xml 202 | *.cover 203 | *.py,cover 204 | .hypothesis/ 205 | .pytest_cache/ 206 | cover/ 207 | 208 | # Translations 209 | *.mo 210 | *.pot 211 | 212 | # Django stuff: 213 | *.log 214 | local_settings.py 215 | db.sqlite3 216 | db.sqlite3-journal 217 | 218 | # Flask stuff: 219 | instance/ 220 | .webassets-cache 221 | 222 | # Scrapy stuff: 223 | .scrapy 224 | 225 | # Sphinx documentation 226 | docs/_build/ 227 | 228 | # PyBuilder 229 | .pybuilder/ 230 | target/ 231 | 232 | # Jupyter Notebook 233 | 234 | # IPython 235 | 236 | # pyenv 237 | # For a library or package, you might want to ignore these files since the code is 238 | # intended to run in multiple environments; otherwise, check them in: 239 | # .python-version 240 | 241 | # pipenv 242 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 243 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 244 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 245 | # install all needed dependencies. 246 | #Pipfile.lock 247 | 248 | # poetry 249 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 250 | # This is especially recommended for binary packages to ensure reproducibility, and is more 251 | # commonly ignored for libraries. 
252 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 253 | #poetry.lock 254 | 255 | # pdm 256 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 257 | #pdm.lock 258 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 259 | # in version control. 260 | # https://pdm.fming.dev/#use-with-ide 261 | .pdm.toml 262 | 263 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 264 | __pypackages__/ 265 | 266 | # Celery stuff 267 | celerybeat-schedule 268 | celerybeat.pid 269 | 270 | # SageMath parsed files 271 | *.sage.py 272 | 273 | # Environments 274 | .env 275 | .venv 276 | env/ 277 | venv/ 278 | ENV/ 279 | env.bak/ 280 | venv.bak/ 281 | 282 | # Spyder project settings 283 | .spyderproject 284 | .spyproject 285 | 286 | # Rope project settings 287 | .ropeproject 288 | 289 | # mkdocs documentation 290 | /site 291 | 292 | # mypy 293 | .mypy_cache/ 294 | .dmypy.json 295 | dmypy.json 296 | 297 | # Pyre type checker 298 | .pyre/ 299 | 300 | # pytype static type analyzer 301 | .pytype/ 302 | 303 | # Cython debug symbols 304 | cython_debug/ 305 | 306 | # PyCharm 307 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 308 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 309 | # and can be added to the global gitignore or merged into this file. For a more nuclear 310 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
311 | #.idea/ 312 | 313 | ### Python Patch ### 314 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 315 | poetry.toml 316 | 317 | # ruff 318 | .ruff_cache/ 319 | 320 | ### Rust ### 321 | # Generated by Cargo 322 | # will have compiled files and executables 323 | debug/ 324 | 325 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 326 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 327 | Cargo.lock 328 | 329 | # These are backup files generated by rustfmt 330 | **/*.rs.bk 331 | 332 | # MSVC Windows builds of rustc generate these, which store debugging information 333 | *.pdb 334 | 335 | ### VisualStudioCode ### 336 | .vscode/* 337 | !.vscode/settings.json 338 | !.vscode/tasks.json 339 | !.vscode/launch.json 340 | !.vscode/extensions.json 341 | !.vscode/*.code-snippets 342 | 343 | # Local History for Visual Studio Code 344 | .history/ 345 | 346 | # Built Visual Studio Code Extensions 347 | *.vsix 348 | 349 | ### VisualStudioCode Patch ### 350 | # Ignore all local history of files 351 | .history 352 | .ionide 353 | 354 | ### Windows ### 355 | # Windows thumbnail cache files 356 | Thumbs.db 357 | Thumbs.db:encryptable 358 | ehthumbs.db 359 | ehthumbs_vista.db 360 | 361 | # Dump file 362 | *.stackdump 363 | 364 | # Folder config file 365 | [Dd]esktop.ini 366 | 367 | # Recycle Bin used on file shares 368 | $RECYCLE.BIN/ 369 | 370 | # Windows Installer files 371 | *.cab 372 | *.msi 373 | *.msix 374 | *.msm 375 | *.msp 376 | 377 | # Windows shortcuts 378 | *.lnk 379 | 380 | # End of https://www.toptal.com/developers/gitignore/api/macos,windows,linux,clion+all,visualstudiocode,rust,python,jupyternotebooks 381 | 382 | # JupyterLab files 383 | .jupyter 384 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 
Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ## [0.1.3] - 2024-06-01 11 | 12 | ### Fixed 13 | 14 | - Update to new `esp-hal` version 15 | 16 | ## [0.1.2] - 2024-02-09 17 | 18 | ### Added 19 | 20 | - MicroFlow is now available on [crates.io](https://crates.io/crates/microflow) 🎉 21 | 22 | ### Fixed 23 | 24 | - Fix Arduino Uno example 25 | - Update to new `esp-hal` version 26 | 27 | ### Changed 28 | 29 | - Minor documentation changes 30 | 31 | ## [0.1.0] - 2023-07-02 32 | 33 | Initial release. 34 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "microflow" 3 | description = "A robust and efficient TinyML inference engine" 4 | authors = ["Matteo Carnelos "] 5 | documentation = "https://docs.rs/microflow" 6 | repository = "https://github.com/matteocarnelos/microflow-rs" 7 | categories = ["embedded", "no-std", "science"] 8 | keywords = ["tinyml"] 9 | license = "MIT OR Apache-2.0" 10 | version = "0.1.3" 11 | edition = "2021" 12 | include = [ 13 | "/src/**", 14 | "/benches/**", 15 | "/tests/**", 16 | "/models/**", 17 | "/Cargo.toml", 18 | "/LICENSE-MIT", 19 | "/LICENSE-APACHE", 20 | "/README.md", 21 | ] 22 | 23 | [dependencies] 24 | microflow-macros = { version = "0.1", path = "microflow-macros" } 25 | nalgebra = { version = "0.32", default-features = false, features = ["macros"] } 26 | simba = { version = "0.8", default-features = false } 27 | libm = "0.2" 28 | 29 | [dev-dependencies] 30 | csv = "1.2" 31 | criterion = "0.5" 32 | 33 | [[bench]] 34 | name = "sine" 35 | harness = false 36 | 37 | [[bench]] 38 | name = "speech" 39 | harness = false 40 | 41 | [[bench]] 42 | 
name = "person_detect" 43 | harness = false 44 | 45 | [workspace] 46 | members = ["microflow-macros"] 47 | exclude = ["examples"] 48 | 49 | [patch.crates-io] 50 | nalgebra = { git = "https://github.com/matteocarnelos/nalgebra" } 51 | 52 | [profile.release] 53 | lto = true 54 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Matteo Carnelos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile.toml: -------------------------------------------------------------------------------- 1 | [env] 2 | CARGO_MAKE_EXTEND_WORKSPACE_MAKEFILE = true 3 | 4 | [config] 5 | default_to_workspace = false 6 | 7 | [tasks.default] 8 | alias = "microflow" 9 | 10 | [tasks.all] 11 | dependencies = [ 12 | "examples-arduino-nano33ble", 13 | "examples-arduino-uno", 14 | "examples-atsamx7x", 15 | "examples-esp32", 16 | "examples-qemu", 17 | ] 18 | run_task = { name = "microflow", fork = true } 19 | 20 | [tasks.microflow] 21 | workspace = true 22 | dependencies = [ 23 | "build", 24 | "examples-compile", 25 | "test", 26 | "clippy", 27 | "check-format", 28 | "docs", 29 | ] 30 | 31 | [tasks.examples-arduino-nano33ble] 32 | cwd = "examples/arduino-nano33ble" 33 | command = "cargo" 34 | args = ["make"] 35 | 36 | [tasks.examples-arduino-uno] 37 | cwd = "examples/arduino-uno" 38 | command = "cargo" 39 | args = ["make"] 40 | 41 | [tasks.examples-atsamx7x] 42 | cwd = "examples/atsamx7x" 43 | command = "cargo" 44 | args = ["make"] 45 | 46 | [tasks.examples-esp32] 47 | cwd = "examples/esp32" 48 | command = "cargo" 49 | args = ["+esp", "make"] 50 | install_crate = false 51 | 52 | [tasks.examples-qemu] 53 | cwd = "examples/qemu" 54 | command = "cargo" 55 | args = ["make"] 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 |

MicroFlow

6 |

A robust and efficient TinyML inference engine

7 |

8 | 9 | 10 | 11 |

12 | 13 |
14 | 15 | MicroFlow is a robust and efficient TinyML inference engine designed for deploying machine learning models on embedded systems. 16 | It was developed by Matteo Carnelos as part of his master's thesis project at the [University of Padova](https://www.unipd.it/en/) in collaboration with [Grepit AB](https://github.com/GrepitAB). 17 | 18 | MicroFlow uses a compiler-based approach, resulting in the following engine structure: 19 | 20 | ```mermaid 21 | graph LR 22 | subgraph host[Host] 23 | model(Neural Network Model) --> compiler(MicroFlow Compiler) 24 | end 25 | subgraph target[Target] 26 | code(Generated Source Code) --- weights[(Weights)] 27 | code --- runtime(MicroFlow Runtime) 28 | end 29 | compiler --> code 30 | compiler --> weights 31 | ``` 32 | 33 | MicroFlow consists of two primary components: the compiler, represented by the `microflow-macros` crate, and the runtime, represented by the `microflow` crate. 34 | The compiler, which runs prior to the Rust compiler, is responsible for parsing and pre-processing the model. 35 | It generates the necessary source code to enable inference on the model. 36 | On the other hand, the runtime is a `[no_std]` component designed to run on the target MCU. 37 | It encompasses the implementation of operators, activation functions, and quantization procedures. 38 | 39 | ## Usage 40 | 41 | MicroFlow utilizes Rust [Procedural Macros](https://doc.rust-lang.org/reference/procedural-macros.html) as its user interface. 42 | By applying the `model` macro to a `struct` and providing the model's path, the MicroFlow compiler generates a `predict()` method. 43 | This method can be called to perform inference on the given model. 44 | Currently, MicroFlow only supports models in the TensorFlow Lite format (`.tflite`). 
45 | 46 | Here is a minimal example showcasing the usage of MicroFlow: 47 | 48 | ```rust ignore 49 | use microflow::model; 50 | 51 | #[model("path/to/model.tflite")] 52 | struct MyModel; 53 | 54 | fn main() { 55 | let prediction = MyModel::predict(input_data); 56 | } 57 | ``` 58 | 59 | **[Documentation](https://docs.rs/microflow)** 60 | 61 | ## Examples 62 | 63 | The examples provided with MicroFlow can be found in the `examples` folder. 64 | To run an example on a target board, `cd` into the board directory for the example (e.g. `examples/arduino-uno`) and run the command: 65 | ```bash ignore 66 | cargo run --example 67 | ``` 68 | Otherwise, to run the example locally, just run the above command in the root directory. 69 | 70 | > [!NOTE] 71 | > For board examples, you might need to install additional tools and configure the runner to make the example work for your setup. 72 | 73 | ## Supported Operators 74 | 75 | Currently, MicroFlow supports the following operators and activation functions: 76 | 77 | | Operator | Quantized | Tensor Type | 78 | |-------------------|-----------|------------------------| 79 | | `FullyConnected` | ✓ | `Tensor2D` | 80 | | `Conv2D` | ✓ | `Tensor4D` | 81 | | `DepthwiseConv2D` | ✓ | `Tensor4D` | 82 | | `AveragePool2D` | ✓ | `Tensor4D` | 83 | | `Reshape` | ✓ | `Tensor2D`, `Tensor4D` | 84 | 85 | | Activation Function | Quantized | 86 | |---------------------|-----------| 87 | | `ReLU` | ✓ | 88 | | `ReLU6` | ✓ | 89 | | `Softmax` | ✓ | 90 | 91 | These operators and activation functions cover common building blocks for neural networks and enable efficient inference with reduced memory and computational requirements. 92 | However, MicroFlow's development roadmap includes plans for implementing additional operators and activation functions to expand the range of supported models. 
93 | 94 | ## Tested Models and MCUs 95 | 96 | The `examples` folder contains the code used to test MicroFlow on different MCUs, including: 97 | 98 | - ESP32 (32-bit Xtensa) 99 | - ATSAMV71 (32-bit Cortex-M7F) 100 | - nRF52840 (32-bit Cortex-M4F) 101 | - LM3S6965 (32-bit Cortex-M3) 102 | - ATmega328 (8-bit AVR) 103 | 104 | The models used to test the inference engines can be found in the `models` directory. 105 | These models include: 106 | 107 | - A sine predictor 108 | - A speech command recognizer (TinyConv) 109 | - A person detector (MobileNet v1) 110 | 111 | ## Contributing 112 | 113 | Contributors are welcome. 114 | For major changes, please open an issue first to discuss what you would like to change. 115 | Please make sure to update tests as appropriate. 116 | 117 | ## Citation 118 | 119 | The MicroFlow paper has been published in Elsevier's [Internet of Things](https://www.sciencedirect.com/science/article/pii/S2542660525000113) journal and can be cited as follows: 120 | 121 | ```bibtex 122 | @article{CARNELOS2025101498, 123 | title = {MicroFlow: An Efficient Rust-Based Inference Engine for TinyML}, 124 | journal = {Internet of Things}, 125 | volume = {30}, 126 | pages = {101498}, 127 | year = {2025}, 128 | issn = {2542-6605}, 129 | doi = {https://doi.org/10.1016/j.iot.2025.101498}, 130 | url = {https://www.sciencedirect.com/science/article/pii/S2542660525000113}, 131 | author = {Matteo Carnelos and Francesco Pasti and Nicola Bellotto}, 132 | keywords = {TinyML, Rust, Neural networks, Embedded systems, IoT} 133 | } 134 | ``` 135 | 136 | ## License 137 | 138 | Licensed under either of 139 | 140 | * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or ) 141 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or ) 142 | 143 | at your option. 
144 | 145 | Copyright © 2025, [Matteo Carnelos](https://github.com/matteocarnelos) 146 | -------------------------------------------------------------------------------- /analysis/accuracy/plots/sine-accuracy-comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/accuracy/plots/sine-accuracy-comparison.pdf -------------------------------------------------------------------------------- /analysis/accuracy/plots/sine-accuracy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/accuracy/plots/sine-accuracy.pdf -------------------------------------------------------------------------------- /analysis/accuracy/plots/sine-test-set.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/accuracy/plots/sine-test-set.pdf -------------------------------------------------------------------------------- /analysis/accuracy/sine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1865db15-5f71-4bcb-a0c9-7a28e114a5b8", 6 | "metadata": {}, 7 | "source": [ 8 | "# Sine Accuracy" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "b0af9742-26ba-45cb-a72a-2803b4087175", 14 | "metadata": { 15 | "tags": [] 16 | }, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "cc1141ab-13e8-466d-9b79-dc41e3d2d267", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt" 32 | ] 33 | }, 34 | { 
35 | "cell_type": "markdown", 36 | "id": "3ba07279-a4e1-4c33-91c3-ff0ec9df4b36", 37 | "metadata": {}, 38 | "source": [ 39 | "## Test Set" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "2cec1e83-ce72-40db-ab3e-e5375debf9b0", 46 | "metadata": { 47 | "tags": [] 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "test_set = pd.read_csv('data/sine-test-set.csv')\n", 52 | "\n", 53 | "plt.figure(figsize=(4, 3), layout='constrained')\n", 54 | "plt.plot(test_set.x, test_set.y, '.')\n", 55 | "plt.savefig('plots/sine-test-set.pdf')\n", 56 | "plt.show()" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "id": "535f621d-7360-4d6e-b48d-3281819653a6", 62 | "metadata": {}, 63 | "source": [ 64 | "## Predictions" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "baeda57c-d802-4675-ade6-6f9a707ce246", 71 | "metadata": { 72 | "tags": [] 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "tflite = pd.read_csv('data/sine-tflite.csv')\n", 77 | "microflow = pd.read_csv('data/sine-microflow.csv')\n", 78 | "\n", 79 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 80 | "\n", 81 | "ax[0].plot(test_set.x, test_set.y, '.', label='Test values')\n", 82 | "ax[0].plot(tflite.x, tflite.y, '.', label='TFLM predictions')\n", 83 | "ax[0].set_title('TFLM')\n", 84 | "ax[0].legend()\n", 85 | "\n", 86 | "ax[1].plot(test_set.x, test_set.y, '.', label='Test values')\n", 87 | "ax[1].plot(microflow.x, microflow.y, '.', label='MicroFlow predictions')\n", 88 | "ax[1].set_title('MicroFlow')\n", 89 | "ax[1].legend()\n", 90 | "\n", 91 | "plt.savefig('plots/sine-accuracy.pdf')\n", 92 | "plt.show()" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "id": "d5c5adab-ce31-4fcd-9274-79ab15d68d8e", 98 | "metadata": {}, 99 | "source": [ 100 | "## Comparison" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "id": "bed0f6a0-ca18-4fc1-9010-b200fd6546f8", 107 | 
"metadata": { 108 | "tags": [] 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "plt.figure(figsize=(4, 3), layout='constrained')\n", 113 | "plt.plot(test_set.x, test_set.y, '.', label='Test values')\n", 114 | "plt.plot(tflite.x, tflite.y, '.', label='TFLM predictions')\n", 115 | "plt.plot(microflow.x, microflow.y, '.', label='MicroFlow predictions')\n", 116 | "plt.legend()\n", 117 | "plt.savefig('plots/sine-accuracy-comparison.pdf')\n", 118 | "plt.show()" 119 | ] 120 | } 121 | ], 122 | "metadata": { 123 | "kernelspec": { 124 | "display_name": "microflow", 125 | "language": "python", 126 | "name": "microflow" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.10.9" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 5 143 | } 144 | -------------------------------------------------------------------------------- /analysis/memory/person_detect.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8bbab007-c69c-4eb3-8c3f-18ba2739c7f7", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Person Detect Memory" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "4f9d44c9-8dde-4c26-8001-93880f738447", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "eec11aa8-547f-4fed-a4c8-18eb09030a29", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "import seaborn as sns" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "9845fea7-cee7-42c2-9c59-5679b06ed89f", 38 | "metadata": {}, 39 | 
"source": [ 40 | "## Flash & RAM Usage" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "e894c794-bfae-4301-917a-fcfb3f140415", 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "flash = pd.DataFrame({\n", 53 | " 'MCU': ['ESP32', 'nRF52840', 'ATSAMV71'] * 2,\n", 54 | " 'Inference Engine': [*['TFLM'] * 3, *['MicroFlow'] * 3],\n", 55 | " 'Usage (kB)': [*[642.589, 459.648, 0], *[452.512, 391.700, 403.548]]\n", 56 | "})\n", 57 | "ram = pd.DataFrame({\n", 58 | " 'MCU': ['ESP32', 'nRF52840', 'ATSAMV71'] * 2,\n", 59 | " 'Inference Engine': [*['TFLM'] * 3, *['MicroFlow'] * 3],\n", 60 | " 'Usage (kB)': [*[122.428, 143.728, 0], *[96.048, 95.984, 96.512]]\n", 61 | "})\n", 62 | "\n", 63 | "fig, ax = plt.subplots(2, layout='constrained')\n", 64 | "fig.get_layout_engine().set(hspace=.1)\n", 65 | "\n", 66 | "sns.barplot(flash, ax=ax[0], x='MCU', y='Usage (kB)', hue='Inference Engine', width=.5)\n", 67 | "ax[0].set_xlabel('')\n", 68 | "ax[0].set_title('Flash')\n", 69 | "ax[0].legend()\n", 70 | "\n", 71 | "sns.barplot(ram, ax=ax[1], x='MCU', y='Usage (kB)', hue='Inference Engine', width=.5)\n", 72 | "ax[1].set_xlabel('')\n", 73 | "ax[1].set_title('RAM')\n", 74 | "ax[1].legend()\n", 75 | "\n", 76 | "plt.savefig('plots/person-detect-memory.pdf')\n", 77 | "plt.show()" 78 | ] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "microflow", 84 | "language": "python", 85 | "name": "microflow" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.10.9" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 5 102 | } 103 | -------------------------------------------------------------------------------- /analysis/memory/plots/person-detect-memory.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/memory/plots/person-detect-memory.pdf -------------------------------------------------------------------------------- /analysis/memory/plots/sine-memory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/memory/plots/sine-memory.pdf -------------------------------------------------------------------------------- /analysis/memory/plots/speech-memory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/memory/plots/speech-memory.pdf -------------------------------------------------------------------------------- /analysis/memory/sine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8bbab007-c69c-4eb3-8c3f-18ba2739c7f7", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Sine Memory" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "4f9d44c9-8dde-4c26-8001-93880f738447", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "eec11aa8-547f-4fed-a4c8-18eb09030a29", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "import seaborn as sns" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "9845fea7-cee7-42c2-9c59-5679b06ed89f", 38 | "metadata": {}, 39 | "source": [ 40 | "## Flash & RAM Usage" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | 
"execution_count": null, 46 | "id": "e894c794-bfae-4301-917a-fcfb3f140415", 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "flash = pd.DataFrame({\n", 53 | " 'MCU': ['ESP32', 'nRF52840', 'ATSAMV71', 'LM3S6965', 'ATmega328'] * 2,\n", 54 | " 'Inference Engine': [*['TFLM'] * 5, *['MicroFlow'] * 5],\n", 55 | " 'Usage (kB)': [*[306.817, 116.352, 0, 0, 0], *[100.56, 34.292, 48.0, 34.296, 13.619]]\n", 56 | "})\n", 57 | "ram = pd.DataFrame({\n", 58 | " 'MCU': ['ESP32', 'nRF52840', 'ATSAMV71', 'LM3S6965', 'ATmega328'] * 2,\n", 59 | " 'Inference Engine': [*['TFLM'] * 5, *['MicroFlow'] * 5],\n", 60 | " 'Usage (kB)': [*[24.424, 45.728, 0, 0, 0], *[10.688, 5.296, 6.584, 4.620, 1.706]]\n", 61 | "})\n", 62 | "\n", 63 | "fig, ax = plt.subplots(2, layout='constrained')\n", 64 | "fig.get_layout_engine().set(hspace=.1)\n", 65 | "\n", 66 | "sns.barplot(flash, ax=ax[0], x='MCU', y='Usage (kB)', hue='Inference Engine')\n", 67 | "ax[0].set_xlabel('')\n", 68 | "ax[0].set_title('Flash')\n", 69 | "ax[0].legend()\n", 70 | "\n", 71 | "sns.barplot(ram, ax=ax[1], x='MCU', y='Usage (kB)', hue='Inference Engine')\n", 72 | "ax[1].set_xlabel('')\n", 73 | "ax[1].set_title('RAM')\n", 74 | "ax[1].legend()\n", 75 | "\n", 76 | "plt.savefig('plots/sine-memory.pdf')\n", 77 | "plt.show()" 78 | ] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "microflow", 84 | "language": "python", 85 | "name": "microflow" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.10.9" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 5 102 | } 103 | -------------------------------------------------------------------------------- /analysis/memory/speech.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8bbab007-c69c-4eb3-8c3f-18ba2739c7f7", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Speech Memory" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "4f9d44c9-8dde-4c26-8001-93880f738447", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "eec11aa8-547f-4fed-a4c8-18eb09030a29", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "import seaborn as sns" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "9845fea7-cee7-42c2-9c59-5679b06ed89f", 38 | "metadata": {}, 39 | "source": [ 40 | "## Flash & RAM Usage" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "e894c794-bfae-4301-917a-fcfb3f140415", 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "flash = pd.DataFrame({\n", 53 | " 'MCU': ['ESP32', 'nRF52840', 'ATSAMV71', 'LM3S6965'] * 2,\n", 54 | " 'Inference Engine': [*['TFLM'] * 4, *['MicroFlow'] * 4],\n", 55 | " 'Usage (kB)': [*[341.281, 157.464, 0, 0], *[110.416, 61.804, 73.544, 61.860]]\n", 56 | "})\n", 57 | "ram = pd.DataFrame({\n", 58 | " 'MCU': ['ESP32', 'nRF52840', 'ATSAMV71', 'LM3S6965'] * 2,\n", 59 | " 'Inference Engine': [*['TFLM'] * 4, *['MicroFlow'] * 4],\n", 60 | " 'Usage (kB)': [*[42.428, 63.728, 0, 0], *[25.488, 19.552, 21.168, 19.220]]\n", 61 | "})\n", 62 | "\n", 63 | "fig, ax = plt.subplots(2, layout='constrained')\n", 64 | "fig.get_layout_engine().set(hspace=.1)\n", 65 | "\n", 66 | "sns.barplot(flash, ax=ax[0], x='MCU', y='Usage (kB)', hue='Inference Engine', width=.7)\n", 67 | "ax[0].set_xlabel('')\n", 68 | "ax[0].set_title('Flash')\n", 69 | "ax[0].legend()\n", 70 | "\n", 71 | 
"sns.barplot(ram, ax=ax[1], x='MCU', y='Usage (kB)', hue='Inference Engine', width=.7)\n", 72 | "ax[1].set_xlabel('')\n", 73 | "ax[1].set_title('RAM')\n", 74 | "ax[1].legend()\n", 75 | "\n", 76 | "plt.savefig('plots/speech-memory.pdf')\n", 77 | "plt.show()" 78 | ] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "microflow", 84 | "language": "python", 85 | "name": "microflow" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.10.9" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 5 102 | } 103 | -------------------------------------------------------------------------------- /analysis/performance/data/person-detect-esp32-microflow.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,5178533 3 | 2,5177907 4 | 3,5176453 5 | 4,5174880 6 | 5,5172640 7 | 6,5172253 8 | 7,5170106 9 | 8,5168120 10 | 9,5166480 11 | 10,5165466 12 | 11,5163853 13 | 12,5160934 14 | 13,5161160 15 | 14,5159854 16 | 15,5159733 17 | 16,5157733 18 | 17,5155907 19 | 18,5155494 20 | 19,5154693 21 | 20,5153920 22 | 21,5151413 23 | 22,5151827 24 | 23,5150307 25 | 24,5149520 26 | 25,5150013 27 | 26,5147667 28 | 27,5146947 29 | 28,5146200 30 | 29,5144467 31 | 30,5140654 32 | 31,5140973 33 | 32,5140506 34 | 33,5141186 35 | 34,5140614 36 | 35,5139600 37 | 36,5139053 38 | 37,5139374 39 | 38,5140106 40 | 39,5139306 41 | 40,5138960 42 | 41,5135893 43 | 42,5134374 44 | 43,5135747 45 | 44,5134946 46 | 45,5134067 47 | 46,5132560 48 | 47,5132453 49 | 48,5132826 50 | 49,5132986 51 | 50,5131760 52 | 51,5131600 53 | 52,5131000 54 | 53,5129787 55 | 54,5129840 56 | 55,5129160 57 | 56,5128773 58 | 57,5127093 59 | 58,5128200 60 | 59,5128053 61 | 60,5125747 62 | 61,5126333 63 | 62,5125614 64 | 
63,5125120 65 | 64,5124787 66 | 65,5124067 67 | 66,5124027 68 | 67,5123893 69 | 68,5124254 70 | 69,5122253 71 | 70,5122640 72 | 71,5122613 73 | 72,5123480 74 | 73,5122627 75 | 74,5122480 76 | 75,5122080 77 | 76,5120307 78 | 77,5120427 79 | 78,5120427 80 | 79,5121814 81 | 80,5121026 82 | 81,5120587 83 | 82,5120293 84 | 83,5120854 85 | 84,5120760 86 | 85,5120547 87 | 86,5119747 88 | 87,5120454 89 | 88,5120013 90 | 89,5119933 91 | 90,5120374 92 | 91,5118960 93 | 92,5118240 94 | 93,5118920 95 | 94,5118426 96 | 95,5117266 97 | 96,5117813 98 | 97,5117293 99 | 98,5116360 100 | 99,5115146 101 | 100,5116454 102 | -------------------------------------------------------------------------------- /analysis/performance/data/person-detect-esp32-tflm.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,4976777 3 | 2,4973647 4 | 3,4965866 5 | 4,4987112 6 | 5,4968241 7 | 6,4961614 8 | 7,4968636 9 | 8,4980608 10 | 9,4959691 11 | 10,4956314 12 | 11,4978935 13 | 12,4989492 14 | 13,4994797 15 | 14,4994547 16 | 15,4991909 17 | 16,4971436 18 | 17,4973493 19 | 18,4980157 20 | 19,4984550 21 | 20,4994818 22 | 21,4976856 23 | 22,4979372 24 | 23,4984338 25 | 24,4975640 26 | 25,4989934 27 | 26,4972401 28 | 27,4954191 29 | 28,4982271 30 | 29,4983667 31 | 30,4984591 32 | 31,4975391 33 | 32,4990218 34 | 33,4985984 35 | 34,4969550 36 | 35,4980291 37 | 36,4969550 38 | 37,4963173 39 | 38,4973671 40 | 39,4988187 41 | 40,4970671 42 | 41,4972941 43 | 42,4958986 44 | 43,4954015 45 | 44,4958804 46 | 45,4986634 47 | 46,4975788 48 | 47,4973233 49 | 48,4976888 50 | 49,4962191 51 | 50,4965803 52 | 51,4980030 53 | 52,4984812 54 | 53,4996485 55 | 54,4963287 56 | 55,4976826 57 | 56,4979057 58 | 57,4978546 59 | 58,4965346 60 | 59,4964471 61 | 60,4972621 62 | 61,4966431 63 | 62,4972952 64 | 63,4976290 65 | 64,4984216 66 | 65,4976033 67 | 66,4965671 68 | 67,4969560 69 | 68,4968895 70 | 69,4988962 71 | 70,4977032 72 | 71,4990184 73 | 72,4976633 74 | 73,4980054 75 
| 74,4993730 76 | 75,4967757 77 | 76,4978817 78 | 77,4978906 79 | 78,4974667 80 | 79,4970640 81 | 80,4993393 82 | 81,4979067 83 | 82,4975820 84 | 83,4957030 85 | 84,4963796 86 | 85,4983313 87 | 86,4976169 88 | 87,4980770 89 | 88,4972035 90 | 89,4958763 91 | 90,4974336 92 | 91,4977621 93 | 92,4990391 94 | 93,4977313 95 | 94,4984372 96 | 95,4972481 97 | 96,4990489 98 | 97,4978620 99 | 98,4982835 100 | 99,4975165 101 | 100,4971415 102 | -------------------------------------------------------------------------------- /analysis/performance/data/person-detect-nrf52840-microflow.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,896319 3 | 2,895625 4 | 3,895902 5 | 4,895890 6 | 5,895083 7 | 6,897362 8 | 7,896600 9 | 8,897278 10 | 9,895444 11 | 10,895598 12 | 11,896848 13 | 12,896566 14 | 13,895844 15 | 14,895854 16 | 15,896199 17 | 16,896513 18 | 17,896911 19 | 18,896065 20 | 19,895692 21 | 20,895944 22 | 21,895725 23 | 22,896710 24 | 23,895216 25 | 24,896941 26 | 25,896979 27 | 26,894942 28 | 27,897510 29 | 28,894557 30 | 29,896651 31 | 30,896589 32 | 31,896749 33 | 32,896783 34 | 33,895892 35 | 34,897367 36 | 35,896075 37 | 36,897546 38 | 37,895796 39 | 38,897728 40 | 39,895848 41 | 40,895738 42 | 41,895788 43 | 42,896554 44 | 43,896515 45 | 44,895985 46 | 45,896173 47 | 46,895393 48 | 47,896877 49 | 48,895902 50 | 49,895952 51 | 50,896920 52 | 51,894377 53 | 52,897090 54 | 53,894996 55 | 54,895008 56 | 55,894612 57 | 56,897870 58 | 57,895078 59 | 58,895686 60 | 59,896116 61 | 60,897262 62 | 61,897086 63 | 62,893840 64 | 63,896336 65 | 64,895806 66 | 65,896329 67 | 66,896699 68 | 67,897456 69 | 68,896990 70 | 69,895907 71 | 70,895511 72 | 71,896889 73 | 72,896823 74 | 73,895526 75 | 74,895633 76 | 75,897727 77 | 76,898178 78 | 77,899029 79 | 78,895875 80 | 79,897048 81 | 80,893451 82 | 81,896058 83 | 82,896576 84 | 83,895714 85 | 84,893485 86 | 85,896823 87 | 86,895600 88 | 87,896603 89 | 88,895611 90 | 89,894846 91 
| 90,896740 92 | 91,897314 93 | 92,896534 94 | 93,896458 95 | 94,896443 96 | 95,898251 97 | 96,894487 98 | 97,896952 99 | 98,895222 100 | 99,896614 101 | 100,896130 102 | -------------------------------------------------------------------------------- /analysis/performance/data/person-detect-nrf52840-tflm.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,839260 3 | 2,839150 4 | 3,839116 5 | 4,838858 6 | 5,838490 7 | 6,838269 8 | 7,837915 9 | 8,837975 10 | 9,838559 11 | 10,838753 12 | 11,839012 13 | 12,839235 14 | 13,839372 15 | 14,839504 16 | 15,839573 17 | 16,839643 18 | 17,839888 19 | 18,839914 20 | 19,839926 21 | 20,839831 22 | 21,839763 23 | 22,839576 24 | 23,839566 25 | 24,839524 26 | 25,839448 27 | 26,839265 28 | 27,839001 29 | 28,838760 30 | 29,838541 31 | 30,838255 32 | 31,837788 33 | 32,838250 34 | 33,838699 35 | 34,838911 36 | 35,839053 37 | 36,839275 38 | 37,839434 39 | 38,839565 40 | 39,839710 41 | 40,839663 42 | 41,839861 43 | 42,839973 44 | 43,839915 45 | 44,839825 46 | 45,839758 47 | 46,839644 48 | 47,839544 49 | 48,839475 50 | 49,839289 51 | 50,839119 52 | 51,838933 53 | 52,838548 54 | 53,838429 55 | 54,838088 56 | 55,837881 57 | 56,838520 58 | 57,838765 59 | 58,838970 60 | 59,839240 61 | 60,839425 62 | 61,839571 63 | 62,839636 64 | 63,839726 65 | 64,839753 66 | 65,839866 67 | 66,839824 68 | 67,839918 69 | 68,839830 70 | 69,839778 71 | 70,839605 72 | 71,839568 73 | 72,839419 74 | 73,839246 75 | 74,839007 76 | 75,838733 77 | 76,838530 78 | 77,838347 79 | 78,837837 80 | 79,838169 81 | 80,838623 82 | 81,838802 83 | 82,839072 84 | 83,839286 85 | 84,839409 86 | 85,839596 87 | 86,839651 88 | 87,839726 89 | 88,839711 90 | 89,839831 91 | 90,839934 92 | 91,839842 93 | 92,839777 94 | 93,839742 95 | 94,839567 96 | 95,839473 97 | 96,839394 98 | 97,839058 99 | 98,838864 100 | 99,838669 101 | 100,838426 102 | -------------------------------------------------------------------------------- 
/analysis/performance/data/sine-esp32-microflow.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,269 3 | 2,106 4 | 3,154 5 | 4,137 6 | 5,154 7 | 6,86 8 | 7,143 9 | 8,112 10 | 9,115 11 | 10,141 12 | 11,109 13 | 12,139 14 | 13,123 15 | 14,118 16 | 15,112 17 | 16,119 18 | 17,125 19 | 18,126 20 | 19,151 21 | 20,145 22 | 21,113 23 | 22,127 24 | 23,139 25 | 24,128 26 | 25,95 27 | 26,129 28 | 27,107 29 | 28,173 30 | 29,103 31 | 30,166 32 | 31,133 33 | 32,104 34 | 33,140 35 | 34,131 36 | 35,86 37 | 36,133 38 | 37,109 39 | 38,97 40 | 39,140 41 | 40,164 42 | 41,144 43 | 42,117 44 | 43,136 45 | 44,143 46 | 45,113 47 | 46,145 48 | 47,178 49 | 48,148 50 | 49,191 51 | 50,119 52 | 51,137 53 | 52,102 54 | 53,116 55 | 54,123 56 | 55,136 57 | 56,132 58 | 57,145 59 | 58,160 60 | 59,180 61 | 60,193 62 | 61,172 63 | 62,151 64 | 63,151 65 | 64,144 66 | 65,171 67 | 66,153 68 | 67,120 69 | 68,145 70 | 69,156 71 | 70,148 72 | 71,137 73 | 72,115 74 | 73,152 75 | 74,125 76 | 75,132 77 | 76,160 78 | 77,122 79 | 78,147 80 | 79,155 81 | 80,140 82 | 81,145 83 | 82,176 84 | 83,131 85 | 84,123 86 | 85,117 87 | 86,141 88 | 87,113 89 | 88,165 90 | 89,111 91 | 90,117 92 | 91,104 93 | 92,134 94 | 93,142 95 | 94,154 96 | 95,162 97 | 96,142 98 | 97,167 99 | 98,152 100 | 99,185 101 | 100,168 102 | -------------------------------------------------------------------------------- /analysis/performance/data/sine-esp32-tflm.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,1895 3 | 2,1783 4 | 3,1771 5 | 4,1772 6 | 5,1773 7 | 6,1777 8 | 7,1765 9 | 8,1760 10 | 9,1778 11 | 10,1790 12 | 11,1770 13 | 12,1763 14 | 13,1811 15 | 14,1827 16 | 15,1803 17 | 16,1785 18 | 17,1791 19 | 18,1769 20 | 19,1758 21 | 20,1774 22 | 21,1806 23 | 22,1764 24 | 23,1786 25 | 24,1782 26 | 25,1797 27 | 26,1794 28 | 27,1795 29 | 28,1802 30 | 29,1783 31 | 30,1806 32 | 31,1801 33 | 32,1791 34 | 33,1787 35 | 34,1779 36 | 35,1802 
37 | 36,1827 38 | 37,1778 39 | 38,1735 40 | 39,1827 41 | 40,1775 42 | 41,1806 43 | 42,1780 44 | 43,1784 45 | 44,1811 46 | 45,1801 47 | 46,1783 48 | 47,1774 49 | 48,1782 50 | 49,1790 51 | 50,1775 52 | 51,1796 53 | 52,1805 54 | 53,1796 55 | 54,1770 56 | 55,1795 57 | 56,1806 58 | 57,1803 59 | 58,1806 60 | 59,1806 61 | 60,1804 62 | 61,1779 63 | 62,1797 64 | 63,1769 65 | 64,1800 66 | 65,1795 67 | 66,1765 68 | 67,1773 69 | 68,1806 70 | 69,1808 71 | 70,1810 72 | 71,1799 73 | 72,1814 74 | 73,1797 75 | 74,1780 76 | 75,1792 77 | 76,1785 78 | 77,1775 79 | 78,1783 80 | 79,1815 81 | 80,1759 82 | 81,1784 83 | 82,1789 84 | 83,1781 85 | 84,1771 86 | 85,1799 87 | 86,1814 88 | 87,1793 89 | 88,1783 90 | 89,1756 91 | 90,1809 92 | 91,1805 93 | 92,1779 94 | 93,1768 95 | 94,1771 96 | 95,1828 97 | 96,1786 98 | 97,1764 99 | 98,1776 100 | 99,1827 101 | 100,1776 102 | -------------------------------------------------------------------------------- /analysis/performance/data/sine-nrf52840-microflow.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,186 3 | 2,189 4 | 3,218 5 | 4,220 6 | 5,209 7 | 6,184 8 | 7,182 9 | 8,210 10 | 9,218 11 | 10,203 12 | 11,186 13 | 12,188 14 | 13,183 15 | 14,190 16 | 15,196 17 | 16,181 18 | 17,176 19 | 18,177 20 | 19,192 21 | 20,179 22 | 21,184 23 | 22,184 24 | 23,183 25 | 24,214 26 | 25,220 27 | 26,217 28 | 27,219 29 | 28,218 30 | 29,206 31 | 30,205 32 | 31,208 33 | 32,207 34 | 33,180 35 | 34,181 36 | 35,181 37 | 36,189 38 | 37,187 39 | 38,180 40 | 39,177 41 | 40,182 42 | 41,175 43 | 42,179 44 | 43,184 45 | 44,187 46 | 45,188 47 | 46,212 48 | 47,211 49 | 48,210 50 | 49,209 51 | 50,210 52 | 51,201 53 | 52,214 54 | 53,211 55 | 54,212 56 | 55,212 57 | 56,179 58 | 57,185 59 | 58,177 60 | 59,182 61 | 60,175 62 | 61,174 63 | 62,183 64 | 63,181 65 | 64,189 66 | 65,173 67 | 66,172 68 | 67,178 69 | 68,180 70 | 69,207 71 | 70,211 72 | 71,209 73 | 72,217 74 | 73,220 75 | 74,210 76 | 75,219 77 | 76,215 78 | 77,210 79 | 
78,188 80 | 79,176 81 | 80,178 82 | 81,185 83 | 82,180 84 | 83,185 85 | 84,171 86 | 85,176 87 | 86,191 88 | 87,171 89 | 88,181 90 | 89,180 91 | 90,183 92 | 91,219 93 | 92,218 94 | 93,215 95 | 94,208 96 | 95,213 97 | 96,213 98 | 97,215 99 | 98,210 100 | 99,218 101 | 100,175 102 | -------------------------------------------------------------------------------- /analysis/performance/data/sine-nrf52840-tflm.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,2638 3 | 2,2621 4 | 3,2620 5 | 4,2592 6 | 5,2593 7 | 6,2593 8 | 7,2592 9 | 8,2593 10 | 9,2593 11 | 10,2593 12 | 11,2594 13 | 12,2594 14 | 13,2594 15 | 14,2595 16 | 15,2594 17 | 16,2612 18 | 17,2639 19 | 18,2640 20 | 19,2594 21 | 20,2595 22 | 21,2596 23 | 22,2595 24 | 23,2595 25 | 24,2595 26 | 25,2612 27 | 26,2640 28 | 27,2640 29 | 28,2594 30 | 29,2594 31 | 30,2596 32 | 31,2595 33 | 32,2595 34 | 33,2595 35 | 34,2613 36 | 35,2636 37 | 36,2641 38 | 37,2622 39 | 38,2594 40 | 39,2597 41 | 40,2596 42 | 41,2596 43 | 42,2596 44 | 43,2614 45 | 44,2640 46 | 45,2641 47 | 46,2622 48 | 47,2595 49 | 48,2597 50 | 49,2596 51 | 50,2596 52 | 51,2596 53 | 52,2613 54 | 53,2641 55 | 54,2641 56 | 55,2623 57 | 56,2595 58 | 57,2597 59 | 58,2595 60 | 59,2597 61 | 60,2596 62 | 61,2613 63 | 62,2641 64 | 63,2641 65 | 64,2622 66 | 65,2595 67 | 66,2597 68 | 67,2595 69 | 68,2596 70 | 69,2595 71 | 70,2612 72 | 71,2641 73 | 72,2640 74 | 73,2595 75 | 74,2594 76 | 75,2597 77 | 76,2595 78 | 77,2597 79 | 78,2596 80 | 79,2614 81 | 80,2640 82 | 81,2640 83 | 82,2623 84 | 83,2596 85 | 84,2596 86 | 85,2596 87 | 86,2597 88 | 87,2596 89 | 88,2613 90 | 89,2641 91 | 90,2641 92 | 91,2595 93 | 92,2595 94 | 93,2596 95 | 94,2595 96 | 95,2597 97 | 96,2596 98 | 97,2614 99 | 98,2641 100 | 99,2640 101 | 100,2595 102 | -------------------------------------------------------------------------------- /analysis/performance/data/speech-esp32-microflow.csv: 
-------------------------------------------------------------------------------- 1 | iter,time 2 | 1,190920 3 | 2,190080 4 | 3,190013 5 | 4,189960 6 | 5,189920 7 | 6,189880 8 | 7,189773 9 | 8,189747 10 | 9,189654 11 | 10,189653 12 | 11,189653 13 | 12,189680 14 | 13,189613 15 | 14,189666 16 | 15,189600 17 | 16,189627 18 | 17,189573 19 | 18,189627 20 | 19,189614 21 | 20,189627 22 | 21,189614 23 | 22,189600 24 | 23,189533 25 | 24,189534 26 | 25,189547 27 | 26,189586 28 | 27,189533 29 | 28,189573 30 | 29,189533 31 | 30,189506 32 | 31,189480 33 | 32,189506 34 | 33,189480 35 | 34,189480 36 | 35,189453 37 | 36,189413 38 | 37,189373 39 | 38,189413 40 | 39,189293 41 | 40,189400 42 | 41,189400 43 | 42,189440 44 | 43,189373 45 | 44,189386 46 | 45,189387 47 | 46,189360 48 | 47,189320 49 | 48,189333 50 | 49,189306 51 | 50,189360 52 | 51,189347 53 | 52,189427 54 | 53,189360 55 | 54,189414 56 | 55,189333 57 | 56,189333 58 | 57,189293 59 | 58,189226 60 | 59,189187 61 | 60,189267 62 | 61,189187 63 | 62,189280 64 | 63,189266 65 | 64,189333 66 | 65,189293 67 | 66,189254 68 | 67,189267 69 | 68,189280 70 | 69,189267 71 | 70,189280 72 | 71,189266 73 | 72,189266 74 | 73,189266 75 | 74,189306 76 | 75,189253 77 | 76,189253 78 | 77,189093 79 | 78,189174 80 | 79,189134 81 | 80,189240 82 | 81,189147 83 | 82,189147 84 | 83,189147 85 | 84,189160 86 | 85,189120 87 | 86,189173 88 | 87,189160 89 | 88,189240 90 | 89,189173 91 | 90,189187 92 | 91,189160 93 | 92,189214 94 | 93,189160 95 | 94,189174 96 | 95,189107 97 | 96,189106 98 | 97,189080 99 | 98,189107 100 | 99,189053 101 | 100,189160 102 | -------------------------------------------------------------------------------- /analysis/performance/data/speech-esp32-tflm.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,207618 3 | 2,207398 4 | 3,207403 5 | 4,207397 6 | 5,207436 7 | 6,207426 8 | 7,207456 9 | 8,207452 10 | 9,207439 11 | 10,207423 12 | 11,207430 13 | 12,207427 14 | 13,207469 15 | 
14,207449 16 | 15,207439 17 | 16,207454 18 | 17,207454 19 | 18,207431 20 | 19,207463 21 | 20,207400 22 | 21,207410 23 | 22,207433 24 | 23,207428 25 | 24,207434 26 | 25,207499 27 | 26,207430 28 | 27,207448 29 | 28,207405 30 | 29,207405 31 | 30,207425 32 | 31,207433 33 | 32,207404 34 | 33,207457 35 | 34,207395 36 | 35,207439 37 | 36,207422 38 | 37,207452 39 | 38,207394 40 | 39,207434 41 | 40,207426 42 | 41,207406 43 | 42,207424 44 | 43,207438 45 | 44,207404 46 | 45,207465 47 | 46,207440 48 | 47,207433 49 | 48,207411 50 | 49,207464 51 | 50,207400 52 | 51,207406 53 | 52,207407 54 | 53,207450 55 | 54,207404 56 | 55,207454 57 | 56,207393 58 | 57,207447 59 | 58,207464 60 | 59,207438 61 | 60,207398 62 | 61,207434 63 | 62,207399 64 | 63,207441 65 | 64,207457 66 | 65,207395 67 | 66,207410 68 | 67,207396 69 | 68,207397 70 | 69,207452 71 | 70,207449 72 | 71,207434 73 | 72,207446 74 | 73,207446 75 | 74,207369 76 | 75,207428 77 | 76,207446 78 | 77,207435 79 | 78,207420 80 | 79,207433 81 | 80,207408 82 | 81,207423 83 | 82,207395 84 | 83,207470 85 | 84,207427 86 | 85,207434 87 | 86,207439 88 | 87,207465 89 | 88,207398 90 | 89,207424 91 | 90,207420 92 | 91,207441 93 | 92,207433 94 | 93,207444 95 | 94,207398 96 | 95,207472 97 | 96,207418 98 | 97,207448 99 | 98,207414 100 | 99,207436 101 | 100,207414 102 | -------------------------------------------------------------------------------- /analysis/performance/data/speech-nrf52840-microflow.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,50598 3 | 2,50586 4 | 3,50604 5 | 4,50585 6 | 5,50609 7 | 6,50607 8 | 7,50604 9 | 8,50583 10 | 9,50581 11 | 10,50604 12 | 11,50603 13 | 12,50586 14 | 13,50584 15 | 14,50603 16 | 15,50602 17 | 16,50603 18 | 17,50596 19 | 18,50602 20 | 19,50603 21 | 20,50597 22 | 21,50585 23 | 22,50586 24 | 23,50591 25 | 24,50598 26 | 25,50586 27 | 26,50589 28 | 27,50608 29 | 28,50596 30 | 29,50602 31 | 30,50622 32 | 31,50605 33 | 32,50604 34 | 33,50585 35 | 
34,50602 36 | 35,50601 37 | 36,50607 38 | 37,50589 39 | 38,50586 40 | 39,50592 41 | 40,50593 42 | 41,50604 43 | 42,50598 44 | 43,50597 45 | 44,50591 46 | 45,50591 47 | 46,50593 48 | 47,50598 49 | 48,50580 50 | 49,50597 51 | 50,50604 52 | 51,50581 53 | 52,50633 54 | 53,50581 55 | 54,50596 56 | 55,50609 57 | 56,50602 58 | 57,50607 59 | 58,50598 60 | 59,50592 61 | 60,50595 62 | 61,50605 63 | 62,50591 64 | 63,50589 65 | 64,50603 66 | 65,50587 67 | 66,50597 68 | 67,50585 69 | 68,50592 70 | 69,50597 71 | 70,50586 72 | 71,50584 73 | 72,50604 74 | 73,50598 75 | 74,50626 76 | 75,50592 77 | 76,50602 78 | 77,50586 79 | 78,50601 80 | 79,50598 81 | 80,50584 82 | 81,50595 83 | 82,50586 84 | 83,50581 85 | 84,50601 86 | 85,50592 87 | 86,50604 88 | 87,50609 89 | 88,50589 90 | 89,50595 91 | 90,50590 92 | 91,50587 93 | 92,50580 94 | 93,50586 95 | 94,50608 96 | 95,50604 97 | 96,50615 98 | 97,50598 99 | 98,50580 100 | 99,50598 101 | 100,50609 102 | -------------------------------------------------------------------------------- /analysis/performance/data/speech-nrf52840-tflm.csv: -------------------------------------------------------------------------------- 1 | iter,time 2 | 1,59173 3 | 2,59186 4 | 3,59266 5 | 4,59229 6 | 5,59185 7 | 6,59234 8 | 7,59232 9 | 8,59244 10 | 9,59285 11 | 10,59240 12 | 11,59279 13 | 12,59242 14 | 13,59264 15 | 14,59256 16 | 15,59262 17 | 16,59300 18 | 17,59289 19 | 18,59286 20 | 19,59271 21 | 20,59291 22 | 21,59283 23 | 22,59274 24 | 23,59292 25 | 24,59245 26 | 25,59251 27 | 26,59245 28 | 27,59287 29 | 28,59294 30 | 29,59283 31 | 30,59243 32 | 31,59246 33 | 32,59245 34 | 33,59275 35 | 34,59244 36 | 35,59245 37 | 36,59270 38 | 37,59231 39 | 38,59243 40 | 39,59255 41 | 40,59276 42 | 41,59250 43 | 42,59267 44 | 43,59272 45 | 44,59252 46 | 45,59287 47 | 46,59256 48 | 47,59280 49 | 48,59277 50 | 49,59242 51 | 50,59241 52 | 51,59277 53 | 52,59242 54 | 53,59239 55 | 54,59227 56 | 55,59224 57 | 56,59294 58 | 57,59224 59 | 58,59248 60 | 59,59253 61 | 60,59235 62 | 
61,59232 63 | 62,59246 64 | 63,59272 65 | 64,59247 66 | 65,59261 67 | 66,59243 68 | 67,59264 69 | 68,59255 70 | 69,59251 71 | 70,59237 72 | 71,59243 73 | 72,59240 74 | 73,59256 75 | 74,59240 76 | 75,59240 77 | 76,59236 78 | 77,59275 79 | 78,59261 80 | 79,59227 81 | 80,59259 82 | 81,59274 83 | 82,59327 84 | 83,59256 85 | 84,59234 86 | 85,59238 87 | 86,59284 88 | 87,59236 89 | 88,59239 90 | 89,59317 91 | 90,59272 92 | 91,59232 93 | 92,59254 94 | 93,59211 95 | 94,59229 96 | 95,59267 97 | 96,59226 98 | 97,59277 99 | 98,59293 100 | 99,59232 101 | 100,59289 102 | -------------------------------------------------------------------------------- /analysis/performance/person_detect.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6e987910-eeb0-46fe-9650-e962701f51a6", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Person Detect Performance" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "812a1605-f1c6-4e55-a19d-e334f708d4c2", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "a0250ec6-017e-4f68-bd88-b5b02dc1e16c", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "from matplotlib.ticker import FormatStrFormatter\n", 33 | "import seaborn as sns" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "14b66780-b555-48fb-82cb-0f4b26ac68d2", 39 | "metadata": {}, 40 | "source": [ 41 | "## ESP32" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "afc17494-4359-4500-a4b6-22da30757064", 48 | "metadata": { 49 | "tags": [] 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "tflm_esp32 = pd.read_csv('data/person-detect-esp32-tflm.csv')\n", 54 | "microflow_esp32 = 
pd.read_csv('data/person-detect-esp32-microflow.csv')\n", 55 | "tflm_esp32.time /= 1000\n", 56 | "microflow_esp32.time /= 1000\n", 57 | "\n", 58 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 59 | "fig.suptitle(\"Person Detector Model on the ESP32\")\n", 60 | "top_limit = 35\n", 61 | "\n", 62 | "sns.histplot(tflm_esp32.time, ax=ax[0])\n", 63 | "start, end = ax[0].get_xlim()\n", 64 | "median = tflm_esp32.time.median()\n", 65 | "ax[0].axvline(median, color='red', label='Median')\n", 66 | "ax[0].set_xlabel('Execution Time (ms)')\n", 67 | "ax[0].set_xticks((start, median, end))\n", 68 | "ax[0].set_ylim(top=top_limit)\n", 69 | "ax[0].set_title('TFLM')\n", 70 | "ax[0].legend()\n", 71 | "\n", 72 | "sns.histplot(microflow_esp32.time, ax=ax[1])\n", 73 | "start, end = ax[1].get_xlim()\n", 74 | "median = microflow_esp32.time.median()\n", 75 | "ax[1].axvline(median, color='red', label='Median')\n", 76 | "ax[1].set_xlabel('Execution Time (ms)')\n", 77 | "ax[1].set_xticks((start, median, end))\n", 78 | "ax[1].set_ylim(top=top_limit)\n", 79 | "ax[1].set_title('MicroFlow')\n", 80 | "ax[1].legend()\n", 81 | "\n", 82 | "plt.savefig('plots/person-detect-esp32-performance.pdf')\n", 83 | "plt.show()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "0dafe385-ab7b-4488-9eb3-b23202c4787b", 89 | "metadata": {}, 90 | "source": [ 91 | "## nRF52840" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "id": "58db7c12-52e2-4a71-b22c-78e46c88d8ab", 98 | "metadata": { 99 | "tags": [] 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "tflm_nrf52840 = pd.read_csv('data/person-detect-nrf52840-tflm.csv')\n", 104 | "microflow_nrf52840 = pd.read_csv('data/person-detect-nrf52840-microflow.csv')\n", 105 | "tflm_nrf52840.time /= 1000\n", 106 | "microflow_nrf52840.time /= 1000\n", 107 | "\n", 108 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 109 | "fig.suptitle(\"Person Detector Model on the 
nRF52840\")\n", 110 | "top_limit = 30\n", 111 | "\n", 112 | "sns.histplot(tflm_nrf52840.time, ax=ax[0])\n", 113 | "start, end = ax[0].get_xlim()\n", 114 | "median = tflm_nrf52840.time.median()\n", 115 | "ax[0].axvline(median, color='red', label='Median')\n", 116 | "ax[0].set_xlabel('Execution Time (ms)')\n", 117 | "ax[0].set_xticks((start, median, end))\n", 118 | "ax[0].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))\n", 119 | "ax[0].set_ylim(top=top_limit)\n", 120 | "ax[0].set_title('TFLM')\n", 121 | "ax[0].legend(loc='upper left')\n", 122 | "\n", 123 | "sns.histplot(microflow_nrf52840.time, ax=ax[1])\n", 124 | "start, end = ax[1].get_xlim()\n", 125 | "median = microflow_nrf52840.time.median()\n", 126 | "ax[1].axvline(median, color='red', label='Median')\n", 127 | "ax[1].set_xlabel('Execution Time (ms)')\n", 128 | "ax[1].set_xticks((start, median, end))\n", 129 | "ax[1].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))\n", 130 | "ax[1].set_ylim(top=top_limit)\n", 131 | "ax[1].set_title('MicroFlow')\n", 132 | "ax[1].legend()\n", 133 | "\n", 134 | "plt.savefig('plots/person-detect-nrf52840-performance.pdf')\n", 135 | "plt.show()" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "id": "9c3958ca-4c89-483b-be63-200453c9b0f0", 141 | "metadata": {}, 142 | "source": [ 143 | "## Comparison" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "id": "5eb9b8ed-7018-4031-9011-7aa4ceb05822", 150 | "metadata": { 151 | "tags": [] 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "data = pd.DataFrame({\n", 156 | " 'MCU': [*['ESP32'] * 200, *['nRF52840'] * 200],\n", 157 | " 'Inference Engine': [*['TFLM'] * 100, *['MicroFlow'] * 100] * 2,\n", 158 | " 'Execution Time (ms)': [*tflm_esp32.time, *microflow_esp32.time, *tflm_nrf52840.time, *microflow_nrf52840.time]\n", 159 | "})\n", 160 | "\n", 161 | "plt.figure(figsize=(8, 3), layout='constrained')\n", 162 | "sns.barplot(\n", 163 | " data=data,\n", 164 | " x='Execution Time 
(ms)',\n", 165 | " y='MCU',\n", 166 | " hue='Inference Engine',\n", 167 | " estimator='median',\n", 168 | " errorbar='pi',\n", 169 | " capsize=.1,\n", 170 | " errwidth=2,\n", 171 | " width=.7\n", 172 | ")\n", 173 | "plt.ylabel('')\n", 174 | "plt.legend()\n", 175 | "plt.savefig('plots/person-detect-performance-comparison.pdf')\n", 176 | "plt.show()" 177 | ] 178 | } 179 | ], 180 | "metadata": { 181 | "kernelspec": { 182 | "display_name": "microflow", 183 | "language": "python", 184 | "name": "microflow" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": { 188 | "name": "ipython", 189 | "version": 3 190 | }, 191 | "file_extension": ".py", 192 | "mimetype": "text/x-python", 193 | "name": "python", 194 | "nbconvert_exporter": "python", 195 | "pygments_lexer": "ipython3", 196 | "version": "3.10.9" 197 | } 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 5 201 | } 202 | -------------------------------------------------------------------------------- /analysis/performance/plots/person-detect-esp32-performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/person-detect-esp32-performance.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/person-detect-nrf52840-performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/person-detect-nrf52840-performance.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/person-detect-performance-comparison.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/person-detect-performance-comparison.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/sine-esp32-performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/sine-esp32-performance.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/sine-nRF52840-performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/sine-nRF52840-performance.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/sine-performance-comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/sine-performance-comparison.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/speech-esp32-performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/speech-esp32-performance.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/speech-nrf52840-performance.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/speech-nrf52840-performance.pdf -------------------------------------------------------------------------------- /analysis/performance/plots/speech-performance-comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/analysis/performance/plots/speech-performance-comparison.pdf -------------------------------------------------------------------------------- /analysis/performance/sine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6e987910-eeb0-46fe-9650-e962701f51a6", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Sine Performance" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "812a1605-f1c6-4e55-a19d-e334f708d4c2", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "a0250ec6-017e-4f68-bd88-b5b02dc1e16c", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "import seaborn as sns" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "b12f59e8-8cca-453d-b933-7563ec316ea5", 38 | "metadata": {}, 39 | "source": [ 40 | "## ESP32" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "afc17494-4359-4500-a4b6-22da30757064", 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "tflm_esp32 = pd.read_csv('data/sine-esp32-tflm.csv')\n", 53 | "microflow_esp32 = pd.read_csv('data/sine-esp32-microflow.csv')\n", 54 | "\n", 55 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 56 | 
"fig.suptitle(\"Sine Predictor Model on the ESP32\")\n", 57 | "top_limit = 30\n", 58 | "\n", 59 | "sns.histplot(tflm_esp32.time, ax=ax[0])\n", 60 | "start, end = ax[0].get_xlim()\n", 61 | "median = tflm_esp32.time.median()\n", 62 | "ax[0].axvline(median, color='red', label='Median')\n", 63 | "ax[0].set_xlabel('Execution Time (µs)')\n", 64 | "ax[0].set_xticks((start, median, end))\n", 65 | "ax[0].set_ylim(top=top_limit)\n", 66 | "ax[0].set_title('TFLM')\n", 67 | "ax[0].legend()\n", 68 | "\n", 69 | "sns.histplot(microflow_esp32.time, ax=ax[1])\n", 70 | "start, end = ax[1].get_xlim()\n", 71 | "median = microflow_esp32.time.median()\n", 72 | "ax[1].axvline(median, color='red', label='Median')\n", 73 | "ax[1].set_xlabel('Execution Time (µs)')\n", 74 | "ax[1].set_xticks((start, median, end))\n", 75 | "ax[1].set_ylim(top=top_limit)\n", 76 | "ax[1].set_title('MicroFlow')\n", 77 | "ax[1].legend()\n", 78 | "\n", 79 | "plt.savefig('plots/sine-esp32-performance.pdf')\n", 80 | "plt.show()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "id": "0dafe385-ab7b-4488-9eb3-b23202c4787b", 86 | "metadata": {}, 87 | "source": [ 88 | "## nRF52840" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "id": "58db7c12-52e2-4a71-b22c-78e46c88d8ab", 95 | "metadata": { 96 | "tags": [] 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "tflm_nrf52840 = pd.read_csv('data/sine-nrf52840-tflm.csv')\n", 101 | "microflow_nrf52840 = pd.read_csv('data/sine-nrf52840-microflow.csv')\n", 102 | "\n", 103 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 104 | "fig.suptitle(\"Sine Predictor Model on the nRF52840\")\n", 105 | "top_limit = 70\n", 106 | "\n", 107 | "sns.histplot(tflm_nrf52840.time, ax=ax[0])\n", 108 | "start, end = ax[0].get_xlim()\n", 109 | "median = tflm_nrf52840.time.median()\n", 110 | "ax[0].axvline(median, color='red', label='Median')\n", 111 | "ax[0].set_xlabel('Execution Time (µs)')\n", 112 | "ax[0].set_xticks((start-10, 
median, end))\n", 113 | "ax[0].set_ylim(top=top_limit)\n", 114 | "ax[0].set_title('TFLM')\n", 115 | "ax[0].legend()\n", 116 | "\n", 117 | "sns.histplot(microflow_nrf52840.time, ax=ax[1])\n", 118 | "start, end = ax[1].get_xlim()\n", 119 | "median = microflow_nrf52840.time.median()\n", 120 | "ax[1].axvline(median, color='red', label='Median')\n", 121 | "ax[1].set_xlabel('Execution Time (µs)')\n", 122 | "ax[1].set_xticks((start, median, end))\n", 123 | "ax[1].set_ylim(top=top_limit)\n", 124 | "ax[1].set_title('MicroFlow')\n", 125 | "ax[1].legend()\n", 126 | "\n", 127 | "plt.savefig('plots/sine-nrf52840-performance.pdf')\n", 128 | "plt.show()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "aa69dde0-822f-41f4-92a4-90dfcdb19453", 134 | "metadata": {}, 135 | "source": [ 136 | "## Comparison" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "id": "96a49dd9-e2c1-4970-8212-7485afd6bd49", 143 | "metadata": { 144 | "tags": [] 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "data = pd.DataFrame({\n", 149 | " 'MCU': [*['ESP32'] * 200, *['nRF52840'] * 200],\n", 150 | " 'Inference Engine': [*['TFLM'] * 100, *['MicroFlow'] * 100] * 2,\n", 151 | " 'Execution Time (µs)': [*tflm_esp32.time, *microflow_esp32.time, *tflm_nrf52840.time, *microflow_nrf52840.time]\n", 152 | "})\n", 153 | "\n", 154 | "plt.figure(figsize=(8, 3), layout='constrained')\n", 155 | "sns.barplot(\n", 156 | " data=data,\n", 157 | " x='Execution Time (µs)',\n", 158 | " y='MCU',\n", 159 | " hue='Inference Engine',\n", 160 | " estimator='median',\n", 161 | " errorbar='pi',\n", 162 | " capsize=.1,\n", 163 | " errwidth=2,\n", 164 | " width=.7\n", 165 | ")\n", 166 | "plt.ylabel('')\n", 167 | "plt.legend()\n", 168 | "plt.savefig('plots/sine-performance-comparison.pdf')\n", 169 | "plt.show()" 170 | ] 171 | } 172 | ], 173 | "metadata": { 174 | "kernelspec": { 175 | "display_name": "microflow", 176 | "language": "python", 177 | "name": "microflow" 178 | }, 
179 | "language_info": { 180 | "codemirror_mode": { 181 | "name": "ipython", 182 | "version": 3 183 | }, 184 | "file_extension": ".py", 185 | "mimetype": "text/x-python", 186 | "name": "python", 187 | "nbconvert_exporter": "python", 188 | "pygments_lexer": "ipython3", 189 | "version": "3.10.9" 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 5 194 | } 195 | -------------------------------------------------------------------------------- /analysis/performance/speech.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6e987910-eeb0-46fe-9650-e962701f51a6", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Speech Performance" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "812a1605-f1c6-4e55-a19d-e334f708d4c2", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "a0250ec6-017e-4f68-bd88-b5b02dc1e16c", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "from matplotlib.ticker import FormatStrFormatter\n", 33 | "import seaborn as sns" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "14b66780-b555-48fb-82cb-0f4b26ac68d2", 39 | "metadata": {}, 40 | "source": [ 41 | "## ESP32" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "afc17494-4359-4500-a4b6-22da30757064", 48 | "metadata": { 49 | "tags": [] 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "tflm_esp32 = pd.read_csv('data/speech-esp32-tflm.csv')\n", 54 | "microflow_esp32 = pd.read_csv('data/speech-esp32-microflow.csv')\n", 55 | "tflm_esp32.time /= 1000\n", 56 | "microflow_esp32.time /= 1000\n", 57 | "\n", 58 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 59 | "fig.suptitle(\"Speech Command 
Recognizer Model on the ESP32\")\n", 60 | "top_limit = 30\n", 61 | "\n", 62 | "sns.histplot(tflm_esp32.time, ax=ax[0])\n", 63 | "start, end = ax[0].get_xlim()\n", 64 | "median = tflm_esp32.time.median()\n", 65 | "ax[0].axvline(median, color='red', label='Median')\n", 66 | "ax[0].set_xlabel('Execution Time (ms)')\n", 67 | "ax[0].set_xticks((start, median, end))\n", 68 | "ax[0].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))\n", 69 | "ax[0].set_ylim(top=top_limit)\n", 70 | "ax[0].set_title('TFLM')\n", 71 | "ax[0].legend()\n", 72 | "\n", 73 | "sns.histplot(microflow_esp32.time, ax=ax[1])\n", 74 | "start, end = ax[1].get_xlim()\n", 75 | "median = microflow_esp32.time.median()\n", 76 | "ax[1].axvline(median, color='red', label='Median')\n", 77 | "ax[1].set_xlabel('Execution Time (ms)')\n", 78 | "ax[1].set_xticks((start, median, end))\n", 79 | "ax[1].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))\n", 80 | "ax[1].set_ylim(top=top_limit)\n", 81 | "ax[1].set_title('MicroFlow')\n", 82 | "ax[1].legend()\n", 83 | "\n", 84 | "plt.savefig('plots/speech-esp32-performance.pdf')\n", 85 | "plt.show()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "id": "0dafe385-ab7b-4488-9eb3-b23202c4787b", 91 | "metadata": {}, 92 | "source": [ 93 | "## nRF52840" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "58db7c12-52e2-4a71-b22c-78e46c88d8ab", 100 | "metadata": { 101 | "tags": [] 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "tflm_nrf52840 = pd.read_csv('data/speech-nrf52840-tflm.csv')\n", 106 | "microflow_nrf52840 = pd.read_csv('data/speech-nrf52840-microflow.csv')\n", 107 | "tflm_nrf52840.time /= 1000\n", 108 | "microflow_nrf52840.time /= 1000\n", 109 | "\n", 110 | "fig, ax = plt.subplots(1, 2, figsize=(8, 3), layout='constrained')\n", 111 | "fig.suptitle(\"Speech Command Recognizer Model on the nRF52840\")\n", 112 | "top_limit = 30\n", 113 | "\n", 114 | "sns.histplot(tflm_nrf52840.time, ax=ax[0])\n", 115 | "start, end 
= ax[0].get_xlim()\n", 116 | "median = tflm_nrf52840.time.median()\n", 117 | "ax[0].axvline(median, color='red', label='Median')\n", 118 | "ax[0].set_xlabel('Execution Time (ms)')\n", 119 | "ax[0].set_xticks((start, median, end))\n", 120 | "ax[0].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))\n", 121 | "ax[0].set_ylim(top=top_limit)\n", 122 | "ax[0].set_title('TFLM')\n", 123 | "ax[0].legend()\n", 124 | "\n", 125 | "sns.histplot(microflow_nrf52840.time, ax=ax[1])\n", 126 | "start, end = ax[1].get_xlim()\n", 127 | "median = microflow_nrf52840.time.median()\n", 128 | "ax[1].axvline(median, color='red', label='Median')\n", 129 | "ax[1].set_xlabel('Execution Time (ms)')\n", 130 | "ax[1].set_xticks((start, median, end))\n", 131 | "ax[1].xaxis.set_major_formatter(FormatStrFormatter('%.2f'))\n", 132 | "ax[1].set_ylim(top=top_limit)\n", 133 | "ax[1].set_title('MicroFlow')\n", 134 | "ax[1].legend()\n", 135 | "\n", 136 | "plt.savefig('plots/speech-nrf52840-performance.pdf')\n", 137 | "plt.show()" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "id": "c3ec165c-6a8e-481c-8a0e-c21031fdfad1", 143 | "metadata": {}, 144 | "source": [ 145 | "## Comparison" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "id": "b122ffe9-32d7-431d-95d5-62f119330f31", 152 | "metadata": { 153 | "tags": [] 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "data = pd.DataFrame({\n", 158 | " 'MCU': [*['ESP32'] * 200, *['nRF52840'] * 200],\n", 159 | " 'Inference Engine': [*['TFLM'] * 100, *['MicroFlow'] * 100] * 2,\n", 160 | " 'Execution Time (ms)': [*tflm_esp32.time, *microflow_esp32.time, *tflm_nrf52840.time, *microflow_nrf52840.time]\n", 161 | "})\n", 162 | "\n", 163 | "plt.figure(figsize=(8, 3), layout='constrained')\n", 164 | "sns.barplot(\n", 165 | " data=data,\n", 166 | " x='Execution Time (ms)',\n", 167 | " y='MCU',\n", 168 | " hue='Inference Engine',\n", 169 | " estimator='median',\n", 170 | " errorbar='pi',\n", 171 | " 
capsize=.1,\n", 172 | " errwidth=2,\n", 173 | " width=.7\n", 174 | ")\n", 175 | "plt.ylabel('')\n", 176 | "plt.legend()\n", 177 | "plt.savefig('plots/speech-performance-comparison.pdf')\n", 178 | "plt.show()" 179 | ] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "microflow", 185 | "language": "python", 186 | "name": "microflow" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.10.9" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 5 203 | } 204 | -------------------------------------------------------------------------------- /assets/microflow-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/assets/microflow-logo.png -------------------------------------------------------------------------------- /assets/microflow-logo.pxd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/assets/microflow-logo.pxd -------------------------------------------------------------------------------- /benches/person_detect.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use microflow::buffer::Buffer2D; 3 | use microflow_macros::model; 4 | 5 | #[model("models/person_detect.tflite")] 6 | struct PersonDetect; 7 | 8 | fn person_detect_model(c: &mut Criterion) { 9 | let input = [Buffer2D::from_element([0.5])]; 10 | 11 | c.bench_function("person_detect_model", |b| { 12 | b.iter(|| PersonDetect::predict(input)) 13 | }); 14 | } 
15 | 16 | criterion_group!(benches, person_detect_model); 17 | criterion_main!(benches); 18 | -------------------------------------------------------------------------------- /benches/sine.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use microflow_macros::model; 3 | use nalgebra::matrix; 4 | 5 | #[model("models/sine.tflite")] 6 | struct Sine; 7 | 8 | fn sine_model(c: &mut Criterion) { 9 | let input = matrix![0.5]; 10 | 11 | c.bench_function("sine_model", |b| b.iter(|| Sine::predict(input))); 12 | } 13 | 14 | criterion_group!(benches, sine_model); 15 | criterion_main!(benches); 16 | -------------------------------------------------------------------------------- /benches/speech.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use microflow::buffer::Buffer2D; 3 | use microflow_macros::model; 4 | 5 | #[model("models/speech.tflite")] 6 | struct Speech; 7 | 8 | fn speech_model(c: &mut Criterion) { 9 | let input = Buffer2D::from_element(0.5); 10 | 11 | c.bench_function("speech_model", |b| b.iter(|| Speech::predict(input))); 12 | } 13 | 14 | criterion_group!(benches, speech_model); 15 | criterion_main!(benches); 16 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(all(target_arch = "arm", target_os = "none"))'] 2 | runner = "arduino-run -b arduino:mbed_nano:nano33ble" 3 | rustflags = [ 4 | "-C", "linker=flip-link", 5 | "-C", "link-arg=-Tlink.x", 6 | "-C", "link-arg=--nmagic", 7 | ] 8 | 9 | [build] 10 | target = "thumbv7em-none-eabihf" 11 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "examples-arduino-nano33ble" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | cortex-m = { version = "0.7", features = ["critical-section-single-core"] } 8 | cortex-m-rt = "0.7" 9 | panic-halt = "0.2" 10 | hal = { package = "nrf52840-hal", version = "0.16", features = ["rt"] } 11 | microflow = { path = "../.." } 12 | nalgebra = { version = "0.32", default-features = false, features = ["macros"] } 13 | libm = "0.2" 14 | 15 | [profile.release] 16 | lto = true 17 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/Makefile.toml: -------------------------------------------------------------------------------- 1 | [tasks.default] 2 | clear = true 3 | dependencies = [ 4 | "examples-compile", 5 | "clippy", 6 | "check-format", 7 | ] 8 | 9 | [tasks.examples-compile] 10 | args = ["build", "--examples", "--release"] 11 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/examples/person_detect.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use panic_halt as _; 5 | 6 | use core::fmt::Write; 7 | use cortex_m::asm::nop; 8 | use cortex_m_rt::entry; 9 | use hal::gpio::Level; 10 | use hal::uarte::{Baudrate, Parity}; 11 | use hal::{gpio, uarte, Clocks, Rtc, Uarte}; 12 | use microflow::buffer::Buffer2D; 13 | use microflow::model; 14 | 15 | const RTC_FREQ_MHZ: f32 = 0.032_768; 16 | 17 | #[path = "../../../samples/features/person_detect.rs"] 18 | mod features; 19 | 20 | #[model("../../models/person_detect.tflite")] 21 | struct PersonDetect; 22 | 23 | fn print_prediction(serial: &mut impl Write, prediction: Buffer2D) { 24 | writeln!( 25 | serial, 26 | "Prediction: {:.1}% no person, {:.1}% person", 27 | prediction[0] * 100., 28 | prediction[1] * 100., 29 | ) 30 | 
.unwrap(); 31 | writeln!( 32 | serial, 33 | "Outcome: {}", 34 | match prediction.iamax_full().1 { 35 | 0 => "NO PERSON", 36 | 1 => "PERSON", 37 | _ => unreachable!(), 38 | } 39 | ) 40 | .unwrap(); 41 | } 42 | 43 | #[entry] 44 | fn main() -> ! { 45 | let p = hal::pac::Peripherals::take().unwrap(); 46 | let _clocks = Clocks::new(p.CLOCK).enable_ext_hfosc(); 47 | let port1 = gpio::p1::Parts::new(p.P1); 48 | let rtc = Rtc::new(p.RTC0, 0).unwrap(); 49 | rtc.enable_counter(); 50 | 51 | let mut serial = Uarte::new( 52 | p.UARTE0, 53 | uarte::Pins { 54 | rxd: port1.p1_10.into_floating_input().degrade(), 55 | txd: port1.p1_03.into_push_pull_output(Level::High).degrade(), 56 | cts: None, 57 | rts: None, 58 | }, 59 | Parity::EXCLUDED, 60 | Baudrate::BAUD115200, 61 | ); 62 | 63 | let start = rtc.get_counter(); 64 | let person_predicted = PersonDetect::predict_quantized(features::PERSON); 65 | let end = rtc.get_counter(); 66 | writeln!(serial).unwrap(); 67 | writeln!(serial, "Input sample: 'person.bmp'").unwrap(); 68 | print_prediction(&mut serial, person_predicted); 69 | writeln!( 70 | serial, 71 | "Execution time: {:.0} us", 72 | (end - start) as f32 / RTC_FREQ_MHZ 73 | ) 74 | .unwrap(); 75 | 76 | let start = rtc.get_counter(); 77 | let no_person_predicted = PersonDetect::predict_quantized(features::NO_PERSON); 78 | let end = rtc.get_counter(); 79 | writeln!(serial).unwrap(); 80 | writeln!(serial, "Input sample: 'no_person.bmp'").unwrap(); 81 | print_prediction(&mut serial, no_person_predicted); 82 | writeln!( 83 | serial, 84 | "Execution time: {:.0} us", 85 | (end - start) as f32 / RTC_FREQ_MHZ 86 | ) 87 | .unwrap(); 88 | 89 | loop { 90 | nop(); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/examples/sine.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use panic_halt as _; 5 | 6 | use core::fmt::Write; 7 | use 
cortex_m::asm::nop; 8 | use cortex_m_rt::entry; 9 | use hal::gpio::Level; 10 | use hal::uarte::{Baudrate, Parity}; 11 | use hal::{gpio, Rtc}; 12 | use hal::{uarte, Clocks, Uarte}; 13 | use libm::sinf; 14 | use microflow::model; 15 | use nalgebra::matrix; 16 | 17 | const RTC_FREQ_MHZ: f32 = 0.032_768; 18 | 19 | #[model("../../models/sine.tflite")] 20 | struct Sine; 21 | 22 | #[entry] 23 | fn main() -> ! { 24 | let p = hal::pac::Peripherals::take().unwrap(); 25 | let _clocks = Clocks::new(p.CLOCK).enable_ext_hfosc(); 26 | let port1 = gpio::p1::Parts::new(p.P1); 27 | 28 | let rtc = Rtc::new(p.RTC0, 0).unwrap(); 29 | rtc.enable_counter(); 30 | 31 | let mut serial = Uarte::new( 32 | p.UARTE0, 33 | uarte::Pins { 34 | rxd: port1.p1_10.into_floating_input().degrade(), 35 | txd: port1.p1_03.into_push_pull_output(Level::High).degrade(), 36 | cts: None, 37 | rts: None, 38 | }, 39 | Parity::EXCLUDED, 40 | Baudrate::BAUD115200, 41 | ); 42 | 43 | let x = 0.5; 44 | let start = rtc.get_counter(); 45 | let y_predicted = Sine::predict(matrix![x])[0]; 46 | let end = rtc.get_counter(); 47 | let y_exact = sinf(x); 48 | 49 | writeln!(serial).unwrap(); 50 | writeln!(serial, "Predicted sin({}): {}", x, y_predicted).unwrap(); 51 | writeln!(serial, "Exact sin({}): {}", x, y_exact).unwrap(); 52 | writeln!(serial, "Error: {}", y_exact - y_predicted).unwrap(); 53 | writeln!( 54 | serial, 55 | "Execution time: {:.0} us", 56 | (end - start) as f32 / RTC_FREQ_MHZ 57 | ) 58 | .unwrap(); 59 | 60 | loop { 61 | nop(); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/examples/speech.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use panic_halt as _; 5 | 6 | use core::fmt::Write; 7 | use cortex_m::asm::nop; 8 | use cortex_m_rt::entry; 9 | use hal::gpio::Level; 10 | use hal::uarte::{Baudrate, Parity}; 11 | use hal::{gpio, Rtc}; 12 | use hal::{uarte, 
Clocks, Uarte}; 13 | use microflow::buffer::Buffer2D; 14 | use microflow::model; 15 | 16 | const RTC_FREQ_MHZ: f32 = 0.032_768; 17 | 18 | #[path = "../../../samples/features/speech.rs"] 19 | mod features; 20 | 21 | #[model("../../models/speech.tflite")] 22 | struct Speech; 23 | 24 | fn print_prediction(serial: &mut impl Write, prediction: Buffer2D) { 25 | writeln!( 26 | serial, 27 | "Prediction: {:.1}% silence, {:.1}% unknown, {:.1}% yes, {:.1}% no", 28 | prediction[0] * 100., 29 | prediction[1] * 100., 30 | prediction[2] * 100., 31 | prediction[3] * 100., 32 | ) 33 | .unwrap(); 34 | writeln!( 35 | serial, 36 | "Outcome: {}", 37 | match prediction.iamax_full().1 { 38 | 0 => "SILENCE", 39 | 1 => "UNKNOWN", 40 | 2 => "YES", 41 | 3 => "NO", 42 | _ => unreachable!(), 43 | } 44 | ) 45 | .unwrap(); 46 | } 47 | 48 | #[entry] 49 | fn main() -> ! { 50 | let p = hal::pac::Peripherals::take().unwrap(); 51 | let _clocks = Clocks::new(p.CLOCK).enable_ext_hfosc(); 52 | let port1 = gpio::p1::Parts::new(p.P1); 53 | let rtc = Rtc::new(p.RTC0, 0).unwrap(); 54 | rtc.enable_counter(); 55 | 56 | let mut serial = Uarte::new( 57 | p.UARTE0, 58 | uarte::Pins { 59 | rxd: port1.p1_10.into_floating_input().degrade(), 60 | txd: port1.p1_03.into_push_pull_output(Level::High).degrade(), 61 | cts: None, 62 | rts: None, 63 | }, 64 | Parity::EXCLUDED, 65 | Baudrate::BAUD115200, 66 | ); 67 | 68 | let start = rtc.get_counter(); 69 | let yes_predicted = Speech::predict_quantized(features::YES); 70 | let end = rtc.get_counter(); 71 | writeln!(serial).unwrap(); 72 | writeln!(serial, "Input sample: 'yes.wav'").unwrap(); 73 | print_prediction(&mut serial, yes_predicted); 74 | writeln!( 75 | serial, 76 | "Execution time: {:.0} us", 77 | (end - start) as f32 / RTC_FREQ_MHZ 78 | ) 79 | .unwrap(); 80 | 81 | let start = rtc.get_counter(); 82 | let no_predicted = Speech::predict_quantized(features::NO); 83 | let end = rtc.get_counter(); 84 | writeln!(serial).unwrap(); 85 | writeln!(serial, "Input sample: 
'no.wav'").unwrap(); 86 | print_prediction(&mut serial, no_predicted); 87 | writeln!( 88 | serial, 89 | "Execution time: {:.0} us", 90 | (end - start) as f32 / RTC_FREQ_MHZ 91 | ) 92 | .unwrap(); 93 | 94 | loop { 95 | nop(); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /examples/arduino-nano33ble/memory.x: -------------------------------------------------------------------------------- 1 | MEMORY 2 | { 3 | FLASH : ORIGIN = 0x000000 + 0x10000, LENGTH = 1M - 0x10000 4 | RAM : ORIGIN = 0x20000000, LENGTH = 256K 5 | } 6 | -------------------------------------------------------------------------------- /examples/arduino-uno/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(target_arch = "avr")'] 2 | runner = "arduino-run -b arduino:avr:uno" 3 | rustflags = [ 4 | "-C", "default-linker-libraries", 5 | "-C", "link-arg=-Wl,--allow-multiple-definition", 6 | ] 7 | 8 | [build] 9 | target = "avr-unknown-gnu-atmega328" 10 | 11 | [unstable] 12 | build-std = ["core"] 13 | -------------------------------------------------------------------------------- /examples/arduino-uno/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "examples-arduino-uno" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | avr-device = { version = "0.7", features = ["atmega328p"] } 8 | ufmt = "0.2" 9 | ufmt_float = { git = "https://github.com/tl8roy/ufmt_float" } 10 | panic-halt = "0.2" 11 | hal = { package = "arduino-hal", git = "https://github.com/rahix/avr-hal", features = ["arduino-uno"] } 12 | microflow = { path = "../.." 
} 13 | nalgebra = { version = "0.32", default-features = false, features = ["macros"] } 14 | libm = "0.2" 15 | 16 | [profile.dev] 17 | opt-level = "z" 18 | panic = "abort" 19 | lto = true 20 | codegen-units = 1 21 | 22 | [profile.release] 23 | opt-level = "z" 24 | panic = "abort" 25 | lto = true 26 | codegen-units = 1 27 | -------------------------------------------------------------------------------- /examples/arduino-uno/Makefile.toml: -------------------------------------------------------------------------------- 1 | [tasks.default] 2 | clear = true 3 | dependencies = [ 4 | "examples-compile", 5 | "clippy", 6 | "check-format", 7 | ] 8 | 9 | [tasks.examples-compile] 10 | toolchain = "nightly" 11 | args = ["build", "--examples", "--release"] 12 | -------------------------------------------------------------------------------- /examples/arduino-uno/examples/sine.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use avr_device::asm::nop; 5 | use hal::prelude::*; 6 | use libm::sinf; 7 | use microflow::model; 8 | use nalgebra::matrix; 9 | use ufmt::uwriteln; 10 | use ufmt_float::uFmt_f32; 11 | 12 | use panic_halt as _; 13 | 14 | #[model("../../models/sine.tflite")] 15 | struct Sine; 16 | 17 | #[hal::entry] 18 | fn main() -> ! 
{ 19 | let dp = hal::Peripherals::take().unwrap(); 20 | let pins = hal::pins!(dp); 21 | let mut serial = hal::default_serial!(dp, pins, 57600); 22 | 23 | let x = 0.5; 24 | let y_predicted = Sine::predict(matrix![x])[0]; 25 | let y_exact = sinf(x); 26 | let x_display = uFmt_f32::One(x); 27 | uwriteln!(&mut serial, "").unwrap_infallible(); 28 | uwriteln!( 29 | &mut serial, 30 | "Predicted sin({}): {}", 31 | x_display, 32 | uFmt_f32::Five(y_predicted) 33 | ) 34 | .unwrap_infallible(); 35 | uwriteln!( 36 | &mut serial, 37 | "Exact sin({}): {}", 38 | x_display, 39 | uFmt_f32::Five(y_exact) 40 | ) 41 | .unwrap_infallible(); 42 | uwriteln!( 43 | &mut serial, 44 | "Error: {}", 45 | uFmt_f32::Five(y_exact - y_predicted) 46 | ) 47 | .unwrap_infallible(); 48 | 49 | loop { 50 | nop(); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /examples/arduino-uno/rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "nightly" 3 | components = ["rust-src"] 4 | profile = "minimal" 5 | -------------------------------------------------------------------------------- /examples/atsamx7x/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(all(target_arch = "arm", target_os = "none"))'] 2 | runner = "probe-rs run --chip ATSAME70Q21B" 3 | rustflags = [ 4 | "-C", "linker=flip-link", 5 | "-C", "link-arg=-Tlink.x", 6 | "-C", "link-arg=--nmagic", 7 | ] 8 | 9 | [build] 10 | target = "thumbv7em-none-eabihf" 11 | -------------------------------------------------------------------------------- /examples/atsamx7x/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "examples-atsamx7x" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | cortex-m = { version = "0.7", features = ["critical-section-single-core"] } 8 | cortex-m-rtic = 
"1.1" 9 | rtt-target = { version = "0.3", features = ["cortex-m"] } 10 | panic-probe = { version = "0.3", features = ["print-rtt"] } 11 | hal = { package = "atsamx7x-hal", version = "0.4", features = ["samv71q21b-rt", "unproven"] } 12 | microflow = { path = "../.." } 13 | nalgebra = { version = "0.32", default-features = false, features = ["macros"] } 14 | libm = "0.2" 15 | 16 | [profile.release] 17 | lto = true 18 | -------------------------------------------------------------------------------- /examples/atsamx7x/Makefile.toml: -------------------------------------------------------------------------------- 1 | [tasks.default] 2 | clear = true 3 | dependencies = [ 4 | "examples-compile", 5 | "clippy", 6 | "check-format", 7 | ] 8 | 9 | [tasks.examples-compile] 10 | args = ["build", "--examples", "--release"] 11 | -------------------------------------------------------------------------------- /examples/atsamx7x/examples/person_detect.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use panic_probe as _; 5 | 6 | #[path = "../../../samples/features/person_detect.rs"] 7 | mod features; 8 | 9 | #[rtic::app(device = hal::pac, peripherals = true, dispatchers = [I2SC0])] 10 | mod app { 11 | use hal::clocks::*; 12 | use hal::efc::*; 13 | use hal::fugit::RateExtU32; 14 | use microflow::buffer::Buffer2D; 15 | use microflow::model; 16 | use rtt_target::{rprintln, rtt_init_print}; 17 | 18 | #[model("../../models/person_detect.tflite")] 19 | struct PersonDetect; 20 | 21 | #[shared] 22 | struct Shared {} 23 | 24 | #[local] 25 | struct Local {} 26 | 27 | fn print_prediction(prediction: Buffer2D) { 28 | rprintln!( 29 | "Prediction: {:.1}% no person, {:.1}% person", 30 | prediction[0] * 100., 31 | prediction[1] * 100., 32 | ); 33 | rprintln!( 34 | "Outcome: {}", 35 | match prediction.iamax_full().1 { 36 | 0 => "NO PERSON", 37 | 1 => "PERSON", 38 | _ => unreachable!(), 39 | } 40 | ); 41 | } 42 | 43 | #[init] 
fn init(cx: init::Context) -> (Shared, Local, init::Monotonics) {
    // RTIC init task: bring up RTT logging and the clock tree, then run
    // the person-detection model once on each bundled feature sample.
    rtt_init_print!();

    // Acquire the one-time clock-configuration tokens; the WDT
    // peripheral is consumed (via `.into()`) as part of the hand-over
    // to the HAL.
    let clocks = Tokens::new(
        (cx.device.PMC, cx.device.SUPC, cx.device.UTMI),
        &cx.device.WDT.into(),
    );

    // External slow clock plus a 12 MHz external main oscillator.
    clocks.slck.configure_external_normal();
    let mainck = clocks.mainck.configure_external_normal(12.MHz()).unwrap();

    // PLLA sourced from MAINCK with div 1 / mult 12 — presumably
    // 12 MHz x 12 = 144 MHz; confirm against the atsamx7x-hal docs.
    let pllack = clocks
        .pllack
        .configure(&mainck, PllaConfig { div: 1, mult: 12 })
        .unwrap();

    // Switch the host (core) and master clocks over to PLLA, undivided.
    // The EFC handle is passed so flash wait states can be adjusted for
    // the new frequency.
    HostClockController::new(clocks.hclk, clocks.mck)
        .configure(
            &pllack,
            &mut Efc::new(cx.device.EFC, VddioLevel::V3),
            HostClockConfig {
                pres: HccPrescaler::Div1,
                div: MckDivider::Div1,
            },
        )
        .unwrap();

    // Run inference on both pre-quantized samples and report the
    // results over RTT.
    let person_predicted = PersonDetect::predict_quantized(super::features::PERSON);
    let no_person_predicted = PersonDetect::predict_quantized(super::features::NO_PERSON);
    rprintln!();
    rprintln!("Input sample: 'person.bmp'");
    print_prediction(person_predicted);
    rprintln!();
    rprintln!("Input sample: 'no_person.bmp'");
    print_prediction(no_person_predicted);

    // No shared/local resources and no monotonic timers are used.
    (Shared {}, Local {}, init::Monotonics())
}
| let x = 0.5; 28 | let y_predicted = Sine::predict(matrix![x])[0]; 29 | let y_exact = sinf(x); 30 | rprintln!(); 31 | rprintln!("Predicted sin({}): {}", x, y_predicted); 32 | rprintln!("Exact sin({}): {}", x, y_exact); 33 | rprintln!("Error: {}", y_exact - y_predicted); 34 | 35 | (Shared {}, Local {}, init::Monotonics()) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /examples/atsamx7x/examples/speech.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use panic_probe as _; 5 | 6 | #[path = "../../../samples/features/speech.rs"] 7 | mod features; 8 | 9 | #[rtic::app(device = hal::pac, peripherals = true, dispatchers = [I2SC0])] 10 | mod app { 11 | use hal::clocks::*; 12 | use hal::efc::*; 13 | use hal::fugit::RateExtU32; 14 | use microflow::buffer::Buffer2D; 15 | use microflow::model; 16 | use rtt_target::{rprintln, rtt_init_print}; 17 | 18 | #[model("../../models/speech.tflite")] 19 | struct Speech; 20 | 21 | #[shared] 22 | struct Shared {} 23 | 24 | #[local] 25 | struct Local {} 26 | 27 | fn print_prediction(prediction: Buffer2D) { 28 | rprintln!( 29 | "Prediction: {:.1}% silence, {:.1}% unknown, {:.1}% yes, {:.1}% no", 30 | prediction[0] * 100., 31 | prediction[1] * 100., 32 | prediction[2] * 100., 33 | prediction[3] * 100., 34 | ); 35 | rprintln!( 36 | "Outcome: {}", 37 | match prediction.iamax_full().1 { 38 | 0 => "SILENCE", 39 | 1 => "UNKNOWN", 40 | 2 => "YES", 41 | 3 => "NO", 42 | _ => unreachable!(), 43 | } 44 | ); 45 | } 46 | 47 | #[init] 48 | fn init(cx: init::Context) -> (Shared, Local, init::Monotonics) { 49 | rtt_init_print!(); 50 | 51 | let clocks = Tokens::new( 52 | (cx.device.PMC, cx.device.SUPC, cx.device.UTMI), 53 | &cx.device.WDT.into(), 54 | ); 55 | 56 | clocks.slck.configure_external_normal(); 57 | let mainck = clocks.mainck.configure_external_normal(12.MHz()).unwrap(); 58 | 59 | let pllack = clocks 60 | .pllack 61 | 
.configure(&mainck, PllaConfig { div: 1, mult: 12 }) 62 | .unwrap(); 63 | 64 | HostClockController::new(clocks.hclk, clocks.mck) 65 | .configure( 66 | &pllack, 67 | &mut Efc::new(cx.device.EFC, VddioLevel::V3), 68 | HostClockConfig { 69 | pres: HccPrescaler::Div1, 70 | div: MckDivider::Div1, 71 | }, 72 | ) 73 | .unwrap(); 74 | 75 | let yes_predicted = Speech::predict_quantized(super::features::YES); 76 | let no_predicted = Speech::predict_quantized(super::features::NO); 77 | rprintln!(); 78 | rprintln!("Input sample: 'yes.wav'"); 79 | print_prediction(yes_predicted); 80 | rprintln!(); 81 | rprintln!("Input sample: 'no.wav'"); 82 | print_prediction(no_predicted); 83 | 84 | (Shared {}, Local {}, init::Monotonics()) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /examples/atsamx7x/memory.x: -------------------------------------------------------------------------------- 1 | MEMORY 2 | { 3 | FLASH : ORIGIN = 0x400000, LENGTH = 2M 4 | RAM : ORIGIN = 0x20400000, LENGTH = 384K 5 | } 6 | -------------------------------------------------------------------------------- /examples/esp32/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(target_arch = "xtensa")'] 2 | runner = "espflash flash --monitor" 3 | rustflags = [ 4 | "-C", "link-arg=-Tlinkall.x", 5 | "-C", "link-arg=-nostartfiles", 6 | ] 7 | 8 | [build] 9 | target = "xtensa-esp32-none-elf" 10 | 11 | [unstable] 12 | build-std = ["core"] 13 | -------------------------------------------------------------------------------- /examples/esp32/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "examples-esp32" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | esp-println = { version = "0.13", features = ["esp32"] } 8 | esp-backtrace = { version = "0.15", features = ["esp32", "panic-handler", "println"] } 9 | esp_hal = { 
package = "esp-hal", version = "0.23", features = ["esp32"] } 10 | microflow = { path = "../.." } 11 | nalgebra = { version = "0.32", default-features = false, features = ["macros"] } 12 | libm = "0.2" 13 | 14 | [profile.release] 15 | lto = true 16 | -------------------------------------------------------------------------------- /examples/esp32/Makefile.toml: -------------------------------------------------------------------------------- 1 | [tasks.default] 2 | clear = true 3 | dependencies = [ 4 | "examples-compile", 5 | "clippy", 6 | "check-format" 7 | ] 8 | 9 | [tasks.examples-compile] 10 | args = ["build", "--examples", "--release"] 11 | -------------------------------------------------------------------------------- /examples/esp32/examples/person_detect.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use esp_hal::{clock::CpuClock, main, rtc_cntl::Rtc}; 5 | use esp_println::println; 6 | use microflow::buffer::Buffer2D; 7 | use microflow::model; 8 | 9 | use esp_backtrace as _; 10 | 11 | #[path = "../../../samples/features/person_detect.rs"] 12 | mod features; 13 | 14 | #[model("../../models/person_detect.tflite")] 15 | struct PersonDetect; 16 | 17 | fn print_prediction(prediction: Buffer2D) { 18 | println!( 19 | "Prediction: {:.1}% no person, {:.1}% person", 20 | prediction[0] * 100., 21 | prediction[1] * 100., 22 | ); 23 | println!( 24 | "Outcome: {}", 25 | match prediction.iamax_full().1 { 26 | 0 => "NO PERSON", 27 | 1 => "PERSON", 28 | _ => unreachable!(), 29 | } 30 | ); 31 | } 32 | 33 | #[main] 34 | fn main() -> ! 
{ 35 | let config = esp_hal::Config::default().with_cpu_clock(CpuClock::max()); 36 | let peripherals = esp_hal::init(config); 37 | let rtc = Rtc::new(peripherals.LPWR); 38 | 39 | let start = rtc.time_since_boot(); 40 | let person_predicted = PersonDetect::predict_quantized(features::PERSON); 41 | let end = rtc.time_since_boot(); 42 | println!(" "); 43 | println!("Input sample: 'person.bmp'"); 44 | print_prediction(person_predicted); 45 | println!("Execution time: {}", end - start); 46 | 47 | let start = rtc.time_since_boot(); 48 | let no_person_predicted = PersonDetect::predict_quantized(features::NO_PERSON); 49 | let end = rtc.time_since_boot(); 50 | println!(" "); 51 | println!("Input sample: 'no_person.bmp'"); 52 | print_prediction(no_person_predicted); 53 | println!("Execution time: {}", end - start); 54 | 55 | loop {} 56 | } 57 | -------------------------------------------------------------------------------- /examples/esp32/examples/sine.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use esp_hal::{clock::CpuClock, main, rtc_cntl::Rtc}; 5 | use esp_println::println; 6 | use libm::sinf; 7 | use microflow::model; 8 | use nalgebra::matrix; 9 | 10 | use esp_backtrace as _; 11 | 12 | #[model("../../models/sine.tflite")] 13 | struct Sine; 14 | 15 | #[main] 16 | fn main() -> ! 
{ 17 | let config = esp_hal::Config::default().with_cpu_clock(CpuClock::max()); 18 | let peripherals = esp_hal::init(config); 19 | let rtc = Rtc::new(peripherals.LPWR); 20 | 21 | let x = 0.5; 22 | let start = rtc.time_since_boot(); 23 | let y_predicted = Sine::predict(matrix![x])[0]; 24 | let end = rtc.time_since_boot(); 25 | let y_exact = sinf(x); 26 | println!(" "); 27 | println!("Predicted sin({}): {}", x, y_predicted); 28 | println!("Exact sin({}): {}", x, y_exact); 29 | println!("Error: {}", y_exact - y_predicted); 30 | println!("Execution time: {}", end - start); 31 | 32 | loop {} 33 | } 34 | -------------------------------------------------------------------------------- /examples/esp32/examples/speech.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use esp_hal::{clock::CpuClock, main, rtc_cntl::Rtc}; 5 | use esp_println::println; 6 | use microflow::buffer::Buffer2D; 7 | use microflow::model; 8 | 9 | use esp_backtrace as _; 10 | 11 | #[path = "../../../samples/features/speech.rs"] 12 | mod features; 13 | 14 | #[model("../../models/speech.tflite")] 15 | struct Speech; 16 | 17 | fn print_prediction(prediction: Buffer2D) { 18 | println!( 19 | "Prediction: {:.1}% silence, {:.1}% unknown, {:.1}% yes, {:.1}% no", 20 | prediction[0] * 100., 21 | prediction[1] * 100., 22 | prediction[2] * 100., 23 | prediction[3] * 100., 24 | ); 25 | println!( 26 | "Outcome: {}", 27 | match prediction.iamax_full().1 { 28 | 0 => "SILENCE", 29 | 1 => "UNKNOWN", 30 | 2 => "YES", 31 | 3 => "NO", 32 | _ => unreachable!(), 33 | } 34 | ); 35 | } 36 | 37 | #[main] 38 | fn main() -> ! 
{ 39 | let config = esp_hal::Config::default().with_cpu_clock(CpuClock::max()); 40 | let peripherals = esp_hal::init(config); 41 | let rtc = Rtc::new(peripherals.LPWR); 42 | 43 | let start = rtc.time_since_boot(); 44 | let yes_predicted = Speech::predict_quantized(features::YES); 45 | let end = rtc.time_since_boot(); 46 | println!(" "); 47 | println!("Input sample: 'yes.wav'"); 48 | print_prediction(yes_predicted); 49 | println!("Execution time: {}", end - start); 50 | 51 | let start = rtc.time_since_boot(); 52 | let no_predicted = Speech::predict_quantized(features::NO); 53 | let end = rtc.time_since_boot(); 54 | println!(" "); 55 | println!("Input sample: 'no.wav'"); 56 | print_prediction(no_predicted); 57 | println!("Execution time: {}", end - start); 58 | 59 | loop {} 60 | } 61 | -------------------------------------------------------------------------------- /examples/esp32/rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "esp" 3 | -------------------------------------------------------------------------------- /examples/person_detect.rs: -------------------------------------------------------------------------------- 1 | use microflow::buffer::Buffer2D; 2 | use microflow_macros::model; 3 | 4 | #[path = "../samples/features/person_detect.rs"] 5 | mod features; 6 | 7 | #[model("models/person_detect.tflite")] 8 | struct PersonDetect; 9 | 10 | fn print_prediction(prediction: Buffer2D) { 11 | println!( 12 | "Prediction: {:.1}% no person, {:.1}% person", 13 | prediction[0] * 100., 14 | prediction[1] * 100., 15 | ); 16 | println!( 17 | "Outcome: {}", 18 | match prediction.iamax_full().1 { 19 | 0 => "NO PERSON", 20 | 1 => "PERSON", 21 | _ => unreachable!(), 22 | } 23 | ); 24 | } 25 | 26 | fn main() { 27 | let person_predicted = PersonDetect::predict_quantized(features::PERSON); 28 | let no_person_predicted = PersonDetect::predict_quantized(features::NO_PERSON); 29 | println!(); 30 | 
println!("Input sample: 'person.bmp'"); 31 | print_prediction(person_predicted); 32 | println!(); 33 | println!("Input sample: 'no_person.bmp'"); 34 | print_prediction(no_person_predicted); 35 | } 36 | -------------------------------------------------------------------------------- /examples/qemu/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(all(target_arch = "arm", target_os = "none"))'] 2 | runner = "qemu-system-arm -cpu cortex-m3 -machine lm3s6965evb -nographic -semihosting-config enable=on,target=native -kernel" 3 | rustflags = [ 4 | "-C", "linker=flip-link", 5 | "-C", "link-arg=-Tlink.x", 6 | "-C", "link-arg=--nmagic", 7 | ] 8 | 9 | [build] 10 | target = "thumbv7m-none-eabi" 11 | -------------------------------------------------------------------------------- /examples/qemu/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "examples-qemu" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | cortex-m = "0.7" 8 | cortex-m-rt = "0.7" 9 | cortex-m-semihosting = "0.5" 10 | panic-halt = "0.2" 11 | microflow = { path = "../.." 
} 12 | nalgebra = { version = "0.32", default-features = false, features = ["macros"] } 13 | libm = "0.2" 14 | 15 | [profile.release] 16 | lto = true 17 | -------------------------------------------------------------------------------- /examples/qemu/Makefile.toml: -------------------------------------------------------------------------------- 1 | [tasks.default] 2 | clear = true 3 | dependencies = [ 4 | "examples-compile", 5 | "clippy", 6 | "check-format", 7 | ] 8 | 9 | [tasks.examples-compile] 10 | args = ["build", "--examples", "--release"] 11 | -------------------------------------------------------------------------------- /examples/qemu/examples/sine.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use cortex_m::asm::nop; 5 | use cortex_m_rt::entry; 6 | use cortex_m_semihosting::debug::{exit, EXIT_SUCCESS}; 7 | use cortex_m_semihosting::hprintln; 8 | use libm::sinf; 9 | use nalgebra::matrix; 10 | 11 | use microflow::model; 12 | use panic_halt as _; 13 | 14 | #[model("../../models/sine.tflite")] 15 | struct Sine; 16 | 17 | #[entry] 18 | fn main() -> ! 
{ 19 | let x = 0.5; 20 | let y_predicted = Sine::predict(matrix![x])[0]; 21 | let y_exact = sinf(x); 22 | hprintln!(); 23 | hprintln!("Predicted sin({}): {}", x, y_predicted); 24 | hprintln!("Exact sin({}): {}", x, y_exact); 25 | hprintln!("Error: {}", y_exact - y_predicted); 26 | 27 | exit(EXIT_SUCCESS); 28 | loop { 29 | nop() 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /examples/qemu/examples/speech.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use cortex_m::asm::nop; 5 | use cortex_m_rt::entry; 6 | use cortex_m_semihosting::debug::{exit, EXIT_SUCCESS}; 7 | use cortex_m_semihosting::hprintln; 8 | use microflow::buffer::Buffer2D; 9 | use microflow::model; 10 | use panic_halt as _; 11 | 12 | #[path = "../../../samples/features/speech.rs"] 13 | mod features; 14 | 15 | #[model("../../models/speech.tflite")] 16 | struct Speech; 17 | 18 | fn print_prediction(prediction: Buffer2D) { 19 | hprintln!( 20 | "Prediction: {:.1}% silence, {:.1}% unknown, {:.1}% yes, {:.1}% no", 21 | prediction[0] * 100., 22 | prediction[1] * 100., 23 | prediction[2] * 100., 24 | prediction[3] * 100., 25 | ); 26 | hprintln!( 27 | "Outcome: {}", 28 | match prediction.iamax_full().1 { 29 | 0 => "SILENCE", 30 | 1 => "UNKNOWN", 31 | 2 => "YES", 32 | 3 => "NO", 33 | _ => unreachable!(), 34 | } 35 | ); 36 | } 37 | 38 | #[entry] 39 | fn main() -> ! 
{ 40 | let yes_predicted = Speech::predict_quantized(features::YES); 41 | let no_predicted = Speech::predict_quantized(features::NO); 42 | hprintln!(); 43 | hprintln!("Input sample: 'yes.wav'"); 44 | print_prediction(yes_predicted); 45 | hprintln!(); 46 | hprintln!("Input sample: 'no.wav'"); 47 | print_prediction(no_predicted); 48 | 49 | exit(EXIT_SUCCESS); 50 | loop { 51 | nop() 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /examples/qemu/memory.x: -------------------------------------------------------------------------------- 1 | MEMORY 2 | { 3 | FLASH : ORIGIN = 0x00000000, LENGTH = 256K 4 | RAM : ORIGIN = 0x20000000, LENGTH = 64K 5 | } 6 | -------------------------------------------------------------------------------- /examples/sine.rs: -------------------------------------------------------------------------------- 1 | use libm::sinf; 2 | use microflow::model; 3 | use nalgebra::matrix; 4 | 5 | #[model("models/sine.tflite")] 6 | struct Sine; 7 | 8 | fn main() { 9 | let x = 0.5; 10 | let y_predicted = Sine::predict(matrix![x])[0]; 11 | let y_exact = sinf(x); 12 | println!(); 13 | println!("Predicted sin({}): {}", x, y_predicted); 14 | println!("Exact sin({}): {}", x, y_exact); 15 | println!("Error: {}", y_exact - y_predicted); 16 | } 17 | -------------------------------------------------------------------------------- /examples/speech.rs: -------------------------------------------------------------------------------- 1 | use microflow::buffer::Buffer2D; 2 | use microflow_macros::model; 3 | 4 | #[path = "../samples/features/speech.rs"] 5 | mod features; 6 | 7 | #[model("models/speech.tflite")] 8 | struct Speech; 9 | 10 | fn print_prediction(prediction: Buffer2D) { 11 | println!( 12 | "Prediction: {:.1}% silence, {:.1}% unknown, {:.1}% yes, {:.1}% no", 13 | prediction[0] * 100., 14 | prediction[1] * 100., 15 | prediction[2] * 100., 16 | prediction[3] * 100., 17 | ); 18 | println!( 19 | "Outcome: {}", 20 | 
match prediction.iamax_full().1 { 21 | 0 => "SILENCE", 22 | 1 => "UNKNOWN", 23 | 2 => "YES", 24 | 3 => "NO", 25 | _ => unreachable!(), 26 | } 27 | ); 28 | } 29 | 30 | fn main() { 31 | let yes_predicted = Speech::predict_quantized(features::YES); 32 | let no_predicted = Speech::predict_quantized(features::NO); 33 | println!(); 34 | println!("Input sample: 'yes.wav'"); 35 | print_prediction(yes_predicted); 36 | println!(); 37 | println!("Input sample: 'no.wav'"); 38 | print_prediction(no_predicted); 39 | } 40 | -------------------------------------------------------------------------------- /microflow-macros/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "microflow-macros" 3 | description = "Macro crate of the MicroFlow inference engine, namely, the MicroFlow compiler" 4 | authors = ["Matteo Carnelos "] 5 | documentation = "https://docs.rs/microflow-macros" 6 | repository = "https://github.com/matteocarnelos/microflow-rs" 7 | readme = "../README.md" 8 | categories = ["embedded", "no-std", "science"] 9 | keywords = ["tinyml"] 10 | license = "MIT OR Apache-2.0" 11 | version = "0.1.1" 12 | edition = "2021" 13 | include = [ 14 | "/src/**", 15 | "flatbuffers/**", 16 | "/Cargo.toml", 17 | "/LICENSE-MIT", 18 | "/LICENSE-APACHE", 19 | ] 20 | 21 | [lib] 22 | proc-macro = true 23 | 24 | [dependencies] 25 | syn = { version = "2.0", features = ["full"] } 26 | quote = "1.0" 27 | proc-macro2 = "1.0" 28 | proc-macro-error = "1.0" 29 | flatbuffers = "23.1" 30 | nalgebra = "0.32" 31 | simba = "0.8" 32 | byterepr = "0.1" 33 | structmeta = "0.3" 34 | -------------------------------------------------------------------------------- /microflow-macros/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /microflow-macros/LICENSE-MIT: 
-------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /microflow-macros/src/activation.rs: -------------------------------------------------------------------------------- 1 | use crate::tflite_flatbuffers::tflite::ActivationFunctionType; 2 | use proc_macro2::TokenStream as TokenStream2; 3 | use quote::{quote, ToTokens}; 4 | 5 | /// Represents the tokenized version of the [`FusedActivation`]. 6 | #[derive(Copy, Clone)] 7 | pub(crate) enum TokenFusedActivation { 8 | None, 9 | Relu, 10 | Relu6, 11 | } 12 | 13 | impl ToTokens for TokenFusedActivation { 14 | fn to_tokens(&self, tokens: &mut TokenStream2) { 15 | let ts = match self { 16 | TokenFusedActivation::None => quote!(microflow::activation::FusedActivation::None), 17 | TokenFusedActivation::Relu => quote!(microflow::activation::FusedActivation::Relu), 18 | TokenFusedActivation::Relu6 => quote!(microflow::activation::FusedActivation::Relu6), 19 | }; 20 | ts.to_tokens(tokens); 21 | } 22 | } 23 | 24 | impl From for TokenFusedActivation { 25 | fn from(activation: ActivationFunctionType) -> Self { 26 | match activation { 27 | ActivationFunctionType::NONE => Self::None, 28 | ActivationFunctionType::RELU => Self::Relu, 29 | ActivationFunctionType::RELU6 => Self::Relu6, 30 | _ => unimplemented!(), 31 | } 32 | } 33 | } 34 | 35 | #[cfg(test)] 36 | mod tests { 37 | use super::*; 38 | 39 | #[test] 40 | fn fused_activation_to_tokens() { 41 | let activation = TokenFusedActivation::from(ActivationFunctionType::RELU); 42 | assert_eq!( 43 | activation.to_token_stream().to_string(), 44 | quote!(microflow::activation::FusedActivation::Relu).to_string() 45 | ); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /microflow-macros/src/buffer.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Deref; 2 | 3 | use 
nalgebra::DMatrix; 4 | use proc_macro2::TokenStream as TokenStream2; 5 | use quote::{quote, ToTokens}; 6 | 7 | /// Represents the tokenized version of the `Buffer2D`. 8 | #[derive(Debug)] 9 | pub(crate) struct TokenBuffer2D(pub(crate) Option>); 10 | 11 | /// Represents the tokenized version of the `Buffer4D`. 12 | #[derive(Debug)] 13 | pub(crate) struct TokenBuffer4D(pub(crate) Option>>>); 14 | 15 | impl TokenBuffer2D { 16 | /// Builds an empty [`TokenBuffer2D`]. 17 | pub(crate) fn new() -> Self { 18 | Self(None) 19 | } 20 | } 21 | 22 | impl From> for TokenBuffer2D { 23 | fn from(matrix: DMatrix) -> Self { 24 | Self(Some(matrix)) 25 | } 26 | } 27 | 28 | impl ToTokens for TokenBuffer2D { 29 | fn to_tokens(&self, tokens: &mut TokenStream2) { 30 | let mut rows: Vec = Vec::new(); 31 | for row in self.row_iter() { 32 | let iter = row.iter(); 33 | rows.push(quote!(#(#iter),*)); 34 | } 35 | 36 | let ts = quote!(nalgebra::matrix![#(#rows);*]); 37 | ts.to_tokens(tokens); 38 | } 39 | } 40 | 41 | impl Deref for TokenBuffer2D { 42 | type Target = DMatrix; 43 | fn deref(&self) -> &Self::Target { 44 | self.0.as_ref().unwrap() 45 | } 46 | } 47 | 48 | impl TokenBuffer4D { 49 | /// Builds an empty [`TokenBuffer4D`]. 
50 | pub(crate) fn new() -> Self { 51 | Self(None) 52 | } 53 | } 54 | 55 | impl From>>> for TokenBuffer4D { 56 | fn from(data: Vec>>) -> Self { 57 | Self(Some(data)) 58 | } 59 | } 60 | 61 | impl ToTokens for TokenBuffer4D { 62 | fn to_tokens(&self, tokens: &mut TokenStream2) { 63 | let mut batches: Vec = Vec::new(); 64 | for batch in self.iter() { 65 | let mut rows: Vec = Vec::new(); 66 | for row in batch.row_iter() { 67 | let mut elements: Vec = Vec::new(); 68 | for element in row.iter() { 69 | let iter = element.iter(); 70 | elements.push(quote!([#(#iter),*])); 71 | } 72 | rows.push(quote!(#(#elements),*)); 73 | } 74 | batches.push(quote!(nalgebra::matrix![#(#rows);*])); 75 | } 76 | 77 | let ts = quote!([#(#batches),*]); 78 | ts.to_tokens(tokens); 79 | } 80 | } 81 | 82 | impl Deref for TokenBuffer4D { 83 | type Target = Vec>>; 84 | fn deref(&self) -> &Self::Target { 85 | self.0.as_ref().unwrap() 86 | } 87 | } 88 | 89 | #[cfg(test)] 90 | mod tests { 91 | use nalgebra::dmatrix; 92 | 93 | use super::*; 94 | 95 | fn setup_2d() -> TokenBuffer2D { 96 | TokenBuffer2D::from(dmatrix![ 97 | 1, 2, 3; 98 | 4, 5, 6 99 | ]) 100 | } 101 | 102 | fn setup_4d() -> TokenBuffer4D { 103 | TokenBuffer4D::from(vec![ 104 | dmatrix![ 105 | vec![7, 8], vec![9, 10], vec![11, 12]; 106 | vec![13, 14], vec![15, 16], vec![17, 18] 107 | ], 108 | dmatrix![ 109 | vec![19, 20], vec![21, 22], vec![23, 24]; 110 | vec![25, 26], vec![27, 28], vec![29, 30] 111 | ], 112 | ]) 113 | } 114 | 115 | #[test] 116 | fn buffer_2d_new() { 117 | assert_eq!(TokenBuffer2D::::new().0, None); 118 | } 119 | 120 | #[test] 121 | fn buffer_2d_from_matrix() { 122 | let matrix = dmatrix![1, 2, 3]; 123 | assert_eq!(TokenBuffer2D::::from(matrix.clone()).0, Some(matrix)); 124 | } 125 | 126 | #[test] 127 | fn buffer_2d_to_tokens() { 128 | let buffer = setup_2d(); 129 | assert_eq!( 130 | buffer.to_token_stream().to_string(), 131 | quote! 
{ 132 | nalgebra::matrix![ 133 | 1i8, 2i8, 3i8; 134 | 4i8, 5i8, 6i8 135 | ] 136 | } 137 | .to_string() 138 | ); 139 | } 140 | 141 | #[test] 142 | fn buffer_4d_new() { 143 | assert_eq!(TokenBuffer4D::::new().0, None); 144 | } 145 | 146 | #[test] 147 | fn buffer_4d_from_data() { 148 | let data = vec![dmatrix![vec![1], vec![2], vec![3]]]; 149 | assert_eq!(TokenBuffer4D::::from(data.clone()).0, Some(data)); 150 | } 151 | 152 | #[test] 153 | fn buffer_4d_to_tokens() { 154 | let buffer = setup_4d(); 155 | assert_eq!( 156 | buffer.to_token_stream().to_string(), 157 | quote! { 158 | [ 159 | nalgebra::matrix![ 160 | [7i8, 8i8], [9i8, 10i8], [11i8, 12i8]; 161 | [13i8, 14i8], [15i8, 16i8], [17i8, 18i8] 162 | ], 163 | nalgebra::matrix![ 164 | [19i8, 20i8], [21i8, 22i8], [23i8, 24i8]; 165 | [25i8, 26i8], [27i8, 28i8], [29i8, 30i8] 166 | ] 167 | ] 168 | } 169 | .to_string() 170 | ); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /microflow-macros/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! [![crates.io](https://img.shields.io/crates/v/microflow-macros)](https://crates.io/crates/microflow-macros) 2 | //! [![docs.rs](https://img.shields.io/docsrs/microflow-macros)](https://docs.rs/microflow-macros) 3 | //! [![github](https://img.shields.io/github/actions/workflow/status/matteocarnelos/microflow-rs/cargo.yml?branch=main)](https://github.com/matteocarnelos/microflow-rs/actions/workflows/cargo.yml) 4 | //! 5 | //! Macro crate of the [MicroFlow](https://github.com/matteocarnelos/microflow-rs) inference engine, namely, the MicroFlow compiler. 
6 | 7 | extern crate proc_macro; 8 | 9 | use proc_macro::TokenStream; 10 | use proc_macro_error::{abort_call_site, proc_macro_error}; 11 | use std::fs; 12 | 13 | use proc_macro2::TokenStream as TokenStream2; 14 | use quote::{quote, ToTokens}; 15 | use syn::{parse_macro_input, ItemStruct}; 16 | 17 | use crate::tflite_flatbuffers::tflite::TensorType; 18 | use ops::*; 19 | use structmeta::StructMeta; 20 | use syn::LitStr; 21 | use tflite_flatbuffers::tflite::{root_as_model, BuiltinOperator}; 22 | 23 | mod activation; 24 | mod buffer; 25 | mod ops; 26 | mod quantize; 27 | mod tensor; 28 | #[path = "../flatbuffers/tflite_generated.rs"] 29 | #[allow(unused_imports)] 30 | #[allow(clippy::all)] 31 | mod tflite_flatbuffers; 32 | 33 | #[derive(StructMeta)] 34 | struct Args { 35 | #[struct_meta(unnamed)] 36 | path: LitStr, 37 | } 38 | 39 | /// The entry point of MicroFlow. 40 | /// This attribute-like procedural macro can be placed on `structs` to implement the `predict()` 41 | /// function based on the given model. 42 | /// The macro takes as input the path of the model, which must be in the TensorFlow Lite format 43 | /// (`.tflite`). 
#[proc_macro_error]
#[proc_macro_attribute]
pub fn model(args: TokenStream, item: TokenStream) -> TokenStream {
    // Parse the macro input: the model path argument and the struct the
    // attribute is attached to.
    let args = parse_macro_input!(args as Args);
    let item = parse_macro_input!(item as ItemStruct);

    // Read the .tflite file at macro-expansion time; both failures abort
    // with a compile error at the call site.
    let buf = fs::read(args.path.value()).unwrap_or_else(|_| {
        abort_call_site!(
            "couldn't find '{}', please provide a valid path",
            &args.path.value()
        )
    });
    let model = root_as_model(&buf).unwrap_or_else(|_| {
        abort_call_site!("invalid model, please provide a valid TensorFlow Lite model")
    });

    let ident = &item.ident;

    // Only the first subgraph of the FlatBuffers model is compiled.
    let subgraph = model.subgraphs().unwrap().get(0);
    let tensors = subgraph.tensors().unwrap();
    let buffers = model.buffers().unwrap();

    // --- Input tensor: shape, element type, and quantization params ---
    let input = tensors.get(subgraph.inputs().unwrap().get(0) as usize);
    let mut input_shape: Vec<_> = input.shape().unwrap().iter().map(|e| e as usize).collect();
    // Promote a rank-1 shape to a 1xN row so it fits the 2-D tensor types.
    if input_shape.len() == 1 {
        input_shape.insert(0, 1);
    }
    // Only INT8/UINT8 quantized models are supported.
    let input_type = match input.type_() {
        TensorType::INT8 => quote!(i8),
        TensorType::UINT8 => quote!(u8),
        _ => unimplemented!(),
    };
    // Rank selects the tensor/buffer type names used in generated code.
    let input_tensor = match input_shape.len() {
        2 => quote!(Tensor2D),
        4 => quote!(Tensor4D),
        _ => unimplemented!(),
    };
    let input_buffer = match input_shape.len() {
        2 => quote!(Buffer2D),
        4 => quote!(Buffer4D),
        _ => unimplemented!(),
    };
    // Tokenize the quantization scale(s) and zero point(s); zero points
    // are cast to the tensor's element type first.
    let input_scale: Vec<_> = input
        .quantization()
        .unwrap()
        .scale()
        .unwrap()
        .iter()
        .map(|e| e.to_token_stream())
        .collect();
    let input_zero_point: Vec<_> = match input.type_() {
        TensorType::INT8 => input
            .quantization()
            .unwrap()
            .zero_point()
            .unwrap()
            .iter()
            .map(|e| (e as i8).to_token_stream())
            .collect(),
        TensorType::UINT8 => input
            .quantization()
            .unwrap()
            .zero_point()
            .unwrap()
            .iter()
            .map(|e| (e as u8).to_token_stream())
            .collect(),
        _ => unimplemented!(),
    };

    // --- Compile each operator into a token stream, in graph order ---
    let operators = subgraph.operators().unwrap();
    let mut layers = TokenStream2::new();
    for (index, operator) in operators.iter().enumerate() {
        // Dispatch on the deprecated builtin code of the operator.
        // NOTE(review): the `Box` annotation below appears to have lost
        // its generic argument (likely `Box<dyn ToTokens>`) in
        // extraction — confirm against the repository.
        let layer: Box = match BuiltinOperator(
            model
                .operator_codes()
                .unwrap()
                .get(operator.opcode_index() as usize)
                .deprecated_builtin_code() as i32,
        ) {
            BuiltinOperator::FULLY_CONNECTED => {
                fully_connected::parse(operator, tensors, buffers, index)
            }
            BuiltinOperator::DEPTHWISE_CONV_2D => {
                depthwise_conv_2d::parse(operator, tensors, buffers, index)
            }
            BuiltinOperator::CONV_2D => conv_2d::parse(operator, tensors, buffers, index),
            BuiltinOperator::AVERAGE_POOL_2D => average_pool_2d::parse(operator, tensors),
            BuiltinOperator::SOFTMAX => softmax::parse(operator, tensors),
            BuiltinOperator::RESHAPE => Box::new(reshape::parse(operator, tensors)),
            unsupported_op => abort_call_site!("unsupported operator: {:?}", unsupported_op),
        };
        layer.to_tokens(&mut layers)
    }

    // --- Output tensor: mirrors the input handling above ---
    let output = tensors.get(subgraph.outputs().unwrap().get(0) as usize);
    let mut output_shape: Vec<_> = output.shape().unwrap().iter().map(|e| e as usize).collect();
    if output_shape.len() == 1 {
        output_shape.insert(0, 1);
    }
    let output_type = match output.type_() {
        TensorType::INT8 => quote!(i8),
        TensorType::UINT8 => quote!(u8),
        _ => unimplemented!(),
    };
    let output_tensor = match output_shape.len() {
        2 => quote!(Tensor2D),
        4 => quote!(Tensor4D),
        _ => unimplemented!(),
    };
    let output_buffer = match output_shape.len() {
        2 => quote!(Buffer2D),
        4 => quote!(Buffer4D),
        _ => unimplemented!(),
    };

    // Emit the original struct plus an impl with:
    //   - predict():            quantizes a float input, runs the layers,
    //                           dequantizes the output
    //   - predict_quantized():  same, but takes an already-quantized input
    //   - predict_inner():      the compiled layer pipeline; each layer in
    //                           #layers rebinds `input`, so the final
    //                           `input` is the model output
    let ts = quote! {
        #item
        impl #ident {
            pub fn predict(input: microflow::buffer::#input_buffer) -> microflow::buffer::#output_buffer {
                let input = microflow::tensor::#input_tensor::quantize(input, [#(#input_scale),*], [#(#input_zero_point),*]);
                Self::predict_inner(input).dequantize()
            }

            pub fn predict_quantized(input: microflow::buffer::#input_buffer<#input_type, #(#input_shape),*>) -> microflow::buffer::#output_buffer {
                let input = microflow::tensor::#input_tensor::new(input, [#(#input_scale),*], [#(#input_zero_point),*]);
                Self::predict_inner(input).dequantize()
            }

            fn predict_inner(input: microflow::tensor::#input_tensor<#input_type, #(#input_shape),*, 1usize>) -> microflow::tensor::#output_tensor<#output_type, #(#output_shape),*, 1usize> {
                #layers
                input
            }
        }
    };

    // Best-effort debug dump of the generated code; failure (e.g. no
    // `target/` directory) is deliberately ignored.
    fs::write("target/microflow-expansion.rs", ts.to_string()).ok();

    ts.into()
}
11 | pub(crate) struct TokenAveragePool2D { 12 | pub(crate) filter_shape: (usize, usize), 13 | pub(crate) output: TokenTensor4D, 14 | pub(crate) fused_activation: TokenFusedActivation, 15 | pub(crate) view_padding: TokenTensorViewPadding, 16 | pub(crate) strides: (usize, usize), 17 | pub(crate) constants: (f32, f32), 18 | } 19 | 20 | /// Parses the [`TokenAveragePool2D`] struct from the given operator. 21 | /// 22 | /// # Arguments 23 | /// * `operator` - The model operator as an [`Operator`] 24 | /// * `tensors` - The model tensors as a [`Vector>`] 25 | /// 26 | pub(crate) fn parse( 27 | operator: Operator, 28 | tensors: Vector>, 29 | ) -> Box { 30 | let inputs = operator.inputs().unwrap(); 31 | let input_type = tensors.get(inputs.get(0) as usize).type_(); 32 | match input_type { 33 | TensorType::INT8 => Box::new(TokenAveragePool2D::::new(operator, tensors)), 34 | TensorType::UINT8 => Box::new(TokenAveragePool2D::::new(operator, tensors)), 35 | _ => unimplemented!(), 36 | } 37 | } 38 | 39 | impl TokenAveragePool2D { 40 | /// Builds the [`TokenAveragePool2D`] operator from the given model operator and tensors. 
41 | /// 42 | /// # Arguments 43 | /// * `operator` - The model operator as an [`Operator`] 44 | /// * `tensors` - The model tensors as a [`Vector>`] 45 | /// 46 | pub(crate) fn new(operator: Operator, tensors: Vector>) -> Self { 47 | let inputs = operator.inputs().unwrap(); 48 | let input = TokenTensor4D::from_empty_tensor(tensors.get(inputs.get(0) as usize)); 49 | let output = TokenTensor4D::from_empty_tensor( 50 | tensors.get(operator.outputs().unwrap().get(0) as usize), 51 | ); 52 | let options = operator.builtin_options_as_pool_2_doptions().unwrap(); 53 | let constants = Self::preprocess(&input, &output); 54 | Self { 55 | filter_shape: ( 56 | options.filter_height() as usize, 57 | options.filter_width() as usize, 58 | ), 59 | output, 60 | fused_activation: options.fused_activation_function().into(), 61 | view_padding: options.padding().into(), 62 | strides: (options.stride_h() as usize, options.stride_w() as usize), 63 | constants, 64 | } 65 | } 66 | 67 | /// Pre-processes the operator, returning the tuple of constants. 
68 | /// 69 | /// # Arguments 70 | /// * `input` - The input of the operator as a [`TokenTensor2D`] 71 | /// * `output` - The output of the operator as a [`TokenTensor2D`] 72 | /// 73 | fn preprocess(input: &TokenTensor4D, output: &TokenTensor4D) -> (f32, f32) { 74 | ( 75 | input.scale[0] / output.scale[0], 76 | f32::from_subset(&output.zero_point[0]) 77 | - (input.scale[0] * f32::from_subset(&input.zero_point[0])) / output.scale[0], 78 | ) 79 | } 80 | } 81 | 82 | impl ToTokens for TokenAveragePool2D { 83 | fn to_tokens(&self, tokens: &mut TokenStream2) { 84 | let (filter_shape_0, filter_shape_1) = self.filter_shape; 85 | let output_shape = &self.output.shape; 86 | let output_scale = &self.output.scale; 87 | let output_zero_point = &self.output.zero_point; 88 | let fused_activation = self.fused_activation; 89 | let view_padding = self.view_padding; 90 | let (strides_0, strides_1) = self.strides; 91 | let (constants_0, constants_1) = self.constants; 92 | 93 | let ts = quote! { 94 | let input: microflow::tensor::Tensor4D<_, #(#output_shape),*, 1usize> = 95 | microflow::ops::average_pool_2d( 96 | input, 97 | (nalgebra::Const::<#filter_shape_0>, nalgebra::Const::<#filter_shape_1>), 98 | [#(#output_scale),*], 99 | [#(#output_zero_point),*], 100 | microflow::ops::AveragePool2DOptions { 101 | fused_activation: #fused_activation, 102 | view_padding: #view_padding, 103 | strides: (#strides_0, #strides_1), 104 | }, 105 | (#constants_0, #constants_1) 106 | ); 107 | }; 108 | ts.to_tokens(tokens); 109 | } 110 | } 111 | 112 | #[cfg(test)] 113 | mod tests { 114 | use super::*; 115 | use crate::buffer::TokenBuffer4D; 116 | 117 | fn setup() -> TokenAveragePool2D { 118 | TokenAveragePool2D { 119 | filter_shape: (2, 3), 120 | output: TokenTensor4D { 121 | buffer: TokenBuffer4D::new(), 122 | shape: vec![1, 2, 3, 2], 123 | scale: vec![0.1], 124 | zero_point: vec![2], 125 | }, 126 | fused_activation: TokenFusedActivation::None, 127 | view_padding: TokenTensorViewPadding::Same, 128 | 
strides: (1, 1), 129 | constants: (3., 4.), 130 | } 131 | } 132 | 133 | #[test] 134 | fn average_pool_2d_preprocess() { 135 | let layer = setup(); 136 | let input = TokenTensor4D { 137 | buffer: TokenBuffer4D::new(), 138 | shape: vec![1, 2, 3, 2], 139 | scale: vec![0.5], 140 | zero_point: vec![6], 141 | }; 142 | let constants = TokenAveragePool2D::preprocess(&input, &layer.output); 143 | assert_eq!(constants.0, 5.); 144 | assert_eq!(constants.1, -28.); 145 | } 146 | 147 | #[test] 148 | fn average_pool_2d_to_tokens() { 149 | let layer = setup(); 150 | let fused_activation = layer.fused_activation; 151 | let view_padding = layer.view_padding; 152 | assert_eq!( 153 | layer.to_token_stream().to_string(), 154 | quote! { 155 | let input: microflow::tensor::Tensor4D<_, 1usize, 2usize, 3usize, 2usize, 1usize> = 156 | microflow::ops::average_pool_2d( 157 | input, 158 | (nalgebra::Const::<2usize>, nalgebra::Const::<3usize>), 159 | [0.1f32], 160 | [2i8], 161 | microflow::ops::AveragePool2DOptions { 162 | fused_activation: #fused_activation, 163 | view_padding: #view_padding, 164 | strides: (1usize, 1usize), 165 | }, 166 | (3f32, 4f32) 167 | ); 168 | } 169 | .to_string() 170 | ); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /microflow-macros/src/ops/conv_2d.rs: -------------------------------------------------------------------------------- 1 | use crate::activation::TokenFusedActivation; 2 | use crate::buffer::TokenBuffer2D; 3 | use crate::quantize::TokenQuantized; 4 | use crate::tensor::{TokenTensor2D, TokenTensor4D, TokenTensorViewPadding}; 5 | use crate::tflite_flatbuffers::tflite::{Buffer, Operator, Tensor, TensorType}; 6 | use flatbuffers::{ForwardsUOffset, Vector}; 7 | use nalgebra::DMatrix; 8 | use proc_macro2::TokenStream as TokenStream2; 9 | use quote::{format_ident, quote, ToTokens}; 10 | 11 | /// Represents the tokenized version of the `Conv2D` operator. 
12 | pub(crate) struct TokenConv2D { 13 | pub(crate) filters: TokenTensor4D, 14 | pub(crate) output: TokenTensor4D, 15 | pub(crate) fused_activation: TokenFusedActivation, 16 | pub(crate) view_padding: TokenTensorViewPadding, 17 | pub(crate) strides: (usize, usize), 18 | pub(crate) constants: (TokenBuffer2D, TokenBuffer2D), 19 | pub(crate) index: usize, 20 | } 21 | 22 | /// Parses the [`TokenConv2D`] struct from the given operator. 23 | /// 24 | /// # Arguments 25 | /// * `operator` - The model operator as an [`Operator`] 26 | /// * `tensors` - The model tensors as a [`Vector>`] 27 | /// * `buffers` - The model buffers as a [`Vector>`] 28 | /// * `index` - The operator index 29 | /// 30 | pub(crate) fn parse( 31 | operator: Operator, 32 | tensors: Vector>, 33 | buffers: Vector>, 34 | index: usize, 35 | ) -> Box { 36 | let inputs = operator.inputs().unwrap(); 37 | let input_type = tensors.get(inputs.get(0) as usize).type_(); 38 | match input_type { 39 | TensorType::INT8 => Box::new(TokenConv2D::::new(operator, tensors, buffers, index)), 40 | TensorType::UINT8 => Box::new(TokenConv2D::::new(operator, tensors, buffers, index)), 41 | _ => unimplemented!(), 42 | } 43 | } 44 | 45 | impl TokenConv2D { 46 | /// Builds the [`TokenConv2D`] operator from the given model operator and tensors. 
47 | /// 48 | /// # Arguments 49 | /// * `operator` - The model operator as an [`Operator`] 50 | /// * `tensors` - The model tensors as a [`Vector>`] 51 | /// * `buffers` - The model buffers as a [`Vector>`] 52 | /// * `index` - The operator index 53 | /// 54 | pub(crate) fn new( 55 | operator: Operator, 56 | tensors: Vector>, 57 | buffers: Vector>, 58 | index: usize, 59 | ) -> Self { 60 | let inputs = operator.inputs().unwrap(); 61 | let input = TokenTensor4D::from_empty_tensor(tensors.get(inputs.get(0) as usize)); 62 | let filters = 63 | TokenTensor4D::from_buffered_tensor(tensors.get(inputs.get(1) as usize), buffers); 64 | let biases = 65 | TokenTensor2D::from_buffered_tensor(tensors.get(inputs.get(2) as usize), buffers); 66 | let output = TokenTensor4D::from_empty_tensor( 67 | tensors.get(operator.outputs().unwrap().get(0) as usize), 68 | ); 69 | let options = operator.builtin_options_as_conv_2_doptions().unwrap(); 70 | let constants = Self::preprocess(&input, &filters, &biases, &output); 71 | Self { 72 | filters, 73 | output, 74 | fused_activation: options.fused_activation_function().into(), 75 | view_padding: options.padding().into(), 76 | strides: (options.stride_h() as usize, options.stride_w() as usize), 77 | constants, 78 | index, 79 | } 80 | } 81 | 82 | /// Pre-processes the operator, returning the tuple of constants. 
83 | /// 84 | /// # Arguments 85 | /// * `input` - The input of the operator as a [`TokenTensor2D`] 86 | /// * `filters` - The filters of the operator as a [`TokenTensor2D`] 87 | /// * `biases` - The biases of the operator as a [`TokenTensor2D`] 88 | /// * `output` - The output of the operator as a [`TokenTensor2D`] 89 | /// 90 | fn preprocess( 91 | input: &TokenTensor4D, 92 | filters: &TokenTensor4D, 93 | biases: &TokenTensor2D, 94 | output: &TokenTensor4D, 95 | ) -> (TokenBuffer2D, TokenBuffer2D) { 96 | ( 97 | TokenBuffer2D::from(DMatrix::from_fn(filters.shape[0], 1, |b, _| { 98 | biases.scale.get(b).copied().unwrap_or(biases.scale[0]) / output.scale[0] 99 | * (biases.buffer[b] 100 | - biases 101 | .zero_point 102 | .get(b) 103 | .copied() 104 | .unwrap_or(biases.zero_point[0])) as f32 105 | })), 106 | TokenBuffer2D::from(DMatrix::from_fn(filters.scale.len(), 1, |b, _| { 107 | input.scale[0] * filters.scale[b] / output.scale[0] 108 | })), 109 | ) 110 | } 111 | } 112 | 113 | impl ToTokens for TokenConv2D { 114 | fn to_tokens(&self, tokens: &mut TokenStream2) { 115 | let filters_ident = format_ident!("filters_{}", self.index); 116 | let filters_type = self.filters.type_tokens(); 117 | let filters = &self.filters; 118 | let output_shape = &self.output.shape; 119 | let output_scale = &self.output.scale; 120 | let output_zero_point = &self.output.zero_point; 121 | let fused_activation = self.fused_activation; 122 | let view_padding = self.view_padding; 123 | let (strides_0, strides_1) = self.strides; 124 | let (constants_0, constants_1) = &self.constants; 125 | 126 | let ts = quote! 
{ 127 | const #filters_ident: #filters_type = #filters; 128 | let input: microflow::tensor::Tensor4D<_, #(#output_shape),*, 1usize> = 129 | microflow::ops::conv_2d( 130 | input, 131 | &#filters_ident, 132 | [#(#output_scale),*], 133 | [#(#output_zero_point),*], 134 | microflow::ops::Conv2DOptions { 135 | fused_activation: #fused_activation, 136 | view_padding: #view_padding, 137 | strides: (#strides_0, #strides_1), 138 | }, 139 | (#constants_0, #constants_1) 140 | ); 141 | }; 142 | ts.to_tokens(tokens); 143 | } 144 | } 145 | 146 | #[cfg(test)] 147 | mod tests { 148 | use super::*; 149 | use crate::buffer::{TokenBuffer2D, TokenBuffer4D}; 150 | use nalgebra::dmatrix; 151 | 152 | fn setup() -> TokenConv2D { 153 | TokenConv2D { 154 | filters: TokenTensor4D { 155 | buffer: TokenBuffer4D::from(vec![ 156 | dmatrix![ 157 | vec![1, 2], vec![3, 4], vec![5, 6]; 158 | vec![7, 8], vec![9, 10], vec![11, 12] 159 | ], 160 | dmatrix![ 161 | vec![13, 14], vec![15, 16], vec![17, 18]; 162 | vec![19, 20], vec![21, 22], vec![23, 24] 163 | ], 164 | ]), 165 | shape: vec![2, 2, 3, 2], 166 | scale: vec![0.25, 0.26], 167 | zero_point: vec![27, 28], 168 | }, 169 | output: TokenTensor4D { 170 | buffer: TokenBuffer4D::new(), 171 | shape: vec![1, 2, 3, 2], 172 | scale: vec![0.29], 173 | zero_point: vec![30], 174 | }, 175 | fused_activation: TokenFusedActivation::Relu6, 176 | view_padding: TokenTensorViewPadding::Same, 177 | strides: (1, 1), 178 | constants: ( 179 | TokenBuffer2D::from(dmatrix![31., 32.]), 180 | TokenBuffer2D::from(dmatrix![33., 34.]), 181 | ), 182 | index: 0, 183 | } 184 | } 185 | 186 | #[test] 187 | fn conv_2d_preprocess() { 188 | let layer = setup(); 189 | let input = TokenTensor4D { 190 | buffer: TokenBuffer4D::new(), 191 | shape: vec![1, 2, 3, 2], 192 | scale: vec![0.35], 193 | zero_point: vec![36], 194 | }; 195 | let biases = TokenTensor2D { 196 | buffer: TokenBuffer2D::from(dmatrix![ 197 | 37; 198 | 38 199 | ]), 200 | shape: vec![2, 1], 201 | scale: vec![0.39, 0.40], 202 | 
zero_point: vec![41, 42], 203 | }; 204 | let constants = TokenConv2D::preprocess(&input, &layer.filters, &biases, &layer.output); 205 | assert_eq!(constants.0 .0, Some(dmatrix![-5.37931; -5.5172415])); 206 | assert_eq!(constants.1 .0, Some(dmatrix![0.30172414; 0.3137931])); 207 | } 208 | 209 | #[test] 210 | fn conv_2d_to_tokens() { 211 | let layer = setup(); 212 | let filters = &layer.filters; 213 | let fused_activation = layer.fused_activation; 214 | let view_padding = layer.view_padding; 215 | let (constants_0, constants_1) = &layer.constants; 216 | assert_eq!( 217 | layer.to_token_stream().to_string(), 218 | quote! { 219 | const filters_0: microflow::tensor::Tensor4D = #filters; 220 | let input: microflow::tensor::Tensor4D<_, 1usize, 2usize, 3usize, 2usize, 1usize> = 221 | microflow::ops::conv_2d( 222 | input, 223 | &filters_0, 224 | [0.29f32], 225 | [30i8], 226 | microflow::ops::Conv2DOptions { 227 | fused_activation: #fused_activation, 228 | view_padding: #view_padding, 229 | strides: (1usize, 1usize), 230 | }, 231 | (#constants_0, #constants_1) 232 | ); 233 | }.to_string() 234 | ); 235 | } 236 | } 237 | -------------------------------------------------------------------------------- /microflow-macros/src/ops/depthwise_conv_2d.rs: -------------------------------------------------------------------------------- 1 | use crate::activation::TokenFusedActivation; 2 | use crate::buffer::TokenBuffer2D; 3 | use crate::quantize::TokenQuantized; 4 | use crate::tensor::{TokenTensor2D, TokenTensor4D, TokenTensorViewPadding}; 5 | use crate::tflite_flatbuffers::tflite::{Buffer, Operator, Tensor, TensorType}; 6 | use flatbuffers::{ForwardsUOffset, Vector}; 7 | use nalgebra::DMatrix; 8 | use proc_macro2::TokenStream as TokenStream2; 9 | use quote::{format_ident, quote, ToTokens}; 10 | 11 | /// Represents the tokenized version of the `DepthwiseConv2D` operator. 
12 | pub(crate) struct TokenDepthwiseConv2D { 13 | pub(crate) weights: TokenTensor4D, 14 | pub(crate) output: TokenTensor4D, 15 | pub(crate) fused_activation: TokenFusedActivation, 16 | pub(crate) view_padding: TokenTensorViewPadding, 17 | pub(crate) strides: (usize, usize), 18 | pub(crate) constants: (TokenBuffer2D, TokenBuffer2D), 19 | pub(crate) index: usize, 20 | } 21 | 22 | /// Parses the [`TokenDepthwiseConv2D`] struct from the given operator. 23 | /// 24 | /// # Arguments 25 | /// * `operator` - The model operator as an [`Operator`] 26 | /// * `tensors` - The model tensors as a [`Vector>`] 27 | /// * `buffers` - The model buffers as a [`Vector>`] 28 | /// * `index` - The operator index 29 | /// 30 | pub(crate) fn parse( 31 | operator: Operator, 32 | tensors: Vector>, 33 | buffers: Vector>, 34 | index: usize, 35 | ) -> Box { 36 | let inputs = operator.inputs().unwrap(); 37 | let input_type = tensors.get(inputs.get(0) as usize).type_(); 38 | match input_type { 39 | TensorType::INT8 => Box::new(TokenDepthwiseConv2D::::new( 40 | operator, tensors, buffers, index, 41 | )), 42 | TensorType::UINT8 => Box::new(TokenDepthwiseConv2D::::new( 43 | operator, tensors, buffers, index, 44 | )), 45 | _ => unimplemented!(), 46 | } 47 | } 48 | 49 | impl TokenDepthwiseConv2D { 50 | /// Builds the [`TokenDepthwiseConv2D`] operator from the given model operator and tensors. 
51 | /// 52 | /// # Arguments 53 | /// * `operator` - The model operator as an [`Operator`] 54 | /// * `tensors` - The model tensors as a [`Vector>`] 55 | /// * `buffers` - The model buffers as a [`Vector>`] 56 | /// * `index` - The operator index 57 | /// 58 | pub(crate) fn new( 59 | operator: Operator, 60 | tensors: Vector>, 61 | buffers: Vector>, 62 | index: usize, 63 | ) -> Self { 64 | let inputs = operator.inputs().unwrap(); 65 | let input = TokenTensor4D::from_empty_tensor(tensors.get(inputs.get(0) as usize)); 66 | let weights = 67 | TokenTensor4D::from_buffered_tensor(tensors.get(inputs.get(1) as usize), buffers); 68 | let biases = 69 | TokenTensor2D::from_buffered_tensor(tensors.get(inputs.get(2) as usize), buffers); 70 | let output = TokenTensor4D::from_empty_tensor( 71 | tensors.get(operator.outputs().unwrap().get(0) as usize), 72 | ); 73 | let options = operator 74 | .builtin_options_as_depthwise_conv_2_doptions() 75 | .unwrap(); 76 | let constants = Self::preprocess(&input, &weights, &biases, &output); 77 | Self { 78 | weights, 79 | output, 80 | fused_activation: options.fused_activation_function().into(), 81 | view_padding: options.padding().into(), 82 | strides: (options.stride_h() as usize, options.stride_w() as usize), 83 | constants, 84 | index, 85 | } 86 | } 87 | 88 | /// Pre-processes the operator, returning the tuple of constants. 
89 | /// 90 | /// # Arguments 91 | /// * `input` - The input of the operator as a [`TokenTensor2D`] 92 | /// * `weights` - The weights of the operator as a [`TokenTensor2D`] 93 | /// * `biases` - The biases of the operator as a [`TokenTensor2D`] 94 | /// * `output` - The output of the operator as a [`TokenTensor2D`] 95 | /// 96 | fn preprocess( 97 | input: &TokenTensor4D, 98 | weights: &TokenTensor4D, 99 | biases: &TokenTensor2D, 100 | output: &TokenTensor4D, 101 | ) -> (TokenBuffer2D, TokenBuffer2D) { 102 | ( 103 | TokenBuffer2D::from(DMatrix::from_fn(weights.shape[3], 1, |c, _| { 104 | biases.scale.get(c).copied().unwrap_or(biases.scale[0]) / output.scale[0] 105 | * (biases.buffer[c] 106 | - biases 107 | .zero_point 108 | .get(c) 109 | .copied() 110 | .unwrap_or(biases.zero_point[0])) as f32 111 | })), 112 | TokenBuffer2D::from(DMatrix::from_fn(weights.scale.len(), 1, |c, _| { 113 | input.scale[0] * weights.scale[c] / output.scale[0] 114 | })), 115 | ) 116 | } 117 | } 118 | 119 | impl ToTokens for TokenDepthwiseConv2D { 120 | fn to_tokens(&self, tokens: &mut TokenStream2) { 121 | let weights_ident = format_ident!("weights_{}", self.index); 122 | let weights_type = self.weights.type_tokens(); 123 | let weights = &self.weights; 124 | let output_shape = &self.output.shape; 125 | let output_scale = &self.output.scale; 126 | let output_zero_point = &self.output.zero_point; 127 | let fused_activation = self.fused_activation; 128 | let view_padding = self.view_padding; 129 | let (strides_0, strides_1) = self.strides; 130 | let (constants_0, constants_1) = &self.constants; 131 | 132 | let ts = quote! 
{ 133 | const #weights_ident: #weights_type = #weights; 134 | let input: microflow::tensor::Tensor4D<_, #(#output_shape),*, 1usize> = 135 | microflow::ops::depthwise_conv_2d( 136 | input, 137 | &#weights_ident, 138 | [#(#output_scale),*], 139 | [#(#output_zero_point),*], 140 | microflow::ops::DepthwiseConv2DOptions { 141 | fused_activation: #fused_activation, 142 | view_padding: #view_padding, 143 | strides: (#strides_0, #strides_1), 144 | }, 145 | (#constants_0, #constants_1) 146 | ); 147 | }; 148 | ts.to_tokens(tokens); 149 | } 150 | } 151 | 152 | #[cfg(test)] 153 | mod tests { 154 | use super::*; 155 | use crate::buffer::{TokenBuffer2D, TokenBuffer4D}; 156 | use nalgebra::dmatrix; 157 | 158 | fn setup() -> TokenDepthwiseConv2D { 159 | TokenDepthwiseConv2D { 160 | weights: TokenTensor4D { 161 | buffer: TokenBuffer4D::from(vec![dmatrix![ 162 | vec![1, 2], vec![3, 4], vec![5, 6]; 163 | vec![7, 8], vec![9, 10], vec![11, 12] 164 | ]]), 165 | shape: vec![1, 2, 3, 2], 166 | scale: vec![0.13, 0.14], 167 | zero_point: vec![15, 16], 168 | }, 169 | output: TokenTensor4D { 170 | buffer: TokenBuffer4D::new(), 171 | shape: vec![1, 2, 3, 2], 172 | scale: vec![0.17], 173 | zero_point: vec![18], 174 | }, 175 | fused_activation: TokenFusedActivation::Relu6, 176 | view_padding: TokenTensorViewPadding::Same, 177 | strides: (1, 1), 178 | constants: ( 179 | TokenBuffer2D::from(dmatrix![19., 20.]), 180 | TokenBuffer2D::from(dmatrix![21., 22.]), 181 | ), 182 | index: 0, 183 | } 184 | } 185 | 186 | #[test] 187 | fn depthwise_conv_2d_preprocess() { 188 | let layer = setup(); 189 | let input = TokenTensor4D { 190 | buffer: TokenBuffer4D::new(), 191 | shape: vec![1, 2, 3, 2], 192 | scale: vec![0.23], 193 | zero_point: vec![24], 194 | }; 195 | let biases = TokenTensor2D { 196 | buffer: TokenBuffer2D::from(dmatrix![ 197 | 25; 198 | 26 199 | ]), 200 | shape: vec![2, 1], 201 | scale: vec![0.27, 0.28], 202 | zero_point: vec![29, 30], 203 | }; 204 | let constants = 205 | 
TokenDepthwiseConv2D::preprocess(&input, &layer.weights, &biases, &layer.output); 206 | assert_eq!(constants.0 .0, Some(dmatrix![-6.3529415; -6.5882354])); 207 | assert_eq!(constants.1 .0, Some(dmatrix![0.17588235; 0.18941177])) 208 | } 209 | 210 | #[test] 211 | fn depthwise_conv_2d_to_tokens() { 212 | let layer = setup(); 213 | let weights = &layer.weights; 214 | let fused_activation = layer.fused_activation; 215 | let view_padding = layer.view_padding; 216 | let (constants_0, constants_1) = &layer.constants; 217 | assert_eq!( 218 | layer.to_token_stream().to_string(), 219 | quote! { 220 | const weights_0: microflow::tensor::Tensor4D = #weights; 221 | let input: microflow::tensor::Tensor4D<_, 1usize, 2usize, 3usize, 2usize, 1usize> = 222 | microflow::ops::depthwise_conv_2d( 223 | input, 224 | &weights_0, 225 | [0.17f32], 226 | [18i8], 227 | microflow::ops::DepthwiseConv2DOptions { 228 | fused_activation: #fused_activation, 229 | view_padding: #view_padding, 230 | strides: (1usize, 1usize), 231 | }, 232 | (#constants_0, #constants_1) 233 | ); 234 | }.to_string() 235 | ); 236 | } 237 | } 238 | -------------------------------------------------------------------------------- /microflow-macros/src/ops/fully_connected.rs: -------------------------------------------------------------------------------- 1 | use flatbuffers::{ForwardsUOffset, Vector}; 2 | use nalgebra::{convert_ref, DMatrix}; 3 | use proc_macro2::TokenStream as TokenStream2; 4 | use quote::{format_ident, quote, ToTokens}; 5 | use simba::scalar::SupersetOf; 6 | 7 | use crate::activation::TokenFusedActivation; 8 | use crate::buffer::TokenBuffer2D; 9 | use crate::quantize::TokenQuantized; 10 | use crate::tensor::TokenTensor2D; 11 | use crate::tflite_flatbuffers::tflite::{Buffer, Operator, Tensor, TensorType}; 12 | 13 | /// Represents the tokenized version of the `FullyConnected` operator. 
14 | pub(crate) struct TokenFullyConnected { 15 | pub(crate) weights: TokenTensor2D, 16 | pub(crate) output: TokenTensor2D, 17 | pub(crate) fused_activation: TokenFusedActivation, 18 | pub(crate) constants: (TokenBuffer2D, f32, TokenBuffer2D, i32), 19 | pub(crate) index: usize, 20 | pub(crate) reshape: bool, 21 | } 22 | 23 | /// Parses the [`TokenFullyConnected`] struct from the given operator. 24 | /// 25 | /// # Arguments 26 | /// * `operator` - The model operator as an [`Operator`] 27 | /// * `tensors` - The model tensors as a [`Vector>`] 28 | /// * `buffers` - The model buffers as a [`Vector>`] 29 | /// * `index` - The operator index 30 | /// 31 | pub(crate) fn parse( 32 | operator: Operator, 33 | tensors: Vector>, 34 | buffers: Vector>, 35 | index: usize, 36 | ) -> Box { 37 | let inputs = operator.inputs().unwrap(); 38 | let input_type = tensors.get(inputs.get(0) as usize).type_(); 39 | match input_type { 40 | TensorType::INT8 => Box::new(TokenFullyConnected::::new( 41 | operator, tensors, buffers, index, 42 | )), 43 | TensorType::UINT8 => Box::new(TokenFullyConnected::::new( 44 | operator, tensors, buffers, index, 45 | )), 46 | _ => unimplemented!(), 47 | } 48 | } 49 | 50 | impl TokenFullyConnected { 51 | /// Builds the [`TokenFullyConnected`] operator from the given model operator and tensors. 
52 | /// 53 | /// # Arguments 54 | /// * `operator` - The model operator as an [`Operator`] 55 | /// * `tensors` - The model tensors as a [`Vector>`] 56 | /// * `buffers` - The model buffers as a [`Vector>`] 57 | /// * `index` - The operator index 58 | /// 59 | pub(crate) fn new( 60 | operator: Operator, 61 | tensors: Vector>, 62 | buffers: Vector>, 63 | index: usize, 64 | ) -> Self { 65 | let inputs = operator.inputs().unwrap(); 66 | let input = TokenTensor2D::from_empty_tensor(tensors.get(inputs.get(0) as usize)); 67 | let weights = 68 | TokenTensor2D::from_buffered_tensor(tensors.get(inputs.get(1) as usize), buffers); 69 | let biases = 70 | TokenTensor2D::from_buffered_tensor(tensors.get(inputs.get(2) as usize), buffers); 71 | let output = TokenTensor2D::from_empty_tensor( 72 | tensors.get(operator.outputs().unwrap().get(0) as usize), 73 | ); 74 | let options = operator 75 | .builtin_options_as_fully_connected_options() 76 | .unwrap(); 77 | let constants = Self::preprocess(&input, &weights, &biases, &output); 78 | Self { 79 | weights, 80 | output, 81 | fused_activation: options.fused_activation_function().into(), 82 | reshape: input.shape.len() != 2, 83 | constants, 84 | index, 85 | } 86 | } 87 | 88 | /// Pre-processes the operator, returning the tuple of constants. 
89 | /// 90 | /// # Arguments 91 | /// * `input` - The input of the operator as a [`TokenTensor2D`] 92 | /// * `weights` - The weights of the operator as a [`TokenTensor2D`] 93 | /// * `biases` - The biases of the operator as a [`TokenTensor2D`] 94 | /// * `output` - The output of the operator as a [`TokenTensor2D`] 95 | /// 96 | fn preprocess( 97 | input: &TokenTensor2D, 98 | weights: &TokenTensor2D, 99 | biases: &TokenTensor2D, 100 | output: &TokenTensor2D, 101 | ) -> (TokenBuffer2D, f32, TokenBuffer2D, i32) { 102 | ( 103 | TokenBuffer2D::from( 104 | biases.scale[0] / output.scale[0] 105 | * biases 106 | .buffer 107 | .add_scalar(-biases.zero_point[0]) 108 | .cast::(), 109 | ), 110 | input.scale[0] * weights.scale[0] / output.scale[0], 111 | TokenBuffer2D::from(DMatrix::from_rows(&[ 112 | convert_ref::, DMatrix>(&weights.buffer).row_sum() 113 | * i32::from_subset(&input.zero_point[0]), 114 | ])), 115 | input.shape[1] as i32 116 | * i32::from_subset(&input.zero_point[0]) 117 | * i32::from_subset(&weights.zero_point[0]), 118 | ) 119 | } 120 | } 121 | 122 | impl ToTokens for TokenFullyConnected { 123 | fn to_tokens(&self, tokens: &mut TokenStream2) { 124 | let reshape = if self.reshape { 125 | quote!(.into()) 126 | } else { 127 | quote!() 128 | }; 129 | let weights_ident = format_ident!("weights_{}", self.index); 130 | let weights_type = self.weights.type_tokens(); 131 | let weights = &self.weights; 132 | let output_shape = &self.output.shape; 133 | let output_scale = self.output.scale[0]; 134 | let output_zero_point = self.output.zero_point[0]; 135 | let fused_activation = self.fused_activation; 136 | let (constants_0, constants_1, constants_2, constants_3) = &self.constants; 137 | 138 | let ts = quote! 
{ 139 | const #weights_ident: #weights_type = #weights; 140 | let input: microflow::tensor::Tensor2D<_, #(#output_shape),*, 1usize> = 141 | microflow::ops::fully_connected( 142 | input #reshape, 143 | &#weights_ident, 144 | [#output_scale], 145 | [#output_zero_point], 146 | microflow::ops::FullyConnectedOptions { 147 | fused_activation: #fused_activation, 148 | }, 149 | (#constants_0, #constants_1, #constants_2, #constants_3) 150 | ); 151 | }; 152 | ts.to_tokens(tokens); 153 | } 154 | } 155 | 156 | #[cfg(test)] 157 | mod tests { 158 | use nalgebra::dmatrix; 159 | 160 | use super::*; 161 | 162 | fn setup() -> TokenFullyConnected { 163 | TokenFullyConnected { 164 | weights: TokenTensor2D { 165 | buffer: TokenBuffer2D::from(dmatrix![ 166 | 1, 2, 3; 167 | 4, 5, 6 168 | ]), 169 | shape: vec![2, 3], 170 | scale: vec![0.7], 171 | zero_point: vec![8], 172 | }, 173 | output: TokenTensor2D { 174 | buffer: TokenBuffer2D::new(), 175 | shape: vec![1, 3], 176 | scale: vec![0.9], 177 | zero_point: vec![10], 178 | }, 179 | fused_activation: TokenFusedActivation::Relu, 180 | constants: ( 181 | TokenBuffer2D::from(dmatrix![11., 12.]), 182 | 13., 183 | TokenBuffer2D::from(dmatrix![14, 15]), 184 | 16, 185 | ), 186 | index: 0, 187 | reshape: false, 188 | } 189 | } 190 | 191 | #[test] 192 | fn fully_connected_preprocess() { 193 | let layer = setup(); 194 | let input = TokenTensor2D { 195 | buffer: TokenBuffer2D::new(), 196 | shape: vec![1, 2], 197 | scale: vec![0.17], 198 | zero_point: vec![18], 199 | }; 200 | let biases = TokenTensor2D { 201 | buffer: TokenBuffer2D::from(dmatrix![ 202 | 19; 203 | 20; 204 | 21 205 | ]), 206 | shape: vec![3, 1], 207 | scale: vec![0.22], 208 | zero_point: vec![23], 209 | }; 210 | let constants = 211 | TokenFullyConnected::preprocess(&input, &layer.weights, &biases, &layer.output); 212 | assert_eq!( 213 | constants.0 .0, 214 | Some(dmatrix![-0.9777778; -0.73333335; -0.4888889]) 215 | ); 216 | assert_eq!(constants.1, 0.13222224); 217 | 
assert_eq!(constants.2 .0, Some(dmatrix![90, 126, 162])); 218 | assert_eq!(constants.3, 288); 219 | } 220 | 221 | #[test] 222 | fn fully_connected_to_tokens() { 223 | let layer = setup(); 224 | let weights = &layer.weights; 225 | let fused_activation = layer.fused_activation; 226 | let constants_0 = &layer.constants.0; 227 | let constants_2 = &layer.constants.2; 228 | assert_eq!( 229 | layer.to_token_stream().to_string(), 230 | quote! { 231 | const weights_0: microflow::tensor::Tensor2D = #weights; 232 | let input: microflow::tensor::Tensor2D<_, 1usize, 3usize, 1usize> = 233 | microflow::ops::fully_connected( 234 | input, 235 | &weights_0, 236 | [0.9f32], 237 | [10i8], 238 | microflow::ops::FullyConnectedOptions { 239 | fused_activation: #fused_activation, 240 | }, 241 | (#constants_0, 13f32, #constants_2, 16i32) 242 | ); 243 | } 244 | .to_string() 245 | ); 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /microflow-macros/src/ops/mod.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod average_pool_2d; 2 | pub(crate) mod conv_2d; 3 | pub(crate) mod depthwise_conv_2d; 4 | pub(crate) mod fully_connected; 5 | pub(crate) mod reshape; 6 | pub(crate) mod softmax; 7 | -------------------------------------------------------------------------------- /microflow-macros/src/ops/reshape.rs: -------------------------------------------------------------------------------- 1 | use crate::tflite_flatbuffers::tflite::{Operator, Tensor}; 2 | use flatbuffers::{ForwardsUOffset, Vector}; 3 | use proc_macro2::TokenStream as TokenStream2; 4 | use quote::{quote, ToTokens}; 5 | 6 | /// Represents the tokenized version of the `Reshape` operator. 7 | pub(crate) struct TokenReshape { 8 | pub(crate) output_shape: Vec, 9 | } 10 | 11 | /// Parses the [`TokenReshape`] struct from the given operator. 
12 | /// 13 | /// # Arguments 14 | /// * `operator` - The model operator as an [`Operator`] 15 | /// * `tensors` - The model tensors as a [`Vector>`] 16 | /// 17 | pub(crate) fn parse( 18 | operator: Operator, 19 | tensors: Vector>, 20 | ) -> Box { 21 | Box::new(TokenReshape::new(operator, tensors)) 22 | } 23 | 24 | impl TokenReshape { 25 | /// Builds the [`TokenReshape`] operator from the given model operator and tensors. 26 | /// 27 | /// # Arguments 28 | /// * `operator` - The model operator as an [`Operator`] 29 | /// * `tensors` - The model tensors as a [`Vector>`] 30 | /// 31 | pub(crate) fn new(operator: Operator, tensors: Vector>) -> Self { 32 | let output_shape: Vec<_> = tensors 33 | .get(operator.outputs().unwrap().get(0) as usize) 34 | .shape() 35 | .unwrap() 36 | .iter() 37 | .map(|e| e as usize) 38 | .collect(); 39 | Self { output_shape } 40 | } 41 | } 42 | 43 | impl ToTokens for TokenReshape { 44 | fn to_tokens(&self, tokens: &mut TokenStream2) { 45 | let output_shape = &self.output_shape; 46 | let output_tensor = match output_shape.len() { 47 | 2 => quote!(Tensor2D), 48 | 4 => quote!(Tensor4D), 49 | _ => unimplemented!(), 50 | }; 51 | 52 | let ts = quote! { 53 | let input: microflow::tensor::#output_tensor<_, #(#output_shape),*, 1usize> = 54 | microflow::ops::reshape(input); 55 | }; 56 | ts.to_tokens(tokens) 57 | } 58 | } 59 | 60 | #[cfg(test)] 61 | mod tests { 62 | use super::*; 63 | 64 | fn setup() -> TokenReshape { 65 | TokenReshape { 66 | output_shape: vec![2, 3], 67 | } 68 | } 69 | 70 | #[test] 71 | fn reshape_to_tokens() { 72 | let layer = setup(); 73 | assert_eq!( 74 | layer.to_token_stream().to_string(), 75 | quote! 
{ 76 | let input: microflow::tensor::Tensor2D<_, 2usize, 3usize, 1usize> = 77 | microflow::ops::reshape(input); 78 | } 79 | .to_string() 80 | ) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /microflow-macros/src/ops/softmax.rs: -------------------------------------------------------------------------------- 1 | use crate::quantize::TokenQuantized; 2 | use crate::tensor::TokenTensor2D; 3 | use crate::tflite_flatbuffers::tflite::{Operator, Tensor, TensorType}; 4 | use flatbuffers::{ForwardsUOffset, Vector}; 5 | use proc_macro2::TokenStream as TokenStream2; 6 | use quote::{quote, ToTokens}; 7 | 8 | /// Represents the tokenized version of the `Softmax` operator. 9 | pub(crate) struct TokenSoftmax { 10 | pub(crate) output: TokenTensor2D, 11 | } 12 | 13 | /// Parses the [`TokenSoftmax`] struct from the given operator. 14 | /// 15 | /// # Arguments 16 | /// * `operator` - The model operator as an [`Operator`] 17 | /// * `tensors` - The model tensors as a [`Vector>`] 18 | /// 19 | pub(crate) fn parse( 20 | operator: Operator, 21 | tensors: Vector>, 22 | ) -> Box { 23 | let inputs = operator.inputs().unwrap(); 24 | let input_type = tensors.get(inputs.get(0) as usize).type_(); 25 | match input_type { 26 | TensorType::INT8 => Box::new(TokenSoftmax::::new(operator, tensors)), 27 | TensorType::UINT8 => Box::new(TokenSoftmax::::new(operator, tensors)), 28 | _ => unimplemented!(), 29 | } 30 | } 31 | 32 | impl TokenSoftmax { 33 | /// Builds the [`TokenSoftmax`] operator from the given model operator and tensors. 
34 | /// 35 | /// # Arguments 36 | /// * `operator` - The model operator as an [`Operator`] 37 | /// * `tensors` - The model tensors as a [`Vector>`] 38 | /// 39 | pub(crate) fn new(operator: Operator, tensors: Vector>) -> Self { 40 | let output = TokenTensor2D::from_empty_tensor( 41 | tensors.get(operator.outputs().unwrap().get(0) as usize), 42 | ); 43 | Self { output } 44 | } 45 | } 46 | 47 | impl ToTokens for TokenSoftmax { 48 | fn to_tokens(&self, tokens: &mut TokenStream2) { 49 | let output_shape = &self.output.shape; 50 | let output_scale = &self.output.scale; 51 | let output_zero_point = &self.output.zero_point; 52 | 53 | let ts = quote! { 54 | let input: microflow::tensor::Tensor2D<_, #(#output_shape),*, 1usize> = 55 | microflow::ops::softmax(input, [#(#output_scale),*], [#(#output_zero_point),*]); 56 | }; 57 | ts.to_tokens(tokens); 58 | } 59 | } 60 | 61 | #[cfg(test)] 62 | mod tests { 63 | use super::*; 64 | use crate::buffer::TokenBuffer2D; 65 | 66 | fn setup() -> TokenSoftmax { 67 | TokenSoftmax { 68 | output: TokenTensor2D { 69 | buffer: TokenBuffer2D::new(), 70 | shape: vec![2, 3], 71 | scale: vec![0.3], 72 | zero_point: vec![4], 73 | }, 74 | } 75 | } 76 | 77 | #[test] 78 | fn softmax_to_tokens() { 79 | let layer = setup(); 80 | assert_eq!( 81 | layer.to_token_stream().to_string(), 82 | quote! { 83 | let input: microflow::tensor::Tensor2D<_, 2usize, 3usize, 1usize> = 84 | microflow::ops::softmax(input, [0.3f32], [4i8]); 85 | } 86 | .to_string() 87 | ) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /microflow-macros/src/quantize.rs: -------------------------------------------------------------------------------- 1 | use byterepr::ByteReprNum; 2 | use nalgebra::Scalar; 3 | use quote::ToTokens; 4 | use simba::scalar::SubsetOf; 5 | 6 | /// Represents the trait to constrain a type to be quantized and tokenized. 
7 | pub(crate) trait TokenQuantized: 8 | Scalar + ByteReprNum + ToTokens + SubsetOf + SubsetOf + SubsetOf 9 | { 10 | } 11 | 12 | impl + SubsetOf + SubsetOf> 13 | TokenQuantized for T 14 | { 15 | } 16 | -------------------------------------------------------------------------------- /microflow-macros/src/tensor.rs: -------------------------------------------------------------------------------- 1 | use std::any::type_name; 2 | use std::mem::size_of; 3 | 4 | use flatbuffers::{ForwardsUOffset, Vector}; 5 | use nalgebra::DMatrix; 6 | use proc_macro2::TokenStream as TokenStream2; 7 | use quote::{quote, ToTokens}; 8 | use simba::scalar::SupersetOf; 9 | use syn::{parse_str, Type}; 10 | 11 | use crate::buffer::{TokenBuffer2D, TokenBuffer4D}; 12 | use crate::quantize::TokenQuantized; 13 | use crate::tflite_flatbuffers::tflite::{Buffer, Padding, Tensor}; 14 | 15 | /// Represents the tokenized version of the `TensorViewPadding`. 16 | #[derive(Copy, Clone)] 17 | pub(crate) enum TokenTensorViewPadding { 18 | Same, 19 | Valid, 20 | } 21 | 22 | /// Represents the tokenized version of the `Tensor2D`. 23 | #[derive(Debug)] 24 | pub(crate) struct TokenTensor2D { 25 | pub(crate) buffer: TokenBuffer2D, 26 | pub(crate) shape: Vec, 27 | pub(crate) scale: Vec, 28 | pub(crate) zero_point: Vec, 29 | } 30 | 31 | /// Represents the tokenized version of the `Tensor4D`. 
32 | #[derive(Debug)] 33 | pub(crate) struct TokenTensor4D { 34 | pub(crate) buffer: TokenBuffer4D, 35 | pub(crate) shape: Vec, 36 | pub(crate) scale: Vec, 37 | pub(crate) zero_point: Vec, 38 | } 39 | 40 | impl ToTokens for TokenTensorViewPadding { 41 | fn to_tokens(&self, tokens: &mut TokenStream2) { 42 | match self { 43 | Self::Same => quote!(microflow::tensor::TensorViewPadding::Same), 44 | Self::Valid => quote!(microflow::tensor::TensorViewPadding::Valid), 45 | } 46 | .to_tokens(tokens); 47 | } 48 | } 49 | 50 | impl From for TokenTensorViewPadding { 51 | fn from(padding: Padding) -> Self { 52 | match padding { 53 | Padding::SAME => Self::Same, 54 | Padding::VALID => Self::Valid, 55 | _ => unreachable!(), 56 | } 57 | } 58 | } 59 | 60 | impl TokenTensor2D { 61 | /// Builds a [`TokenTensor2D`] from an empty [`Tensor`]. 62 | /// 63 | /// # Arguments 64 | /// * `tensor` - The empty model tensor as a [`Tensor`] 65 | /// 66 | pub fn from_empty_tensor(tensor: Tensor) -> Self { 67 | let mut shape: Vec<_> = tensor.shape().unwrap().iter().map(|e| e as usize).collect(); 68 | if shape.len() == 1 { 69 | shape.insert(0, 1); 70 | } 71 | Self { 72 | buffer: TokenBuffer2D::new(), 73 | shape, 74 | scale: tensor 75 | .quantization() 76 | .unwrap() 77 | .scale() 78 | .unwrap() 79 | .iter() 80 | .collect(), 81 | zero_point: tensor 82 | .quantization() 83 | .unwrap() 84 | .zero_point() 85 | .unwrap() 86 | .iter() 87 | .map(|e| i64::to_subset_unchecked(&e)) 88 | .collect(), 89 | } 90 | } 91 | 92 | /// Builds a [`TokenTensor2D`] from a [`Tensor`] with a buffer. 
93 | /// 94 | /// # Arguments 95 | /// * `tensor` - The model tensor as a [`Tensor`] 96 | /// * `buffer` - The model buffers as a [`Vector>`] 97 | /// 98 | pub fn from_buffered_tensor(tensor: Tensor, buffers: Vector>) -> Self { 99 | let mut token_tensor = Self::from_empty_tensor(tensor); 100 | let matrix = DMatrix::from_iterator( 101 | token_tensor.shape[1], 102 | token_tensor.shape[0], 103 | buffers 104 | .get(tensor.buffer() as usize) 105 | .data() 106 | .unwrap() 107 | .bytes() 108 | .chunks_exact(size_of::()) 109 | .map(|e| T::from_le_bytes(e)), 110 | ); 111 | token_tensor.buffer = TokenBuffer2D::from(matrix); 112 | token_tensor.shape.swap(0, 1); 113 | token_tensor 114 | } 115 | 116 | /// Returns the tokens of the [`Self`] type. 117 | pub fn type_tokens(&self) -> TokenStream2 { 118 | let ty = parse_str::(type_name::()).unwrap(); 119 | let shape = &self.shape; 120 | let quants = self.scale.len(); 121 | quote!(microflow::tensor::Tensor2D<#ty, #(#shape),*, #quants>) 122 | } 123 | } 124 | 125 | impl ToTokens for TokenTensor2D { 126 | fn to_tokens(&self, tokens: &mut TokenStream2) { 127 | let buffer = &self.buffer; 128 | let scale = &self.scale; 129 | let zero_point = &self.zero_point; 130 | 131 | let ts = quote! { 132 | microflow::tensor::Tensor2D::new( 133 | #buffer, 134 | [#(#scale),*], 135 | [#(#zero_point),*] 136 | ) 137 | }; 138 | ts.to_tokens(tokens); 139 | } 140 | } 141 | 142 | impl TokenTensor4D { 143 | /// Builds a [`TokenTensor4D`] from an empty [`Tensor`]. 
144 | /// 145 | /// # Arguments 146 | /// * `tensor` - The empty model tensor as a [`Tensor`] 147 | /// 148 | pub fn from_empty_tensor(tensor: Tensor) -> Self { 149 | Self { 150 | buffer: TokenBuffer4D::new(), 151 | shape: tensor.shape().unwrap().iter().map(|e| e as usize).collect(), 152 | scale: tensor 153 | .quantization() 154 | .unwrap() 155 | .scale() 156 | .unwrap() 157 | .iter() 158 | .collect(), 159 | zero_point: tensor 160 | .quantization() 161 | .unwrap() 162 | .zero_point() 163 | .unwrap() 164 | .iter() 165 | .map(|e| i64::to_subset_unchecked(&e)) 166 | .collect(), 167 | } 168 | } 169 | 170 | /// Builds a [`TokenTensor4D`] from a [`Tensor`] with a buffer. 171 | /// 172 | /// # Arguments 173 | /// * `tensor` - The model tensor as a [`Tensor`] 174 | /// * `buffer` - The model buffers as a [`Vector>`] 175 | /// 176 | pub fn from_buffered_tensor(tensor: Tensor, buffers: Vector>) -> Self { 177 | let mut t = Self::from_empty_tensor(tensor); 178 | let len = t.shape.iter().product::() * size_of::(); 179 | let data = buffers 180 | .get(tensor.buffer() as usize) 181 | .data() 182 | .unwrap() 183 | .bytes() 184 | .chunks_exact(len / t.shape[0]) 185 | .map(|m| { 186 | DMatrix::from_row_iterator( 187 | t.shape[1], 188 | t.shape[2], 189 | m.chunks_exact(len / (t.shape[0] * t.shape[1] * t.shape[2])) 190 | .map(|v| { 191 | v.chunks_exact(size_of::()) 192 | .map(|e| T::from_le_bytes(e)) 193 | .collect::>() 194 | }), 195 | ) 196 | }) 197 | .collect::>(); 198 | t.buffer = TokenBuffer4D::from(data); 199 | t 200 | } 201 | 202 | /// Returns the tokens of the [`Self`] type. 
203 | pub fn type_tokens(&self) -> TokenStream2 { 204 | let ty = parse_str::(type_name::()).unwrap(); 205 | let shape = &self.shape; 206 | let quants = self.scale.len(); 207 | quote!(microflow::tensor::Tensor4D<#ty, #(#shape),*, #quants>) 208 | } 209 | } 210 | 211 | impl ToTokens for TokenTensor4D { 212 | fn to_tokens(&self, tokens: &mut TokenStream2) { 213 | let buffer = &self.buffer; 214 | let scale = &self.scale; 215 | let zero_point = &self.zero_point; 216 | 217 | let ts = quote! { 218 | microflow::tensor::Tensor4D::new( 219 | #buffer, 220 | [#(#scale),*], 221 | [#(#zero_point),*] 222 | ) 223 | }; 224 | ts.to_tokens(tokens); 225 | } 226 | } 227 | 228 | #[cfg(test)] 229 | mod tests { 230 | use nalgebra::dmatrix; 231 | 232 | use super::*; 233 | 234 | fn setup_2d() -> TokenTensor2D { 235 | TokenTensor2D { 236 | buffer: TokenBuffer2D::from(dmatrix![ 237 | 1, 2, 3; 238 | 4, 5, 6 239 | ]), 240 | shape: vec![2, 3], 241 | scale: vec![0.7], 242 | zero_point: vec![8], 243 | } 244 | } 245 | 246 | fn setup_4d() -> TokenTensor4D { 247 | TokenTensor4D { 248 | buffer: TokenBuffer4D::from(vec![ 249 | dmatrix![ 250 | vec![9, 10], vec![11, 12], vec![13, 14]; 251 | vec![15, 16], vec![17, 18], vec![19, 20] 252 | ], 253 | dmatrix![ 254 | vec![21, 22], vec![23, 24], vec![25, 26]; 255 | vec![27, 28], vec![29, 30], vec![31, 32] 256 | ], 257 | ]), 258 | shape: vec![2, 2, 3, 2], 259 | scale: vec![0.33, 0.34], 260 | zero_point: vec![35, 36], 261 | } 262 | } 263 | 264 | #[test] 265 | fn view_padding_to_tokens() { 266 | let padding = TokenTensorViewPadding::from(Padding::VALID); 267 | assert_eq!( 268 | padding.to_token_stream().to_string(), 269 | quote!(microflow::tensor::TensorViewPadding::Valid).to_string() 270 | ); 271 | } 272 | 273 | #[test] 274 | fn tensor_2d_type_tokens() { 275 | let tensor = setup_2d(); 276 | assert_eq!( 277 | tensor.type_tokens().to_string(), 278 | quote!(microflow::tensor::Tensor2D).to_string(), 279 | ) 280 | } 281 | 282 | #[test] 283 | fn tensor_2d_to_tokens() { 
284 | let tensor = setup_2d(); 285 | let buffer = &tensor.buffer; 286 | assert_eq!( 287 | tensor.to_token_stream().to_string(), 288 | quote! { 289 | microflow::tensor::Tensor2D::new( 290 | #buffer, 291 | [0.7f32], 292 | [8i8] 293 | ) 294 | } 295 | .to_string() 296 | ); 297 | } 298 | 299 | #[test] 300 | fn tensor_4d_type_tokens() { 301 | let tensor = setup_4d(); 302 | assert_eq!( 303 | tensor.type_tokens().to_string(), 304 | quote!(microflow::tensor::Tensor4D) 305 | .to_string(), 306 | ) 307 | } 308 | 309 | #[test] 310 | fn tensor_4d_to_tokens() { 311 | let tensor = setup_4d(); 312 | let buffer = &tensor.buffer; 313 | assert_eq!( 314 | tensor.to_token_stream().to_string(), 315 | quote! { 316 | microflow::tensor::Tensor4D::new( 317 | #buffer, 318 | [0.33f32, 0.34f32], 319 | [35i8, 36i8] 320 | ) 321 | } 322 | .to_string() 323 | ); 324 | } 325 | } 326 | -------------------------------------------------------------------------------- /models/person_detect.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/models/person_detect.tflite -------------------------------------------------------------------------------- /models/sine.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/models/sine.tflite -------------------------------------------------------------------------------- /models/speech.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/models/speech.tflite -------------------------------------------------------------------------------- /samples/no.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/samples/no.wav
--------------------------------------------------------------------------------
/samples/no_person.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/samples/no_person.bmp
--------------------------------------------------------------------------------
/samples/person.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/samples/person.bmp
--------------------------------------------------------------------------------
/samples/yes.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matteocarnelos/microflow-rs/2fb39afc20f568f08990be256f861b4aef60e5c7/samples/yes.wav
--------------------------------------------------------------------------------
/src/activation.rs:
--------------------------------------------------------------------------------
use crate::quantize::{quantize, Quantized};
use core::cmp::{max, min};
use libm::expf;

/// Represents the supported activation functions.
pub enum FusedActivation {
    /// The identity activation function.
    None,
    /// The Rectified Linear Unit (ReLU) function.
    Relu,
    /// The Rectified Linear Unit 6 (ReLU6) function.
    Relu6,
}

/// Performs the Rectified Linear Unit (ReLU) activation function.
///
/// # Arguments
/// * `input` - The input value of type `T`
/// * `zero_point` - The quantization zero point
///
pub fn relu<T: Quantized>(input: T, zero_point: T) -> T {
    // In the quantized domain the real value 0 maps to `zero_point`,
    // so clamping from below at the zero point implements max(x, 0).
    max(input, zero_point)
}

/// Performs the Rectified Linear Unit 6 (ReLU6) activation function.
26 | /// 27 | /// # Arguments 28 | /// * `input` - The input value of type `T` 29 | /// * `scale` - The quantization scale 30 | /// * `zero_point` - The quantization zero point 31 | /// 32 | pub fn relu6(input: T, scale: f32, zero_point: T) -> T { 33 | min(relu(input, zero_point), quantize(6., scale, zero_point)) 34 | } 35 | 36 | /// Performs the Softmax activation function. 37 | /// 38 | /// # Arguments 39 | /// * `input` - The floating-point input value 40 | /// * `sum` - The sum of the outcomes 41 | /// * `scale` - The quantization scale 42 | /// * `zero_point` - The quantization zero point 43 | /// 44 | pub fn softmax(input: f32, sum: f32, scale: f32, zero_point: T) -> T { 45 | quantize(expf(input) / sum, scale, zero_point) 46 | } 47 | 48 | #[cfg(test)] 49 | mod tests { 50 | use super::*; 51 | 52 | const SCALE: f32 = 0.1; 53 | const ZERO_POINT: i8 = 2; 54 | 55 | const RELU_INACTIVE_INPUT: i8 = 1; 56 | const RELU_ACTIVE_INPUT: i8 = 3; 57 | 58 | const RELU6_SATURATED_INPUT: i8 = 63; 59 | const RELU6_SATURATION_POINT: i8 = 62; 60 | 61 | const SOFTMAX_INPUT_1: f32 = 1.; 62 | const SOFTMAX_INPUT_2: f32 = 2.; 63 | const SOFTMAX_INPUT_3: f32 = 3.; 64 | const SOFTMAX_SUM: f32 = 30.192_875; 65 | const SOFTMAX_OUTPUT_1: i8 = 3; 66 | const SOFTMAX_TOTAL_PROBABILITY: i8 = 16; 67 | 68 | #[test] 69 | fn relu_inactive() { 70 | assert_eq!(relu(RELU_INACTIVE_INPUT, ZERO_POINT), ZERO_POINT); 71 | } 72 | 73 | #[test] 74 | fn relu_active() { 75 | assert_eq!(relu(RELU_ACTIVE_INPUT, ZERO_POINT), RELU_ACTIVE_INPUT); 76 | } 77 | 78 | #[test] 79 | fn relu6_saturated() { 80 | assert_eq!( 81 | relu6(RELU6_SATURATED_INPUT, SCALE, ZERO_POINT), 82 | RELU6_SATURATION_POINT 83 | ); 84 | } 85 | 86 | #[test] 87 | fn softmax_active() { 88 | assert_eq!( 89 | softmax(SOFTMAX_INPUT_1, SOFTMAX_SUM, SCALE, ZERO_POINT), 90 | SOFTMAX_OUTPUT_1 91 | ); 92 | } 93 | 94 | #[test] 95 | fn softmax_total() { 96 | let total = softmax(SOFTMAX_INPUT_1, SOFTMAX_SUM, SCALE, ZERO_POINT) 97 | + 
softmax(SOFTMAX_INPUT_2, SOFTMAX_SUM, SCALE, ZERO_POINT) 98 | + softmax(SOFTMAX_INPUT_3, SOFTMAX_SUM, SCALE, ZERO_POINT); 99 | assert_eq!(total, SOFTMAX_TOTAL_PROBABILITY); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/buffer.rs: -------------------------------------------------------------------------------- 1 | use nalgebra::SMatrix; 2 | 3 | /// Represents a 2-dimensional buffer. 4 | /// A 2-dimensional buffer is composed by a [`SMatrix`] of values `T`. 5 | pub type Buffer2D = SMatrix; 6 | 7 | /// Represents a 4-dimensional buffer. 8 | /// A 4-dimensional buffer is composed by an array of [`Buffer2D`] containing an array of values 9 | /// `T`. 10 | pub type Buffer4D< 11 | T, 12 | const BATCHES: usize, 13 | const ROWS: usize, 14 | const COLUMNS: usize, 15 | const CHANNELS: usize, 16 | > = [Buffer2D<[T; CHANNELS], ROWS, COLUMNS>; BATCHES]; 17 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! [![crates.io](https://img.shields.io/crates/v/microflow)](https://crates.io/crates/microflow) 2 | //! [![docs.rs](https://img.shields.io/docsrs/microflow)](https://docs.rs/microflow) 3 | //! [![github](https://img.shields.io/github/actions/workflow/status/matteocarnelos/microflow-rs/cargo.yml?branch=main)](https://github.com/matteocarnelos/microflow-rs/actions/workflows/cargo.yml) 4 | //! 5 | //! A robust and efficient TinyML inference engine for embedded systems. 
6 | 7 | #![no_std] 8 | 9 | pub use microflow_macros::*; 10 | 11 | pub mod activation; 12 | pub mod buffer; 13 | pub mod ops; 14 | pub mod quantize; 15 | pub mod tensor; 16 | -------------------------------------------------------------------------------- /src/ops/average_pool_2d.rs: -------------------------------------------------------------------------------- 1 | use core::array; 2 | use libm::roundf; 3 | 4 | use nalgebra::Const; 5 | use simba::scalar::SupersetOf; 6 | 7 | use crate::activation::{relu, relu6, FusedActivation}; 8 | use crate::buffer::Buffer2D; 9 | use crate::quantize::Quantized; 10 | use crate::tensor::{Tensor4D, TensorView, TensorViewPadding}; 11 | 12 | pub struct AveragePool2DOptions { 13 | pub fused_activation: FusedActivation, 14 | pub view_padding: TensorViewPadding, 15 | pub strides: (usize, usize), 16 | } 17 | 18 | /// Performs the AveragePool2D operation. 19 | /// Returns a 4-dimensional output tensor containing the result of the operation. 20 | /// 21 | /// # Arguments 22 | /// * `input` - The 4-dimensional input tensor 23 | /// * `_filter_shape` - The phantom shape of the filter 24 | /// * `output_scale` - The scale of the resulting output tensor 25 | /// * `output_zero_point` - The zero point of the resulting output tensor 26 | /// * `options` - Operator's options as an [`AveragePool2DOptions`] struct 27 | /// * `constants` - Constant values coming from the pre-processing phase 28 | /// 29 | pub fn average_pool_2d< 30 | T: Quantized, 31 | const INPUT_ROWS: usize, 32 | const INPUT_COLS: usize, 33 | const INPUT_CHANS: usize, 34 | const FILTER_ROWS: usize, 35 | const FILTER_COLS: usize, 36 | const OUTPUT_ROWS: usize, 37 | const OUTPUT_COLS: usize, 38 | >( 39 | input: Tensor4D, 40 | _filter_shape: (Const, Const), 41 | output_scale: [f32; 1], 42 | output_zero_point: [T; 1], 43 | options: AveragePool2DOptions, 44 | constants: (f32, f32), 45 | ) -> Tensor4D { 46 | let output = [Buffer2D::from_fn(|i, j| { 47 | // Extract the view using the view 
extraction algorithm 48 | let view: TensorView = 49 | input.view((i, j), 0, options.view_padding, options.strides); 50 | // Compute the average pooling for each channel 51 | array::from_fn(|c| { 52 | let x = 1. / view.len as f32 53 | * view 54 | .buffer 55 | .fold(0i32, |acc, a| acc + i32::from_subset(&a[c])) as f32; 56 | let y = T::from_superset_unchecked(&roundf(constants.0 * x + constants.1)); 57 | // Apply the fused activation function (if any) 58 | match options.fused_activation { 59 | FusedActivation::None => y, 60 | FusedActivation::Relu => relu(y, output_zero_point[0]), 61 | FusedActivation::Relu6 => relu6(y, output_scale[0], output_zero_point[0]), 62 | } 63 | }) 64 | })]; 65 | Tensor4D::new(output, output_scale, output_zero_point) 66 | } 67 | 68 | #[cfg(test)] 69 | mod tests { 70 | use nalgebra::matrix; 71 | 72 | use super::*; 73 | 74 | const INPUT: Tensor4D = Tensor4D { 75 | buffer: [matrix![ 76 | [1, 2], [3, 4], [5, 6]; 77 | [7, 8], [9, 10], [11, 12] 78 | ]], 79 | scale: [0.13], 80 | zero_point: [14], 81 | }; 82 | const FILTER_SHAPE: (Const<2>, Const<3>) = (Const, Const); 83 | const OUTPUT_SCALE: [f32; 1] = [0.15]; 84 | const OUTPUT_ZERO_POINT: [i8; 1] = [16]; 85 | const OPTIONS: AveragePool2DOptions = AveragePool2DOptions { 86 | fused_activation: FusedActivation::None, 87 | view_padding: TensorViewPadding::Same, 88 | strides: (1, 1), 89 | }; 90 | const CONSTANTS: (f32, f32) = (0.866_666_7, 3.866_666_6); 91 | const OUTPUT: Tensor4D = Tensor4D { 92 | buffer: [matrix![ 93 | [8, 9], [9, 10], [10, 11]; 94 | [11, 12], [12, 13], [13, 13] 95 | ]], 96 | scale: [0.15], 97 | zero_point: [16], 98 | }; 99 | 100 | #[test] 101 | fn average_pool_2d_layer() { 102 | assert_eq!( 103 | average_pool_2d( 104 | INPUT, 105 | FILTER_SHAPE, 106 | OUTPUT_SCALE, 107 | OUTPUT_ZERO_POINT, 108 | OPTIONS, 109 | CONSTANTS, 110 | ), 111 | OUTPUT 112 | ); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/ops/conv_2d.rs: 
-------------------------------------------------------------------------------- 1 | use core::array; 2 | use libm::roundf; 3 | 4 | use simba::scalar::SupersetOf; 5 | 6 | use crate::activation::{relu, relu6, FusedActivation}; 7 | use crate::buffer::Buffer2D; 8 | use crate::quantize::Quantized; 9 | use crate::tensor::{Tensor4D, TensorView, TensorViewPadding}; 10 | 11 | pub struct Conv2DOptions { 12 | pub fused_activation: FusedActivation, 13 | pub view_padding: TensorViewPadding, 14 | pub strides: (usize, usize), 15 | } 16 | 17 | /// Performs the Conv2D operation. 18 | /// Returns a 4-dimensional output tensor containing the result of the operation. 19 | /// 20 | /// # Arguments 21 | /// * `input` - The 4-dimensional input tensor 22 | /// * `filters` - The 4-dimensional tensor representing the filters of the operator 23 | /// * `output_scale` - The scale of the resulting output tensor 24 | /// * `output_zero_point` - The zero point of the resulting output tensor 25 | /// * `options` - Operator's options as an [`Conv2DOptions`] struct 26 | /// * `constants` - Constant values coming from the pre-processing phase 27 | /// 28 | pub fn conv_2d< 29 | T: Quantized, 30 | const INPUT_ROWS: usize, 31 | const INPUT_COLS: usize, 32 | const INPUT_CHANS: usize, 33 | const FILTERS_BATCHES: usize, 34 | const FILTERS_ROWS: usize, 35 | const FILTERS_COLS: usize, 36 | const FILTERS_QUANTS: usize, 37 | const OUTPUT_ROWS: usize, 38 | const OUTPUT_COLS: usize, 39 | >( 40 | input: Tensor4D, 41 | filters: &Tensor4D, 42 | output_scale: [f32; 1], 43 | output_zero_point: [T; 1], 44 | options: Conv2DOptions, 45 | constants: ( 46 | Buffer2D, 47 | Buffer2D, 48 | ), 49 | ) -> Tensor4D { 50 | let output = [Buffer2D::from_fn(|i, j| { 51 | // Extract the view using the view extraction algorithm 52 | let view: TensorView = 53 | input.view((i, j), 0, options.view_padding, options.strides); 54 | // Perform the convolution for each filter batch 55 | array::from_fn(|b| { 56 | let input_zero_point = 
i32::from_subset(&input.zero_point[0]); 57 | let filters_zero_point = i32::from_subset( 58 | &filters 59 | .zero_point 60 | .get(b) 61 | .copied() 62 | .unwrap_or(filters.zero_point[0]), 63 | ); 64 | let x = ( 65 | // Perform the dot product between the input region and the filter 66 | view.buffer.zip_fold(&filters.buffer[b], 0i32, |acc, v, f| { 67 | acc + v 68 | .iter() 69 | .zip(f.iter()) 70 | .map(|(e1, e2)| i32::from_subset(e1) * i32::from_subset(e2)) 71 | .sum::() 72 | }), 73 | // Perform the 3-dimensional component-sum of the view 74 | view.buffer.fold(0i32, |acc, a| { 75 | acc + a.iter().fold(0i32, |acc, e| acc + i32::from_subset(e)) 76 | }) * filters_zero_point, 77 | ); 78 | // Elaborate the constants 79 | let constants = ( 80 | constants.0, 81 | constants.1, 82 | input_zero_point 83 | * filters.buffer[b].zip_fold(&view.mask, 0i32, |acc, f, m| { 84 | if m { 85 | acc + f.iter().fold(0i32, |acc, e| acc + i32::from_subset(e)) 86 | } else { 87 | acc 88 | } 89 | }), 90 | view.len as i32 * INPUT_CHANS as i32 * input_zero_point * filters_zero_point, 91 | ); 92 | // Combine the constant values and the variants to obtain the output 93 | let y = T::from_superset_unchecked(&roundf( 94 | f32::from_subset(&output_zero_point[0]) 95 | + constants.0[b] 96 | + constants.1.get(b).copied().unwrap_or(constants.1[0]) 97 | * f32::from_subset(&(x.0 - x.1 - constants.2 + constants.3)), 98 | )); 99 | // Apply the fused activation function (if any) 100 | match options.fused_activation { 101 | FusedActivation::None => y, 102 | FusedActivation::Relu => relu(y, output_zero_point[0]), 103 | FusedActivation::Relu6 => relu6(y, output_scale[0], output_zero_point[0]), 104 | } 105 | }) 106 | })]; 107 | Tensor4D::new(output, output_scale, output_zero_point) 108 | } 109 | 110 | #[cfg(test)] 111 | mod tests { 112 | use nalgebra::matrix; 113 | 114 | use crate::tensor::Tensor2D; 115 | 116 | use super::*; 117 | 118 | const INPUT: Tensor4D = Tensor4D { 119 | buffer: [matrix![ 120 | [1, 2], [3, 4], 
[5, 6];
            [7, 8], [9, 10], [11, 12]
        ]],
        scale: [0.13],
        zero_point: [14],
    };
    // Two filter batches with per-channel quantization (QUANTS = 2).
    const FILTERS: Tensor4D<i8, 2, 2, 3, 2, 2> = Tensor4D {
        buffer: [
            matrix![
                [15, 16], [17, 18], [19, 20];
                [21, 22], [23, 24], [25, 26]
            ],
            matrix![
                [27, 28], [29, 30], [31, 32];
                [33, 34], [35, 36], [37, 38]
            ],
        ],
        scale: [0.39, 0.40],
        zero_point: [41, 42],
    };
    // Biases are folded into CONSTANTS during pre-processing; kept for reference only.
    const _BIASES: Tensor2D<i32, 2, 1, 2> = Tensor2D {
        buffer: matrix![
            43;
            44
        ],
        scale: [0.45, 0.46],
        zero_point: [47, 48],
    };
    const OUTPUT_SCALE: [f32; 1] = [0.49];
    const OUTPUT_ZERO_POINT: [i8; 1] = [50];
    const OPTIONS: Conv2DOptions = Conv2DOptions {
        fused_activation: FusedActivation::None,
        view_padding: TensorViewPadding::Same,
        strides: (1, 1),
    };
    // Pre-computed per-output-channel constants (see the operator's pre-processing phase).
    const CONSTANTS: (Buffer2D<f32, 2, 1>, Buffer2D<f32, 2, 1>) = (
        matrix![-3.673_469_4; -3.755_102],
        matrix![0.103_469_39; 0.106_122_45],
    );
    const OUTPUT: Tensor4D<i8, 1, 2, 3, 2, 1> = Tensor4D {
        buffer: [matrix![
            [127, 116], [127, 127], [127, 113];
            [98, 74], [114, 84], [82, 67]
        ]],
        scale: [0.49],
        zero_point: [50],
    };

    #[test]
    fn conv_2d_layer() {
        assert_eq!(
            conv_2d(
                INPUT,
                &FILTERS,
                OUTPUT_SCALE,
                OUTPUT_ZERO_POINT,
                OPTIONS,
                CONSTANTS,
            ),
            OUTPUT
        );
    }
}
--------------------------------------------------------------------------------
/src/ops/depthwise_conv_2d.rs:
--------------------------------------------------------------------------------
use core::array;
use libm::roundf;

use simba::scalar::SupersetOf;

use crate::activation::{relu, relu6, FusedActivation};
use crate::buffer::Buffer2D;
use crate::quantize::Quantized;
use crate::tensor::{Tensor4D, TensorView, TensorViewPadding};

/// Options for the [`depthwise_conv_2d`] operator.
pub struct DepthwiseConv2DOptions {
    /// Activation function fused into the operator, applied to each output element.
    pub fused_activation: FusedActivation,
    /// Padding strategy used when extracting input views.
    pub view_padding: TensorViewPadding,
    /// Strides of the convolution as (rows, columns).
    pub strides: (usize, usize),
}

/// Performs the DepthwiseConv2D operation.
/// Returns a 4-dimensional output tensor containing the result of the operation.
///
/// # Arguments
/// * `input` - The 4-dimensional input tensor
/// * `weights` - The 4-dimensional tensor representing the weights of the operator
/// * `output_scale` - The scale of the resulting output tensor
/// * `output_zero_point` - The zero point of the resulting output tensor
/// * `options` - Operator's options as a [`DepthwiseConv2DOptions`] struct
/// * `constants` - Constant values coming from the pre-processing phase
///
// NOTE(review): generic argument lists below were reconstructed from usage
// (extraction stripped single-line `<...>` spans) — confirm against upstream.
pub fn depthwise_conv_2d<
    T: Quantized,
    const INPUT_ROWS: usize,
    const INPUT_COLS: usize,
    const INPUT_CHANS: usize,
    const WEIGHTS_ROWS: usize,
    const WEIGHTS_COLS: usize,
    const WEIGHTS_CHANS: usize,
    const WEIGHTS_QUANTS: usize,
    const OUTPUT_ROWS: usize,
    const OUTPUT_COLS: usize,
>(
    input: Tensor4D<T, 1, INPUT_ROWS, INPUT_COLS, INPUT_CHANS, 1>,
    weights: &Tensor4D<T, 1, WEIGHTS_ROWS, WEIGHTS_COLS, WEIGHTS_CHANS, WEIGHTS_QUANTS>,
    output_scale: [f32; 1],
    output_zero_point: [T; 1],
    options: DepthwiseConv2DOptions,
    constants: (
        Buffer2D<f32, WEIGHTS_CHANS, 1>,
        Buffer2D<f32, WEIGHTS_CHANS, 1>,
    ),
) -> Tensor4D<T, 1, OUTPUT_ROWS, OUTPUT_COLS, WEIGHTS_CHANS, 1> {
    let output = [Buffer2D::from_fn(|i, j| {
        // Extract the view using the view extraction algorithm
        let view: TensorView<T, WEIGHTS_ROWS, WEIGHTS_COLS, INPUT_CHANS> =
            input.view((i, j), 0, options.view_padding, options.strides);
        // Perform the convolution for each output channel
        array::from_fn(|c| {
            let input_zero_point = i32::from_subset(&input.zero_point[0]);
            // Per-channel quantization: fall back to quant 0 when only one is present
            let weights_zero_point = i32::from_subset(
                &weights
                    .zero_point
                    .get(c)
                    .copied()
                    .unwrap_or(weights.zero_point[0]),
            );
            let x = (
                // Perform the dot product between the input region and the weights
                view.buffer.zip_fold(&weights.buffer[0], 0i32, |acc, v, w| {
                    acc + i32::from_subset(&v.get(c).copied().unwrap_or(v[0]))
                        * i32::from_subset(&w[c])
                }),
                // Perform the 2-dimensional component-sum of the view for the given channel
                view.buffer.fold(0i32, |acc, a| {
                    acc + i32::from_subset(&a.get(c).copied().unwrap_or(a[0]))
                }) * weights_zero_point,
            );
            // Elaborate the constants (the mask restricts the weight-sum to valid,
            // non-padded positions of the extracted view)
            let constants = (
                constants.0,
                constants.1,
                input_zero_point
                    * weights.buffer[0].zip_fold(&view.mask, 0i32, |acc, w, m| {
                        if m {
                            acc + i32::from_subset(&w[c])
                        } else {
                            acc
                        }
                    }),
                view.len as i32 * input_zero_point * weights_zero_point,
            );
            // Combine the constant values and the variants to obtain the output
            let y = T::from_superset_unchecked(&roundf(
                f32::from_subset(&output_zero_point[0])
                    + constants.0[c]
                    + constants.1.get(c).copied().unwrap_or(constants.1[0])
                        * f32::from_subset(&(x.0 - x.1 - constants.2 + constants.3)),
            ));
            // Apply the fused activation function (if any)
            match options.fused_activation {
                FusedActivation::None => y,
                FusedActivation::Relu => relu(y, output_zero_point[0]),
                FusedActivation::Relu6 => relu6(y, output_scale[0], output_zero_point[0]),
            }
        })
    })];
    Tensor4D::new(output, output_scale, output_zero_point)
}

#[cfg(test)]
mod tests {
    use nalgebra::matrix;

    use crate::tensor::Tensor2D;

    use super::*;

    const INPUT: Tensor4D<i8, 1, 2, 3, 2, 1> = Tensor4D {
        buffer: [matrix![
            [1, 2], [3, 4], [5, 6];
            [7, 8], [9, 10], [11, 12]
        ]],
        scale: [0.13],
        zero_point: [14],
    };
    const WEIGHTS: Tensor4D<i8, 1, 2, 3, 2, 2> = Tensor4D {
        buffer: [matrix![
            [15, 16], [17, 18], [19, 20];
            [21, 22], [23, 24], [25, 26]
        ]],
        scale: [0.27, 0.28],
        zero_point: [29, 30],
    };
    // Biases are folded into CONSTANTS during pre-processing; kept for reference only.
    const _BIASES: Tensor2D<i32, 2, 1, 2> = Tensor2D {
        buffer: matrix![
            31;
            32
        ],
        scale: [0.33, 0.34],
        zero_point: [35, 36],
    };
    const OUTPUT_SCALE: [f32; 1] = [0.37];
    const OUTPUT_ZERO_POINT: [i8; 1] = [38];
    const OPTIONS: DepthwiseConv2DOptions = DepthwiseConv2DOptions {
        fused_activation: FusedActivation::None,
        view_padding: TensorViewPadding::Same,
        strides: (1, 1),
    };
    const CONSTANTS: (Buffer2D<f32, 2, 1>, Buffer2D<f32, 2, 1>) = (
        matrix![-3.567_567_6; -3.675_675_7],
        matrix![0.094_864_86; 0.098_378_378],
    );
    const OUTPUT: Tensor4D<i8, 1, 2, 3, 2, 1> = Tensor4D {
        buffer: [matrix![
            [66, 63], [82, 78], [65, 62];
            [47, 45], [52, 49], [44, 42]
        ]],
        scale: [0.37],
        zero_point: [38],
    };

    #[test]
    fn depthwise_conv_2d_layer() {
        assert_eq!(
            depthwise_conv_2d(
                INPUT,
                &WEIGHTS,
                OUTPUT_SCALE,
                OUTPUT_ZERO_POINT,
                OPTIONS,
                CONSTANTS,
            ),
            OUTPUT
        );
    }
}
--------------------------------------------------------------------------------
/src/ops/fully_connected.rs:
--------------------------------------------------------------------------------
use libm::roundf;
use simba::scalar::SupersetOf;

use crate::activation::{relu, relu6, FusedActivation};
use crate::buffer::Buffer2D;
use crate::quantize::Quantized;
use crate::tensor::Tensor2D;

/// Options for the [`fully_connected`] operator.
pub struct FullyConnectedOptions {
    /// Activation function fused into the operator, applied to each output element.
    pub fused_activation: FusedActivation,
}

/// Performs the FullyConnected operation.
/// Returns a 2-dimensional output tensor containing the result of the operation.
15 | /// 16 | /// # Arguments 17 | /// * `input` - The 2-dimensional input tensor 18 | /// * `weights` - The 2-dimensional tensor representing the weights of the operator 19 | /// * `output_scale` - The scale of the resulting output tensor 20 | /// * `output_zero_point` - The zero point of the resulting output tensor 21 | /// * `options` - Operator's options as an [`FullyConnectedOptions`] struct 22 | /// * `constants` - Constant values coming from the pre-processing phase 23 | /// 24 | pub fn fully_connected< 25 | T: Quantized, 26 | const INPUT_ROWS: usize, 27 | const INPUT_COLS: usize, 28 | const WEIGHTS_COLS: usize, 29 | >( 30 | input: Tensor2D, 31 | weights: &Tensor2D, 32 | output_scale: [f32; 1], 33 | output_zero_point: [T; 1], 34 | options: FullyConnectedOptions, 35 | constants: ( 36 | Buffer2D, 37 | f32, 38 | Buffer2D, 39 | i32, 40 | ), 41 | ) -> Tensor2D { 42 | let x: ( 43 | Buffer2D, 44 | Buffer2D, 45 | ) = ( 46 | // Perform the dot product between the input and the weights 47 | Buffer2D::from_fn(|i, j| { 48 | input 49 | .buffer 50 | .row(i) 51 | .iter() 52 | .zip(weights.buffer.column(j).iter()) 53 | .fold(0i32, |acc, (i, w)| { 54 | acc + i32::from_subset(i) * i32::from_subset(w) 55 | }) 56 | }), 57 | // Perform the row-sum of the weights 58 | Buffer2D::from_fn(|i, _| { 59 | input 60 | .buffer 61 | .row(i) 62 | .fold(0i32, |acc, e| acc + i32::from_subset(&e)) 63 | * i32::from_subset(&weights.zero_point[0]) 64 | }), 65 | ); 66 | // Combine the constant values and the variants to obtain the output 67 | let output = Buffer2D::from_fn(|i, j| { 68 | let y = T::from_superset_unchecked(&roundf( 69 | f32::from_subset(&output_zero_point[0]) 70 | + constants.0[j] 71 | + constants.1 72 | * f32::from_subset(&(x.0[(i, j)] - x.1[i] - constants.2[j] + constants.3)), 73 | )); 74 | // Apply the fused activation function (if any) 75 | match options.fused_activation { 76 | FusedActivation::None => y, 77 | FusedActivation::Relu => relu(y, output_zero_point[0]), 78 | 
FusedActivation::Relu6 => relu6(y, output_scale[0], output_zero_point[0]), 79 | } 80 | }); 81 | Tensor2D::new(output, output_scale, output_zero_point) 82 | } 83 | 84 | #[cfg(test)] 85 | mod tests { 86 | use nalgebra::matrix; 87 | 88 | use super::*; 89 | 90 | const INPUT: Tensor2D = Tensor2D { 91 | buffer: matrix![ 92 | 1, 2, 3; 93 | 4, 5, 6 94 | ], 95 | scale: [0.7], 96 | zero_point: [8], 97 | }; 98 | const WEIGHTS: Tensor2D = Tensor2D { 99 | buffer: matrix![ 100 | 9, 10, 11, 12; 101 | 13, 14, 15, 16; 102 | 17, 18, 19, 20 103 | ], 104 | scale: [0.21], 105 | zero_point: [22], 106 | }; 107 | const _BIASES: Tensor2D = Tensor2D { 108 | buffer: matrix![ 109 | 23; 24; 25; 26 110 | ], 111 | scale: [0.27], 112 | zero_point: [28], 113 | }; 114 | const OUTPUT_SCALE: [f32; 1] = [0.29]; 115 | const OUTPUT_ZERO_POINT: [i8; 1] = [30]; 116 | const OPTIONS: FullyConnectedOptions = FullyConnectedOptions { 117 | fused_activation: FusedActivation::Relu, 118 | }; 119 | const CONSTANTS: (Buffer2D, f32, Buffer2D, i32) = ( 120 | matrix![-4.655_172_3; -3.724_138; -2.793_103_5; -1.862_069], 121 | 0.506_896_56, 122 | matrix![312, 336, 360, 384], 123 | 528, 124 | ); 125 | const OUTPUT: Tensor2D = Tensor2D { 126 | buffer: matrix![ 127 | 112, 103, 95, 87; 128 | 70, 67, 63, 60 129 | ], 130 | scale: [0.29], 131 | zero_point: [30], 132 | }; 133 | 134 | #[test] 135 | fn fully_connected_layer() { 136 | assert_eq!( 137 | fully_connected( 138 | INPUT, 139 | &WEIGHTS, 140 | OUTPUT_SCALE, 141 | OUTPUT_ZERO_POINT, 142 | OPTIONS, 143 | CONSTANTS 144 | ), 145 | OUTPUT 146 | ) 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/ops/mod.rs: -------------------------------------------------------------------------------- 1 | mod average_pool_2d; 2 | mod conv_2d; 3 | mod depthwise_conv_2d; 4 | mod fully_connected; 5 | mod reshape; 6 | mod softmax; 7 | 8 | pub use average_pool_2d::*; 9 | pub use conv_2d::*; 10 | pub use depthwise_conv_2d::*; 11 | pub use 
fully_connected::*;
pub use reshape::*;
pub use softmax::*;
--------------------------------------------------------------------------------
/src/ops/reshape.rs:
--------------------------------------------------------------------------------
/// Performs the Reshape operator.
/// Returns the corresponding output tensor.
///
/// Reshaping is a pure conversion: the target tensor type is produced through
/// the input type's `Into` implementation, so no arithmetic is performed here.
pub fn reshape<InputT, OutputT>(input: InputT) -> OutputT
where
    InputT: Into<OutputT>,
{
    input.into()
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::tensor::{Tensor2D, Tensor4D};
    use nalgebra::matrix;

    const INPUT: Tensor2D<i8, 2, 3, 1> = Tensor2D {
        buffer: matrix![
            1, 2, 3;
            4, 5, 6
        ],
        scale: [0.7],
        zero_point: [8],
    };
    // The 2x3 matrix reshaped into 2 batches of 1x3 single-channel data;
    // scale and zero point are carried over unchanged.
    const OUTPUT: Tensor4D<i8, 2, 1, 3, 1, 1> = Tensor4D {
        buffer: [matrix![[1], [2], [3]], matrix![[4], [5], [6]]],
        scale: [0.7],
        zero_point: [8],
    };

    #[test]
    fn reshape_layer() {
        let output: Tensor4D<i8, 2, 1, 3, 1, 1> = reshape(INPUT);
        assert_eq!(output, OUTPUT);
    }
}
--------------------------------------------------------------------------------
/src/ops/softmax.rs:
--------------------------------------------------------------------------------
use crate::activation;
use crate::quantize::Quantized;
use crate::tensor::Tensor2D;
use libm::expf;
use simba::scalar::SupersetOf;

/// Performs the Softmax activation function as an operator.
/// Returns a 2-dimensional output tensor containing the result of the operation.
9 | /// 10 | /// # Arguments 11 | /// * `input` - The 2-dimensional input tensor 12 | /// * `output_scale` - The scale of the resulting output tensor 13 | /// * `output_zero_point` - The zero point of the resulting output tensor 14 | /// 15 | pub fn softmax( 16 | input: Tensor2D, 17 | output_scale: [f32; 1], 18 | output_zero_point: [T; 1], 19 | ) -> Tensor2D { 20 | let exp = input.buffer.map(|e| f32::from_subset(&e) * input.scale[0]); 21 | let sum = exp.map(expf).sum(); 22 | Tensor2D::new( 23 | exp.map(|e| activation::softmax(e, sum, output_scale[0], output_zero_point[0])), 24 | output_scale, 25 | output_zero_point, 26 | ) 27 | } 28 | 29 | #[cfg(test)] 30 | mod tests { 31 | use super::*; 32 | use nalgebra::matrix; 33 | 34 | const INPUT: Tensor2D = Tensor2D { 35 | buffer: matrix![ 36 | 1, 2, 3; 37 | 4, 5, 6 38 | ], 39 | scale: [0.7], 40 | zero_point: [8], 41 | }; 42 | const OUTPUT_SCALE: [f32; 1] = [0.9]; 43 | const OUTPUT_ZERO_POINT: [i8; 1] = [10]; 44 | const OUTPUT: Tensor2D = Tensor2D { 45 | buffer: matrix![ 46 | 10, 10, 10; 47 | 10, 10, 11 48 | ], 49 | scale: OUTPUT_SCALE, 50 | zero_point: OUTPUT_ZERO_POINT, 51 | }; 52 | 53 | #[test] 54 | fn softmax_layer() { 55 | assert_eq!(softmax(INPUT, OUTPUT_SCALE, OUTPUT_ZERO_POINT), OUTPUT); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/quantize.rs: -------------------------------------------------------------------------------- 1 | use libm::roundf; 2 | use nalgebra::Scalar; 3 | use simba::scalar::{SubsetOf, SupersetOf}; 4 | 5 | /// Represents the trait to constrain a type to be quantized. 6 | pub trait Quantized: Scalar + Copy + Ord + SubsetOf + SubsetOf {} 7 | impl + SubsetOf> Quantized for T {} 8 | 9 | /// Performs quantization on the given floating-point input. 
10 | /// 11 | /// # Arguments 12 | /// * `input` - The input value to quantize 13 | /// * `scale` - The quantization scale 14 | /// * `zero_point` - The quantization zero point 15 | /// 16 | pub fn quantize(input: f32, scale: f32, zero_point: T) -> T { 17 | roundf(input / scale + f32::from_subset(&zero_point)).to_subset_unchecked() 18 | } 19 | 20 | /// Performs dequantization on the given integer input. 21 | /// 22 | /// # Arguments 23 | /// * `input` - The input value to dequantize 24 | /// * `scale` - The quantization scale 25 | /// * `zero_point` - The quantization zero point 26 | /// 27 | pub fn dequantize(input: T, scale: f32, zero_point: T) -> f32 { 28 | scale * (f32::from_subset(&input) - f32::from_subset(&zero_point)) 29 | } 30 | 31 | #[cfg(test)] 32 | mod tests { 33 | use super::*; 34 | 35 | const VALUE: f32 = 1.; 36 | const SCALE: f32 = 0.2; 37 | const ZERO_POINT: i8 = 3; 38 | const VALUE_QUANTIZED: i8 = 8; 39 | const VALUE_DEQUANTIZED: f32 = 1.; 40 | 41 | #[test] 42 | fn quantize_value() { 43 | assert_eq!(quantize(VALUE, SCALE, ZERO_POINT), VALUE_QUANTIZED); 44 | } 45 | 46 | #[test] 47 | fn dequantize_value() { 48 | assert_eq!( 49 | dequantize(VALUE_QUANTIZED, SCALE, ZERO_POINT), 50 | VALUE_DEQUANTIZED 51 | ); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /tests/person_detect.rs: -------------------------------------------------------------------------------- 1 | use microflow::buffer::Buffer2D; 2 | use microflow_macros::model; 3 | use nalgebra::matrix; 4 | 5 | #[model("models/person_detect.tflite")] 6 | struct PersonDetect; 7 | 8 | #[test] 9 | fn person_detect_model() { 10 | let input = [Buffer2D::from_element([0.5])]; 11 | let output = matrix![0.8046875, 0.1953125]; 12 | assert_eq!(PersonDetect::predict(input), output); 13 | } 14 | -------------------------------------------------------------------------------- /tests/sine.rs: 
-------------------------------------------------------------------------------- 1 | use microflow_macros::model; 2 | use nalgebra::matrix; 3 | 4 | #[model("models/sine.tflite")] 5 | struct Sine; 6 | 7 | #[test] 8 | fn sine_model() { 9 | let input = matrix![0.5]; 10 | let output = matrix![0.41348344]; 11 | assert_eq!(Sine::predict(input), output); 12 | } 13 | -------------------------------------------------------------------------------- /tests/speech.rs: -------------------------------------------------------------------------------- 1 | use microflow::buffer::Buffer2D; 2 | use microflow_macros::model; 3 | use nalgebra::matrix; 4 | 5 | #[model("models/speech.tflite")] 6 | struct Speech; 7 | 8 | #[test] 9 | fn speech_model() { 10 | let input = Buffer2D::from_element(0.5); 11 | let output = matrix![0.15625, 0.2734375, 0.2734375, 0.296875]; 12 | assert_eq!(Speech::predict(input), output); 13 | } 14 | --------------------------------------------------------------------------------