├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── Jenkinsfile ├── Makefile ├── README.rst ├── cmakefiles └── xtflm.cmake ├── fetch_dependencies.py ├── flash_builder ├── flash_builder │ ├── __init__.py │ └── build_flash_file.py └── setup.py ├── host_cmd_line_interpreter ├── CMakeLists.txt ├── Makefile ├── README.rst ├── src │ ├── main.cc │ └── xtflm_conf.h └── tests │ ├── Makefile │ ├── test_mobnet │ ├── Makefile │ ├── baboon.raw │ ├── classes.raw │ ├── mobilenet_v1_0.25_128.tflite │ ├── model_mobilenet_v1.params │ └── model_mobilenet_v1.tflite │ └── write_reference_output.py ├── lib_tflite_micro ├── api │ ├── fast_flash.h │ ├── flash_server.h │ ├── inference_engine.h │ ├── ioserver.h │ ├── load_weights.h │ ├── memory_parallel_transport.h │ ├── tile_ram_server.h │ ├── version.h │ ├── xcore_config.h │ ├── xcore_device_memory.h │ └── xcore_shared_config.h ├── module_build_info └── src │ ├── fast_flash.xc │ ├── fast_flash_read_loop.S │ ├── flash_server.c │ ├── inference_engine.cc │ ├── ioserver.c │ ├── load_weights.c │ ├── memory_parallel_transport.c │ ├── memory_transport_ll.S │ ├── par_invoke_funcs.c │ ├── tflite-xcore-kernels │ ├── conv2d_float.c │ ├── conv2d_float.h │ ├── micro_time.cc │ ├── xcore_add.cc │ ├── xcore_batched_softmax.cc │ ├── xcore_beta_activationf32.cc │ ├── xcore_beta_concatf32.cc │ ├── xcore_beta_convf32.cc │ ├── xcore_beta_fcf32.cc │ ├── xcore_beta_transposeconvf32.cc │ ├── xcore_binaryi16.cc │ ├── xcore_broadcast.cc │ ├── xcore_bsign.cc │ ├── xcore_common.cc │ ├── xcore_concat.cc │ ├── xcore_conv2d_v2.cc │ ├── xcore_custom_options.cc │ ├── xcore_custom_options.h │ ├── xcore_detection_post.cc │ ├── xcore_error_reporter.cc │ ├── xcore_error_reporter.h │ ├── xcore_expand_8_to_16.cc │ ├── xcore_interpreter.cc │ ├── xcore_interpreter.h │ ├── xcore_load_store_tensor.cc │ ├── xcore_load_weights_wait.cc │ ├── xcore_lookup.cc │ ├── xcore_maxpool2d.cc │ ├── xcore_mean.cc │ ├── xcore_meani16.cc │ ├── xcore_mul.cc │ ├── xcore_n_to_4.cc │ ├── 
xcore_ops.cc │ ├── xcore_ops.h │ ├── xcore_pad.cc │ ├── xcore_profiler.cc │ ├── xcore_profiler.h │ ├── xcore_slice.cc │ ├── xcore_softmax.cc │ ├── xcore_transpose.cc │ ├── xcore_unaryi16.cc │ ├── xcore_utils.cc │ └── xcore_utils.h │ ├── thread_call.S │ ├── thread_call.h │ ├── thread_call_host_emulation.c │ ├── tile_ram_server.c │ └── xcore_device_memory.c ├── patches └── tflite-micro.patch ├── repos.list ├── requirements.txt ├── sample_mobilenet ├── add_newlines_to_csv.py ├── mobilenet_v1_25.tflite ├── run.py ├── s1.jpg ├── s2.jpg └── s3.jpg ├── tflite_micro_compiler ├── .clang-format ├── .github │ ├── ISSUE_TEMPLATE │ │ ├── bug_report.md │ │ ├── discussion.md │ │ ├── feature_request.md │ │ └── improvement.md │ └── workflows │ │ └── c-cpp.yml ├── .gitignore ├── .vscode │ ├── c_cpp_properties.json │ ├── launch.json │ └── tasks.json ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── NEWS.txt ├── README.md ├── VerifiedTensorflowVersion.txt ├── cmake │ ├── FindTFLite.cmake │ └── xbuild.cmake ├── common.mk ├── examples │ ├── .gitignore │ ├── CMakeLists.txt │ ├── Makefile │ ├── Makefile.inc │ ├── Model_source.txt │ ├── cifar10.cc │ ├── cifar10.tflite │ ├── cifar10_model.c │ ├── cifar10_run_comp.cc │ ├── compiled_cifar10.cc │ ├── compiled_cifar10.cc.h │ ├── compiled_hello_world.cc │ ├── compiled_hello_world.cc.h │ ├── compiled_mobilenet.cc │ ├── compiled_mobilenet.cc.h │ ├── custom │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── custom.tflite │ │ ├── custom_implementations.cpp │ │ ├── fake_implementations.cpp │ │ └── registration.cpp │ ├── generic_test │ │ ├── CMakeLists.txt │ │ └── generic_test.cpp │ ├── gnu.c │ ├── hello_world.cc │ ├── hello_world.tflite │ ├── hello_world2.cc │ ├── hello_world_model.cc │ ├── lstm2.py │ ├── lstm2.tflite │ ├── lstm_compiled.cc │ ├── lstm_compiled.cc.h │ ├── lstm_test.cc │ ├── mobilenet.cc │ ├── mobilenet2.cc │ ├── mobilenet_v1_0_25_160_quantized.c │ ├── mobilenet_v1_0_25_160_quantized.tflite │ └── truck.c ├── model_main.cpp └── src │ ├── Api.cc 
│ ├── Api.h │ ├── CodeWriter.cc │ ├── CodeWriter.h │ ├── Compiler.cc │ ├── Compiler.h │ ├── Makefile.inc │ ├── MemMap.cc │ ├── MemMap.h │ ├── TypeToString.cc │ ├── TypeToString.h │ ├── main.cc │ └── xtflm_conf.h ├── utils ├── compare_outputs.sh ├── diff_output.py └── main.cpp └── version_check.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | app_ie/.build 3 | app_ie/bin 4 | host_cmd_line_interpreter/build 5 | host_cmd_line_interpreter/bin 6 | .DS_Store 7 | xtflm_interpreter/build 8 | xtflm_interpreter/xtflm_interpreter/libs/ 9 | xtflm_interpreter/xtflm_interpreter/__pycache__/ 10 | xtflm_interpreter/.eggs/ 11 | xtflm_interpreter/xtflm_interpreter.egg-info/ 12 | flash_builder/flash_builder/__pycache__/ 13 | flash_builder/.eggs/ 14 | flash_builder/flash_builder.egg-info/ 15 | *.pyc 16 | build/ 17 | .cache 18 | compile_commands.json 19 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib_tflite_micro/submodules/tflite-micro"] 2 | path = lib_tflite_micro/submodules/tflite-micro 3 | url = git@github.com:tensorflow/tflite-micro.git 4 | [submodule "lib_tflite_micro/submodules/flatbuffers"] 5 | path = lib_tflite_micro/submodules/flatbuffers 6 | url = git@github.com:google/flatbuffers.git 7 | [submodule "lib_tflite_micro/submodules/gemmlowp"] 8 | path = lib_tflite_micro/submodules/gemmlowp 9 | url = https://github.com/google/gemmlowp.git 10 | [submodule "lib_tflite_micro/submodules/ruy"] 11 | path = lib_tflite_micro/submodules/ruy 12 | url = https://github.com/google/ruy.git 13 | [submodule "lib_tflite_micro/submodules/xmos_cmake_toolchain"] 14 | path = lib_tflite_micro/submodules/xmos_cmake_toolchain 15 | url = git@github.com:xmos/xmos_cmake_toolchain.git 16 | -------------------------------------------------------------------------------- /Jenkinsfile: 
-------------------------------------------------------------------------------- 1 | @Library('xmos_jenkins_shared_library@v0.32.0') _ 2 | 3 | getApproval() 4 | 5 | pipeline { 6 | agent { 7 | label "xcore.ai" 8 | } 9 | options { 10 | 11 | // skipDefaultCheckout() 12 | buildDiscarder(xmosDiscardBuildSettings(onlyArtifacts=false)) 13 | timestamps() 14 | } 15 | environment { 16 | REPO = 'lib_tflite_micro' 17 | VIEW = getViewName(REPO) 18 | } 19 | stages { 20 | stage('Build') { 21 | steps { 22 | withVenv { 23 | sh 'git submodule update --depth=1 --init --recursive --jobs 8' 24 | sh 'make init' 25 | sh 'make patch' 26 | sh 'make build' 27 | } 28 | } 29 | } 30 | stage("Test") { 31 | steps { 32 | withVenv { 33 | sh 'make init' 34 | sh 'make test' 35 | } 36 | } 37 | } 38 | } 39 | post { 40 | cleanup { 41 | cleanWs() 42 | } 43 | } 44 | } 45 | // stage("Checkout repo") { 46 | // steps { 47 | // dir('lib_tflite_micro') { 48 | // checkout scm 49 | // stash includes: '**/*', name: 'lib_tflite_micro', useDefaultExcludes: false 50 | // script { 51 | // def short_hash = sh(returnStdout: true, script: "git log -n 1 --pretty=format:'%h'").trim() 52 | // currentBuild.displayName = '#' + BUILD_NUMBER + '-' + short_hash 53 | // } 54 | // } 55 | // } 56 | // post { 57 | // cleanup { 58 | // deleteDir() 59 | // } 60 | // } 61 | // } 62 | /* stage("Cleanup2") { 63 | steps { 64 | // The Jenkins command deleteDir() doesn't seem very reliable, so we're using the basic form 65 | // sh("rm -rf *") 66 | } 67 | }*/ 68 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | patch: 2 | (cd lib_tflite_micro/submodules/tflite-micro && git reset --hard && git apply ../../../patches/tflite-micro.patch) 3 | 4 | build: 5 | (cd lib_tflite_micro && ../version_check.sh) 6 | mkdir -p build 7 | (cd build && cmake .. 
&& make -j8) 8 | 9 | init: 10 | python3 fetch_dependencies.py 11 | pip3 install -r requirements.txt 12 | 13 | test: 14 | (cd host_cmd_line_interpreter && make test) 15 | @echo "" 16 | @echo "All tests PASS" 17 | @echo "" 18 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | TFLite-micro container 2 | ====================== 3 | 4 | This repo is a wrapper for lib_tflite_micro. 5 | This repo contains all third party repos that are needed to use tflite-micro on an XCORE. 6 | It wraps this third-party C++ software up in a library that exposes two interfaces: 7 | 8 | #. A C interface for use on embedded systems (``lib_tflite_micro``). 9 | 10 | #. A command line interface for use on a host (``host_cmd_line_interpreter``) 11 | 12 | The ``lib_tflite_micro`` library depends on ``lib_nn``. 13 | 14 | It provides the following services: 15 | 16 | * lib_tflite_micro/module_build_info: file that allows lib_tflite_micro to be integrated into normal XMOS build flow 17 | 18 | * lib_tflite_micro/src: a function that wraps the C++ interpreter in C (inference_engine.cc), and a collection of 19 | kernels that we add to tflite-micro with XCORE specific operators 20 | 21 | * lib_tflite_micro/api: .h files for the above 22 | 23 | * host_cmd_line_interpreter: a command line wrapper for XTFLM, enabling it to be used over the command line.
24 | 25 | 26 | C interface 27 | ----------- 28 | 29 | It exposes a C interface comprising a data structure (inference_engine_t) 30 | with a few functions that can be used to initialise the structure and/or 31 | 32 | 33 | Getting the XCORE.AI optimiser 34 | ------------------------------ 35 | 36 | You can get the XCORE.AI optimiser through pypi: 37 | 38 | * https://pypi.org/project/xmos-ai-tools/ 39 | 40 | This gets you both a command line interface and python interface to the xcore-opt tool that optimises 41 | a ``.tflite`` file for xcore 42 | perform an inference. The data structure itself can be used to directly 43 | read/write data into tensors, this enables sensors to directly operate 44 | in the tensor space. 45 | 46 | The C interface can be used with the standard XMOS build system, and is 47 | built from the appropriate application directory 48 | 49 | Command line interface 50 | ---------------------- 51 | 52 | The command line interface uses the C interface above and makes it accessible 53 | from the command line, enabling the end user to send data through a TFLite model 54 | using the XTFLM interpreter. The XTFLM interpreter will have XCORE specific operators 55 | (such as 2D convolutions, loading from flash) that are emulated on the host. 56 | 57 | The command line interface is built by invoking ``make install`` at top level or 58 | inside ``host_cmd_line_interpreter``. 59 | 60 | The command line interface can be tested by invoking ``make test`` at either level. 61 | -------------------------------------------------------------------------------- /fetch_dependencies.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2021 XMOS LIMITED. 3 | # This Software is subject to the terms of the XMOS Public Licence: Version 1.
4 | import os 5 | import shutil 6 | import subprocess as sp 7 | 8 | 9 | def remove_read_only(func, path, exc_info): 10 | """Sometimes, Windows complains when removing .git folders""" 11 | import stat 12 | 13 | if not os.access(path, os.W_OK): 14 | # Is the error an access error ? 15 | os.chmod(path, stat.S_IWUSR) 16 | func(path) 17 | else: 18 | raise exc_info 19 | 20 | 21 | def read_repo_list(): 22 | """Return a list of lists: [dir, url, ref]""" 23 | repos = [] 24 | with open("repos.list") as f: 25 | lines = f.readlines() 26 | for line in lines: 27 | repos.append(line.split()) 28 | return repos 29 | 30 | 31 | base_dir = os.path.join(os.getcwd(), "..") 32 | 33 | repos = read_repo_list() 34 | for name, url, ref in repos: 35 | print("\nUpdating " + name + "...") 36 | repo_dir = os.path.join(base_dir, name) 37 | 38 | if os.path.isdir(repo_dir): 39 | # check whether it has the correct URL 40 | old_url = ( 41 | sp.check_output("git config --get remote.origin.url".split(), cwd=repo_dir) 42 | .decode("utf-8") 43 | .strip() 44 | ) 45 | if url == old_url: 46 | print("URL hasn't changed") 47 | else: 48 | print("URL for " + name + " has changed.") 49 | print(" Old: " + old_url) 50 | print(" New: " + url) 51 | print("Deleting repository.") 52 | shutil.rmtree(repo_dir, onerror=remove_read_only) 53 | 54 | # Clone 55 | if not os.path.isdir(repo_dir): 56 | sp.check_call( 57 | "git clone {} {}".format(url, name).split(), 58 | cwd=base_dir, 59 | stdout=sp.PIPE, 60 | stderr=sp.PIPE, 61 | ) 62 | 63 | # Fetch 64 | print("Fetching...") 65 | sp.check_call("git fetch".split(), cwd=repo_dir, stdout=sp.PIPE, stderr=sp.PIPE) 66 | 67 | # Checkout 68 | print("Checking out " + ref + "...") 69 | sp.check_call( 70 | "git checkout {}".format(ref).split(), 71 | cwd=repo_dir, 72 | stdout=sp.PIPE, 73 | stderr=sp.PIPE, 74 | ) 75 | -------------------------------------------------------------------------------- /flash_builder/flash_builder/build_flash_file.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) 2020, XMOS Ltd, All rights reserved 3 | 4 | import sys 5 | import argparse 6 | from flash_builder import FlashBuilder 7 | 8 | parser = argparse.ArgumentParser(description='Build parameter/flash images') 9 | parser.add_argument('--output', default='image.bin', help='output file') 10 | parser.add_argument('--target', default='host', help='"flash" or "host" (default)') 11 | parser.add_argument('files', nargs='+', help='Model and parameter files, - indicates a missing one, must be an even number of files for "flash" (model params model params ...), or a single file for "host" (params)') 12 | 13 | args = parser.parse_args() 14 | 15 | if args.target == 'flash' or args.target == 'xcore': 16 | if len(args.files) %2 != 0: 17 | parser.print_usage() 18 | sys.exit(1) 19 | engines = len(args.files)//2 20 | fb = FlashBuilder(engines) 21 | for i in range(engines): 22 | fb.add_model(i, filename = args.files[2*i]) 23 | fb.add_params(i, filename = args.files[2*i+1]) 24 | 25 | fb.flash_file(args.output) 26 | 27 | elif args.target == 'host': 28 | if len(args.files) != 1: 29 | parser.print_usage() 30 | sys.exit(1) 31 | output = FlashBuilder.create_params_file(args.output, input_filename = args.files[0]) 32 | 33 | else: 34 | parser.print_usage() 35 | sys.exit(1) 36 | 37 | -------------------------------------------------------------------------------- /flash_builder/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 XMOS LIMITED. 
This Software is subject to the terms of the 2 | # XMOS Public License: Version 1 3 | import setuptools 4 | 5 | EXCLUDES = ["README.rst"] 6 | 7 | INSTALL_REQUIRES = [ 8 | ] 9 | 10 | setuptools.setup( 11 | name="flash_builder", 12 | packages=setuptools.find_packages(exclude=EXCLUDES), 13 | python_requires=">=3.8.0", 14 | install_requires=INSTALL_REQUIRES, 15 | extras_require={}, 16 | package_data={}, 17 | author="XMOS", 18 | author_email="support@xmos.com", 19 | description="XMOS Flash Builder for TensorFlow Lite model interpreter.", 20 | license="LICENSE.txt", 21 | keywords="xmos xcore", 22 | use_scm_version={ 23 | "root": "..", 24 | "relative_to": __file__, 25 | "version_scheme": "post-release", 26 | }, 27 | setup_requires=["setuptools_scm"], 28 | ) 29 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | 3 | #********************** 4 | # Setup XMOS toolchain 5 | #********************** 6 | #include("${CMAKE_CURRENT_SOURCE_DIR}/../cmake/xmos_toolchain.cmake") 7 | 8 | project(model_runner VERSION 1.0.0) 9 | 10 | enable_language(CXX C) 11 | 12 | #********************** 13 | # Disable in-source build. 14 | #********************** 15 | if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") 16 | message(FATAL_ERROR "In-source build is not allowed! 
Please specify a build folder.\n\tex:cmake -B build") 17 | endif() 18 | 19 | #********************** 20 | # install 21 | #********************** 22 | set(INSTALL_DIR "${PROJECT_SOURCE_DIR}/bin") 23 | 24 | #********************** 25 | # Build flags 26 | #********************** 27 | 28 | set(CMAKE_CXX_FLAGS "-std=c++17" CACHE STRING "C++ Compiler Base Flags" FORCE) 29 | 30 | set(BUILD_FLAGS 31 | "-O3" 32 | "-D__xtflm_conf_h_exists__" 33 | "-DNN_USE_REF" 34 | ) 35 | 36 | #********************** 37 | # firmware targets 38 | #********************** 39 | add_executable(xtflm_interpreter_cmdline) 40 | target_compile_options(xtflm_interpreter_cmdline PRIVATE ${BUILD_FLAGS}) 41 | target_link_options(xtflm_interpreter_cmdline PRIVATE ${BUILD_FLAGS}) 42 | target_link_libraries(xtflm_interpreter_cmdline PRIVATE "-lpthread") 43 | 44 | set(TOP_DIR 45 | "${CMAKE_CURRENT_SOURCE_DIR}/..") 46 | 47 | include(../cmakefiles/xtflm.cmake) 48 | 49 | target_sources(xtflm_interpreter_cmdline 50 | PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src/main.cc" 51 | PRIVATE ${ALL_SOURCES} 52 | ) 53 | 54 | list(APPEND ALL_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/src") 55 | 56 | target_include_directories(xtflm_interpreter_cmdline 57 | PRIVATE ${ALL_INCLUDES} 58 | ) 59 | 60 | install(TARGETS xtflm_interpreter_cmdline DESTINATION ${INSTALL_DIR}) 61 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | mkdir -p build 3 | (cd build && cmake .. 
&& make install -j4) 4 | 5 | clean: 6 | rm -rf build 7 | 8 | test: install 9 | (cd tests && make test) 10 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/README.rst: -------------------------------------------------------------------------------- 1 | Command line interface for XTFLM 2 | =============================== 3 | 4 | Build 5 | ----- 6 | 7 | 8 | To build use the following command sequence:: 9 | 10 | (mkdir build && cd build && cmake .. && make install) 11 | 12 | Usage 13 | ----- 14 | 15 | Use it in either of the two following ways:: 16 | 17 | bin/xtflm_interpreter_cmdline model.tflite input-file output-file 18 | bin/xtflm_interpreter_cmdline model.tflite -i files ... -o files 19 | 20 | input and output are raw data. The first form only works when the network 21 | expects a single input and has a single output. The second form works with 22 | any number of inputs and outputs 23 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/src/xtflm_conf.h: -------------------------------------------------------------------------------- 1 | #define XTFLM_OPERATORS (250) 2 | #define NUM_OUTPUT_TENSORS (4) 3 | #define NUM_INPUT_TENSORS (4) 4 | #define MAX_DEBUG_LOG_LENGTH (1024) 5 | #define AISRV_GPIO_LENGTH (4) 6 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/Makefile: -------------------------------------------------------------------------------- 1 | test: 2 | (cd test_mobnet && make test) 3 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/test_mobnet/Makefile: -------------------------------------------------------------------------------- 1 | #TODO: derive flash from params 2 | #TODO: derive params and tflite from tflite 3 | 4 | test: 5 | @rm -f out 6 | ../../bin/xtflm_interpreter_cmdline model_mobilenet_v1.tflite 
model_mobilenet_v1.params baboon.raw out 7 | @diff out classes.raw 8 | @rm -f out 9 | @echo 'PASS: test_smoke' 10 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/test_mobnet/baboon.raw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/host_cmd_line_interpreter/tests/test_mobnet/baboon.raw -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/test_mobnet/classes.raw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/host_cmd_line_interpreter/tests/test_mobnet/classes.raw -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/test_mobnet/mobilenet_v1_0.25_128.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/host_cmd_line_interpreter/tests/test_mobnet/mobilenet_v1_0.25_128.tflite -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/test_mobnet/model_mobilenet_v1.params: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/host_cmd_line_interpreter/tests/test_mobnet/model_mobilenet_v1.params -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/test_mobnet/model_mobilenet_v1.tflite: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/host_cmd_line_interpreter/tests/test_mobnet/model_mobilenet_v1.tflite -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/write_reference_output.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import tensorflow as tf 4 | 5 | 6 | def load_raw_data(filename, dtype=np.int8): 7 | return np.fromfile(filename, dtype=dtype) 8 | 9 | 10 | def save_raw_data(filename, data, dtype=np.int8): 11 | data.astype(dtype).tofile(filename) 12 | 13 | 14 | def main(args): 15 | interpreter = tf.lite.Interpreter(model_path=args.model_path) 16 | interpreter.allocate_tensors() 17 | input_details = interpreter.get_input_details() 18 | output_details = interpreter.get_output_details() 19 | input_data = load_raw_data(args.input_file).reshape( 20 | input_details[0]["shape"]) 21 | interpreter.set_tensor(input_details[0]["index"], input_data) 22 | interpreter.invoke() 23 | output_data = interpreter.get_tensor(output_details[0]["index"]) 24 | save_raw_data(args.output_file, output_data) 25 | 26 | 27 | if __name__ == "__main__": 28 | parser = argparse.ArgumentParser( 29 | description="Feed raw input to a regular TFLite model and save the output." 30 | ) 31 | parser.add_argument("model_path", type=str, 32 | help="Path to the .tflite model file.") 33 | parser.add_argument("input_file", type=str, 34 | help="Path to the raw input file.") 35 | parser.add_argument( 36 | "output_file", type=str, help="Path to save the raw output file." 
37 | ) 38 | args = parser.parse_args() 39 | main(args) 40 | -------------------------------------------------------------------------------- /lib_tflite_micro/api/fast_flash.h: -------------------------------------------------------------------------------- 1 | #ifndef _FAST_FLASH_H_ 2 | #define _FAST_FLASH_H_ 3 | 4 | #include 5 | 6 | #ifdef __XC__ 7 | /** Fast flash library. 8 | * Before calling any of the functions in here, lib_quad_flash must be initialised as normal by using 9 | * fl_connectToDevice(qspi, flash_spec, n_flash_spec). 10 | * After that, a call to fast_flash_init shall be made. 11 | * After that, a sequence of calls to fast_flash_read can be made. 12 | * 13 | * The data partition must start with the following 32 bytes: **NOTE: REMOVE THE +4 in fast_flash_init** 14 | * 15 | * 0xff, 0x00, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 16 | * 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 17 | * 0x31, 0xf7, 0xce, 0x08, 0x31, 0xf7, 0xce, 0x08, 18 | * 0x9c, 0x63, 0x9c, 0x63, 0x9c, 0x63, 0x9c, 0x63 19 | * 20 | * This pattern is designed to create maximum difficulties electrically and is used 21 | * to calibrate the electrical settings. Note that this pattern must be nibble reversed 22 | * before being written to flash; just like all other data. 23 | * The rest of the data partition can be used as normal 24 | */ 25 | 26 | /** Function that initialises the fast_flash library 27 | * 28 | * \param qspi ports that connect to flash 29 | * 30 | * \returns a negative value of -1..-5 if the window is too small (size 0..4) 31 | * zero if successful 32 | */ 33 | int fast_flash_init(fl_QSPIPorts &qspi); 34 | 35 | /** Function that reads a sequential set of bytes from memory. 36 | * This function assumes that nibbles have been reversed ((x << 4) & 0xf0 | (x >> 4) & 0x0f) 37 | * before the data was written to flash. 38 | * Note that reading 32 bytes from offset 0 shall yield the special pattern above. 
39 | * 40 | * \param qspi ports that connect to flash 41 | * \param addr address in flash data segment 42 | * \param word_count Number of words to read 43 | * \param read_data array to store data in to. 44 | * \param c_out_data optional channel end over which data is out() instead. 45 | */ 46 | void fast_flash_read(fl_QSPIPorts &qspi, unsigned addr, unsigned word_count, unsigned read_data[], chanend ?c_data_out); 47 | 48 | #else 49 | int fast_flash_init(fl_QSPIPorts *qspi); 50 | void fast_flash_read(fl_QSPIPorts *qspi, unsigned addr, unsigned word_count, unsigned read_data[], chanend_t c_data_out); 51 | #endif 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /lib_tflite_micro/api/flash_server.h: -------------------------------------------------------------------------------- 1 | #ifndef _flash_server_h_ 2 | #define _flash_server_h_ 3 | 4 | #include 5 | 6 | /** Struct holding the "file system" meta information for each client 7 | * The flash is partitioned and each client has a section in the flash 8 | * that stores data relevant to that particular client. For example, models 9 | * parameters, code, etc. 10 | * 11 | * This struct caches all information necessary for a client for fast access. 12 | * The main program must allocate this structure, one per client, prior to 13 | * calling the flash server. 14 | * 15 | * If there is more than one flash device connected to the device, there can be 16 | * multiple flash servers. 17 | */ 18 | typedef struct flash { 19 | int model_start; ///< Start address for model. 20 | int parameters_start; ///< Start address of parameters. 21 | int operators_start; ///< Start address for operator-binaries. 22 | int execute_in_place_start; ///< Start address for operator-binaries. 23 | } flash_t; 24 | 25 | /** Type representing the commands that the flash server accepts */ 26 | typedef enum flash_command { 27 | FLASH_READ_PARAMETERS = 28 | 0, ///< Read a set of parameters. 
         // TODO: share with lib_tflite_micro
  FLASH_READ_PARAMETERS_ASYNC = 1, ///< Read parameters asynchronously.
  FLASH_READ_SYNCHRONIZE = 2,      ///< Complete async read.
  FLASH_READ_XIP =
      3, ///< Read code to execute-in-place through L2 cache - future extension
  FLASH_SERVER_QUIT = 4, ///< Shut the flash server thread down.
  FLASH_SERVER_INIT = 5, // Initialize flash server with fast flash pattern speed match setup
  //FLASH_READ_PARAMETERS_COMPRESSED_FLOAT = 6, // Read a set of compressed parameters
} flash_command_t;

/**
 * Function that runs a flash-server. A flash server is a thread that serves one
 * or more clients. There is one flash server per flash-device, and the server
 * can serve clients on one or more tiles.
 *
 * The flash server takes the following commands:
 *   - Read a whole model from the flash.
 *   - Read some parameters from the flash
 *   - (future extension) Read code for an operator from flash
 *
 * This function does, at present, never return. It could be made to return if
 * all clients close their connection
 *
 * \param c_flash_clients Array of channels; one per client.
 *                        Each client is served in turn
 * \param headers         Space to store a header for each client.
 *                        The header for the client describes the local
 *                        "filesystem" for that client
 * \param n_flash_clients Number of clients. The arrays in the first and second
 *                        parameters should have this many entries
 * \param qspi            Structure holding the quad-flash ports. This contains
 *                        three Ports and a clock-block: the CS_N port, the CLK
 *                        port, the DATA port and a clock block to be used for
 *                        the flash.
 * \param flash_spec      Array holding specifications of flash devices, as per
 *                        the libquadflash documentation
 * \param n_flash_spec    Number of elements in the spec array.
 */
#ifdef __XC__
void flash_server(chanend c_flash_clients[], flash_t headers[],
                  int n_flash_clients, fl_QSPIPorts &qspi,
                  fl_QuadDeviceSpec flash_spec[], int n_flash_spec);
#else
void flash_server(chanend_t *c_flash_clients, flash_t *headers,
                  int n_flash_clients, fl_QSPIPorts *qspi,
                  fl_QuadDeviceSpec *flash_spec, int n_flash_spec);
#endif

#endif
--------------------------------------------------------------------------------
/lib_tflite_micro/api/ioserver.h:
--------------------------------------------------------------------------------
#ifndef _io_server_h_
#define _io_server_h_

#ifdef __XC__

void ioserver(chanend c_model[], unsigned n_model);

#else

#include
#include
// NOTE(review): include targets lost in extraction; confirm against repository.

#define CMD_LENGTH_BYTES (3) // CMD, Model, Tensor

/* Command/response codes exchanged with the ioserver (note: 4 is unused). */
#define IOSERVER_INVOKE 1
#define IOSERVER_TENSOR_SEND_OUTPUT 2
#define IOSERVER_TENSOR_RECV_INPUT 3
#define IOSERVER_ACK 5
#define IOSERVER_NACK 6
#define IOSERVER_RESET 7
#define IOSERVER_EXIT 8

#define MAX_PACKET_SIZE (512)
#define MAX_PACKET_SIZE_WORDS (MAX_PACKET_SIZE / 4)

#ifdef __cplusplus
extern "C" {
#endif
/* Receive the next command from the server channel; the addressed tensor
 * index is stored through tensor_num. */
unsigned int ioserver_command_receive(chanend_t c_server, unsigned *tensor_num);
/* Send an ACK/NACK code back to the server. */
void ioserver_command_acknowledge(chanend_t c_server, unsigned int ack);
/* Stream n words of output-tensor data to the server. */
void ioserver_tensor_send_output(chanend_t c_server, unsigned int *data,
                                 unsigned int n);

/* Receive n words of input-tensor data from the server. */
void ioserver_tensor_recv_input(chanend_t c_server, unsigned int *data,
                                unsigned int n);

/* Server loop: serves n_model models, one channel each. */
void ioserver(chanend_t c_model[], unsigned n_model);
#ifdef __cplusplus
}
#endif

#endif

#endif
--------------------------------------------------------------------------------
/lib_tflite_micro/api/load_weights.h:
--------------------------------------------------------------------------------
#ifndef _load_weights_h_
#define _load_weights_h_

#include
// NOTE(review): include target lost in extraction; confirm against repository.
#include "thread_call.h"

#define LOAD_WEIGHTS_MAX_BLOCKS 2

/**
 * Function that connects to a flash or tile-ram server and loads a series of weights.
 * This function completes when the data is loaded.
 *
 * @param c_flash_or_tile channel-end connecting to the flash server
 *
 * @param data_ptr array of pointers where the loaded data should be scattered
 *
 * @param data_sizes_in_words number of words for each block
 *
 * @param N number of blocks in data_ptr and data_sizes_in_words
 *
 * @param external_addr address in flash or tile ram
 *
 * @param model_thread_count number of threads available
 *
 * @param tif thread_info structure for multithreading
 */
void load_weights_synchronous(chanend_t c_flash_or_tile, int *data_ptr[], int data_sizes_in_words[],
                              int N, int external_addr, int model_thread_count, thread_info_t *tif);

/**
 * Function that connects to a flash server and loads a series of weights.
 * This function continues loading after the call completes; pair each call
 * with load_weights_asynchronous_wait().
 *
 * @param c_flash_or_tile channel-end connecting to the flash server
 *
 * @param data_ptr array of pointers where the loaded data should be scattered
 *
 * @param data_sizes_in_words number of words for each block
 *
 * @param N number of blocks in data_ptr and data_sizes_in_words
 *
 * @param external_addr address in flash or tile ram
 */
void load_weights_asynchronous(chanend_t c_flash_or_tile, int *data_ptr[], int data_sizes_in_words[],
                               int N, int external_addr);

/**
 * Function that connects to a flash server and waits for the last outstanding load to complete
 * Only one asynchronous load should be outstanding at any one time.
 *
 * @param c_flash_or_tile channel-end connecting to the flash server
 */
void load_weights_asynchronous_wait(chanend_t c_flash_or_tile);

/**
 * Function that connects to a flash or tile ram server and kills it.
 *
 * @param c_flash_or_tile channel-end connecting to the flash server
 */
void load_weights_quit(chanend_t c_flash_or_tile);

#endif
--------------------------------------------------------------------------------
/lib_tflite_micro/api/memory_parallel_transport.h:
--------------------------------------------------------------------------------
// NOTE(review): this header has no include guard — confirm whether one is
// needed, or whether it is only ever included once per translation unit.
#include
// NOTE(review): include target lost in extraction; confirm against repository.
#include "thread_call.h"

#ifdef __XC__
#include
#else
#include
#endif

#ifdef __XC__

extern void memory_parallel_receive(chanend c, uint32_t data[], uint32_t bytes);
extern void memory_parallel_receive_thread_call(chanend c, uint32_t data[], uint32_t bytes, thread_info_t &ptr);
extern void memory_parallel_send(chanend c, uint32_t data[], uint32_t bytes);

#else

/** Function that receives a block of data.
 * The number of bytes must be a multiple of 4.
 * This function creates three threads and three channel ends in order to
 * make full use of the bandwidth of the switch.
 *
 * \param c     channel end to the sender
 * \param data  pointer where data must be stored
 * \param bytes number of bytes that will be received.
 */
extern void memory_parallel_receive(chanend_t c, uint32_t *data, uint32_t bytes);

/** Function that receives a block of data.
 * The number of bytes must be a multiple of 4.
 * This function assumes that at least three threads have been created by the
 * thread_call library and will use those together with three fresh channel
 * ends in order to make full use of the bandwidth of the switch.
 *
 * \param c     channel end to the sender
 * \param data  pointer where data must be stored
 * \param bytes number of bytes that will be received.
 */
extern void memory_parallel_receive_thread_call(chanend_t c, uint32_t *data, uint32_t bytes, thread_info_t *ptr);

/** Function that sends a block of data.
 * The number of bytes must be a multiple of 4.
 * This function creates three threads and three channel ends in order to
 * make full use of the bandwidth of the switch.
 *
 * \param c     channel end to the receiver
 * \param data  pointer where data must be loaded from
 * \param bytes number of bytes that will be sent.
 */
extern void memory_parallel_send(chanend_t c, uint32_t *data, uint32_t bytes);

#endif
--------------------------------------------------------------------------------
/lib_tflite_micro/api/tile_ram_server.h:
--------------------------------------------------------------------------------
#ifndef _tile_ram_server_h_
#define _tile_ram_server_h_

#include "flash_server.h"

/**
 * Function that runs a flash-server. A flash server is a thread that serves one
 * or more clients. There is one flash server per flash-device, and the server
 * can serve clients on one or more tiles.
 *
 * The flash server takes the following commands:
 *   - Read a whole model from the flash.
 *   - Read some parameters from the flash
 *   - (future extension) Read code for an operator from flash
 *
 * This function does, at present, never return. It could be made to return if
 * all clients close their connection
 *
 * \param c_tile_ram_clients Array of channels; one per client.
 *                           Each client is served in turn
 *                           NOTE MUST BE 1 FOR NOW
 * \param headers            Space to store a header for each client
 *                           The header for the client describes the local
 *                           "filesystem" for that client
 * \param n_tile_ram_clients Number of clients. The arrays in the first and
 *                           second parameters should have this many entries
 * \param data               Tile ram data
 * \param n_tile_ram_flash   Number of bytes in array
 *                           NOTE(review): this documented parameter does not
 *                           appear in either prototype below — confirm whether
 *                           the length is implied elsewhere.
 */
#ifdef __XC__
void tile_ram_server(chanend c_tile_ram_clients[], flash_t headers[],
                     int n_tile_ram_clients, const int8_t data[]);
#else
void tile_ram_server(chanend_t *c_tile_ram_clients, flash_t *headers,
                     int n_tile_ram_clients, const int8_t *data);
#endif

#endif
--------------------------------------------------------------------------------
/lib_tflite_micro/api/version.h:
--------------------------------------------------------------------------------
// Copyright (c) 2020, XMOS Ltd, All rights reserved
#ifndef XCORE_VERSION_H_
#define XCORE_VERSION_H_

namespace lib_tflite_micro {

// Semantic version of lib_tflite_micro (also recorded per-model in the
// metadata struct of xcore_shared_config.h).
static const unsigned major_version = 0;
static const unsigned minor_version = 7;
static const unsigned patch_version = 0;

} // namespace lib_tflite_micro

#endif // XCORE_VERSION_H_
--------------------------------------------------------------------------------
/lib_tflite_micro/api/xcore_config.h:
--------------------------------------------------------------------------------
#ifndef XCORE_CONFIG_H_
#define XCORE_CONFIG_H_

#include "../src/thread_call.h"

struct xc_context_config_t {
  // This is the thread count specified in the compiler.
  // It's used by lookup op, beta float ops etc to split up work
  // in the Prepare phase.
  // Conv ops have their own thread count as the thread work is
  // calculated in the compiler.
  int model_thread_count;
  thread_info_t thread_info; // worker-thread bookkeeping (see thread_call.h)
  void *UNSAFE weights_data_ptr; // DDR ptr or channel to flash/tile server.
  void *UNSAFE paging_ptr; // DDR ptr for paging in/out tensor arena.
};

#endif // XCORE_CONFIG_H_
--------------------------------------------------------------------------------
/lib_tflite_micro/api/xcore_device_memory.h:
--------------------------------------------------------------------------------
// Copyright (c) 2020, XMOS Ltd, All rights reserved
#ifndef XCORE_DEVICE_MEMORY_H_
#define XCORE_DEVICE_MEMORY_H_

#include
#include
// NOTE(review): include targets lost in extraction — presumably <stddef.h>
// and <stdint.h> for size_t/uintptr_t; confirm against repository.

#ifdef __cplusplus
extern "C" {
#endif

#ifdef XCORE
// Record whether <time.h> was pulled in before the platform header below.
#ifdef _TIME_H_
#define _clock_defined
#endif
#include

#define STRINGIFY(NAME) #NAME
// Load the linker-provided stack requirement (in words) of function NAME.
#define GET_STACKWORDS(DEST, NAME) \
  asm("ldc %[__dest], " STRINGIFY(NAME) ".nstackwords" : [__dest] "=r"(DEST))
// As GET_STACKWORDS, but converted to bytes with two words of headroom.
#define GET_STACKSIZE(DEST, NAME) \
  { \
    size_t _stack_words; \
    asm("ldc %[__dest], " STRINGIFY(NAME) ".nstackwords" \
        : [__dest] "=r"(_stack_words)); \
    DEST = (_stack_words + 2) * 4; \
  }
// Address-range predicates for the xcore memory map (internal RAM,
// external memory, software-defined memory).
#define IS_RAM(a) (((uintptr_t)a >= 0x80000) && ((uintptr_t)a <= 0x100000))
#define IS_NOT_RAM(a) ((uintptr_t)a > 0x100000)
#define IS_EXTMEM(a) \
  (((uintptr_t)a >= 0x10000000) && (((uintptr_t)a <= 0x20000000)))
#define IS_SWMEM(a) \
  (((uintptr_t)a >= 0x40000000) && (((uintptr_t)a <= 0x80000000)))

#ifdef USE_SWMEM
#ifndef USE_QSPI_SWMEM_DEV
void swmem_setup();
#else
#include
void swmem_setup(chanend_t ctrl_swmem_c);
#endif // USE_QSPI_SWMEM_DEV
#endif // USE_SWMEM

void swmem_handler(void *ignored);
void swmem_teardown();

#else // not XCORE

// Host builds: stack queries are meaningless and every address is "RAM".
#define GET_STACKSIZE(DEST, NAME) DEST = 0
#define GET_STACKWORDS(DEST, NAME) DEST = 0
#define IS_RAM(a) (1)
#define IS_NOT_RAM(a) (0)

#endif // XCORE

void memload(void *dest, void *src, size_t size);

#ifdef __cplusplus
}
#endif

#endif // XCORE_DEVICE_MEMORY_H_
--------------------------------------------------------------------------------
/lib_tflite_micro/api/xcore_shared_config.h:
--------------------------------------------------------------------------------
// Copyright (c) 2020, XMOS Ltd, All rights reserved
#ifndef XCORE_SHARED_CONFIG_H_
#define XCORE_SHARED_CONFIG_H_

#include "lib_nn/api/nn_arch.h"

namespace shared_config {

// This string is used as a key to store the shared config
// between xformer and lib_tflite_micro in the flatbuffer
constexpr char xcoreMetadataName[] = "xcoreSharedConfig";

constexpr int xcoreMaxNumOfTensors = 25;

// Describes one tensor that lives in external memory.
struct tensor_info_t {
  uint32_t index;
  uint32_t external_address;
  uint32_t size;
};

// The metadata struct must be aligned to 16 bytes
// We cannot use alignas(16) yet in xcore
struct xcore_metadata_t {
  // Target arch can be XS3A = 0, or VX4A = 1
  nn_target_arch_t target_arch;
  // Versions of libraries used to build the model
  uint32_t lib_nn_major_version;
  uint32_t lib_nn_minor_version;
  uint32_t lib_nn_patch_version;
  uint32_t lib_tflite_micro_major_version;
  uint32_t lib_tflite_micro_minor_version;
  uint32_t lib_tflite_micro_patch_version;
  uint32_t xformer_major_version;
  uint32_t xformer_minor_version;
  uint32_t xformer_patch_version;
  // Number of threads required from the runtime to execute the model
  uint32_t required_thread_count;
  // Number of input tensors loaded from external memory
  uint32_t num_external_input_tensors;
  // Number of output tensors loaded from external memory
  uint32_t num_external_output_tensors;
  tensor_info_t external_input_tensors_data[xcoreMaxNumOfTensors];
  tensor_info_t external_output_tensors_data[xcoreMaxNumOfTensors];
};

} // namespace shared_config

#endif // XCORE_SHARED_CONFIG_H_
--------------------------------------------------------------------------------
/lib_tflite_micro/src/fast_flash_read_loop.S:
--------------------------------------------------------------------------------
.text

.globl fast_read_loop
.globl fast_read_loop.nstackwords
.section .cp.rodata.cst4,"aMc",@progbits,4
.cc_top .Const0x01101011.data,.Const0x01101011
.align 4
.type .Const0x01101011,@object
.size .Const0x01101011, 4
.Const0x01101011:
.long 0x01101011
.cc_bottom .Const0x01101011.data

.text
.align 16
.type fast_read_loop,@function
.cc_top fast_read_loop.function,fast_read_loop

// Timing-critical QSPI read loop. Issues the read command + address on the
// data port, then streams `word_count` words either into memory or out over
// an optional channel end (r8 selects which; see fast_flash.h).
fast_read_loop:
    ENTSP_lu6 8
    std r4, r5, sp[1]
    std r6, r7, sp[2]
    std r8, r9, sp[3]
    ldc r7, 0
    stw r10, sp[1]
    ldw r4, sp[9]
    ldw r8, sp[11]
    ldc r6, 27
    add r9, r3, r6 // r9 <- 27 + read_adj
    shl r6, r1, 8
    or r6, r6, r2 // r6 <- (addr << 8) | mode
    byterev r11, r6

    unzip r11, r6, 2
    zip r6, r11, 2
    // r6 <- nibble_swapped(byte_revved(r6))
    ldw r3, r0[2]
    ldc r1, 1
    outpw res[r3], r1, 4
    ldw r2, r0[3]
    setc res[r2], 15
    syncr res[r3]
    setc res[r2], 7
    ldw r11, r0[0]
    out res[r11], r7
    ldc r0, 28
    shl r7, r6, r0
    ldw r10, cp[.Const0x01101011]
    or r7, r7, r10
    out res[r3], r7
    setc res[r2], 15
    shr r6, r6, 4
    setpsc res[r3], r0
    out res[r3], r6
    ldc r6, 18
    setpt res[r3], r6
    ldw r6, sp[10]
    in r10, res[r3]
    setpt res[r3], r9

    // This is where Timing matters
    in r0, res[r3]
    bf r8, SendToMemory

SendToChannel:
    out res[r8], r0
    sub r6, r4, 1
    bf r6, EndOfChanLoop
ChanLoop:
    in r4, res[r3]
    out res[r8], r4
    sub r6, r6, 1
    bt r6, ChanLoop
EndOfChanLoop:
    outct res[r8], 1
    bu EndOfFunction

SendToMemory:
    stw r0, r6[0]
    sub r4, r4, 1
    bf r4, EndOfMemoryLoop
    add r6, r6, 4
MemoryLoop:
    in r5, res[r3]
    stw r5, r6[0]
    add r6, r6, 4
    sub r4, r4, 1
    bt r4, MemoryLoop
EndOfMemoryLoop:
EndOfFunction:
    setc res[r2], 7
    out res[r11], r1
    ldd r4, r5, sp[1]
    ldd r6, r7, sp[2]
    ldd r8, r9, sp[3]
    ldw r10, sp[1]
    retsp 8

.cc_bottom fast_read_loop.function
.set fast_read_loop.nstackwords,8
.set fast_read_loop.maxcores,1
.set fast_read_loop.maxtimers,0
.set fast_read_loop.maxchanends,0
.Ltmp2:
.size fast_read_loop, .Ltmp2-fast_read_loop
--------------------------------------------------------------------------------
/lib_tflite_micro/src/load_weights.c:
--------------------------------------------------------------------------------
#include
// NOTE(review): include target lost in extraction (assert() is used below,
// so presumably <assert.h>); confirm against repository.
#include "load_weights.h"
#include "flash_server.h"
#include "memory_parallel_transport.h"

/* See load_weights.h: blocking load of N scattered blocks from the flash or
 * tile-ram server. The server replies whether the parallel (multi-lane,
 * tile-to-tile) protocol is in use; both protocols are implemented below. */
void load_weights_synchronous(chanend_t c_flash_or_tile, int *data_ptrs[], int data_sizes_in_words[],
                              int N, int external_addr, int model_thread_count, thread_info_t *tif) {
  // Parallel mode is for reading weights from another tile
  chan_out_word(c_flash_or_tile, FLASH_READ_PARAMETERS);
  int use_parallel_mode = chan_in_word(c_flash_or_tile);
  if (!use_parallel_mode) {
    chan_out_word(c_flash_or_tile, external_addr);

    // One read request covers all N blocks: send the total byte count up
    // front, then scatter the incoming words into the per-block buffers.
    int32_t total_bytes = 0;
    for (int i = 0; i < N; ++i) {
      total_bytes += data_sizes_in_words[i] * 4;
    }
    chan_out_word(c_flash_or_tile, total_bytes);

    for (int i = 0; i < N; ++i) {
      int *data_ptr = data_ptrs[i];
      // The sizes are in bytes and we read from flash in words
      int op_data_size_in_words = data_sizes_in_words[i];
#pragma clang loop unroll_count(4)
      for (int j = 0; j < op_data_size_in_words; j++) {
        // We are reading directly from flash chanend here.
        // We use chanend_in_word() instead of chan_in_word() to
        // avoid handshake.
        // Adding something like a printf() within this loop
        // might slow it down enough to corrupt the received data.
        ((uint32_t *)data_ptr)[j] = chanend_in_word(c_flash_or_tile);
      }
    }
    // As there is no handshake, we have to accept the end token
    // to close the chanend
    chanend_check_end_token(c_flash_or_tile);
  } else {
    // The parallel mode uses four threads and can only work if
    // the model has been compiled with at least four threads.
    assert(model_thread_count >= 4 &&
           "At least four threads are required for parallel read from "
           "another tile!");
    // First block: address and byte count, then a four-lane parallel receive.
    chan_out_word(c_flash_or_tile, external_addr);
    chan_out_word(c_flash_or_tile, data_sizes_in_words[0]*4);
    external_addr += data_sizes_in_words[0]*4;
    memory_parallel_receive_thread_call(c_flash_or_tile, (uint32_t *)data_ptrs[0],
                                        4*data_sizes_in_words[0], tif);
    for (int i = 1; i < N; ++i) {
      // Re-arm the server for the next block (0 = continue, then sync word)
      // before sending its address/size and receiving it in parallel.
      chan_out_word(c_flash_or_tile, 0);
      chan_in_word(c_flash_or_tile);
      chan_out_word(c_flash_or_tile, external_addr);
      chan_out_word(c_flash_or_tile, data_sizes_in_words[i]*4);
      external_addr += data_sizes_in_words[i]*4;
      memory_parallel_receive_thread_call(c_flash_or_tile, (uint32_t *)data_ptrs[i],
                                          4*data_sizes_in_words[i], tif);
    }
  }
}

/* See load_weights.h: queue an asynchronous load of N blocks; the server
 * performs the scatter itself (it is sent each destination pointer). Pair
 * with load_weights_asynchronous_wait(). */
void load_weights_asynchronous(chanend_t c_flash_or_tile, int *data_ptrs[], int data_sizes_in_words[],
                               int N, int external_addr) {
  chan_out_word(c_flash_or_tile, FLASH_READ_PARAMETERS_ASYNC);
  chan_out_word(c_flash_or_tile, external_addr);
  chan_out_word(c_flash_or_tile, N);

  for (int i = 0; i < N; ++i) {
    chan_out_word(c_flash_or_tile, data_sizes_in_words[i] * 4);
    // NOTE(review): pointer truncated to int — fine on 32-bit xcore,
    // not portable to 64-bit hosts.
    chan_out_word(c_flash_or_tile, (int) data_ptrs[i]);
  }
}

/* Block until the outstanding asynchronous load completes; the server
 * signals completion with an end token. */
void load_weights_asynchronous_wait(chanend_t c_flash_or_tile) {
  chanend_check_end_token(c_flash_or_tile);
}

/* Ask the flash/tile-ram server thread to exit. */
void load_weights_quit(chanend_t c_flash_or_tile) {
  chan_out_word(c_flash_or_tile, FLASH_SERVER_QUIT);
}
--------------------------------------------------------------------------------
/lib_tflite_micro/src/memory_parallel_transport.c:
--------------------------------------------------------------------------------
#include
#include
#include
// NOTE(review): include targets lost in extraction; confirm against repository.
#include "thread_call.h"
#include "memory_parallel_transport.h"

// Destination of a parallel receive: `whole` is the number of complete
// 96-byte super-blocks, `data` the base of the destination buffer.
typedef struct {
  int whole;
  uint32_t *data;
} destination_description_t;

DECLARE_JOB(receive_rx, (destination_description_t *, chanend_t, int));
DECLARE_JOB(transmit_tx, (chanend_t, int, int, uint32_t *));

// Low-level per-lane workers, implemented in memory_transport_ll.S.
extern void receive_rx(destination_description_t *d, chanend_t c, int offset);
extern void transmit_tx(chanend_t C, int offset, int n, uint32_t *data);

/* See memory_parallel_transport.h: receive byte_count bytes over four
 * parallel lanes (three freshly allocated channel ends plus c itself),
 * spawning the three extra receive threads with PAR_JOBS. */
extern void memory_parallel_receive(chanend_t c, uint32_t *data,
                                    uint32_t byte_count) {
  int whole = byte_count / 96;
  int last = byte_count - whole * 96;
  destination_description_t dest = {whole, data};
  chanend_t other_c[3];
  // Exchange channel-end identifiers with the sender so each extra lane
  // is routed to its peer on the other side.
  for(int i = 0; i < 3; i++) {
    other_c[i] = chanend_alloc();
    chan_out_word(c, other_c[i]);
    chanend_t other_side = chan_in_word(c);
    chanend_set_dest(other_c[i], other_side);
  }
  PAR_JOBS(
      PJOB(receive_rx, (&dest, other_c[0], 0)),
      PJOB(receive_rx, (&dest, other_c[1], 1)),
      PJOB(receive_rx, (&dest, other_c[2], 2)),
      PJOB(receive_rx, (&dest, c, 3))
  );
  // Close each extra lane with a control-token handshake in both directions.
  for(int i = 0; i < 3; i++) {
    chanend_out_control_token(other_c[i], 1);
    chanend_check_control_token(other_c[i], 1);
  }
  // The tail (< 96 bytes) arrives over the original channel.
  chan_in_buf_word(c, &data[whole*24], last>>2);
  for(int i = 0; i < 3; i++) {
    chanend_free(other_c[i]);
  }
}

/* See memory_parallel_transport.h: sending counterpart of
 * memory_parallel_receive; note the identifier exchange runs in the
 * opposite order (in before out) to pair with the receiver. */
extern void memory_parallel_send(chanend_t c, uint32_t *data, uint32_t byte_count) {
  int whole = byte_count / 96;
  int last = byte_count - whole * 96;
  chanend_t other_c[3];
  for(int i = 0; i < 3; i++) {
    other_c[i] = chanend_alloc();
    chanend_t other_side = chan_in_word(c);
    chan_out_word(c, other_c[i]);
    chanend_set_dest(other_c[i], other_side);
  }
  PAR_JOBS(
      PJOB(transmit_tx, (other_c[0], 0, whole, data)),
      PJOB(transmit_tx, (other_c[1], 1, whole, data)),
      PJOB(transmit_tx, (other_c[2], 2, whole, data)),
      PJOB(transmit_tx, ( c, 3, whole, data))
  );
  for(int i = 0; i < 3; i++) {
    chanend_out_control_token(other_c[i], 1);
    chanend_check_control_token(other_c[i], 1);
  }
  // Send the < 96-byte tail over the original channel.
  chan_out_buf_word(c, &data[whole*24], last>>2);
  for(int i = 0; i < 3; i++) {
    chanend_free(other_c[i]);
  }
}


/* As memory_parallel_receive, but instead of spawning threads it reuses
 * three pre-created thread_call worker threads (see thread_call.h). */
extern void memory_parallel_receive_thread_call(chanend_t c, uint32_t *data,
                                                uint32_t byte_count, thread_info_t *thread_inf) {
  int whole = byte_count / 96;
  int last = byte_count - whole * 96;
  destination_description_t dest = {whole, data};
  chanend_t other_c[3];
  for(int i = 0; i < 3; i++) {
    other_c[i] = chanend_alloc();
    chan_out_word(c, other_c[i]);
    chanend_t other_side = chan_in_word(c);
    chanend_set_dest(other_c[i], other_side);
  }
  // Hand lanes 1..3 to the pre-existing worker threads; lane 0 runs here.
  thread_variable_setup((void*)other_c[1], (void*)1, thread_inf->thread_ids.id[0]);
  thread_variable_setup((void*)other_c[2], (void*)2, thread_inf->thread_ids.id[1]);
  thread_variable_setup((void*)c, (void*)3, thread_inf->thread_ids.id[2]);
  thread_call(&dest, (void*)other_c[0], (void*)0, receive_rx, thread_inf);
  for(int i = 0; i < 3; i++) {
    chanend_out_control_token(other_c[i], 1);
    chanend_check_control_token(other_c[i], 1);
  }
  chan_in_buf_word(c, &data[whole*24], last>>2);
  for(int i = 0; i < 3; i++) {
    chanend_free(other_c[i]);
  }
}
--------------------------------------------------------------------------------
/lib_tflite_micro/src/memory_transport_ll.S:
--------------------------------------------------------------------------------
.issue_mode dual
.cc_top transmit_tx.function,transmit_tx
.globl transmit_tx
.globl transmit_tx.nstackwords
.linkset transmit_tx.nstackwords, 0
.align 16
// transmit_tx(chanend c [r0], int offset [r1], int n [r2], uint32_t *data [r3])
// One send lane: outputs n blocks of 6 words, starting at
// data + offset*n*6 words, i.e. lane `offset`'s contiguous quarter.
transmit_tx:
	{ dualentsp 0 ; ldc r11, 6*4 }
	mul r1, r1, r11
	mul r1, r1, r2
	add r1, r3, r1
tx_loop:
	{ out res[r0], r3 ; ldw r3, r1[0] }
	{ out res[r0], r3 ; ldw r3, r1[1] }
	{ out res[r0], r3 ; ldw r3, r1[2] }
	{ nop ; sub r2, r2, 1 }
	{ out res[r0], r3 ; ldw r3, r1[3] }
	{ out res[r0], r3 ; ldw r3, r1[4] }
	{ out res[r0], r3 ; ldw r3, r1[5] }
	{ bt r2, tx_loop ; add r1, r1, r11 }
	{ out res[r0], r3 ; nop }
	{ out res[r0], r3 ; nop } // dummy out
	{ outct res[r0], 1 ; retsp 0 }
.cc_bottom transmit_tx.function

.cc_top receive_rx.function,receive_rx
.globl receive_rx
.globl receive_rx.nstackwords
.linkset receive_rx.nstackwords, 0
.align 16
.skip 4
// receive_rx(destination_description_t *d [r0], chanend c [r1], int offset [r2])
// One receive lane: stores d->whole blocks of 6 words at
// d->data + offset*d->whole*6 words. The two leading ins absorb the
// sender's pipeline skew (see the dummy outs in transmit_tx).
receive_rx:
	{ dualentsp 0 ; ldc r11, 6*4 }
	mul r2, r2, r11
	ldw r3, r0[0]
	mul r2, r2, r3
	ldw r0, r0[1]
	{ add r2, r0, r2 ; in r0, res[r1] } // Dummy in
	in r0, res[r1]
rx_loop:
	{ in r0, res[r1] ; stw r0, r2[0] }
	{ in r0, res[r1] ; stw r0, r2[1] }
	{ in r0, res[r1] ; stw r0, r2[2] }
	{ nop ; sub r3, r3, 1 }
	{ in r0, res[r1] ; stw r0, r2[3] }
	{ in r0, res[r1] ; stw r0, r2[4] }
	{ in r0, res[r1] ; stw r0, r2[5] }
	{ bt r3, rx_loop ; add r2, r2, r11 }
	{ chkct res[r1], 1 ; retsp 0 }
.cc_bottom receive_rx.function

--------------------------------------------------------------------------------
/lib_tflite_micro/src/par_invoke_funcs.c:
--------------------------------------------------------------------------------

#include "thread_call.h"

#ifdef NO_INTERPRETER

#if defined(__xcore__) || defined(__riscv_xxcore)
#include
// NOTE(review): include target lost in extraction; confirm against repository.
DECLARE_JOB(main_task, (thread_info_t *, synchronizer_t));
DECLARE_JOB(client_task, (thread_info_t *, int));
#endif

extern void invoke_subgraph_c_trampoline();

// Main thread: record the synchroniser, then run the compiled model graph.
void main_task(thread_info_t *t, synchronizer_t sync) {
  thread_store_sync(t, sync);
  invoke_subgraph_c_trampoline();
}

// Worker thread n: park in the thread_call client loop awaiting work items.
void client_task(thread_info_t *t, int n) {
  thread_client(t, n);
}

// par_invoke_K: run the compiled model with K threads (1 main + K-1 clients).
// Non-xcore builds fall back to running the tasks sequentially.
void par_invoke_1(thread_info_t *ti) {
#ifdef __xcore__
  PAR_JOBS(
      PJOB(main_task, (ti, PAR_SYNC)));
#else
  main_task(ti, 0);
#endif
}

void par_invoke_2(thread_info_t *ti) {
#ifdef __xcore__
  PAR_JOBS(
      PJOB(main_task, (ti, PAR_SYNC)),
      PJOB(client_task, (ti, 0)));
#else
  client_task(ti, 0);
  main_task(ti, 0);
#endif
}

void par_invoke_3(thread_info_t *ti) {
#ifdef __xcore__
  PAR_JOBS(
      PJOB(main_task, (ti, PAR_SYNC)),
      PJOB(client_task, (ti, 0)),
      PJOB(client_task, (ti, 1)));
#else
  client_task(ti, 0);
  client_task(ti, 1);
  main_task(ti, 0);
#endif
}

void par_invoke_4(thread_info_t *ti) {
#ifdef __xcore__
  PAR_JOBS(
      PJOB(main_task, (ti, PAR_SYNC)),
      PJOB(client_task, (ti, 0)),
      PJOB(client_task, (ti, 1)),
      PJOB(client_task, (ti, 2)));
#else
  client_task(ti, 0);
  client_task(ti, 1);
  client_task(ti, 2);
  main_task(ti, 0);
#endif
}

void par_invoke_5(thread_info_t *ti) {
#ifdef __xcore__
  PAR_JOBS(
      PJOB(main_task, (ti, PAR_SYNC)),
      PJOB(client_task, (ti, 0)),
      PJOB(client_task, (ti, 1)),
      PJOB(client_task, (ti, 2)),
      PJOB(client_task, (ti, 3)));
#else
  client_task(ti, 0);
  client_task(ti, 1);
  client_task(ti, 2);
  client_task(ti, 3);
  main_task(ti, 0);
#endif
}

#endif
--------------------------------------------------------------------------------
/lib_tflite_micro/src/tflite-xcore-kernels/micro_time.cc:
--------------------------------------------------------------------------------
// Copyright (c) 2022, XMOS Ltd, All rights reserved

#include "tensorflow/lite/micro/micro_time.h"

extern "C" {
// These are headers from XMOS toolchain.
#include
#ifdef _TIME_H_
#define _clock_defined
#endif
#include
// NOTE(review): include targets lost in extraction; confirm against repository.
}

namespace tflite_micro {

// Timebase for TFLM profiling: ticks of the platform reference clock.
uint32_t ticks_per_second() { return PLATFORM_REFERENCE_HZ; }

uint32_t GetCurrentTimeTicks() { return get_reference_time(); }

} // namespace tflite_micro
--------------------------------------------------------------------------------
/lib_tflite_micro/src/tflite-xcore-kernels/xcore_add.cc:
--------------------------------------------------------------------------------
// Copyright (c) 2023, XMOS Ltd, All rights reserved

// NOTE(review): angle-bracketed template arguments (e.g. static_cast<...>,
// construct_persistent_object<...>, GetTensorData<...>) appear to have been
// stripped from this dump during extraction; restore from the repository
// before compiling.

#include "../thread_call.h"
#include "xcore_config.h"
#include "xcore_custom_options.h"
#include "xcore_utils.h"
extern "C" {
#include "lib_nn/api/nn_operator.h"
}

namespace tflite_micro {
namespace ops {
namespace micro {
namespace xcore {
namespace add {

// Pointers shared by all worker threads of a single Eval() call.
struct AddShared {
  int8_t *Y;             // output data
  int8_t *X1;            // first input
  int8_t *X2;            // second input
  nn_add_params_t *blob; // broadcast multiplier/bias/shift vectors
};

extern "C" {
// Per-thread worker: applies add_elementwise to elements [*start, *end).
void add_thread_worker(void *shared, void *start, void *end) {
  int *s = static_cast(start);
  int *e = static_cast(end);
  auto sd = static_cast(shared);
  add_elementwise(sd->Y, sd->X1, sd->X2, sd->blob, *s, *e - *s);
}
}

// This is the struct that contains the data required by the operator
struct AddOpData {
  nn_add_params_t params;
  int tc;                       // number of threads actually used
  int s[XCORE_MAX_NUM_THREADS]; // per-thread start index
  int e[XCORE_MAX_NUM_THREADS]; // per-thread end index (exclusive)
};

// Parse the custom options (m1, m2, bias, shift) and broadcast them into the
// VPU-friendly vectors of nn_add_params_t.
void *Init(TfLiteContext *context, const char *buffer, size_t length) {
  auto op_data = construct_persistent_object(context);

  auto parser = CustomOptionParser(buffer, length);
  int m1 = parser.parseNamedCustomOption("m1").AsInt32();
  int m2 = parser.parseNamedCustomOption("m2").AsInt32();
  int bias = parser.parseNamedCustomOption("bias").AsInt32();
  int shift = parser.parseNamedCustomOption("shift").AsInt32();

  // Broadcast values into vectors
  // We are VLMACC-ing in 16-bit mode
  for (int i = 0; i < VPU_INT16_VLMACC_ELMS; i++) {
    op_data->params.m1[i] = (int16_t)m1;
    op_data->params.m2[i] = (int16_t)m2;
    op_data->params.shift[i] = (int16_t)shift;
    // Split 32-bit bias into two 16-bit values
    op_data->params.bias_hi[i] = bias >> 16;
    op_data->params.bias_lo[i] = (int16_t) (bias & 0XFFFF);
  }

  return op_data;
}

// Does all the requests for scratches, and precomputes the per-thread
// element ranges from the configured model thread count.
TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) {
  auto *op_data = static_cast(node->user_data);
  MicroContext *micro_context = GetMicroContext(context);
  xc_context_config_t *xc_config = reinterpret_cast(
      micro_context->external_context());
  const TfLiteEvalTensor *output =
      tflite_micro::micro::GetEvalOutput(context, node, 0);
  int output_size = tflite_micro::micro::GetTensorShape(output).FlatSize();
  op_data->tc = calculateAlignedThreadSplit(xc_config->model_thread_count, output_size, op_data->s, op_data->e);
  return kTfLiteOk;
}

// Run the elementwise add: hand ranges 0..tc-2 to worker threads, then run
// the final range on the calling thread via thread_call().
TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) {

  auto *op_data = static_cast(node->user_data);
  // Get Input/Output Tensors
  const TfLiteEvalTensor *input1 =
      tflite_micro::micro::GetEvalInput(context, node, 0);
  const TfLiteEvalTensor *input2 =
      tflite_micro::micro::GetEvalInput(context, node, 1);
  TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0);
  // Pointers to data in In/Out Tensors
  int8_t *in1_data =
      const_cast(tflite_micro::micro::GetTensorData(input1));
  int8_t *in2_data =
      const_cast(tflite_micro::micro::GetTensorData(input2));
  int8_t *out_data = tflite_micro::micro::GetTensorData(output);

  MicroContext *micro_context = GetMicroContext(context);
  xc_context_config_t *xc_config = reinterpret_cast(
      micro_context->external_context());
  const int tc = op_data->tc;
  AddShared shared_data;
  shared_data.Y = out_data;
  shared_data.X1 = in1_data;
  shared_data.X2 = in2_data;
  shared_data.blob = &op_data->params;
  for (int t = 0; t < tc - 1; t++) {
    thread_variable_setup((void *)&op_data->s[t], (void *)&op_data->e[t],
                          xc_config->thread_info.thread_ids.id[t]);
  }
  thread_call((void *)&shared_data, &op_data->s[tc - 1], &op_data->e[tc - 1],
              (thread_function_pointer_t)add_thread_worker,
              &xc_config->thread_info);

  return kTfLiteOk;
}

} // namespace add

// Registration hook: static lifetime, so the pointer stays valid.
TFLMRegistration *Register_XC_add() {
  static TFLMRegistration r = {add::Init, nullptr, add::Prepare, add::Eval};
  return &r;
}

} // namespace xcore
} // namespace micro
} // namespace ops
} // namespace tflite_micro
--------------------------------------------------------------------------------
/lib_tflite_micro/src/tflite-xcore-kernels/xcore_batched_softmax.cc:
--------------------------------------------------------------------------------

#include "../thread_call.h"
#include "xcore_config.h"
#include "xcore_custom_options.h"
#include "xcore_utils.h"

extern "C" {
#include "lib_nn/api/nn_operator.h"
}

namespace tflite_micro {
namespace ops {
namespace micro {
namespace xcore {
namespace softmax_batched {

// Pointers shared by the batched-softmax worker threads.
struct SoftmaxBatchedShared {
  int8_t *X, *Y;
  int softmax_size;
  float *table;
};

extern "C" {
void softmax_thread_worker(void *shared, void *start, void *count) {
  int *s = static_cast(start);
26 | int *c = static_cast(count); 27 | auto sd = static_cast(shared); 28 | for (int i = 0; i < *c; i++) { 29 | const int offset = i * sd->softmax_size + *s; 30 | softmax_single(sd->Y + offset, sd->X + offset, sd->table, sd->softmax_size); 31 | } 32 | } 33 | } 34 | 35 | // This is the struct that contains the data required by the operator 36 | struct SoftmaxBatchedOpData { 37 | int tc; 38 | int softmax_size; 39 | int starts[XCORE_MAX_NUM_THREADS]; 40 | int counts[XCORE_MAX_NUM_THREADS]; 41 | }; 42 | 43 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 44 | auto op_data = construct_persistent_object(context); 45 | return op_data; 46 | } 47 | 48 | // Does all the requests for scratches 49 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 50 | auto op_data = static_cast(node->user_data); 51 | MicroContext *micro_context = GetMicroContext(context); 52 | xc_context_config_t *xc_config = reinterpret_cast( 53 | micro_context->external_context()); 54 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 55 | const int trailing_dim = tflite_micro::micro::GetTensorShape(input).DimensionsCount() - 1; 56 | const int num_softmaxes = tflite_micro::micro::GetTensorShape(input).Dims(trailing_dim - 1); 57 | op_data->softmax_size = tflite_micro::micro::GetTensorShape(input).Dims(trailing_dim); 58 | int starts[XCORE_MAX_NUM_THREADS]; 59 | int ends[XCORE_MAX_NUM_THREADS]; 60 | int counts[XCORE_MAX_NUM_THREADS]; 61 | op_data->tc = calculateAlignedThreadSplit(xc_config->model_thread_count, num_softmaxes, starts, ends); 62 | for (int t = 0; t < op_data->tc; t++) { 63 | op_data->counts[t] = ends[t] - starts[t]; 64 | op_data->starts[t] = starts[t] * op_data->softmax_size; 65 | } 66 | return kTfLiteOk; 67 | } 68 | 69 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 70 | 71 | auto *op_data = static_cast(node->user_data); 72 | 73 | // Get Input/Output Tensors 74 | const TfLiteEvalTensor *input = 
tflite_micro::micro::GetEvalInput(context, node, 0); 75 | const TfLiteEvalTensor *table = tflite_micro::micro::GetEvalInput(context, node, 1); 76 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 77 | 78 | // Pointers to data in In/Out Tensors 79 | const float *table_vals = tflite_micro::micro::GetTensorData(table); 80 | int8_t *out_data = tflite_micro::micro::GetTensorData(output); 81 | const int8_t *in_data = tflite_micro::micro::GetTensorData(input); 82 | MicroContext *micro_context = GetMicroContext(context); 83 | xc_context_config_t *xc_config = reinterpret_cast( 84 | micro_context->external_context()); 85 | const int tc = op_data->tc; 86 | SoftmaxBatchedShared shared_data; 87 | shared_data.Y = out_data; 88 | shared_data.X = const_cast(in_data); 89 | shared_data.table = const_cast(table_vals); 90 | shared_data.softmax_size = op_data->softmax_size; 91 | for (int t = 0; t < tc - 1; t++) { 92 | thread_variable_setup((void *)&op_data->starts[t], 93 | (void *)&op_data->counts[t], 94 | xc_config->thread_info.thread_ids.id[t]); 95 | } 96 | thread_call((void *)&shared_data, (void *)&op_data->starts[tc - 1], 97 | (void *)&op_data->counts[tc - 1], 98 | (thread_function_pointer_t)softmax_thread_worker, 99 | &xc_config->thread_info); 100 | return kTfLiteOk; 101 | } 102 | } // namespace softmax_batched 103 | 104 | TFLMRegistration *Register_XC_batched_softmax() { 105 | static TFLMRegistration r = {softmax_batched::Init, nullptr, 106 | softmax_batched::Prepare, softmax_batched::Eval}; 107 | return &r; 108 | } 109 | 110 | } // namespace xcore 111 | } // namespace micro 112 | } // namespace ops 113 | } // namespace tflite_micro 114 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_beta_fcf32.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include 
"../thread_call.h" 4 | #include "conv2d_float.h" 5 | #include "xcore_config.h" 6 | #include "xcore_custom_options.h" 7 | #include "xcore_utils.h" 8 | extern "C" { 9 | #include "lib_nn/api/nn_operator.h" 10 | } 11 | 12 | namespace tflite_micro { 13 | namespace ops { 14 | namespace micro { 15 | namespace xcore { 16 | namespace beta_fcf32 { 17 | 18 | // This is the struct that contains the data required by the operator 19 | struct Beta_FcF32OpData { 20 | int tc; 21 | int s[XCORE_MAX_NUM_THREADS]; 22 | int e[XCORE_MAX_NUM_THREADS]; 23 | }; 24 | 25 | struct Beta_FcF32Shared { 26 | float *out; 27 | float *in; 28 | float *kernels; 29 | int out_f; 30 | int in_f; 31 | }; 32 | 33 | extern "C" { 34 | void beta_fcf32_thread_worker(void *shared, void *start, void *end) { 35 | int *s = static_cast(start); 36 | int *e = static_cast(end); 37 | auto sd = static_cast(shared); 38 | #if defined(NN_USE_REF) || defined(__VX4A__) 39 | xc_fc_float_ref(sd->out, sd->in, sd->kernels, sd->out_f, 40 | sd->in_f); 41 | #else 42 | xc_fc_float_opt(sd->out, sd->in, sd->kernels, sd->out_f, 43 | sd->in_f, *s, *e); 44 | #endif 45 | } 46 | } 47 | 48 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 49 | auto op_data = construct_persistent_object(context); 50 | 51 | return op_data; 52 | } 53 | 54 | // Does all the requests for scratches 55 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 56 | auto op_data = static_cast(node->user_data); 57 | MicroContext *micro_context = GetMicroContext(context); 58 | xc_context_config_t *xc_config = reinterpret_cast( 59 | micro_context->external_context()); 60 | const TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 61 | int out_f = output->dims->data[1]; 62 | op_data->tc = calculateAlignedThreadSplit(xc_config->model_thread_count, out_f, op_data->s, op_data->e); 63 | return kTfLiteOk; 64 | } 65 | 66 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 67 | auto op_data = 
static_cast(node->user_data); 68 | // Get Input/Output Tensors 69 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 70 | const TfLiteEvalTensor *kernels = 71 | tflite_micro::micro::GetEvalInput(context, node, 1); 72 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 73 | 74 | int out_f = output->dims->data[1]; 75 | 76 | int in_f = input->dims->data[1]; 77 | 78 | // Pointers to data in In/Out Tensors 79 | float *out_data = tflite_micro::micro::GetTensorData(output); 80 | float *in_data = 81 | const_cast(tflite_micro::micro::GetTensorData(input)); 82 | float *kernel_data = 83 | const_cast(tflite_micro::micro::GetTensorData(kernels)); 84 | 85 | MicroContext *micro_context = GetMicroContext(context); 86 | xc_context_config_t *xc_config = reinterpret_cast( 87 | micro_context->external_context()); 88 | 89 | // todo - this second for-loop is unpleasant 90 | for (int t = 0; t < op_data->tc - 1; ++t) { 91 | thread_variable_setup(&op_data->s[t], &op_data->e[t], 92 | xc_config->thread_info.thread_ids.id[t]); 93 | } 94 | 95 | Beta_FcF32Shared shared_data; 96 | shared_data.out = out_data; 97 | shared_data.in = in_data; 98 | shared_data.kernels = kernel_data; 99 | shared_data.out_f = out_f; 100 | shared_data.in_f = in_f; 101 | 102 | // Now set up shared data, shared function pointer, and data for final thread. 
103 | thread_call((void *)&shared_data, &op_data->s[op_data->tc - 1], &op_data->e[op_data->tc - 1], 104 | (thread_function_pointer_t)beta_fcf32_thread_worker, 105 | &xc_config->thread_info); 106 | 107 | return kTfLiteOk; 108 | } 109 | 110 | } // namespace beta_fcf32 111 | 112 | TFLMRegistration *Register_XC_beta_fcf32() { 113 | static TFLMRegistration r = {beta_fcf32::Init, nullptr, 114 | beta_fcf32::Prepare, beta_fcf32::Eval}; 115 | return &r; 116 | } 117 | 118 | } // namespace xcore 119 | } // namespace micro 120 | } // namespace ops 121 | } // namespace tflite_micro 122 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_broadcast.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include 4 | extern "C" { 5 | #include "vpu_memmove_word_aligned.h" 6 | #include "vpu_memset_256.h" 7 | } 8 | 9 | #include "xcore_custom_options.h" 10 | #include "xcore_utils.h" 11 | 12 | namespace tflite_micro { 13 | namespace ops { 14 | namespace micro { 15 | namespace xcore { 16 | namespace broadcast { 17 | 18 | struct BroadcastOpData { 19 | int32_t size; 20 | int32_t num_copies; 21 | int32_t num_broadcasts; 22 | void (*func_ptr)(void *, const void *, unsigned); 23 | }; 24 | 25 | void memmove_wrapper(void *dst, const void *src, unsigned size) { 26 | memmove(dst, src, size); 27 | } 28 | 29 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 30 | auto op_data = construct_persistent_object(context); 31 | auto parser = CustomOptionParser(buffer, length); 32 | op_data->size = parser.parseNamedCustomOption("s").AsInt32(); 33 | op_data->num_copies = parser.parseNamedCustomOption("n").AsInt32(); 34 | op_data->num_broadcasts = parser.parseNamedCustomOption("b").AsInt32(); 35 | bool use_vpu = parser.parseNamedCustomOption("v").AsBool(); 36 | op_data->func_ptr = use_vpu ? 
vpu_memmove_word_aligned : memmove_wrapper; 37 | return op_data; 38 | } 39 | 40 | // Does all the requests for scratches 41 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 42 | return kTfLiteOk; 43 | } 44 | 45 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 46 | auto *op_data = static_cast(node->user_data); 47 | // Get Input/Output Tensors 48 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 49 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 50 | // Pointers to data in In/Out Tensors 51 | const int8_t *in_data = tflite_micro::micro::GetTensorData(input); 52 | int8_t *out_data = tflite_micro::micro::GetTensorData(output); 53 | const int size = op_data->size; 54 | const int num_copies = op_data->num_copies; 55 | const int num_broadcasts = op_data->num_broadcasts; 56 | if (size == 1 && num_copies < 64) { 57 | for (int i = 0; i < num_broadcasts; i++) { 58 | memset(out_data, *in_data, num_copies); 59 | out_data += num_copies; 60 | in_data++; 61 | } 62 | return kTfLiteOk; 63 | } 64 | if ((size != 1 && size != 2 && size != 4) || num_copies < 64) { 65 | void (*func_ptr)(void *, const void *, unsigned) = op_data->func_ptr; 66 | for (int i = 0; i < num_broadcasts; i++) { 67 | for (int j = 0; j < num_copies; j++) { 68 | func_ptr(out_data, in_data, size); 69 | out_data += size; 70 | } 71 | in_data += size; 72 | } 73 | 74 | return kTfLiteOk; 75 | } 76 | uint32_t c; 77 | uint8_t from[32]; 78 | for (int i = 0; i < num_broadcasts; i++) { 79 | switch (size) { 80 | case 1: 81 | // c = ins[0] * 0x01010101; 82 | c = ((uint8_t)(*in_data)) * 0x01010101; 83 | break; 84 | case 2: 85 | c = ((uint8_t)(*in_data) | ((uint8_t)(in_data[1]) << 8)) * 0x00010001; 86 | break; 87 | case 4: 88 | c = ((uint8_t)(*in_data) | ((uint8_t)(in_data[1]) << 8) | 89 | ((uint8_t)(in_data[2]) << 16) | ((uint8_t)(in_data[3]) << 24)); 90 | break; 91 | } 92 | broadcast_32_to_256(from, c); 93 | 
vpu_memset_256(out_data, from, num_copies * size); 94 | out_data += num_copies * size; 95 | in_data += size; 96 | } 97 | return kTfLiteOk; 98 | } 99 | } // namespace broadcast 100 | 101 | TFLMRegistration *Register_XC_broadcast() { 102 | static TFLMRegistration r = {broadcast::Init, nullptr, broadcast::Prepare, 103 | broadcast::Eval}; 104 | return &r; 105 | } 106 | 107 | } // namespace xcore 108 | } // namespace micro 109 | } // namespace ops 110 | } // namespace tflite_micro 111 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_bsign.cc: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "flatbuffers/flexbuffers.h" 4 | #include "tensorflow/lite/c/common.h" 5 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" 6 | #include "tensorflow/lite/kernels/kernel_util.h" 7 | #include "tensorflow/lite/micro/kernels/kernel_util.h" 8 | #include "xcore_custom_options.h" 9 | #include "xcore_utils.h" 10 | 11 | extern "C" { 12 | #include "lib_nn/api/nn_operator.h" 13 | } 14 | 15 | namespace tflite_micro { 16 | namespace ops { 17 | namespace micro { 18 | namespace xcore { 19 | namespace bsign { 20 | 21 | // -------------------------------------------------------------------- // 22 | // kernel argument type 23 | // -------------------------------------------------------------------- // 24 | 25 | struct BSign8Args { 26 | int32_t *Y; 27 | const int8_t *X; 28 | int8_t zero_point_vec[VPU_INT8_EPV]; 29 | }; 30 | 31 | // -------------------------------------------------------------------- // 32 | // thread data type and worker functions 33 | // -------------------------------------------------------------------- // 34 | 35 | struct BSign8ThreadData { 36 | const BSign8Args *args; 37 | const nn_bsign_8_job_t *job; 38 | }; 39 | 40 | extern "C" { 41 | void bsign_8_thread_worker(void *context) { 42 | auto *td = (BSign8ThreadData *)context; 43 | auto 
*args = td->args; 44 | bsign_8(args->Y, args->X, args->zero_point_vec, td->job); 45 | } 46 | } 47 | 48 | // -------------------------------------------------------------------- // 49 | // op data types 50 | // -------------------------------------------------------------------- // 51 | 52 | struct BSign8OpData { 53 | BSign8Args args; 54 | PersistentArray jobs; 55 | PersistentArray threads; 56 | }; 57 | 58 | // -------------------------------------------------------------------- // 59 | // op function implementations 60 | // -------------------------------------------------------------------- // 61 | 62 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 63 | auto *op_data = construct_persistent_object(context); 64 | 65 | // TODO parse data for parallelism 66 | // in this op we have one job per thread 67 | int n_threads = 1; 68 | op_data->jobs.allocate(context, n_threads) 69 | .initialize(); // TODO: REMOVE ALL OF THIS 70 | op_data->threads.allocate(context, n_threads); // SHOULD BE NOTHING LEFT. 
71 | for (auto &job : op_data->jobs) { 72 | op_data->threads.append({&op_data->args, &job}); 73 | } 74 | 75 | return op_data; 76 | } 77 | 78 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 79 | TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); 80 | TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); 81 | 82 | auto *op_data = reinterpret_cast(node->user_data); 83 | MicroContext *micro_context = GetMicroContext(context); 84 | TfLiteTensor *input = micro_context->AllocateTempInputTensor(node, 0); 85 | TF_LITE_ENSURE(context, input != nullptr); 86 | 87 | const int32_t input_size = EvalTensorBytes((const TfLiteEvalTensor*)input) / sizeof(int8_t); 88 | bsign_8_prepare(op_data->jobs.begin(), op_data->args.zero_point_vec, 89 | input_size, input->params.zero_point, op_data->jobs.size()); 90 | 91 | micro_context->DeallocateTempTfLiteTensor(input); 92 | 93 | return kTfLiteOk; 94 | } 95 | 96 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 97 | auto *op_data = reinterpret_cast(node->user_data); 98 | 99 | op_data->args.X = tflite_micro::micro::GetTensorData( 100 | tflite_micro::micro::GetEvalInput(context, node, 0)); 101 | op_data->args.Y = tflite_micro::micro::GetTensorData( 102 | tflite_micro::micro::GetEvalOutput(context, node, 0)); 103 | 104 | for (auto &thread : op_data->threads) { // TODO: remove - only 1 task! 
105 | bsign_8_thread_worker(reinterpret_cast(&thread)); 106 | } 107 | 108 | return kTfLiteOk; 109 | } 110 | 111 | } // namespace bsign 112 | 113 | TFLMRegistration *Register_XC_bsign_8() { 114 | static TFLMRegistration r = {bsign::Init, nullptr, bsign::Prepare, 115 | bsign::Eval}; 116 | return &r; 117 | } 118 | 119 | } // namespace xcore 120 | } // namespace micro 121 | } // namespace ops 122 | } // namespace tflite_micro 123 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_common.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #if defined __GNUC__ 7 | #define ALIGN(X) __attribute__((aligned(X))) 8 | #elif defined _MSC_VER 9 | #define ALIGN(X) __declspec(align(X)) 10 | #elif defined __TASKING__ 11 | #define ALIGN(X) __align(X) 12 | #endif 13 | 14 | #define MAX_DEBUG_LOG_LENGTH 256 15 | #define MAX_DEBUG_LOG_ENTRIES 3 16 | 17 | int debug_log_index = 0; 18 | char ALIGN(4) debug_log_buffer[MAX_DEBUG_LOG_LENGTH * MAX_DEBUG_LOG_ENTRIES]; 19 | 20 | extern "C" void DebugLog(const char* format, va_list args) { 21 | vsnprintf(&debug_log_buffer[debug_log_index * MAX_DEBUG_LOG_ENTRIES], MAX_DEBUG_LOG_LENGTH, format, args); 22 | printf("%s", &debug_log_buffer[debug_log_index * MAX_DEBUG_LOG_ENTRIES]); 23 | debug_log_index++; 24 | if (debug_log_index == MAX_DEBUG_LOG_ENTRIES) 25 | debug_log_index = 0; 26 | } 27 | 28 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_custom_options.cc: -------------------------------------------------------------------------------- 1 | #include "xcore_custom_options.h" 2 | 3 | namespace tflite_micro { 4 | namespace ops { 5 | namespace micro { 6 | namespace xcore { 7 | 8 | CustomOptionParser::CustomOptionParser(const flexbuffers::Map &map) 9 | : 
keys_(flexbuffers::TypedVector::EmptyTypedVector()), 10 | values_(flexbuffers::Vector::EmptyVector()) { 11 | keys_ = map.Keys(); 12 | values_ = map.Values(); 13 | } 14 | 15 | CustomOptionParser::CustomOptionParser(const char *buffer, size_t buffer_length) 16 | : CustomOptionParser::CustomOptionParser( 17 | flexbuffers::GetRoot(reinterpret_cast(buffer), 18 | buffer_length) 19 | .AsMap()) { 20 | assert(buffer != nullptr); 21 | assert(buffer_length > 0); 22 | } 23 | 24 | flexbuffers::Reference 25 | CustomOptionParser::parseNamedCustomOption(const char *name) const { 26 | for (int i = 0; i < keys_.size(); ++i) { 27 | const auto &key = keys_[i].AsString().c_str(); 28 | if (strcmp(key, name) == 0) { 29 | return values_[i]; 30 | } 31 | } 32 | return flexbuffers::Reference(nullptr, 1, flexbuffers::NullPackedType()); 33 | } 34 | 35 | } // namespace xcore 36 | } // namespace micro 37 | } // namespace ops 38 | } // namespace tflite_micro 39 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_custom_options.h: -------------------------------------------------------------------------------- 1 | #ifndef XCORE_CUSTOM_OPTIONS_H_ 2 | #define XCORE_CUSTOM_OPTIONS_H_ 3 | 4 | #include "flatbuffers/flexbuffers.h" 5 | #include "xcore_ops.h" 6 | 7 | namespace tflite_micro { 8 | namespace ops { 9 | namespace micro { 10 | namespace xcore { 11 | 12 | class CustomOptionParser { 13 | private: 14 | flexbuffers::TypedVector keys_; 15 | flexbuffers::Vector values_; 16 | 17 | public: 18 | CustomOptionParser(const flexbuffers::Map &map); 19 | CustomOptionParser(const char *buffer, size_t buffer_length); 20 | flexbuffers::Reference parseNamedCustomOption(const char *name) const; 21 | }; 22 | 23 | } // namespace xcore 24 | } // namespace micro 25 | } // namespace ops 26 | } // namespace tflite_micro 27 | 28 | #endif // XCORE_CUSTOM_OPTIONS_H_ 29 | 
-------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_error_reporter.cc: -------------------------------------------------------------------------------- 1 | #include "xcore_error_reporter.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifdef __xcore__ 10 | #include 11 | #endif 12 | 13 | //#if !defined(TF_LITE_STRIP_ERROR_STRINGS) 14 | #include "tensorflow/lite/micro/debug_log.h" 15 | //#endif 16 | 17 | namespace tflite_micro { 18 | namespace micro { 19 | namespace xcore { 20 | 21 | void XCoreErrorReporter::Init(char *debugBuffer, int debugBufferLength) { 22 | buffer = debugBuffer; 23 | max_len = debugBufferLength; 24 | memset(debugBuffer, 0, max_len); 25 | } 26 | 27 | void XCoreErrorReporter::Log(const char *format, va_list args) { 28 | #if !defined(TF_LITE_STRIP_ERROR_STRINGS) 29 | static constexpr int kMaxLogLen = 256; 30 | if (len + kMaxLogLen > max_len) { 31 | int new_len = max_len - kMaxLogLen; 32 | if (new_len < 0) { 33 | new_len = 0; 34 | } 35 | for (int i = 0; i <= new_len; i++) { 36 | buffer[i] = buffer[i - new_len + len]; 37 | } 38 | len = new_len; 39 | } 40 | vsprintf(buffer + len, format, args); 41 | len = strlen(buffer); 42 | #ifdef __xcore__ 43 | printstr(buffer); 44 | #else 45 | printf("%s", buffer); 46 | #endif 47 | 48 | #endif 49 | } 50 | 51 | int XCoreErrorReporter::Report(const char *format, va_list args) { 52 | Log(format, args); 53 | return 0; 54 | } 55 | 56 | } // namespace xcore 57 | } // namespace micro 58 | } // namespace tflite_micro 59 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_error_reporter.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, XMOS Ltd, All rights reserved 2 | 3 | #ifndef XCORE_ERROR_REPORTER_H_ 4 | #define XCORE_ERROR_REPORTER_H_ 5 | 6 | #include 
"tensorflow/lite/micro/compatibility.h" 7 | #include "tensorflow/lite/micro/tflite_bridge/micro_error_reporter.h" 8 | 9 | namespace tflite_micro { 10 | namespace micro { 11 | namespace xcore { 12 | 13 | class XCoreErrorReporter : public tflite_micro::MicroErrorReporter { 14 | public: 15 | explicit XCoreErrorReporter(){}; 16 | ~XCoreErrorReporter() override = default; 17 | void Init(char *debugBuffer, int debugBufferLength); 18 | void Log(const char *format, va_list args); 19 | int Report(const char *format, va_list args) override; 20 | 21 | private: 22 | char *buffer; 23 | int max_len; 24 | int len = 0; 25 | TF_LITE_REMOVE_VIRTUAL_DELETE 26 | }; 27 | 28 | } // namespace xcore 29 | } // namespace micro 30 | } // namespace tflite_micro 31 | 32 | #endif // XCORE_ERROR_REPORTER_H_ 33 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_expand_8_to_16.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include "../thread_call.h" 4 | #include "xcore_config.h" 5 | #include "xcore_custom_options.h" 6 | #include "xcore_utils.h" 7 | extern "C" { 8 | #include "lib_nn/api/nn_operator.h" 9 | #include "lib_nn/api/expand_8_to_16.h" 10 | } 11 | 12 | namespace tflite_micro { 13 | namespace ops { 14 | namespace micro { 15 | namespace xcore { 16 | namespace expand_8to16 { 17 | 18 | struct Expand_8_To_16Shared { 19 | int8_t *X; 20 | int16_t *Y; 21 | }; 22 | 23 | extern "C" { 24 | void expand_8_to_16_thread_worker(void *shared, void *start, void *count) { 25 | int *s = static_cast(start); 26 | int *c = static_cast(count); 27 | auto sd = static_cast(shared); 28 | expand_8_to_16(sd->Y + *s, sd->X + *s, *c); 29 | } 30 | } 31 | 32 | // This is the struct that contains the data required by the operator 33 | struct Expand_8_To_16OpData { 34 | int tc; 35 | int start[XCORE_MAX_NUM_THREADS]; 36 | int 
count[XCORE_MAX_NUM_THREADS]; 37 | }; 38 | 39 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 40 | auto op_data = construct_persistent_object(context); 41 | return op_data; 42 | } 43 | 44 | // Does all the requests for scratches 45 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 46 | auto op_data = static_cast(node->user_data); 47 | MicroContext *micro_context = GetMicroContext(context); 48 | xc_context_config_t *xc_config = reinterpret_cast( 49 | micro_context->external_context()); 50 | const TfLiteEvalTensor *input = 51 | tflite_micro::micro::GetEvalInput(context, node, 0); 52 | int input_size = tflite_micro::micro::GetTensorShape(input).FlatSize(); 53 | op_data->tc = calculateAlignedThreadSplit(xc_config->model_thread_count, input_size, op_data->start, op_data->count); 54 | for (int t = 0; t < op_data->tc; t++) { 55 | op_data->count[t] = op_data->count[t] - op_data->start[t]; 56 | } 57 | return kTfLiteOk; 58 | } 59 | 60 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 61 | auto *op_data = static_cast(node->user_data); 62 | 63 | // Get Input/Output Tensors 64 | const TfLiteEvalTensor *input = 65 | tflite_micro::micro::GetEvalInput(context, node, 0); 66 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 67 | 68 | // Pointers to data in In/Out Tensors 69 | const int8_t *in_data = tflite_micro::micro::GetTensorData(input); 70 | int8_t *out_data = tflite_micro::micro::GetTensorData(output); 71 | 72 | MicroContext *micro_context = GetMicroContext(context); 73 | xc_context_config_t *xc_config = reinterpret_cast( 74 | micro_context->external_context()); 75 | const int tc = op_data->tc; 76 | Expand_8_To_16Shared shared_data; 77 | shared_data.X = const_cast(in_data); 78 | shared_data.Y = (int16_t *)out_data; 79 | for (int t = 0; t < tc - 1; t++) { 80 | thread_variable_setup((void *)&op_data->start[t], (void *)&op_data->count[t], 81 | xc_config->thread_info.thread_ids.id[t]); 82 | } 
83 | thread_call((void *)&shared_data, &op_data->start[tc - 1], &op_data->count[tc - 1], 84 | (thread_function_pointer_t)expand_8_to_16_thread_worker, 85 | &xc_config->thread_info); 86 | return kTfLiteOk; 87 | } 88 | 89 | } // namespace expand_8to16 90 | 91 | TFLMRegistration *Register_XC_expand_8_to_16() { 92 | static TFLMRegistration r = {expand_8to16::Init, nullptr, expand_8to16::Prepare, 93 | expand_8to16::Eval}; 94 | return &r; 95 | } 96 | 97 | } // namespace xcore 98 | } // namespace micro 99 | } // namespace ops 100 | } // namespace tflite_micro 101 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_interpreter.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, XMOS Ltd, All rights reserved 2 | #ifndef XCORE_INTERPRETER_H_ 3 | #define XCORE_INTERPRETER_H_ 4 | 5 | #include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h" 6 | #include "tensorflow/lite/micro/memory_planner/micro_memory_planner.h" 7 | #include "tensorflow/lite/micro/micro_allocator.h" 8 | #include "tensorflow/lite/micro/micro_interpreter.h" 9 | #include "xcore_profiler.h" 10 | 11 | namespace tflite_micro { 12 | namespace micro { 13 | namespace xcore { 14 | 15 | class XCoreInterpreter : public tflite_micro::MicroInterpreter { 16 | public: 17 | XCoreInterpreter(const tflite_micro::Model *model, 18 | const tflite_micro::MicroOpResolver &resolver, 19 | tflite_micro::MicroAllocator *allocator, 20 | bool use_curent_thread = true, 21 | XCoreProfiler *profiler = nullptr); 22 | 23 | static XCoreInterpreter * 24 | Create(uint8_t interpreter_buffer[], const tflite_micro::Model *model, 25 | const tflite_micro::MicroOpResolver &resolver, uint8_t *arena, 26 | size_t arena_size, bool use_current_thread, XCoreProfiler *profiler); 27 | 28 | void PrintMemoryPlan(); 29 | TfLiteTensor *tensor(size_t tensor_index); 30 | const char *node_name(int sub_idx, int i); 
31 | 32 | TfLiteStatus GetTensorDetails(size_t tensor_index, char *name, int name_len, 33 | int *shape, int *type, float *scale, 34 | int32_t *zero_point); 35 | 36 | TfLiteStatus GetTensorDetailsBufferSizes(size_t tensor_index, size_t *dims, 37 | size_t *scales, size_t *zero_points); 38 | 39 | size_t input_tensor_index(size_t input_index); 40 | size_t output_tensor_index(size_t output_index); 41 | const Model *model__; 42 | MicroAllocator *allocator_; 43 | }; 44 | 45 | } // namespace xcore 46 | } // namespace micro 47 | } // namespace tflite_micro 48 | 49 | #endif // XCORE_INTERPRETER_H_ 50 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_load_store_tensor.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include "xcore_config.h" 4 | #include "xcore_custom_options.h" 5 | #include "xcore_utils.h" 6 | extern "C" { 7 | #include "nn_op_utils.h" 8 | #include "lib_nn/api/nn_layers.h" 9 | } 10 | 11 | namespace tflite_micro { 12 | namespace ops { 13 | namespace micro { 14 | namespace xcore { 15 | namespace load_store_tensor { 16 | 17 | // This is the struct that contains the data required by the operator 18 | struct OpData { 19 | uint32_t addr; 20 | uint32_t size; 21 | }; 22 | 23 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 24 | auto op_data = construct_persistent_object(context); 25 | 26 | auto parser = CustomOptionParser(buffer, length); 27 | op_data->addr = parser.parseNamedCustomOption("a").AsInt32(); 28 | op_data->size = parser.parseNamedCustomOption("s").AsInt32(); 29 | 30 | MicroContext *micro_context = GetMicroContext(context); 31 | xc_context_config_t *xc_config = reinterpret_cast( 32 | micro_context->external_context()); 33 | assert(true); 34 | return op_data; 35 | } 36 | 37 | TfLiteStatus Eval_Store(TfLiteContext *context, TfLiteNode *node) { 38 | auto 
*op_data = static_cast(node->user_data); 39 | MicroContext *micro_context = GetMicroContext(context); 40 | xc_context_config_t *xc_config = reinterpret_cast( 41 | micro_context->external_context()); 42 | 43 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 44 | const int8_t *data_ptr = tflite_micro::micro::GetTensorData(input); 45 | vpu_memcpy_ext(((int8_t *)xc_config->paging_ptr) + op_data->addr, data_ptr, 46 | op_data->size); 47 | return kTfLiteOk; 48 | } 49 | 50 | TfLiteStatus Eval_Load(TfLiteContext *context, TfLiteNode *node) { 51 | auto *op_data = static_cast(node->user_data); 52 | MicroContext *micro_context = GetMicroContext(context); 53 | xc_context_config_t *xc_config = reinterpret_cast( 54 | micro_context->external_context()); 55 | 56 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 57 | int8_t *data_ptr = tflite_micro::micro::GetTensorData(output); 58 | 59 | int output_size = EvalTensorBytes(output); 60 | assert(output_size == op_data->size); 61 | 62 | vpu_memcpy_ext((void *)data_ptr, 63 | ((int8_t *)xc_config->paging_ptr) + op_data->addr, 64 | op_data->size); 65 | return kTfLiteOk; 66 | } 67 | 68 | } // namespace load_store_tensor 69 | 70 | TFLMRegistration *Register_XC_store_tensor() { 71 | static TFLMRegistration r = {load_store_tensor::Init, nullptr, nullptr, 72 | load_store_tensor::Eval_Store}; 73 | return &r; 74 | } 75 | 76 | TFLMRegistration *Register_XC_load_tensor() { 77 | static TFLMRegistration r = {load_store_tensor::Init, nullptr, nullptr, 78 | load_store_tensor::Eval_Load}; 79 | return &r; 80 | } 81 | 82 | } // namespace xcore 83 | } // namespace micro 84 | } // namespace ops 85 | } // namespace tflite_micro 86 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_lookup.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, 
All rights reserved 2 | 3 | #include "../thread_call.h" 4 | #include "xcore_config.h" 5 | #include "xcore_utils.h" 6 | extern "C" { 7 | #include "lib_nn/api/nn_operator.h" 8 | #include "lib_nn/api/quadratic_interpolation.h" 9 | } 10 | 11 | namespace tflite_micro { 12 | namespace ops { 13 | namespace micro { 14 | namespace xcore { 15 | namespace lookup { 16 | 17 | struct LookupShared { 18 | uint8_t *X; 19 | uint8_t *Y; 20 | uint8_t *table; 21 | }; 22 | 23 | extern "C" { 24 | void lookup8_thread_worker(void *shared, void *start, void *end) { 25 | int *s = static_cast(start); 26 | int *e = static_cast(end); 27 | auto sd = static_cast(shared); 28 | // lookup takes start and count instead of start and end 29 | lookup8(sd->Y, sd->X, sd->table, *s, *e - *s); 30 | } 31 | 32 | void lookup16_thread_worker(void *shared, void *start, void *end) { 33 | int *s = static_cast(start); 34 | int *e = static_cast(end); 35 | auto sd = static_cast(shared); 36 | // output and input pointers are adjusted with thread start 37 | quadratic_interpolation_128((int16_t *)sd->Y + *s, (int16_t *)sd->X + *s, 38 | sd->table, *e - *s); 39 | } 40 | } 41 | // This is the struct that contains the data required by the operator 42 | struct LookupOpData { 43 | int tc; 44 | int s[XCORE_MAX_NUM_THREADS]; 45 | int e[XCORE_MAX_NUM_THREADS]; 46 | }; 47 | 48 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 49 | auto op_data = construct_persistent_object(context); 50 | return op_data; 51 | } 52 | 53 | // Does all the requests for scratches 54 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 55 | auto op_data = static_cast(node->user_data); 56 | MicroContext *micro_context = GetMicroContext(context); 57 | xc_context_config_t *xc_config = reinterpret_cast( 58 | micro_context->external_context()); 59 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 60 | int input_size = tflite_micro::micro::GetTensorShape(input).FlatSize(); 61 | 
op_data->tc = calculateAlignedThreadSplit(xc_config->model_thread_count, input_size, op_data->s, op_data->e); 62 | return kTfLiteOk; 63 | } 64 | 65 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 66 | 67 | auto *op_data = static_cast(node->user_data); 68 | 69 | // Get Input/Output Tensors 70 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 71 | const TfLiteEvalTensor *table = tflite_micro::micro::GetEvalInput(context, node, 1); 72 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 73 | 74 | // Pointers to data in In/Out Tensors 75 | const uint8_t *table_vals = tflite_micro::micro::GetTensorData(table); 76 | uint8_t *out_data = tflite_micro::micro::GetTensorData(output); 77 | const uint8_t *in_data = tflite_micro::micro::GetTensorData(input); 78 | MicroContext *micro_context = GetMicroContext(context); 79 | xc_context_config_t *xc_config = reinterpret_cast( 80 | micro_context->external_context()); 81 | const int tc = op_data->tc; 82 | LookupShared shared_data; 83 | shared_data.Y = out_data; 84 | shared_data.X = const_cast(in_data); 85 | shared_data.table = const_cast(table_vals); 86 | for (int t = 0; t < tc - 1; t++) { 87 | thread_variable_setup((void *)&op_data->s[t], (void *)&op_data->e[t], 88 | xc_config->thread_info.thread_ids.id[t]); 89 | } 90 | 91 | thread_function_pointer_t fn; 92 | switch (input->type) { 93 | case kTfLiteInt8: { 94 | fn = lookup8_thread_worker; 95 | break; 96 | } 97 | case kTfLiteInt16: { 98 | fn = lookup16_thread_worker; 99 | break; 100 | } 101 | default: { 102 | return kTfLiteError; 103 | } 104 | } 105 | 106 | thread_call((void *)&shared_data, &op_data->s[tc - 1], &op_data->e[tc - 1], 107 | (thread_function_pointer_t)fn, &xc_config->thread_info); 108 | return kTfLiteOk; 109 | } 110 | 111 | } // namespace lookup 112 | 113 | TFLMRegistration *Register_XC_lookup() { 114 | static TFLMRegistration r = {lookup::Init, nullptr, lookup::Prepare, 115 | 
lookup::Eval}; 116 | return &r; 117 | } 118 | 119 | } // namespace xcore 120 | } // namespace micro 121 | } // namespace ops 122 | } // namespace tflite_micro 123 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_mean.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include "xcore_custom_options.h" 4 | #include "xcore_utils.h" 5 | extern "C" { 6 | #include "lib_nn/api/nn_layers.h" 7 | } 8 | 9 | namespace tflite_micro { 10 | namespace ops { 11 | namespace micro { 12 | namespace xcore { 13 | namespace mean { 14 | 15 | // This is the struct that contains the data required by the operator 16 | struct MeanOpData { 17 | int start_dim_size; 18 | int mean_dim_size; 19 | int end_dim_size; 20 | float in_zero_point; 21 | float out_zero_point; 22 | float scale_mul; 23 | }; 24 | 25 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 26 | auto op_data = construct_persistent_object(context); 27 | 28 | auto parser = CustomOptionParser(buffer, length); 29 | op_data->start_dim_size = parser.parseNamedCustomOption("s").AsInt32(); 30 | op_data->mean_dim_size = parser.parseNamedCustomOption("m").AsInt32(); 31 | op_data->end_dim_size = parser.parseNamedCustomOption("e").AsInt32(); 32 | op_data->in_zero_point = parser.parseNamedCustomOption("i").AsFloat(); 33 | op_data->out_zero_point = parser.parseNamedCustomOption("o").AsFloat(); 34 | op_data->scale_mul = parser.parseNamedCustomOption("sm").AsFloat(); 35 | return op_data; 36 | } 37 | 38 | // Does all the requests for scratches 39 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 40 | return kTfLiteOk; 41 | } 42 | 43 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 44 | 45 | auto *op_data = static_cast(node->user_data); 46 | 47 | // Get Input/Output Tensors 48 | const TfLiteEvalTensor *input = 
tflite_micro::micro::GetEvalInput(context, node, 0); 49 | 50 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 51 | 52 | // Pointers to data in In/Out Tensors 53 | int8_t *out_data = tflite_micro::micro::GetTensorData(output); 54 | const int8_t *in_data = tflite_micro::micro::GetTensorData(input); 55 | mean_int8(in_data, out_data, op_data->start_dim_size, op_data->mean_dim_size, 56 | op_data->end_dim_size, op_data->in_zero_point, 57 | op_data->out_zero_point, op_data->scale_mul); 58 | 59 | return kTfLiteOk; 60 | } 61 | 62 | } // namespace mean 63 | 64 | TFLMRegistration *Register_XC_mean() { 65 | static TFLMRegistration r = {mean::Init, nullptr, mean::Prepare, mean::Eval}; 66 | return &r; 67 | } 68 | 69 | } // namespace xcore 70 | } // namespace micro 71 | } // namespace ops 72 | } // namespace tflite_micro 73 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_meani16.cc: -------------------------------------------------------------------------------- 1 | 2 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 3 | 4 | #include "xcore_custom_options.h" 5 | #include "xcore_utils.h" 6 | extern "C" { 7 | #include "lib_nn/api/nn_layers.h" 8 | } 9 | 10 | namespace tflite_micro { 11 | namespace ops { 12 | namespace micro { 13 | namespace xcore { 14 | namespace meani16 { 15 | 16 | // This is the struct that contains the data required by the operator 17 | struct MeanI16OpData { 18 | int start_dim_size; 19 | int mean_dim_size; 20 | int end_dim_size; 21 | float scale_mul; 22 | }; 23 | 24 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 25 | auto op_data = construct_persistent_object(context); 26 | 27 | auto parser = CustomOptionParser(buffer, length); 28 | op_data->start_dim_size = parser.parseNamedCustomOption("s").AsInt32(); 29 | op_data->mean_dim_size = parser.parseNamedCustomOption("m").AsInt32(); 30 | op_data->end_dim_size = 
parser.parseNamedCustomOption("e").AsInt32(); 31 | op_data->scale_mul = parser.parseNamedCustomOption("sm").AsFloat(); 32 | return op_data; 33 | } 34 | 35 | // Does all the requests for scratches 36 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 37 | return kTfLiteOk; 38 | } 39 | 40 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 41 | 42 | auto *op_data = static_cast(node->user_data); 43 | 44 | // Get Input/Output Tensors 45 | const TfLiteEvalTensor *input = 46 | tflite_micro::micro::GetEvalInput(context, node, 0); 47 | 48 | TfLiteEvalTensor *output = 49 | tflite_micro::micro::GetEvalOutput(context, node, 0); 50 | 51 | // Pointers to data in In/Out Tensors 52 | int16_t *out_data = tflite_micro::micro::GetTensorData(output); 53 | const int16_t *in_data = tflite_micro::micro::GetTensorData(input); 54 | mean_int16(in_data, out_data, op_data->start_dim_size, op_data->mean_dim_size, 55 | op_data->end_dim_size, op_data->scale_mul); 56 | 57 | return kTfLiteOk; 58 | } 59 | 60 | } // namespace meani16 61 | 62 | TFLMRegistration *Register_XC_meani16() { 63 | static TFLMRegistration r = {meani16::Init, nullptr, meani16::Prepare, 64 | meani16::Eval}; 65 | return &r; 66 | } 67 | 68 | } // namespace xcore 69 | } // namespace micro 70 | } // namespace ops 71 | } // namespace tflite_micro 72 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_mul.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include "../thread_call.h" 4 | #include "xcore_config.h" 5 | #include "xcore_custom_options.h" 6 | #include "xcore_utils.h" 7 | extern "C" { 8 | #include "lib_nn/api/nn_layers.h" 9 | #include "lib_nn/api/nn_operator.h" 10 | } 11 | 12 | namespace tflite_micro { 13 | namespace ops { 14 | namespace micro { 15 | namespace xcore { 16 | namespace mul { 17 | 18 | struct MulShared { 
19 | int8_t *Y; 20 | int8_t *X1; 21 | int8_t *X2; 22 | nn_mul_params_t *blob; 23 | }; 24 | 25 | extern "C" { 26 | void mul_thread_worker(void *shared, void *start, void *end) { 27 | int *s = static_cast(start); 28 | int *e = static_cast(end); 29 | auto sd = static_cast(shared); 30 | mul_elementwise(sd->X1 + *s, sd->X2 + *s, *e - *s, sd->blob, sd->Y + *s); 31 | } 32 | } 33 | 34 | // This is the struct that contains the data required by the operator 35 | struct MulOpData { 36 | nn_mul_params_t *mp_params; 37 | int tc; 38 | int s[XCORE_MAX_NUM_THREADS]; 39 | int e[XCORE_MAX_NUM_THREADS]; 40 | }; 41 | 42 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 43 | auto op_data = construct_persistent_object(context); 44 | 45 | auto parser = CustomOptionParser(buffer, length); 46 | op_data->mp_params = (nn_mul_params_t *)parser.parseNamedCustomOption("mp").AsBlob().data(); 47 | 48 | return op_data; 49 | } 50 | 51 | // Does all the requests for scratches 52 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 53 | auto *op_data = static_cast(node->user_data); 54 | MicroContext *micro_context = GetMicroContext(context); 55 | xc_context_config_t *xc_config = reinterpret_cast( 56 | micro_context->external_context()); 57 | const TfLiteEvalTensor *output = 58 | tflite_micro::micro::GetEvalOutput(context, node, 0); 59 | int output_size = tflite_micro::micro::GetTensorShape(output).FlatSize(); 60 | op_data->tc = calculateAlignedThreadSplit(xc_config->model_thread_count, output_size, op_data->s, op_data->e); 61 | return kTfLiteOk; 62 | } 63 | 64 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 65 | 66 | auto *op_data = static_cast(node->user_data); 67 | 68 | // Get Input/Output Tensors 69 | const TfLiteEvalTensor *input1 = 70 | tflite_micro::micro::GetEvalInput(context, node, 0); 71 | const TfLiteEvalTensor *input2 = 72 | tflite_micro::micro::GetEvalInput(context, node, 1); 73 | TfLiteEvalTensor *output = 
tflite_micro::micro::GetEvalOutput(context, node, 0); 74 | 75 | // Pointers to data in In/Out Tensors 76 | int8_t *out_data = tflite_micro::micro::GetTensorData(output); 77 | const int8_t *in1_data = tflite_micro::micro::GetTensorData(input1); 78 | const int8_t *in2_data = tflite_micro::micro::GetTensorData(input2); 79 | 80 | MicroContext *micro_context = GetMicroContext(context); 81 | xc_context_config_t *xc_config = reinterpret_cast( 82 | micro_context->external_context()); 83 | const int tc = op_data->tc; 84 | MulShared shared_data; 85 | shared_data.Y = out_data; 86 | shared_data.X1 = const_cast(in1_data); 87 | shared_data.X2 = const_cast(in2_data); 88 | shared_data.blob = op_data->mp_params; 89 | for (int t = 0; t < tc - 1; t++) { 90 | thread_variable_setup((void *)&op_data->s[t], (void *)&op_data->e[t], 91 | xc_config->thread_info.thread_ids.id[t]); 92 | } 93 | thread_call((void *)&shared_data, &op_data->s[tc - 1], &op_data->e[tc - 1], 94 | (thread_function_pointer_t)mul_thread_worker, 95 | &xc_config->thread_info); 96 | 97 | return kTfLiteOk; 98 | } 99 | 100 | } // namespace mul 101 | 102 | TFLMRegistration *Register_XC_mul() { 103 | static TFLMRegistration r = {mul::Init, nullptr, mul::Prepare, 104 | mul::Eval}; 105 | return &r; 106 | } 107 | 108 | } // namespace xcore 109 | } // namespace micro 110 | } // namespace ops 111 | } // namespace tflite_micro 112 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_n_to_4.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include "xcore_custom_options.h" 4 | #include "xcore_utils.h" 5 | 6 | extern "C" { 7 | #include "lib_nn/api/nn_operator.h" 8 | } 9 | 10 | namespace tflite_micro { 11 | namespace ops { 12 | namespace micro { 13 | namespace xcore { 14 | namespace pad_n_to_4 { 15 | 16 | struct OpData { 17 | uint32_t n; 18 | uint32_t 
pad_val; 19 | }; 20 | 21 | 22 | void* Init(TfLiteContext* context, const char* buffer, size_t length) { 23 | auto op_data = construct_persistent_object(context); 24 | 25 | auto parser = CustomOptionParser(buffer, length); 26 | auto pad_value = parser.parseNamedCustomOption("pv").AsUInt32(); 27 | op_data->pad_val = pad_value; 28 | return op_data; 29 | } 30 | 31 | TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { 32 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 33 | auto shape = tflite_micro::micro::GetTensorShape(output); 34 | TFLITE_DCHECK(shape.DimensionsCount() == 4 && shape.DimsData()[0] == 1); 35 | int number_of_pixels = shape.DimsData()[1] * shape.DimsData()[2]; 36 | OpData* op_data = static_cast(node->user_data); 37 | op_data->n = number_of_pixels; 38 | return kTfLiteOk; 39 | } 40 | 41 | TfLiteStatus Eval3To4(TfLiteContext* context, TfLiteNode* node) { 42 | TFLITE_DCHECK(node->user_data != nullptr); 43 | const OpData* data = static_cast(node->user_data); 44 | 45 | const TfLiteEvalTensor* input = 46 | tflite_micro::micro::GetEvalInput(context, node, /*index=*/0); 47 | 48 | TfLiteEvalTensor* output = 49 | tflite_micro::micro::GetEvalOutput(context, node, /*index=*/0); 50 | 51 | int8_t *output_p = 52 | const_cast(tflite_micro::micro::GetTensorData(output)); 53 | int8_t *input_p = 54 | const_cast(tflite_micro::micro::GetTensorData(input)); 55 | 56 | // The function takes the number of pixels as data->n 57 | pad_3_to_4_run(output_p, 58 | input_p, 59 | data->n, data->pad_val); 60 | 61 | return kTfLiteOk; 62 | } 63 | 64 | TfLiteStatus Eval1To4(TfLiteContext* context, TfLiteNode* node) { 65 | TFLITE_DCHECK(node->user_data != nullptr); 66 | const OpData* data = static_cast(node->user_data); 67 | 68 | const TfLiteEvalTensor* input = 69 | tflite_micro::micro::GetEvalInput(context, node, /*index=*/0); 70 | 71 | TfLiteEvalTensor* output = 72 | tflite_micro::micro::GetEvalOutput(context, node, /*index=*/0); 73 | 74 | 
int8_t *output_p = 75 | const_cast(tflite_micro::micro::GetTensorData(output)); 76 | int8_t *input_p = 77 | const_cast(tflite_micro::micro::GetTensorData(input)); 78 | 79 | // The function takes the number of 4 byte input chunks 80 | int n_4 = (data->n) / 4; 81 | pad_1_to_4_run(output_p, 82 | input_p, 83 | n_4, data->pad_val); 84 | 85 | // We pad the remaining inputs here 86 | for(int i = n_4 * 4; i < data->n; i++){ 87 | output_p[i * 4] = input_p[i]; 88 | for(int n = 1; n < 4; n++) { 89 | output_p[i * 4 + n] = (int8_t)data->pad_val; 90 | } 91 | } 92 | 93 | return kTfLiteOk; 94 | } 95 | 96 | } // namespace pad 97 | 98 | TFLMRegistration *Register_XC_pad_3_to_4() { 99 | static TFLMRegistration r = {pad_n_to_4::Init, nullptr, pad_n_to_4::Prepare, pad_n_to_4::Eval3To4}; 100 | return &r; 101 | } 102 | 103 | TFLMRegistration *Register_XC_pad_1_to_4() { 104 | static TFLMRegistration r = {pad_n_to_4::Init, nullptr, pad_n_to_4::Prepare, pad_n_to_4::Eval1To4}; 105 | return &r; 106 | } 107 | 108 | } // namespace xcore 109 | } // namespace micro 110 | } // namespace ops 111 | } // namespace tflite_micro 112 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_ops.cc: -------------------------------------------------------------------------------- 1 | #include "xcore_ops.h" 2 | 3 | #if defined(__xtflm_conf_h_exists__) 4 | #include "xtflm_conf.h" 5 | #else 6 | #ifndef XTFLM_OPERATORS 7 | #define XTFLM_OPERATORS 10 8 | #endif 9 | #endif 10 | 11 | #ifndef XCORE_TFLITE_MICRO_PATCHED 12 | #error \ 13 | "tflite-micro patch not applied! Fix by running 'make patch' in lib_tflite_micro!" 
14 | #endif 15 | 16 | namespace tflite_micro { 17 | namespace ops { 18 | namespace micro { 19 | namespace xcore { 20 | 21 | void RegisterXCOps(MicroOpResolver *res) { 22 | auto *resolver = 23 | reinterpret_cast *>(res); 24 | 25 | resolver->AddCustom(XC_beta_activationf32_OpCode, 26 | Register_XC_beta_activationf32()); 27 | resolver->AddCustom(XC_beta_concatf32_OpCode, Register_XC_beta_concatf32()); 28 | resolver->AddCustom(XC_beta_convf32_OpCode, Register_XC_beta_convf32()); 29 | resolver->AddCustom(XC_beta_transposeconvf32_OpCode, 30 | Register_XC_beta_transposeconvf32()); 31 | resolver->AddCustom(XC_beta_fcf32_OpCode, Register_XC_beta_fcf32()); 32 | resolver->AddCustom(XC_binaryi16_OpCode, Register_XC_binaryi16()); 33 | resolver->AddCustom(XC_unaryi16_OpCode, Register_XC_unaryi16()); 34 | resolver->AddCustom(XC_conv2d_v2_OpCode, Register_XC_conv2d_v2()); 35 | resolver->AddCustom(XC_maxpool2d_OpCode, Register_XC_maxpool2d()); 36 | resolver->AddCustom(XC_softmax_OpCode, Register_XC_softmax()); 37 | resolver->AddCustom(XC_batched_softmax_OpCode, Register_XC_batched_softmax()); 38 | resolver->AddCustom(XC_add_OpCode, Register_XC_add()); 39 | resolver->AddCustom(XC_slice_OpCode, Register_XC_slice()); 40 | resolver->AddCustom(XC_broadcast_OpCode, Register_XC_broadcast()); 41 | resolver->AddCustom(XC_ld_weights_OpCode, Register_XC_ld_weights()); 42 | resolver->AddCustom(XC_ld_weights_wait_OpCode, Register_XC_ld_weights_wait()); 43 | resolver->AddCustom(XC_bsign_8_OpCode, Register_XC_bsign_8()); 44 | resolver->AddCustom(XC_lookup_OpCode, Register_XC_lookup()); 45 | resolver->AddCustom(XC_pad_OpCode, Register_XC_pad()); 46 | resolver->AddCustom(XC_concat_OpCode, Register_XC_concat()); 47 | resolver->AddCustom(XC_transpose_OpCode, Register_XC_transpose()); 48 | resolver->AddCustom(XC_pad_3_to_4_OpCode, Register_XC_pad_3_to_4()); 49 | resolver->AddCustom(XC_pad_1_to_4_OpCode, Register_XC_pad_1_to_4()); 50 | resolver->AddCustom(XC_mul_OpCode, Register_XC_mul()); 51 | 
resolver->AddCustom(XC_mean_OpCode, Register_XC_mean()); 52 | resolver->AddCustom(XC_meani16_OpCode, Register_XC_meani16()); 53 | resolver->AddCustom(XC_expand_8_to_16_OpCode, Register_XC_expand_8_to_16()); 54 | resolver->AddCustom(XC_no_op_OpCode, Register_XC_no_op()); 55 | resolver->AddCustom(XC_store_tensor_OpCode, Register_XC_store_tensor()); 56 | resolver->AddCustom(XC_load_tensor_OpCode, Register_XC_load_tensor()); 57 | } 58 | 59 | } // namespace xcore 60 | } // namespace micro 61 | } // namespace ops 62 | } // namespace tflite_micro 63 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_ops.h: -------------------------------------------------------------------------------- 1 | #ifndef XCORE_OPS_H_ 2 | #define XCORE_OPS_H_ 3 | 4 | #include "tensorflow/lite/micro/micro_mutable_op_resolver.h" 5 | 6 | namespace tflite_micro { 7 | namespace ops { 8 | namespace micro { 9 | namespace xcore { 10 | 11 | constexpr const char *XC_beta_activationf32_OpCode = "XC_beta_activationf32"; 12 | constexpr const char *XC_beta_concatf32_OpCode = "XC_beta_concatf32"; 13 | constexpr const char *XC_beta_convf32_OpCode = "XC_beta_convf32"; 14 | constexpr const char *XC_beta_transposeconvf32_OpCode = 15 | "XC_beta_transposeconvf32"; 16 | constexpr const char *XC_beta_fcf32_OpCode = "XC_beta_fcf32"; 17 | 18 | constexpr const char *XC_binaryi16_OpCode = "XC_binaryi16"; 19 | constexpr const char *XC_unaryi16_OpCode = "XC_unaryi16"; 20 | 21 | constexpr const char *XC_conv2d_v2_OpCode = "XC_conv2d_v2"; 22 | constexpr const char *XC_maxpool2d_OpCode = "XC_maxpool2d"; 23 | constexpr const char *XC_softmax_OpCode = "XC_softmax"; 24 | constexpr const char *XC_batched_softmax_OpCode = "XC_batched_softmax"; 25 | constexpr const char *XC_ld_weights_OpCode = "XC_ld_weights"; 26 | constexpr const char *XC_ld_weights_wait_OpCode = "XC_ld_weights_wait"; 27 | constexpr const char *XC_add_OpCode = "XC_add"; 28 | constexpr 
const char *XC_slice_OpCode = "XC_slice"; 29 | constexpr const char *XC_broadcast_OpCode = "XC_broadcast"; 30 | constexpr const char *XC_lookup_OpCode = "XC_lookup"; 31 | constexpr const char *XC_pad_OpCode = "XC_pad"; 32 | constexpr const char *XC_concat_OpCode = "XC_concat"; 33 | constexpr const char *XC_transpose_OpCode = "XC_transpose"; 34 | constexpr const char *XC_pad_3_to_4_OpCode = "XC_pad_3_to_4"; 35 | constexpr const char *XC_pad_1_to_4_OpCode = "XC_pad_1_to_4"; 36 | constexpr const char *XC_mul_OpCode = "XC_mul"; 37 | constexpr const char *XC_mean_OpCode = "XC_mean"; 38 | constexpr const char *XC_meani16_OpCode = "XC_meani16"; 39 | constexpr const char *XC_expand_8_to_16_OpCode = "XC_expand_8_to_16"; 40 | constexpr const char *XC_no_op_OpCode = "XC_no_op"; 41 | constexpr const char *XC_store_tensor_OpCode = "XC_store_tensor"; 42 | constexpr const char *XC_load_tensor_OpCode = "XC_load_tensor"; 43 | 44 | // Binarized ops 45 | constexpr const char *XC_bsign_8_OpCode = "XC_bsign_8"; 46 | 47 | TFLMRegistration *Register_XC_beta_activationf32(); 48 | TFLMRegistration *Register_XC_beta_concatf32(); 49 | TFLMRegistration *Register_XC_beta_convf32(); 50 | TFLMRegistration *Register_XC_beta_transposeconvf32(); 51 | TFLMRegistration *Register_XC_beta_fcf32(); 52 | 53 | TFLMRegistration *Register_XC_binaryi16(); 54 | TFLMRegistration *Register_XC_unaryi16(); 55 | 56 | TFLMRegistration *Register_XC_conv2d_v2(); 57 | TFLMRegistration *Register_XC_maxpool2d(); 58 | TFLMRegistration *Register_XC_softmax(); 59 | TFLMRegistration *Register_XC_batched_softmax(); 60 | TFLMRegistration *Register_XC_ld_weights(); 61 | TFLMRegistration *Register_XC_ld_weights_wait(); 62 | TFLMRegistration *Register_XC_add(); 63 | TFLMRegistration *Register_XC_slice(); 64 | TFLMRegistration *Register_XC_broadcast(); 65 | TFLMRegistration *Register_XC_lookup(); 66 | TFLMRegistration *Register_XC_pad(); 67 | TFLMRegistration *Register_XC_concat(); 68 | TFLMRegistration *Register_XC_transpose(); 
69 | TFLMRegistration *Register_XC_pad_3_to_4(); 70 | TFLMRegistration *Register_XC_pad_1_to_4(); 71 | TFLMRegistration *Register_XC_mul(); 72 | TFLMRegistration *Register_XC_mean(); 73 | TFLMRegistration *Register_XC_meani16(); 74 | TFLMRegistration *Register_XC_expand_8_to_16(); 75 | TFLMRegistration *Register_XC_no_op(); 76 | TFLMRegistration *Register_XC_store_tensor(); 77 | TFLMRegistration *Register_XC_load_tensor(); 78 | 79 | // Binarized ops 80 | TFLMRegistration *Register_XC_bsign_8(); 81 | 82 | void RegisterXCOps(tflite_micro::MicroOpResolver *res); 83 | 84 | } // namespace xcore 85 | } // namespace micro 86 | } // namespace ops 87 | } // namespace tflite_micro 88 | 89 | #endif // XCORE_OPS_H_ 90 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_pad.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include "xcore_custom_options.h" 4 | #include "xcore_utils.h" 5 | #include 6 | 7 | extern "C" { 8 | #include "vpu_memmove_word_aligned.h" 9 | #include "vpu_memset_256.h" 10 | } 11 | 12 | namespace tflite_micro { 13 | namespace ops { 14 | namespace micro { 15 | namespace xcore { 16 | namespace pad { 17 | 18 | struct PadOpData { 19 | int32_t start; 20 | int32_t pad_size; 21 | int32_t size; 22 | int32_t num_copies; 23 | int32_t zero_point; 24 | int32_t end; 25 | void (*func_ptr)(void *, const void *, unsigned); 26 | }; 27 | 28 | void memmove_wrapper(void *dst, const void *src, unsigned size) { 29 | memmove(dst, src, size); 30 | } 31 | 32 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 33 | auto op_data = construct_persistent_object(context); 34 | auto parser = CustomOptionParser(buffer, length); 35 | op_data->start = parser.parseNamedCustomOption("s").AsInt32(); 36 | op_data->pad_size = parser.parseNamedCustomOption("p").AsInt32(); 37 | op_data->size = 
parser.parseNamedCustomOption("l").AsInt32(); 38 | op_data->num_copies = parser.parseNamedCustomOption("n").AsInt32(); 39 | op_data->zero_point = parser.parseNamedCustomOption("z").AsInt32(); 40 | op_data->end = parser.parseNamedCustomOption("e").AsInt32(); 41 | bool use_vpu = parser.parseNamedCustomOption("v").AsBool(); 42 | op_data->func_ptr = use_vpu ? vpu_memmove_word_aligned : memmove_wrapper; 43 | return op_data; 44 | } 45 | 46 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 47 | return kTfLiteOk; 48 | } 49 | 50 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 51 | TFLITE_DCHECK(node->user_data != nullptr); 52 | 53 | auto *op_data = static_cast(node->user_data); 54 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 55 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 56 | const int8_t *in_data = tflite_micro::micro::GetTensorData(input); 57 | int8_t *out_data = tflite_micro::micro::GetTensorData(output); 58 | uint8_t from[32]; 59 | broadcast_32_to_256(from, op_data->zero_point); 60 | if (op_data->start) 61 | vpu_memset_256(out_data, from, op_data->start); 62 | out_data += op_data->start; 63 | const int size = op_data->size; 64 | const int pad_size = op_data->pad_size; 65 | void (*func_ptr)(void *, const void *, unsigned) = op_data->func_ptr; 66 | for (int i = 0; i < op_data->num_copies; i++) { 67 | func_ptr(out_data, in_data, size); 68 | out_data += size; 69 | in_data += size; 70 | vpu_memset_256(out_data, from, pad_size); 71 | out_data += pad_size; 72 | } 73 | func_ptr(out_data, in_data, size); 74 | out_data += size; 75 | vpu_memset_256(out_data, from, op_data->end); 76 | return kTfLiteOk; 77 | } 78 | 79 | } // namespace pad 80 | 81 | TFLMRegistration *Register_XC_pad() { 82 | static TFLMRegistration r = {pad::Init, nullptr, pad::Prepare, pad::Eval}; 83 | return &r; 84 | } 85 | 86 | } // namespace xcore 87 | } // namespace micro 88 | } // namespace ops 89 | 
} // namespace tflite_micro 90 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_profiler.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, XMOS Ltd, All rights reserved 2 | #include "xcore_profiler.h" 3 | 4 | #include "tensorflow/lite/kernels/internal/compatibility.h" 5 | #include "tensorflow/lite/micro/micro_time.h" 6 | 7 | namespace tflite_micro { 8 | namespace micro { 9 | namespace xcore { 10 | 11 | void XCoreProfiler::Init(tflite_micro::MicroAllocator *allocator, 12 | size_t max_event_count) { 13 | max_event_count_ = max_event_count; 14 | event_durations_ = static_cast( 15 | allocator->AllocatePersistentBuffer(max_event_count * sizeof(uint32_t))); 16 | } 17 | 18 | uint32_t const *XCoreProfiler::GetEventDurations() { return event_durations_; } 19 | 20 | size_t XCoreProfiler::GetNumEvents() { return event_count_; } 21 | 22 | void XCoreProfiler::ClearEvents() { event_count_ = 0; } 23 | 24 | uint32_t XCoreProfiler::BeginEvent(const char *tag) { 25 | TFLITE_DCHECK(tag); 26 | event_tag_ = tag; 27 | event_start_time_ = tflite_micro::GetCurrentTimeTicks(); 28 | return 0; 29 | } 30 | 31 | void XCoreProfiler::EndEvent(uint32_t event_handle) { 32 | int32_t event_end_time = tflite_micro::GetCurrentTimeTicks(); 33 | event_count_ = event_count_ % max_event_count_; 34 | // wrap if there are too many events 35 | event_durations_[event_count_++] = event_end_time - event_start_time_; 36 | } 37 | 38 | } // namespace xcore 39 | } // namespace micro 40 | } // namespace tflite_micro 41 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_profiler.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, XMOS Ltd, All rights reserved 2 | 3 | #ifndef XCORE_PROFILER_H_ 4 | #define XCORE_PROFILER_H_ 5 | 6 | 
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_profiler.h"

#if !defined(XCORE_PROFILER_DEFAULT_MAX_LEVELS)
#define XCORE_PROFILER_DEFAULT_MAX_LEVELS (64)
#endif

namespace tflite_micro {
namespace micro {
namespace xcore {

// Per-op timing profiler that records event durations (in ticks) into a
// fixed-size buffer, wrapping when full.
class XCoreProfiler : public tflite_micro::MicroProfiler {
public:
  explicit XCoreProfiler(){};
  ~XCoreProfiler() override = default;

  void Init(tflite_micro::MicroAllocator *allocator,
            size_t max_event_count = XCORE_PROFILER_DEFAULT_MAX_LEVELS);

  void ClearEvents();

  uint32_t BeginEvent(const char *tag) override;

  // Event_handle is ignored since TFLu does not support concurrent events.
  void EndEvent(uint32_t event_handle) override;

  uint32_t const *GetEventDurations();
  size_t GetNumEvents();

private:
  const char *event_tag_;
  uint32_t event_start_time_;
  size_t event_count_ = 0;
  size_t max_event_count_ = 0;
  uint32_t *event_durations_;
  TF_LITE_REMOVE_VIRTUAL_DELETE
};

} // namespace xcore
} // namespace micro
} // namespace tflite_micro

#endif // XCORE_PROFILER_H_
--------------------------------------------------------------------------------
/lib_tflite_micro/src/tflite-xcore-kernels/xcore_slice.cc:
--------------------------------------------------------------------------------
// Copyright (c) 2023, XMOS Ltd, All rights reserved

extern "C" {
#include "vpu_memmove_word_aligned.h"
}

#include "xcore_custom_options.h"
#include "xcore_utils.h"

namespace tflite_micro {
namespace ops {
namespace micro {
namespace xcore {
namespace slice {

// Strided-copy slice: num_copies runs of `size` bytes, the input pointer
// advancing `offset` bytes per run, starting `start` bytes into the input.
struct SliceOpData {
  int32_t start;
  int32_t offset;
  int32_t size;
  int32_t num_copies;
  void (*func_ptr)(void *, const void *, unsigned);
};

// memmove with the same signature as vpu_memmove_word_aligned.
void memmove_wrapper(void *dst, const void *src, unsigned size) {
  memmove(dst, src, size);
}

void *Init(TfLiteContext *context, const char *buffer, size_t length) {
  auto op_data = construct_persistent_object<SliceOpData>(context);
  auto parser = CustomOptionParser(buffer, length);
  op_data->start = parser.parseNamedCustomOption("s").AsInt32();
  op_data->offset = parser.parseNamedCustomOption("o").AsInt32();
  op_data->size = parser.parseNamedCustomOption("l").AsInt32();
  op_data->num_copies = parser.parseNamedCustomOption("n").AsInt32();
  bool use_vpu = parser.parseNamedCustomOption("v").AsBool();
  op_data->func_ptr = use_vpu ? vpu_memmove_word_aligned : memmove_wrapper;
  return op_data;
}

// Does all the requests for scratches
TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) {
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) {
  auto *op_data = static_cast<SliceOpData *>(node->user_data);
  // Get Input/Output Tensors
  const TfLiteEvalTensor *input =
      tflite_micro::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor *output =
      tflite_micro::micro::GetEvalOutput(context, node, 0);
  // Pointers to data in In/Out Tensors
  const int8_t *in_data =
      tflite_micro::micro::GetTensorData<int8_t>(input) + op_data->start;
  int8_t *out_data = tflite_micro::micro::GetTensorData<int8_t>(output);
  const int size = op_data->size;
  const int offset = op_data->offset;
  void (*func_ptr)(void *, const void *, unsigned) = op_data->func_ptr;
  for (int i = 0; i < op_data->num_copies; i++) {
    func_ptr(out_data, in_data, size);
    in_data += offset;
    out_data += size;
  }
  return kTfLiteOk;
}

} // namespace slice

TFLMRegistration *Register_XC_slice() {
  static TFLMRegistration r = {slice::Init, nullptr, slice::Prepare,
                               slice::Eval};
  return &r;
}

} // namespace xcore
} // namespace micro
} // namespace ops
} // namespace tflite_micro
--------------------------------------------------------------------------------
/lib_tflite_micro/src/tflite-xcore-kernels/xcore_transpose.cc:
--------------------------------------------------------------------------------
#include "tensorflow/lite/c/common.h"
#include "xcore_custom_options.h"
#include "xcore_utils.h"

namespace tflite_micro {
namespace ops {
namespace micro {
namespace xcore {
namespace transpose {

using tflite_micro::micro::GetEvalInput;
using tflite_micro::micro::GetEvalOutput;
using tflite_micro::micro::GetTensorData;

constexpr int kTransposeDims = 4; // Exactly 4 dimensions as specified

// Transposed output shape and per-dimension input strides (element offsets).
struct TransposeOpData {
  int32_t t_shape[kTransposeDims];
  int32_t offsets[kTransposeDims];
};

void *Init(TfLiteContext *context, const char *buffer, size_t length) {
  auto op_data = construct_persistent_object<TransposeOpData>(context);
  auto parser = CustomOptionParser(buffer, length);

  auto t_shape_vector = parser.parseNamedCustomOption("s").AsVector();
  auto offsets_vector = parser.parseNamedCustomOption("o").AsVector();

  for (int i = 0; i < kTransposeDims; ++i) {
    op_data->t_shape[i] = t_shape_vector[i].AsInt32();
    op_data->offsets[i] = offsets_vector[i].AsInt32();
  }

  return op_data;
}

TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) {
  // No preparation needed
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) {
  auto *op_data = static_cast<TransposeOpData *>(node->user_data);

  const int32_t *t_shape = op_data->t_shape;
  const int32_t *offsets = op_data->offsets;

  const int8_t *input_data =
      GetTensorData<int8_t>(GetEvalInput(context, node, 0));
  int8_t *output_data = GetTensorData<int8_t>(GetEvalOutput(context, node, 0));
// TODO: 1. Optimise by pre-computing increments 53 | // TODO: 2. Dereference t_shape in advance 54 | // TODO: 3. Multi-threading 55 | for (int i0 = 0; i0 < t_shape[0]; ++i0) { 56 | const int j0 = i0 * offsets[0]; 57 | for (int i1 = 0; i1 < t_shape[1]; ++i1) { 58 | const int j1 = j0 + i1 * offsets[1]; 59 | for (int i2 = 0; i2 < t_shape[2]; ++i2) { 60 | const int j2 = j1 + i2 * offsets[2]; 61 | for (int i3 = 0; i3 < t_shape[3]; ++i3) { 62 | const int j3 = j2 + i3 * offsets[3]; 63 | *output_data++ = input_data[j3]; 64 | } 65 | } 66 | } 67 | } 68 | 69 | return kTfLiteOk; 70 | } 71 | 72 | } // namespace transpose 73 | 74 | TFLMRegistration *Register_XC_transpose() { 75 | static TFLMRegistration r = {transpose::Init, nullptr, transpose::Prepare, 76 | transpose::Eval}; 77 | return &r; 78 | } 79 | 80 | } // namespace xcore 81 | } // namespace micro 82 | } // namespace ops 83 | } // namespace tflite_micro 84 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_utils.cc: -------------------------------------------------------------------------------- 1 | #include "xcore_utils.h" 2 | 3 | namespace tflite_micro { 4 | namespace ops { 5 | namespace micro { 6 | namespace xcore { 7 | 8 | TfLiteStatus No_Op_Eval(TfLiteContext *context, TfLiteNode *node) { 9 | // Get Input/Output Tensors 10 | // const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 11 | // TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 12 | // // Pointers to data in In/Out Tensors 13 | // const int8_t *in_data = tflite_micro::micro::GetTensorData(input); 14 | // int8_t *out_data = tflite_micro::micro::GetTensorData(output); 15 | 16 | // size_t sizeof_tensor_type; 17 | // TfLiteTypeSizeOf(output->type, &sizeof_tensor_type); 18 | // int size = tflite_micro::micro::GetTensorShape(output).FlatSize(); 19 | // memcpy((int8_t *)out_data, (int8_t *)in_data, size * 
sizeof_tensor_type); 20 | return kTfLiteOk; 21 | } 22 | // No-op kernel registration: only the Eval entry is provided. 23 | TFLMRegistration *Register_XC_no_op() { 24 | static TFLMRegistration r = {nullptr, nullptr, nullptr, 25 | No_Op_Eval}; 26 | return &r; 27 | } 28 | // Makes `src` available through *dest: RAM data is aliased in place (returns 0 bytes used); otherwise the data is copied into the caller-provided *dest buffer via memload (returns `size` bytes used). 29 | size_t FetchBuffer(int8_t **dest, int8_t const *src, size_t size) { 30 | if (is_ram_address((uintptr_t)src)) { 31 | *dest = (int8_t *)src; 32 | return 0; 33 | } else { 34 | memload((void *)*dest, (void *)src, size); 35 | return size; 36 | } 37 | } 38 | 39 | } // namespace xcore 40 | } // namespace micro 41 | } // namespace ops 42 | } // namespace tflite_micro 43 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/thread_call.h: -------------------------------------------------------------------------------- 1 | #if !defined(__micro_thread_library_h__) 2 | #define __micro_thread_library_h__ 3 | 4 | #include <stdint.h> /* restored: needed for uint32_t/uint64_t below (header name was lost in extraction) */ 5 | #ifdef __xcore__ 6 | #include <xcore/thread.h> /* FIXME(review): original header name lost in extraction — restore the xcore header that provides synchronizer_t; confirm against repo */ 7 | #else 8 | typedef unsigned synchronizer_t; 9 | #endif 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | #define XCORE_MAX_NUM_THREADS 5 16 | 17 | #ifdef __XC__ 18 | #define UNSAFE unsafe 19 | #else 20 | #define UNSAFE /**/ 21 | #endif 22 | 23 | typedef struct { // THIS STRUCT MUST BE IN SYNC WITH ASSEMBLY CODE. 24 | union { 25 | uint64_t id_aligned[2]; // Guarantee 64-bit alignment. 
26 | uint32_t id[4]; // Actual IDs 27 | } thread_ids; // ids of at most 4 threads - live during invoke 28 | uint32_t synchroniser; // synchroniser for threads - live during invoke 29 | } thread_info_t; 30 | 31 | 32 | #ifndef __XC__ 33 | 34 | typedef void (*thread_function_pointer_t)(void * arg0, void * arg1, void * arg2); 35 | struct inference_engine; 36 | 37 | /** Function that runs the client task 38 | */ 39 | void thread_client(thread_info_t *ptr, int n); 40 | 41 | /** Stores the synchroniser `s` into the thread info block for later use during invoke. 42 | */ 43 | static inline void thread_store_sync(thread_info_t *ptr, uint32_t s) { 44 | ptr->synchroniser = s; 45 | } 46 | 47 | /** Function that sets up parameters for one of the client threads 48 | * This particular one passes the second and third arguments to the thread. 49 | * When the thread function is actually called (through thread_call) 50 | * the thread function will be called with those two arguments, 51 | * and the first shared argument provided by thread_call. 52 | * Note - we can make versions with more or fewer parameters. 53 | * Note - we could pass this function the thread-function itself 54 | * 55 | * \param arg1 Second argument for the thread function 56 | * \param arg2 Third argument for the thread function 57 | * \param thread_id The thread_id to initialise; one of ptr[0]..ptr[3] above 58 | */ 59 | #ifdef __xcore__ 60 | static inline void thread_variable_setup(void * arg1, void * arg2, uint32_t thread_id) { 61 | #ifdef __VX4A__ 62 | asm volatile("xm.tsetr %0, 11, %1" :: "r" (thread_id), "r" (arg1)); // worker thread reg 11 <- arg1 63 | asm volatile("xm.tsetr %0, 12, %1" :: "r" (thread_id), "r" (arg2)); // worker thread reg 12 <- arg2 64 | asm volatile("xm.tsetr %0, 24, %1" :: "r" (thread_id), "r" (1)); // worker thread reg 24 <- 1; NOTE(review): presumably a go/valid flag — confirm against thread_call.S 65 | #else 66 | asm volatile("set t[%0]:r1, %1" :: "r" (thread_id), "r" (arg1)); // worker thread r1 <- arg1 67 | asm volatile("set t[%0]:r2, %1" :: "r" (thread_id), "r" (arg2)); // worker thread r2 <- arg2 68 | asm volatile("set t[%0]:r10, %1" :: "r" (thread_id), "r" (1)); // worker thread r10 <- 1; NOTE(review): presumably a go/valid flag — confirm against thread_call.S 69 | #endif 70 | } 71 | #else 72 | extern void thread_variable_setup(void * arg1, void * arg2, uint32_t thread_id); 73 | #endif 74 | 75 | /** Function that starts all thread functions and runs them until completion. 76 | * It is assumed that the variable parts have been set up per thread 77 | * by thread_variable_setup. 78 | * This thread will also invoke the function with the given variable arguments. 79 | * 80 | * \param arg0 First argument shared among all threads (usually the output pointer) 81 | * \param arg1 Second argument for the master thread function 82 | * \param arg2 Third argument for the master thread function 83 | * \param fp thread function to call on all threads. 84 | * \param ptr Pointer to the thread info block held in the xcore 85 | * interpreter. 
86 | */ 87 | void thread_call(void * arg0, void * arg1, void * arg2, 88 | thread_function_pointer_t fp, thread_info_t *ptr); 89 | #ifdef __cplusplus 90 | }; 91 | #endif 92 | 93 | #endif // __XC__ 94 | 95 | #endif // __micro_thread_library_h__ 96 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/thread_call_host_emulation.c: -------------------------------------------------------------------------------- 1 | #ifndef __xcore__ 2 | 3 | #include "thread_call.h" 4 | #include 5 | #include 6 | 7 | static void *args[4][10]; 8 | static int32_t max_thread_id = -1; 9 | void thread_variable_setup(void *arg1, void *arg2, uint32_t thread_id) { 10 | assert(thread_id != -1); 11 | args[thread_id][1] = arg1; 12 | args[thread_id][2] = arg2; 13 | if ((int)thread_id > max_thread_id) { 14 | max_thread_id = thread_id; 15 | } 16 | } 17 | 18 | void thread_client(thread_info_t *ptr, int n) { 19 | ptr->thread_ids.id[n] = n; 20 | } 21 | 22 | void thread_call(void *arg0, void *arg1, void *arg2, 23 | thread_function_pointer_t fp, thread_info_t *ptr) { 24 | (*fp)(arg0, arg1, arg2); 25 | for (int i = 0; i <= max_thread_id; i++) { 26 | (*fp)(arg0, args[i][1], args[i][2]); 27 | } 28 | max_thread_id = -1; 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tile_ram_server.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "tile_ram_server.h" 10 | #include "memory_parallel_transport.h" 11 | 12 | #define TMP_BUF_SIZE_IN_BYTES 1024 13 | 14 | #define VERSION_MAJOR 1 15 | #define VERSION_MINOR 2 16 | #define VERSION_LITTLE_ENDING (VERSION_MAJOR |\ 17 | (VERSION_MINOR << 8) |\ 18 | ((VERSION_MAJOR^0xff) << 16) |\ 19 | ((VERSION_MINOR^0xff) << 24)) 20 | 21 | void tile_ram_server(chanend_t *c_tile_ram, flash_t *headers, int 
n_tile_ram, 22 | const int8_t *tile_ram) { 23 | uint32_t tmp = ((uint32_t*)tile_ram)[0]; 24 | if ((tmp ^ VERSION_LITTLE_ENDING) != 0) { 25 | printstr("version check error"); 26 | asm("clre; waiteu"); 27 | } 28 | memcpy(headers, tile_ram + 4, (n_tile_ram * sizeof(flash_t))); 29 | assert(n_tile_ram == 1); 30 | int tile_ram_server_alive = 1; 31 | while(tile_ram_server_alive) { 32 | int byte_address, number_bytes; 33 | flash_command_t cmd; 34 | int i = 0; // TODO: extend SELECT-FOR-LOOP 35 | cmd = chan_in_word(c_tile_ram[i]); 36 | //if (cmd == FLASH_READ_PARAMETERS || cmd == FLASH_READ_PARAMETERS_COMPRESSED_FLOAT) { 37 | if (cmd == FLASH_READ_PARAMETERS) { 38 | // Set parallel mode 39 | chan_out_word(c_tile_ram[i], 1); 40 | byte_address = chan_in_word(c_tile_ram[i]); 41 | number_bytes = chan_in_word(c_tile_ram[i]); 42 | byte_address = headers[i].parameters_start + byte_address; 43 | } else if (cmd == FLASH_SERVER_INIT) { 44 | ; // NO init required 45 | } else if (cmd == FLASH_SERVER_QUIT) { 46 | tile_ram_server_alive = 0; 47 | } 48 | if (tile_ram_server_alive && cmd != FLASH_SERVER_INIT) { 49 | memory_parallel_send(c_tile_ram[i], &((uint8_t *)tile_ram)[byte_address], number_bytes); 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /repos.list: -------------------------------------------------------------------------------- 1 | lib_nn git@github.com:xmos/lib_nn ebe972405e41182830f1026aa8867fb60028aaff 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # python_version 3.12 2 | numpy>=1.20 3 | opencv-python>=4.4 4 | flatbuffers<2.0,>=1.12 5 | tflite==2.4.0 6 | pyusb==1.2.1 -------------------------------------------------------------------------------- /sample_mobilenet/add_newlines_to_csv.py: -------------------------------------------------------------------------------- 1 | 
file = open('src/in.csv', 'r') 2 | 3 | 4 | f2 = open('src/out.csv', 'w') 5 | 6 | count = 0 7 | while 1: 8 | 9 | # read by character 10 | char = file.read(1) 11 | f2.write(char) 12 | 13 | if char == ',': 14 | count += 1 15 | 16 | if count == 80: 17 | f2.write('\n') 18 | count = 0 19 | 20 | if not char: 21 | break 22 | 23 | #print(char) 24 | 25 | file.close() 26 | f2.close() 27 | -------------------------------------------------------------------------------- /sample_mobilenet/mobilenet_v1_25.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/sample_mobilenet/mobilenet_v1_25.tflite -------------------------------------------------------------------------------- /sample_mobilenet/s1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/sample_mobilenet/s1.jpg -------------------------------------------------------------------------------- /sample_mobilenet/s2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/sample_mobilenet/s2.jpg -------------------------------------------------------------------------------- /sample_mobilenet/s3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/sample_mobilenet/s3.jpg -------------------------------------------------------------------------------- /tflite_micro_compiler/.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | -------------------------------------------------------------------------------- 
/tflite_micro_compiler/.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Screenshots** 20 | If applicable, add screenshots to help explain your problem. 21 | 22 | **Desktop (please complete the following information):** 23 | - OS: [e.g. iOS] 24 | - TensorFlow version 25 | - CMake/Make Version 26 | 27 | **Additional context** 28 | Add any other context about the problem here. 29 | -------------------------------------------------------------------------------- /tflite_micro_compiler/.github/ISSUE_TEMPLATE/discussion.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Discussion 3 | about: Starting a friendly discussion about something relating to this repository 4 | title: "[DISCUSSION]" 5 | labels: discussion 6 | assignees: '' 7 | 8 | --- 9 | 10 | -------------------------------------------------------------------------------- /tflite_micro_compiler/.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[FEATURE]" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 
15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /tflite_micro_compiler/.github/ISSUE_TEMPLATE/improvement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Improvement 3 | about: Recommend an area of the project that could be improved 4 | title: "[IMPROVEMENT]" 5 | labels: Improvement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What should be improved?** 11 | A clear and concise description of what could be improved. 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what desired behaviour you'd like to see. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the improvement here. 18 | -------------------------------------------------------------------------------- /tflite_micro_compiler/.github/workflows/c-cpp.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | strategy: 8 | matrix: 9 | config: 10 | - {name: "Linux", os: ubuntu-latest, cmake-generator: ""} 11 | - {name: "Windows", os: windows-latest, cmake-generator: "-G \"MinGW Makefiles\""} 12 | runs-on: ${{ matrix.config.os }} 13 | name: ${{ matrix.config.name }} 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Requirements 18 | shell: bash 19 | run: | 20 | cd .. 
21 | git clone https://github.com/tensorflow/tensorflow.git 22 | cd tensorflow 23 | make -f tensorflow/lite/micro/tools/make/Makefile hello_world 24 | - name: Requirements (Windows) 25 | if: matrix.config.os == 'windows-latest' 26 | run: | 27 | choco install wget unzip 28 | - name: Configure 29 | shell: bash 30 | run: | 31 | mkdir build 32 | cd build 33 | cmake ${{ matrix.config.cmake-generator }} .. 34 | - name: Compile 35 | shell: bash 36 | run: | 37 | cd build 38 | cmake --build . 39 | - name: Run 40 | shell: bash 41 | run: | 42 | cd build 43 | wget https://storage.googleapis.com/download.tensorflow.org/models/tflite/micro/hello_world_2020_04_13.zip 44 | unzip hello_world_2020_04_13.zip 45 | ./compiler hello_world/models/model.tflite out.cpp 46 | - name: Test setup 47 | run: | 48 | cd examples/generic_test 49 | mkdir build 50 | cd build 51 | cmake ${{ matrix.config.cmake-generator }} .. 52 | cmake --build . 53 | # Binary representation of output -1.09 (~sin(1.5*PI)) 54 | echo "\xd6\xf3\x8b\xbf" > outExpect.txt 55 | # Binary representation of input 4.71 (1.5*PI) 56 | - name: Test setup (Windows) 57 | if: matrix.config.os == 'windows-latest' 58 | run: | 59 | cd examples/generic_test/build 60 | echo 52 b8 96 40 > inData.tmp 61 | certutil -f -decodehex inData.tmp inData.bin 62 | - name: Test setup (Linux) 63 | if: matrix.config.os == 'ubuntu-latest' 64 | run: | 65 | cd examples/generic_test/build 66 | echo -n -e '\x52\xb8\x96\x40' > inData.bin 67 | - name: Test run 68 | shell: bash 69 | run: | 70 | cd examples/generic_test/build 71 | ./generic_test inData.bin > outData.txt 72 | cmp outData.txt outExpect.txt 73 | -------------------------------------------------------------------------------- /tflite_micro_compiler/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | mobilnet 3 | hello_world 4 | hello_world_compiled 5 | compiler 6 | build*/ 7 | 
-------------------------------------------------------------------------------- /tflite_micro_compiler/.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "Linux", 5 | "includePath": [ 6 | "${workspaceFolder}/**", 7 | "${workspaceFolder}/../tensorflow", 8 | "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/", 9 | "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/gemmlowp", 10 | "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/flatbuffers/include", 11 | "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/ruy", 12 | "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/kissfft" 13 | ], 14 | "defines": [ "TF_LITE_STATIC_MEMORY", "NDEBUG", "TF_LITE_DISABLE_X86_NEON", "SUFFICIENT_ARENA_SIZE" ], 15 | "compilerPath": "/usr/bin/g++", 16 | "cStandard": "c11", 17 | "cppStandard": "c++17", 18 | "intelliSenseMode": "clang-x64" 19 | } 20 | ], 21 | "version": 4 22 | } 23 | -------------------------------------------------------------------------------- /tflite_micro_compiler/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "(gdb) hello example Launch", 9 | "type": "cppdbg", 10 | "request": "launch", 11 | "program": "${workspaceFolder}/example/hello_world", 12 | "args": [], 13 | "stopAtEntry": true, 14 | "cwd": "${workspaceFolder}", 15 | "environment": [], 16 | "externalConsole": false, 17 | "MIMode": "gdb", 18 | "setupCommands": [ 19 | { 20 | "description": "Enable pretty-printing for gdb", 21 | "text": "-enable-pretty-printing", 22 | "ignoreFailures": true 23 | } 24 | ] 25 | }, 26 | { 27 | "name": "(gdb) cifar compiled example Launch", 28 | "type": "cppdbg", 29 | "request": "launch", 30 | "program": "${workspaceFolder}/examples/cifar10_compiled", 31 | "args": [], 32 | "stopAtEntry": true, 33 | "cwd": "${workspaceFolder}", 34 | "environment": [], 35 | "externalConsole": false, 36 | "MIMode": "gdb", 37 | "setupCommands": [ 38 | { 39 | "description": "Enable pretty-printing for gdb", 40 | "text": "-enable-pretty-printing", 41 | "ignoreFailures": true 42 | } 43 | ] 44 | }, 45 | { 46 | "name": "(gdb) cifar interpreter example Launch", 47 | "type": "cppdbg", 48 | "request": "launch", 49 | "program": "${workspaceFolder}/examples/cifar10", 50 | "args": [], 51 | "stopAtEntry": true, 52 | "cwd": "${workspaceFolder}", 53 | "environment": [], 54 | "externalConsole": false, 55 | "MIMode": "gdb", 56 | "setupCommands": [ 57 | { 58 | "description": "Enable pretty-printing for gdb", 59 | "text": "-enable-pretty-printing", 60 | "ignoreFailures": true 61 | } 62 | ] 63 | } 64 | ] 65 | } -------------------------------------------------------------------------------- /tflite_micro_compiler/.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // for the documentation about the tasks.json format 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | 
"label": "make", 8 | "type": "shell", 9 | "command": "make", 10 | // use options.cwd property if the Makefile is not in the project root ${workspaceRoot} dir 11 | "options": { 12 | "cwd": "${workspaceRoot}" 13 | }, 14 | // start the build without prompting for task selection, use "group": "build" otherwise 15 | "group": { 16 | "kind": "build", 17 | "isDefault": true 18 | }, 19 | "presentation": { 20 | "echo": true, 21 | "reveal": "always", 22 | "focus": false, 23 | "panel": "shared" 24 | }, 25 | // arg passing example: in this case is executed make QUIET=0 26 | "args": ["QUIET=0"], 27 | // Use the standard less compilation problem matcher. 28 | "problemMatcher": { 29 | "base": "$gcc", 30 | "fileLocation": [ "relative", "${workspaceRoot}" ] 31 | } 32 | } 33 | ] 34 | } -------------------------------------------------------------------------------- /tflite_micro_compiler/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 3.13 FATAL_ERROR) 2 | PROJECT(tflite_micro_compiler) 3 | 4 | # We define XBUILD to use own cmake when building with 5 | # lib_tflite_micro 6 | IF(XBUILD) 7 | INCLUDE("cmake/xbuild.cmake") 8 | ELSE() 9 | SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) 10 | 11 | SET(CMAKE_EXPORT_COMPILE_COMMANDS ON) 12 | 13 | IF(NOT TF_DIR) 14 | SET(TF_DIR "../tensorflow" CACHE STRING "TensorFlow source directory") 15 | ENDIF() 16 | 17 | GET_FILENAME_COMPONENT(TF_ABSPATH ${TF_DIR} REALPATH) 18 | 19 | IF(NOT GET_TF_SRC) 20 | if(EXISTS "${TF_ABSPATH}") 21 | SET(TFL_SRC ${TF_ABSPATH}/tensorflow/lite) 22 | SET(TFLM_SRC ${TFL_SRC}/micro) 23 | SET(TFLMD_SRC ${TF_ABSPATH}/..) 24 | SET(TF_INCS 25 | ${TF_ABSPATH} 26 | ${TFLMD_SRC}/flatbuffers/include 27 | ${TFLMD_SRC}/ruy 28 | ) 29 | #SET(TF_LIB tensorflow-microlite) 30 | ELSE() 31 | MESSAGE(FATAL_ERROR "\ 32 | No valid TensorFlow source directory provided, default path \ 33 | '../tensorflow' is also not valid. 
To automatically pull TensorFlow \ 34 | source please provide argument '-DGET_TF_SRC=ON' to CMake.\ 35 | ") 36 | ENDIF() 37 | ELSE() 38 | FIND_PACKAGE(TFLite REQUIRED) 39 | SET(TF_INCS 40 | ${TFLite_INCLUDE_DIRS} 41 | ) 42 | SET(TF_LIB tensorflow-microlite) 43 | ENDIF() 44 | 45 | SET(COMPILER_HEADERS 46 | ${PROJECT_SOURCE_DIR}/src/CodeWriter.h 47 | ${PROJECT_SOURCE_DIR}/src/Compiler.h 48 | ${PROJECT_SOURCE_DIR}/src/CustomOperators.h 49 | ${PROJECT_SOURCE_DIR}/src/MemMap.h 50 | ${PROJECT_SOURCE_DIR}/src/RecordAllocations.h 51 | ${PROJECT_SOURCE_DIR}/src/TypeToString.h 52 | ) 53 | 54 | SET(COMPILER_SRCS 55 | ${PROJECT_SOURCE_DIR}/src/CodeWriter.cc 56 | ${PROJECT_SOURCE_DIR}/src/Compiler.cc 57 | ${PROJECT_SOURCE_DIR}/src/CustomOperators.cc 58 | ${PROJECT_SOURCE_DIR}/src/MemMap.cc 59 | ${PROJECT_SOURCE_DIR}/src/RecordAllocations.cc 60 | ${PROJECT_SOURCE_DIR}/src/TypeToString.cc 61 | ${PROJECT_SOURCE_DIR}/src/main.cc 62 | ) 63 | 64 | ADD_LIBRARY(${PROJECT_NAME} STATIC 65 | ${COMPILER_SRCS} 66 | ) 67 | 68 | TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PUBLIC 69 | ${TF_INCS} 70 | ) 71 | 72 | TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC ${TF_LIB}) 73 | 74 | TARGET_COMPILE_DEFINITIONS(${PROJECT_NAME} PUBLIC 75 | TF_LITE_STATIC_MEMORY 76 | TF_LITE_DISABLE_X86_NEON 77 | SUFFICIENT_ARENA_SIZE=128*1024*1024 78 | ) 79 | TARGET_COMPILE_FEATURES(${PROJECT_NAME} PUBLIC cxx_std_14) 80 | ADD_EXECUTABLE(compiler src/main.cc) 81 | TARGET_LINK_LIBRARIES(compiler ${PROJECT_NAME}) 82 | 83 | ADD_CUSTOM_TARGET( 84 | format 85 | COMMAND clang-format -i ${COMPILER_SRCS} ${COMPILER_HEADERS} 86 | ) 87 | 88 | IF(TF_EXAMPLES) 89 | ADD_SUBDIRECTORY(${PROJECT_SOURCE_DIR}/examples) 90 | ENDIF() 91 | ENDIF() 92 | -------------------------------------------------------------------------------- /tflite_micro_compiler/Makefile: -------------------------------------------------------------------------------- 1 | TF_DIR=../tensorflow 2 | include common.mk 3 | 4 | .PHONY: tflite all 5 | 6 | all: compiler 
examples 7 | 8 | tflite: 9 | $(MAKE) -C $(TF_DIR) -f tensorflow/lite/micro/tools/make/Makefile microlite 10 | 11 | COMPILER_OBJS = src/main.o src/Compiler.o src/CodeWriter.o src/TypeToString.o src/RecordAllocations.o src/MemMap.o src/CustomOperators.o 12 | 13 | compiler: $(COMPILER_OBJS) tflite 14 | $(CXX) $(LDOPTS) -o $@ $(COMPILER_OBJS) $(LIBS) 15 | 16 | clean: clean-compiler clean-examples # 'Makefile' below must match the real file-name case (Linux is case-sensitive) 17 | $(MAKE) -C $(TF_DIR) -f tensorflow/lite/micro/tools/make/Makefile clean 18 | 19 | FORMAT_FILES := $(shell find src -regex '.*\(h\|cpp\)') 20 | 21 | format: 22 | clang-format -i $(FORMAT_FILES) 23 | 24 | .PHONY: examples clean-examples clean-compiler 25 | examples: 26 | cd examples && $(MAKE) 27 | 28 | clean-examples: 29 | $(MAKE) -C examples clean 30 | 31 | clean-compiler: 32 | $(RM) src/*.o compiler 33 | 34 | -------------------------------------------------------------------------------- /tflite_micro_compiler/NEWS.txt: -------------------------------------------------------------------------------- 1 | 2020-05-15 2 | Properly handle allocation from the Prepare method 3 | move example code to separate directory 4 | 5 | 2020-05-13 6 | Support unknown operators by guessing the name of the register function 7 | 8 | -------------------------------------------------------------------------------- /tflite_micro_compiler/README.md: -------------------------------------------------------------------------------- 1 | # tflite_micro_compiler 2 | 3 | Generate tflite micro code which bypasses the interpreter (directly calls into kernels) 4 | 5 | Basically this code uses a fully set up tflite micro instance to dump the internal allocations and 6 | function calls assigned to the model, then dumps the tensor and node settings into a compilable 7 | file, eliminating the need for running the interpreter at each program start and for resolving the correct 8 | kernel at run time. 
9 | 10 | An in depth explanation of the motivation and benefits is included in the matching [RFC](https://docs.google.com/document/d/1wDqC50sjCaWyQxsSn_Y-XAGh8-ozIgm2HDzX_b9DIyo/edit?usp=sharing). 11 | 12 | # Building 13 | 14 | ## CMake 15 | 16 | Below the two methods of incorporating the TensorFlow sources into your build are 17 | explained. 18 | 19 | The basic flow of building with CMake is 20 | 21 | ``` bash 22 | mkdir build 23 | cd build 24 | cmake [options] .. 25 | make 26 | ``` 27 | 28 | ### Examples 29 | The examples cmake [here](examples/CMakeLists.txt) is by default not included due to issues with TensorFlow source code compatibility when using specific code versions. 30 | To enable building the examples pass `-DTF_EXAMPLES=ON` to CMake. 31 | 32 | ## Automatic TensorFlow Source Fetching 33 | 34 | To pull the TensorFlow sources using CMake with the variable `GET_TF_SRC` 35 | set to `ON`. 36 | 37 | e.g. 38 | 39 | ``` bash 40 | cmake -DGET_TF_SRC=ON .. 41 | ``` 42 | 43 | This will retrieve the TensorFlow master branch's code. 44 | It should also be noted that `GET_TF_SRC` is prioritized over `TF_DIR` (see below). 45 | If you want to specify a TensorFlow tag to checkout then this can be passed to 46 | CMake using the option `TF_TAG`. 47 | 48 | e.g. 49 | 50 | ``` bash 51 | cmake -DGET_TF_SRC=ON TF_TAG=v2.2.0 .. 52 | ``` 53 | 54 | Similarly a Git commit hash can be provided using `TF_COMMIT`. Note that 55 | `TF_TAG` takes precedence if both are provided. 56 | 57 | e.g. 58 | 59 | ```bash 60 | cmake -DGET_TF_SRC=ON TF_COMMIT=0fecf6f89fd7bacc1ec4213b946a254e885b82ac .. 61 | ``` 62 | 63 | To checkout a different TensorFlow code base without clearing the CMake cache 64 | the argument `TF_RECACHE` should be set, this will force the TensorFlow 65 | source to be checked-out again. 66 | 67 | e.g. 68 | 69 | ```bash 70 | cmake -DGET_TF_SRC=ON -DTF_RECACHE=ON TF_COMMIT=0fecf6f89fd7bacc1ec4213b946a254e885b82ac .. 
71 | ``` 72 | 73 | ## Providing TensorFlow Source Manually 74 | 75 | By default CMake looks for the TensorFlow source in the directory `../tensorflow`. 76 | If you want to specify your TensorFlow source directory this can be done by 77 | providing the argument `TF_DIR`. 78 | 79 | e.g. 80 | 81 | ``` bash 82 | cmake -DTF_DIR=../my_tensorflow .. 83 | ``` 84 | 85 | ## Additional Targets 86 | 87 | ### format 88 | 89 | To invoke `clang-format` CMake provides the `format` target. 90 | 91 | e.g. 92 | 93 | ```bash 94 | cmake .. 95 | make format 96 | ``` 97 | 98 | ## Make 99 | 100 | - check out tensorflow master next to this project (in ../tensorflow) 101 | - start with building the tflite micro library as described in https://www.tensorflow.org/lite/microcontrollers/library: 102 | 103 | - `cd ../tensorflow` 104 | 105 | - `make -f tensorflow/lite/micro/tools/make/Makefile hello_world_bin` 106 | [optionally add BUILD_TYPE=debug] 107 | 108 | - now run make in this project to get the compiler 109 | 110 | # Usage 111 | 112 | - the compiler is invoked as `./compiler input.tflite output.cpp [prefix]` 113 | 114 | e.g. 
115 | 116 | ``` bash 117 | ./compiler hello_world.tflite hello_compiled.cpp hello_ 118 | ``` 119 | 120 | - for a quick view into the generated code see [`compiled_hello_world.cc`](https://github.com/cpetig/tflite_micro_compiler/blob/master/examples/compiled_hello_world.cc) 121 | 122 | You can compare calling into interpreter and compiled code between [`hello_world.cc`](https://github.com/cpetig/tflite_micro_compiler/blob/master/examples/hello_world.cc) 123 | and [`hello_world2.cc`](https://github.com/cpetig/tflite_micro_compiler/blob/master/examples/hello_world2.cc) 124 | 125 | - The example directory contains a collection of traditional tflite micro and compiled versions: 126 | 127 | - hello_world: Standard tflite micro example 128 | - cifar10: Computer vision CNN example 129 | 130 | # Limitations 131 | 132 | - no support for big endian machines, yet 133 | -------------------------------------------------------------------------------- /tflite_micro_compiler/VerifiedTensorflowVersion.txt: -------------------------------------------------------------------------------- 1 | 62b6c316d2a9a1fb06aefb086856e76241280c08 2 | -------------------------------------------------------------------------------- /tflite_micro_compiler/cmake/xbuild.cmake: -------------------------------------------------------------------------------- 1 | #********************** 2 | # Disable in-source build. 3 | #********************** 4 | if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") 5 | message(FATAL_ERROR "In-source build is not allowed! 
Please specify a build folder.\n\tex:cmake -B build") 6 | endif() 7 | 8 | #********************** 9 | # install 10 | #********************** 11 | set(INSTALL_DIR "${PROJECT_SOURCE_DIR}/bin") 12 | 13 | #********************** 14 | # Build flags 15 | #********************** 16 | 17 | set(BUILD_FLAGS 18 | "-g" 19 | "-O0" 20 | ) 21 | 22 | #********************** 23 | # Targets 24 | #********************** 25 | set(TOP_DIR 26 | "${CMAKE_CURRENT_SOURCE_DIR}/..") 27 | include(${TOP_DIR}/cmakefiles/xtflm.cmake) 28 | 29 | add_library(xtflitemicro SHARED) 30 | set(DEFINTIONS 31 | "__xtflm_conf_h_exists__" 32 | "NO_INTERPRETER" 33 | "NN_USE_REF" 34 | "TF_LITE_STATIC_MEMORY" 35 | "TF_LITE_DISABLE_X86_NEON" 36 | ) 37 | target_compile_options(xtflitemicro PRIVATE ${BUILD_FLAGS}) 38 | target_link_options(xtflitemicro PRIVATE ${BUILD_FLAGS}) 39 | target_compile_definitions(xtflitemicro PUBLIC 40 | ${DEFINTIONS} 41 | ) 42 | target_compile_features(xtflitemicro PUBLIC cxx_std_11) 43 | target_sources(xtflitemicro 44 | PRIVATE ${TFLM_KERNEL_SOURCES} 45 | PRIVATE ${TFLITE_SOURCES} 46 | PRIVATE ${NN_SOURCES} 47 | PRIVATE ${XTFLIB_KERNEL_SOURCES} 48 | ) 49 | target_include_directories(xtflitemicro 50 | PRIVATE ${ALL_INCLUDES} 51 | ) 52 | install(TARGETS xtflitemicro DESTINATION ${INSTALL_DIR}) 53 | 54 | 55 | add_executable(tflite_micro_compiler) 56 | unset(DEFINTIONS) 57 | set(DEFINTIONS 58 | "__xtflm_conf_h_exists__" 59 | "NN_USE_REF" 60 | "TF_LITE_STATIC_MEMORY" 61 | "TF_LITE_DISABLE_X86_NEON" 62 | "SUFFICIENT_ARENA_SIZE=128*1024*1024" 63 | ) 64 | target_compile_options(tflite_micro_compiler PRIVATE ${BUILD_FLAGS}) 65 | target_link_options(tflite_micro_compiler PRIVATE ${BUILD_FLAGS}) 66 | file(GLOB_RECURSE COMPILER_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/src/*.h") 67 | file(GLOB_RECURSE COMPILER_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cc") 68 | target_compile_definitions(tflite_micro_compiler PUBLIC 69 | ${DEFINTIONS} 70 | ) 71 | target_compile_features(tflite_micro_compiler PUBLIC 
cxx_std_11) 72 | target_sources(tflite_micro_compiler 73 | PRIVATE ${ALL_SOURCES} 74 | PRIVATE ${COMPILER_SRCS} 75 | ) 76 | target_include_directories(tflite_micro_compiler 77 | PRIVATE ${COMPILER_HEADERS} 78 | PRIVATE ${ALL_INCLUDES} 79 | ) 80 | install(TARGETS tflite_micro_compiler DESTINATION ${INSTALL_DIR}) 81 | -------------------------------------------------------------------------------- /tflite_micro_compiler/common.mk: -------------------------------------------------------------------------------- 1 | CXXFLAGS=-g -std=c++14 -DTF_LITE_STATIC_MEMORY -DNDEBUG -O3 -DTF_LITE_DISABLE_X86_NEON -DSUFFICIENT_ARENA_SIZE=128\*1024\*1024 \ 2 | -I$(TF_DIR) -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/ \ 3 | -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/gemmlowp \ 4 | -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/flatbuffers/include \ 5 | -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/ruy \ 6 | -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/kissfft 7 | 8 | LDOPTS=-L $(TF_DIR)/tensorflow/lite/micro/tools/make/gen/$(HOST_OS_BUILD)/lib 9 | 10 | 11 | ifeq ($(OS),Windows_NT) 12 | LIBS=-ltensorflow-microlite 13 | HOST_OS_BUILD=windows_x86_64 14 | else 15 | LIBS=-ltensorflow-microlite -ldl 16 | HOST_OS_BUILD=linux_x86_64 17 | endif 18 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/.gitignore: -------------------------------------------------------------------------------- 1 | cifar10 2 | cifar10_compiled 3 | mobilenet 4 | mobilenet_compiled 5 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | MESSAGE(STATUS "Micro compiler examples included") 2 | MACRO(ADD_EXAMPLE TARGET_NAME) 3 | ADD_EXECUTABLE(${TARGET_NAME} ${TARGET_NAME}.cc ${ARGV}) 4 | TARGET_LINK_LIBRARIES(${TARGET_NAME} PUBLIC ${PROJECT_NAME}) 5 
| ENDMACRO() 6 | 7 | 8 | ADD_EXAMPLE(hello_world hello_world_model.cc) 9 | ADD_EXAMPLE(mobilenet mobilenet_v1_0_25_160_quantized.c gnu.c) 10 | ADD_EXAMPLE(cifar10 cifar10_model.c truck.c) 11 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/Makefile: -------------------------------------------------------------------------------- 1 | TF_DIR=../../tensorflow 2 | include ../common.mk 3 | 4 | 5 | all: hello_world hello_world_compiled mobilenet mobilenet_compiled cifar10 cifar10_compiled 6 | 7 | clean: 8 | $(RM) *.o hello_world hello_world_compiled mobilnet mobilnet_compiled cifar10 cifar10_compiled 9 | 10 | mobilenet: mobilenet.o mobilenet_v1_0_25_160_quantized.o gnu.o 11 | $(CXX) -o $@ $^ $(LDOPTS) $(LIBS) 12 | 13 | mobilenet_compiled: mobilenet2.o compiled_mobilenet.o gnu.o 14 | $(CXX) -o $@ $^ $(LDOPTS) $(LIBS) 15 | 16 | hello_world: hello_world.o hello_world_model.o 17 | $(CXX) -o $@ $^ $(LDOPTS) $(LIBS) 18 | 19 | hello_world_compiled: hello_world2.o compiled_hello_world.o 20 | $(CXX) -o $@ $^ $(LDOPTS) $(LIBS) 21 | 22 | hello_world_model.o: hello_world_model.cc 23 | $(CXX) -o $@ -c $^ $(CXXFLAGS) 24 | 25 | cifar10: cifar10_model.o truck.o cifar10.o 26 | $(CXX) -o $@ $^ $(LDOPTS) $(LIBS) 27 | 28 | cifar10_compiled: truck.o compiled_cifar10.o cifar10_run_comp.o 29 | $(CXX) -o $@ $^ $(LDOPTS) $(LIBS) 30 | 31 | lstm_compiled: lstm_test.o lstm_compiled.o 32 | $(CXX) -o $@ $^ ${LIBS} 33 | 34 | regenerate: ../compiler 35 | ../compiler hello_world.tflite compiled_hello.cpp hello_ 36 | ../compiler mobilenet_v1_0_25_160_quantized.tflite compiled_mobilenet.cpp mobilenet_ 37 | ../compiler cifar10.tflite cifar10_compiled.cc cifar_ 38 | ../compiler lstm2.tflite lstm_compiled.cc lstm_ 39 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/Makefile.inc: -------------------------------------------------------------------------------- 1 | 2 | 
MOBILENET_SRCS := \ 3 | examples/mobilenet.cc examples/mobilenet_v1_0_25_160_quantized.c examples/gnu.c 4 | 5 | MOBILENET_COMPILED_SRCS := \ 6 | examples/mobilenet2.cc examples/compiled_mobilenet.cc examples/gnu.c 7 | 8 | MOBILENET_HDRS := 9 | MOBILENET_COMPILED_HDRS := 10 | 11 | HELLO_WORLD_SRC := \ 12 | examples/hello_world.cc examples/hello_world_model.cc 13 | HELLO_WORLD_HDRS := 14 | 15 | HELLO_WORLD_COMPILED_SRC := \ 16 | examples/hello_world2.cc examples/compiled_hello_world.cc 17 | HELLO_WORLD_COMPILED_HDRS := 18 | 19 | 20 | CIFAR10_SRC := \ 21 | examples/cifar10.cc examples/cifar10_model.cc examples/truck.c 22 | CIFAR10_HDRS := 23 | 24 | 25 | CIFAR10_COMPILED_SRC := \ 26 | examples/cifar10_compiled.cc examples/cifar10_run_comp.cc examples/truck.c 27 | CIFAR10_COMPILED_HDRS := 28 | 29 | $(info Adding mobilenet mobilenet_compiled ) 30 | 31 | # Builds a standalone binary. 32 | $(eval $(call microlite_test,mobilenet,\ 33 | $(MOBILENET_SRCS),$(MOBILENET_HDRS))) 34 | 35 | $(eval $(call microlite_test,mobilenet_compiled,\ 36 | $(MOBILENET_COMPILED_SRCS),$(MOBILENET_COMPILED_HDRS))) 37 | 38 | $(info Adding cifar10 cifar10_compiled ) 39 | $(eval $(call microlite_test,cifar10,\ 40 | $(CIFAR10_SRC),$(CIFAR10_HDRS))) 41 | 42 | $(eval $(call microlite_test,cifar10_compiled,\ 43 | $(CIFAR10_SRC),$(CIFAR10_HDRS))) 44 | 45 | $(info Adding hello_world ) 46 | $(eval $(call microlite_test,hello_world,\ 47 | $(HELLO_WORLD_SRC),$(HELLO_WORLD_HDRS))) 48 | 49 | $(eval $(call microlite_test,hello_world_compiled,\ 50 | $(HELLO_WORLD_COMPILED_SRC),$(HELLO_WORLD_COMPILED_HDRS))) -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/Model_source.txt: -------------------------------------------------------------------------------- 1 | https://www.tensorflow.org/tutorials/images/cnn 2 | https://storage.googleapis.com/tfhub-lite-models/tensorflow/lite-model/mobilenet_v1_0.25_160_quantized/1/default/1.tflite 3 | 
-------------------------------------------------------------------------------- /tflite_micro_compiler/examples/cifar10.cc: -------------------------------------------------------------------------------- 1 | 2 | #include // for check output 3 | 4 | #include "tensorflow/lite/micro/all_ops_resolver.h" 5 | #include "tensorflow/lite/micro/micro_error_reporter.h" 6 | #include "tensorflow/lite/micro/micro_interpreter.h" 7 | #include "tensorflow/lite/micro/simple_memory_allocator.h" 8 | #include "tensorflow/lite/schema/schema_generated.h" 9 | #include "tensorflow/lite/version.h" 10 | 11 | // Create an area of memory to use for input, output, and intermediate arrays. 12 | // The size of this will depend on the model you're using, and may need to be 13 | // determined by experimentation. 14 | static const int tensor_arena_size = 150 * 1000; 15 | static uint8_t tensor_arena[tensor_arena_size]; 16 | 17 | extern "C" const unsigned char cifar10_tflite[]; 18 | extern "C" const unsigned char truck[]; 19 | extern "C" const int cifar10_tflite_len; 20 | 21 | // Set up logging. 22 | static tflite::ErrorReporter* error_reporter = nullptr; 23 | // This pulls in all the operation implementations we need. 24 | static tflite::AllOpsResolver* resolver = nullptr; 25 | static const tflite::Model* model = nullptr; 26 | static tflite::MicroInterpreter* interpreter = nullptr; 27 | 28 | void init(void) { 29 | static tflite::MicroErrorReporter micro_error_reporter; 30 | error_reporter = µ_error_reporter; 31 | 32 | // Map the model into a usable data structure. This doesn't involve any 33 | // copying or parsing, it's a very lightweight operation. 
34 | model = ::tflite::GetModel(cifar10_tflite); 35 | if (model->version() != TFLITE_SCHEMA_VERSION) { 36 | TF_LITE_REPORT_ERROR(error_reporter, 37 | "Model provided is schema version %d not equal " 38 | "to supported version %d.\n", 39 | model->version(), TFLITE_SCHEMA_VERSION); 40 | return; 41 | } 42 | static tflite::AllOpsResolver local_resolver; 43 | resolver = &local_resolver; 44 | 45 | // Build an interpreter to run the model with. 46 | static tflite::MicroInterpreter static_interpreter( 47 | model, *resolver, tensor_arena, tensor_arena_size, error_reporter); 48 | interpreter = &static_interpreter; 49 | TfLiteStatus allocate_status = interpreter->AllocateTensors(); 50 | if (allocate_status != kTfLiteOk) { 51 | TF_LITE_REPORT_ERROR(error_reporter, "AllocateTensors() failed"); 52 | return; 53 | } 54 | } 55 | 56 | void run() { 57 | TfLiteTensor* model_input = interpreter->input(0); 58 | for (uint32_t i = 0; i < 32 * 32 * 3; ++i) 59 | model_input->data.f[i] = truck[i] / 255.0f; 60 | 61 | TfLiteStatus invoke_status = interpreter->Invoke(); 62 | if (invoke_status != kTfLiteOk) { 63 | TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed"); 64 | } 65 | TfLiteTensor* model_output = interpreter->output(0); 66 | for (int i = 0; i < model_output->dims->data[1]; ++i) 67 | std::cerr << model_output->data.f[i] << ", "; 68 | std::cerr << std::endl; 69 | } 70 | 71 | int main(int argc, char** argv) { 72 | init(); 73 | run(); 74 | return 0; 75 | } 76 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/cifar10.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/tflite_micro_compiler/examples/cifar10.tflite -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/cifar10_run_comp.cc: 
-------------------------------------------------------------------------------- 1 | /* Copyright 2020 Christof Petig. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | #include 17 | 18 | #include "tensorflow/lite/c/common.h" 19 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" 20 | 21 | extern "C" const unsigned char truck[]; 22 | 23 | extern void cifar_init(); 24 | extern void cifar_invoke(); 25 | extern TfLiteTensor* cifar_input(int index = 0); 26 | extern TfLiteTensor* cifar_output(int index = 0); 27 | 28 | void test_compiled(void) { 29 | float* in = cifar_input()->data.f; 30 | for (uint32_t i = 0; i < 32 * 32 * 3; ++i) in[i] = truck[i] / 255.0f; 31 | float* out = cifar_output()->data.f; 32 | cifar_invoke(); 33 | for (uint32_t i = 0; i < 10; ++i) std::cerr << out[i] << ", "; 34 | std::cerr << std::endl; 35 | } 36 | 37 | int main(int argc, char** argv) { 38 | cifar_init(); 39 | test_compiled(); 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/compiled_cifar10.cc.h: -------------------------------------------------------------------------------- 1 | // This file is generated. Do not edit. 
2 | // Generated on: 11.08.2020 11:26:36 3 | 4 | #ifndef cifar_GEN_H 5 | #define cifar_GEN_H 6 | 7 | #include "tensorflow/lite/c/common.h" 8 | 9 | // Sets up the model with init and prepare steps. 10 | TfLiteStatus cifar_init(); 11 | // Returns the input tensor with the given index. 12 | TfLiteTensor *cifar_input(int index); 13 | // Returns the output tensor with the given index. 14 | TfLiteTensor *cifar_output(int index); 15 | // Runs inference for the model. 16 | TfLiteStatus cifar_invoke(); 17 | 18 | // Returns the number of input tensors. 19 | inline size_t cifar_inputs() { return 1; } 20 | // Returns the number of output tensors. 21 | inline size_t cifar_outputs() { return 1; } 22 | 23 | inline void *cifar_input_ptr(int index) { 24 | return cifar_input(index)->data.data; 25 | } 26 | inline size_t cifar_input_size(int index) { return cifar_input(index)->bytes; } 27 | inline int cifar_input_dims_len(int index) { 28 | return cifar_input(index)->dims->data[0]; 29 | } 30 | inline int *cifar_input_dims(int index) { 31 | return &cifar_input(index)->dims->data[1]; 32 | } 33 | 34 | inline void *cifar_output_ptr(int index) { 35 | return cifar_output(index)->data.data; 36 | } 37 | inline size_t cifar_output_size(int index) { 38 | return cifar_output(index)->bytes; 39 | } 40 | inline int cifar_output_dims_len(int index) { 41 | return cifar_output(index)->dims->data[0]; 42 | } 43 | inline int *cifar_output_dims(int index) { 44 | return &cifar_output(index)->dims->data[1]; 45 | } 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/compiled_hello_world.cc.h: -------------------------------------------------------------------------------- 1 | // This file is generated. Do not edit. 2 | // Generated on: 11.08.2020 11:26:36 3 | 4 | #ifndef hello_GEN_H 5 | #define hello_GEN_H 6 | 7 | #include "tensorflow/lite/c/common.h" 8 | 9 | // Sets up the model with init and prepare steps. 
10 | TfLiteStatus hello_init(); 11 | // Returns the input tensor with the given index. 12 | TfLiteTensor *hello_input(int index); 13 | // Returns the output tensor with the given index. 14 | TfLiteTensor *hello_output(int index); 15 | // Runs inference for the model. 16 | TfLiteStatus hello_invoke(); 17 | 18 | // Returns the number of input tensors. 19 | inline size_t hello_inputs() { return 1; } 20 | // Returns the number of output tensors. 21 | inline size_t hello_outputs() { return 1; } 22 | 23 | inline void *hello_input_ptr(int index) { 24 | return hello_input(index)->data.data; 25 | } 26 | inline size_t hello_input_size(int index) { return hello_input(index)->bytes; } 27 | inline int hello_input_dims_len(int index) { 28 | return hello_input(index)->dims->data[0]; 29 | } 30 | inline int *hello_input_dims(int index) { 31 | return &hello_input(index)->dims->data[1]; 32 | } 33 | 34 | inline void *hello_output_ptr(int index) { 35 | return hello_output(index)->data.data; 36 | } 37 | inline size_t hello_output_size(int index) { 38 | return hello_output(index)->bytes; 39 | } 40 | inline int hello_output_dims_len(int index) { 41 | return hello_output(index)->dims->data[0]; 42 | } 43 | inline int *hello_output_dims(int index) { 44 | return &hello_output(index)->dims->data[1]; 45 | } 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/compiled_mobilenet.cc.h: -------------------------------------------------------------------------------- 1 | // This file is generated. Do not edit. 2 | // Generated on: 11.08.2020 11:26:36 3 | 4 | #ifndef mobilenet_GEN_H 5 | #define mobilenet_GEN_H 6 | 7 | #include "tensorflow/lite/c/common.h" 8 | 9 | // Sets up the model with init and prepare steps. 10 | TfLiteStatus mobilenet_init(); 11 | // Returns the input tensor with the given index. 12 | TfLiteTensor *mobilenet_input(int index); 13 | // Returns the output tensor with the given index. 
14 | TfLiteTensor *mobilenet_output(int index); 15 | // Runs inference for the model. 16 | TfLiteStatus mobilenet_invoke(); 17 | 18 | // Returns the number of input tensors. 19 | inline size_t mobilenet_inputs() { return 1; } 20 | // Returns the number of output tensors. 21 | inline size_t mobilenet_outputs() { return 1; } 22 | 23 | inline void *mobilenet_input_ptr(int index) { 24 | return mobilenet_input(index)->data.data; 25 | } 26 | inline size_t mobilenet_input_size(int index) { 27 | return mobilenet_input(index)->bytes; 28 | } 29 | inline int mobilenet_input_dims_len(int index) { 30 | return mobilenet_input(index)->dims->data[0]; 31 | } 32 | inline int *mobilenet_input_dims(int index) { 33 | return &mobilenet_input(index)->dims->data[1]; 34 | } 35 | 36 | inline void *mobilenet_output_ptr(int index) { 37 | return mobilenet_output(index)->data.data; 38 | } 39 | inline size_t mobilenet_output_size(int index) { 40 | return mobilenet_output(index)->bytes; 41 | } 42 | inline int mobilenet_output_dims_len(int index) { 43 | return mobilenet_output(index)->dims->data[0]; 44 | } 45 | inline int *mobilenet_output_dims(int index) { 46 | return &mobilenet_output(index)->dims->data[1]; 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/custom/.gitignore: -------------------------------------------------------------------------------- 1 | libtflite_micro_custom.so 2 | *.o 3 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/custom/Makefile: -------------------------------------------------------------------------------- 1 | TF_DIR=../../../tensorflow 2 | CXXFLAGS=-fPIC -g -std=c++11 -DTF_LITE_STATIC_MEMORY -DNDEBUG -O3 -DTF_LITE_DISABLE_X86_NEON \ 3 | -I${TF_DIR} -I${TF_DIR}/tensorflow/lite/micro/tools/make/downloads/ \ 4 | -I${TF_DIR}/tensorflow/lite/micro/tools/make/downloads/gemmlowp \ 5 | 
-I${TF_DIR}/tensorflow/lite/micro/tools/make/downloads/flatbuffers/include \ 6 | -I${TF_DIR}/tensorflow/lite/micro/tools/make/downloads/ruy \ 7 | -I${TF_DIR}/tensorflow/lite/micro/tools/make/downloads/kissfft 8 | 9 | libtflite_micro_custom.so: registration.o fake_implementations.o custom_implementations.o 10 | $(CXX) --shared -o $@ $^ 11 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/custom/custom.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/tflite_micro_compiler/examples/custom/custom.tflite -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/custom/custom_implementations.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "tensorflow/lite/c/builtin_op_data.h" 3 | #include "tensorflow/lite/micro/kernels/all_ops_resolver.h" 4 | 5 | namespace tflite { 6 | namespace ops { 7 | namespace micro { 8 | namespace complex { 9 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) 10 | { 11 | return kTfLiteOk; 12 | } 13 | } // namespace reduce_max 14 | TfLiteRegistration *Register_Complex(void) { 15 | static TfLiteRegistration res = { 16 | nullptr, 17 | nullptr, 18 | nullptr, 19 | complex::Eval, 20 | }; 21 | return &res; 22 | } 23 | namespace imag { 24 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) 25 | { 26 | return kTfLiteOk; 27 | } 28 | } // namespace reduce_max 29 | TfLiteRegistration *Register_Imag(void) { 30 | static TfLiteRegistration res = { 31 | nullptr, 32 | nullptr, 33 | nullptr, 34 | imag::Eval, 35 | }; 36 | return &res; 37 | } 38 | } // namespace micro 39 | } // namespace ops 40 | } // namespace tflite 41 | 42 | void register_addons2(tflite::ops::micro::AllOpsResolver *res) { 43 | res->AddCustom("Complex", 
tflite::ops::micro::Register_Complex()); 44 | res->AddCustom("Imag", tflite::ops::micro::Register_Imag()); 45 | } 46 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/custom/fake_implementations.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "tensorflow/lite/c/builtin_op_data.h" 3 | #include "tensorflow/lite/micro/all_ops_resolver.h" 4 | 5 | namespace tflite { 6 | namespace ops { 7 | namespace micro { 8 | namespace reduce_max { 9 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) { return kTfLiteOk; } 10 | } // namespace reduce_max 11 | TfLiteRegistration *Register_REDUCE_MAX(void) { 12 | static TfLiteRegistration res = { 13 | nullptr, 14 | nullptr, 15 | nullptr, 16 | reduce_max::Eval, 17 | }; 18 | return &res; 19 | } 20 | namespace exp { 21 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) { return kTfLiteOk; } 22 | } // namespace exp 23 | TfLiteRegistration *Register_EXP(void) { 24 | static TfLiteRegistration res = { 25 | nullptr, 26 | nullptr, 27 | nullptr, 28 | exp::Eval, 29 | }; 30 | return &res; 31 | } 32 | namespace sum { 33 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) { return kTfLiteOk; } 34 | } // namespace sum 35 | TfLiteRegistration *Register_SUM(void) { 36 | static TfLiteRegistration res = { 37 | nullptr, 38 | nullptr, 39 | nullptr, 40 | sum::Eval, 41 | }; 42 | return &res; 43 | } 44 | namespace div { 45 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) { return kTfLiteOk; } 46 | } // namespace div 47 | TfLiteRegistration *Register_DIV(void) { 48 | static TfLiteRegistration res = { 49 | nullptr, 50 | nullptr, 51 | nullptr, 52 | div::Eval, 53 | }; 54 | return &res; 55 | } 56 | namespace squeeze { 57 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) { return kTfLiteOk; } 58 | } // namespace squeeze 59 | TfLiteRegistration *Register_SQUEEZE(void) { 60 | static TfLiteRegistration res = { 61 | 
nullptr, 62 | nullptr, 63 | nullptr, 64 | squeeze::Eval, 65 | }; 66 | return &res; 67 | } 68 | } // namespace micro 69 | } // namespace ops 70 | } // namespace tflite 71 | 72 | void register_addons(tflite::AllOpsResolver *res) { 73 | res->AddBuiltin(tflite::BuiltinOperator_REDUCE_MAX, 74 | tflite::ops::micro::Register_REDUCE_MAX()); 75 | res->AddBuiltin(tflite::BuiltinOperator_EXP, 76 | tflite::ops::micro::Register_EXP()); 77 | res->AddBuiltin(tflite::BuiltinOperator_SUM, 78 | tflite::ops::micro::Register_SUM()); 79 | res->AddBuiltin(tflite::BuiltinOperator_DIV, 80 | tflite::ops::micro::Register_DIV()); 81 | res->AddBuiltin(tflite::BuiltinOperator_SQUEEZE, 82 | tflite::ops::micro::Register_SQUEEZE()); 83 | } 84 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/custom/registration.cpp: -------------------------------------------------------------------------------- 1 | #include "tensorflow/lite/micro/kernels/all_ops_resolver.h" 2 | #include 3 | 4 | extern void register_addons(tflite::ops::micro::AllOpsResolver *res); 5 | extern void register_addons2(tflite::ops::micro::AllOpsResolver *res); 6 | 7 | // symbol needed inside this dll 8 | int tflite::ErrorReporter::Report(const char* format, ...) 
{ 9 | va_list va; 10 | va_start(va, format); 11 | vfprintf(stderr, format, va); 12 | va_end(va); 13 | return 0; 14 | } 15 | 16 | extern "C" TfLiteStatus register_custom(tflite::ops::micro::AllOpsResolver *res) { 17 | register_addons(res); 18 | register_addons2(res); 19 | return kTfLiteOk; 20 | } 21 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/generic_test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 3.2 FATAL_ERROR) 2 | PROJECT(generic_test) 3 | 4 | SET(TF_DIR "../../../tensorflow" CACHE STRING "TensorFlow source directory") 5 | SET(TFL_SRC ${TF_DIR}/tensorflow/lite) 6 | SET(TFLM_SRC ${TFL_SRC}/micro) 7 | SET(TFLMD_SRC ${TFLM_SRC}/tools/make/downloads) 8 | 9 | ADD_EXECUTABLE(${PROJECT_NAME} 10 | generic_test.cpp 11 | ../../build/out.cpp 12 | ) 13 | 14 | TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PUBLIC 15 | ${TF_DIR} 16 | ${TFLMD_SRC}/flatbuffers/include 17 | ${TFLMD_SRC}/ruy 18 | ../../build 19 | ) 20 | 21 | IF(WIN32) 22 | TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/windows_x86_64/lib) 23 | ELSE() 24 | TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/linux_x86_64/lib) 25 | ENDIF() 26 | TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC tensorflow-microlite) 27 | 28 | TARGET_COMPILE_DEFINITIONS(${PROJECT_NAME} PUBLIC 29 | TF_LITE_STATIC_MEMORY 30 | TF_LITE_DISABLE_X86_NEON 31 | ) 32 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/generic_test/generic_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "out.cpp.h" 7 | 8 | int main(int argc, char *argv[]) { 9 | if (argc != 2) { 10 | std::cerr << "Usage: " << argv[0] << " inDataFile\n"; 11 | return 1; 12 | } 13 | 14 | if (model_inputs() != 1 || 
model_outputs() != 1) { 15 | std::cerr << "Mismatch for number of inputs/outputs\n"; 16 | return 1; 17 | } 18 | 19 | std::ifstream inFile(argv[1], std::ios::binary); 20 | 21 | model_init(); 22 | 23 | std::vector inData(model_input_size(0)); 24 | if (!inFile.read((char *)model_input_ptr(0), model_input_size(0))) { 25 | std::cerr << "Failed to read input file\n"; 26 | return 1; 27 | } 28 | 29 | model_invoke(); 30 | for (size_t i = 0; i < model_output_size(0); i++) { 31 | std::cout << "\\x" << std::setw(2) << std::setfill('0') << std::hex 32 | << (int)((unsigned char *)model_output_ptr(0))[i]; 33 | } 34 | std::cout << std::endl; 35 | 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/hello_world.cc: -------------------------------------------------------------------------------- 1 | 2 | #include // for check output 3 | 4 | #include "tensorflow/lite/micro/all_ops_resolver.h" 5 | #include "tensorflow/lite/micro/micro_error_reporter.h" 6 | #include "tensorflow/lite/micro/micro_interpreter.h" 7 | #include "tensorflow/lite/micro/simple_memory_allocator.h" 8 | #include "tensorflow/lite/schema/schema_generated.h" 9 | #include "tensorflow/lite/version.h" 10 | 11 | // Create an area of memory to use for input, output, and intermediate arrays. 12 | // The size of this will depend on the model you're using, and may need to be 13 | // determined by experimentation. 14 | static const int tensor_arena_size = 6 * 1024; 15 | static uint8_t tensor_arena[tensor_arena_size]; 16 | 17 | extern const unsigned char g_model[]; 18 | // extern const int g_model_len; 19 | 20 | // Set up logging. 21 | static tflite::ErrorReporter* error_reporter = nullptr; 22 | // This pulls in all the operation implementations we need. 
23 | static tflite::AllOpsResolver* resolver = nullptr; 24 | static const tflite::Model* model = nullptr; 25 | static tflite::MicroInterpreter* interpreter = nullptr; 26 | 27 | void init(void) { 28 | static tflite::MicroErrorReporter micro_error_reporter; 29 | error_reporter = µ_error_reporter; 30 | 31 | // Map the model into a usable data structure. This doesn't involve any 32 | // copying or parsing, it's a very lightweight operation. 33 | model = ::tflite::GetModel(g_model); 34 | if (model->version() != TFLITE_SCHEMA_VERSION) { 35 | TF_LITE_REPORT_ERROR(error_reporter, 36 | "Model provided is schema version %d not equal " 37 | "to supported version %d.\n", 38 | model->version(), TFLITE_SCHEMA_VERSION); 39 | return; 40 | } 41 | static tflite::AllOpsResolver local_resolver; 42 | resolver = &local_resolver; 43 | 44 | // Build an interpreter to run the model with. 45 | static tflite::MicroInterpreter static_interpreter( 46 | model, *resolver, tensor_arena, tensor_arena_size, error_reporter); 47 | interpreter = &static_interpreter; 48 | TfLiteStatus allocate_status = interpreter->AllocateTensors(); 49 | if (allocate_status != kTfLiteOk) { 50 | TF_LITE_REPORT_ERROR(error_reporter, "AllocateTensors() failed"); 51 | return; 52 | } 53 | } 54 | 55 | void run() { 56 | TfLiteTensor* model_input = interpreter->input(0); 57 | model_input->data.f[0] = 1.57f; // roughly PI/2 58 | 59 | TfLiteStatus invoke_status = interpreter->Invoke(); 60 | if (invoke_status != kTfLiteOk) { 61 | TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed"); 62 | } 63 | TfLiteTensor* model_output = interpreter->output(0); 64 | std::cerr << "result " << model_output->data.f[0] << std::endl; 65 | } 66 | 67 | int main(int argc, char** argv) { 68 | init(); 69 | run(); 70 | return 0; 71 | } 72 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/hello_world.tflite: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/tflite_micro_compiler/examples/hello_world.tflite -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/hello_world2.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2020 Christof Petig. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | #include 17 | 18 | #include "compiled_hello_world.cc.h" 19 | #include "tensorflow/lite/c/common.h" 20 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" 21 | 22 | void test_compiled(void) { 23 | hello_init(); 24 | tflite::GetTensorData(hello_input(0))[0] = 1.57f; 25 | hello_invoke(); 26 | float out = tflite::GetTensorData(hello_output(0))[0]; 27 | std::cerr << "result " << out << std::endl; 28 | } 29 | 30 | int main(int argc, char** argv) { 31 | test_compiled(); 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/lstm2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import random 3 | import math 4 | import numpy as np 5 | import tensorflow as tf 6 | from tensorflow.keras.models import Sequential 7 | from tensorflow.keras.layers import Dense, 
Dropout, LSTM 8 | 9 | train_batches=2000 10 | eval_batches=50 11 | train_sequlen=32 12 | train_inputs=1 13 | lstm_states=6 14 | #activation="relu" 15 | activation=None 16 | rec_activation="hard_sigmoid" 17 | 18 | x_train = np.zeros((train_batches*train_sequlen,1,train_inputs)) 19 | y_train = np.zeros((train_batches*train_sequlen,1,1)) 20 | x_test = np.zeros((eval_batches*train_sequlen,1,train_inputs)) 21 | y_test = np.zeros((eval_batches*train_sequlen,1,1)) 22 | 23 | random.seed(1234) 24 | 25 | # generate input of random sine waves, feed one at a time to the network 26 | 27 | def random_sample(): 28 | ampl = random.uniform(0.5,1) 29 | freq = random.uniform(18,32) 30 | phase= random.uniform(-math.pi,math.pi) 31 | return (ampl,freq,phase) 32 | 33 | def waveform(ampl,freq,phase,idx): 34 | return ampl*math.sin(idx/freq*2*math.pi+phase) 35 | 36 | # calculate train data 37 | for i in range(train_batches): 38 | (ampl,freq,phase) = random_sample() 39 | for j in range(train_sequlen): # subsequent measurements 40 | for k in range(train_inputs): 41 | x_train[i*train_sequlen+j][0][k]=waveform(ampl,freq,phase,j+k) 42 | y_train[i*train_sequlen+j][0]=waveform(ampl,freq,phase,j+train_inputs) 43 | for i in range(eval_batches): 44 | (ampl,freq,phase) = random_sample() 45 | for j in range(train_sequlen): # subsequent measurements 46 | for k in range(train_inputs): 47 | x_test[i*train_sequlen+j][0][k]=waveform(ampl,freq,phase,j+k) 48 | y_test[i*train_sequlen+j][0]=waveform(ampl,freq,phase,j+train_inputs) 49 | 50 | print(x_train[0][0:5], y_train[0][0:5]) 51 | print(x_train.shape, y_train.shape) 52 | print(x_test.shape, y_test.shape) 53 | 54 | def create_model(train=True): 55 | 56 | if train: 57 | input0 = tf.keras.Input(batch_shape=(train_sequlen,1,train_inputs)) 58 | # stateful is worse 59 | x = LSTM(lstm_states, recurrent_activation=rec_activation, activation=activation, return_sequences=False, return_state=False, stateful=False)(input0) 60 | #x = Dropout(0.1)(x) makes it a bit worse 
61 | else: 62 | input0 = tf.keras.Input(batch_shape=(1,1,train_inputs),name="data") 63 | input1 = tf.keras.Input(batch_shape=(1,lstm_states),name="state_h") 64 | input2 = tf.keras.Input(batch_shape=(1,lstm_states),name="state_c") 65 | x, state,state2 = LSTM(lstm_states, recurrent_activation=rec_activation, activation=activation, return_sequences=False, return_state=True, stateful=True, unroll=True)(input0, initial_state=(input1, input2)) 66 | 67 | x = Dense(units=1)(x) 68 | 69 | if train: 70 | model = tf.keras.Model(inputs=input0, outputs=x, name="sine") 71 | else: 72 | model = tf.keras.Model(inputs=(input0,input1,input2), outputs=(x,state,state2), name="sine") 73 | model.summary() 74 | return model 75 | 76 | model=create_model() 77 | 78 | model.compile(loss='mean_squared_error', optimizer='adam') 79 | 80 | for i in range(8): 81 | model.fit(x_train, y_train, epochs=1, batch_size=train_sequlen, verbose=1, shuffle=False, 82 | validation_data=(x_test,y_test)) 83 | model.reset_states() 84 | 85 | model.save('mymodel') 86 | model.save('mymodel_w.h5', save_format="h5") 87 | 88 | model2= create_model(False) 89 | model2.load_weights('mymodel_w.h5') 90 | model2.save('evalmodel.h5', save_format="h5") 91 | 92 | model2.compile(loss='mean_squared_error', optimizer='adam') 93 | 94 | state_h2 = np.zeros((1,lstm_states)) 95 | state_c2 = np.zeros((1,lstm_states)) 96 | for i in range(train_sequlen): 97 | testx, testy = x_test[i], y_test[i] 98 | testx = testx.reshape(1, 1, 1) 99 | res = model2.predict([testx,state_h2,state_c2], batch_size=1) 100 | print('In=%.1f, Expected=%.1f, Predicted=%.1f' % (testx[0][0][0], testy, res[0])) 101 | state_h2=res[1] 102 | state_c2=res[2] 103 | 104 | # to convert to tflite use 105 | # tflite_convert --keras_model_file evalmodel.h5 --output_file evalmodel.tflite --inference_type FLOAT 106 | # from tensorflow 1.15 (2.2 doesn't work) 107 | -------------------------------------------------------------------------------- 
/tflite_micro_compiler/examples/lstm2.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/tflite_micro_compiler/examples/lstm2.tflite -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/lstm_compiled.cc.h: -------------------------------------------------------------------------------- 1 | // This file is generated. Do not edit. 2 | // Generated on: 12.08.2020 18:54:29 3 | 4 | #ifndef lstm_GEN_H 5 | #define lstm_GEN_H 6 | 7 | #include "tensorflow/lite/c/common.h" 8 | 9 | // Sets up the model with init and prepare steps. 10 | TfLiteStatus lstm_init(); 11 | // Returns the input tensor with the given index. 12 | TfLiteTensor *lstm_input(int index); 13 | // Returns the output tensor with the given index. 14 | TfLiteTensor *lstm_output(int index); 15 | // Runs inference for the model. 16 | TfLiteStatus lstm_invoke(); 17 | 18 | // Returns the number of input tensors. 19 | inline size_t lstm_inputs() { return 3; } 20 | // Returns the number of output tensors. 
21 | inline size_t lstm_outputs() { return 3; } 22 | 23 | inline void *lstm_input_ptr(int index) { return lstm_input(index)->data.data; } 24 | inline size_t lstm_input_size(int index) { return lstm_input(index)->bytes; } 25 | inline int lstm_input_dims_len(int index) { 26 | return lstm_input(index)->dims->data[0]; 27 | } 28 | inline int *lstm_input_dims(int index) { 29 | return &lstm_input(index)->dims->data[1]; 30 | } 31 | 32 | inline void *lstm_output_ptr(int index) { 33 | return lstm_output(index)->data.data; 34 | } 35 | inline size_t lstm_output_size(int index) { return lstm_output(index)->bytes; } 36 | inline int lstm_output_dims_len(int index) { 37 | return lstm_output(index)->dims->data[0]; 38 | } 39 | inline int *lstm_output_dims(int index) { 40 | return &lstm_output(index)->dims->data[1]; 41 | } 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/lstm_test.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2020 Christof Petig. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | ==============================================================================*/ 15 | 16 | #include <math.h> 17 | #include <stdio.h> 18 | 19 | #include "lstm_compiled.cc.h" 20 | #include "tensorflow/lite/c/common.h" 21 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" 22 | 23 | static float state_h[6], state_c[6]; 24 | 25 | static const float amplitude = 0.8; 26 | static const float wavelength = 16; 27 | static const float phase = -3.141593f / 2; // roughly -90deg 28 | 29 | float calculate_sine(uint32_t index) { 30 | return amplitude * sinf(index * (6.283185f / wavelength) + phase); 31 | } 32 | 33 | void test_compiled(void) { 34 | lstm_input(1)->data.f = state_h; 35 | lstm_input(2)->data.f = state_c; 36 | lstm_output(1)->data.f = state_h; // feed back to state 37 | lstm_output(2)->data.f = state_c; 38 | for (uint32_t i = 0; i < 30; ++i) { 39 | float in = calculate_sine(i); 40 | tflite::GetTensorData<float>(lstm_input(0))[0] = in; 41 | lstm_invoke(); 42 | printf("input %.3f output %.3f\n", in, 43 | tflite::GetTensorData<float>(lstm_output(0))[0]); 44 | } 45 | } 46 | 47 | int main(int argc, char** argv) { 48 | lstm_init(); 49 | test_compiled(); 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/mobilenet.cc: -------------------------------------------------------------------------------- 1 | 2 | #include <cstdio> // for check output 3 | 4 | #include "tensorflow/lite/micro/all_ops_resolver.h" 5 | #include "tensorflow/lite/micro/micro_error_reporter.h" 6 | #include "tensorflow/lite/micro/micro_interpreter.h" 7 | #include "tensorflow/lite/micro/simple_memory_allocator.h" 8 | #include "tensorflow/lite/schema/schema_generated.h" 9 | #include "tensorflow/lite/version.h" 10 | 11 | // Create an area of memory to use for input, output, and intermediate arrays. 12 | // The size of this will depend on the model you're using, and may need to be 13 | // determined by experimentation. 
14 | static const int tensor_arena_size = 10 * 1024 * 1024; 15 | static uint8_t tensor_arena[tensor_arena_size]; 16 | 17 | extern "C" const unsigned char __1_tflite[]; 18 | // extern "C" const unsigned int __1_tflite_len; 19 | extern "C" const unsigned char gnu_ppm[]; 20 | 21 | // Set up logging. 22 | static tflite::ErrorReporter* error_reporter = nullptr; 23 | // This pulls in all the operation implementations we need. 24 | static tflite::AllOpsResolver* resolver = nullptr; 25 | static const tflite::Model* model = nullptr; 26 | static tflite::MicroInterpreter* interpreter = nullptr; 27 | 28 | void init(void) { 29 | static tflite::MicroErrorReporter micro_error_reporter; 30 | error_reporter = &micro_error_reporter; 31 | 32 | // Map the model into a usable data structure. This doesn't involve any 33 | // copying or parsing, it's a very lightweight operation. 34 | model = ::tflite::GetModel(__1_tflite); 35 | if (model->version() != TFLITE_SCHEMA_VERSION) { 36 | TF_LITE_REPORT_ERROR(error_reporter, 37 | "Model provided is schema version %d not equal " 38 | "to supported version %d.\n", 39 | model->version(), TFLITE_SCHEMA_VERSION); 40 | return; 41 | } 42 | static tflite::AllOpsResolver local_resolver; 43 | resolver = &local_resolver; 44 | 45 | // Build an interpreter to run the model with. 
46 | static tflite::MicroInterpreter static_interpreter( 47 | model, *resolver, tensor_arena, tensor_arena_size, error_reporter); 48 | interpreter = &static_interpreter; 49 | TfLiteStatus allocate_status = interpreter->AllocateTensors(); 50 | if (allocate_status != kTfLiteOk) { 51 | TF_LITE_REPORT_ERROR(error_reporter, "AllocateTensors() failed"); 52 | return; 53 | } 54 | } 55 | 56 | // strictly this is no longer necessary at all 57 | void exit(void) { 58 | if (interpreter) { 59 | interpreter = 0; 60 | } 61 | if (resolver) { 62 | resolver = 0; 63 | } 64 | if (error_reporter) { 65 | error_reporter = 0; 66 | } 67 | model = 0; 68 | } 69 | 70 | void run() { 71 | TfLiteTensor* model_input = interpreter->input(0); 72 | memcpy(model_input->data.uint8, gnu_ppm, 160 * 160 * 3); 73 | 74 | TfLiteStatus invoke_status = interpreter->Invoke(); 75 | if (invoke_status != kTfLiteOk) { 76 | TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed"); 77 | } 78 | TfLiteTensor* model_output = interpreter->output(0); 79 | uint32_t best = 0; 80 | uint32_t bestval = model_output->data.uint8[0]; 81 | for (uint32_t i = 1; i < 1001; ++i) { 82 | if (model_output->data.uint8[i] > bestval) { 83 | bestval = model_output->data.uint8[i]; 84 | best = i; 85 | } 86 | } 87 | printf("Best match is %u with %d%%\n", best, (int)(bestval * 100 / 255)); 88 | } 89 | 90 | int main(int argc, char** argv) { 91 | init(); 92 | run(); 93 | exit(); 94 | return 0; 95 | } 96 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/mobilenet2.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2020 Christof Petig. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | #include 17 | 18 | #include "compiled_mobilenet.cc.h" 19 | #include "tensorflow/lite/c/common.h" 20 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" 21 | 22 | extern "C" const unsigned char gnu_ppm[]; 23 | 24 | void run() { 25 | TfLiteTensor* model_input = mobilenet_input(0); 26 | memcpy(model_input->data.uint8, gnu_ppm, 160 * 160 * 3); 27 | 28 | TfLiteStatus invoke_status = mobilenet_invoke(); 29 | if (invoke_status != kTfLiteOk) { 30 | fprintf(stderr, "Invoke failed\n"); 31 | } 32 | TfLiteTensor* model_output = mobilenet_output(0); 33 | uint32_t best = 0; 34 | uint32_t bestval = model_output->data.uint8[0]; 35 | for (uint32_t i = 1; i < 1001; ++i) { 36 | if (model_output->data.uint8[i] > bestval) { 37 | bestval = model_output->data.uint8[i]; 38 | best = i; 39 | } 40 | } 41 | printf("Best match is %u with %d%%\n", best, bestval * 100 / 256); 42 | } 43 | 44 | int main(int argc, char** argv) { 45 | mobilenet_init(); 46 | run(); 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/mobilenet_v1_0_25_160_quantized.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/tflite_micro_compiler/examples/mobilenet_v1_0_25_160_quantized.tflite -------------------------------------------------------------------------------- 
/tflite_micro_compiler/model_main.cpp: -------------------------------------------------------------------------------- 1 | #include "model.tflite.h" 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | if(model_init(NULL)){ 10 | printf("Error!\n"); 11 | } 12 | 13 | char *input_filename = argv[1]; 14 | 15 | xt::xarray input = xt::load_npy(input_filename); 16 | int8_t *in = model_input(0)->data.int8; 17 | for (int i=0;idata.int8; 27 | xt::xarray output; 28 | output.resize({model_output_size(n)}); 29 | 30 | for (int i=0;iwriteSource(out); 18 | } 19 | void TFLMC_Compiler::writeHeader(std::ostream &out) { 20 | compiler_->writeHeader(out); 21 | } 22 | 23 | // Returns a name that describes a tensors relation to network layers. 24 | std::string TFLMC_Compiler::getTensorName(int tensorIndex, int sg) const { 25 | return compiler_->getTensorName(tensorIndex, sg); 26 | } 27 | 28 | // Returns tensor arena size 29 | size_t TFLMC_Compiler::getTensorArenaSize() const { 30 | return compiler_->getTensorArenaSize(); 31 | } 32 | } // namespace tflmc -------------------------------------------------------------------------------- /tflite_micro_compiler/src/Api.h: -------------------------------------------------------------------------------- 1 | #ifndef TFLMCOMPILER_API_H 2 | #define TFLMCOMPILER_API_H 3 | 4 | #include 5 | 6 | #include "xcore_shared_config.h" 7 | 8 | namespace tflmc { 9 | 10 | class Compiler; 11 | 12 | class TFLMC_Compiler { 13 | public: 14 | TFLMC_Compiler(const void *modelData, 15 | const struct shared_config::xcore_metadata_t *sharedCfg, 16 | const std::string &versionString, 17 | const std::string &argsString, 18 | const std::string &prefix = "model_", 19 | const bool debugPrint = false); 20 | 21 | ~TFLMC_Compiler(); 22 | 23 | void writeSource(std::ostream &out); 24 | void writeHeader(std::ostream &out); 25 | 26 | // Returns a name that describes a tensors relation to network layers. 
27 | std::string getTensorName(int tensorIndex, int sg) const; 28 | 29 | // Returns tensor arena size 30 | size_t getTensorArenaSize() const; 31 | 32 | private: 33 | Compiler *compiler_; 34 | }; 35 | 36 | } // namespace tflmc 37 | 38 | #endif -------------------------------------------------------------------------------- /tflite_micro_compiler/src/CodeWriter.h: -------------------------------------------------------------------------------- 1 | #ifndef TFLMCOMPILER_CODEWRITER_H 2 | #define TFLMCOMPILER_CODEWRITER_H 3 | 4 | #include 5 | 6 | #include "tensorflow/lite/micro/micro_interpreter.h" 7 | 8 | namespace tflmc { 9 | 10 | // Helper functions for top-level code generation. 11 | class CodeWriter { 12 | public: 13 | CodeWriter(std::ostream &out, const tflite_micro::SubGraph *subgraph); 14 | 15 | void writeBuiltin(tflite_micro::BuiltinOperator op, const void *data, 16 | const std::string &name); 17 | 18 | // Write IntArray with variable declaration. 19 | void writeIntArray(const TfLiteIntArray &arr, const std::string &name); 20 | // Write only the comma separated contents of an IntArray. 
21 | void writeIntArrayData(const TfLiteIntArray &arr); 22 | 23 | void writeTensor(const TfLiteTensor &t, const std::string &name); 24 | 25 | void writeQuantization(const TfLiteQuantization &q, const std::string &name); 26 | 27 | #if TF_LITE_PACKED_QUANTIZED_DATA_VERSION == 100 28 | void writeQuantizationDetails(const TfLiteQuantization &q, 29 | const std::string &name); 30 | #endif 31 | 32 | template 33 | CodeWriter &operator<<(T &&value) { 34 | out_ << std::forward(value); 35 | return *this; 36 | } 37 | 38 | private: 39 | std::ostream &out_; 40 | const tflite_micro::SubGraph *subgraph_ = nullptr; 41 | }; 42 | 43 | } // namespace tflmc 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/Makefile.inc: -------------------------------------------------------------------------------- 1 | $(info Adding tflite-micro compiler) 2 | TFLITE_U_COMPILER_SRCS := \ 3 | src/CodeWriter.cc src/CustomOperators.cc src/MemMap.cc src/TypeToString.cc \ 4 | src/Compiler.cc src/main.cc src/RecordAllocations.cc 5 | 6 | TFLITE_U_COMPILER_HDRS := \ 7 | src/CodeWriter.h src/Compiler.h src/CustomOperators.h src/MemMap.h src/RecordAllocations.h src/TypeToString.h 8 | 9 | 10 | 11 | 12 | # Builds a standalone binary. 
13 | $(eval $(call microlite_test,compiler,\ 14 | $(TFLITE_U_COMPILER_SRCS),$(TFLITE_U_COMPILER_HDRS))) 15 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/MemMap.cc: -------------------------------------------------------------------------------- 1 | #include "MemMap.h" 2 | 3 | void tflmc::MemMap::recordROM(ptrdiff_t offset, size_t len, 4 | const std::string &tag) { 5 | m_romEntries.push_back({offset, len, tag}); 6 | } 7 | 8 | void tflmc::MemMap::recordRAM(ptrdiff_t offset, size_t len, 9 | const std::string &tag) { 10 | m_ramEntries.push_back({offset, len, tag}); 11 | } 12 | 13 | static void PrintBar(const std::string &label, float start, float end) { 14 | static const int BAR_WIDTH = 100; 15 | static const int TEXT_LABEL_START = 3; 16 | 17 | if (start == -1.0f) { 18 | for (int i = 0; i < BAR_WIDTH + 2; i++) { 19 | printf("#"); 20 | } 21 | printf("\n"); 22 | return; 23 | } 24 | 25 | int barStart = start * BAR_WIDTH; 26 | int barEnd = end * BAR_WIDTH; 27 | bool smallBar = false; 28 | if (barStart == barEnd) { 29 | // Avoid zero width bars. 30 | barEnd++; 31 | smallBar = true; 32 | } 33 | 34 | int labelStart = TEXT_LABEL_START; 35 | int labelEnd = labelStart + label.size(); 36 | if (labelStart <= barEnd && labelEnd >= barStart) { 37 | // Avoid hiding bar with label. 38 | labelEnd = BAR_WIDTH - TEXT_LABEL_START; 39 | labelStart = labelEnd - label.size(); 40 | if (labelStart <= barEnd && labelEnd >= barStart) { 41 | // Still overlaps, center should be fine. 42 | labelStart = (BAR_WIDTH + label.size()) / 2; 43 | labelEnd = (BAR_WIDTH - label.size()) / 2; 44 | } 45 | } 46 | 47 | printf("#"); 48 | for (int i = 0; i < BAR_WIDTH; i++) { 49 | if (i >= labelStart && i < labelEnd) { 50 | printf("%c", label[i - labelStart]); 51 | } else if (i >= barStart && i < barEnd) { 52 | printf(smallBar ? 
"|" : "X"); 53 | } else { 54 | printf("."); 55 | } 56 | } 57 | printf("#\n"); 58 | } 59 | 60 | void tflmc::MemMap::report() const { 61 | size_t constSize = 0; 62 | size_t arenaSize = 0; 63 | for (const auto &entry : m_romEntries) { 64 | constSize = std::max(constSize, entry.base + entry.len); 65 | } 66 | for (const auto &entry : m_ramEntries) { 67 | arenaSize = std::max(arenaSize, entry.base + entry.len); 68 | } 69 | 70 | printf("ROM summary: %lu bytes total\n", constSize); 71 | PrintBar("", -1.0f, -1.0f); 72 | for (const auto &entry : m_romEntries) { 73 | PrintBar(entry.tag, entry.base / (float)constSize, 74 | (entry.base + entry.len) / (float)constSize); 75 | } 76 | PrintBar("", -1.0f, -1.0f); 77 | 78 | printf("RAM summary: %lu bytes total\n", arenaSize); 79 | PrintBar("", -1.0f, -1.0f); 80 | for (const auto &entry : m_ramEntries) { 81 | PrintBar(entry.tag, entry.base / (float)arenaSize, 82 | (entry.base + entry.len) / (float)arenaSize); 83 | } 84 | PrintBar("", -1.0f, -1.0f); 85 | } 86 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/MemMap.h: -------------------------------------------------------------------------------- 1 | #ifndef TFLMCOMPILER_MEMMAP_H 2 | #define TFLMCOMPILER_MEMMAP_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace tflmc { 9 | 10 | // Keeps track of buffers and prints a summary. 
11 | class MemMap { 12 | public: 13 | void recordROM(ptrdiff_t offset, size_t len, const std::string &tag); 14 | void recordRAM(ptrdiff_t offset, size_t len, const std::string &tag); 15 | void report() const; 16 | 17 | private: 18 | struct Entry { 19 | ptrdiff_t base; 20 | size_t len; 21 | std::string tag; 22 | }; 23 | std::vector m_romEntries; 24 | std::vector m_ramEntries; 25 | }; 26 | 27 | } // namespace tflmc 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/TypeToString.cc: -------------------------------------------------------------------------------- 1 | #include "TypeToString.h" 2 | 3 | #include 4 | #include 5 | 6 | #define NAME(X) \ 7 | case X: \ 8 | return #X 9 | 10 | std::string tflmc::to_string(TfLiteType t) { 11 | switch (t) { 12 | NAME(kTfLiteFloat32); 13 | NAME(kTfLiteInt32); 14 | NAME(kTfLiteUInt8); 15 | NAME(kTfLiteInt64); 16 | NAME(kTfLiteString); 17 | NAME(kTfLiteBool); 18 | NAME(kTfLiteInt16); 19 | NAME(kTfLiteComplex64); 20 | NAME(kTfLiteInt8); 21 | NAME(kTfLiteFloat16); 22 | NAME(kTfLiteFloat64); 23 | default: 24 | throw std::runtime_error( 25 | "Missing case in TfLiteType to string conversion"); 26 | } 27 | } 28 | 29 | std::string tflmc::c_type(TfLiteType t) { 30 | switch (t) { 31 | case kTfLiteFloat32: 32 | return "float"; 33 | case kTfLiteInt32: 34 | return "int32_t"; 35 | case kTfLiteUInt8: 36 | return "uint8_t"; 37 | case kTfLiteInt64: 38 | return "int64_t"; 39 | // case kTfLiteString: return "float"; 40 | // case kTfLiteBool: return "float"; 41 | case kTfLiteInt16: 42 | return "int16_t"; 43 | // case kTfLiteComplex64: return "float"; 44 | case kTfLiteInt8: 45 | return "int8_t"; 46 | // case kTfLiteFloat16: return "float"; 47 | case kTfLiteFloat64: 48 | return "double"; 49 | default: 50 | throw std::runtime_error( 51 | "Missing case in TfLiteType to C type conversion"); 52 | } 53 | } 54 | 55 | std::string tflmc::to_string(TfLiteAllocationType t) { 56 | switch 
(t) { 57 | NAME(kTfLiteMmapRo); 58 | NAME(kTfLiteArenaRw); 59 | default: 60 | throw std::runtime_error( 61 | "Missing case in TfLiteAllocationType to string " 62 | "conversion"); 63 | } 64 | } 65 | 66 | std::string tflmc::to_string(TfLiteFusedActivation t) { 67 | switch (t) { 68 | NAME(kTfLiteActNone); 69 | NAME(kTfLiteActRelu); 70 | NAME(kTfLiteActReluN1To1); 71 | NAME(kTfLiteActRelu6); 72 | NAME(kTfLiteActTanh); 73 | NAME(kTfLiteActSignBit); 74 | NAME(kTfLiteActSigmoid); 75 | default: 76 | throw std::runtime_error( 77 | "Missing case in TfLiteFusedActivation to string conversion"); 78 | } 79 | } 80 | 81 | std::string tflmc::to_string(TfLiteFullyConnectedWeightsFormat t) { 82 | switch (t) { 83 | NAME(kTfLiteFullyConnectedWeightsFormatDefault); 84 | NAME(kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8); 85 | default: 86 | throw std::runtime_error( 87 | "Missing case in TfLiteFullyConnectedWeightsFormat to string " 88 | "conversion"); 89 | } 90 | } 91 | 92 | std::string tflmc::to_string(TfLitePadding t) { 93 | switch (t) { 94 | NAME(kTfLitePaddingUnknown); 95 | NAME(kTfLitePaddingSame); 96 | NAME(kTfLitePaddingValid); 97 | default: 98 | throw std::runtime_error( 99 | "Missing case in TfLitePadding to string conversion"); 100 | } 101 | } 102 | 103 | std::string tflmc::to_string(TfLitePaddingValues const& v) { 104 | std::stringstream out; 105 | out << "{ " << v.width << "," << v.height << ", " << v.width_offset << ", " 106 | << v.height_offset << " }"; 107 | return out.str(); 108 | } 109 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/TypeToString.h: -------------------------------------------------------------------------------- 1 | #ifndef TFLMCOMPILER_TYPETOSTRING_H 2 | #define TFLMCOMPILER_TYPETOSTRING_H 3 | 4 | #include 5 | 6 | #include "tensorflow/lite/c/builtin_op_data.h" 7 | 8 | namespace tflmc { 9 | 10 | std::string to_string(TfLiteType t); 11 | std::string c_type(TfLiteType t); 12 | std::string 
to_string(TfLiteAllocationType t); 13 | std::string to_string(TfLiteFusedActivation t); 14 | std::string to_string(TfLiteFullyConnectedWeightsFormat t); 15 | std::string to_string(TfLitePadding t); 16 | std::string to_string(TfLitePaddingValues const& v); 17 | 18 | } // namespace tflmc 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/main.cc: -------------------------------------------------------------------------------- 1 | #include "CodeWriter.h" 2 | #include "Compiler.h" 3 | 4 | int main(int argc, char *argv[]) { 5 | if (argc < 3 || argc > 4) { 6 | printf( 7 | "Usage: %s modelFile.tflite outFile.cpp [NamingPrefix = \"model_\"]\n", 8 | argv[0]); 9 | return 1; 10 | } 11 | 12 | std::string prefix = "model_"; 13 | if (argc == 4) { 14 | prefix = argv[3]; 15 | } 16 | 17 | if (!tflmc::CompileFile(argv[1], argv[2], prefix)) { 18 | return 1; 19 | } 20 | 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/xtflm_conf.h: -------------------------------------------------------------------------------- 1 | // XTLM_OPERATORS must be 200 as we use PythonOpsResolver in 2 | // tflite micro compiler. 3 | // PythonOpsResolver is defined as MicroMutableOpResolver<200> in 4 | // https://github.com/tensorflow/tflite-micro/blob/main/python/tflite_micro/python_ops_resolver.h 5 | #define XTFLM_OPERATORS (200) 6 | #define NUM_OUTPUT_TENSORS (40) 7 | #define NUM_INPUT_TENSORS (40) 8 | #define MAX_DEBUG_LOG_LENGTH (1024) 9 | #define AISRV_GPIO_LENGTH (4) 10 | -------------------------------------------------------------------------------- /utils/compare_outputs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUR_DIR=$(pwd) 4 | 5 | OUT_DIR=$1 6 | 7 | cd sample_vww 8 | python run.py 9 | 10 | cd .. 
11 | mkdir $OUT_DIR 12 | 13 | clang++ -DTF_LITE_DISABLE_X86_NEON -DTF_LITE_STATIC_MEMORY -DNO_INTERPRETER -Ilib_tflite_micro/submodules/tflite-micro -Ilib_tflite_micro/submodules/flatbuffers/include -I../lib_nn/ -I. -std=c++14 main.cpp sample_vww/tfl_model.tflite.cpp -g -O0 -lxtflitemicro -Ltflite_micro_compiler/build -rpath /Users/deepakpanickal/code/ai_tools2/third_party/lib_tflite_micro/tflite_micro_compiler/build -I$CONDA_PREFIX/include -DTFLMC_PRINT_TENSORS -o $OUT_DIR/tfl.out 14 | 15 | $OUT_DIR/tfl.out $OUT_DIR/tfl.out >$OUT_DIR/tflite.json 2>&1 16 | 17 | clang++ -DTF_LITE_DISABLE_X86_NEON -DTF_LITE_STATIC_MEMORY -DNO_INTERPRETER -Ilib_tflite_micro/submodules/tflite-micro -Ilib_tflite_micro/submodules/flatbuffers/include -I../lib_nn/ -I. -std=c++14 main.cpp sample_vww/xcore_model.tflite.cpp -g -O0 -lxtflitemicro -Ltflite_micro_compiler/build -rpath /Users/deepakpanickal/code/ai_tools2/third_party/lib_tflite_micro/tflite_micro_compiler/build -I$CONDA_PREFIX/include -DTFLMC_PRINT_TENSORS -o $OUT_DIR/xcore.out 18 | 19 | $OUT_DIR/xcore.out sample_vww/xcore_model.params >$OUT_DIR/xcore.json 2>&1 20 | 21 | python diff_output.py $OUT_DIR/tflite.json $OUT_DIR/xcore.json >$OUT_DIR/accuracy_diff.txt 22 | 23 | cp sample_vww/tfl_model.tflite* $OUT_DIR 24 | cp sample_vww/xcore_model.tflite* $OUT_DIR 25 | 26 | exit 0 27 | -------------------------------------------------------------------------------- /utils/diff_output.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import sys 4 | 5 | # 6 | tfl_to_xcore_map = {1:3, 2:6, 3:9, 4:12, 6:15, 7:18, 8:21, 9:24, 10:27, 11:28} 7 | 8 | 9 | # Opening JSON file 10 | f = open(sys.argv[1]) 11 | tflite_data = json.load(f) 12 | # Closing file 13 | f.close() 14 | 15 | f = open(sys.argv[2]) 16 | xcore_data = json.load(f) 17 | # Closing file 18 | f.close() 19 | 20 | # Iterating through the json 21 | # list 22 | for i in tfl_to_xcore_map: 23 | tfl = 
np.array(tflite_data[i]["data"][0]["val"]) 24 | xc = np.array(xcore_data[tfl_to_xcore_map[i]]["data"][0]["val"]) 25 | diffs = tfl - xc 26 | unique, counts = np.unique(diffs, return_counts=True) 27 | print("\n\nTFLite %s, node %d" %(tflite_data[i]["op"], i)) 28 | print("Xcore %s, node %d" %(xcore_data[tfl_to_xcore_map[i]]["op"], tfl_to_xcore_map[i])) 29 | print(np.asarray((unique, counts)).T) 30 | -------------------------------------------------------------------------------- /utils/main.cpp: -------------------------------------------------------------------------------- 1 | #include "model.tflite.h" 2 | #include 3 | 4 | // #include 5 | // #include 6 | 7 | unsigned char checksum_calc(char *data, unsigned int length) 8 | { 9 | static char sum; 10 | static char * end; 11 | sum = 0; 12 | end = data + length; 13 | 14 | do 15 | { 16 | sum -= *data++; 17 | } while (data != end); 18 | return sum; 19 | } 20 | 21 | #define MAX_PARAMS_SIZE 5000000 22 | #define MAX_MODEL_CONTENT_SIZE 5000000 23 | static int load_binary_file(const char *filename, uint32_t *content, 24 | size_t size) { 25 | FILE *fd = fopen(filename, "rb"); 26 | if (fd == NULL) { 27 | fprintf(stderr, "Cannot read model/param file %s\n", filename); 28 | } 29 | int s = fread(content, 1, size, fd); 30 | fclose(fd); 31 | 32 | return s; 33 | } 34 | uint32_t params_content[MAX_MODEL_CONTENT_SIZE / sizeof(uint32_t)]; 35 | 36 | #define I16 37 | 38 | int main(int argc, char *argv[]) 39 | { 40 | (void)load_binary_file(argv[1], params_content, MAX_PARAMS_SIZE); 41 | 42 | if(model_init(params_content)){ 43 | printf("Error!\n"); 44 | } 45 | 46 | //xt::xarray input = xt::load_npy("input.npy"); 47 | 48 | for(int n=0; n< model_inputs(); ++n) { 49 | //int32_t *in = model_input(n)->data.i32; 50 | #ifdef I16 51 | int16_t *in = model_input(n)->data.i16; 52 | int size = model_input_size(n)/2; 53 | int k = -32768; 54 | for (int i=0;i= 32767) { 56 | k = -32768; 57 | } 58 | in[i] = k;//input[i]; 59 | k = k + 5000; 60 | } 61 | #else 
62 | int8_t *in = model_input(n)->data.int8; 63 | int size = model_input_size(n); 64 | int k = -128; 65 | for (int i=0;i= 128) { 67 | k = -128; 68 | } 69 | in[i] = k;//input[i]; 70 | k = k + 3; 71 | } 72 | #endif 73 | } 74 | printf("\n"); 75 | 76 | model_invoke(); 77 | 78 | for(int n=0; n< model_outputs(); ++n) { 79 | //int32_t *out = model_output(n)->data.i32; 80 | #ifdef I16 81 | int16_t *out = model_output(n)->data.i16; 82 | int size = model_output_size(n)/2; 83 | #else 84 | int8_t *out = model_output(n)->data.int8; 85 | int size = model_output_size(n); 86 | #endif 87 | for (int i=0;i