├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── Jenkinsfile ├── Makefile ├── README.rst ├── cmakefiles └── xtflm.cmake ├── fetch_dependencies.py ├── flash_builder ├── flash_builder │ ├── __init__.py │ └── build_flash_file.py └── setup.py ├── host_cmd_line_interpreter ├── CMakeLists.txt ├── Makefile ├── README.rst ├── src │ ├── main.cc │ └── xtflm_conf.h └── tests │ ├── Makefile │ ├── test_mobnet │ ├── Makefile │ ├── baboon.raw │ ├── classes.raw │ ├── mobilenet_v1_0.25_128.tflite │ ├── model_mobilenet_v1.params │ └── model_mobilenet_v1.tflite │ └── write_reference_output.py ├── lib_tflite_micro ├── api │ ├── fast_flash.h │ ├── flash_server.h │ ├── inference_engine.h │ ├── ioserver.h │ ├── load_weights.h │ ├── memory_parallel_transport.h │ ├── tile_ram_server.h │ ├── version.h │ ├── xcore_config.h │ ├── xcore_device_memory.h │ └── xcore_shared_config.h ├── module_build_info └── src │ ├── fast_flash.xc │ ├── fast_flash_read_loop.S │ ├── flash_server.c │ ├── inference_engine.cc │ ├── ioserver.c │ ├── load_weights.c │ ├── memory_parallel_transport.c │ ├── memory_transport_ll.S │ ├── par_invoke_funcs.c │ ├── tflite-xcore-kernels │ ├── conv2d_float.c │ ├── conv2d_float.h │ ├── micro_time.cc │ ├── xcore_add.cc │ ├── xcore_batched_softmax.cc │ ├── xcore_beta_activationf32.cc │ ├── xcore_beta_concatf32.cc │ ├── xcore_beta_convf32.cc │ ├── xcore_beta_fcf32.cc │ ├── xcore_beta_transposeconvf32.cc │ ├── xcore_binaryi16.cc │ ├── xcore_broadcast.cc │ ├── xcore_bsign.cc │ ├── xcore_common.cc │ ├── xcore_concat.cc │ ├── xcore_conv2d_v2.cc │ ├── xcore_custom_options.cc │ ├── xcore_custom_options.h │ ├── xcore_detection_post.cc │ ├── xcore_error_reporter.cc │ ├── xcore_error_reporter.h │ ├── xcore_expand_8_to_16.cc │ ├── xcore_interpreter.cc │ ├── xcore_interpreter.h │ ├── xcore_load_store_tensor.cc │ ├── xcore_load_weights_wait.cc │ ├── xcore_lookup.cc │ ├── xcore_maxpool2d.cc │ ├── xcore_mean.cc │ ├── xcore_meani16.cc │ ├── xcore_mul.cc │ ├── xcore_n_to_4.cc │ ├── 
xcore_ops.cc │ ├── xcore_ops.h │ ├── xcore_pad.cc │ ├── xcore_profiler.cc │ ├── xcore_profiler.h │ ├── xcore_slice.cc │ ├── xcore_softmax.cc │ ├── xcore_transpose.cc │ ├── xcore_unaryi16.cc │ ├── xcore_utils.cc │ └── xcore_utils.h │ ├── thread_call.S │ ├── thread_call.h │ ├── thread_call_host_emulation.c │ ├── tile_ram_server.c │ └── xcore_device_memory.c ├── patches └── tflite-micro.patch ├── repos.list ├── requirements.txt ├── sample_mobilenet ├── add_newlines_to_csv.py ├── mobilenet_v1_25.tflite ├── run.py ├── s1.jpg ├── s2.jpg └── s3.jpg ├── tflite_micro_compiler ├── .clang-format ├── .github │ ├── ISSUE_TEMPLATE │ │ ├── bug_report.md │ │ ├── discussion.md │ │ ├── feature_request.md │ │ └── improvement.md │ └── workflows │ │ └── c-cpp.yml ├── .gitignore ├── .vscode │ ├── c_cpp_properties.json │ ├── launch.json │ └── tasks.json ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── NEWS.txt ├── README.md ├── VerifiedTensorflowVersion.txt ├── cmake │ ├── FindTFLite.cmake │ └── xbuild.cmake ├── common.mk ├── examples │ ├── .gitignore │ ├── CMakeLists.txt │ ├── Makefile │ ├── Makefile.inc │ ├── Model_source.txt │ ├── cifar10.cc │ ├── cifar10.tflite │ ├── cifar10_model.c │ ├── cifar10_run_comp.cc │ ├── compiled_cifar10.cc │ ├── compiled_cifar10.cc.h │ ├── compiled_hello_world.cc │ ├── compiled_hello_world.cc.h │ ├── compiled_mobilenet.cc │ ├── compiled_mobilenet.cc.h │ ├── custom │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── custom.tflite │ │ ├── custom_implementations.cpp │ │ ├── fake_implementations.cpp │ │ └── registration.cpp │ ├── generic_test │ │ ├── CMakeLists.txt │ │ └── generic_test.cpp │ ├── gnu.c │ ├── hello_world.cc │ ├── hello_world.tflite │ ├── hello_world2.cc │ ├── hello_world_model.cc │ ├── lstm2.py │ ├── lstm2.tflite │ ├── lstm_compiled.cc │ ├── lstm_compiled.cc.h │ ├── lstm_test.cc │ ├── mobilenet.cc │ ├── mobilenet2.cc │ ├── mobilenet_v1_0_25_160_quantized.c │ ├── mobilenet_v1_0_25_160_quantized.tflite │ └── truck.c ├── model_main.cpp └── src │ ├── Api.cc 
│ ├── Api.h │ ├── CodeWriter.cc │ ├── CodeWriter.h │ ├── Compiler.cc │ ├── Compiler.h │ ├── Makefile.inc │ ├── MemMap.cc │ ├── MemMap.h │ ├── TypeToString.cc │ ├── TypeToString.h │ ├── main.cc │ └── xtflm_conf.h ├── utils ├── compare_outputs.sh ├── diff_output.py └── main.cpp └── version_check.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | app_ie/.build 3 | app_ie/bin 4 | host_cmd_line_interpreter/build 5 | host_cmd_line_interpreter/bin 6 | .DS_Store 7 | xtflm_interpreter/build 8 | xtflm_interpreter/xtflm_interpreter/libs/ 9 | xtflm_interpreter/xtflm_interpreter/__pycache__/ 10 | xtflm_interpreter/.eggs/ 11 | xtflm_interpreter/xtflm_interpreter.egg-info/ 12 | flash_builder/flash_builder/__pycache__/ 13 | flash_builder/.eggs/ 14 | flash_builder/flash_builder.egg-info/ 15 | *.pyc 16 | build/ 17 | .cache 18 | compile_commands.json 19 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib_tflite_micro/submodules/tflite-micro"] 2 | path = lib_tflite_micro/submodules/tflite-micro 3 | url = git@github.com:tensorflow/tflite-micro.git 4 | [submodule "lib_tflite_micro/submodules/flatbuffers"] 5 | path = lib_tflite_micro/submodules/flatbuffers 6 | url = git@github.com:google/flatbuffers.git 7 | [submodule "lib_tflite_micro/submodules/gemmlowp"] 8 | path = lib_tflite_micro/submodules/gemmlowp 9 | url = https://github.com/google/gemmlowp.git 10 | [submodule "lib_tflite_micro/submodules/ruy"] 11 | path = lib_tflite_micro/submodules/ruy 12 | url = https://github.com/google/ruy.git 13 | [submodule "lib_tflite_micro/submodules/xmos_cmake_toolchain"] 14 | path = lib_tflite_micro/submodules/xmos_cmake_toolchain 15 | url = git@github.com:xmos/xmos_cmake_toolchain.git 16 | -------------------------------------------------------------------------------- /Jenkinsfile: 
-------------------------------------------------------------------------------- 1 | @Library('xmos_jenkins_shared_library@v0.32.0') _ 2 | 3 | getApproval() 4 | 5 | pipeline { 6 | agent { 7 | label "xcore.ai" 8 | } 9 | options { 10 | 11 | // skipDefaultCheckout() 12 | buildDiscarder(xmosDiscardBuildSettings(onlyArtifacts=false)) 13 | timestamps() 14 | } 15 | environment { 16 | REPO = 'lib_tflite_micro' 17 | VIEW = getViewName(REPO) 18 | } 19 | stages { 20 | stage('Build') { 21 | steps { 22 | withVenv { 23 | sh 'git submodule update --depth=1 --init --recursive --jobs 8' 24 | sh 'make init' 25 | sh 'make patch' 26 | sh 'make build' 27 | } 28 | } 29 | } 30 | stage("Test") { 31 | steps { 32 | withVenv { 33 | sh 'make init' 34 | sh 'make test' 35 | } 36 | } 37 | } 38 | } 39 | post { 40 | cleanup { 41 | cleanWs() 42 | } 43 | } 44 | } 45 | // stage("Checkout repo") { 46 | // steps { 47 | // dir('lib_tflite_micro') { 48 | // checkout scm 49 | // stash includes: '**/*', name: 'lib_tflite_micro', useDefaultExcludes: false 50 | // script { 51 | // def short_hash = sh(returnStdout: true, script: "git log -n 1 --pretty=format:'%h'").trim() 52 | // currentBuild.displayName = '#' + BUILD_NUMBER + '-' + short_hash 53 | // } 54 | // } 55 | // } 56 | // post { 57 | // cleanup { 58 | // deleteDir() 59 | // } 60 | // } 61 | // } 62 | /* stage("Cleanup2") { 63 | steps { 64 | // The Jenkins command deleteDir() doesn't seem very reliable, so we're using the basic form 65 | // sh("rm -rf *") 66 | } 67 | }*/ 68 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | patch: 2 | (cd lib_tflite_micro/submodules/tflite-micro && git reset --hard && git apply ../../../patches/tflite-micro.patch) 3 | 4 | build: 5 | (cd lib_tflite_micro && ../version_check.sh) 6 | mkdir -p build 7 | (cd build && cmake .. 
&& make -j8) 8 | 9 | init: 10 | python3 fetch_dependencies.py 11 | pip3 install -r requirements.txt 12 | 13 | test: 14 | (cd host_cmd_line_interpreter && make test) 15 | @echo "" 16 | @echo "All tests PASS" 17 | @echo "" 18 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | TFLite-micro container 2 | ====================== 3 | 4 | This repo is a wrapper for lib_tflite_micro. 5 | This repo contains all third party repos that are needed to use tflite-micro on an XCORE. 6 | It wraps this third-party C++ software up in a library that exposes two interfaces: 7 | 8 | #. A C interface for use on embedded systems (``lib_tflite_micro``). 9 | 10 | #. A command line interface for use on a host (``host_cmd_line_interpreter``) 11 | 12 | The ``lib_tflite_micro`` library depends on ``lib_nn``. 13 | 14 | It provides the following services: 15 | 16 | * lib_tflite_micro/module_build_info: file that allows lib_tflite_micro to be integrated into normal XMOS build flow 17 | 18 | * lib_tflite_micro/src: a function that wraps the C++ interpreter in C (inference_engine.cc), and a collection of 19 | kernels that we add to tflite-micro with XCORE specific operators 20 | 21 | * lib_tflite_micro/api: .h files for the above 22 | 23 | * host_cmd_line_interpreter: a command line wrapper for XTFLM, enabling it to be used over the command line.
24 | 25 | 26 | C interface 27 | ----------- 28 | 29 | It exposes a C interface comprising a data structure (inference_engine_t) 30 | with a few functions that can be used to initialise the structure and/or 31 | 32 | 33 | Getting the XCORE.AI optimiser 34 | ------------------------------ 35 | 36 | You can get the XCORE.AI optimiser through pypi: 37 | 38 | * https://pypi.org/project/xmos-ai-tools/ 39 | 40 | This gets you both a command line interface and python interface to the xcore-opt tool that optimises 41 | a ``.tflite`` file for xcore 42 | perform an inference. The data structure itself can be used to directly 43 | read/write data into tensors, this enables sensors to directly operate 44 | in the tensor space. 45 | 46 | The C interface can be used with the standard XMOS build system, and is 47 | built from the appropriate application directory 48 | 49 | Command line interface 50 | ---------------------- 51 | 52 | The command line interface uses the C interface above and makes it accessible 53 | from the command line, enabling the end user to send data through a TFLite model 54 | using the XTFLM interpreter. The XTFLM interpreter will have XCORE specific operators 55 | (such as 2D convolutions, loading from flash) that are emulated on the host. 56 | 57 | The command line interface is built by invoking ``make install`` at top level or 58 | inside ``host_cmd_line_interpreter``. 59 | 60 | The command line interface can be tested by invoking ``make test`` at either level. 61 | -------------------------------------------------------------------------------- /fetch_dependencies.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2021 XMOS LIMITED. 3 | # This Software is subject to the terms of the XMOS Public Licence: Version 1.
4 | import os 5 | import shutil 6 | import subprocess as sp 7 | 8 | 9 | def remove_read_only(func, path, exc_info): 10 | """Sometimes, Windows complains when removing .git folders""" 11 | import stat 12 | 13 | if not os.access(path, os.W_OK): 14 | # Is the error an access error ? 15 | os.chmod(path, stat.S_IWUSR) 16 | func(path) 17 | else: 18 | raise exc_info 19 | 20 | 21 | def read_repo_list(): 22 | """Return a list of lists: [dir, url, ref]""" 23 | repos = [] 24 | with open("repos.list") as f: 25 | lines = f.readlines() 26 | for line in lines: 27 | repos.append(line.split()) 28 | return repos 29 | 30 | 31 | base_dir = os.path.join(os.getcwd(), "..") 32 | 33 | repos = read_repo_list() 34 | for name, url, ref in repos: 35 | print("\nUpdating " + name + "...") 36 | repo_dir = os.path.join(base_dir, name) 37 | 38 | if os.path.isdir(repo_dir): 39 | # check whether it has the correct URL 40 | old_url = ( 41 | sp.check_output("git config --get remote.origin.url".split(), cwd=repo_dir) 42 | .decode("utf-8") 43 | .strip() 44 | ) 45 | if url == old_url: 46 | print("URL hasn't changed") 47 | else: 48 | print("URL for " + name + " has changed.") 49 | print(" Old: " + old_url) 50 | print(" New: " + url) 51 | print("Deleting repository.") 52 | shutil.rmtree(repo_dir, onerror=remove_read_only) 53 | 54 | # Clone 55 | if not os.path.isdir(repo_dir): 56 | sp.check_call( 57 | "git clone {} {}".format(url, name).split(), 58 | cwd=base_dir, 59 | stdout=sp.PIPE, 60 | stderr=sp.PIPE, 61 | ) 62 | 63 | # Fetch 64 | print("Fetching...") 65 | sp.check_call("git fetch".split(), cwd=repo_dir, stdout=sp.PIPE, stderr=sp.PIPE) 66 | 67 | # Checkout 68 | print("Checking out " + ref + "...") 69 | sp.check_call( 70 | "git checkout {}".format(ref).split(), 71 | cwd=repo_dir, 72 | stdout=sp.PIPE, 73 | stderr=sp.PIPE, 74 | ) 75 | -------------------------------------------------------------------------------- /flash_builder/flash_builder/build_flash_file.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) 2020, XMOS Ltd, All rights reserved 3 | 4 | import sys 5 | import argparse 6 | from flash_builder import FlashBuilder 7 | 8 | parser = argparse.ArgumentParser(description='Build parameter/flash images') 9 | parser.add_argument('--output', default='image.bin', help='output file') 10 | parser.add_argument('--target', default='host', help='"flash" or "host" (default)') 11 | parser.add_argument('files', nargs='+', help='Model and parameter files, - indicates a missing one, must be an even number of files for "flash" (model params model params ...), or a single file for "host" (params)') 12 | 13 | args = parser.parse_args() 14 | 15 | if args.target == 'flash' or args.target == 'xcore': 16 | if len(args.files) %2 != 0: 17 | parser.print_usage() 18 | sys.exit(1) 19 | engines = len(args.files)//2 20 | fb = FlashBuilder(engines) 21 | for i in range(engines): 22 | fb.add_model(i, filename = args.files[2*i]) 23 | fb.add_params(i, filename = args.files[2*i+1]) 24 | 25 | fb.flash_file(args.output) 26 | 27 | elif args.target == 'host': 28 | if len(args.files) != 1: 29 | parser.print_usage() 30 | sys.exit(1) 31 | output = FlashBuilder.create_params_file(args.output, input_filename = args.files[0]) 32 | 33 | else: 34 | parser.print_usage() 35 | sys.exit(1) 36 | 37 | -------------------------------------------------------------------------------- /flash_builder/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 XMOS LIMITED. 
This Software is subject to the terms of the 2 | # XMOS Public License: Version 1 3 | import setuptools 4 | 5 | EXCLUDES = ["README.rst"] 6 | 7 | INSTALL_REQUIRES = [ 8 | ] 9 | 10 | setuptools.setup( 11 | name="flash_builder", 12 | packages=setuptools.find_packages(exclude=EXCLUDES), 13 | python_requires=">=3.8.0", 14 | install_requires=INSTALL_REQUIRES, 15 | extras_require={}, 16 | package_data={}, 17 | author="XMOS", 18 | author_email="support@xmos.com", 19 | description="XMOS Flash Builder for TensorFlow Lite model interpreter.", 20 | license="LICENSE.txt", 21 | keywords="xmos xcore", 22 | use_scm_version={ 23 | "root": "..", 24 | "relative_to": __file__, 25 | "version_scheme": "post-release", 26 | }, 27 | setup_requires=["setuptools_scm"], 28 | ) 29 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | 3 | #********************** 4 | # Setup XMOS toolchain 5 | #********************** 6 | #include("${CMAKE_CURRENT_SOURCE_DIR}/../cmake/xmos_toolchain.cmake") 7 | 8 | project(model_runner VERSION 1.0.0) 9 | 10 | enable_language(CXX C) 11 | 12 | #********************** 13 | # Disable in-source build. 14 | #********************** 15 | if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") 16 | message(FATAL_ERROR "In-source build is not allowed! 
Please specify a build folder.\n\tex:cmake -B build") 17 | endif() 18 | 19 | #********************** 20 | # install 21 | #********************** 22 | set(INSTALL_DIR "${PROJECT_SOURCE_DIR}/bin") 23 | 24 | #********************** 25 | # Build flags 26 | #********************** 27 | 28 | set(CMAKE_CXX_FLAGS "-std=c++17" CACHE STRING "C++ Compiler Base Flags" FORCE) 29 | 30 | set(BUILD_FLAGS 31 | "-O3" 32 | "-D__xtflm_conf_h_exists__" 33 | "-DNN_USE_REF" 34 | ) 35 | 36 | #********************** 37 | # firmware targets 38 | #********************** 39 | add_executable(xtflm_interpreter_cmdline) 40 | target_compile_options(xtflm_interpreter_cmdline PRIVATE ${BUILD_FLAGS}) 41 | target_link_options(xtflm_interpreter_cmdline PRIVATE ${BUILD_FLAGS}) 42 | target_link_libraries(xtflm_interpreter_cmdline PRIVATE "-lpthread") 43 | 44 | set(TOP_DIR 45 | "${CMAKE_CURRENT_SOURCE_DIR}/..") 46 | 47 | include(../cmakefiles/xtflm.cmake) 48 | 49 | target_sources(xtflm_interpreter_cmdline 50 | PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src/main.cc" 51 | PRIVATE ${ALL_SOURCES} 52 | ) 53 | 54 | list(APPEND ALL_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/src") 55 | 56 | target_include_directories(xtflm_interpreter_cmdline 57 | PRIVATE ${ALL_INCLUDES} 58 | ) 59 | 60 | install(TARGETS xtflm_interpreter_cmdline DESTINATION ${INSTALL_DIR}) 61 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | mkdir -p build 3 | (cd build && cmake .. 
&& make install -j4) 4 | 5 | clean: 6 | rm -rf build 7 | 8 | test: install 9 | (cd tests && make test) 10 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/README.rst: -------------------------------------------------------------------------------- 1 | Command line interface for XTFLM 2 | =============================== 3 | 4 | Build 5 | ----- 6 | 7 | 8 | To build use the following command sequence:: 9 | 10 | (mkdir build && cd build && cmake .. && make install) 11 | 12 | Usage 13 | ----- 14 | 15 | Use it in either of the two following ways:: 16 | 17 | bin/xtflm_interpreter_cmdline model.tflite input-file output-file 18 | bin/xtflm_interpreter_cmdline model.tflite -i files ... -o files 19 | 20 | input and output are raw data. The first form only works when the network 21 | expects a single input and has a single output. The second form works with 22 | any number of inputs and outputs 23 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/src/xtflm_conf.h: -------------------------------------------------------------------------------- 1 | #define XTFLM_OPERATORS (250) 2 | #define NUM_OUTPUT_TENSORS (4) 3 | #define NUM_INPUT_TENSORS (4) 4 | #define MAX_DEBUG_LOG_LENGTH (1024) 5 | #define AISRV_GPIO_LENGTH (4) 6 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/Makefile: -------------------------------------------------------------------------------- 1 | test: 2 | (cd test_mobnet && make test) 3 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/test_mobnet/Makefile: -------------------------------------------------------------------------------- 1 | #TODO: derive flash from params 2 | #TODO: derive params and tflite from tflite 3 | 4 | test: 5 | @rm -f out 6 | ../../bin/xtflm_interpreter_cmdline model_mobilenet_v1.tflite 
model_mobilenet_v1.params baboon.raw out 7 | @diff out classes.raw 8 | @rm -f out 9 | @echo 'PASS: test_smoke' 10 | -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/test_mobnet/baboon.raw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/host_cmd_line_interpreter/tests/test_mobnet/baboon.raw -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/test_mobnet/classes.raw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/host_cmd_line_interpreter/tests/test_mobnet/classes.raw -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/test_mobnet/mobilenet_v1_0.25_128.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/host_cmd_line_interpreter/tests/test_mobnet/mobilenet_v1_0.25_128.tflite -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/test_mobnet/model_mobilenet_v1.params: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/host_cmd_line_interpreter/tests/test_mobnet/model_mobilenet_v1.params -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/test_mobnet/model_mobilenet_v1.tflite: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/host_cmd_line_interpreter/tests/test_mobnet/model_mobilenet_v1.tflite -------------------------------------------------------------------------------- /host_cmd_line_interpreter/tests/write_reference_output.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import tensorflow as tf 4 | 5 | 6 | def load_raw_data(filename, dtype=np.int8): 7 | return np.fromfile(filename, dtype=dtype) 8 | 9 | 10 | def save_raw_data(filename, data, dtype=np.int8): 11 | data.astype(dtype).tofile(filename) 12 | 13 | 14 | def main(args): 15 | interpreter = tf.lite.Interpreter(model_path=args.model_path) 16 | interpreter.allocate_tensors() 17 | input_details = interpreter.get_input_details() 18 | output_details = interpreter.get_output_details() 19 | input_data = load_raw_data(args.input_file).reshape( 20 | input_details[0]["shape"]) 21 | interpreter.set_tensor(input_details[0]["index"], input_data) 22 | interpreter.invoke() 23 | output_data = interpreter.get_tensor(output_details[0]["index"]) 24 | save_raw_data(args.output_file, output_data) 25 | 26 | 27 | if __name__ == "__main__": 28 | parser = argparse.ArgumentParser( 29 | description="Feed raw input to a regular TFLite model and save the output." 30 | ) 31 | parser.add_argument("model_path", type=str, 32 | help="Path to the .tflite model file.") 33 | parser.add_argument("input_file", type=str, 34 | help="Path to the raw input file.") 35 | parser.add_argument( 36 | "output_file", type=str, help="Path to save the raw output file." 
37 | ) 38 | args = parser.parse_args() 39 | main(args) 40 | -------------------------------------------------------------------------------- /lib_tflite_micro/api/fast_flash.h: -------------------------------------------------------------------------------- 1 | #ifndef _FAST_FLASH_H_ 2 | #define _FAST_FLASH_H_ 3 | 4 | #include 5 | 6 | #ifdef __XC__ 7 | /** Fast flash library. 8 | * Before calling any of the functions in here, lib_quad_flash must be initialised as normal by using 9 | * fl_connectToDevice(qspi, flash_spec, n_flash_spec). 10 | * After that, a call to fast_flash_init shall be made. 11 | * After that, a sequence of calls to fast_flash_read can be made. 12 | * 13 | * The data partition must start with the following 32 bytes: **NOTE: REMOVE THE +4 in fast_flash_init** 14 | * 15 | * 0xff, 0x00, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 16 | * 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 17 | * 0x31, 0xf7, 0xce, 0x08, 0x31, 0xf7, 0xce, 0x08, 18 | * 0x9c, 0x63, 0x9c, 0x63, 0x9c, 0x63, 0x9c, 0x63 19 | * 20 | * This pattern is designed to create maximum difficulties electrically and is used 21 | * to calibrate the electrical settings. Note that this pattern must be nibble reversed 22 | * before being written to flash; just like all other data. 23 | * The rest of the data partition can be used as normal 24 | */ 25 | 26 | /** Function that initialises the fast_flash library 27 | * 28 | * \param qspi ports that connect to flash 29 | * 30 | * \returns a negative value of -1..-5 if the window is too small (size 0..4) 31 | * zero if successful 32 | */ 33 | int fast_flash_init(fl_QSPIPorts &qspi); 34 | 35 | /** Function that reads a sequential set of bytes from memory. 36 | * This function assumes that nibbles have been reversed ((x << 4) & 0xf0 | (x >> 4) & 0x0f) 37 | * before the data was written to flash. 38 | * Note that reading 32 bytes from offset 0 shall yield the special pattern above. 
39 | * 40 | * \param qspi ports that connect to flash 41 | * \param addr address in flash data segment 42 | * \param word_count Number of words to read 43 | * \param read_data array to store data in to. 44 | * \param c_out_data optional channel end over which data is out() instead. 45 | */ 46 | void fast_flash_read(fl_QSPIPorts &qspi, unsigned addr, unsigned word_count, unsigned read_data[], chanend ?c_data_out); 47 | 48 | #else 49 | int fast_flash_init(fl_QSPIPorts *qspi); 50 | void fast_flash_read(fl_QSPIPorts *qspi, unsigned addr, unsigned word_count, unsigned read_data[], chanend_t c_data_out); 51 | #endif 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /lib_tflite_micro/api/flash_server.h: -------------------------------------------------------------------------------- 1 | #ifndef _flash_server_h_ 2 | #define _flash_server_h_ 3 | 4 | #include 5 | 6 | /** Struct holding the "file system" meta information for each client 7 | * The flash is partitioned and each client has a section in the flash 8 | * that stores data relevant to that particular client. For example, models 9 | * parameters, code, etc. 10 | * 11 | * This struct caches all information necessary for a client for fast access. 12 | * The main program must allocate this structure, one per client, prior to 13 | * calling the flash server. 14 | * 15 | * If there is more than one flash device connected to the device, there can be 16 | * multiple flash servers. 17 | */ 18 | typedef struct flash { 19 | int model_start; ///< Start address for model. 20 | int parameters_start; ///< Start address of parameters. 21 | int operators_start; ///< Start address for operator-binaries. 22 | int execute_in_place_start; ///< Start address for operator-binaries. 23 | } flash_t; 24 | 25 | /** Type representing the commands that the flash server accepts */ 26 | typedef enum flash_command { 27 | FLASH_READ_PARAMETERS = 28 | 0, ///< Read a set of parameters. 
         // TODO: share with lib_tflite_micro
  FLASH_READ_PARAMETERS_ASYNC = 1, ///< Read parameters asynchronously.
  FLASH_READ_SYNCHRONIZE = 2,      ///< Complete async read.
  FLASH_READ_XIP =
      3, ///< Read code to execute-in-place through L2 cache - future extension
  FLASH_SERVER_QUIT = 4, ///< Shut the flash server thread down.
  FLASH_SERVER_INIT = 5, // Initialize flash server with fast flash pattern speed match setup
  //FLASH_READ_PARAMETERS_COMPRESSED_FLOAT = 6, // Read a set of compressed parameters
} flash_command_t;

/**
 * Function that runs a flash-server. A flash server is a thread that serves one
 * or more clients. There is one flash server per flash-device, and the server
 * can serve clients on one or more tiles.
 *
 * The flash server takes the following commands:
 *   - Read a whole model from the flash.
 *   - Read some parameters from the flash
 *   - (future extension) Read code for an operator from flash
 *
 * This function does, at present, never return. It could be made to return if
 * all clients close their connection
 *
 * \param c_flash_clients Array of channels; one per client.
 *                        Each client is served in turn
 * \param headers         Space to store a header for each client.
 *                        The header for the client describes the local
 *                        "filesystem" for that client
 * \param n_flash_clients Number of clients. The arrays in the first and second
 *                        parameters should have this many entries
 * \param qspi            Structure holding the quad-flash ports. This contains
 *                        three Ports and a clock-block: the CS_N port, the CLK
 *                        port, the DATA port and a clock block to be used for
 *                        the flash.
 * \param flash_spec      Array holding specifications of flash devices, as per
 *                        the libquadflash documentation
 * \param n_flash_spec    Number of elements in the spec array.
 */
#ifdef __XC__
void flash_server(chanend c_flash_clients[], flash_t headers[],
                  int n_flash_clients, fl_QSPIPorts &qspi,
                  fl_QuadDeviceSpec flash_spec[], int n_flash_spec);
#else
void flash_server(chanend_t *c_flash_clients, flash_t *headers,
                  int n_flash_clients, fl_QSPIPorts *qspi,
                  fl_QuadDeviceSpec *flash_spec, int n_flash_spec);
#endif

#endif
--------------------------------------------------------------------------------
/lib_tflite_micro/api/ioserver.h:
--------------------------------------------------------------------------------
#ifndef _io_server_h_
#define _io_server_h_

#ifdef __XC__

void ioserver(chanend c_model[], unsigned n_model);

#else

#include
#include
// NOTE(review): include targets lost in extraction; confirm against repository.

#define CMD_LENGTH_BYTES (3) // CMD, Model, Tensor

/* Command/response codes exchanged with the ioserver (note: 4 is unused). */
#define IOSERVER_INVOKE 1
#define IOSERVER_TENSOR_SEND_OUTPUT 2
#define IOSERVER_TENSOR_RECV_INPUT 3
#define IOSERVER_ACK 5
#define IOSERVER_NACK 6
#define IOSERVER_RESET 7
#define IOSERVER_EXIT 8

#define MAX_PACKET_SIZE (512)
#define MAX_PACKET_SIZE_WORDS (MAX_PACKET_SIZE / 4)

#ifdef __cplusplus
extern "C" {
#endif
/* Receive the next command from the server channel; the addressed tensor
 * index is stored through tensor_num. */
unsigned int ioserver_command_receive(chanend_t c_server, unsigned *tensor_num);
/* Send an ACK/NACK code back to the server. */
void ioserver_command_acknowledge(chanend_t c_server, unsigned int ack);
/* Stream n words of output-tensor data to the server. */
void ioserver_tensor_send_output(chanend_t c_server, unsigned int *data,
                                 unsigned int n);

/* Receive n words of input-tensor data from the server. */
void ioserver_tensor_recv_input(chanend_t c_server, unsigned int *data,
                                unsigned int n);

/* Server loop: serves n_model models, one channel each. */
void ioserver(chanend_t c_model[], unsigned n_model);
#ifdef __cplusplus
}
#endif

#endif

#endif
--------------------------------------------------------------------------------
/lib_tflite_micro/api/load_weights.h:
--------------------------------------------------------------------------------
#ifndef _load_weights_h_
#define _load_weights_h_

#include
// NOTE(review): include target lost in extraction; confirm against repository.
#include "thread_call.h"

#define LOAD_WEIGHTS_MAX_BLOCKS 2

/**
 * Function that connects to a flash or tile-ram server and loads a series of weights.
 * This function completes when the data is loaded.
 *
 * @param c_flash_or_tile channel-end connecting to the flash server
 *
 * @param data_ptr array of pointers where the loaded data should be scattered
 *
 * @param data_sizes_in_words number of words for each block
 *
 * @param N number of blocks in data_ptr and data_sizes_in_words
 *
 * @param external_addr address in flash or tile ram
 *
 * @param model_thread_count number of threads available
 *
 * @param tif thread_info structure for multithreading
 */
void load_weights_synchronous(chanend_t c_flash_or_tile, int *data_ptr[], int data_sizes_in_words[],
                              int N, int external_addr, int model_thread_count, thread_info_t *tif);

/**
 * Function that connects to a flash server and loads a series of weights.
 * This function continues loading after the call completes; pair each call
 * with load_weights_asynchronous_wait().
 *
 * @param c_flash_or_tile channel-end connecting to the flash server
 *
 * @param data_ptr array of pointers where the loaded data should be scattered
 *
 * @param data_sizes_in_words number of words for each block
 *
 * @param N number of blocks in data_ptr and data_sizes_in_words
 *
 * @param external_addr address in flash or tile ram
 */
void load_weights_asynchronous(chanend_t c_flash_or_tile, int *data_ptr[], int data_sizes_in_words[],
                               int N, int external_addr);

/**
 * Function that connects to a flash server and waits for the last outstanding load to complete
 * Only one asynchronous load should be outstanding at any one time.
 *
 * @param c_flash_or_tile channel-end connecting to the flash server
 */
void load_weights_asynchronous_wait(chanend_t c_flash_or_tile);

/**
 * Function that connects to a flash or tile ram server and kills it.
 *
 * @param c_flash_or_tile channel-end connecting to the flash server
 */
void load_weights_quit(chanend_t c_flash_or_tile);

#endif
--------------------------------------------------------------------------------
/lib_tflite_micro/api/memory_parallel_transport.h:
--------------------------------------------------------------------------------
// NOTE(review): this header has no include guard — confirm whether one is
// needed, or whether it is only ever included once per translation unit.
#include
// NOTE(review): include target lost in extraction; confirm against repository.
#include "thread_call.h"

#ifdef __XC__
#include
#else
#include
#endif

#ifdef __XC__

extern void memory_parallel_receive(chanend c, uint32_t data[], uint32_t bytes);
extern void memory_parallel_receive_thread_call(chanend c, uint32_t data[], uint32_t bytes, thread_info_t &ptr);
extern void memory_parallel_send(chanend c, uint32_t data[], uint32_t bytes);

#else

/** Function that receives a block of data.
 * The number of bytes must be a multiple of 4.
 * This function creates three threads and three channel ends in order to
 * make full use of the bandwidth of the switch.
 *
 * \param c     channel end to the sender
 * \param data  pointer where data must be stored
 * \param bytes number of bytes that will be received.
 */
extern void memory_parallel_receive(chanend_t c, uint32_t *data, uint32_t bytes);

/** Function that receives a block of data.
 * The number of bytes must be a multiple of 4.
 * This function assumes that at least three threads have been created by the
 * thread_call library and will use those together with three fresh channel
 * ends in order to make full use of the bandwidth of the switch.
 *
 * \param c     channel end to the sender
 * \param data  pointer where data must be stored
 * \param bytes number of bytes that will be received.
 */
extern void memory_parallel_receive_thread_call(chanend_t c, uint32_t *data, uint32_t bytes, thread_info_t *ptr);

/** Function that sends a block of data.
 * The number of bytes must be a multiple of 4.
 * This function creates three threads and three channel ends in order to
 * make full use of the bandwidth of the switch.
 *
 * \param c     channel end to the receiver
 * \param data  pointer where data must be loaded from
 * \param bytes number of bytes that will be sent.
 */
extern void memory_parallel_send(chanend_t c, uint32_t *data, uint32_t bytes);

#endif
--------------------------------------------------------------------------------
/lib_tflite_micro/api/tile_ram_server.h:
--------------------------------------------------------------------------------
#ifndef _tile_ram_server_h_
#define _tile_ram_server_h_

#include "flash_server.h"

/**
 * Function that runs a flash-server. A flash server is a thread that serves one
 * or more clients. There is one flash server per flash-device, and the server
 * can serve clients on one or more tiles.
 *
 * The flash server takes the following commands:
 *   - Read a whole model from the flash.
 *   - Read some parameters from the flash
 *   - (future extension) Read code for an operator from flash
 *
 * This function does, at present, never return. It could be made to return if
 * all clients close their connection
 *
 * \param c_tile_ram_clients Array of channels; one per client.
 *                           Each client is served in turn
 *                           NOTE MUST BE 1 FOR NOW
 * \param headers            Space to store a header for each client
 *                           The header for the client describes the local
 *                           "filesystem" for that client
 * \param n_tile_ram_clients Number of clients. The arrays in the first and
 *                           second parameters should have this many entries
 * \param data               Tile ram data
 * \param n_tile_ram_flash   Number of bytes in array
 *                           NOTE(review): this documented parameter does not
 *                           appear in either prototype below — confirm whether
 *                           the length is implied elsewhere.
 */
#ifdef __XC__
void tile_ram_server(chanend c_tile_ram_clients[], flash_t headers[],
                     int n_tile_ram_clients, const int8_t data[]);
#else
void tile_ram_server(chanend_t *c_tile_ram_clients, flash_t *headers,
                     int n_tile_ram_clients, const int8_t *data);
#endif

#endif
--------------------------------------------------------------------------------
/lib_tflite_micro/api/version.h:
--------------------------------------------------------------------------------
// Copyright (c) 2020, XMOS Ltd, All rights reserved
#ifndef XCORE_VERSION_H_
#define XCORE_VERSION_H_

namespace lib_tflite_micro {

// Semantic version of lib_tflite_micro (also recorded per-model in the
// metadata struct of xcore_shared_config.h).
static const unsigned major_version = 0;
static const unsigned minor_version = 7;
static const unsigned patch_version = 0;

} // namespace lib_tflite_micro

#endif // XCORE_VERSION_H_
--------------------------------------------------------------------------------
/lib_tflite_micro/api/xcore_config.h:
--------------------------------------------------------------------------------
#ifndef XCORE_CONFIG_H_
#define XCORE_CONFIG_H_

#include "../src/thread_call.h"

struct xc_context_config_t {
  // This is the thread count specified in the compiler.
  // It's used by lookup op, beta float ops etc to split up work
  // in the Prepare phase.
  // Conv ops have their own thread count as the thread work is
  // calculated in the compiler.
  int model_thread_count;
  thread_info_t thread_info; // worker-thread bookkeeping (see thread_call.h)
  void *UNSAFE weights_data_ptr; // DDR ptr or channel to flash/tile server.
  void *UNSAFE paging_ptr; // DDR ptr for paging in/out tensor arena.
};

#endif // XCORE_CONFIG_H_
--------------------------------------------------------------------------------
/lib_tflite_micro/api/xcore_device_memory.h:
--------------------------------------------------------------------------------
// Copyright (c) 2020, XMOS Ltd, All rights reserved
#ifndef XCORE_DEVICE_MEMORY_H_
#define XCORE_DEVICE_MEMORY_H_

#include
#include
// NOTE(review): include targets lost in extraction — presumably <stddef.h>
// and <stdint.h> for size_t/uintptr_t; confirm against repository.

#ifdef __cplusplus
extern "C" {
#endif

#ifdef XCORE
// Record whether <time.h> was pulled in before the platform header below.
#ifdef _TIME_H_
#define _clock_defined
#endif
#include

#define STRINGIFY(NAME) #NAME
// Load the linker-provided stack requirement (in words) of function NAME.
#define GET_STACKWORDS(DEST, NAME) \
  asm("ldc %[__dest], " STRINGIFY(NAME) ".nstackwords" : [__dest] "=r"(DEST))
// As GET_STACKWORDS, but converted to bytes with two words of headroom.
#define GET_STACKSIZE(DEST, NAME) \
  { \
    size_t _stack_words; \
    asm("ldc %[__dest], " STRINGIFY(NAME) ".nstackwords" \
        : [__dest] "=r"(_stack_words)); \
    DEST = (_stack_words + 2) * 4; \
  }
// Address-range predicates for the xcore memory map (internal RAM,
// external memory, software-defined memory).
#define IS_RAM(a) (((uintptr_t)a >= 0x80000) && ((uintptr_t)a <= 0x100000))
#define IS_NOT_RAM(a) ((uintptr_t)a > 0x100000)
#define IS_EXTMEM(a) \
  (((uintptr_t)a >= 0x10000000) && (((uintptr_t)a <= 0x20000000)))
#define IS_SWMEM(a) \
  (((uintptr_t)a >= 0x40000000) && (((uintptr_t)a <= 0x80000000)))

#ifdef USE_SWMEM
#ifndef USE_QSPI_SWMEM_DEV
void swmem_setup();
#else
#include
void swmem_setup(chanend_t ctrl_swmem_c);
#endif // USE_QSPI_SWMEM_DEV
#endif // USE_SWMEM

void swmem_handler(void *ignored);
void swmem_teardown();

#else // not XCORE

// Host builds: stack queries are meaningless and every address is "RAM".
#define GET_STACKSIZE(DEST, NAME) DEST = 0
#define GET_STACKWORDS(DEST, NAME) DEST = 0
#define IS_RAM(a) (1)
#define IS_NOT_RAM(a) (0)

#endif // XCORE

void memload(void *dest, void *src, size_t size);

#ifdef __cplusplus
}
#endif

#endif // XCORE_DEVICE_MEMORY_H_
--------------------------------------------------------------------------------
/lib_tflite_micro/api/xcore_shared_config.h:
--------------------------------------------------------------------------------
// Copyright (c) 2020, XMOS Ltd, All rights reserved
#ifndef XCORE_SHARED_CONFIG_H_
#define XCORE_SHARED_CONFIG_H_

#include "lib_nn/api/nn_arch.h"

namespace shared_config {

// This string is used as a key to store the shared config
// between xformer and lib_tflite_micro in the flatbuffer
constexpr char xcoreMetadataName[] = "xcoreSharedConfig";

constexpr int xcoreMaxNumOfTensors = 25;

// Describes one tensor that lives in external memory.
struct tensor_info_t {
  uint32_t index;
  uint32_t external_address;
  uint32_t size;
};

// The metadata struct must be aligned to 16 bytes
// We cannot use alignas(16) yet in xcore
struct xcore_metadata_t {
  // Target arch can be XS3A = 0, or VX4A = 1
  nn_target_arch_t target_arch;
  // Versions of libraries used to build the model
  uint32_t lib_nn_major_version;
  uint32_t lib_nn_minor_version;
  uint32_t lib_nn_patch_version;
  uint32_t lib_tflite_micro_major_version;
  uint32_t lib_tflite_micro_minor_version;
  uint32_t lib_tflite_micro_patch_version;
  uint32_t xformer_major_version;
  uint32_t xformer_minor_version;
  uint32_t xformer_patch_version;
  // Number of threads required from the runtime to execute the model
  uint32_t required_thread_count;
  // Number of input tensors loaded from external memory
  uint32_t num_external_input_tensors;
  // Number of output tensors loaded from external memory
  uint32_t num_external_output_tensors;
  tensor_info_t external_input_tensors_data[xcoreMaxNumOfTensors];
  tensor_info_t external_output_tensors_data[xcoreMaxNumOfTensors];
};

} // namespace shared_config

#endif // XCORE_SHARED_CONFIG_H_
--------------------------------------------------------------------------------
/lib_tflite_micro/src/fast_flash_read_loop.S:
--------------------------------------------------------------------------------
.text

.globl fast_read_loop
.globl fast_read_loop.nstackwords
.section .cp.rodata.cst4,"aMc",@progbits,4
.cc_top .Const0x01101011.data,.Const0x01101011
.align 4
.type .Const0x01101011,@object
.size .Const0x01101011, 4
.Const0x01101011:
.long 0x01101011
.cc_bottom .Const0x01101011.data

.text
.align 16
.type fast_read_loop,@function
.cc_top fast_read_loop.function,fast_read_loop

// Timing-critical QSPI read loop. Issues the read command + address on the
// data port, then streams `word_count` words either into memory or out over
// an optional channel end (r8 selects which; see fast_flash.h).
fast_read_loop:
    ENTSP_lu6 8
    std r4, r5, sp[1]
    std r6, r7, sp[2]
    std r8, r9, sp[3]
    ldc r7, 0
    stw r10, sp[1]
    ldw r4, sp[9]
    ldw r8, sp[11]
    ldc r6, 27
    add r9, r3, r6 // r9 <- 27 + read_adj
    shl r6, r1, 8
    or r6, r6, r2 // r6 <- (addr << 8) | mode
    byterev r11, r6

    unzip r11, r6, 2
    zip r6, r11, 2
    // r6 <- nibble_swapped(byte_revved(r6))
    ldw r3, r0[2]
    ldc r1, 1
    outpw res[r3], r1, 4
    ldw r2, r0[3]
    setc res[r2], 15
    syncr res[r3]
    setc res[r2], 7
    ldw r11, r0[0]
    out res[r11], r7
    ldc r0, 28
    shl r7, r6, r0
    ldw r10, cp[.Const0x01101011]
    or r7, r7, r10
    out res[r3], r7
    setc res[r2], 15
    shr r6, r6, 4
    setpsc res[r3], r0
    out res[r3], r6
    ldc r6, 18
    setpt res[r3], r6
    ldw r6, sp[10]
    in r10, res[r3]
    setpt res[r3], r9

    // This is where Timing matters
    in r0, res[r3]
    bf r8, SendToMemory

SendToChannel:
    out res[r8], r0
    sub r6, r4, 1
    bf r6, EndOfChanLoop
ChanLoop:
    in r4, res[r3]
    out res[r8], r4
    sub r6, r6, 1
    bt r6, ChanLoop
EndOfChanLoop:
    outct res[r8], 1
    bu EndOfFunction

SendToMemory:
    stw r0, r6[0]
    sub r4, r4, 1
    bf r4, EndOfMemoryLoop
    add r6, r6, 4
MemoryLoop:
    in r5, res[r3]
    stw r5, r6[0]
    add r6, r6, 4
    sub r4, r4, 1
    bt r4, MemoryLoop
EndOfMemoryLoop:
EndOfFunction:
    setc res[r2], 7
    out res[r11], r1
    ldd r4, r5, sp[1]
    ldd r6, r7, sp[2]
    ldd r8, r9, sp[3]
    ldw r10, sp[1]
    retsp 8

.cc_bottom fast_read_loop.function
.set fast_read_loop.nstackwords,8
.set fast_read_loop.maxcores,1
.set fast_read_loop.maxtimers,0
.set fast_read_loop.maxchanends,0
.Ltmp2:
.size fast_read_loop, .Ltmp2-fast_read_loop
--------------------------------------------------------------------------------
/lib_tflite_micro/src/load_weights.c:
--------------------------------------------------------------------------------
#include
// NOTE(review): include target lost in extraction (assert() is used below,
// so presumably <assert.h>); confirm against repository.
#include "load_weights.h"
#include "flash_server.h"
#include "memory_parallel_transport.h"

/* See load_weights.h: blocking load of N scattered blocks from the flash or
 * tile-ram server. The server replies whether the parallel (multi-lane,
 * tile-to-tile) protocol is in use; both protocols are implemented below. */
void load_weights_synchronous(chanend_t c_flash_or_tile, int *data_ptrs[], int data_sizes_in_words[],
                              int N, int external_addr, int model_thread_count, thread_info_t *tif) {
  // Parallel mode is for reading weights from another tile
  chan_out_word(c_flash_or_tile, FLASH_READ_PARAMETERS);
  int use_parallel_mode = chan_in_word(c_flash_or_tile);
  if (!use_parallel_mode) {
    chan_out_word(c_flash_or_tile, external_addr);

    // One read request covers all N blocks: send the total byte count up
    // front, then scatter the incoming words into the per-block buffers.
    int32_t total_bytes = 0;
    for (int i = 0; i < N; ++i) {
      total_bytes += data_sizes_in_words[i] * 4;
    }
    chan_out_word(c_flash_or_tile, total_bytes);

    for (int i = 0; i < N; ++i) {
      int *data_ptr = data_ptrs[i];
      // The sizes are in bytes and we read from flash in words
      int op_data_size_in_words = data_sizes_in_words[i];
#pragma clang loop unroll_count(4)
      for (int j = 0; j < op_data_size_in_words; j++) {
        // We are reading directly from flash chanend here.
        // We use chanend_in_word() instead of chan_in_word() to
        // avoid handshake.
        // Adding something like a printf() within this loop
        // might slow it down enough to corrupt the received data.
        ((uint32_t *)data_ptr)[j] = chanend_in_word(c_flash_or_tile);
      }
    }
    // As there is no handshake, we have to accept the end token
    // to close the chanend
    chanend_check_end_token(c_flash_or_tile);
  } else {
    // The parallel mode uses four threads and can only work if
    // the model has been compiled with at least four threads.
    assert(model_thread_count >= 4 &&
           "At least four threads are required for parallel read from "
           "another tile!");
    // First block: address and byte count, then a four-lane parallel receive.
    chan_out_word(c_flash_or_tile, external_addr);
    chan_out_word(c_flash_or_tile, data_sizes_in_words[0]*4);
    external_addr += data_sizes_in_words[0]*4;
    memory_parallel_receive_thread_call(c_flash_or_tile, (uint32_t *)data_ptrs[0],
                                        4*data_sizes_in_words[0], tif);
    for (int i = 1; i < N; ++i) {
      // Re-arm the server for the next block (0 = continue, then sync word)
      // before sending its address/size and receiving it in parallel.
      chan_out_word(c_flash_or_tile, 0);
      chan_in_word(c_flash_or_tile);
      chan_out_word(c_flash_or_tile, external_addr);
      chan_out_word(c_flash_or_tile, data_sizes_in_words[i]*4);
      external_addr += data_sizes_in_words[i]*4;
      memory_parallel_receive_thread_call(c_flash_or_tile, (uint32_t *)data_ptrs[i],
                                          4*data_sizes_in_words[i], tif);
    }
  }
}

/* See load_weights.h: queue an asynchronous load of N blocks; the server
 * performs the scatter itself (it is sent each destination pointer). Pair
 * with load_weights_asynchronous_wait(). */
void load_weights_asynchronous(chanend_t c_flash_or_tile, int *data_ptrs[], int data_sizes_in_words[],
                               int N, int external_addr) {
  chan_out_word(c_flash_or_tile, FLASH_READ_PARAMETERS_ASYNC);
  chan_out_word(c_flash_or_tile, external_addr);
  chan_out_word(c_flash_or_tile, N);

  for (int i = 0; i < N; ++i) {
    chan_out_word(c_flash_or_tile, data_sizes_in_words[i] * 4);
    // NOTE(review): pointer truncated to int — fine on 32-bit xcore,
    // not portable to 64-bit hosts.
    chan_out_word(c_flash_or_tile, (int) data_ptrs[i]);
  }
}

/* Block until the outstanding asynchronous load completes; the server
 * signals completion with an end token. */
void load_weights_asynchronous_wait(chanend_t c_flash_or_tile) {
  chanend_check_end_token(c_flash_or_tile);
}

/* Ask the flash/tile-ram server thread to exit. */
void load_weights_quit(chanend_t c_flash_or_tile) {
  chan_out_word(c_flash_or_tile, FLASH_SERVER_QUIT);
}
--------------------------------------------------------------------------------
/lib_tflite_micro/src/memory_parallel_transport.c:
--------------------------------------------------------------------------------
#include
#include
#include
// NOTE(review): include targets lost in extraction; confirm against repository.
#include "thread_call.h"
#include "memory_parallel_transport.h"

// Destination of a parallel receive: `whole` is the number of complete
// 96-byte super-blocks, `data` the base of the destination buffer.
typedef struct {
  int whole;
  uint32_t *data;
} destination_description_t;

DECLARE_JOB(receive_rx, (destination_description_t *, chanend_t, int));
DECLARE_JOB(transmit_tx, (chanend_t, int, int, uint32_t *));

// Low-level per-lane workers, implemented in memory_transport_ll.S.
extern void receive_rx(destination_description_t *d, chanend_t c, int offset);
extern void transmit_tx(chanend_t C, int offset, int n, uint32_t *data);

/* See memory_parallel_transport.h: receive byte_count bytes over four
 * parallel lanes (three freshly allocated channel ends plus c itself),
 * spawning the three extra receive threads with PAR_JOBS. */
extern void memory_parallel_receive(chanend_t c, uint32_t *data,
                                    uint32_t byte_count) {
  int whole = byte_count / 96;
  int last = byte_count - whole * 96;
  destination_description_t dest = {whole, data};
  chanend_t other_c[3];
  // Exchange channel-end identifiers with the sender so each extra lane
  // is routed to its peer on the other side.
  for(int i = 0; i < 3; i++) {
    other_c[i] = chanend_alloc();
    chan_out_word(c, other_c[i]);
    chanend_t other_side = chan_in_word(c);
    chanend_set_dest(other_c[i], other_side);
  }
  PAR_JOBS(
      PJOB(receive_rx, (&dest, other_c[0], 0)),
      PJOB(receive_rx, (&dest, other_c[1], 1)),
      PJOB(receive_rx, (&dest, other_c[2], 2)),
      PJOB(receive_rx, (&dest, c, 3))
  );
  // Close each extra lane with a control-token handshake in both directions.
  for(int i = 0; i < 3; i++) {
    chanend_out_control_token(other_c[i], 1);
    chanend_check_control_token(other_c[i], 1);
  }
  // The tail (< 96 bytes) arrives over the original channel.
  chan_in_buf_word(c, &data[whole*24], last>>2);
  for(int i = 0; i < 3; i++) {
    chanend_free(other_c[i]);
  }
}

/* See memory_parallel_transport.h: sending counterpart of
 * memory_parallel_receive; note the identifier exchange runs in the
 * opposite order (in before out) to pair with the receiver. */
extern void memory_parallel_send(chanend_t c, uint32_t *data, uint32_t byte_count) {
  int whole = byte_count / 96;
  int last = byte_count - whole * 96;
  chanend_t other_c[3];
  for(int i = 0; i < 3; i++) {
    other_c[i] = chanend_alloc();
    chanend_t other_side = chan_in_word(c);
    chan_out_word(c, other_c[i]);
    chanend_set_dest(other_c[i], other_side);
  }
  PAR_JOBS(
      PJOB(transmit_tx, (other_c[0], 0, whole, data)),
      PJOB(transmit_tx, (other_c[1], 1, whole, data)),
      PJOB(transmit_tx, (other_c[2], 2, whole, data)),
      PJOB(transmit_tx, ( c, 3, whole, data))
  );
  for(int i = 0; i < 3; i++) {
    chanend_out_control_token(other_c[i], 1);
    chanend_check_control_token(other_c[i], 1);
  }
  // Send the < 96-byte tail over the original channel.
  chan_out_buf_word(c, &data[whole*24], last>>2);
  for(int i = 0; i < 3; i++) {
    chanend_free(other_c[i]);
  }
}


/* As memory_parallel_receive, but instead of spawning threads it reuses
 * three pre-created thread_call worker threads (see thread_call.h). */
extern void memory_parallel_receive_thread_call(chanend_t c, uint32_t *data,
                                                uint32_t byte_count, thread_info_t *thread_inf) {
  int whole = byte_count / 96;
  int last = byte_count - whole * 96;
  destination_description_t dest = {whole, data};
  chanend_t other_c[3];
  for(int i = 0; i < 3; i++) {
    other_c[i] = chanend_alloc();
    chan_out_word(c, other_c[i]);
    chanend_t other_side = chan_in_word(c);
    chanend_set_dest(other_c[i], other_side);
  }
  // Hand lanes 1..3 to the pre-existing worker threads; lane 0 runs here.
  thread_variable_setup((void*)other_c[1], (void*)1, thread_inf->thread_ids.id[0]);
  thread_variable_setup((void*)other_c[2], (void*)2, thread_inf->thread_ids.id[1]);
  thread_variable_setup((void*)c, (void*)3, thread_inf->thread_ids.id[2]);
  thread_call(&dest, (void*)other_c[0], (void*)0, receive_rx, thread_inf);
  for(int i = 0; i < 3; i++) {
    chanend_out_control_token(other_c[i], 1);
    chanend_check_control_token(other_c[i], 1);
  }
  chan_in_buf_word(c, &data[whole*24], last>>2);
  for(int i = 0; i < 3; i++) {
    chanend_free(other_c[i]);
  }
}
--------------------------------------------------------------------------------
/lib_tflite_micro/src/memory_transport_ll.S:
--------------------------------------------------------------------------------
.issue_mode dual
.cc_top transmit_tx.function,transmit_tx
.globl transmit_tx
.globl transmit_tx.nstackwords
.linkset transmit_tx.nstackwords, 0
.align 16
// transmit_tx(chanend c [r0], int offset [r1], int n [r2], uint32_t *data [r3])
// One send lane: outputs n blocks of 6 words, starting at
// data + offset*n*6 words, i.e. lane `offset`'s contiguous quarter.
transmit_tx:
	{ dualentsp 0 ; ldc r11, 6*4 }
	mul r1, r1, r11
	mul r1, r1, r2
	add r1, r3, r1
tx_loop:
	{ out res[r0], r3 ; ldw r3, r1[0] }
	{ out res[r0], r3 ; ldw r3, r1[1] }
	{ out res[r0], r3 ; ldw r3, r1[2] }
	{ nop ; sub r2, r2, 1 }
	{ out res[r0], r3 ; ldw r3, r1[3] }
	{ out res[r0], r3 ; ldw r3, r1[4] }
	{ out res[r0], r3 ; ldw r3, r1[5] }
	{ bt r2, tx_loop ; add r1, r1, r11 }
	{ out res[r0], r3 ; nop }
	{ out res[r0], r3 ; nop } // dummy out
	{ outct res[r0], 1 ; retsp 0 }
.cc_bottom transmit_tx.function

.cc_top receive_rx.function,receive_rx
.globl receive_rx
.globl receive_rx.nstackwords
.linkset receive_rx.nstackwords, 0
.align 16
.skip 4
// receive_rx(destination_description_t *d [r0], chanend c [r1], int offset [r2])
// One receive lane: stores d->whole blocks of 6 words at
// d->data + offset*d->whole*6 words. The two leading ins absorb the
// sender's pipeline skew (see the dummy outs in transmit_tx).
receive_rx:
	{ dualentsp 0 ; ldc r11, 6*4 }
	mul r2, r2, r11
	ldw r3, r0[0]
	mul r2, r2, r3
	ldw r0, r0[1]
	{ add r2, r0, r2 ; in r0, res[r1] } // Dummy in
	in r0, res[r1]
rx_loop:
	{ in r0, res[r1] ; stw r0, r2[0] }
	{ in r0, res[r1] ; stw r0, r2[1] }
	{ in r0, res[r1] ; stw r0, r2[2] }
	{ nop ; sub r3, r3, 1 }
	{ in r0, res[r1] ; stw r0, r2[3] }
	{ in r0, res[r1] ; stw r0, r2[4] }
	{ in r0, res[r1] ; stw r0, r2[5] }
	{ bt r3, rx_loop ; add r2, r2, r11 }
	{ chkct res[r1], 1 ; retsp 0 }
.cc_bottom receive_rx.function

--------------------------------------------------------------------------------
/lib_tflite_micro/src/par_invoke_funcs.c:
--------------------------------------------------------------------------------

#include "thread_call.h"

#ifdef NO_INTERPRETER

#if defined(__xcore__) || defined(__riscv_xxcore)
#include
// NOTE(review): include target lost in extraction; confirm against repository.
DECLARE_JOB(main_task, (thread_info_t *, synchronizer_t));
DECLARE_JOB(client_task, (thread_info_t *, int));
#endif

extern void invoke_subgraph_c_trampoline();

// Main thread: record the synchroniser, then run the compiled model graph.
void main_task(thread_info_t *t, synchronizer_t sync) {
  thread_store_sync(t, sync);
  invoke_subgraph_c_trampoline();
}

// Worker thread n: park in the thread_call client loop awaiting work items.
void client_task(thread_info_t *t, int n) {
  thread_client(t, n);
}

// par_invoke_K: run the compiled model with K threads (1 main + K-1 clients).
// Non-xcore builds fall back to running the tasks sequentially.
void par_invoke_1(thread_info_t *ti) {
#ifdef __xcore__
  PAR_JOBS(
      PJOB(main_task, (ti, PAR_SYNC)));
#else
  main_task(ti, 0);
#endif
}

void par_invoke_2(thread_info_t *ti) {
#ifdef __xcore__
  PAR_JOBS(
      PJOB(main_task, (ti, PAR_SYNC)),
      PJOB(client_task, (ti, 0)));
#else
  client_task(ti, 0);
  main_task(ti, 0);
#endif
}

void par_invoke_3(thread_info_t *ti) {
#ifdef __xcore__
  PAR_JOBS(
      PJOB(main_task, (ti, PAR_SYNC)),
      PJOB(client_task, (ti, 0)),
      PJOB(client_task, (ti, 1)));
#else
  client_task(ti, 0);
  client_task(ti, 1);
  main_task(ti, 0);
#endif
}

void par_invoke_4(thread_info_t *ti) {
#ifdef __xcore__
  PAR_JOBS(
      PJOB(main_task, (ti, PAR_SYNC)),
      PJOB(client_task, (ti, 0)),
      PJOB(client_task, (ti, 1)),
      PJOB(client_task, (ti, 2)));
#else
  client_task(ti, 0);
  client_task(ti, 1);
  client_task(ti, 2);
  main_task(ti, 0);
#endif
}

void par_invoke_5(thread_info_t *ti) {
#ifdef __xcore__
  PAR_JOBS(
      PJOB(main_task, (ti, PAR_SYNC)),
      PJOB(client_task, (ti, 0)),
      PJOB(client_task, (ti, 1)),
      PJOB(client_task, (ti, 2)),
      PJOB(client_task, (ti, 3)));
#else
  client_task(ti, 0);
  client_task(ti, 1);
  client_task(ti, 2);
  client_task(ti, 3);
  main_task(ti, 0);
#endif
}

#endif
--------------------------------------------------------------------------------
/lib_tflite_micro/src/tflite-xcore-kernels/micro_time.cc:
--------------------------------------------------------------------------------
// Copyright (c) 2022, XMOS Ltd, All rights reserved

#include "tensorflow/lite/micro/micro_time.h"

extern "C" {
// These are headers from XMOS toolchain.
#include
#ifdef _TIME_H_
#define _clock_defined
#endif
#include
// NOTE(review): include targets lost in extraction; confirm against repository.
}

namespace tflite_micro {

// Timebase for TFLM profiling: ticks of the platform reference clock.
uint32_t ticks_per_second() { return PLATFORM_REFERENCE_HZ; }

uint32_t GetCurrentTimeTicks() { return get_reference_time(); }

} // namespace tflite_micro
--------------------------------------------------------------------------------
/lib_tflite_micro/src/tflite-xcore-kernels/xcore_add.cc:
--------------------------------------------------------------------------------
// Copyright (c) 2023, XMOS Ltd, All rights reserved

// NOTE(review): angle-bracketed template arguments (e.g. static_cast<...>,
// construct_persistent_object<...>, GetTensorData<...>) appear to have been
// stripped from this dump during extraction; restore from the repository
// before compiling.

#include "../thread_call.h"
#include "xcore_config.h"
#include "xcore_custom_options.h"
#include "xcore_utils.h"
extern "C" {
#include "lib_nn/api/nn_operator.h"
}

namespace tflite_micro {
namespace ops {
namespace micro {
namespace xcore {
namespace add {

// Pointers shared by all worker threads of a single Eval() call.
struct AddShared {
  int8_t *Y;             // output data
  int8_t *X1;            // first input
  int8_t *X2;            // second input
  nn_add_params_t *blob; // broadcast multiplier/bias/shift vectors
};

extern "C" {
// Per-thread worker: applies add_elementwise to elements [*start, *end).
void add_thread_worker(void *shared, void *start, void *end) {
  int *s = static_cast(start);
  int *e = static_cast(end);
  auto sd = static_cast(shared);
  add_elementwise(sd->Y, sd->X1, sd->X2, sd->blob, *s, *e - *s);
}
}

// This is the struct that contains the data required by the operator
struct AddOpData {
  nn_add_params_t params;
  int tc;                       // number of threads actually used
  int s[XCORE_MAX_NUM_THREADS]; // per-thread start index
  int e[XCORE_MAX_NUM_THREADS]; // per-thread end index (exclusive)
};

// Parse the custom options (m1, m2, bias, shift) and broadcast them into the
// VPU-friendly vectors of nn_add_params_t.
void *Init(TfLiteContext *context, const char *buffer, size_t length) {
  auto op_data = construct_persistent_object(context);

  auto parser = CustomOptionParser(buffer, length);
  int m1 = parser.parseNamedCustomOption("m1").AsInt32();
  int m2 = parser.parseNamedCustomOption("m2").AsInt32();
  int bias = parser.parseNamedCustomOption("bias").AsInt32();
  int shift = parser.parseNamedCustomOption("shift").AsInt32();

  // Broadcast values into vectors
  // We are VLMACC-ing in 16-bit mode
  for (int i = 0; i < VPU_INT16_VLMACC_ELMS; i++) {
    op_data->params.m1[i] = (int16_t)m1;
    op_data->params.m2[i] = (int16_t)m2;
    op_data->params.shift[i] = (int16_t)shift;
    // Split 32-bit bias into two 16-bit values
    op_data->params.bias_hi[i] = bias >> 16;
    op_data->params.bias_lo[i] = (int16_t) (bias & 0XFFFF);
  }

  return op_data;
}

// Does all the requests for scratches, and precomputes the per-thread
// element ranges from the configured model thread count.
TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) {
  auto *op_data = static_cast(node->user_data);
  MicroContext *micro_context = GetMicroContext(context);
  xc_context_config_t *xc_config = reinterpret_cast(
      micro_context->external_context());
  const TfLiteEvalTensor *output =
      tflite_micro::micro::GetEvalOutput(context, node, 0);
  int output_size = tflite_micro::micro::GetTensorShape(output).FlatSize();
  op_data->tc = calculateAlignedThreadSplit(xc_config->model_thread_count, output_size, op_data->s, op_data->e);
  return kTfLiteOk;
}

// Run the elementwise add: hand ranges 0..tc-2 to worker threads, then run
// the final range on the calling thread via thread_call().
TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) {

  auto *op_data = static_cast(node->user_data);
  // Get Input/Output Tensors
  const TfLiteEvalTensor *input1 =
      tflite_micro::micro::GetEvalInput(context, node, 0);
  const TfLiteEvalTensor *input2 =
      tflite_micro::micro::GetEvalInput(context, node, 1);
  TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0);
  // Pointers to data in In/Out Tensors
  int8_t *in1_data =
      const_cast(tflite_micro::micro::GetTensorData(input1));
  int8_t *in2_data =
      const_cast(tflite_micro::micro::GetTensorData(input2));
  int8_t *out_data = tflite_micro::micro::GetTensorData(output);

  MicroContext *micro_context = GetMicroContext(context);
  xc_context_config_t *xc_config = reinterpret_cast(
      micro_context->external_context());
  const int tc = op_data->tc;
  AddShared shared_data;
  shared_data.Y = out_data;
  shared_data.X1 = in1_data;
  shared_data.X2 = in2_data;
  shared_data.blob = &op_data->params;
  for (int t = 0; t < tc - 1; t++) {
    thread_variable_setup((void *)&op_data->s[t], (void *)&op_data->e[t],
                          xc_config->thread_info.thread_ids.id[t]);
  }
  thread_call((void *)&shared_data, &op_data->s[tc - 1], &op_data->e[tc - 1],
              (thread_function_pointer_t)add_thread_worker,
              &xc_config->thread_info);

  return kTfLiteOk;
}

} // namespace add

// Registration hook: static lifetime, so the pointer stays valid.
TFLMRegistration *Register_XC_add() {
  static TFLMRegistration r = {add::Init, nullptr, add::Prepare, add::Eval};
  return &r;
}

} // namespace xcore
} // namespace micro
} // namespace ops
} // namespace tflite_micro
--------------------------------------------------------------------------------
/lib_tflite_micro/src/tflite-xcore-kernels/xcore_batched_softmax.cc:
--------------------------------------------------------------------------------

#include "../thread_call.h"
#include "xcore_config.h"
#include "xcore_custom_options.h"
#include "xcore_utils.h"

extern "C" {
#include "lib_nn/api/nn_operator.h"
}

namespace tflite_micro {
namespace ops {
namespace micro {
namespace xcore {
namespace softmax_batched {

// Pointers shared by the batched-softmax worker threads.
struct SoftmaxBatchedShared {
  int8_t *X, *Y;
  int softmax_size;
  float *table;
};

extern "C" {
void softmax_thread_worker(void *shared, void *start, void *count) {
  int *s = static_cast(start);
26 | int *c = static_cast(count); 27 | auto sd = static_cast(shared); 28 | for (int i = 0; i < *c; i++) { 29 | const int offset = i * sd->softmax_size + *s; 30 | softmax_single(sd->Y + offset, sd->X + offset, sd->table, sd->softmax_size); 31 | } 32 | } 33 | } 34 | 35 | // This is the struct that contains the data required by the operator 36 | struct SoftmaxBatchedOpData { 37 | int tc; 38 | int softmax_size; 39 | int starts[XCORE_MAX_NUM_THREADS]; 40 | int counts[XCORE_MAX_NUM_THREADS]; 41 | }; 42 | 43 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 44 | auto op_data = construct_persistent_object(context); 45 | return op_data; 46 | } 47 | 48 | // Does all the requests for scratches 49 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 50 | auto op_data = static_cast(node->user_data); 51 | MicroContext *micro_context = GetMicroContext(context); 52 | xc_context_config_t *xc_config = reinterpret_cast( 53 | micro_context->external_context()); 54 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 55 | const int trailing_dim = tflite_micro::micro::GetTensorShape(input).DimensionsCount() - 1; 56 | const int num_softmaxes = tflite_micro::micro::GetTensorShape(input).Dims(trailing_dim - 1); 57 | op_data->softmax_size = tflite_micro::micro::GetTensorShape(input).Dims(trailing_dim); 58 | int starts[XCORE_MAX_NUM_THREADS]; 59 | int ends[XCORE_MAX_NUM_THREADS]; 60 | int counts[XCORE_MAX_NUM_THREADS]; 61 | op_data->tc = calculateAlignedThreadSplit(xc_config->model_thread_count, num_softmaxes, starts, ends); 62 | for (int t = 0; t < op_data->tc; t++) { 63 | op_data->counts[t] = ends[t] - starts[t]; 64 | op_data->starts[t] = starts[t] * op_data->softmax_size; 65 | } 66 | return kTfLiteOk; 67 | } 68 | 69 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 70 | 71 | auto *op_data = static_cast(node->user_data); 72 | 73 | // Get Input/Output Tensors 74 | const TfLiteEvalTensor *input = 
tflite_micro::micro::GetEvalInput(context, node, 0); 75 | const TfLiteEvalTensor *table = tflite_micro::micro::GetEvalInput(context, node, 1); 76 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 77 | 78 | // Pointers to data in In/Out Tensors 79 | const float *table_vals = tflite_micro::micro::GetTensorData(table); 80 | int8_t *out_data = tflite_micro::micro::GetTensorData(output); 81 | const int8_t *in_data = tflite_micro::micro::GetTensorData(input); 82 | MicroContext *micro_context = GetMicroContext(context); 83 | xc_context_config_t *xc_config = reinterpret_cast( 84 | micro_context->external_context()); 85 | const int tc = op_data->tc; 86 | SoftmaxBatchedShared shared_data; 87 | shared_data.Y = out_data; 88 | shared_data.X = const_cast(in_data); 89 | shared_data.table = const_cast(table_vals); 90 | shared_data.softmax_size = op_data->softmax_size; 91 | for (int t = 0; t < tc - 1; t++) { 92 | thread_variable_setup((void *)&op_data->starts[t], 93 | (void *)&op_data->counts[t], 94 | xc_config->thread_info.thread_ids.id[t]); 95 | } 96 | thread_call((void *)&shared_data, (void *)&op_data->starts[tc - 1], 97 | (void *)&op_data->counts[tc - 1], 98 | (thread_function_pointer_t)softmax_thread_worker, 99 | &xc_config->thread_info); 100 | return kTfLiteOk; 101 | } 102 | } // namespace softmax_batched 103 | 104 | TFLMRegistration *Register_XC_batched_softmax() { 105 | static TFLMRegistration r = {softmax_batched::Init, nullptr, 106 | softmax_batched::Prepare, softmax_batched::Eval}; 107 | return &r; 108 | } 109 | 110 | } // namespace xcore 111 | } // namespace micro 112 | } // namespace ops 113 | } // namespace tflite_micro 114 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_beta_fcf32.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include 
"../thread_call.h" 4 | #include "conv2d_float.h" 5 | #include "xcore_config.h" 6 | #include "xcore_custom_options.h" 7 | #include "xcore_utils.h" 8 | extern "C" { 9 | #include "lib_nn/api/nn_operator.h" 10 | } 11 | 12 | namespace tflite_micro { 13 | namespace ops { 14 | namespace micro { 15 | namespace xcore { 16 | namespace beta_fcf32 { 17 | 18 | // This is the struct that contains the data required by the operator 19 | struct Beta_FcF32OpData { 20 | int tc; 21 | int s[XCORE_MAX_NUM_THREADS]; 22 | int e[XCORE_MAX_NUM_THREADS]; 23 | }; 24 | 25 | struct Beta_FcF32Shared { 26 | float *out; 27 | float *in; 28 | float *kernels; 29 | int out_f; 30 | int in_f; 31 | }; 32 | 33 | extern "C" { 34 | void beta_fcf32_thread_worker(void *shared, void *start, void *end) { 35 | int *s = static_cast(start); 36 | int *e = static_cast(end); 37 | auto sd = static_cast(shared); 38 | #if defined(NN_USE_REF) || defined(__VX4A__) 39 | xc_fc_float_ref(sd->out, sd->in, sd->kernels, sd->out_f, 40 | sd->in_f); 41 | #else 42 | xc_fc_float_opt(sd->out, sd->in, sd->kernels, sd->out_f, 43 | sd->in_f, *s, *e); 44 | #endif 45 | } 46 | } 47 | 48 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 49 | auto op_data = construct_persistent_object(context); 50 | 51 | return op_data; 52 | } 53 | 54 | // Does all the requests for scratches 55 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 56 | auto op_data = static_cast(node->user_data); 57 | MicroContext *micro_context = GetMicroContext(context); 58 | xc_context_config_t *xc_config = reinterpret_cast( 59 | micro_context->external_context()); 60 | const TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 61 | int out_f = output->dims->data[1]; 62 | op_data->tc = calculateAlignedThreadSplit(xc_config->model_thread_count, out_f, op_data->s, op_data->e); 63 | return kTfLiteOk; 64 | } 65 | 66 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 67 | auto op_data = 
static_cast(node->user_data); 68 | // Get Input/Output Tensors 69 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 70 | const TfLiteEvalTensor *kernels = 71 | tflite_micro::micro::GetEvalInput(context, node, 1); 72 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 73 | 74 | int out_f = output->dims->data[1]; 75 | 76 | int in_f = input->dims->data[1]; 77 | 78 | // Pointers to data in In/Out Tensors 79 | float *out_data = tflite_micro::micro::GetTensorData(output); 80 | float *in_data = 81 | const_cast(tflite_micro::micro::GetTensorData(input)); 82 | float *kernel_data = 83 | const_cast(tflite_micro::micro::GetTensorData(kernels)); 84 | 85 | MicroContext *micro_context = GetMicroContext(context); 86 | xc_context_config_t *xc_config = reinterpret_cast( 87 | micro_context->external_context()); 88 | 89 | // todo - this second for-loop is unpleasant 90 | for (int t = 0; t < op_data->tc - 1; ++t) { 91 | thread_variable_setup(&op_data->s[t], &op_data->e[t], 92 | xc_config->thread_info.thread_ids.id[t]); 93 | } 94 | 95 | Beta_FcF32Shared shared_data; 96 | shared_data.out = out_data; 97 | shared_data.in = in_data; 98 | shared_data.kernels = kernel_data; 99 | shared_data.out_f = out_f; 100 | shared_data.in_f = in_f; 101 | 102 | // Now set up shared data, shared function pointer, and data for final thread. 
103 | thread_call((void *)&shared_data, &op_data->s[op_data->tc - 1], &op_data->e[op_data->tc - 1], 104 | (thread_function_pointer_t)beta_fcf32_thread_worker, 105 | &xc_config->thread_info); 106 | 107 | return kTfLiteOk; 108 | } 109 | 110 | } // namespace beta_fcf32 111 | 112 | TFLMRegistration *Register_XC_beta_fcf32() { 113 | static TFLMRegistration r = {beta_fcf32::Init, nullptr, 114 | beta_fcf32::Prepare, beta_fcf32::Eval}; 115 | return &r; 116 | } 117 | 118 | } // namespace xcore 119 | } // namespace micro 120 | } // namespace ops 121 | } // namespace tflite_micro 122 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_broadcast.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include 4 | extern "C" { 5 | #include "vpu_memmove_word_aligned.h" 6 | #include "vpu_memset_256.h" 7 | } 8 | 9 | #include "xcore_custom_options.h" 10 | #include "xcore_utils.h" 11 | 12 | namespace tflite_micro { 13 | namespace ops { 14 | namespace micro { 15 | namespace xcore { 16 | namespace broadcast { 17 | 18 | struct BroadcastOpData { 19 | int32_t size; 20 | int32_t num_copies; 21 | int32_t num_broadcasts; 22 | void (*func_ptr)(void *, const void *, unsigned); 23 | }; 24 | 25 | void memmove_wrapper(void *dst, const void *src, unsigned size) { 26 | memmove(dst, src, size); 27 | } 28 | 29 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 30 | auto op_data = construct_persistent_object(context); 31 | auto parser = CustomOptionParser(buffer, length); 32 | op_data->size = parser.parseNamedCustomOption("s").AsInt32(); 33 | op_data->num_copies = parser.parseNamedCustomOption("n").AsInt32(); 34 | op_data->num_broadcasts = parser.parseNamedCustomOption("b").AsInt32(); 35 | bool use_vpu = parser.parseNamedCustomOption("v").AsBool(); 36 | op_data->func_ptr = use_vpu ? 
vpu_memmove_word_aligned : memmove_wrapper; 37 | return op_data; 38 | } 39 | 40 | // Does all the requests for scratches 41 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 42 | return kTfLiteOk; 43 | } 44 | 45 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 46 | auto *op_data = static_cast(node->user_data); 47 | // Get Input/Output Tensors 48 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 49 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 50 | // Pointers to data in In/Out Tensors 51 | const int8_t *in_data = tflite_micro::micro::GetTensorData(input); 52 | int8_t *out_data = tflite_micro::micro::GetTensorData(output); 53 | const int size = op_data->size; 54 | const int num_copies = op_data->num_copies; 55 | const int num_broadcasts = op_data->num_broadcasts; 56 | if (size == 1 && num_copies < 64) { 57 | for (int i = 0; i < num_broadcasts; i++) { 58 | memset(out_data, *in_data, num_copies); 59 | out_data += num_copies; 60 | in_data++; 61 | } 62 | return kTfLiteOk; 63 | } 64 | if ((size != 1 && size != 2 && size != 4) || num_copies < 64) { 65 | void (*func_ptr)(void *, const void *, unsigned) = op_data->func_ptr; 66 | for (int i = 0; i < num_broadcasts; i++) { 67 | for (int j = 0; j < num_copies; j++) { 68 | func_ptr(out_data, in_data, size); 69 | out_data += size; 70 | } 71 | in_data += size; 72 | } 73 | 74 | return kTfLiteOk; 75 | } 76 | uint32_t c; 77 | uint8_t from[32]; 78 | for (int i = 0; i < num_broadcasts; i++) { 79 | switch (size) { 80 | case 1: 81 | // c = ins[0] * 0x01010101; 82 | c = ((uint8_t)(*in_data)) * 0x01010101; 83 | break; 84 | case 2: 85 | c = ((uint8_t)(*in_data) | ((uint8_t)(in_data[1]) << 8)) * 0x00010001; 86 | break; 87 | case 4: 88 | c = ((uint8_t)(*in_data) | ((uint8_t)(in_data[1]) << 8) | 89 | ((uint8_t)(in_data[2]) << 16) | ((uint8_t)(in_data[3]) << 24)); 90 | break; 91 | } 92 | broadcast_32_to_256(from, c); 93 | 
vpu_memset_256(out_data, from, num_copies * size); 94 | out_data += num_copies * size; 95 | in_data += size; 96 | } 97 | return kTfLiteOk; 98 | } 99 | } // namespace broadcast 100 | 101 | TFLMRegistration *Register_XC_broadcast() { 102 | static TFLMRegistration r = {broadcast::Init, nullptr, broadcast::Prepare, 103 | broadcast::Eval}; 104 | return &r; 105 | } 106 | 107 | } // namespace xcore 108 | } // namespace micro 109 | } // namespace ops 110 | } // namespace tflite_micro 111 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_bsign.cc: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "flatbuffers/flexbuffers.h" 4 | #include "tensorflow/lite/c/common.h" 5 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" 6 | #include "tensorflow/lite/kernels/kernel_util.h" 7 | #include "tensorflow/lite/micro/kernels/kernel_util.h" 8 | #include "xcore_custom_options.h" 9 | #include "xcore_utils.h" 10 | 11 | extern "C" { 12 | #include "lib_nn/api/nn_operator.h" 13 | } 14 | 15 | namespace tflite_micro { 16 | namespace ops { 17 | namespace micro { 18 | namespace xcore { 19 | namespace bsign { 20 | 21 | // -------------------------------------------------------------------- // 22 | // kernel argument type 23 | // -------------------------------------------------------------------- // 24 | 25 | struct BSign8Args { 26 | int32_t *Y; 27 | const int8_t *X; 28 | int8_t zero_point_vec[VPU_INT8_EPV]; 29 | }; 30 | 31 | // -------------------------------------------------------------------- // 32 | // thread data type and worker functions 33 | // -------------------------------------------------------------------- // 34 | 35 | struct BSign8ThreadData { 36 | const BSign8Args *args; 37 | const nn_bsign_8_job_t *job; 38 | }; 39 | 40 | extern "C" { 41 | void bsign_8_thread_worker(void *context) { 42 | auto *td = (BSign8ThreadData *)context; 43 | auto 
*args = td->args; 44 | bsign_8(args->Y, args->X, args->zero_point_vec, td->job); 45 | } 46 | } 47 | 48 | // -------------------------------------------------------------------- // 49 | // op data types 50 | // -------------------------------------------------------------------- // 51 | 52 | struct BSign8OpData { 53 | BSign8Args args; 54 | PersistentArray jobs; 55 | PersistentArray threads; 56 | }; 57 | 58 | // -------------------------------------------------------------------- // 59 | // op function implementations 60 | // -------------------------------------------------------------------- // 61 | 62 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 63 | auto *op_data = construct_persistent_object(context); 64 | 65 | // TODO parse data for parallelism 66 | // in this op we have one job per thread 67 | int n_threads = 1; 68 | op_data->jobs.allocate(context, n_threads) 69 | .initialize(); // TODO: REMOVE ALL OF THIS 70 | op_data->threads.allocate(context, n_threads); // SHOULD BE NOTHING LEFT. 
71 | for (auto &job : op_data->jobs) { 72 | op_data->threads.append({&op_data->args, &job}); 73 | } 74 | 75 | return op_data; 76 | } 77 | 78 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 79 | TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); 80 | TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); 81 | 82 | auto *op_data = reinterpret_cast(node->user_data); 83 | MicroContext *micro_context = GetMicroContext(context); 84 | TfLiteTensor *input = micro_context->AllocateTempInputTensor(node, 0); 85 | TF_LITE_ENSURE(context, input != nullptr); 86 | 87 | const int32_t input_size = EvalTensorBytes((const TfLiteEvalTensor*)input) / sizeof(int8_t); 88 | bsign_8_prepare(op_data->jobs.begin(), op_data->args.zero_point_vec, 89 | input_size, input->params.zero_point, op_data->jobs.size()); 90 | 91 | micro_context->DeallocateTempTfLiteTensor(input); 92 | 93 | return kTfLiteOk; 94 | } 95 | 96 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 97 | auto *op_data = reinterpret_cast(node->user_data); 98 | 99 | op_data->args.X = tflite_micro::micro::GetTensorData( 100 | tflite_micro::micro::GetEvalInput(context, node, 0)); 101 | op_data->args.Y = tflite_micro::micro::GetTensorData( 102 | tflite_micro::micro::GetEvalOutput(context, node, 0)); 103 | 104 | for (auto &thread : op_data->threads) { // TODO: remove - only 1 task! 
105 | bsign_8_thread_worker(reinterpret_cast(&thread)); 106 | } 107 | 108 | return kTfLiteOk; 109 | } 110 | 111 | } // namespace bsign 112 | 113 | TFLMRegistration *Register_XC_bsign_8() { 114 | static TFLMRegistration r = {bsign::Init, nullptr, bsign::Prepare, 115 | bsign::Eval}; 116 | return &r; 117 | } 118 | 119 | } // namespace xcore 120 | } // namespace micro 121 | } // namespace ops 122 | } // namespace tflite_micro 123 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_common.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #if defined __GNUC__ 7 | #define ALIGN(X) __attribute__((aligned(X))) 8 | #elif defined _MSC_VER 9 | #define ALIGN(X) __declspec(align(X)) 10 | #elif defined __TASKING__ 11 | #define ALIGN(X) __align(X) 12 | #endif 13 | 14 | #define MAX_DEBUG_LOG_LENGTH 256 15 | #define MAX_DEBUG_LOG_ENTRIES 3 16 | 17 | int debug_log_index = 0; 18 | char ALIGN(4) debug_log_buffer[MAX_DEBUG_LOG_LENGTH * MAX_DEBUG_LOG_ENTRIES]; 19 | 20 | extern "C" void DebugLog(const char* format, va_list args) { 21 | vsnprintf(&debug_log_buffer[debug_log_index * MAX_DEBUG_LOG_ENTRIES], MAX_DEBUG_LOG_LENGTH, format, args); 22 | printf("%s", &debug_log_buffer[debug_log_index * MAX_DEBUG_LOG_ENTRIES]); 23 | debug_log_index++; 24 | if (debug_log_index == MAX_DEBUG_LOG_ENTRIES) 25 | debug_log_index = 0; 26 | } 27 | 28 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_custom_options.cc: -------------------------------------------------------------------------------- 1 | #include "xcore_custom_options.h" 2 | 3 | namespace tflite_micro { 4 | namespace ops { 5 | namespace micro { 6 | namespace xcore { 7 | 8 | CustomOptionParser::CustomOptionParser(const flexbuffers::Map &map) 9 | : 
keys_(flexbuffers::TypedVector::EmptyTypedVector()), 10 | values_(flexbuffers::Vector::EmptyVector()) { 11 | keys_ = map.Keys(); 12 | values_ = map.Values(); 13 | } 14 | 15 | CustomOptionParser::CustomOptionParser(const char *buffer, size_t buffer_length) 16 | : CustomOptionParser::CustomOptionParser( 17 | flexbuffers::GetRoot(reinterpret_cast(buffer), 18 | buffer_length) 19 | .AsMap()) { 20 | assert(buffer != nullptr); 21 | assert(buffer_length > 0); 22 | } 23 | 24 | flexbuffers::Reference 25 | CustomOptionParser::parseNamedCustomOption(const char *name) const { 26 | for (int i = 0; i < keys_.size(); ++i) { 27 | const auto &key = keys_[i].AsString().c_str(); 28 | if (strcmp(key, name) == 0) { 29 | return values_[i]; 30 | } 31 | } 32 | return flexbuffers::Reference(nullptr, 1, flexbuffers::NullPackedType()); 33 | } 34 | 35 | } // namespace xcore 36 | } // namespace micro 37 | } // namespace ops 38 | } // namespace tflite_micro 39 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_custom_options.h: -------------------------------------------------------------------------------- 1 | #ifndef XCORE_CUSTOM_OPTIONS_H_ 2 | #define XCORE_CUSTOM_OPTIONS_H_ 3 | 4 | #include "flatbuffers/flexbuffers.h" 5 | #include "xcore_ops.h" 6 | 7 | namespace tflite_micro { 8 | namespace ops { 9 | namespace micro { 10 | namespace xcore { 11 | 12 | class CustomOptionParser { 13 | private: 14 | flexbuffers::TypedVector keys_; 15 | flexbuffers::Vector values_; 16 | 17 | public: 18 | CustomOptionParser(const flexbuffers::Map &map); 19 | CustomOptionParser(const char *buffer, size_t buffer_length); 20 | flexbuffers::Reference parseNamedCustomOption(const char *name) const; 21 | }; 22 | 23 | } // namespace xcore 24 | } // namespace micro 25 | } // namespace ops 26 | } // namespace tflite_micro 27 | 28 | #endif // XCORE_CUSTOM_OPTIONS_H_ 29 | 
-------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_error_reporter.cc: -------------------------------------------------------------------------------- 1 | #include "xcore_error_reporter.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifdef __xcore__ 10 | #include 11 | #endif 12 | 13 | //#if !defined(TF_LITE_STRIP_ERROR_STRINGS) 14 | #include "tensorflow/lite/micro/debug_log.h" 15 | //#endif 16 | 17 | namespace tflite_micro { 18 | namespace micro { 19 | namespace xcore { 20 | 21 | void XCoreErrorReporter::Init(char *debugBuffer, int debugBufferLength) { 22 | buffer = debugBuffer; 23 | max_len = debugBufferLength; 24 | memset(debugBuffer, 0, max_len); 25 | } 26 | 27 | void XCoreErrorReporter::Log(const char *format, va_list args) { 28 | #if !defined(TF_LITE_STRIP_ERROR_STRINGS) 29 | static constexpr int kMaxLogLen = 256; 30 | if (len + kMaxLogLen > max_len) { 31 | int new_len = max_len - kMaxLogLen; 32 | if (new_len < 0) { 33 | new_len = 0; 34 | } 35 | for (int i = 0; i <= new_len; i++) { 36 | buffer[i] = buffer[i - new_len + len]; 37 | } 38 | len = new_len; 39 | } 40 | vsprintf(buffer + len, format, args); 41 | len = strlen(buffer); 42 | #ifdef __xcore__ 43 | printstr(buffer); 44 | #else 45 | printf("%s", buffer); 46 | #endif 47 | 48 | #endif 49 | } 50 | 51 | int XCoreErrorReporter::Report(const char *format, va_list args) { 52 | Log(format, args); 53 | return 0; 54 | } 55 | 56 | } // namespace xcore 57 | } // namespace micro 58 | } // namespace tflite_micro 59 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_error_reporter.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, XMOS Ltd, All rights reserved 2 | 3 | #ifndef XCORE_ERROR_REPORTER_H_ 4 | #define XCORE_ERROR_REPORTER_H_ 5 | 6 | #include 
"tensorflow/lite/micro/compatibility.h" 7 | #include "tensorflow/lite/micro/tflite_bridge/micro_error_reporter.h" 8 | 9 | namespace tflite_micro { 10 | namespace micro { 11 | namespace xcore { 12 | 13 | class XCoreErrorReporter : public tflite_micro::MicroErrorReporter { 14 | public: 15 | explicit XCoreErrorReporter(){}; 16 | ~XCoreErrorReporter() override = default; 17 | void Init(char *debugBuffer, int debugBufferLength); 18 | void Log(const char *format, va_list args); 19 | int Report(const char *format, va_list args) override; 20 | 21 | private: 22 | char *buffer; 23 | int max_len; 24 | int len = 0; 25 | TF_LITE_REMOVE_VIRTUAL_DELETE 26 | }; 27 | 28 | } // namespace xcore 29 | } // namespace micro 30 | } // namespace tflite_micro 31 | 32 | #endif // XCORE_ERROR_REPORTER_H_ 33 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_expand_8_to_16.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include "../thread_call.h" 4 | #include "xcore_config.h" 5 | #include "xcore_custom_options.h" 6 | #include "xcore_utils.h" 7 | extern "C" { 8 | #include "lib_nn/api/nn_operator.h" 9 | #include "lib_nn/api/expand_8_to_16.h" 10 | } 11 | 12 | namespace tflite_micro { 13 | namespace ops { 14 | namespace micro { 15 | namespace xcore { 16 | namespace expand_8to16 { 17 | 18 | struct Expand_8_To_16Shared { 19 | int8_t *X; 20 | int16_t *Y; 21 | }; 22 | 23 | extern "C" { 24 | void expand_8_to_16_thread_worker(void *shared, void *start, void *count) { 25 | int *s = static_cast(start); 26 | int *c = static_cast(count); 27 | auto sd = static_cast(shared); 28 | expand_8_to_16(sd->Y + *s, sd->X + *s, *c); 29 | } 30 | } 31 | 32 | // This is the struct that contains the data required by the operator 33 | struct Expand_8_To_16OpData { 34 | int tc; 35 | int start[XCORE_MAX_NUM_THREADS]; 36 | int 
count[XCORE_MAX_NUM_THREADS]; 37 | }; 38 | 39 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 40 | auto op_data = construct_persistent_object(context); 41 | return op_data; 42 | } 43 | 44 | // Does all the requests for scratches 45 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 46 | auto op_data = static_cast(node->user_data); 47 | MicroContext *micro_context = GetMicroContext(context); 48 | xc_context_config_t *xc_config = reinterpret_cast( 49 | micro_context->external_context()); 50 | const TfLiteEvalTensor *input = 51 | tflite_micro::micro::GetEvalInput(context, node, 0); 52 | int input_size = tflite_micro::micro::GetTensorShape(input).FlatSize(); 53 | op_data->tc = calculateAlignedThreadSplit(xc_config->model_thread_count, input_size, op_data->start, op_data->count); 54 | for (int t = 0; t < op_data->tc; t++) { 55 | op_data->count[t] = op_data->count[t] - op_data->start[t]; 56 | } 57 | return kTfLiteOk; 58 | } 59 | 60 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 61 | auto *op_data = static_cast(node->user_data); 62 | 63 | // Get Input/Output Tensors 64 | const TfLiteEvalTensor *input = 65 | tflite_micro::micro::GetEvalInput(context, node, 0); 66 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 67 | 68 | // Pointers to data in In/Out Tensors 69 | const int8_t *in_data = tflite_micro::micro::GetTensorData(input); 70 | int8_t *out_data = tflite_micro::micro::GetTensorData(output); 71 | 72 | MicroContext *micro_context = GetMicroContext(context); 73 | xc_context_config_t *xc_config = reinterpret_cast( 74 | micro_context->external_context()); 75 | const int tc = op_data->tc; 76 | Expand_8_To_16Shared shared_data; 77 | shared_data.X = const_cast(in_data); 78 | shared_data.Y = (int16_t *)out_data; 79 | for (int t = 0; t < tc - 1; t++) { 80 | thread_variable_setup((void *)&op_data->start[t], (void *)&op_data->count[t], 81 | xc_config->thread_info.thread_ids.id[t]); 82 | } 
83 | thread_call((void *)&shared_data, &op_data->start[tc - 1], &op_data->count[tc - 1], 84 | (thread_function_pointer_t)expand_8_to_16_thread_worker, 85 | &xc_config->thread_info); 86 | return kTfLiteOk; 87 | } 88 | 89 | } // namespace expand_8to16 90 | 91 | TFLMRegistration *Register_XC_expand_8_to_16() { 92 | static TFLMRegistration r = {expand_8to16::Init, nullptr, expand_8to16::Prepare, 93 | expand_8to16::Eval}; 94 | return &r; 95 | } 96 | 97 | } // namespace xcore 98 | } // namespace micro 99 | } // namespace ops 100 | } // namespace tflite_micro 101 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_interpreter.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, XMOS Ltd, All rights reserved 2 | #ifndef XCORE_INTERPRETER_H_ 3 | #define XCORE_INTERPRETER_H_ 4 | 5 | #include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h" 6 | #include "tensorflow/lite/micro/memory_planner/micro_memory_planner.h" 7 | #include "tensorflow/lite/micro/micro_allocator.h" 8 | #include "tensorflow/lite/micro/micro_interpreter.h" 9 | #include "xcore_profiler.h" 10 | 11 | namespace tflite_micro { 12 | namespace micro { 13 | namespace xcore { 14 | 15 | class XCoreInterpreter : public tflite_micro::MicroInterpreter { 16 | public: 17 | XCoreInterpreter(const tflite_micro::Model *model, 18 | const tflite_micro::MicroOpResolver &resolver, 19 | tflite_micro::MicroAllocator *allocator, 20 | bool use_curent_thread = true, 21 | XCoreProfiler *profiler = nullptr); 22 | 23 | static XCoreInterpreter * 24 | Create(uint8_t interpreter_buffer[], const tflite_micro::Model *model, 25 | const tflite_micro::MicroOpResolver &resolver, uint8_t *arena, 26 | size_t arena_size, bool use_current_thread, XCoreProfiler *profiler); 27 | 28 | void PrintMemoryPlan(); 29 | TfLiteTensor *tensor(size_t tensor_index); 30 | const char *node_name(int sub_idx, int i); 
31 | 32 | TfLiteStatus GetTensorDetails(size_t tensor_index, char *name, int name_len, 33 | int *shape, int *type, float *scale, 34 | int32_t *zero_point); 35 | 36 | TfLiteStatus GetTensorDetailsBufferSizes(size_t tensor_index, size_t *dims, 37 | size_t *scales, size_t *zero_points); 38 | 39 | size_t input_tensor_index(size_t input_index); 40 | size_t output_tensor_index(size_t output_index); 41 | const Model *model__; 42 | MicroAllocator *allocator_; 43 | }; 44 | 45 | } // namespace xcore 46 | } // namespace micro 47 | } // namespace tflite_micro 48 | 49 | #endif // XCORE_INTERPRETER_H_ 50 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_load_store_tensor.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include "xcore_config.h" 4 | #include "xcore_custom_options.h" 5 | #include "xcore_utils.h" 6 | extern "C" { 7 | #include "nn_op_utils.h" 8 | #include "lib_nn/api/nn_layers.h" 9 | } 10 | 11 | namespace tflite_micro { 12 | namespace ops { 13 | namespace micro { 14 | namespace xcore { 15 | namespace load_store_tensor { 16 | 17 | // This is the struct that contains the data required by the operator 18 | struct OpData { 19 | uint32_t addr; 20 | uint32_t size; 21 | }; 22 | 23 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 24 | auto op_data = construct_persistent_object(context); 25 | 26 | auto parser = CustomOptionParser(buffer, length); 27 | op_data->addr = parser.parseNamedCustomOption("a").AsInt32(); 28 | op_data->size = parser.parseNamedCustomOption("s").AsInt32(); 29 | 30 | MicroContext *micro_context = GetMicroContext(context); 31 | xc_context_config_t *xc_config = reinterpret_cast( 32 | micro_context->external_context()); 33 | assert(true); 34 | return op_data; 35 | } 36 | 37 | TfLiteStatus Eval_Store(TfLiteContext *context, TfLiteNode *node) { 38 | auto 
*op_data = static_cast(node->user_data); 39 | MicroContext *micro_context = GetMicroContext(context); 40 | xc_context_config_t *xc_config = reinterpret_cast( 41 | micro_context->external_context()); 42 | 43 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 44 | const int8_t *data_ptr = tflite_micro::micro::GetTensorData(input); 45 | vpu_memcpy_ext(((int8_t *)xc_config->paging_ptr) + op_data->addr, data_ptr, 46 | op_data->size); 47 | return kTfLiteOk; 48 | } 49 | 50 | TfLiteStatus Eval_Load(TfLiteContext *context, TfLiteNode *node) { 51 | auto *op_data = static_cast(node->user_data); 52 | MicroContext *micro_context = GetMicroContext(context); 53 | xc_context_config_t *xc_config = reinterpret_cast( 54 | micro_context->external_context()); 55 | 56 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 57 | int8_t *data_ptr = tflite_micro::micro::GetTensorData(output); 58 | 59 | int output_size = EvalTensorBytes(output); 60 | assert(output_size == op_data->size); 61 | 62 | vpu_memcpy_ext((void *)data_ptr, 63 | ((int8_t *)xc_config->paging_ptr) + op_data->addr, 64 | op_data->size); 65 | return kTfLiteOk; 66 | } 67 | 68 | } // namespace load_store_tensor 69 | 70 | TFLMRegistration *Register_XC_store_tensor() { 71 | static TFLMRegistration r = {load_store_tensor::Init, nullptr, nullptr, 72 | load_store_tensor::Eval_Store}; 73 | return &r; 74 | } 75 | 76 | TFLMRegistration *Register_XC_load_tensor() { 77 | static TFLMRegistration r = {load_store_tensor::Init, nullptr, nullptr, 78 | load_store_tensor::Eval_Load}; 79 | return &r; 80 | } 81 | 82 | } // namespace xcore 83 | } // namespace micro 84 | } // namespace ops 85 | } // namespace tflite_micro 86 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_lookup.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, 
All rights reserved 2 | 3 | #include "../thread_call.h" 4 | #include "xcore_config.h" 5 | #include "xcore_utils.h" 6 | extern "C" { 7 | #include "lib_nn/api/nn_operator.h" 8 | #include "lib_nn/api/quadratic_interpolation.h" 9 | } 10 | 11 | namespace tflite_micro { 12 | namespace ops { 13 | namespace micro { 14 | namespace xcore { 15 | namespace lookup { 16 | 17 | struct LookupShared { 18 | uint8_t *X; 19 | uint8_t *Y; 20 | uint8_t *table; 21 | }; 22 | 23 | extern "C" { 24 | void lookup8_thread_worker(void *shared, void *start, void *end) { 25 | int *s = static_cast(start); 26 | int *e = static_cast(end); 27 | auto sd = static_cast(shared); 28 | // lookup takes start and count instead of start and end 29 | lookup8(sd->Y, sd->X, sd->table, *s, *e - *s); 30 | } 31 | 32 | void lookup16_thread_worker(void *shared, void *start, void *end) { 33 | int *s = static_cast(start); 34 | int *e = static_cast(end); 35 | auto sd = static_cast(shared); 36 | // output and input pointers are adjusted with thread start 37 | quadratic_interpolation_128((int16_t *)sd->Y + *s, (int16_t *)sd->X + *s, 38 | sd->table, *e - *s); 39 | } 40 | } 41 | // This is the struct that contains the data required by the operator 42 | struct LookupOpData { 43 | int tc; 44 | int s[XCORE_MAX_NUM_THREADS]; 45 | int e[XCORE_MAX_NUM_THREADS]; 46 | }; 47 | 48 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 49 | auto op_data = construct_persistent_object(context); 50 | return op_data; 51 | } 52 | 53 | // Does all the requests for scratches 54 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 55 | auto op_data = static_cast(node->user_data); 56 | MicroContext *micro_context = GetMicroContext(context); 57 | xc_context_config_t *xc_config = reinterpret_cast( 58 | micro_context->external_context()); 59 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 60 | int input_size = tflite_micro::micro::GetTensorShape(input).FlatSize(); 61 | 
op_data->tc = calculateAlignedThreadSplit(xc_config->model_thread_count, input_size, op_data->s, op_data->e); 62 | return kTfLiteOk; 63 | } 64 | 65 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 66 | 67 | auto *op_data = static_cast(node->user_data); 68 | 69 | // Get Input/Output Tensors 70 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 71 | const TfLiteEvalTensor *table = tflite_micro::micro::GetEvalInput(context, node, 1); 72 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 73 | 74 | // Pointers to data in In/Out Tensors 75 | const uint8_t *table_vals = tflite_micro::micro::GetTensorData(table); 76 | uint8_t *out_data = tflite_micro::micro::GetTensorData(output); 77 | const uint8_t *in_data = tflite_micro::micro::GetTensorData(input); 78 | MicroContext *micro_context = GetMicroContext(context); 79 | xc_context_config_t *xc_config = reinterpret_cast( 80 | micro_context->external_context()); 81 | const int tc = op_data->tc; 82 | LookupShared shared_data; 83 | shared_data.Y = out_data; 84 | shared_data.X = const_cast(in_data); 85 | shared_data.table = const_cast(table_vals); 86 | for (int t = 0; t < tc - 1; t++) { 87 | thread_variable_setup((void *)&op_data->s[t], (void *)&op_data->e[t], 88 | xc_config->thread_info.thread_ids.id[t]); 89 | } 90 | 91 | thread_function_pointer_t fn; 92 | switch (input->type) { 93 | case kTfLiteInt8: { 94 | fn = lookup8_thread_worker; 95 | break; 96 | } 97 | case kTfLiteInt16: { 98 | fn = lookup16_thread_worker; 99 | break; 100 | } 101 | default: { 102 | return kTfLiteError; 103 | } 104 | } 105 | 106 | thread_call((void *)&shared_data, &op_data->s[tc - 1], &op_data->e[tc - 1], 107 | (thread_function_pointer_t)fn, &xc_config->thread_info); 108 | return kTfLiteOk; 109 | } 110 | 111 | } // namespace lookup 112 | 113 | TFLMRegistration *Register_XC_lookup() { 114 | static TFLMRegistration r = {lookup::Init, nullptr, lookup::Prepare, 115 | 
lookup::Eval}; 116 | return &r; 117 | } 118 | 119 | } // namespace xcore 120 | } // namespace micro 121 | } // namespace ops 122 | } // namespace tflite_micro 123 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_mean.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include "xcore_custom_options.h" 4 | #include "xcore_utils.h" 5 | extern "C" { 6 | #include "lib_nn/api/nn_layers.h" 7 | } 8 | 9 | namespace tflite_micro { 10 | namespace ops { 11 | namespace micro { 12 | namespace xcore { 13 | namespace mean { 14 | 15 | // This is the struct that contains the data required by the operator 16 | struct MeanOpData { 17 | int start_dim_size; 18 | int mean_dim_size; 19 | int end_dim_size; 20 | float in_zero_point; 21 | float out_zero_point; 22 | float scale_mul; 23 | }; 24 | 25 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 26 | auto op_data = construct_persistent_object(context); 27 | 28 | auto parser = CustomOptionParser(buffer, length); 29 | op_data->start_dim_size = parser.parseNamedCustomOption("s").AsInt32(); 30 | op_data->mean_dim_size = parser.parseNamedCustomOption("m").AsInt32(); 31 | op_data->end_dim_size = parser.parseNamedCustomOption("e").AsInt32(); 32 | op_data->in_zero_point = parser.parseNamedCustomOption("i").AsFloat(); 33 | op_data->out_zero_point = parser.parseNamedCustomOption("o").AsFloat(); 34 | op_data->scale_mul = parser.parseNamedCustomOption("sm").AsFloat(); 35 | return op_data; 36 | } 37 | 38 | // Does all the requests for scratches 39 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 40 | return kTfLiteOk; 41 | } 42 | 43 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 44 | 45 | auto *op_data = static_cast(node->user_data); 46 | 47 | // Get Input/Output Tensors 48 | const TfLiteEvalTensor *input = 
tflite_micro::micro::GetEvalInput(context, node, 0); 49 | 50 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 51 | 52 | // Pointers to data in In/Out Tensors 53 | int8_t *out_data = tflite_micro::micro::GetTensorData(output); 54 | const int8_t *in_data = tflite_micro::micro::GetTensorData(input); 55 | mean_int8(in_data, out_data, op_data->start_dim_size, op_data->mean_dim_size, 56 | op_data->end_dim_size, op_data->in_zero_point, 57 | op_data->out_zero_point, op_data->scale_mul); 58 | 59 | return kTfLiteOk; 60 | } 61 | 62 | } // namespace mean 63 | 64 | TFLMRegistration *Register_XC_mean() { 65 | static TFLMRegistration r = {mean::Init, nullptr, mean::Prepare, mean::Eval}; 66 | return &r; 67 | } 68 | 69 | } // namespace xcore 70 | } // namespace micro 71 | } // namespace ops 72 | } // namespace tflite_micro 73 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_meani16.cc: -------------------------------------------------------------------------------- 1 | 2 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 3 | 4 | #include "xcore_custom_options.h" 5 | #include "xcore_utils.h" 6 | extern "C" { 7 | #include "lib_nn/api/nn_layers.h" 8 | } 9 | 10 | namespace tflite_micro { 11 | namespace ops { 12 | namespace micro { 13 | namespace xcore { 14 | namespace meani16 { 15 | 16 | // This is the struct that contains the data required by the operator 17 | struct MeanI16OpData { 18 | int start_dim_size; 19 | int mean_dim_size; 20 | int end_dim_size; 21 | float scale_mul; 22 | }; 23 | 24 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 25 | auto op_data = construct_persistent_object(context); 26 | 27 | auto parser = CustomOptionParser(buffer, length); 28 | op_data->start_dim_size = parser.parseNamedCustomOption("s").AsInt32(); 29 | op_data->mean_dim_size = parser.parseNamedCustomOption("m").AsInt32(); 30 | op_data->end_dim_size = 
parser.parseNamedCustomOption("e").AsInt32(); 31 | op_data->scale_mul = parser.parseNamedCustomOption("sm").AsFloat(); 32 | return op_data; 33 | } 34 | 35 | // Does all the requests for scratches 36 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 37 | return kTfLiteOk; 38 | } 39 | 40 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 41 | 42 | auto *op_data = static_cast(node->user_data); 43 | 44 | // Get Input/Output Tensors 45 | const TfLiteEvalTensor *input = 46 | tflite_micro::micro::GetEvalInput(context, node, 0); 47 | 48 | TfLiteEvalTensor *output = 49 | tflite_micro::micro::GetEvalOutput(context, node, 0); 50 | 51 | // Pointers to data in In/Out Tensors 52 | int16_t *out_data = tflite_micro::micro::GetTensorData(output); 53 | const int16_t *in_data = tflite_micro::micro::GetTensorData(input); 54 | mean_int16(in_data, out_data, op_data->start_dim_size, op_data->mean_dim_size, 55 | op_data->end_dim_size, op_data->scale_mul); 56 | 57 | return kTfLiteOk; 58 | } 59 | 60 | } // namespace meani16 61 | 62 | TFLMRegistration *Register_XC_meani16() { 63 | static TFLMRegistration r = {meani16::Init, nullptr, meani16::Prepare, 64 | meani16::Eval}; 65 | return &r; 66 | } 67 | 68 | } // namespace xcore 69 | } // namespace micro 70 | } // namespace ops 71 | } // namespace tflite_micro 72 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_mul.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include "../thread_call.h" 4 | #include "xcore_config.h" 5 | #include "xcore_custom_options.h" 6 | #include "xcore_utils.h" 7 | extern "C" { 8 | #include "lib_nn/api/nn_layers.h" 9 | #include "lib_nn/api/nn_operator.h" 10 | } 11 | 12 | namespace tflite_micro { 13 | namespace ops { 14 | namespace micro { 15 | namespace xcore { 16 | namespace mul { 17 | 18 | struct MulShared { 
19 | int8_t *Y; 20 | int8_t *X1; 21 | int8_t *X2; 22 | nn_mul_params_t *blob; 23 | }; 24 | 25 | extern "C" { 26 | void mul_thread_worker(void *shared, void *start, void *end) { 27 | int *s = static_cast(start); 28 | int *e = static_cast(end); 29 | auto sd = static_cast(shared); 30 | mul_elementwise(sd->X1 + *s, sd->X2 + *s, *e - *s, sd->blob, sd->Y + *s); 31 | } 32 | } 33 | 34 | // This is the struct that contains the data required by the operator 35 | struct MulOpData { 36 | nn_mul_params_t *mp_params; 37 | int tc; 38 | int s[XCORE_MAX_NUM_THREADS]; 39 | int e[XCORE_MAX_NUM_THREADS]; 40 | }; 41 | 42 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 43 | auto op_data = construct_persistent_object(context); 44 | 45 | auto parser = CustomOptionParser(buffer, length); 46 | op_data->mp_params = (nn_mul_params_t *)parser.parseNamedCustomOption("mp").AsBlob().data(); 47 | 48 | return op_data; 49 | } 50 | 51 | // Does all the requests for scratches 52 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 53 | auto *op_data = static_cast(node->user_data); 54 | MicroContext *micro_context = GetMicroContext(context); 55 | xc_context_config_t *xc_config = reinterpret_cast( 56 | micro_context->external_context()); 57 | const TfLiteEvalTensor *output = 58 | tflite_micro::micro::GetEvalOutput(context, node, 0); 59 | int output_size = tflite_micro::micro::GetTensorShape(output).FlatSize(); 60 | op_data->tc = calculateAlignedThreadSplit(xc_config->model_thread_count, output_size, op_data->s, op_data->e); 61 | return kTfLiteOk; 62 | } 63 | 64 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 65 | 66 | auto *op_data = static_cast(node->user_data); 67 | 68 | // Get Input/Output Tensors 69 | const TfLiteEvalTensor *input1 = 70 | tflite_micro::micro::GetEvalInput(context, node, 0); 71 | const TfLiteEvalTensor *input2 = 72 | tflite_micro::micro::GetEvalInput(context, node, 1); 73 | TfLiteEvalTensor *output = 
tflite_micro::micro::GetEvalOutput(context, node, 0); 74 | 75 | // Pointers to data in In/Out Tensors 76 | int8_t *out_data = tflite_micro::micro::GetTensorData(output); 77 | const int8_t *in1_data = tflite_micro::micro::GetTensorData(input1); 78 | const int8_t *in2_data = tflite_micro::micro::GetTensorData(input2); 79 | 80 | MicroContext *micro_context = GetMicroContext(context); 81 | xc_context_config_t *xc_config = reinterpret_cast( 82 | micro_context->external_context()); 83 | const int tc = op_data->tc; 84 | MulShared shared_data; 85 | shared_data.Y = out_data; 86 | shared_data.X1 = const_cast(in1_data); 87 | shared_data.X2 = const_cast(in2_data); 88 | shared_data.blob = op_data->mp_params; 89 | for (int t = 0; t < tc - 1; t++) { 90 | thread_variable_setup((void *)&op_data->s[t], (void *)&op_data->e[t], 91 | xc_config->thread_info.thread_ids.id[t]); 92 | } 93 | thread_call((void *)&shared_data, &op_data->s[tc - 1], &op_data->e[tc - 1], 94 | (thread_function_pointer_t)mul_thread_worker, 95 | &xc_config->thread_info); 96 | 97 | return kTfLiteOk; 98 | } 99 | 100 | } // namespace mul 101 | 102 | TFLMRegistration *Register_XC_mul() { 103 | static TFLMRegistration r = {mul::Init, nullptr, mul::Prepare, 104 | mul::Eval}; 105 | return &r; 106 | } 107 | 108 | } // namespace xcore 109 | } // namespace micro 110 | } // namespace ops 111 | } // namespace tflite_micro 112 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_n_to_4.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include "xcore_custom_options.h" 4 | #include "xcore_utils.h" 5 | 6 | extern "C" { 7 | #include "lib_nn/api/nn_operator.h" 8 | } 9 | 10 | namespace tflite_micro { 11 | namespace ops { 12 | namespace micro { 13 | namespace xcore { 14 | namespace pad_n_to_4 { 15 | 16 | struct OpData { 17 | uint32_t n; 18 | uint32_t 
pad_val; 19 | }; 20 | 21 | 22 | void* Init(TfLiteContext* context, const char* buffer, size_t length) { 23 | auto op_data = construct_persistent_object(context); 24 | 25 | auto parser = CustomOptionParser(buffer, length); 26 | auto pad_value = parser.parseNamedCustomOption("pv").AsUInt32(); 27 | op_data->pad_val = pad_value; 28 | return op_data; 29 | } 30 | 31 | TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { 32 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 33 | auto shape = tflite_micro::micro::GetTensorShape(output); 34 | TFLITE_DCHECK(shape.DimensionsCount() == 4 && shape.DimsData()[0] == 1); 35 | int number_of_pixels = shape.DimsData()[1] * shape.DimsData()[2]; 36 | OpData* op_data = static_cast(node->user_data); 37 | op_data->n = number_of_pixels; 38 | return kTfLiteOk; 39 | } 40 | 41 | TfLiteStatus Eval3To4(TfLiteContext* context, TfLiteNode* node) { 42 | TFLITE_DCHECK(node->user_data != nullptr); 43 | const OpData* data = static_cast(node->user_data); 44 | 45 | const TfLiteEvalTensor* input = 46 | tflite_micro::micro::GetEvalInput(context, node, /*index=*/0); 47 | 48 | TfLiteEvalTensor* output = 49 | tflite_micro::micro::GetEvalOutput(context, node, /*index=*/0); 50 | 51 | int8_t *output_p = 52 | const_cast(tflite_micro::micro::GetTensorData(output)); 53 | int8_t *input_p = 54 | const_cast(tflite_micro::micro::GetTensorData(input)); 55 | 56 | // The function takes the number of pixels as data->n 57 | pad_3_to_4_run(output_p, 58 | input_p, 59 | data->n, data->pad_val); 60 | 61 | return kTfLiteOk; 62 | } 63 | 64 | TfLiteStatus Eval1To4(TfLiteContext* context, TfLiteNode* node) { 65 | TFLITE_DCHECK(node->user_data != nullptr); 66 | const OpData* data = static_cast(node->user_data); 67 | 68 | const TfLiteEvalTensor* input = 69 | tflite_micro::micro::GetEvalInput(context, node, /*index=*/0); 70 | 71 | TfLiteEvalTensor* output = 72 | tflite_micro::micro::GetEvalOutput(context, node, /*index=*/0); 73 | 74 | 
int8_t *output_p = 75 | const_cast(tflite_micro::micro::GetTensorData(output)); 76 | int8_t *input_p = 77 | const_cast(tflite_micro::micro::GetTensorData(input)); 78 | 79 | // The function takes the number of 4 byte input chunks 80 | int n_4 = (data->n) / 4; 81 | pad_1_to_4_run(output_p, 82 | input_p, 83 | n_4, data->pad_val); 84 | 85 | // We pad the remaining inputs here 86 | for(int i = n_4 * 4; i < data->n; i++){ 87 | output_p[i * 4] = input_p[i]; 88 | for(int n = 1; n < 4; n++) { 89 | output_p[i * 4 + n] = (int8_t)data->pad_val; 90 | } 91 | } 92 | 93 | return kTfLiteOk; 94 | } 95 | 96 | } // namespace pad 97 | 98 | TFLMRegistration *Register_XC_pad_3_to_4() { 99 | static TFLMRegistration r = {pad_n_to_4::Init, nullptr, pad_n_to_4::Prepare, pad_n_to_4::Eval3To4}; 100 | return &r; 101 | } 102 | 103 | TFLMRegistration *Register_XC_pad_1_to_4() { 104 | static TFLMRegistration r = {pad_n_to_4::Init, nullptr, pad_n_to_4::Prepare, pad_n_to_4::Eval1To4}; 105 | return &r; 106 | } 107 | 108 | } // namespace xcore 109 | } // namespace micro 110 | } // namespace ops 111 | } // namespace tflite_micro 112 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_ops.cc: -------------------------------------------------------------------------------- 1 | #include "xcore_ops.h" 2 | 3 | #if defined(__xtflm_conf_h_exists__) 4 | #include "xtflm_conf.h" 5 | #else 6 | #ifndef XTFLM_OPERATORS 7 | #define XTFLM_OPERATORS 10 8 | #endif 9 | #endif 10 | 11 | #ifndef XCORE_TFLITE_MICRO_PATCHED 12 | #error \ 13 | "tflite-micro patch not applied! Fix by running 'make patch' in lib_tflite_micro!" 
14 | #endif 15 | 16 | namespace tflite_micro { 17 | namespace ops { 18 | namespace micro { 19 | namespace xcore { 20 | 21 | void RegisterXCOps(MicroOpResolver *res) { 22 | auto *resolver = 23 | reinterpret_cast *>(res); 24 | 25 | resolver->AddCustom(XC_beta_activationf32_OpCode, 26 | Register_XC_beta_activationf32()); 27 | resolver->AddCustom(XC_beta_concatf32_OpCode, Register_XC_beta_concatf32()); 28 | resolver->AddCustom(XC_beta_convf32_OpCode, Register_XC_beta_convf32()); 29 | resolver->AddCustom(XC_beta_transposeconvf32_OpCode, 30 | Register_XC_beta_transposeconvf32()); 31 | resolver->AddCustom(XC_beta_fcf32_OpCode, Register_XC_beta_fcf32()); 32 | resolver->AddCustom(XC_binaryi16_OpCode, Register_XC_binaryi16()); 33 | resolver->AddCustom(XC_unaryi16_OpCode, Register_XC_unaryi16()); 34 | resolver->AddCustom(XC_conv2d_v2_OpCode, Register_XC_conv2d_v2()); 35 | resolver->AddCustom(XC_maxpool2d_OpCode, Register_XC_maxpool2d()); 36 | resolver->AddCustom(XC_softmax_OpCode, Register_XC_softmax()); 37 | resolver->AddCustom(XC_batched_softmax_OpCode, Register_XC_batched_softmax()); 38 | resolver->AddCustom(XC_add_OpCode, Register_XC_add()); 39 | resolver->AddCustom(XC_slice_OpCode, Register_XC_slice()); 40 | resolver->AddCustom(XC_broadcast_OpCode, Register_XC_broadcast()); 41 | resolver->AddCustom(XC_ld_weights_OpCode, Register_XC_ld_weights()); 42 | resolver->AddCustom(XC_ld_weights_wait_OpCode, Register_XC_ld_weights_wait()); 43 | resolver->AddCustom(XC_bsign_8_OpCode, Register_XC_bsign_8()); 44 | resolver->AddCustom(XC_lookup_OpCode, Register_XC_lookup()); 45 | resolver->AddCustom(XC_pad_OpCode, Register_XC_pad()); 46 | resolver->AddCustom(XC_concat_OpCode, Register_XC_concat()); 47 | resolver->AddCustom(XC_transpose_OpCode, Register_XC_transpose()); 48 | resolver->AddCustom(XC_pad_3_to_4_OpCode, Register_XC_pad_3_to_4()); 49 | resolver->AddCustom(XC_pad_1_to_4_OpCode, Register_XC_pad_1_to_4()); 50 | resolver->AddCustom(XC_mul_OpCode, Register_XC_mul()); 51 | 
resolver->AddCustom(XC_mean_OpCode, Register_XC_mean()); 52 | resolver->AddCustom(XC_meani16_OpCode, Register_XC_meani16()); 53 | resolver->AddCustom(XC_expand_8_to_16_OpCode, Register_XC_expand_8_to_16()); 54 | resolver->AddCustom(XC_no_op_OpCode, Register_XC_no_op()); 55 | resolver->AddCustom(XC_store_tensor_OpCode, Register_XC_store_tensor()); 56 | resolver->AddCustom(XC_load_tensor_OpCode, Register_XC_load_tensor()); 57 | } 58 | 59 | } // namespace xcore 60 | } // namespace micro 61 | } // namespace ops 62 | } // namespace tflite_micro 63 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_ops.h: -------------------------------------------------------------------------------- 1 | #ifndef XCORE_OPS_H_ 2 | #define XCORE_OPS_H_ 3 | 4 | #include "tensorflow/lite/micro/micro_mutable_op_resolver.h" 5 | 6 | namespace tflite_micro { 7 | namespace ops { 8 | namespace micro { 9 | namespace xcore { 10 | 11 | constexpr const char *XC_beta_activationf32_OpCode = "XC_beta_activationf32"; 12 | constexpr const char *XC_beta_concatf32_OpCode = "XC_beta_concatf32"; 13 | constexpr const char *XC_beta_convf32_OpCode = "XC_beta_convf32"; 14 | constexpr const char *XC_beta_transposeconvf32_OpCode = 15 | "XC_beta_transposeconvf32"; 16 | constexpr const char *XC_beta_fcf32_OpCode = "XC_beta_fcf32"; 17 | 18 | constexpr const char *XC_binaryi16_OpCode = "XC_binaryi16"; 19 | constexpr const char *XC_unaryi16_OpCode = "XC_unaryi16"; 20 | 21 | constexpr const char *XC_conv2d_v2_OpCode = "XC_conv2d_v2"; 22 | constexpr const char *XC_maxpool2d_OpCode = "XC_maxpool2d"; 23 | constexpr const char *XC_softmax_OpCode = "XC_softmax"; 24 | constexpr const char *XC_batched_softmax_OpCode = "XC_batched_softmax"; 25 | constexpr const char *XC_ld_weights_OpCode = "XC_ld_weights"; 26 | constexpr const char *XC_ld_weights_wait_OpCode = "XC_ld_weights_wait"; 27 | constexpr const char *XC_add_OpCode = "XC_add"; 28 | constexpr 
const char *XC_slice_OpCode = "XC_slice"; 29 | constexpr const char *XC_broadcast_OpCode = "XC_broadcast"; 30 | constexpr const char *XC_lookup_OpCode = "XC_lookup"; 31 | constexpr const char *XC_pad_OpCode = "XC_pad"; 32 | constexpr const char *XC_concat_OpCode = "XC_concat"; 33 | constexpr const char *XC_transpose_OpCode = "XC_transpose"; 34 | constexpr const char *XC_pad_3_to_4_OpCode = "XC_pad_3_to_4"; 35 | constexpr const char *XC_pad_1_to_4_OpCode = "XC_pad_1_to_4"; 36 | constexpr const char *XC_mul_OpCode = "XC_mul"; 37 | constexpr const char *XC_mean_OpCode = "XC_mean"; 38 | constexpr const char *XC_meani16_OpCode = "XC_meani16"; 39 | constexpr const char *XC_expand_8_to_16_OpCode = "XC_expand_8_to_16"; 40 | constexpr const char *XC_no_op_OpCode = "XC_no_op"; 41 | constexpr const char *XC_store_tensor_OpCode = "XC_store_tensor"; 42 | constexpr const char *XC_load_tensor_OpCode = "XC_load_tensor"; 43 | 44 | // Binarized ops 45 | constexpr const char *XC_bsign_8_OpCode = "XC_bsign_8"; 46 | 47 | TFLMRegistration *Register_XC_beta_activationf32(); 48 | TFLMRegistration *Register_XC_beta_concatf32(); 49 | TFLMRegistration *Register_XC_beta_convf32(); 50 | TFLMRegistration *Register_XC_beta_transposeconvf32(); 51 | TFLMRegistration *Register_XC_beta_fcf32(); 52 | 53 | TFLMRegistration *Register_XC_binaryi16(); 54 | TFLMRegistration *Register_XC_unaryi16(); 55 | 56 | TFLMRegistration *Register_XC_conv2d_v2(); 57 | TFLMRegistration *Register_XC_maxpool2d(); 58 | TFLMRegistration *Register_XC_softmax(); 59 | TFLMRegistration *Register_XC_batched_softmax(); 60 | TFLMRegistration *Register_XC_ld_weights(); 61 | TFLMRegistration *Register_XC_ld_weights_wait(); 62 | TFLMRegistration *Register_XC_add(); 63 | TFLMRegistration *Register_XC_slice(); 64 | TFLMRegistration *Register_XC_broadcast(); 65 | TFLMRegistration *Register_XC_lookup(); 66 | TFLMRegistration *Register_XC_pad(); 67 | TFLMRegistration *Register_XC_concat(); 68 | TFLMRegistration *Register_XC_transpose(); 
69 | TFLMRegistration *Register_XC_pad_3_to_4(); 70 | TFLMRegistration *Register_XC_pad_1_to_4(); 71 | TFLMRegistration *Register_XC_mul(); 72 | TFLMRegistration *Register_XC_mean(); 73 | TFLMRegistration *Register_XC_meani16(); 74 | TFLMRegistration *Register_XC_expand_8_to_16(); 75 | TFLMRegistration *Register_XC_no_op(); 76 | TFLMRegistration *Register_XC_store_tensor(); 77 | TFLMRegistration *Register_XC_load_tensor(); 78 | 79 | // Binarized ops 80 | TFLMRegistration *Register_XC_bsign_8(); 81 | 82 | void RegisterXCOps(tflite_micro::MicroOpResolver *res); 83 | 84 | } // namespace xcore 85 | } // namespace micro 86 | } // namespace ops 87 | } // namespace tflite_micro 88 | 89 | #endif // XCORE_OPS_H_ 90 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_pad.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023, XMOS Ltd, All rights reserved 2 | 3 | #include "xcore_custom_options.h" 4 | #include "xcore_utils.h" 5 | #include 6 | 7 | extern "C" { 8 | #include "vpu_memmove_word_aligned.h" 9 | #include "vpu_memset_256.h" 10 | } 11 | 12 | namespace tflite_micro { 13 | namespace ops { 14 | namespace micro { 15 | namespace xcore { 16 | namespace pad { 17 | 18 | struct PadOpData { 19 | int32_t start; 20 | int32_t pad_size; 21 | int32_t size; 22 | int32_t num_copies; 23 | int32_t zero_point; 24 | int32_t end; 25 | void (*func_ptr)(void *, const void *, unsigned); 26 | }; 27 | 28 | void memmove_wrapper(void *dst, const void *src, unsigned size) { 29 | memmove(dst, src, size); 30 | } 31 | 32 | void *Init(TfLiteContext *context, const char *buffer, size_t length) { 33 | auto op_data = construct_persistent_object(context); 34 | auto parser = CustomOptionParser(buffer, length); 35 | op_data->start = parser.parseNamedCustomOption("s").AsInt32(); 36 | op_data->pad_size = parser.parseNamedCustomOption("p").AsInt32(); 37 | op_data->size = 
parser.parseNamedCustomOption("l").AsInt32(); 38 | op_data->num_copies = parser.parseNamedCustomOption("n").AsInt32(); 39 | op_data->zero_point = parser.parseNamedCustomOption("z").AsInt32(); 40 | op_data->end = parser.parseNamedCustomOption("e").AsInt32(); 41 | bool use_vpu = parser.parseNamedCustomOption("v").AsBool(); 42 | op_data->func_ptr = use_vpu ? vpu_memmove_word_aligned : memmove_wrapper; 43 | return op_data; 44 | } 45 | 46 | TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) { 47 | return kTfLiteOk; 48 | } 49 | 50 | TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) { 51 | TFLITE_DCHECK(node->user_data != nullptr); 52 | 53 | auto *op_data = static_cast(node->user_data); 54 | const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 55 | TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 56 | const int8_t *in_data = tflite_micro::micro::GetTensorData(input); 57 | int8_t *out_data = tflite_micro::micro::GetTensorData(output); 58 | uint8_t from[32]; 59 | broadcast_32_to_256(from, op_data->zero_point); 60 | if (op_data->start) 61 | vpu_memset_256(out_data, from, op_data->start); 62 | out_data += op_data->start; 63 | const int size = op_data->size; 64 | const int pad_size = op_data->pad_size; 65 | void (*func_ptr)(void *, const void *, unsigned) = op_data->func_ptr; 66 | for (int i = 0; i < op_data->num_copies; i++) { 67 | func_ptr(out_data, in_data, size); 68 | out_data += size; 69 | in_data += size; 70 | vpu_memset_256(out_data, from, pad_size); 71 | out_data += pad_size; 72 | } 73 | func_ptr(out_data, in_data, size); 74 | out_data += size; 75 | vpu_memset_256(out_data, from, op_data->end); 76 | return kTfLiteOk; 77 | } 78 | 79 | } // namespace pad 80 | 81 | TFLMRegistration *Register_XC_pad() { 82 | static TFLMRegistration r = {pad::Init, nullptr, pad::Prepare, pad::Eval}; 83 | return &r; 84 | } 85 | 86 | } // namespace xcore 87 | } // namespace micro 88 | } // namespace ops 89 | 
} // namespace tflite_micro 90 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_profiler.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, XMOS Ltd, All rights reserved 2 | #include "xcore_profiler.h" 3 | 4 | #include "tensorflow/lite/kernels/internal/compatibility.h" 5 | #include "tensorflow/lite/micro/micro_time.h" 6 | 7 | namespace tflite_micro { 8 | namespace micro { 9 | namespace xcore { 10 | 11 | void XCoreProfiler::Init(tflite_micro::MicroAllocator *allocator, 12 | size_t max_event_count) { 13 | max_event_count_ = max_event_count; 14 | event_durations_ = static_cast( 15 | allocator->AllocatePersistentBuffer(max_event_count * sizeof(uint32_t))); 16 | } 17 | 18 | uint32_t const *XCoreProfiler::GetEventDurations() { return event_durations_; } 19 | 20 | size_t XCoreProfiler::GetNumEvents() { return event_count_; } 21 | 22 | void XCoreProfiler::ClearEvents() { event_count_ = 0; } 23 | 24 | uint32_t XCoreProfiler::BeginEvent(const char *tag) { 25 | TFLITE_DCHECK(tag); 26 | event_tag_ = tag; 27 | event_start_time_ = tflite_micro::GetCurrentTimeTicks(); 28 | return 0; 29 | } 30 | 31 | void XCoreProfiler::EndEvent(uint32_t event_handle) { 32 | int32_t event_end_time = tflite_micro::GetCurrentTimeTicks(); 33 | event_count_ = event_count_ % max_event_count_; 34 | // wrap if there are too many events 35 | event_durations_[event_count_++] = event_end_time - event_start_time_; 36 | } 37 | 38 | } // namespace xcore 39 | } // namespace micro 40 | } // namespace tflite_micro 41 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_profiler.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, XMOS Ltd, All rights reserved 2 | 3 | #ifndef XCORE_PROFILER_H_ 4 | #define XCORE_PROFILER_H_ 5 | 6 | 
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_profiler.h"

#if !defined(XCORE_PROFILER_DEFAULT_MAX_LEVELS)
#define XCORE_PROFILER_DEFAULT_MAX_LEVELS (64)
#endif

namespace tflite_micro {
namespace micro {
namespace xcore {

// Per-op timing profiler that records event durations (in ticks) into a
// fixed-size buffer, wrapping when full.
class XCoreProfiler : public tflite_micro::MicroProfiler {
public:
  explicit XCoreProfiler(){};
  ~XCoreProfiler() override = default;

  void Init(tflite_micro::MicroAllocator *allocator,
            size_t max_event_count = XCORE_PROFILER_DEFAULT_MAX_LEVELS);

  void ClearEvents();

  uint32_t BeginEvent(const char *tag) override;

  // Event_handle is ignored since TFLu does not support concurrent events.
  void EndEvent(uint32_t event_handle) override;

  uint32_t const *GetEventDurations();
  size_t GetNumEvents();

private:
  const char *event_tag_;
  uint32_t event_start_time_;
  size_t event_count_ = 0;
  size_t max_event_count_ = 0;
  uint32_t *event_durations_;
  TF_LITE_REMOVE_VIRTUAL_DELETE
};

} // namespace xcore
} // namespace micro
} // namespace tflite_micro

#endif // XCORE_PROFILER_H_
--------------------------------------------------------------------------------
/lib_tflite_micro/src/tflite-xcore-kernels/xcore_slice.cc:
--------------------------------------------------------------------------------
// Copyright (c) 2023, XMOS Ltd, All rights reserved

extern "C" {
#include "vpu_memmove_word_aligned.h"
}

#include "xcore_custom_options.h"
#include "xcore_utils.h"

namespace tflite_micro {
namespace ops {
namespace micro {
namespace xcore {
namespace slice {

// Strided-copy slice: num_copies runs of `size` bytes, the input pointer
// advancing `offset` bytes per run, starting `start` bytes into the input.
struct SliceOpData {
  int32_t start;
  int32_t offset;
  int32_t size;
  int32_t num_copies;
  void (*func_ptr)(void *, const void *, unsigned);
};

// memmove with the same signature as vpu_memmove_word_aligned.
void memmove_wrapper(void *dst, const void *src, unsigned size) {
  memmove(dst, src, size);
}

void *Init(TfLiteContext *context, const char *buffer, size_t length) {
  auto op_data = construct_persistent_object<SliceOpData>(context);
  auto parser = CustomOptionParser(buffer, length);
  op_data->start = parser.parseNamedCustomOption("s").AsInt32();
  op_data->offset = parser.parseNamedCustomOption("o").AsInt32();
  op_data->size = parser.parseNamedCustomOption("l").AsInt32();
  op_data->num_copies = parser.parseNamedCustomOption("n").AsInt32();
  bool use_vpu = parser.parseNamedCustomOption("v").AsBool();
  op_data->func_ptr = use_vpu ? vpu_memmove_word_aligned : memmove_wrapper;
  return op_data;
}

// Does all the requests for scratches
TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) {
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) {
  auto *op_data = static_cast<SliceOpData *>(node->user_data);
  // Get Input/Output Tensors
  const TfLiteEvalTensor *input =
      tflite_micro::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor *output =
      tflite_micro::micro::GetEvalOutput(context, node, 0);
  // Pointers to data in In/Out Tensors
  const int8_t *in_data =
      tflite_micro::micro::GetTensorData<int8_t>(input) + op_data->start;
  int8_t *out_data = tflite_micro::micro::GetTensorData<int8_t>(output);
  const int size = op_data->size;
  const int offset = op_data->offset;
  void (*func_ptr)(void *, const void *, unsigned) = op_data->func_ptr;
  for (int i = 0; i < op_data->num_copies; i++) {
    func_ptr(out_data, in_data, size);
    in_data += offset;
    out_data += size;
  }
  return kTfLiteOk;
}

} // namespace slice

TFLMRegistration *Register_XC_slice() {
  static TFLMRegistration r = {slice::Init, nullptr, slice::Prepare,
                               slice::Eval};
  return &r;
}

} // namespace xcore
} // namespace micro
} // namespace ops
} // namespace tflite_micro
--------------------------------------------------------------------------------
/lib_tflite_micro/src/tflite-xcore-kernels/xcore_transpose.cc:
--------------------------------------------------------------------------------
#include "tensorflow/lite/c/common.h"
#include "xcore_custom_options.h"
#include "xcore_utils.h"

namespace tflite_micro {
namespace ops {
namespace micro {
namespace xcore {
namespace transpose {

using tflite_micro::micro::GetEvalInput;
using tflite_micro::micro::GetEvalOutput;
using tflite_micro::micro::GetTensorData;

constexpr int kTransposeDims = 4; // Exactly 4 dimensions as specified

// Transposed output shape and per-dimension input strides (element offsets).
struct TransposeOpData {
  int32_t t_shape[kTransposeDims];
  int32_t offsets[kTransposeDims];
};

void *Init(TfLiteContext *context, const char *buffer, size_t length) {
  auto op_data = construct_persistent_object<TransposeOpData>(context);
  auto parser = CustomOptionParser(buffer, length);

  auto t_shape_vector = parser.parseNamedCustomOption("s").AsVector();
  auto offsets_vector = parser.parseNamedCustomOption("o").AsVector();

  for (int i = 0; i < kTransposeDims; ++i) {
    op_data->t_shape[i] = t_shape_vector[i].AsInt32();
    op_data->offsets[i] = offsets_vector[i].AsInt32();
  }

  return op_data;
}

TfLiteStatus Prepare(TfLiteContext *context, TfLiteNode *node) {
  // No preparation needed
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext *context, TfLiteNode *node) {
  auto *op_data = static_cast<TransposeOpData *>(node->user_data);

  const int32_t *t_shape = op_data->t_shape;
  const int32_t *offsets = op_data->offsets;

  const int8_t *input_data =
      GetTensorData<int8_t>(GetEvalInput(context, node, 0));
  int8_t *output_data = GetTensorData<int8_t>(GetEvalOutput(context, node, 0));
// TODO: 1. Optimise by pre-computing increments 53 | // TODO: 2. Dereference t_shape in advance 54 | // TODO: 3. Multi-threading 55 | for (int i0 = 0; i0 < t_shape[0]; ++i0) { 56 | const int j0 = i0 * offsets[0]; 57 | for (int i1 = 0; i1 < t_shape[1]; ++i1) { 58 | const int j1 = j0 + i1 * offsets[1]; 59 | for (int i2 = 0; i2 < t_shape[2]; ++i2) { 60 | const int j2 = j1 + i2 * offsets[2]; 61 | for (int i3 = 0; i3 < t_shape[3]; ++i3) { 62 | const int j3 = j2 + i3 * offsets[3]; 63 | *output_data++ = input_data[j3]; 64 | } 65 | } 66 | } 67 | } 68 | 69 | return kTfLiteOk; 70 | } 71 | 72 | } // namespace transpose 73 | 74 | TFLMRegistration *Register_XC_transpose() { 75 | static TFLMRegistration r = {transpose::Init, nullptr, transpose::Prepare, 76 | transpose::Eval}; 77 | return &r; 78 | } 79 | 80 | } // namespace xcore 81 | } // namespace micro 82 | } // namespace ops 83 | } // namespace tflite_micro 84 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tflite-xcore-kernels/xcore_utils.cc: -------------------------------------------------------------------------------- 1 | #include "xcore_utils.h" 2 | 3 | namespace tflite_micro { 4 | namespace ops { 5 | namespace micro { 6 | namespace xcore { 7 | 8 | TfLiteStatus No_Op_Eval(TfLiteContext *context, TfLiteNode *node) { 9 | // Get Input/Output Tensors 10 | // const TfLiteEvalTensor *input = tflite_micro::micro::GetEvalInput(context, node, 0); 11 | // TfLiteEvalTensor *output = tflite_micro::micro::GetEvalOutput(context, node, 0); 12 | // // Pointers to data in In/Out Tensors 13 | // const int8_t *in_data = tflite_micro::micro::GetTensorData(input); 14 | // int8_t *out_data = tflite_micro::micro::GetTensorData(output); 15 | 16 | // size_t sizeof_tensor_type; 17 | // TfLiteTypeSizeOf(output->type, &sizeof_tensor_type); 18 | // int size = tflite_micro::micro::GetTensorShape(output).FlatSize(); 19 | // memcpy((int8_t *)out_data, (int8_t *)in_data, size * 
sizeof_tensor_type); 20 | return kTfLiteOk; 21 | } 22 | // No-op kernel registration: only the Eval entry is provided. 23 | TFLMRegistration *Register_XC_no_op() { 24 | static TFLMRegistration r = {nullptr, nullptr, nullptr, 25 | No_Op_Eval}; 26 | return &r; 27 | } 28 | // Makes `src` available through *dest: RAM data is aliased in place (returns 0 bytes used); otherwise the data is copied into the caller-provided *dest buffer via memload (returns `size` bytes used). 29 | size_t FetchBuffer(int8_t **dest, int8_t const *src, size_t size) { 30 | if (is_ram_address((uintptr_t)src)) { 31 | *dest = (int8_t *)src; 32 | return 0; 33 | } else { 34 | memload((void *)*dest, (void *)src, size); 35 | return size; 36 | } 37 | } 38 | 39 | } // namespace xcore 40 | } // namespace micro 41 | } // namespace ops 42 | } // namespace tflite_micro 43 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/thread_call.h: -------------------------------------------------------------------------------- 1 | #if !defined(__micro_thread_library_h__) 2 | #define __micro_thread_library_h__ 3 | 4 | #include <stdint.h> /* restored: needed for uint32_t/uint64_t below (header name was lost in extraction) */ 5 | #ifdef __xcore__ 6 | #include <xcore/thread.h> /* FIXME(review): original header name lost in extraction — restore the xcore header that provides synchronizer_t; confirm against repo */ 7 | #else 8 | typedef unsigned synchronizer_t; 9 | #endif 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | #define XCORE_MAX_NUM_THREADS 5 16 | 17 | #ifdef __XC__ 18 | #define UNSAFE unsafe 19 | #else 20 | #define UNSAFE /**/ 21 | #endif 22 | 23 | typedef struct { // THIS STRUCT MUST BE IN SYNC WITH ASSEMBLY CODE. 24 | union { 25 | uint64_t id_aligned[2]; // Guarantee 64-bit alignment. 
26 | uint32_t id[4]; // Actual IDs 27 | } thread_ids; // ids of at most 4 threads - live during invoke 28 | uint32_t synchroniser; // synchroniser for threads - live during invoke 29 | } thread_info_t; 30 | 31 | 32 | #ifndef __XC__ 33 | 34 | typedef void (*thread_function_pointer_t)(void * arg0, void * arg1, void * arg2); 35 | struct inference_engine; 36 | 37 | /** Function that runs the client task 38 | */ 39 | void thread_client(thread_info_t *ptr, int n); 40 | 41 | /** Stores the synchroniser `s` into the thread info block for later use during invoke. 42 | */ 43 | static inline void thread_store_sync(thread_info_t *ptr, uint32_t s) { 44 | ptr->synchroniser = s; 45 | } 46 | 47 | /** Function that sets up parameters for one of the client threads 48 | * This particular one passes the second and third arguments to the thread. 49 | * When the thread function is actually called (through thread_call) 50 | * the thread function will be called with those two arguments, 51 | * and the first shared argument provided by thread_call. 52 | * Note - we can make versions with more or fewer parameters. 53 | * Note - we could pass this function the thread-function itself 54 | * 55 | * \param arg1 Second argument for the thread function 56 | * \param arg2 Third argument for the thread function 57 | * \param thread_id The thread_id to initialise; one of ptr[0]..ptr[3] above 58 | */ 59 | #ifdef __xcore__ 60 | static inline void thread_variable_setup(void * arg1, void * arg2, uint32_t thread_id) { 61 | #ifdef __VX4A__ 62 | asm volatile("xm.tsetr %0, 11, %1" :: "r" (thread_id), "r" (arg1)); // worker thread reg 11 <- arg1 63 | asm volatile("xm.tsetr %0, 12, %1" :: "r" (thread_id), "r" (arg2)); // worker thread reg 12 <- arg2 64 | asm volatile("xm.tsetr %0, 24, %1" :: "r" (thread_id), "r" (1)); // worker thread reg 24 <- 1; NOTE(review): presumably a go/valid flag — confirm against thread_call.S 65 | #else 66 | asm volatile("set t[%0]:r1, %1" :: "r" (thread_id), "r" (arg1)); // worker thread r1 <- arg1 67 | asm volatile("set t[%0]:r2, %1" :: "r" (thread_id), "r" (arg2)); // worker thread r2 <- arg2 68 | asm volatile("set t[%0]:r10, %1" :: "r" (thread_id), "r" (1)); // worker thread r10 <- 1; NOTE(review): presumably a go/valid flag — confirm against thread_call.S 69 | #endif 70 | } 71 | #else 72 | extern void thread_variable_setup(void * arg1, void * arg2, uint32_t thread_id); 73 | #endif 74 | 75 | /** Function that starts all thread functions and runs them until completion. 76 | * It is assumed that the variable parts have been set up per thread 77 | * by thread_variable_setup. 78 | * This thread will also invoke the function with the given variable arguments. 79 | * 80 | * \param arg0 First argument shared among all threads (usually the output pointer) 81 | * \param arg1 Second argument for the master thread function 82 | * \param arg2 Third argument for the master thread function 83 | * \param fp thread function to call on all threads. 84 | * \param ptr Pointer to the thread info block held in the xcore 85 | * interpreter. 
86 | */ 87 | void thread_call(void * arg0, void * arg1, void * arg2, 88 | thread_function_pointer_t fp, thread_info_t *ptr); 89 | #ifdef __cplusplus 90 | }; 91 | #endif 92 | 93 | #endif // __XC__ 94 | 95 | #endif // __micro_thread_library_h__ 96 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/thread_call_host_emulation.c: -------------------------------------------------------------------------------- 1 | #ifndef __xcore__ 2 | 3 | #include "thread_call.h" 4 | #include 5 | #include 6 | 7 | static void *args[4][10]; 8 | static int32_t max_thread_id = -1; 9 | void thread_variable_setup(void *arg1, void *arg2, uint32_t thread_id) { 10 | assert(thread_id != -1); 11 | args[thread_id][1] = arg1; 12 | args[thread_id][2] = arg2; 13 | if ((int)thread_id > max_thread_id) { 14 | max_thread_id = thread_id; 15 | } 16 | } 17 | 18 | void thread_client(thread_info_t *ptr, int n) { 19 | ptr->thread_ids.id[n] = n; 20 | } 21 | 22 | void thread_call(void *arg0, void *arg1, void *arg2, 23 | thread_function_pointer_t fp, thread_info_t *ptr) { 24 | (*fp)(arg0, arg1, arg2); 25 | for (int i = 0; i <= max_thread_id; i++) { 26 | (*fp)(arg0, args[i][1], args[i][2]); 27 | } 28 | max_thread_id = -1; 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /lib_tflite_micro/src/tile_ram_server.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "tile_ram_server.h" 10 | #include "memory_parallel_transport.h" 11 | 12 | #define TMP_BUF_SIZE_IN_BYTES 1024 13 | 14 | #define VERSION_MAJOR 1 15 | #define VERSION_MINOR 2 16 | #define VERSION_LITTLE_ENDING (VERSION_MAJOR |\ 17 | (VERSION_MINOR << 8) |\ 18 | ((VERSION_MAJOR^0xff) << 16) |\ 19 | ((VERSION_MINOR^0xff) << 24)) 20 | 21 | void tile_ram_server(chanend_t *c_tile_ram, flash_t *headers, int 
n_tile_ram, 22 | const int8_t *tile_ram) { 23 | uint32_t tmp = ((uint32_t*)tile_ram)[0]; 24 | if ((tmp ^ VERSION_LITTLE_ENDING) != 0) { 25 | printstr("version check error"); 26 | asm("clre; waiteu"); 27 | } 28 | memcpy(headers, tile_ram + 4, (n_tile_ram * sizeof(flash_t))); 29 | assert(n_tile_ram == 1); 30 | int tile_ram_server_alive = 1; 31 | while(tile_ram_server_alive) { 32 | int byte_address, number_bytes; 33 | flash_command_t cmd; 34 | int i = 0; // TODO: extend SELECT-FOR-LOOP 35 | cmd = chan_in_word(c_tile_ram[i]); 36 | //if (cmd == FLASH_READ_PARAMETERS || cmd == FLASH_READ_PARAMETERS_COMPRESSED_FLOAT) { 37 | if (cmd == FLASH_READ_PARAMETERS) { 38 | // Set parallel mode 39 | chan_out_word(c_tile_ram[i], 1); 40 | byte_address = chan_in_word(c_tile_ram[i]); 41 | number_bytes = chan_in_word(c_tile_ram[i]); 42 | byte_address = headers[i].parameters_start + byte_address; 43 | } else if (cmd == FLASH_SERVER_INIT) { 44 | ; // NO init required 45 | } else if (cmd == FLASH_SERVER_QUIT) { 46 | tile_ram_server_alive = 0; 47 | } 48 | if (tile_ram_server_alive && cmd != FLASH_SERVER_INIT) { 49 | memory_parallel_send(c_tile_ram[i], &((uint8_t *)tile_ram)[byte_address], number_bytes); 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /repos.list: -------------------------------------------------------------------------------- 1 | lib_nn git@github.com:xmos/lib_nn ebe972405e41182830f1026aa8867fb60028aaff 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # python_version 3.12 2 | numpy>=1.20 3 | opencv-python>=4.4 4 | flatbuffers<2.0,>=1.12 5 | tflite==2.4.0 6 | pyusb==1.2.1 -------------------------------------------------------------------------------- /sample_mobilenet/add_newlines_to_csv.py: -------------------------------------------------------------------------------- 1 | 
file = open('src/in.csv', 'r') 2 | 3 | 4 | f2 = open('src/out.csv', 'w') 5 | 6 | count = 0 7 | while 1: 8 | 9 | # read by character 10 | char = file.read(1) 11 | f2.write(char) 12 | 13 | if char == ',': 14 | count += 1 15 | 16 | if count == 80: 17 | f2.write('\n') 18 | count = 0 19 | 20 | if not char: 21 | break 22 | 23 | #print(char) 24 | 25 | file.close() 26 | f2.close() 27 | -------------------------------------------------------------------------------- /sample_mobilenet/mobilenet_v1_25.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/sample_mobilenet/mobilenet_v1_25.tflite -------------------------------------------------------------------------------- /sample_mobilenet/s1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/sample_mobilenet/s1.jpg -------------------------------------------------------------------------------- /sample_mobilenet/s2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/sample_mobilenet/s2.jpg -------------------------------------------------------------------------------- /sample_mobilenet/s3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/sample_mobilenet/s3.jpg -------------------------------------------------------------------------------- /tflite_micro_compiler/.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | -------------------------------------------------------------------------------- 
/tflite_micro_compiler/.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Screenshots** 20 | If applicable, add screenshots to help explain your problem. 21 | 22 | **Desktop (please complete the following information):** 23 | - OS: [e.g. iOS] 24 | - TensorFlow version 25 | - CMake/Make Version 26 | 27 | **Additional context** 28 | Add any other context about the problem here. 29 | -------------------------------------------------------------------------------- /tflite_micro_compiler/.github/ISSUE_TEMPLATE/discussion.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Discussion 3 | about: Starting a friendly discussion about something relating to this repository 4 | title: "[DISCUSSION]" 5 | labels: discussion 6 | assignees: '' 7 | 8 | --- 9 | 10 | -------------------------------------------------------------------------------- /tflite_micro_compiler/.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[FEATURE]" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 
15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /tflite_micro_compiler/.github/ISSUE_TEMPLATE/improvement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Improvement 3 | about: Recommend an area of the project that could be improved 4 | title: "[IMPROVEMENT]" 5 | labels: Improvement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What should be improved?** 11 | A clear and concise description of what could be improved. 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what desired behaviour you'd like to see. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the improvement here. 18 | -------------------------------------------------------------------------------- /tflite_micro_compiler/.github/workflows/c-cpp.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | strategy: 8 | matrix: 9 | config: 10 | - {name: "Linux", os: ubuntu-latest, cmake-generator: ""} 11 | - {name: "Windows", os: windows-latest, cmake-generator: "-G \"MinGW Makefiles\""} 12 | runs-on: ${{ matrix.config.os }} 13 | name: ${{ matrix.config.name }} 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Requirements 18 | shell: bash 19 | run: | 20 | cd .. 
21 | git clone https://github.com/tensorflow/tensorflow.git 22 | cd tensorflow 23 | make -f tensorflow/lite/micro/tools/make/Makefile hello_world 24 | - name: Requirements (Windows) 25 | if: matrix.config.os == 'windows-latest' 26 | run: | 27 | choco install wget unzip 28 | - name: Configure 29 | shell: bash 30 | run: | 31 | mkdir build 32 | cd build 33 | cmake ${{ matrix.config.cmake-generator }} .. 34 | - name: Compile 35 | shell: bash 36 | run: | 37 | cd build 38 | cmake --build . 39 | - name: Run 40 | shell: bash 41 | run: | 42 | cd build 43 | wget https://storage.googleapis.com/download.tensorflow.org/models/tflite/micro/hello_world_2020_04_13.zip 44 | unzip hello_world_2020_04_13.zip 45 | ./compiler hello_world/models/model.tflite out.cpp 46 | - name: Test setup 47 | run: | 48 | cd examples/generic_test 49 | mkdir build 50 | cd build 51 | cmake ${{ matrix.config.cmake-generator }} .. 52 | cmake --build . 53 | # Binary representation of output -1.09 (~sin(1.5*PI)) 54 | echo "\xd6\xf3\x8b\xbf" > outExpect.txt 55 | # Binary representation of input 4.71 (1.5*PI) 56 | - name: Test setup (Windows) 57 | if: matrix.config.os == 'windows-latest' 58 | run: | 59 | cd examples/generic_test/build 60 | echo 52 b8 96 40 > inData.tmp 61 | certutil -f -decodehex inData.tmp inData.bin 62 | - name: Test setup (Linux) 63 | if: matrix.config.os == 'ubuntu-latest' 64 | run: | 65 | cd examples/generic_test/build 66 | echo -n -e '\x52\xb8\x96\x40' > inData.bin 67 | - name: Test run 68 | shell: bash 69 | run: | 70 | cd examples/generic_test/build 71 | ./generic_test inData.bin > outData.txt 72 | cmp outData.txt outExpect.txt 73 | -------------------------------------------------------------------------------- /tflite_micro_compiler/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | mobilnet 3 | hello_world 4 | hello_world_compiled 5 | compiler 6 | build*/ 7 | 
-------------------------------------------------------------------------------- /tflite_micro_compiler/.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "Linux", 5 | "includePath": [ 6 | "${workspaceFolder}/**", 7 | "${workspaceFolder}/../tensorflow", 8 | "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/", 9 | "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/gemmlowp", 10 | "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/flatbuffers/include", 11 | "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/ruy", 12 | "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/kissfft" 13 | ], 14 | "defines": [ "TF_LITE_STATIC_MEMORY", "NDEBUG", "TF_LITE_DISABLE_X86_NEON", "SUFFICIENT_ARENA_SIZE" ], 15 | "compilerPath": "/usr/bin/g++", 16 | "cStandard": "c11", 17 | "cppStandard": "c++17", 18 | "intelliSenseMode": "clang-x64" 19 | } 20 | ], 21 | "version": 4 22 | } 23 | -------------------------------------------------------------------------------- /tflite_micro_compiler/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "(gdb) hello example Launch", 9 | "type": "cppdbg", 10 | "request": "launch", 11 | "program": "${workspaceFolder}/example/hello_world", 12 | "args": [], 13 | "stopAtEntry": true, 14 | "cwd": "${workspaceFolder}", 15 | "environment": [], 16 | "externalConsole": false, 17 | "MIMode": "gdb", 18 | "setupCommands": [ 19 | { 20 | "description": "Enable pretty-printing for gdb", 21 | "text": "-enable-pretty-printing", 22 | "ignoreFailures": true 23 | } 24 | ] 25 | }, 26 | { 27 | "name": "(gdb) cifar compiled example Launch", 28 | "type": "cppdbg", 29 | "request": "launch", 30 | "program": "${workspaceFolder}/examples/cifar10_compiled", 31 | "args": [], 32 | "stopAtEntry": true, 33 | "cwd": "${workspaceFolder}", 34 | "environment": [], 35 | "externalConsole": false, 36 | "MIMode": "gdb", 37 | "setupCommands": [ 38 | { 39 | "description": "Enable pretty-printing for gdb", 40 | "text": "-enable-pretty-printing", 41 | "ignoreFailures": true 42 | } 43 | ] 44 | }, 45 | { 46 | "name": "(gdb) cifar interpreter example Launch", 47 | "type": "cppdbg", 48 | "request": "launch", 49 | "program": "${workspaceFolder}/examples/cifar10", 50 | "args": [], 51 | "stopAtEntry": true, 52 | "cwd": "${workspaceFolder}", 53 | "environment": [], 54 | "externalConsole": false, 55 | "MIMode": "gdb", 56 | "setupCommands": [ 57 | { 58 | "description": "Enable pretty-printing for gdb", 59 | "text": "-enable-pretty-printing", 60 | "ignoreFailures": true 61 | } 62 | ] 63 | } 64 | ] 65 | } -------------------------------------------------------------------------------- /tflite_micro_compiler/.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // for the documentation about the tasks.json format 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | 
"label": "make", 8 | "type": "shell", 9 | "command": "make", 10 | // use options.cwd property if the Makefile is not in the project root ${workspaceRoot} dir 11 | "options": { 12 | "cwd": "${workspaceRoot}" 13 | }, 14 | // start the build without prompting for task selection, use "group": "build" otherwise 15 | "group": { 16 | "kind": "build", 17 | "isDefault": true 18 | }, 19 | "presentation": { 20 | "echo": true, 21 | "reveal": "always", 22 | "focus": false, 23 | "panel": "shared" 24 | }, 25 | // arg passing example: in this case is executed make QUIET=0 26 | "args": ["QUIET=0"], 27 | // Use the standard less compilation problem matcher. 28 | "problemMatcher": { 29 | "base": "$gcc", 30 | "fileLocation": [ "relative", "${workspaceRoot}" ] 31 | } 32 | } 33 | ] 34 | } -------------------------------------------------------------------------------- /tflite_micro_compiler/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 3.13 FATAL_ERROR) 2 | PROJECT(tflite_micro_compiler) 3 | 4 | # We define XBUILD to use own cmake when building with 5 | # lib_tflite_micro 6 | IF(XBUILD) 7 | INCLUDE("cmake/xbuild.cmake") 8 | ELSE() 9 | SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) 10 | 11 | SET(CMAKE_EXPORT_COMPILE_COMMANDS ON) 12 | 13 | IF(NOT TF_DIR) 14 | SET(TF_DIR "../tensorflow" CACHE STRING "TensorFlow source directory") 15 | ENDIF() 16 | 17 | GET_FILENAME_COMPONENT(TF_ABSPATH ${TF_DIR} REALPATH) 18 | 19 | IF(NOT GET_TF_SRC) 20 | if(EXISTS "${TF_ABSPATH}") 21 | SET(TFL_SRC ${TF_ABSPATH}/tensorflow/lite) 22 | SET(TFLM_SRC ${TFL_SRC}/micro) 23 | SET(TFLMD_SRC ${TF_ABSPATH}/..) 24 | SET(TF_INCS 25 | ${TF_ABSPATH} 26 | ${TFLMD_SRC}/flatbuffers/include 27 | ${TFLMD_SRC}/ruy 28 | ) 29 | #SET(TF_LIB tensorflow-microlite) 30 | ELSE() 31 | MESSAGE(FATAL_ERROR "\ 32 | No valid TensorFlow source directory provided, default path \ 33 | '../tensorflow' is also not valid. 
To automatically pull TensorFlow \ 34 | source please provide argument '-DGET_TF_SRC=ON' to CMake.\ 35 | ") 36 | ENDIF() 37 | ELSE() 38 | FIND_PACKAGE(TFLite REQUIRED) 39 | SET(TF_INCS 40 | ${TFLite_INCLUDE_DIRS} 41 | ) 42 | SET(TF_LIB tensorflow-microlite) 43 | ENDIF() 44 | 45 | SET(COMPILER_HEADERS 46 | ${PROJECT_SOURCE_DIR}/src/CodeWriter.h 47 | ${PROJECT_SOURCE_DIR}/src/Compiler.h 48 | ${PROJECT_SOURCE_DIR}/src/CustomOperators.h 49 | ${PROJECT_SOURCE_DIR}/src/MemMap.h 50 | ${PROJECT_SOURCE_DIR}/src/RecordAllocations.h 51 | ${PROJECT_SOURCE_DIR}/src/TypeToString.h 52 | ) 53 | 54 | SET(COMPILER_SRCS 55 | ${PROJECT_SOURCE_DIR}/src/CodeWriter.cc 56 | ${PROJECT_SOURCE_DIR}/src/Compiler.cc 57 | ${PROJECT_SOURCE_DIR}/src/CustomOperators.cc 58 | ${PROJECT_SOURCE_DIR}/src/MemMap.cc 59 | ${PROJECT_SOURCE_DIR}/src/RecordAllocations.cc 60 | ${PROJECT_SOURCE_DIR}/src/TypeToString.cc 61 | ${PROJECT_SOURCE_DIR}/src/main.cc 62 | ) 63 | 64 | ADD_LIBRARY(${PROJECT_NAME} STATIC 65 | ${COMPILER_SRCS} 66 | ) 67 | 68 | TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PUBLIC 69 | ${TF_INCS} 70 | ) 71 | 72 | TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC ${TF_LIB}) 73 | 74 | TARGET_COMPILE_DEFINITIONS(${PROJECT_NAME} PUBLIC 75 | TF_LITE_STATIC_MEMORY 76 | TF_LITE_DISABLE_X86_NEON 77 | SUFFICIENT_ARENA_SIZE=128*1024*1024 78 | ) 79 | TARGET_COMPILE_FEATURES(${PROJECT_NAME} PUBLIC cxx_std_14) 80 | ADD_EXECUTABLE(compiler src/main.cc) 81 | TARGET_LINK_LIBRARIES(compiler ${PROJECT_NAME}) 82 | 83 | ADD_CUSTOM_TARGET( 84 | format 85 | COMMAND clang-format -i ${COMPILER_SRCS} ${COMPILER_HEADERS} 86 | ) 87 | 88 | IF(TF_EXAMPLES) 89 | ADD_SUBDIRECTORY(${PROJECT_SOURCE_DIR}/examples) 90 | ENDIF() 91 | ENDIF() 92 | -------------------------------------------------------------------------------- /tflite_micro_compiler/Makefile: -------------------------------------------------------------------------------- 1 | TF_DIR=../tensorflow 2 | include common.mk 3 | 4 | .PHONY: tflite all 5 | 6 | all: compiler 
examples 7 | 8 | tflite: 9 | $(MAKE) -C $(TF_DIR) -f tensorflow/lite/micro/tools/make/Makefile microlite 10 | 11 | COMPILER_OBJS = src/main.o src/Compiler.o src/CodeWriter.o src/TypeToString.o src/RecordAllocations.o src/MemMap.o src/CustomOperators.o 12 | 13 | compiler: $(COMPILER_OBJS) tflite 14 | $(CXX) $(LDOPTS) -o $@ $(COMPILER_OBJS) $(LIBS) 15 | 16 | clean: clean-compiler clean-examples # 'Makefile' below must match the real file-name case (Linux is case-sensitive) 17 | $(MAKE) -C $(TF_DIR) -f tensorflow/lite/micro/tools/make/Makefile clean 18 | 19 | FORMAT_FILES := $(shell find src -regex '.*\(h\|cpp\)') 20 | 21 | format: 22 | clang-format -i $(FORMAT_FILES) 23 | 24 | .PHONY: examples clean-examples clean-compiler 25 | examples: 26 | cd examples && $(MAKE) 27 | 28 | clean-examples: 29 | $(MAKE) -C examples clean 30 | 31 | clean-compiler: 32 | $(RM) src/*.o compiler 33 | 34 | -------------------------------------------------------------------------------- /tflite_micro_compiler/NEWS.txt: -------------------------------------------------------------------------------- 1 | 2020-05-15 2 | Properly handle allocation from the Prepare method 3 | move example code to separate directory 4 | 5 | 2020-05-13 6 | Support unknown operators by guessing the name of the register function 7 | 8 | -------------------------------------------------------------------------------- /tflite_micro_compiler/README.md: -------------------------------------------------------------------------------- 1 | # tflite_micro_compiler 2 | 3 | Generate tflite micro code which bypasses the interpreter (directly calls into kernels) 4 | 5 | Basically this code uses a fully set up tflite micro instance to dump the internal allocations and 6 | function calls assigned to the model, then dumps the tensor and node settings into a compilable 7 | file, eliminating the need for running the interpreter at each program start and for resolving the correct 8 | kernel at run time. 
9 | 10 | An in depth explanation of the motivation and benefits is included in the matching [RFC](https://docs.google.com/document/d/1wDqC50sjCaWyQxsSn_Y-XAGh8-ozIgm2HDzX_b9DIyo/edit?usp=sharing). 11 | 12 | # Building 13 | 14 | ## CMake 15 | 16 | Below the two methods of incorporating the TensorFlow sources into your build are 17 | explained. 18 | 19 | The basic flow of building with CMake is 20 | 21 | ``` bash 22 | mkdir build 23 | cd build 24 | cmake [options] .. 25 | make 26 | ``` 27 | 28 | ### Examples 29 | The examples cmake [here](examples/CMakeLists.txt) is by default not included due to issues with TensorFlow source code compatibility when using specific code versions. 30 | To enable building the examples pass `-DTF_EXAMPLES=ON` to CMake. 31 | 32 | ## Automatic TensorFlow Source Fetching 33 | 34 | To pull the TensorFlow sources using CMake with the variable `GET_TF_SRC` 35 | set to `ON`. 36 | 37 | e.g. 38 | 39 | ``` bash 40 | cmake -DGET_TF_SRC=ON .. 41 | ``` 42 | 43 | This will retrieve the TensorFlow master branch's code. 44 | It should also be noted that `GET_TF_SRC` is prioritized over `TF_DIR` (see below). 45 | If you want to specify a TensorFlow tag to checkout then this can be passed to 46 | CMake using the option `TF_TAG`. 47 | 48 | e.g. 49 | 50 | ``` bash 51 | cmake -DGET_TF_SRC=ON TF_TAG=v2.2.0 .. 52 | ``` 53 | 54 | Similarly a Git commit hash can be provided using `TF_COMMIT`. Note that 55 | `TF_TAG` takes precedence if both are provided. 56 | 57 | e.g. 58 | 59 | ```bash 60 | cmake -DGET_TF_SRC=ON TF_COMMIT=0fecf6f89fd7bacc1ec4213b946a254e885b82ac .. 61 | ``` 62 | 63 | To checkout a different TensorFlow code base without clearing the CMake cache 64 | the argument `TF_RECACHE` should be set, this will force the TensorFlow 65 | source to be checked-out again. 66 | 67 | e.g. 68 | 69 | ```bash 70 | cmake -DGET_TF_SRC=ON -DTF_RECACHE=ON TF_COMMIT=0fecf6f89fd7bacc1ec4213b946a254e885b82ac .. 
71 | ``` 72 | 73 | ## Providing TensorFlow Source Manually 74 | 75 | By default CMake looks for the TensorFlow source in the directory `../tensorflow`. 76 | If you want to specify your TensorFlow source directory this can be done by 77 | providing the argument `TF_DIR`. 78 | 79 | e.g. 80 | 81 | ``` bash 82 | cmake -DTF_DIR=../my_tensorflow .. 83 | ``` 84 | 85 | ## Additional Targets 86 | 87 | ### format 88 | 89 | To invoke `clang-format` CMake provides the `format` target. 90 | 91 | e.g. 92 | 93 | ```bash 94 | cmake .. 95 | make format 96 | ``` 97 | 98 | ## Make 99 | 100 | - check out tensorflow master next to this project (in ../tensorflow) 101 | - start with building the tflite micro library as described in https://www.tensorflow.org/lite/microcontrollers/library: 102 | 103 | - `cd ../tensorflow` 104 | 105 | - `make -f tensorflow/lite/micro/tools/make/Makefile hello_world_bin` 106 | [optionally add BUILD_TYPE=debug] 107 | 108 | - now run make in this project to get the compiler 109 | 110 | # Usage 111 | 112 | - the compiler is invoked as `./compiler input.tflite output.cpp [prefix]` 113 | 114 | e.g. 
115 | 116 | ``` bash 117 | ./compiler hello_world.tflite hello_compiled.cpp hello_ 118 | ``` 119 | 120 | - for a quick view into the generated code see [`compiled_hello_world.cc`](https://github.com/cpetig/tflite_micro_compiler/blob/master/examples/compiled_hello_world.cc) 121 | 122 | You can compare calling into interpreter and compiled code between [`hello_world.cc`](https://github.com/cpetig/tflite_micro_compiler/blob/master/examples/hello_world.cc) 123 | and [`hello_world2.cc`](https://github.com/cpetig/tflite_micro_compiler/blob/master/examples/hello_world2.cc) 124 | 125 | - The example directory contains a collection of traditional tflite micro and compiled versions: 126 | 127 | - hello_world: Standard tflite micro example 128 | - cifar10: Computer vision CNN example 129 | 130 | # Limitations 131 | 132 | - no support for big endian machines, yet 133 | -------------------------------------------------------------------------------- /tflite_micro_compiler/VerifiedTensorflowVersion.txt: -------------------------------------------------------------------------------- 1 | 62b6c316d2a9a1fb06aefb086856e76241280c08 2 | -------------------------------------------------------------------------------- /tflite_micro_compiler/cmake/xbuild.cmake: -------------------------------------------------------------------------------- 1 | #********************** 2 | # Disable in-source build. 3 | #********************** 4 | if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") 5 | message(FATAL_ERROR "In-source build is not allowed! 
Please specify a build folder.\n\tex:cmake -B build") 6 | endif() 7 | 8 | #********************** 9 | # install 10 | #********************** 11 | set(INSTALL_DIR "${PROJECT_SOURCE_DIR}/bin") 12 | 13 | #********************** 14 | # Build flags 15 | #********************** 16 | 17 | set(BUILD_FLAGS 18 | "-g" 19 | "-O0" 20 | ) 21 | 22 | #********************** 23 | # Targets 24 | #********************** 25 | set(TOP_DIR 26 | "${CMAKE_CURRENT_SOURCE_DIR}/..") 27 | include(${TOP_DIR}/cmakefiles/xtflm.cmake) 28 | 29 | add_library(xtflitemicro SHARED) 30 | set(DEFINTIONS 31 | "__xtflm_conf_h_exists__" 32 | "NO_INTERPRETER" 33 | "NN_USE_REF" 34 | "TF_LITE_STATIC_MEMORY" 35 | "TF_LITE_DISABLE_X86_NEON" 36 | ) 37 | target_compile_options(xtflitemicro PRIVATE ${BUILD_FLAGS}) 38 | target_link_options(xtflitemicro PRIVATE ${BUILD_FLAGS}) 39 | target_compile_definitions(xtflitemicro PUBLIC 40 | ${DEFINTIONS} 41 | ) 42 | target_compile_features(xtflitemicro PUBLIC cxx_std_11) 43 | target_sources(xtflitemicro 44 | PRIVATE ${TFLM_KERNEL_SOURCES} 45 | PRIVATE ${TFLITE_SOURCES} 46 | PRIVATE ${NN_SOURCES} 47 | PRIVATE ${XTFLIB_KERNEL_SOURCES} 48 | ) 49 | target_include_directories(xtflitemicro 50 | PRIVATE ${ALL_INCLUDES} 51 | ) 52 | install(TARGETS xtflitemicro DESTINATION ${INSTALL_DIR}) 53 | 54 | 55 | add_executable(tflite_micro_compiler) 56 | unset(DEFINTIONS) 57 | set(DEFINTIONS 58 | "__xtflm_conf_h_exists__" 59 | "NN_USE_REF" 60 | "TF_LITE_STATIC_MEMORY" 61 | "TF_LITE_DISABLE_X86_NEON" 62 | "SUFFICIENT_ARENA_SIZE=128*1024*1024" 63 | ) 64 | target_compile_options(tflite_micro_compiler PRIVATE ${BUILD_FLAGS}) 65 | target_link_options(tflite_micro_compiler PRIVATE ${BUILD_FLAGS}) 66 | file(GLOB_RECURSE COMPILER_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/src/*.h") 67 | file(GLOB_RECURSE COMPILER_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cc") 68 | target_compile_definitions(tflite_micro_compiler PUBLIC 69 | ${DEFINTIONS} 70 | ) 71 | target_compile_features(tflite_micro_compiler PUBLIC 
cxx_std_11) 72 | target_sources(tflite_micro_compiler 73 | PRIVATE ${ALL_SOURCES} 74 | PRIVATE ${COMPILER_SRCS} 75 | ) 76 | target_include_directories(tflite_micro_compiler 77 | PRIVATE ${COMPILER_HEADERS} 78 | PRIVATE ${ALL_INCLUDES} 79 | ) 80 | install(TARGETS tflite_micro_compiler DESTINATION ${INSTALL_DIR}) 81 | -------------------------------------------------------------------------------- /tflite_micro_compiler/common.mk: -------------------------------------------------------------------------------- 1 | CXXFLAGS=-g -std=c++14 -DTF_LITE_STATIC_MEMORY -DNDEBUG -O3 -DTF_LITE_DISABLE_X86_NEON -DSUFFICIENT_ARENA_SIZE=128\*1024\*1024 \ 2 | -I$(TF_DIR) -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/ \ 3 | -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/gemmlowp \ 4 | -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/flatbuffers/include \ 5 | -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/ruy \ 6 | -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/kissfft 7 | 8 | LDOPTS=-L $(TF_DIR)/tensorflow/lite/micro/tools/make/gen/$(HOST_OS_BUILD)/lib 9 | 10 | 11 | ifeq ($(OS),Windows_NT) 12 | LIBS=-ltensorflow-microlite 13 | HOST_OS_BUILD=windows_x86_64 14 | else 15 | LIBS=-ltensorflow-microlite -ldl 16 | HOST_OS_BUILD=linux_x86_64 17 | endif 18 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/.gitignore: -------------------------------------------------------------------------------- 1 | cifar10 2 | cifar10_compiled 3 | mobilenet 4 | mobilenet_compiled 5 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | MESSAGE(STATUS "Micro compiler examples included") 2 | MACRO(ADD_EXAMPLE TARGET_NAME) 3 | ADD_EXECUTABLE(${TARGET_NAME} ${TARGET_NAME}.cc ${ARGV}) 4 | TARGET_LINK_LIBRARIES(${TARGET_NAME} PUBLIC ${PROJECT_NAME}) 5 
| ENDMACRO() 6 | 7 | 8 | ADD_EXAMPLE(hello_world hello_world_model.cc) 9 | ADD_EXAMPLE(mobilenet mobilenet_v1_0_25_160_quantized.c gnu.c) 10 | ADD_EXAMPLE(cifar10 cifar10_model.c truck.c) 11 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/Makefile: -------------------------------------------------------------------------------- 1 | TF_DIR=../../tensorflow 2 | include ../common.mk 3 | 4 | 5 | all: hello_world hello_world_compiled mobilenet mobilenet_compiled cifar10 cifar10_compiled 6 | 7 | clean: 8 | $(RM) *.o hello_world hello_world_compiled mobilnet mobilnet_compiled cifar10 cifar10_compiled 9 | 10 | mobilenet: mobilenet.o mobilenet_v1_0_25_160_quantized.o gnu.o 11 | $(CXX) -o $@ $^ $(LDOPTS) $(LIBS) 12 | 13 | mobilenet_compiled: mobilenet2.o compiled_mobilenet.o gnu.o 14 | $(CXX) -o $@ $^ $(LDOPTS) $(LIBS) 15 | 16 | hello_world: hello_world.o hello_world_model.o 17 | $(CXX) -o $@ $^ $(LDOPTS) $(LIBS) 18 | 19 | hello_world_compiled: hello_world2.o compiled_hello_world.o 20 | $(CXX) -o $@ $^ $(LDOPTS) $(LIBS) 21 | 22 | hello_world_model.o: hello_world_model.cc 23 | $(CXX) -o $@ -c $^ $(CXXFLAGS) 24 | 25 | cifar10: cifar10_model.o truck.o cifar10.o 26 | $(CXX) -o $@ $^ $(LDOPTS) $(LIBS) 27 | 28 | cifar10_compiled: truck.o compiled_cifar10.o cifar10_run_comp.o 29 | $(CXX) -o $@ $^ $(LDOPTS) $(LIBS) 30 | 31 | lstm_compiled: lstm_test.o lstm_compiled.o 32 | $(CXX) -o $@ $^ ${LIBS} 33 | 34 | regenerate: ../compiler 35 | ../compiler hello_world.tflite compiled_hello.cpp hello_ 36 | ../compiler mobilenet_v1_0_25_160_quantized.tflite compiled_mobilenet.cpp mobilenet_ 37 | ../compiler cifar10.tflite cifar10_compiled.cc cifar_ 38 | ../compiler lstm2.tflite lstm_compiled.cc lstm_ 39 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/Makefile.inc: -------------------------------------------------------------------------------- 1 | 2 | 
MOBILENET_SRCS := \ 3 | examples/mobilenet.cc examples/mobilenet_v1_0_25_160_quantized.c examples/gnu.c 4 | 5 | MOBILENET_COMPILED_SRCS := \ 6 | examples/mobilenet2.cc examples/compiled_mobilenet.cc examples/gnu.c 7 | 8 | MOBILENET_HDRS := 9 | MOBILENET_COMPILED_HDRS := 10 | 11 | HELLO_WORLD_SRC := \ 12 | examples/hello_world.cc examples/hello_world_model.cc 13 | HELLO_WORLD_HDRS := 14 | 15 | HELLO_WORLD_COMPILED_SRC := \ 16 | examples/hello_world2.cc examples/compiled_hello_world.cc 17 | HELLO_WORLD_COMPILED_HDRS := 18 | 19 | 20 | CIFAR10_SRC := \ 21 | examples/cifar10.cc examples/cifar10_model.cc examples/truck.c 22 | CIFAR10_HDRS := 23 | 24 | 25 | CIFAR10_COMPILED_SRC := \ 26 | examples/cifar10_compiled.cc examples/cifar10_run_comp.cc examples/truck.c 27 | CIFAR10_COMPILED_HDRS := 28 | 29 | $(info Adding mobilenet mobilenet_compiled ) 30 | 31 | # Builds a standalone binary. 32 | $(eval $(call microlite_test,mobilenet,\ 33 | $(MOBILENET_SRCS),$(MOBILENET_HDRS))) 34 | 35 | $(eval $(call microlite_test,mobilenet_compiled,\ 36 | $(MOBILENET_COMPILED_SRCS),$(MOBILENET_COMPILED_HDRS))) 37 | 38 | $(info Adding cifar10 cifar10_compiled ) 39 | $(eval $(call microlite_test,cifar10,\ 40 | $(CIFAR10_SRC),$(CIFAR10_HDRS))) 41 | 42 | $(eval $(call microlite_test,cifar10_compiled,\ 43 | $(CIFAR10_SRC),$(CIFAR10_HDRS))) 44 | 45 | $(info Adding hello_world ) 46 | $(eval $(call microlite_test,hello_world,\ 47 | $(HELLO_WORLD_SRC),$(HELLO_WORLD_HDRS))) 48 | 49 | $(eval $(call microlite_test,hello_world_compiled,\ 50 | $(HELLO_WORLD_COMPILED_SRC),$(HELLO_WORLD_COMPILED_HDRS))) -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/Model_source.txt: -------------------------------------------------------------------------------- 1 | https://www.tensorflow.org/tutorials/images/cnn 2 | https://storage.googleapis.com/tfhub-lite-models/tensorflow/lite-model/mobilenet_v1_0.25_160_quantized/1/default/1.tflite 3 | 
-------------------------------------------------------------------------------- /tflite_micro_compiler/examples/cifar10.cc: -------------------------------------------------------------------------------- 1 | 2 | #include // for check output 3 | 4 | #include "tensorflow/lite/micro/all_ops_resolver.h" 5 | #include "tensorflow/lite/micro/micro_error_reporter.h" 6 | #include "tensorflow/lite/micro/micro_interpreter.h" 7 | #include "tensorflow/lite/micro/simple_memory_allocator.h" 8 | #include "tensorflow/lite/schema/schema_generated.h" 9 | #include "tensorflow/lite/version.h" 10 | 11 | // Create an area of memory to use for input, output, and intermediate arrays. 12 | // The size of this will depend on the model you're using, and may need to be 13 | // determined by experimentation. 14 | static const int tensor_arena_size = 150 * 1000; 15 | static uint8_t tensor_arena[tensor_arena_size]; 16 | 17 | extern "C" const unsigned char cifar10_tflite[]; 18 | extern "C" const unsigned char truck[]; 19 | extern "C" const int cifar10_tflite_len; 20 | 21 | // Set up logging. 22 | static tflite::ErrorReporter* error_reporter = nullptr; 23 | // This pulls in all the operation implementations we need. 24 | static tflite::AllOpsResolver* resolver = nullptr; 25 | static const tflite::Model* model = nullptr; 26 | static tflite::MicroInterpreter* interpreter = nullptr; 27 | 28 | void init(void) { 29 | static tflite::MicroErrorReporter micro_error_reporter; 30 | error_reporter = µ_error_reporter; 31 | 32 | // Map the model into a usable data structure. This doesn't involve any 33 | // copying or parsing, it's a very lightweight operation. 
34 | model = ::tflite::GetModel(cifar10_tflite); 35 | if (model->version() != TFLITE_SCHEMA_VERSION) { 36 | TF_LITE_REPORT_ERROR(error_reporter, 37 | "Model provided is schema version %d not equal " 38 | "to supported version %d.\n", 39 | model->version(), TFLITE_SCHEMA_VERSION); 40 | return; 41 | } 42 | static tflite::AllOpsResolver local_resolver; 43 | resolver = &local_resolver; 44 | 45 | // Build an interpreter to run the model with. 46 | static tflite::MicroInterpreter static_interpreter( 47 | model, *resolver, tensor_arena, tensor_arena_size, error_reporter); 48 | interpreter = &static_interpreter; 49 | TfLiteStatus allocate_status = interpreter->AllocateTensors(); 50 | if (allocate_status != kTfLiteOk) { 51 | TF_LITE_REPORT_ERROR(error_reporter, "AllocateTensors() failed"); 52 | return; 53 | } 54 | } 55 | 56 | void run() { 57 | TfLiteTensor* model_input = interpreter->input(0); 58 | for (uint32_t i = 0; i < 32 * 32 * 3; ++i) 59 | model_input->data.f[i] = truck[i] / 255.0f; 60 | 61 | TfLiteStatus invoke_status = interpreter->Invoke(); 62 | if (invoke_status != kTfLiteOk) { 63 | TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed"); 64 | } 65 | TfLiteTensor* model_output = interpreter->output(0); 66 | for (int i = 0; i < model_output->dims->data[1]; ++i) 67 | std::cerr << model_output->data.f[i] << ", "; 68 | std::cerr << std::endl; 69 | } 70 | 71 | int main(int argc, char** argv) { 72 | init(); 73 | run(); 74 | return 0; 75 | } 76 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/cifar10.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/tflite_micro_compiler/examples/cifar10.tflite -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/cifar10_run_comp.cc: 
-------------------------------------------------------------------------------- 1 | /* Copyright 2020 Christof Petig. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | #include 17 | 18 | #include "tensorflow/lite/c/common.h" 19 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" 20 | 21 | extern "C" const unsigned char truck[]; 22 | 23 | extern void cifar_init(); 24 | extern void cifar_invoke(); 25 | extern TfLiteTensor* cifar_input(int index = 0); 26 | extern TfLiteTensor* cifar_output(int index = 0); 27 | 28 | void test_compiled(void) { 29 | float* in = cifar_input()->data.f; 30 | for (uint32_t i = 0; i < 32 * 32 * 3; ++i) in[i] = truck[i] / 255.0f; 31 | float* out = cifar_output()->data.f; 32 | cifar_invoke(); 33 | for (uint32_t i = 0; i < 10; ++i) std::cerr << out[i] << ", "; 34 | std::cerr << std::endl; 35 | } 36 | 37 | int main(int argc, char** argv) { 38 | cifar_init(); 39 | test_compiled(); 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/compiled_cifar10.cc.h: -------------------------------------------------------------------------------- 1 | // This file is generated. Do not edit. 
2 | // Generated on: 11.08.2020 11:26:36 3 | 4 | #ifndef cifar_GEN_H 5 | #define cifar_GEN_H 6 | 7 | #include "tensorflow/lite/c/common.h" 8 | 9 | // Sets up the model with init and prepare steps. 10 | TfLiteStatus cifar_init(); 11 | // Returns the input tensor with the given index. 12 | TfLiteTensor *cifar_input(int index); 13 | // Returns the output tensor with the given index. 14 | TfLiteTensor *cifar_output(int index); 15 | // Runs inference for the model. 16 | TfLiteStatus cifar_invoke(); 17 | 18 | // Returns the number of input tensors. 19 | inline size_t cifar_inputs() { return 1; } 20 | // Returns the number of output tensors. 21 | inline size_t cifar_outputs() { return 1; } 22 | 23 | inline void *cifar_input_ptr(int index) { 24 | return cifar_input(index)->data.data; 25 | } 26 | inline size_t cifar_input_size(int index) { return cifar_input(index)->bytes; } 27 | inline int cifar_input_dims_len(int index) { 28 | return cifar_input(index)->dims->data[0]; 29 | } 30 | inline int *cifar_input_dims(int index) { 31 | return &cifar_input(index)->dims->data[1]; 32 | } 33 | 34 | inline void *cifar_output_ptr(int index) { 35 | return cifar_output(index)->data.data; 36 | } 37 | inline size_t cifar_output_size(int index) { 38 | return cifar_output(index)->bytes; 39 | } 40 | inline int cifar_output_dims_len(int index) { 41 | return cifar_output(index)->dims->data[0]; 42 | } 43 | inline int *cifar_output_dims(int index) { 44 | return &cifar_output(index)->dims->data[1]; 45 | } 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/compiled_hello_world.cc.h: -------------------------------------------------------------------------------- 1 | // This file is generated. Do not edit. 2 | // Generated on: 11.08.2020 11:26:36 3 | 4 | #ifndef hello_GEN_H 5 | #define hello_GEN_H 6 | 7 | #include "tensorflow/lite/c/common.h" 8 | 9 | // Sets up the model with init and prepare steps. 
10 | TfLiteStatus hello_init(); 11 | // Returns the input tensor with the given index. 12 | TfLiteTensor *hello_input(int index); 13 | // Returns the output tensor with the given index. 14 | TfLiteTensor *hello_output(int index); 15 | // Runs inference for the model. 16 | TfLiteStatus hello_invoke(); 17 | 18 | // Returns the number of input tensors. 19 | inline size_t hello_inputs() { return 1; } 20 | // Returns the number of output tensors. 21 | inline size_t hello_outputs() { return 1; } 22 | 23 | inline void *hello_input_ptr(int index) { 24 | return hello_input(index)->data.data; 25 | } 26 | inline size_t hello_input_size(int index) { return hello_input(index)->bytes; } 27 | inline int hello_input_dims_len(int index) { 28 | return hello_input(index)->dims->data[0]; 29 | } 30 | inline int *hello_input_dims(int index) { 31 | return &hello_input(index)->dims->data[1]; 32 | } 33 | 34 | inline void *hello_output_ptr(int index) { 35 | return hello_output(index)->data.data; 36 | } 37 | inline size_t hello_output_size(int index) { 38 | return hello_output(index)->bytes; 39 | } 40 | inline int hello_output_dims_len(int index) { 41 | return hello_output(index)->dims->data[0]; 42 | } 43 | inline int *hello_output_dims(int index) { 44 | return &hello_output(index)->dims->data[1]; 45 | } 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/compiled_mobilenet.cc.h: -------------------------------------------------------------------------------- 1 | // This file is generated. Do not edit. 2 | // Generated on: 11.08.2020 11:26:36 3 | 4 | #ifndef mobilenet_GEN_H 5 | #define mobilenet_GEN_H 6 | 7 | #include "tensorflow/lite/c/common.h" 8 | 9 | // Sets up the model with init and prepare steps. 10 | TfLiteStatus mobilenet_init(); 11 | // Returns the input tensor with the given index. 12 | TfLiteTensor *mobilenet_input(int index); 13 | // Returns the output tensor with the given index. 
14 | TfLiteTensor *mobilenet_output(int index); 15 | // Runs inference for the model. 16 | TfLiteStatus mobilenet_invoke(); 17 | 18 | // Returns the number of input tensors. 19 | inline size_t mobilenet_inputs() { return 1; } 20 | // Returns the number of output tensors. 21 | inline size_t mobilenet_outputs() { return 1; } 22 | 23 | inline void *mobilenet_input_ptr(int index) { 24 | return mobilenet_input(index)->data.data; 25 | } 26 | inline size_t mobilenet_input_size(int index) { 27 | return mobilenet_input(index)->bytes; 28 | } 29 | inline int mobilenet_input_dims_len(int index) { 30 | return mobilenet_input(index)->dims->data[0]; 31 | } 32 | inline int *mobilenet_input_dims(int index) { 33 | return &mobilenet_input(index)->dims->data[1]; 34 | } 35 | 36 | inline void *mobilenet_output_ptr(int index) { 37 | return mobilenet_output(index)->data.data; 38 | } 39 | inline size_t mobilenet_output_size(int index) { 40 | return mobilenet_output(index)->bytes; 41 | } 42 | inline int mobilenet_output_dims_len(int index) { 43 | return mobilenet_output(index)->dims->data[0]; 44 | } 45 | inline int *mobilenet_output_dims(int index) { 46 | return &mobilenet_output(index)->dims->data[1]; 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/custom/.gitignore: -------------------------------------------------------------------------------- 1 | libtflite_micro_custom.so 2 | *.o 3 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/custom/Makefile: -------------------------------------------------------------------------------- 1 | TF_DIR=../../../tensorflow 2 | CXXFLAGS=-fPIC -g -std=c++11 -DTF_LITE_STATIC_MEMORY -DNDEBUG -O3 -DTF_LITE_DISABLE_X86_NEON \ 3 | -I${TF_DIR} -I${TF_DIR}/tensorflow/lite/micro/tools/make/downloads/ \ 4 | -I${TF_DIR}/tensorflow/lite/micro/tools/make/downloads/gemmlowp \ 5 | 
-I${TF_DIR}/tensorflow/lite/micro/tools/make/downloads/flatbuffers/include \ 6 | -I${TF_DIR}/tensorflow/lite/micro/tools/make/downloads/ruy \ 7 | -I${TF_DIR}/tensorflow/lite/micro/tools/make/downloads/kissfft 8 | 9 | libtflite_micro_custom.so: registration.o fake_implementations.o custom_implementations.o 10 | $(CXX) --shared -o $@ $^ 11 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/custom/custom.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/tflite_micro_compiler/examples/custom/custom.tflite -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/custom/custom_implementations.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "tensorflow/lite/c/builtin_op_data.h" 3 | #include "tensorflow/lite/micro/kernels/all_ops_resolver.h" 4 | 5 | namespace tflite { 6 | namespace ops { 7 | namespace micro { 8 | namespace complex { 9 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) 10 | { 11 | return kTfLiteOk; 12 | } 13 | } // namespace reduce_max 14 | TfLiteRegistration *Register_Complex(void) { 15 | static TfLiteRegistration res = { 16 | nullptr, 17 | nullptr, 18 | nullptr, 19 | complex::Eval, 20 | }; 21 | return &res; 22 | } 23 | namespace imag { 24 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) 25 | { 26 | return kTfLiteOk; 27 | } 28 | } // namespace reduce_max 29 | TfLiteRegistration *Register_Imag(void) { 30 | static TfLiteRegistration res = { 31 | nullptr, 32 | nullptr, 33 | nullptr, 34 | imag::Eval, 35 | }; 36 | return &res; 37 | } 38 | } // namespace micro 39 | } // namespace ops 40 | } // namespace tflite 41 | 42 | void register_addons2(tflite::ops::micro::AllOpsResolver *res) { 43 | res->AddCustom("Complex", 
tflite::ops::micro::Register_Complex()); 44 | res->AddCustom("Imag", tflite::ops::micro::Register_Imag()); 45 | } 46 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/custom/fake_implementations.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "tensorflow/lite/c/builtin_op_data.h" 3 | #include "tensorflow/lite/micro/all_ops_resolver.h" 4 | 5 | namespace tflite { 6 | namespace ops { 7 | namespace micro { 8 | namespace reduce_max { 9 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) { return kTfLiteOk; } 10 | } // namespace reduce_max 11 | TfLiteRegistration *Register_REDUCE_MAX(void) { 12 | static TfLiteRegistration res = { 13 | nullptr, 14 | nullptr, 15 | nullptr, 16 | reduce_max::Eval, 17 | }; 18 | return &res; 19 | } 20 | namespace exp { 21 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) { return kTfLiteOk; } 22 | } // namespace exp 23 | TfLiteRegistration *Register_EXP(void) { 24 | static TfLiteRegistration res = { 25 | nullptr, 26 | nullptr, 27 | nullptr, 28 | exp::Eval, 29 | }; 30 | return &res; 31 | } 32 | namespace sum { 33 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) { return kTfLiteOk; } 34 | } // namespace sum 35 | TfLiteRegistration *Register_SUM(void) { 36 | static TfLiteRegistration res = { 37 | nullptr, 38 | nullptr, 39 | nullptr, 40 | sum::Eval, 41 | }; 42 | return &res; 43 | } 44 | namespace div { 45 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) { return kTfLiteOk; } 46 | } // namespace div 47 | TfLiteRegistration *Register_DIV(void) { 48 | static TfLiteRegistration res = { 49 | nullptr, 50 | nullptr, 51 | nullptr, 52 | div::Eval, 53 | }; 54 | return &res; 55 | } 56 | namespace squeeze { 57 | extern TfLiteStatus Eval(TfLiteContext *, TfLiteNode *) { return kTfLiteOk; } 58 | } // namespace squeeze 59 | TfLiteRegistration *Register_SQUEEZE(void) { 60 | static TfLiteRegistration res = { 61 | 
nullptr, 62 | nullptr, 63 | nullptr, 64 | squeeze::Eval, 65 | }; 66 | return &res; 67 | } 68 | } // namespace micro 69 | } // namespace ops 70 | } // namespace tflite 71 | 72 | void register_addons(tflite::AllOpsResolver *res) { 73 | res->AddBuiltin(tflite::BuiltinOperator_REDUCE_MAX, 74 | tflite::ops::micro::Register_REDUCE_MAX()); 75 | res->AddBuiltin(tflite::BuiltinOperator_EXP, 76 | tflite::ops::micro::Register_EXP()); 77 | res->AddBuiltin(tflite::BuiltinOperator_SUM, 78 | tflite::ops::micro::Register_SUM()); 79 | res->AddBuiltin(tflite::BuiltinOperator_DIV, 80 | tflite::ops::micro::Register_DIV()); 81 | res->AddBuiltin(tflite::BuiltinOperator_SQUEEZE, 82 | tflite::ops::micro::Register_SQUEEZE()); 83 | } 84 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/custom/registration.cpp: -------------------------------------------------------------------------------- 1 | #include "tensorflow/lite/micro/kernels/all_ops_resolver.h" 2 | #include 3 | 4 | extern void register_addons(tflite::ops::micro::AllOpsResolver *res); 5 | extern void register_addons2(tflite::ops::micro::AllOpsResolver *res); 6 | 7 | // symbol needed inside this dll 8 | int tflite::ErrorReporter::Report(const char* format, ...) 
{ 9 | va_list va; 10 | va_start(va, format); 11 | vfprintf(stderr, format, va); 12 | va_end(va); 13 | return 0; 14 | } 15 | 16 | extern "C" TfLiteStatus register_custom(tflite::ops::micro::AllOpsResolver *res) { 17 | register_addons(res); 18 | register_addons2(res); 19 | return kTfLiteOk; 20 | } 21 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/generic_test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 3.2 FATAL_ERROR) 2 | PROJECT(generic_test) 3 | 4 | SET(TF_DIR "../../../tensorflow" CACHE STRING "TensorFlow source directory") 5 | SET(TFL_SRC ${TF_DIR}/tensorflow/lite) 6 | SET(TFLM_SRC ${TFL_SRC}/micro) 7 | SET(TFLMD_SRC ${TFLM_SRC}/tools/make/downloads) 8 | 9 | ADD_EXECUTABLE(${PROJECT_NAME} 10 | generic_test.cpp 11 | ../../build/out.cpp 12 | ) 13 | 14 | TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PUBLIC 15 | ${TF_DIR} 16 | ${TFLMD_SRC}/flatbuffers/include 17 | ${TFLMD_SRC}/ruy 18 | ../../build 19 | ) 20 | 21 | IF(WIN32) 22 | TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/windows_x86_64/lib) 23 | ELSE() 24 | TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/linux_x86_64/lib) 25 | ENDIF() 26 | TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC tensorflow-microlite) 27 | 28 | TARGET_COMPILE_DEFINITIONS(${PROJECT_NAME} PUBLIC 29 | TF_LITE_STATIC_MEMORY 30 | TF_LITE_DISABLE_X86_NEON 31 | ) 32 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/generic_test/generic_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "out.cpp.h" 7 | 8 | int main(int argc, char *argv[]) { 9 | if (argc != 2) { 10 | std::cerr << "Usage: " << argv[0] << " inDataFile\n"; 11 | return 1; 12 | } 13 | 14 | if (model_inputs() != 1 || 
model_outputs() != 1) { 15 | std::cerr << "Mismatch for number of inputs/outputs\n"; 16 | return 1; 17 | } 18 | 19 | std::ifstream inFile(argv[1], std::ios::binary); 20 | 21 | model_init(); 22 | 23 | std::vector inData(model_input_size(0)); 24 | if (!inFile.read((char *)model_input_ptr(0), model_input_size(0))) { 25 | std::cerr << "Failed to read input file\n"; 26 | return 1; 27 | } 28 | 29 | model_invoke(); 30 | for (size_t i = 0; i < model_output_size(0); i++) { 31 | std::cout << "\\x" << std::setw(2) << std::setfill('0') << std::hex 32 | << (int)((unsigned char *)model_output_ptr(0))[i]; 33 | } 34 | std::cout << std::endl; 35 | 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/hello_world.cc: -------------------------------------------------------------------------------- 1 | 2 | #include // for check output 3 | 4 | #include "tensorflow/lite/micro/all_ops_resolver.h" 5 | #include "tensorflow/lite/micro/micro_error_reporter.h" 6 | #include "tensorflow/lite/micro/micro_interpreter.h" 7 | #include "tensorflow/lite/micro/simple_memory_allocator.h" 8 | #include "tensorflow/lite/schema/schema_generated.h" 9 | #include "tensorflow/lite/version.h" 10 | 11 | // Create an area of memory to use for input, output, and intermediate arrays. 12 | // The size of this will depend on the model you're using, and may need to be 13 | // determined by experimentation. 14 | static const int tensor_arena_size = 6 * 1024; 15 | static uint8_t tensor_arena[tensor_arena_size]; 16 | 17 | extern const unsigned char g_model[]; 18 | // extern const int g_model_len; 19 | 20 | // Set up logging. 21 | static tflite::ErrorReporter* error_reporter = nullptr; 22 | // This pulls in all the operation implementations we need. 
23 | static tflite::AllOpsResolver* resolver = nullptr; 24 | static const tflite::Model* model = nullptr; 25 | static tflite::MicroInterpreter* interpreter = nullptr; 26 | 27 | void init(void) { 28 | static tflite::MicroErrorReporter micro_error_reporter; 29 | error_reporter = µ_error_reporter; 30 | 31 | // Map the model into a usable data structure. This doesn't involve any 32 | // copying or parsing, it's a very lightweight operation. 33 | model = ::tflite::GetModel(g_model); 34 | if (model->version() != TFLITE_SCHEMA_VERSION) { 35 | TF_LITE_REPORT_ERROR(error_reporter, 36 | "Model provided is schema version %d not equal " 37 | "to supported version %d.\n", 38 | model->version(), TFLITE_SCHEMA_VERSION); 39 | return; 40 | } 41 | static tflite::AllOpsResolver local_resolver; 42 | resolver = &local_resolver; 43 | 44 | // Build an interpreter to run the model with. 45 | static tflite::MicroInterpreter static_interpreter( 46 | model, *resolver, tensor_arena, tensor_arena_size, error_reporter); 47 | interpreter = &static_interpreter; 48 | TfLiteStatus allocate_status = interpreter->AllocateTensors(); 49 | if (allocate_status != kTfLiteOk) { 50 | TF_LITE_REPORT_ERROR(error_reporter, "AllocateTensors() failed"); 51 | return; 52 | } 53 | } 54 | 55 | void run() { 56 | TfLiteTensor* model_input = interpreter->input(0); 57 | model_input->data.f[0] = 1.57f; // roughly PI/2 58 | 59 | TfLiteStatus invoke_status = interpreter->Invoke(); 60 | if (invoke_status != kTfLiteOk) { 61 | TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed"); 62 | } 63 | TfLiteTensor* model_output = interpreter->output(0); 64 | std::cerr << "result " << model_output->data.f[0] << std::endl; 65 | } 66 | 67 | int main(int argc, char** argv) { 68 | init(); 69 | run(); 70 | return 0; 71 | } 72 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/hello_world.tflite: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/tflite_micro_compiler/examples/hello_world.tflite -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/hello_world2.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2020 Christof Petig. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | #include 17 | 18 | #include "compiled_hello_world.cc.h" 19 | #include "tensorflow/lite/c/common.h" 20 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" 21 | 22 | void test_compiled(void) { 23 | hello_init(); 24 | tflite::GetTensorData(hello_input(0))[0] = 1.57f; 25 | hello_invoke(); 26 | float out = tflite::GetTensorData(hello_output(0))[0]; 27 | std::cerr << "result " << out << std::endl; 28 | } 29 | 30 | int main(int argc, char** argv) { 31 | test_compiled(); 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/lstm2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import random 3 | import math 4 | import numpy as np 5 | import tensorflow as tf 6 | from tensorflow.keras.models import Sequential 7 | from tensorflow.keras.layers import Dense, 
Dropout, LSTM 8 | 9 | train_batches=2000 10 | eval_batches=50 11 | train_sequlen=32 12 | train_inputs=1 13 | lstm_states=6 14 | #activation="relu" 15 | activation=None 16 | rec_activation="hard_sigmoid" 17 | 18 | x_train = np.zeros((train_batches*train_sequlen,1,train_inputs)) 19 | y_train = np.zeros((train_batches*train_sequlen,1,1)) 20 | x_test = np.zeros((eval_batches*train_sequlen,1,train_inputs)) 21 | y_test = np.zeros((eval_batches*train_sequlen,1,1)) 22 | 23 | random.seed(1234) 24 | 25 | # generate input of random sine waves, feed one at a time to the network 26 | 27 | def random_sample(): 28 | ampl = random.uniform(0.5,1) 29 | freq = random.uniform(18,32) 30 | phase= random.uniform(-math.pi,math.pi) 31 | return (ampl,freq,phase) 32 | 33 | def waveform(ampl,freq,phase,idx): 34 | return ampl*math.sin(idx/freq*2*math.pi+phase) 35 | 36 | # calculate train data 37 | for i in range(train_batches): 38 | (ampl,freq,phase) = random_sample() 39 | for j in range(train_sequlen): # subsequent measurements 40 | for k in range(train_inputs): 41 | x_train[i*train_sequlen+j][0][k]=waveform(ampl,freq,phase,j+k) 42 | y_train[i*train_sequlen+j][0]=waveform(ampl,freq,phase,j+train_inputs) 43 | for i in range(eval_batches): 44 | (ampl,freq,phase) = random_sample() 45 | for j in range(train_sequlen): # subsequent measurements 46 | for k in range(train_inputs): 47 | x_test[i*train_sequlen+j][0][k]=waveform(ampl,freq,phase,j+k) 48 | y_test[i*train_sequlen+j][0]=waveform(ampl,freq,phase,j+train_inputs) 49 | 50 | print(x_train[0][0:5], y_train[0][0:5]) 51 | print(x_train.shape, y_train.shape) 52 | print(x_test.shape, y_test.shape) 53 | 54 | def create_model(train=True): 55 | 56 | if train: 57 | input0 = tf.keras.Input(batch_shape=(train_sequlen,1,train_inputs)) 58 | # stateful is worse 59 | x = LSTM(lstm_states, recurrent_activation=rec_activation, activation=activation, return_sequences=False, return_state=False, stateful=False)(input0) 60 | #x = Dropout(0.1)(x) makes it a bit worse 
61 | else: 62 | input0 = tf.keras.Input(batch_shape=(1,1,train_inputs),name="data") 63 | input1 = tf.keras.Input(batch_shape=(1,lstm_states),name="state_h") 64 | input2 = tf.keras.Input(batch_shape=(1,lstm_states),name="state_c") 65 | x, state,state2 = LSTM(lstm_states, recurrent_activation=rec_activation, activation=activation, return_sequences=False, return_state=True, stateful=True, unroll=True)(input0, initial_state=(input1, input2)) 66 | 67 | x = Dense(units=1)(x) 68 | 69 | if train: 70 | model = tf.keras.Model(inputs=input0, outputs=x, name="sine") 71 | else: 72 | model = tf.keras.Model(inputs=(input0,input1,input2), outputs=(x,state,state2), name="sine") 73 | model.summary() 74 | return model 75 | 76 | model=create_model() 77 | 78 | model.compile(loss='mean_squared_error', optimizer='adam') 79 | 80 | for i in range(8): 81 | model.fit(x_train, y_train, epochs=1, batch_size=train_sequlen, verbose=1, shuffle=False, 82 | validation_data=(x_test,y_test)) 83 | model.reset_states() 84 | 85 | model.save('mymodel') 86 | model.save('mymodel_w.h5', save_format="h5") 87 | 88 | model2= create_model(False) 89 | model2.load_weights('mymodel_w.h5') 90 | model2.save('evalmodel.h5', save_format="h5") 91 | 92 | model2.compile(loss='mean_squared_error', optimizer='adam') 93 | 94 | state_h2 = np.zeros((1,lstm_states)) 95 | state_c2 = np.zeros((1,lstm_states)) 96 | for i in range(train_sequlen): 97 | testx, testy = x_test[i], y_test[i] 98 | testx = testx.reshape(1, 1, 1) 99 | res = model2.predict([testx,state_h2,state_c2], batch_size=1) 100 | print('In=%.1f, Expected=%.1f, Predicted=%.1f' % (testx[0][0][0], testy, res[0])) 101 | state_h2=res[1] 102 | state_c2=res[2] 103 | 104 | # to convert to tflite use 105 | # tflite_convert --keras_model_file evalmodel.h5 --output_file evalmodel.tflite --inference_type FLOAT 106 | # from tensorflow 1.15 (2.2 doesn't work) 107 | -------------------------------------------------------------------------------- 
/tflite_micro_compiler/examples/lstm2.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/tflite_micro_compiler/examples/lstm2.tflite -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/lstm_compiled.cc.h: -------------------------------------------------------------------------------- 1 | // This file is generated. Do not edit. 2 | // Generated on: 12.08.2020 18:54:29 3 | 4 | #ifndef lstm_GEN_H 5 | #define lstm_GEN_H 6 | 7 | #include "tensorflow/lite/c/common.h" 8 | 9 | // Sets up the model with init and prepare steps. 10 | TfLiteStatus lstm_init(); 11 | // Returns the input tensor with the given index. 12 | TfLiteTensor *lstm_input(int index); 13 | // Returns the output tensor with the given index. 14 | TfLiteTensor *lstm_output(int index); 15 | // Runs inference for the model. 16 | TfLiteStatus lstm_invoke(); 17 | 18 | // Returns the number of input tensors. 19 | inline size_t lstm_inputs() { return 3; } 20 | // Returns the number of output tensors. 
21 | inline size_t lstm_outputs() { return 3; } 22 | 23 | inline void *lstm_input_ptr(int index) { return lstm_input(index)->data.data; } 24 | inline size_t lstm_input_size(int index) { return lstm_input(index)->bytes; } 25 | inline int lstm_input_dims_len(int index) { 26 | return lstm_input(index)->dims->data[0]; 27 | } 28 | inline int *lstm_input_dims(int index) { 29 | return &lstm_input(index)->dims->data[1]; 30 | } 31 | 32 | inline void *lstm_output_ptr(int index) { 33 | return lstm_output(index)->data.data; 34 | } 35 | inline size_t lstm_output_size(int index) { return lstm_output(index)->bytes; } 36 | inline int lstm_output_dims_len(int index) { 37 | return lstm_output(index)->dims->data[0]; 38 | } 39 | inline int *lstm_output_dims(int index) { 40 | return &lstm_output(index)->dims->data[1]; 41 | } 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/lstm_test.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2020 Christof Petig. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | ==============================================================================*/ 15 | 16 | #include <math.h> 17 | #include <stdio.h> 18 | 19 | #include "lstm_compiled.cc.h" 20 | #include "tensorflow/lite/c/common.h" 21 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" 22 | 23 | static float state_h[6], state_c[6]; 24 | 25 | static const float amplitude = 0.8; 26 | static const float wavelength = 16; 27 | static const float phase = -3.141593f / 2; // roughly -90deg 28 | 29 | float calculate_sine(uint32_t index) { 30 | return amplitude * sinf(index * (6.283185f / wavelength) + phase); 31 | } 32 | 33 | void test_compiled(void) { 34 | lstm_input(1)->data.f = state_h; 35 | lstm_input(2)->data.f = state_c; 36 | lstm_output(1)->data.f = state_h; // feed back to state 37 | lstm_output(2)->data.f = state_c; 38 | for (uint32_t i = 0; i < 30; ++i) { 39 | float in = calculate_sine(i); 40 | tflite::GetTensorData<float>(lstm_input(0))[0] = in; 41 | lstm_invoke(); 42 | printf("input %.3f output %.3f\n", in, 43 | tflite::GetTensorData<float>(lstm_output(0))[0]); 44 | } 45 | } 46 | 47 | int main(int argc, char** argv) { 48 | lstm_init(); 49 | test_compiled(); 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/mobilenet.cc: -------------------------------------------------------------------------------- 1 | 2 | #include <cstdio> // for check output 3 | 4 | #include "tensorflow/lite/micro/all_ops_resolver.h" 5 | #include "tensorflow/lite/micro/micro_error_reporter.h" 6 | #include "tensorflow/lite/micro/micro_interpreter.h" 7 | #include "tensorflow/lite/micro/simple_memory_allocator.h" 8 | #include "tensorflow/lite/schema/schema_generated.h" 9 | #include "tensorflow/lite/version.h" 10 | 11 | // Create an area of memory to use for input, output, and intermediate arrays. 12 | // The size of this will depend on the model you're using, and may need to be 13 | // determined by experimentation. 
14 | static const int tensor_arena_size = 10 * 1024 * 1024; 15 | static uint8_t tensor_arena[tensor_arena_size]; 16 | 17 | extern "C" const unsigned char __1_tflite[]; 18 | // extern "C" const unsigned int __1_tflite_len; 19 | extern "C" const unsigned char gnu_ppm[]; 20 | 21 | // Set up logging. 22 | static tflite::ErrorReporter* error_reporter = nullptr; 23 | // This pulls in all the operation implementations we need. 24 | static tflite::AllOpsResolver* resolver = nullptr; 25 | static const tflite::Model* model = nullptr; 26 | static tflite::MicroInterpreter* interpreter = nullptr; 27 | 28 | void init(void) { 29 | static tflite::MicroErrorReporter micro_error_reporter; 30 | error_reporter = &micro_error_reporter; 31 | 32 | // Map the model into a usable data structure. This doesn't involve any 33 | // copying or parsing, it's a very lightweight operation. 34 | model = ::tflite::GetModel(__1_tflite); 35 | if (model->version() != TFLITE_SCHEMA_VERSION) { 36 | TF_LITE_REPORT_ERROR(error_reporter, 37 | "Model provided is schema version %d not equal " 38 | "to supported version %d.\n", 39 | model->version(), TFLITE_SCHEMA_VERSION); 40 | return; 41 | } 42 | static tflite::AllOpsResolver local_resolver; 43 | resolver = &local_resolver; 44 | 45 | // Build an interpreter to run the model with. 
46 | static tflite::MicroInterpreter static_interpreter( 47 | model, *resolver, tensor_arena, tensor_arena_size, error_reporter); 48 | interpreter = &static_interpreter; 49 | TfLiteStatus allocate_status = interpreter->AllocateTensors(); 50 | if (allocate_status != kTfLiteOk) { 51 | TF_LITE_REPORT_ERROR(error_reporter, "AllocateTensors() failed"); 52 | return; 53 | } 54 | } 55 | 56 | // strictly this is no longer necessary at all 57 | void exit(void) { 58 | if (interpreter) { 59 | interpreter = 0; 60 | } 61 | if (resolver) { 62 | resolver = 0; 63 | } 64 | if (error_reporter) { 65 | error_reporter = 0; 66 | } 67 | model = 0; 68 | } 69 | 70 | void run() { 71 | TfLiteTensor* model_input = interpreter->input(0); 72 | memcpy(model_input->data.uint8, gnu_ppm, 160 * 160 * 3); 73 | 74 | TfLiteStatus invoke_status = interpreter->Invoke(); 75 | if (invoke_status != kTfLiteOk) { 76 | TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed"); 77 | } 78 | TfLiteTensor* model_output = interpreter->output(0); 79 | uint32_t best = 0; 80 | uint32_t bestval = model_output->data.uint8[0]; 81 | for (uint32_t i = 1; i < 1001; ++i) { 82 | if (model_output->data.uint8[i] > bestval) { 83 | bestval = model_output->data.uint8[i]; 84 | best = i; 85 | } 86 | } 87 | printf("Best match is %u with %d%%\n", best, (int)(bestval * 100 / 255)); 88 | } 89 | 90 | int main(int argc, char** argv) { 91 | init(); 92 | run(); 93 | exit(); 94 | return 0; 95 | } 96 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/mobilenet2.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2020 Christof Petig. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | #include 17 | 18 | #include "compiled_mobilenet.cc.h" 19 | #include "tensorflow/lite/c/common.h" 20 | #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" 21 | 22 | extern "C" const unsigned char gnu_ppm[]; 23 | 24 | void run() { 25 | TfLiteTensor* model_input = mobilenet_input(0); 26 | memcpy(model_input->data.uint8, gnu_ppm, 160 * 160 * 3); 27 | 28 | TfLiteStatus invoke_status = mobilenet_invoke(); 29 | if (invoke_status != kTfLiteOk) { 30 | fprintf(stderr, "Invoke failed\n"); 31 | } 32 | TfLiteTensor* model_output = mobilenet_output(0); 33 | uint32_t best = 0; 34 | uint32_t bestval = model_output->data.uint8[0]; 35 | for (uint32_t i = 1; i < 1001; ++i) { 36 | if (model_output->data.uint8[i] > bestval) { 37 | bestval = model_output->data.uint8[i]; 38 | best = i; 39 | } 40 | } 41 | printf("Best match is %u with %d%%\n", best, bestval * 100 / 256); 42 | } 43 | 44 | int main(int argc, char** argv) { 45 | mobilenet_init(); 46 | run(); 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /tflite_micro_compiler/examples/mobilenet_v1_0_25_160_quantized.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmos/lib_tflite_micro/0ff5d7de9103a6de60abbd4ea61836a490de7b76/tflite_micro_compiler/examples/mobilenet_v1_0_25_160_quantized.tflite -------------------------------------------------------------------------------- 
/tflite_micro_compiler/model_main.cpp: -------------------------------------------------------------------------------- 1 | #include "model.tflite.h" 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | if(model_init(NULL)){ 10 | printf("Error!\n"); 11 | } 12 | 13 | char *input_filename = argv[1]; 14 | 15 | xt::xarray input = xt::load_npy(input_filename); 16 | int8_t *in = model_input(0)->data.int8; 17 | for (int i=0;idata.int8; 27 | xt::xarray output; 28 | output.resize({model_output_size(n)}); 29 | 30 | for (int i=0;iwriteSource(out); 18 | } 19 | void TFLMC_Compiler::writeHeader(std::ostream &out) { 20 | compiler_->writeHeader(out); 21 | } 22 | 23 | // Returns a name that describes a tensors relation to network layers. 24 | std::string TFLMC_Compiler::getTensorName(int tensorIndex, int sg) const { 25 | return compiler_->getTensorName(tensorIndex, sg); 26 | } 27 | 28 | // Returns tensor arena size 29 | size_t TFLMC_Compiler::getTensorArenaSize() const { 30 | return compiler_->getTensorArenaSize(); 31 | } 32 | } // namespace tflmc -------------------------------------------------------------------------------- /tflite_micro_compiler/src/Api.h: -------------------------------------------------------------------------------- 1 | #ifndef TFLMCOMPILER_API_H 2 | #define TFLMCOMPILER_API_H 3 | 4 | #include 5 | 6 | #include "xcore_shared_config.h" 7 | 8 | namespace tflmc { 9 | 10 | class Compiler; 11 | 12 | class TFLMC_Compiler { 13 | public: 14 | TFLMC_Compiler(const void *modelData, 15 | const struct shared_config::xcore_metadata_t *sharedCfg, 16 | const std::string &versionString, 17 | const std::string &argsString, 18 | const std::string &prefix = "model_", 19 | const bool debugPrint = false); 20 | 21 | ~TFLMC_Compiler(); 22 | 23 | void writeSource(std::ostream &out); 24 | void writeHeader(std::ostream &out); 25 | 26 | // Returns a name that describes a tensors relation to network layers. 
27 | std::string getTensorName(int tensorIndex, int sg) const; 28 | 29 | // Returns tensor arena size 30 | size_t getTensorArenaSize() const; 31 | 32 | private: 33 | Compiler *compiler_; 34 | }; 35 | 36 | } // namespace tflmc 37 | 38 | #endif -------------------------------------------------------------------------------- /tflite_micro_compiler/src/CodeWriter.h: -------------------------------------------------------------------------------- 1 | #ifndef TFLMCOMPILER_CODEWRITER_H 2 | #define TFLMCOMPILER_CODEWRITER_H 3 | 4 | #include 5 | 6 | #include "tensorflow/lite/micro/micro_interpreter.h" 7 | 8 | namespace tflmc { 9 | 10 | // Helper functions for top-level code generation. 11 | class CodeWriter { 12 | public: 13 | CodeWriter(std::ostream &out, const tflite_micro::SubGraph *subgraph); 14 | 15 | void writeBuiltin(tflite_micro::BuiltinOperator op, const void *data, 16 | const std::string &name); 17 | 18 | // Write IntArray with variable declaration. 19 | void writeIntArray(const TfLiteIntArray &arr, const std::string &name); 20 | // Write only the comma separated contents of an IntArray. 
21 | void writeIntArrayData(const TfLiteIntArray &arr); 22 | 23 | void writeTensor(const TfLiteTensor &t, const std::string &name); 24 | 25 | void writeQuantization(const TfLiteQuantization &q, const std::string &name); 26 | 27 | #if TF_LITE_PACKED_QUANTIZED_DATA_VERSION == 100 28 | void writeQuantizationDetails(const TfLiteQuantization &q, 29 | const std::string &name); 30 | #endif 31 | 32 | template 33 | CodeWriter &operator<<(T &&value) { 34 | out_ << std::forward(value); 35 | return *this; 36 | } 37 | 38 | private: 39 | std::ostream &out_; 40 | const tflite_micro::SubGraph *subgraph_ = nullptr; 41 | }; 42 | 43 | } // namespace tflmc 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/Makefile.inc: -------------------------------------------------------------------------------- 1 | $(info Adding tflite-micro compiler) 2 | TFLITE_U_COMPILER_SRCS := \ 3 | src/CodeWriter.cc src/CustomOperators.cc src/MemMap.cc src/TypeToString.cc \ 4 | src/Compiler.cc src/main.cc src/RecordAllocations.cc 5 | 6 | TFLITE_U_COMPILER_HDRS := \ 7 | src/CodeWriter.h src/Compiler.h src/CustomOperators.h src/MemMap.h src/RecordAllocations.h src/TypeToString.h 8 | 9 | 10 | 11 | 12 | # Builds a standalone binary. 
13 | $(eval $(call microlite_test,compiler,\ 14 | $(TFLITE_U_COMPILER_SRCS),$(TFLITE_U_COMPILER_HDRS))) 15 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/MemMap.cc: -------------------------------------------------------------------------------- 1 | #include "MemMap.h" 2 | 3 | void tflmc::MemMap::recordROM(ptrdiff_t offset, size_t len, 4 | const std::string &tag) { 5 | m_romEntries.push_back({offset, len, tag}); 6 | } 7 | 8 | void tflmc::MemMap::recordRAM(ptrdiff_t offset, size_t len, 9 | const std::string &tag) { 10 | m_ramEntries.push_back({offset, len, tag}); 11 | } 12 | 13 | static void PrintBar(const std::string &label, float start, float end) { 14 | static const int BAR_WIDTH = 100; 15 | static const int TEXT_LABEL_START = 3; 16 | 17 | if (start == -1.0f) { 18 | for (int i = 0; i < BAR_WIDTH + 2; i++) { 19 | printf("#"); 20 | } 21 | printf("\n"); 22 | return; 23 | } 24 | 25 | int barStart = start * BAR_WIDTH; 26 | int barEnd = end * BAR_WIDTH; 27 | bool smallBar = false; 28 | if (barStart == barEnd) { 29 | // Avoid zero width bars. 30 | barEnd++; 31 | smallBar = true; 32 | } 33 | 34 | int labelStart = TEXT_LABEL_START; 35 | int labelEnd = labelStart + label.size(); 36 | if (labelStart <= barEnd && labelEnd >= barStart) { 37 | // Avoid hiding bar with label. 38 | labelEnd = BAR_WIDTH - TEXT_LABEL_START; 39 | labelStart = labelEnd - label.size(); 40 | if (labelStart <= barEnd && labelEnd >= barStart) { 41 | // Still overlaps, center should be fine. 42 | labelStart = (BAR_WIDTH + label.size()) / 2; 43 | labelEnd = (BAR_WIDTH - label.size()) / 2; 44 | } 45 | } 46 | 47 | printf("#"); 48 | for (int i = 0; i < BAR_WIDTH; i++) { 49 | if (i >= labelStart && i < labelEnd) { 50 | printf("%c", label[i - labelStart]); 51 | } else if (i >= barStart && i < barEnd) { 52 | printf(smallBar ? 
"|" : "X"); 53 | } else { 54 | printf("."); 55 | } 56 | } 57 | printf("#\n"); 58 | } 59 | 60 | void tflmc::MemMap::report() const { 61 | size_t constSize = 0; 62 | size_t arenaSize = 0; 63 | for (const auto &entry : m_romEntries) { 64 | constSize = std::max(constSize, entry.base + entry.len); 65 | } 66 | for (const auto &entry : m_ramEntries) { 67 | arenaSize = std::max(arenaSize, entry.base + entry.len); 68 | } 69 | 70 | printf("ROM summary: %lu bytes total\n", constSize); 71 | PrintBar("", -1.0f, -1.0f); 72 | for (const auto &entry : m_romEntries) { 73 | PrintBar(entry.tag, entry.base / (float)constSize, 74 | (entry.base + entry.len) / (float)constSize); 75 | } 76 | PrintBar("", -1.0f, -1.0f); 77 | 78 | printf("RAM summary: %lu bytes total\n", arenaSize); 79 | PrintBar("", -1.0f, -1.0f); 80 | for (const auto &entry : m_ramEntries) { 81 | PrintBar(entry.tag, entry.base / (float)arenaSize, 82 | (entry.base + entry.len) / (float)arenaSize); 83 | } 84 | PrintBar("", -1.0f, -1.0f); 85 | } 86 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/MemMap.h: -------------------------------------------------------------------------------- 1 | #ifndef TFLMCOMPILER_MEMMAP_H 2 | #define TFLMCOMPILER_MEMMAP_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace tflmc { 9 | 10 | // Keeps track of buffers and prints a summary. 
11 | class MemMap { 12 | public: 13 | void recordROM(ptrdiff_t offset, size_t len, const std::string &tag); 14 | void recordRAM(ptrdiff_t offset, size_t len, const std::string &tag); 15 | void report() const; 16 | 17 | private: 18 | struct Entry { 19 | ptrdiff_t base; 20 | size_t len; 21 | std::string tag; 22 | }; 23 | std::vector m_romEntries; 24 | std::vector m_ramEntries; 25 | }; 26 | 27 | } // namespace tflmc 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/TypeToString.cc: -------------------------------------------------------------------------------- 1 | #include "TypeToString.h" 2 | 3 | #include 4 | #include 5 | 6 | #define NAME(X) \ 7 | case X: \ 8 | return #X 9 | 10 | std::string tflmc::to_string(TfLiteType t) { 11 | switch (t) { 12 | NAME(kTfLiteFloat32); 13 | NAME(kTfLiteInt32); 14 | NAME(kTfLiteUInt8); 15 | NAME(kTfLiteInt64); 16 | NAME(kTfLiteString); 17 | NAME(kTfLiteBool); 18 | NAME(kTfLiteInt16); 19 | NAME(kTfLiteComplex64); 20 | NAME(kTfLiteInt8); 21 | NAME(kTfLiteFloat16); 22 | NAME(kTfLiteFloat64); 23 | default: 24 | throw std::runtime_error( 25 | "Missing case in TfLiteType to string conversion"); 26 | } 27 | } 28 | 29 | std::string tflmc::c_type(TfLiteType t) { 30 | switch (t) { 31 | case kTfLiteFloat32: 32 | return "float"; 33 | case kTfLiteInt32: 34 | return "int32_t"; 35 | case kTfLiteUInt8: 36 | return "uint8_t"; 37 | case kTfLiteInt64: 38 | return "int64_t"; 39 | // case kTfLiteString: return "float"; 40 | // case kTfLiteBool: return "float"; 41 | case kTfLiteInt16: 42 | return "int16_t"; 43 | // case kTfLiteComplex64: return "float"; 44 | case kTfLiteInt8: 45 | return "int8_t"; 46 | // case kTfLiteFloat16: return "float"; 47 | case kTfLiteFloat64: 48 | return "double"; 49 | default: 50 | throw std::runtime_error( 51 | "Missing case in TfLiteType to C type conversion"); 52 | } 53 | } 54 | 55 | std::string tflmc::to_string(TfLiteAllocationType t) { 56 | switch 
(t) { 57 | NAME(kTfLiteMmapRo); 58 | NAME(kTfLiteArenaRw); 59 | default: 60 | throw std::runtime_error( 61 | "Missing case in TfLiteAllocationType to string " 62 | "conversion"); 63 | } 64 | } 65 | 66 | std::string tflmc::to_string(TfLiteFusedActivation t) { 67 | switch (t) { 68 | NAME(kTfLiteActNone); 69 | NAME(kTfLiteActRelu); 70 | NAME(kTfLiteActReluN1To1); 71 | NAME(kTfLiteActRelu6); 72 | NAME(kTfLiteActTanh); 73 | NAME(kTfLiteActSignBit); 74 | NAME(kTfLiteActSigmoid); 75 | default: 76 | throw std::runtime_error( 77 | "Missing case in TfLiteFusedActivation to string conversion"); 78 | } 79 | } 80 | 81 | std::string tflmc::to_string(TfLiteFullyConnectedWeightsFormat t) { 82 | switch (t) { 83 | NAME(kTfLiteFullyConnectedWeightsFormatDefault); 84 | NAME(kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8); 85 | default: 86 | throw std::runtime_error( 87 | "Missing case in TfLiteFullyConnectedWeightsFormat to string " 88 | "conversion"); 89 | } 90 | } 91 | 92 | std::string tflmc::to_string(TfLitePadding t) { 93 | switch (t) { 94 | NAME(kTfLitePaddingUnknown); 95 | NAME(kTfLitePaddingSame); 96 | NAME(kTfLitePaddingValid); 97 | default: 98 | throw std::runtime_error( 99 | "Missing case in TfLitePadding to string conversion"); 100 | } 101 | } 102 | 103 | std::string tflmc::to_string(TfLitePaddingValues const& v) { 104 | std::stringstream out; 105 | out << "{ " << v.width << "," << v.height << ", " << v.width_offset << ", " 106 | << v.height_offset << " }"; 107 | return out.str(); 108 | } 109 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/TypeToString.h: -------------------------------------------------------------------------------- 1 | #ifndef TFLMCOMPILER_TYPETOSTRING_H 2 | #define TFLMCOMPILER_TYPETOSTRING_H 3 | 4 | #include 5 | 6 | #include "tensorflow/lite/c/builtin_op_data.h" 7 | 8 | namespace tflmc { 9 | 10 | std::string to_string(TfLiteType t); 11 | std::string c_type(TfLiteType t); 12 | std::string 
to_string(TfLiteAllocationType t); 13 | std::string to_string(TfLiteFusedActivation t); 14 | std::string to_string(TfLiteFullyConnectedWeightsFormat t); 15 | std::string to_string(TfLitePadding t); 16 | std::string to_string(TfLitePaddingValues const& v); 17 | 18 | } // namespace tflmc 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/main.cc: -------------------------------------------------------------------------------- 1 | #include "CodeWriter.h" 2 | #include "Compiler.h" 3 | 4 | int main(int argc, char *argv[]) { 5 | if (argc < 3 || argc > 4) { 6 | printf( 7 | "Usage: %s modelFile.tflite outFile.cpp [NamingPrefix = \"model_\"]\n", 8 | argv[0]); 9 | return 1; 10 | } 11 | 12 | std::string prefix = "model_"; 13 | if (argc == 4) { 14 | prefix = argv[3]; 15 | } 16 | 17 | if (!tflmc::CompileFile(argv[1], argv[2], prefix)) { 18 | return 1; 19 | } 20 | 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /tflite_micro_compiler/src/xtflm_conf.h: -------------------------------------------------------------------------------- 1 | // XTLM_OPERATORS must be 200 as we use PythonOpsResolver in 2 | // tflite micro compiler. 3 | // PythonOpsResolver is defined as MicroMutableOpResolver<200> in 4 | // https://github.com/tensorflow/tflite-micro/blob/main/python/tflite_micro/python_ops_resolver.h 5 | #define XTFLM_OPERATORS (200) 6 | #define NUM_OUTPUT_TENSORS (40) 7 | #define NUM_INPUT_TENSORS (40) 8 | #define MAX_DEBUG_LOG_LENGTH (1024) 9 | #define AISRV_GPIO_LENGTH (4) 10 | -------------------------------------------------------------------------------- /utils/compare_outputs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUR_DIR=$(pwd) 4 | 5 | OUT_DIR=$1 6 | 7 | cd sample_vww 8 | python run.py 9 | 10 | cd .. 
11 | mkdir $OUT_DIR 12 | 13 | clang++ -DTF_LITE_DISABLE_X86_NEON -DTF_LITE_STATIC_MEMORY -DNO_INTERPRETER -Ilib_tflite_micro/submodules/tflite-micro -Ilib_tflite_micro/submodules/flatbuffers/include -I../lib_nn/ -I. -std=c++14 main.cpp sample_vww/tfl_model.tflite.cpp -g -O0 -lxtflitemicro -Ltflite_micro_compiler/build -rpath /Users/deepakpanickal/code/ai_tools2/third_party/lib_tflite_micro/tflite_micro_compiler/build -I$CONDA_PREFIX/include -DTFLMC_PRINT_TENSORS -o $OUT_DIR/tfl.out 14 | 15 | $OUT_DIR/tfl.out $OUT_DIR/tfl.out >$OUT_DIR/tflite.json 2>&1 16 | 17 | clang++ -DTF_LITE_DISABLE_X86_NEON -DTF_LITE_STATIC_MEMORY -DNO_INTERPRETER -Ilib_tflite_micro/submodules/tflite-micro -Ilib_tflite_micro/submodules/flatbuffers/include -I../lib_nn/ -I. -std=c++14 main.cpp sample_vww/xcore_model.tflite.cpp -g -O0 -lxtflitemicro -Ltflite_micro_compiler/build -rpath /Users/deepakpanickal/code/ai_tools2/third_party/lib_tflite_micro/tflite_micro_compiler/build -I$CONDA_PREFIX/include -DTFLMC_PRINT_TENSORS -o $OUT_DIR/xcore.out 18 | 19 | $OUT_DIR/xcore.out sample_vww/xcore_model.params >$OUT_DIR/xcore.json 2>&1 20 | 21 | python diff_output.py $OUT_DIR/tflite.json $OUT_DIR/xcore.json >$OUT_DIR/accuracy_diff.txt 22 | 23 | cp sample_vww/tfl_model.tflite* $OUT_DIR 24 | cp sample_vww/xcore_model.tflite* $OUT_DIR 25 | 26 | exit 0 27 | -------------------------------------------------------------------------------- /utils/diff_output.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import sys 4 | 5 | # 6 | tfl_to_xcore_map = {1:3, 2:6, 3:9, 4:12, 6:15, 7:18, 8:21, 9:24, 10:27, 11:28} 7 | 8 | 9 | # Opening JSON file 10 | f = open(sys.argv[1]) 11 | tflite_data = json.load(f) 12 | # Closing file 13 | f.close() 14 | 15 | f = open(sys.argv[2]) 16 | xcore_data = json.load(f) 17 | # Closing file 18 | f.close() 19 | 20 | # Iterating through the json 21 | # list 22 | for i in tfl_to_xcore_map: 23 | tfl = 
np.array(tflite_data[i]["data"][0]["val"]) 24 | xc = np.array(xcore_data[tfl_to_xcore_map[i]]["data"][0]["val"]) 25 | diffs = tfl - xc 26 | unique, counts = np.unique(diffs, return_counts=True) 27 | print("\n\nTFLite %s, node %d" %(tflite_data[i]["op"], i)) 28 | print("Xcore %s, node %d" %(xcore_data[tfl_to_xcore_map[i]]["op"], tfl_to_xcore_map[i])) 29 | print(np.asarray((unique, counts)).T) 30 | -------------------------------------------------------------------------------- /utils/main.cpp: -------------------------------------------------------------------------------- 1 | #include "model.tflite.h" 2 | #include 3 | 4 | // #include 5 | // #include 6 | 7 | unsigned char checksum_calc(char *data, unsigned int length) 8 | { 9 | static char sum; 10 | static char * end; 11 | sum = 0; 12 | end = data + length; 13 | 14 | do 15 | { 16 | sum -= *data++; 17 | } while (data != end); 18 | return sum; 19 | } 20 | 21 | #define MAX_PARAMS_SIZE 5000000 22 | #define MAX_MODEL_CONTENT_SIZE 5000000 23 | static int load_binary_file(const char *filename, uint32_t *content, 24 | size_t size) { 25 | FILE *fd = fopen(filename, "rb"); 26 | if (fd == NULL) { 27 | fprintf(stderr, "Cannot read model/param file %s\n", filename); 28 | } 29 | int s = fread(content, 1, size, fd); 30 | fclose(fd); 31 | 32 | return s; 33 | } 34 | uint32_t params_content[MAX_MODEL_CONTENT_SIZE / sizeof(uint32_t)]; 35 | 36 | #define I16 37 | 38 | int main(int argc, char *argv[]) 39 | { 40 | (void)load_binary_file(argv[1], params_content, MAX_PARAMS_SIZE); 41 | 42 | if(model_init(params_content)){ 43 | printf("Error!\n"); 44 | } 45 | 46 | //xt::xarray input = xt::load_npy("input.npy"); 47 | 48 | for(int n=0; n< model_inputs(); ++n) { 49 | //int32_t *in = model_input(n)->data.i32; 50 | #ifdef I16 51 | int16_t *in = model_input(n)->data.i16; 52 | int size = model_input_size(n)/2; 53 | int k = -32768; 54 | for (int i=0;i= 32767) { 56 | k = -32768; 57 | } 58 | in[i] = k;//input[i]; 59 | k = k + 5000; 60 | } 61 | #else 
62 | int8_t *in = model_input(n)->data.int8; 63 | int size = model_input_size(n); 64 | int k = -128; 65 | for (int i=0;i= 128) { 67 | k = -128; 68 | } 69 | in[i] = k;//input[i]; 70 | k = k + 3; 71 | } 72 | #endif 73 | } 74 | printf("\n"); 75 | 76 | model_invoke(); 77 | 78 | for(int n=0; n< model_outputs(); ++n) { 79 | //int32_t *out = model_output(n)->data.i32; 80 | #ifdef I16 81 | int16_t *out = model_output(n)->data.i16; 82 | int size = model_output_size(n)/2; 83 | #else 84 | int8_t *out = model_output(n)->data.int8; 85 | int size = model_output_size(n); 86 | #endif 87 | for (int i=0;i