├── .clang-format ├── .clang-tidy ├── .cmake-format.yaml ├── .codespellrc ├── .gitattributes ├── .github ├── scripts │ └── conan-ci-setup.sh └── workflows │ └── ci.yml ├── .gitignore ├── BUILDING.md ├── CMakeLists.txt ├── CMakePresets.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── HACKING.md ├── LICENSE.txt ├── README.md ├── cmake ├── coverage.cmake ├── dev-mode.cmake ├── docs-ci.cmake ├── docs.cmake ├── folders.cmake ├── install-config.cmake ├── install-rules.cmake ├── lint-targets.cmake ├── lint.cmake ├── prelude.cmake ├── project-is-top-level.cmake ├── spell-targets.cmake ├── spell.cmake └── variables.cmake ├── conanfile.py ├── docs ├── Doxyfile.in ├── conf.py.in └── pages │ └── about.dox ├── example ├── CMakeLists.txt ├── README.md ├── imageClassifier.hpp └── mobilenet_v3_small.cpp ├── images ├── dog.jpg ├── keyboard.jpg └── large-logo.png ├── include └── edgerunner │ ├── edgerunner.hpp │ ├── model.hpp │ ├── qnn │ ├── backend.hpp │ ├── config.hpp │ ├── graph.hpp │ ├── model.hpp │ ├── tensor.hpp │ └── tensorOps.hpp │ ├── tensor.hpp │ └── tflite │ ├── model.hpp │ └── tensor.hpp ├── models ├── common │ └── imagenet_labels.txt ├── qnn │ ├── mobilenet_v3_large_quantized.so │ ├── mobilenet_v3_small.bin │ └── mobilenet_v3_small.so └── tflite │ ├── mobilenet_v3_large_quantized.tflite │ └── mobilenet_v3_small.tflite ├── profiles └── android ├── scripts └── run_with_adb.sh ├── source ├── edgerunner.cpp ├── qnn │ ├── backend.cpp │ ├── graph.cpp │ ├── model.cpp │ ├── tensor.cpp │ └── tensorOps.cpp └── tflite │ ├── model.cpp │ └── tensor.cpp ├── test ├── CMakeLists.txt └── source │ ├── bad_model_test.cpp │ ├── qnn_context_binary_npu_test.cpp │ ├── qnn_multiple_models_test.cpp │ ├── qnn_quantized_test.cpp │ ├── qnn_shared_library_npu_test.cpp │ ├── tflite_delegate_test.cpp │ ├── tflite_from_buffer_test.cpp │ ├── tflite_gpu_test.cpp │ ├── tflite_npu_test.cpp │ ├── tflite_quantized_test.cpp │ ├── tflite_test.cpp │ └── utils.hpp └── version.txt /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Chromium 4 | AccessModifierOffset: -2 5 | AlignAfterOpenBracket: Align 6 | AlignConsecutiveMacros: false 7 | AlignConsecutiveAssignments: false 8 | AlignConsecutiveBitFields: false 9 | AlignConsecutiveDeclarations: false 10 | AlignEscapedNewlines: DontAlign 11 | AlignOperands: DontAlign 12 | AlignTrailingComments: false 13 | AllowAllArgumentsOnNextLine: true 14 | AllowAllConstructorInitializersOnNextLine: false 15 | AllowAllParametersOfDeclarationOnNextLine: true 16 | AllowShortEnumsOnASingleLine: false 17 | AllowShortBlocksOnASingleLine: Empty 18 | AllowShortCaseLabelsOnASingleLine: false 19 | AllowShortFunctionsOnASingleLine: Inline 20 | AllowShortLambdasOnASingleLine: All 21 | AllowShortIfStatementsOnASingleLine: Never 22 | AllowShortLoopsOnASingleLine: false 23 | AlwaysBreakAfterDefinitionReturnType: None 24 | AlwaysBreakAfterReturnType: None 25 | AlwaysBreakBeforeMultilineStrings: true 26 | AlwaysBreakTemplateDeclarations: Yes 27 | BinPackArguments: false 28 | BinPackParameters: false 29 | BraceWrapping: 30 | AfterCaseLabel: false 31 | AfterClass: false 32 | AfterControlStatement: MultiLine 33 | AfterEnum: false 34 | AfterFunction: false 35 | AfterNamespace: false 36 | AfterObjCDeclaration: false 37 | AfterStruct: false 38 | AfterUnion: false 39 | AfterExternBlock: false 40 | BeforeCatch: false 41 | BeforeElse: false 42 | BeforeLambdaBody: false 43 | BeforeWhile: false 44 | IndentBraces: false 45 | 
SplitEmptyFunction: false 46 | SplitEmptyRecord: false 47 | SplitEmptyNamespace: false 48 | BreakBeforeBinaryOperators: NonAssignment 49 | BreakBeforeBraces: Custom 50 | # BreakBeforeInheritanceComma: true 51 | BreakInheritanceList: BeforeComma 52 | BreakBeforeTernaryOperators: true 53 | BreakConstructorInitializersBeforeComma: true 54 | BreakConstructorInitializers: BeforeComma 55 | BreakAfterJavaFieldAnnotations: true 56 | BreakStringLiterals: true 57 | ColumnLimit: 80 58 | CommentPragmas: '^ IWYU pragma:' 59 | CompactNamespaces: false 60 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 61 | ConstructorInitializerIndentWidth: 4 62 | ContinuationIndentWidth: 4 63 | Cpp11BracedListStyle: true 64 | DeriveLineEnding: false 65 | DerivePointerAlignment: false 66 | DisableFormat: false 67 | ExperimentalAutoDetectBinPacking: false 68 | FixNamespaceComments: true 69 | ForEachMacros: 70 | - foreach 71 | - Q_FOREACH 72 | - BOOST_FOREACH 73 | IncludeBlocks: Regroup 74 | IncludeCategories: 75 | # Standard library headers come before anything else 76 | - Regex: '^<[a-z_]+>' 77 | Priority: -1 78 | - Regex: '^<.+\.h(pp)?>' 79 | Priority: 1 80 | - Regex: '^<.*' 81 | Priority: 2 82 | - Regex: '.*' 83 | Priority: 3 84 | IncludeIsMainRegex: '' 85 | IncludeIsMainSourceRegex: '' 86 | IndentCaseLabels: true 87 | IndentCaseBlocks: false 88 | IndentGotoLabels: true 89 | IndentPPDirectives: AfterHash 90 | IndentExternBlock: NoIndent 91 | IndentWidth: 4 92 | IndentWrappedFunctionNames: false 93 | InsertTrailingCommas: Wrapped 94 | JavaScriptQuotes: Double 95 | JavaScriptWrapImports: true 96 | KeepEmptyLinesAtTheStartOfBlocks: false 97 | MacroBlockBegin: '' 98 | MacroBlockEnd: '' 99 | MaxEmptyLinesToKeep: 1 100 | NamespaceIndentation: None 101 | ObjCBinPackProtocolList: Never 102 | ObjCBlockIndentWidth: 2 103 | ObjCBreakBeforeNestedBlockParam: true 104 | ObjCSpaceAfterProperty: false 105 | ObjCSpaceBeforeProtocolList: true 106 | PenaltyBreakAssignment: 2 107 | PenaltyBreakBeforeFirstCallParameter: 1 108 | PenaltyBreakComment: 300 109 | PenaltyBreakFirstLessLess: 120 110 | PenaltyBreakString: 1000 111 | PenaltyBreakTemplateDeclaration: 10 112 | PenaltyExcessCharacter: 1000000 113 | PenaltyReturnTypeOnItsOwnLine: 200 114 | PointerAlignment: Left 115 | RawStringFormats: 116 | - Language: Cpp 117 | Delimiters: 118 | - cc 119 | - CC 120 | - cpp 121 | - Cpp 122 | - CPP 123 | - 'c++' 124 | - 'C++' 125 | CanonicalDelimiter: '' 126 | BasedOnStyle: google 127 | - Language: TextProto 128 | Delimiters: 129 | - pb 130 | - PB 131 | - proto 132 | - PROTO 133 | EnclosingFunctions: 134 | - EqualsProto 135 | - EquivToProto 136 | - PARSE_PARTIAL_TEXT_PROTO 137 | - PARSE_TEST_PROTO 138 | - PARSE_TEXT_PROTO 139 | - ParseTextOrDie 140 | - ParseTextProtoOrDie 141 | - ParseTestProto 142 | - ParsePartialTestProto 143 | CanonicalDelimiter: '' 144 | BasedOnStyle: google 145 | ReflowComments: true 146 | SortIncludes: true 147 | SortUsingDeclarations: true 148 | SpaceAfterCStyleCast: false 149 | SpaceAfterLogicalNot: false 150 | SpaceAfterTemplateKeyword: false 151 | SpaceBeforeAssignmentOperators: true 152 | SpaceBeforeCpp11BracedList: true 153 | SpaceBeforeCtorInitializerColon: true 154 | SpaceBeforeInheritanceColon: true 155 | SpaceBeforeParens: ControlStatementsExceptForEachMacros 156 | SpaceBeforeRangeBasedForLoopColon: true 157 | SpaceInEmptyBlock: false 158 | SpaceInEmptyParentheses: false 159 | SpacesBeforeTrailingComments: 2 160 | SpacesInAngles: false 161 | SpacesInConditionalStatement: false 162 | SpacesInContainerLiterals: 
false 163 | SpacesInCStyleCastParentheses: false 164 | SpacesInParentheses: false 165 | SpacesInSquareBrackets: false 166 | SpaceBeforeSquareBrackets: false 167 | Standard: Auto 168 | StatementMacros: 169 | - Q_UNUSED 170 | - QT_REQUIRE_VERSION 171 | TabWidth: 8 172 | UseCRLF: false 173 | UseTab: Never 174 | WhitespaceSensitiveMacros: 175 | - STRINGIZE 176 | - PP_STRINGIZE 177 | - BOOST_PP_STRINGIZE 178 | ... 179 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | --- 2 | # Enable ALL the things! Except not really 3 | # misc-non-private-member-variables-in-classes: the options don't do anything 4 | # modernize-use-nodiscard: too aggressive, attribute is situationally useful 5 | Checks: "*,\ 6 | -google-readability-todo,\ 7 | -altera-*,\ 8 | -fuchsia-*,\ 9 | fuchsia-multiple-inheritance,\ 10 | -llvm-header-guard,\ 11 | -llvm-include-order,\ 12 | -llvmlibc-*,\ 13 | -modernize-use-nodiscard,\ 14 | -misc-non-private-member-variables-in-classes" 15 | WarningsAsErrors: '' 16 | CheckOptions: 17 | - key: 'bugprone-argument-comment.StrictMode' 18 | value: 'true' 19 | # Prefer using enum classes with 2 values for parameters instead of bools 20 | - key: 'bugprone-argument-comment.CommentBoolLiterals' 21 | value: 'true' 22 | - key: 'bugprone-misplaced-widening-cast.CheckImplicitCasts' 23 | value: 'true' 24 | - key: 'bugprone-sizeof-expression.WarnOnSizeOfIntegerExpression' 25 | value: 'true' 26 | - key: 'bugprone-suspicious-string-compare.WarnOnLogicalNotComparison' 27 | value: 'true' 28 | - key: 'readability-simplify-boolean-expr.ChainedConditionalReturn' 29 | value: 'true' 30 | - key: 'readability-simplify-boolean-expr.ChainedConditionalAssignment' 31 | value: 'true' 32 | - key: 'readability-uniqueptr-delete-release.PreferResetCall' 33 | value: 'true' 34 | - key: 'cppcoreguidelines-init-variables.MathHeader' 35 | value: '' 36 | - key: 'cppcoreguidelines-narrowing-conversions.PedanticMode' 37 | value: 'true' 38 | - key: 'readability-else-after-return.WarnOnUnfixable' 39 | value: 'true' 40 | - key: 'readability-else-after-return.WarnOnConditionVariables' 41 | value: 'true' 42 | - key: 'readability-inconsistent-declaration-parameter-name.Strict' 43 | value: 'true' 44 | - key: 'readability-qualified-auto.AddConstToQualified' 45 | value: 'true' 46 | - key: 'readability-redundant-access-specifiers.CheckFirstDeclaration' 47 | value: 'true' 48 | # These seem to be the most common identifier styles 49 | - key: 'readability-identifier-naming.AbstractClassCase' 50 | value: 'CamelCase' 51 | - key: 'readability-identifier-naming.ClassCase' 52 | value: 'CamelCase' 53 | - key: 'readability-identifier-naming.ClassConstantCase' 54 | value: 'camelBack' 55 | - key: 'readability-identifier-naming.ClassMemberCase' 56 | value: 'camelBack' 57 | - key: 'readability-identifier-naming.ClassMemberPrefix' 58 | value: 'm_' 59 | - key: 'readability-identifier-naming.ClassMethodCase' 60 | value: 'camelBack' 61 | - key: 'readability-identifier-naming.ConstantCase' 62 | value: 'camelBack' 63 | - key: 'readability-identifier-naming.ConstantMemberCase' 64 | value: 'camelBack' 65 | - key: 'readability-identifier-naming.ConstantMemberPrefix' 66 | value: 'm_' 67 | - key: 'readability-identifier-naming.ConstantParameterCase' 68 | value: 'camelBack' 69 | - key: 'readability-identifier-naming.ConstantPointerParameterCase' 70 | value: 'camelBack' 71 | - key: 'readability-identifier-naming.ConstexprFunctionCase' 
72 | value: 'camelBack' 73 | - key: 'readability-identifier-naming.ConstexprMethodCase' 74 | value: 'camelBack' 75 | - key: 'readability-identifier-naming.ConstexprVariableCase' 76 | value: 'CamelCase' 77 | - key: 'readability-identifier-naming.EnumCase' 78 | value: 'CamelCase' 79 | - key: 'readability-identifier-naming.EnumConstantCase' 80 | value: 'UPPER_CASE' 81 | - key: 'readability-identifier-naming.FunctionCase' 82 | value: 'camelBack' 83 | - key: 'readability-identifier-naming.GlobalConstantCase' 84 | value: 'CamelCase' 85 | - key: 'readability-identifier-naming.GlobalConstantPointerCase' 86 | value: 'CamelCase' 87 | - key: 'readability-identifier-naming.GlobalFunctionCase' 88 | value: 'camelBack' 89 | - key: 'readability-identifier-naming.GlobalPointerCase' 90 | value: 'camelBack' 91 | - key: 'readability-identifier-naming.GlobalVariableCase' 92 | value: 'camelBack' 93 | - key: 'readability-identifier-naming.InlineNamespaceCase' 94 | value: 'camelBack' 95 | - key: 'readability-identifier-naming.LocalConstantCase' 96 | value: 'camelBack' 97 | - key: 'readability-identifier-naming.LocalConstantPointerCase' 98 | value: 'camelBack' 99 | - key: 'readability-identifier-naming.LocalPointerCase' 100 | value: 'camelBack' 101 | - key: 'readability-identifier-naming.LocalVariableCase' 102 | value: 'camelBack' 103 | - key: 'readability-identifier-naming.MacroDefinitionCase' 104 | value: 'UPPER_CASE' 105 | - key: 'readability-identifier-naming.MemberCase' 106 | value: 'camelBack' 107 | - key: 'readability-identifier-naming.MethodCase' 108 | value: 'camelBack' 109 | - key: 'readability-identifier-naming.NamespaceCase' 110 | value: 'camelBack' 111 | - key: 'readability-identifier-naming.ParameterCase' 112 | value: 'camelBack' 113 | - key: 'readability-identifier-naming.ParameterPackCase' 114 | value: 'camelBack' 115 | - key: 'readability-identifier-naming.PointerParameterCase' 116 | value: 'camelBack' 117 | - key: 'readability-identifier-naming.PrivateMemberCase' 118 | value: 'camelBack' 119 | - key: 'readability-identifier-naming.PrivateMemberPrefix' 120 | value: 'm_' 121 | - key: 'readability-identifier-naming.PrivateMethodCase' 122 | value: 'camelBack' 123 | - key: 'readability-identifier-naming.ProtectedMemberCase' 124 | value: 'camelBack' 125 | - key: 'readability-identifier-naming.ProtectedMemberPrefix' 126 | value: 'm_' 127 | - key: 'readability-identifier-naming.ProtectedMethodCase' 128 | value: 'camelBack' 129 | - key: 'readability-identifier-naming.PublicMemberCase' 130 | value: 'camelBack' 131 | - key: 'readability-identifier-naming.PublicMethodCase' 132 | value: 'camelBack' 133 | - key: 'readability-identifier-naming.ScopedEnumConstantCase' 134 | value: 'CamelCase' 135 | - key: 'readability-identifier-naming.StaticConstantCase' 136 | value: 'camelBack' 137 | - key: 'readability-identifier-naming.StaticVariableCase' 138 | value: 'camelBack' 139 | - key: 'readability-identifier-naming.StructCase' 140 | value: 'CamelCase' 141 | - key: 'readability-identifier-naming.TemplateParameterCase' 142 | value: 'CamelCase' 143 | - key: 'readability-identifier-naming.TemplateTemplateParameterCase' 144 | value: 'CamelCase' 145 | - key: 'readability-identifier-naming.TypeAliasCase' 146 | value: 'CamelCase' 147 | - key: 'readability-identifier-naming.TypedefCase' 148 | value: 'CamelCase' 149 | - key: 'readability-identifier-naming.TypeTemplateParameterCase' 150 | value: 'CamelCase' 151 | - key: 'readability-identifier-naming.UnionCase' 152 | value: 'camelBack' 153 | - key: 
'readability-identifier-naming.ValueTemplateParameterCase' 154 | value: 'CamelCase' 155 | - key: 'readability-identifier-naming.VariableCase' 156 | value: 'camelBack' 157 | - key: 'readability-identifier-naming.VirtualMethodCase' 158 | value: 'camelBack' 159 | ... 160 | -------------------------------------------------------------------------------- /.cmake-format.yaml: -------------------------------------------------------------------------------- 1 | _help_parse: Options affecting listfile parsing 2 | parse: 3 | _help_additional_commands: 4 | - Specify structure for custom cmake functions 5 | additional_commands: 6 | foo: 7 | flags: 8 | - BAR 9 | - BAZ 10 | kwargs: 11 | HEADERS: '*' 12 | SOURCES: '*' 13 | DEPENDS: '*' 14 | _help_override_spec: 15 | - Override configurations per-command where available 16 | override_spec: {} 17 | _help_vartags: 18 | - Specify variable tags. 19 | vartags: [] 20 | _help_proptags: 21 | - Specify property tags. 22 | proptags: [] 23 | _help_format: Options affecting formatting. 24 | format: 25 | _help_disable: 26 | - Disable formatting entirely, making cmake-format a no-op 27 | disable: false 28 | _help_line_width: 29 | - How wide to allow formatted cmake files 30 | line_width: 80 31 | _help_tab_size: 32 | - How many spaces to tab for indent 33 | tab_size: 4 34 | _help_use_tabchars: 35 | - If true, lines are indented using tab characters (utf-8 36 | - 0x09) instead of space characters (utf-8 0x20). 37 | - In cases where the layout would require a fractional tab 38 | - character, the behavior of the fractional indentation is 39 | - governed by 40 | use_tabchars: false 41 | _help_fractional_tab_policy: 42 | - If is True, then the value of this variable 43 | - indicates how fractional indentions are handled during 44 | - whitespace replacement. If set to 'use-space', fractional 45 | - indentation is left as spaces (utf-8 0x20). If set to 46 | - '`round-up` fractional indentation is replaced with a single' 47 | - tab character (utf-8 0x09) effectively shifting the column 48 | - to the next tabstop 49 | fractional_tab_policy: use-space 50 | _help_max_subgroups_hwrap: 51 | - If an argument group contains more than this many sub-groups 52 | - (parg or kwarg groups) then force it to a vertical layout. 53 | max_subgroups_hwrap: 2 54 | _help_max_pargs_hwrap: 55 | - If a positional argument group contains more than this many 56 | - arguments, then force it to a vertical layout. 57 | max_pargs_hwrap: 6 58 | _help_max_rows_cmdline: 59 | - If a cmdline positional group consumes more than this many 60 | - lines without nesting, then invalidate the layout (and nest) 61 | max_rows_cmdline: 2 62 | _help_separate_ctrl_name_with_space: 63 | - If true, separate flow control names from their parentheses 64 | - with a space 65 | separate_ctrl_name_with_space: false 66 | _help_separate_fn_name_with_space: 67 | - If true, separate function names from parentheses with a 68 | - space 69 | separate_fn_name_with_space: false 70 | _help_dangle_parens: 71 | - If a statement is wrapped to more than one line, than dangle 72 | - the closing parenthesis on its own line. 
73 | dangle_parens: true 74 | _help_dangle_align: 75 | - If the trailing parenthesis must be 'dangled' on its on 76 | - 'line, then align it to this reference: `prefix`: the start' 77 | - 'of the statement, `prefix-indent`: the start of the' 78 | - 'statement, plus one indentation level, `child`: align to' 79 | - the column of the arguments 80 | dangle_align: prefix 81 | _help_min_prefix_chars: 82 | - If the statement spelling length (including space and 83 | - parenthesis) is smaller than this amount, then force reject 84 | - nested layouts. 85 | min_prefix_chars: 4 86 | _help_max_prefix_chars: 87 | - If the statement spelling length (including space and 88 | - parenthesis) is larger than the tab width by more than this 89 | - amount, then force reject un-nested layouts. 90 | max_prefix_chars: 10 91 | _help_max_lines_hwrap: 92 | - If a candidate layout is wrapped horizontally but it exceeds 93 | - this many lines, then reject the layout. 94 | max_lines_hwrap: 2 95 | _help_line_ending: 96 | - What style line endings to use in the output. 97 | line_ending: unix 98 | _help_command_case: 99 | - Format command names consistently as 'lower' or 'upper' case 100 | command_case: canonical 101 | _help_keyword_case: 102 | - Format keywords consistently as 'lower' or 'upper' case 103 | keyword_case: unchanged 104 | _help_always_wrap: 105 | - A list of command names which should always be wrapped 106 | always_wrap: [] 107 | _help_enable_sort: 108 | - If true, the argument lists which are known to be sortable 109 | - will be sorted lexicographicall 110 | enable_sort: true 111 | _help_autosort: 112 | - If true, the parsers may infer whether or not an argument 113 | - list is sortable (without annotation). 114 | autosort: false 115 | _help_require_valid_layout: 116 | - By default, if cmake-format cannot successfully fit 117 | - everything into the desired linewidth it will apply the 118 | - last, most aggressive attempt that it made. If this flag is 119 | - True, however, cmake-format will print error, exit with non- 120 | - zero status code, and write-out nothing 121 | require_valid_layout: false 122 | _help_layout_passes: 123 | - A dictionary mapping layout nodes to a list of wrap 124 | - decisions. See the documentation for more information. 125 | layout_passes: {} 126 | _help_markup: Options affecting comment reflow and formatting. 127 | markup: 128 | _help_bullet_char: 129 | - What character to use for bulleted lists 130 | bullet_char: '*' 131 | _help_enum_char: 132 | - What character to use as punctuation after numerals in an 133 | - enumerated list 134 | enum_char: . 135 | _help_first_comment_is_literal: 136 | - If comment markup is enabled, don't reflow the first comment 137 | - block in each listfile. Use this to preserve formatting of 138 | - your copyright/license statements. 139 | first_comment_is_literal: false 140 | _help_literal_comment_pattern: 141 | - If comment markup is enabled, don't reflow any comment block 142 | - which matches this (regex) pattern. Default is `None` 143 | - (disabled). 
144 | literal_comment_pattern: null 145 | _help_fence_pattern: 146 | - Regular expression to match preformat fences in comments 147 | - default= ``r'^\s*([`~]{3}[`~]*)(.*)$'`` 148 | fence_pattern: ^\s*([`~]{3}[`~]*)(.*)$ 149 | _help_ruler_pattern: 150 | - Regular expression to match rulers in comments default= 151 | - '``r''^\s*[^\w\s]{3}.*[^\w\s]{3}$''``' 152 | ruler_pattern: ^\s*[^\w\s]{3}.*[^\w\s]{3}$ 153 | _help_explicit_trailing_pattern: 154 | - If a comment line matches starts with this pattern then it 155 | - is explicitly a trailing comment for the preceding 156 | - argument. Default is '#<' 157 | explicit_trailing_pattern: '#<' 158 | _help_hashruler_min_length: 159 | - If a comment line starts with at least this many consecutive 160 | - hash characters, then don't lstrip() them off. This allows 161 | - for lazy hash rulers where the first hash char is not 162 | - separated by space 163 | hashruler_min_length: 10 164 | _help_canonicalize_hashrulers: 165 | - If true, then insert a space between the first hash char and 166 | - remaining hash chars in a hash ruler, and normalize its 167 | - length to fill the column 168 | canonicalize_hashrulers: true 169 | _help_enable_markup: 170 | - enable comment markup parsing and reflow 171 | enable_markup: true 172 | _help_lint: Options affecting the linter 173 | lint: 174 | _help_disabled_codes: 175 | - a list of lint codes to disable 176 | disabled_codes: [] 177 | _help_function_pattern: 178 | - regular expression pattern describing valid function names 179 | function_pattern: '[0-9a-z_]+' 180 | _help_macro_pattern: 181 | - regular expression pattern describing valid macro names 182 | macro_pattern: '[0-9A-Z_]+' 183 | _help_global_var_pattern: 184 | - regular expression pattern describing valid names for 185 | - variables with global (cache) scope 186 | global_var_pattern: '[A-Z][0-9A-Z_]+' 187 | _help_internal_var_pattern: 188 | - regular expression pattern describing valid names for 189 | - variables with global scope (but internal semantic) 190 | internal_var_pattern: _[A-Z][0-9A-Z_]+ 191 | _help_local_var_pattern: 192 | - regular expression pattern describing valid names for 193 | - variables with local scope 194 | local_var_pattern: '[a-z][a-z0-9_]+' 195 | _help_private_var_pattern: 196 | - regular expression pattern describing valid names for 197 | - privatedirectory variables 198 | private_var_pattern: _[0-9a-z_]+ 199 | _help_public_var_pattern: 200 | - regular expression pattern describing valid names for public 201 | - directory variables 202 | public_var_pattern: '[A-Z][0-9A-Z_]+' 203 | _help_argument_var_pattern: 204 | - regular expression pattern describing valid names for 205 | - function/macro arguments and loop variables. 206 | argument_var_pattern: '[a-z][a-z0-9_]+' 207 | _help_keyword_pattern: 208 | - regular expression pattern describing valid names for 209 | - keywords used in functions or macros 210 | keyword_pattern: '[A-Z][0-9A-Z_]+' 211 | _help_max_conditionals_custom_parser: 212 | - In the heuristic for C0201, how many conditionals to match 213 | - within a loop in before considering the loop a parser. 
214 | max_conditionals_custom_parser: 2 215 | _help_min_statement_spacing: 216 | - Require at least this many newlines between statements 217 | min_statement_spacing: 1 218 | _help_max_statement_spacing: 219 | - Require no more than this many newlines between statements 220 | max_statement_spacing: 2 221 | max_returns: 6 222 | max_branches: 12 223 | max_arguments: 5 224 | max_localvars: 15 225 | max_statements: 50 226 | _help_encode: Options affecting file encoding 227 | encode: 228 | _help_emit_byteorder_mark: 229 | - If true, emit the unicode byte-order mark (BOM) at the start 230 | - of the file 231 | emit_byteorder_mark: false 232 | _help_input_encoding: 233 | - Specify the encoding of the input file. Defaults to utf-8 234 | input_encoding: utf-8 235 | _help_output_encoding: 236 | - Specify the encoding of the output file. Defaults to utf-8. 237 | - Note that cmake only claims to support utf-8 so be careful 238 | - when using anything else 239 | output_encoding: utf-8 240 | _help_misc: Miscellaneous configurations options. 241 | misc: 242 | _help_per_command: 243 | - A dictionary containing any per-command configuration 244 | - overrides. Currently only `command_case` is supported. 245 | per_command: {} 246 | -------------------------------------------------------------------------------- /.codespellrc: -------------------------------------------------------------------------------- 1 | [codespell] 2 | builtin = clear,rare,names,informal,code 3 | check-filenames = 4 | check-hidden = 5 | skip = */.git,*/build,*/prefix,*/conan 6 | quiet-level = 2 7 | ignore-words-list=lite,crate 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuralize-ai/edgerunner/61978814d0d627ad44d5c7bcf9b205a43f5d2483/.gitattributes -------------------------------------------------------------------------------- /.github/scripts/conan-ci-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PS4='\033[1;34m>>>\033[0m ' 4 | 5 | set -xeu 6 | 7 | pip3 install conan 8 | 9 | conan profile detect -f 10 | 11 | std=17 12 | profile="$(conan profile path default)" 13 | 14 | mv "$profile" "${profile}.bak" 15 | sed 's/^\(compiler\.cppstd=\).\{1,\}$/\1'"$std/" "${profile}.bak" >"$profile" 16 | rm "${profile}.bak" 17 | 18 | if [ -f conan_cache_save.tgz ]; then 19 | conan cache restore conan_cache_save.tgz 20 | fi 21 | conan remove \* --lru=1M -c 22 | conan install . 
-b missing 23 | conan cache save '*/*:*' --file=conan_cache_save.tgz 24 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | lint: 14 | runs-on: ubuntu-22.04 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - uses: actions/setup-python@v5 20 | with: { python-version: "3.12" } 21 | 22 | - name: Install codespell 23 | run: pip3 install codespell 24 | 25 | - name: Lint 26 | run: cmake -D FORMAT_COMMAND=clang-format-15 -P cmake/lint.cmake 27 | 28 | - name: Spell check 29 | if: always() 30 | run: cmake -P cmake/spell.cmake 31 | 32 | coverage: 33 | needs: [lint] 34 | 35 | runs-on: ubuntu-22.04 36 | 37 | if: github.repository_owner == 'neuralize-ai' 38 | 39 | steps: 40 | - uses: actions/checkout@v4 41 | 42 | - name: Install LCov 43 | run: sudo apt-get update -q 44 | && sudo apt-get install lcov -q -y 45 | 46 | - name: Install Python 47 | uses: actions/setup-python@v5 48 | with: { python-version: "3.12" } 49 | 50 | - name: Conan cache 51 | uses: actions/cache@v4 52 | with: 53 | path: conan_cache_save.tgz 54 | key: conan-coverage-${{ hashFiles('conan*.[pl][yo]*') }} 55 | 56 | - name: Install dependencies 57 | run: bash < .github/scripts/conan-ci-setup.sh 58 | 59 | - name: Configure 60 | run: cmake --preset=ci-coverage 61 | 62 | - name: Build 63 | run: cmake --build build/coverage -j 2 64 | 65 | - name: Test 66 | working-directory: build/coverage 67 | run: ctest --output-on-failure --no-tests=error -j 2 68 | 69 | - name: Process coverage info 70 | run: cmake --build build/coverage -t coverage 71 | 72 | - name: Submit to codecov.io 73 | uses: codecov/codecov-action@v4 74 | with: 75 | file: build/coverage/coverage.info 76 | token: ${{ secrets.CODECOV_TOKEN }} 77 | 78 | sanitize: 79 | needs: [lint] 80 | 81 | runs-on: ubuntu-22.04 82 | 83 | env: { CXX: clang++-14 } 84 | 85 | steps: 86 | - uses: actions/checkout@v4 87 | 88 | - name: Install Python 89 | uses: actions/setup-python@v5 90 | with: { python-version: "3.12" } 91 | 92 | - name: Conan cache 93 | uses: actions/cache@v4 94 | with: 95 | path: conan_cache_save.tgz 96 | key: conan-sanitize-${{ hashFiles('conan*.[pl][yo]*') }} 97 | 98 | - name: Install dependencies 99 | run: bash < .github/scripts/conan-ci-setup.sh 100 | 101 | - name: Configure 102 | run: cmake --preset=ci-sanitize 103 | 104 | - name: Build 105 | run: cmake --build build/sanitize -j 2 106 | 107 | - name: Test 108 | working-directory: build/sanitize 109 | env: 110 | ASAN_OPTIONS: "strict_string_checks=1:\ 111 | detect_stack_use_after_return=1:\ 112 | check_initialization_order=1:\ 113 | strict_init_order=1:\ 114 | detect_leaks=1:\ 115 | halt_on_error=1" 116 | UBSAN_OPTIONS: "print_stacktrace=1:\ 117 | halt_on_error=1" 118 | run: ctest --output-on-failure --no-tests=error -j 2 119 | 120 | test: 121 | needs: [lint] 122 | 123 | strategy: 124 | matrix: 125 | os: [ubuntu-22.04, macos-14] # , windows-2022] 126 | 127 | type: [shared, static] 128 | 129 | include: 130 | - { type: shared, shared: YES } 131 | - { type: static, shared: NO } 132 | 133 | runs-on: ${{ matrix.os }} 134 | 135 | steps: 136 | - uses: actions/checkout@v4 137 | 138 | - name: Install static analyzers 139 | if: matrix.os == 'ubuntu-22.04' 140 | run: >- 141 | sudo apt-get install clang-tidy-14 cppcheck -y -q 142 | 143 | sudo 
update-alternatives --install 144 | /usr/bin/clang-tidy clang-tidy 145 | /usr/bin/clang-tidy-14 140 146 | 147 | - name: Install Python 148 | uses: actions/setup-python@v5 149 | with: { python-version: "3.12" } 150 | 151 | - name: Conan cache 152 | uses: actions/cache@v4 153 | with: 154 | path: conan_cache_save.tgz 155 | key: conan-${{ matrix.os }}-${{ hashFiles('conan*.[pl][yo]*') }} 156 | 157 | - name: Install dependencies 158 | shell: bash 159 | run: bash < .github/scripts/conan-ci-setup.sh 160 | 161 | - name: Setup MultiToolTask 162 | if: matrix.os == 'windows-2022' 163 | run: | 164 | Add-Content "$env:GITHUB_ENV" 'UseMultiToolTask=true' 165 | Add-Content "$env:GITHUB_ENV" 'EnforceProcessCountAcrossBuilds=true' 166 | 167 | - name: Configure 168 | shell: pwsh 169 | run: cmake "--preset=ci-$("${{ matrix.os }}".split("-")[0])" 170 | -D BUILD_SHARED_LIBS=${{ matrix.shared }} 171 | 172 | - name: Setup PATH 173 | if: matrix.os == 'windows-2022' && matrix.type == 'shared' 174 | run: Add-Content "$env:GITHUB_PATH" "$(Get-Location)\build\Release" 175 | 176 | - name: Build 177 | run: cmake --build build --config Release -j 2 178 | 179 | - name: Install 180 | run: cmake --install build --config Release --prefix prefix 181 | 182 | - name: Test 183 | working-directory: build 184 | run: ctest --output-on-failure --no-tests=error -C Release -j 2 185 | 186 | docs: 187 | # Deploy docs only when builds succeed 188 | needs: [sanitize, test] 189 | 190 | runs-on: ubuntu-22.04 191 | 192 | if: github.ref == 'refs/heads/main' 193 | && github.event_name == 'push' 194 | && github.repository_owner == 'neuralize-ai' 195 | 196 | permissions: 197 | contents: write 198 | 199 | steps: 200 | - uses: actions/checkout@v4 201 | 202 | - uses: actions/setup-python@v5 203 | with: { python-version: "3.12" } 204 | 205 | - name: Install m.css dependencies 206 | run: pip3 install jinja2 Pygments 207 | 208 | - name: Install Doxygen 209 | run: sudo apt-get update -q 210 | && sudo apt-get install doxygen -q -y 211 | 212 | - name: Build docs 213 | run: cmake "-DPROJECT_SOURCE_DIR=$PWD" "-DPROJECT_BINARY_DIR=$PWD/build" 214 | -P cmake/docs-ci.cmake 215 | 216 | - name: Deploy docs 217 | uses: peaceiris/actions-gh-pages@v4 218 | with: 219 | github_token: ${{ secrets.GITHUB_TOKEN }} 220 | publish_dir: build/docs/html 221 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.DS_Store 2 | .idea/ 3 | .vs/ 4 | .vscode/ 5 | .cache/ 6 | .clangd/ 7 | build/ 8 | cmake-build-*/ 9 | conan/ 10 | prefix/ 11 | CMakeLists.txt.user 12 | CMakeUserPresets.json 13 | compile_commands.json 14 | conan_cache_*.tgz 15 | -------------------------------------------------------------------------------- /BUILDING.md: -------------------------------------------------------------------------------- 1 | # Building with CMake 2 | 3 | ## Dependencies 4 | 5 | For a list of dependencies, please refer to [conanfile.py](conanfile.py). 6 | 7 | ## Build 8 | 9 | This project doesn't require any special command-line flags to build to keep 10 | things simple. 11 | 12 | Here are the steps for building in release mode with a single-configuration 13 | generator, like the Unix Makefiles one: 14 | 15 | ```sh 16 | cmake -S . -B build -D CMAKE_BUILD_TYPE=Release 17 | cmake --build build 18 | ``` 19 | 20 | Here are the steps for building in release mode with a multi-configuration 21 | generator, like the Visual Studio ones: 22 | 23 | ```sh 24 | cmake -S . 
-B build 25 | cmake --build build --config Release 26 | ``` 27 | 28 | ### Building with MSVC 29 | 30 | Note that MSVC by default is not standards compliant and you need to pass some 31 | flags to make it behave properly. See the `flags-msvc` preset in the 32 | [CMakePresets.json](CMakePresets.json) file for the flags and with what 33 | variable to provide them to CMake during configuration. 34 | 35 | ### Building on Apple Silicon 36 | 37 | CMake supports building on Apple Silicon properly since 3.20.1. Make sure you 38 | have the [latest version][1] installed. 39 | 40 | ## Install 41 | 42 | This project doesn't require any special command-line flags to install to keep 43 | things simple. As a prerequisite, the project has to be built with the above 44 | commands already. 45 | 46 | The below commands require at least CMake 3.15 to run, because that is the 47 | version in which [Install a Project][2] was added. 48 | 49 | Here is the command for installing the release mode artifacts with a 50 | single-configuration generator, like the Unix Makefiles one: 51 | 52 | ```sh 53 | cmake --install build 54 | ``` 55 | 56 | Here is the command for installing the release mode artifacts with a 57 | multi-configuration generator, like the Visual Studio ones: 58 | 59 | ```sh 60 | cmake --install build --config Release 61 | ``` 62 | 63 | ### CMake package 64 | 65 | This project exports a CMake package to be used with the [`find_package`][3] 66 | command of CMake: 67 | 68 | * Package name: `edgerunner` 69 | * Target name: `edgerunner::edgerunner` 70 | 71 | Example usage: 72 | 73 | ```cmake 74 | find_package(edgerunner REQUIRED) 75 | # Declare the imported target as a build requirement using PRIVATE, where 76 | # project_target is a target created in the consuming project 77 | target_link_libraries( 78 | project_target PRIVATE 79 | edgerunner::edgerunner 80 | ) 81 | ``` 82 | 83 | ### Note to packagers 84 | 85 | The `CMAKE_INSTALL_INCLUDEDIR` is set to a path other than just `include` if 86 | the project is configured as a top level project to avoid indirectly including 87 | other libraries when installed to a common prefix. Please review the 88 | [install-rules.cmake](cmake/install-rules.cmake) file for the full set of 89 | install rules. 
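As an illustrative sketch only (the prefix and include directory below are placeholders, not project requirements), a packager who prefers the plain `include` layout could override that variable at configure time:

```sh
# Hypothetical packaging flow; adjust the prefix and include dir to your
# distribution's conventions.
cmake -S . -B build -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_INCLUDEDIR=include
cmake --build build
cmake --install build --prefix /usr
```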
90 | 91 | [1]: https://cmake.org/download/ 92 | [2]: https://cmake.org/cmake/help/latest/manual/cmake.1.html#install-a-project 93 | [3]: https://cmake.org/cmake/help/latest/command/find_package.html 94 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | 3 | include(cmake/prelude.cmake) 4 | 5 | file(READ "version.txt" edgerunner_VERSION) 6 | string(STRIP ${edgerunner_VERSION} edgerunner_VERSION) 7 | 8 | project( 9 | edgerunner 10 | VERSION "${edgerunner_VERSION}" 11 | DESCRIPTION 12 | "Edgerunner is a cross-platform ML inference library for mobile devices" 13 | HOMEPAGE_URL "https://runlocal.ai" 14 | LANGUAGES CXX 15 | ) 16 | 17 | include(cmake/project-is-top-level.cmake) 18 | include(cmake/variables.cmake) 19 | 20 | # ---- Declare library ---- 21 | 22 | add_library(edgerunner_edgerunner source/edgerunner.cpp) 23 | add_library(edgerunner::edgerunner ALIAS edgerunner_edgerunner) 24 | 25 | include(GenerateExportHeader) 26 | generate_export_header( 27 | edgerunner_edgerunner 28 | BASE_NAME 29 | edgerunner 30 | EXPORT_FILE_NAME 31 | export/edgerunner/edgerunner_export.hpp 32 | CUSTOM_CONTENT_FROM_VARIABLE 33 | pragma_suppress_c4251 34 | ) 35 | 36 | if(NOT BUILD_SHARED_LIBS) 37 | target_compile_definitions( 38 | edgerunner_edgerunner PUBLIC EDGERUNNER_STATIC_DEFINE 39 | ) 40 | endif() 41 | 42 | set_target_properties( 43 | edgerunner_edgerunner 44 | PROPERTIES CXX_VISIBILITY_PRESET hidden 45 | VISIBILITY_INLINES_HIDDEN YES 46 | VERSION "${PROJECT_VERSION}" 47 | SOVERSION "${PROJECT_VERSION_MAJOR}" 48 | EXPORT_NAME edgerunner 49 | OUTPUT_NAME edgerunner 50 | ) 51 | 52 | target_include_directories( 53 | edgerunner_edgerunner ${warning_guard} 54 | PUBLIC "\$" 55 | ) 56 | 57 | target_include_directories( 58 | edgerunner_edgerunner SYSTEM 59 | PUBLIC "\$" 60 | ) 61 | 62 | target_compile_features(edgerunner_edgerunner PUBLIC cxx_std_17) 63 | 64 | find_package(fmt REQUIRED) 65 | target_link_libraries(edgerunner_edgerunner PRIVATE fmt::fmt) 66 | 67 | find_package(span-lite REQUIRED) 68 | target_link_libraries(edgerunner_edgerunner PUBLIC nonstd::span-lite) 69 | 70 | if(edgerunner_ENABLE_TFLITE) 71 | find_package(tensorflowlite REQUIRED) 72 | target_link_libraries( 73 | edgerunner_edgerunner PRIVATE tensorflow::tensorflowlite 74 | ) 75 | 76 | target_sources( 77 | edgerunner_edgerunner PRIVATE source/tflite/model.cpp 78 | source/tflite/tensor.cpp 79 | ) 80 | target_compile_definitions(edgerunner_edgerunner PUBLIC EDGERUNNER_TFLITE) 81 | endif() 82 | 83 | if(edgerunner_ENABLE_GPU) 84 | target_compile_definitions(edgerunner_edgerunner PUBLIC EDGERUNNER_GPU) 85 | endif() 86 | 87 | if(edgerunner_ENABLE_NPU) 88 | target_sources( 89 | edgerunner_edgerunner 90 | PRIVATE source/qnn/model.cpp source/qnn/tensor.cpp 91 | source/qnn/backend.cpp source/qnn/graph.cpp 92 | source/qnn/tensorOps.cpp 93 | ) 94 | 95 | find_package(qnn REQUIRED) 96 | target_link_libraries(edgerunner_edgerunner PRIVATE qnn::headers) 97 | 98 | if(ANDROID) 99 | target_compile_definitions(edgerunner_edgerunner PUBLIC EDGERUNNER_QNN) 100 | if(edgerunner_ENABLE_TFLITE) 101 | target_link_libraries(edgerunner_edgerunner PRIVATE qnn::tflite) 102 | endif() 103 | endif() 104 | endif() 105 | 106 | # ---- Install rules ---- 107 | 108 | if(NOT CMAKE_SKIP_INSTALL_RULES) 109 | include(cmake/install-rules.cmake) 110 | endif() 111 | 112 | # ---- Examples ---- 113 | 114 | 
if(PROJECT_IS_TOP_LEVEL) 115 | option(BUILD_EXAMPLES "Build examples tree." OFF) 116 | if(BUILD_EXAMPLES) 117 | add_subdirectory(example) 118 | endif() 119 | endif() 120 | 121 | # ---- Developer mode ---- 122 | 123 | if(NOT edgerunner_DEVELOPER_MODE) 124 | return() 125 | elseif(NOT PROJECT_IS_TOP_LEVEL) 126 | message( 127 | AUTHOR_WARNING "Developer mode is intended for developers of edgerunner" 128 | ) 129 | endif() 130 | 131 | include(cmake/dev-mode.cmake) 132 | -------------------------------------------------------------------------------- /CMakePresets.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 2, 3 | "cmakeMinimumRequired": { 4 | "major": 3, 5 | "minor": 14, 6 | "patch": 0 7 | }, 8 | "configurePresets": [ 9 | { 10 | "name": "cmake-pedantic", 11 | "hidden": true, 12 | "warnings": { 13 | "dev": true, 14 | "deprecated": true, 15 | "uninitialized": true, 16 | "unusedCli": true, 17 | "systemVars": false 18 | }, 19 | "errors": { 20 | "dev": true, 21 | "deprecated": true 22 | } 23 | }, 24 | { 25 | "name": "dev-mode", 26 | "hidden": true, 27 | "inherits": "cmake-pedantic", 28 | "cacheVariables": { 29 | "edgerunner_DEVELOPER_MODE": "ON" 30 | } 31 | }, 32 | { 33 | "name": "conan", 34 | "hidden": true, 35 | "cacheVariables": { 36 | "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/conan/conan_toolchain.cmake", 37 | "CMAKE_POLICY_DEFAULT_CMP0091": "NEW" 38 | } 39 | }, 40 | { 41 | "name": "cppcheck", 42 | "hidden": true, 43 | "cacheVariables": { 44 | "CMAKE_CXX_CPPCHECK": "cppcheck;--inline-suppr" 45 | } 46 | }, 47 | { 48 | "name": "clang-tidy", 49 | "hidden": true, 50 | "cacheVariables": { 51 | "CMAKE_CXX_CLANG_TIDY": "clang-tidy;--header-filter=^${sourceDir}/" 52 | } 53 | }, 54 | { 55 | "name": "ci-std", 56 | "description": "This preset makes sure the project actually builds with at least the specified standard", 57 | "hidden": true, 58 | "cacheVariables": { 59 | "CMAKE_CXX_EXTENSIONS": "OFF", 60 | "CMAKE_CXX_STANDARD": "17", 61 | "CMAKE_CXX_STANDARD_REQUIRED": "ON" 62 | } 63 | }, 64 | { 65 | "name": "flags-gcc-clang", 66 | "description": "These flags are supported by both GCC and Clang", 67 | "hidden": true, 68 | "cacheVariables": { 69 | "CMAKE_CXX_FLAGS": "-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 -D_GLIBCXX_ASSERTIONS=1 -fstack-protector-strong -fcf-protection=full -fstack-clash-protection -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -Wcast-qual -Wformat=2 -Wundef -Werror=float-equal -Wshadow -Wcast-align -Wunused -Wnull-dereference -Wdouble-promotion -Wimplicit-fallthrough -Wextra-semi -Woverloaded-virtual -Wnon-virtual-dtor -Wold-style-cast", 70 | "CMAKE_EXE_LINKER_FLAGS": "-Wl,--allow-shlib-undefined,--as-needed,-z,noexecstack,-z,relro,-z,now,-z,nodlopen", 71 | "CMAKE_SHARED_LINKER_FLAGS": "-Wl,--allow-shlib-undefined,--as-needed,-z,noexecstack,-z,relro,-z,now,-z,nodlopen" 72 | } 73 | }, 74 | { 75 | "name": "flags-appleclang", 76 | "hidden": true, 77 | "cacheVariables": { 78 | "CMAKE_CXX_FLAGS": "-fstack-protector-strong -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -Wcast-qual -Wformat=2 -Wundef -Werror=float-equal -Wshadow -Wcast-align -Wunused -Wnull-dereference -Wdouble-promotion -Wimplicit-fallthrough -Wextra-semi -Woverloaded-virtual -Wnon-virtual-dtor -Wold-style-cast" 79 | } 80 | }, 81 | { 82 | "name": "flags-msvc", 83 | "description": "Note that all the flags after /W4 are required for MSVC to conform to the language standard", 84 | "hidden": true, 85 | "cacheVariables": { 86 | "CMAKE_CXX_FLAGS": "/sdl 
/guard:cf /utf-8 /diagnostics:caret /w14165 /w44242 /w44254 /w44263 /w34265 /w34287 /w44296 /w44365 /w44388 /w44464 /w14545 /w14546 /w14547 /w14549 /w14555 /w34619 /w34640 /w24826 /w14905 /w14906 /w14928 /w45038 /W4 /permissive- /volatile:iso /Zc:inline /Zc:preprocessor /Zc:enumTypes /Zc:lambda /Zc:__cplusplus /Zc:externConstexpr /Zc:throwingNew /EHsc", 87 | "CMAKE_EXE_LINKER_FLAGS": "/machine:x64 /guard:cf", 88 | "CMAKE_SHARED_LINKER_FLAGS": "/machine:x64 /guard:cf" 89 | } 90 | }, 91 | { 92 | "name": "ci-linux", 93 | "inherits": ["flags-gcc-clang", "ci-std"], 94 | "generator": "Unix Makefiles", 95 | "hidden": true, 96 | "cacheVariables": { 97 | "CMAKE_BUILD_TYPE": "Release" 98 | } 99 | }, 100 | { 101 | "name": "ci-darwin", 102 | "inherits": ["flags-appleclang", "ci-std"], 103 | "generator": "Xcode", 104 | "hidden": true 105 | }, 106 | { 107 | "name": "ci-win64", 108 | "inherits": ["flags-msvc", "ci-std"], 109 | "generator": "Visual Studio 17 2022", 110 | "architecture": "x64", 111 | "hidden": true 112 | }, 113 | { 114 | "name": "coverage-linux", 115 | "binaryDir": "${sourceDir}/build/coverage", 116 | "inherits": "ci-linux", 117 | "hidden": true, 118 | "cacheVariables": { 119 | "ENABLE_COVERAGE": "ON", 120 | "CMAKE_BUILD_TYPE": "Coverage", 121 | "CMAKE_CXX_FLAGS_COVERAGE": "-Og -g --coverage -fkeep-inline-functions -fkeep-static-functions", 122 | "CMAKE_EXE_LINKER_FLAGS_COVERAGE": "--coverage", 123 | "CMAKE_SHARED_LINKER_FLAGS_COVERAGE": "--coverage", 124 | "CMAKE_MAP_IMPORTED_CONFIG_COVERAGE": "Coverage;RelWithDebInfo;Release;Debug;" 125 | } 126 | }, 127 | { 128 | "name": "ci-coverage", 129 | "inherits": ["coverage-linux", "dev-mode", "conan"], 130 | "cacheVariables": { 131 | "COVERAGE_HTML_COMMAND": "" 132 | } 133 | }, 134 | { 135 | "name": "ci-sanitize", 136 | "binaryDir": "${sourceDir}/build/sanitize", 137 | "inherits": ["ci-linux", "dev-mode", "conan"], 138 | "cacheVariables": { 139 | "CMAKE_BUILD_TYPE": "Sanitize", 140 | "CMAKE_CXX_FLAGS_SANITIZE": "-U_FORTIFY_SOURCE -O2 -g -fsanitize=address,undefined -fno-omit-frame-pointer -fno-common", 141 | "CMAKE_MAP_IMPORTED_CONFIG_SANITIZE": "Sanitize;RelWithDebInfo;Release;Debug;" 142 | } 143 | }, 144 | { 145 | "name": "ci-build", 146 | "binaryDir": "${sourceDir}/build", 147 | "hidden": true 148 | }, 149 | { 150 | "name": "ci-multi-config", 151 | "description": "Speed up multi-config generators by generating only one configuration instead of the defaults", 152 | "hidden": true, 153 | "cacheVariables": { 154 | "CMAKE_CONFIGURATION_TYPES": "Release" 155 | } 156 | }, 157 | { 158 | "name": "ci-macos", 159 | "inherits": ["ci-build", "ci-darwin", "dev-mode", "ci-multi-config", "conan"] 160 | }, 161 | { 162 | "name": "ci-ubuntu", 163 | "inherits": ["ci-build", "ci-linux", "clang-tidy", "conan", "cppcheck", "dev-mode"] 164 | }, 165 | { 166 | "name": "ci-windows", 167 | "inherits": ["ci-build", "ci-win64", "dev-mode", "ci-multi-config", "conan"] 168 | } 169 | ] 170 | } 171 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | * You will be judged by your contributions first, and your sense of humor 4 | second. 5 | * Nobody owes you anything. 
6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | 7 | 8 | ## Code of Conduct 9 | 10 | Please see the [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md) document. 11 | 12 | ## Getting started 13 | 14 | Helpful notes for developers can be found in the [`HACKING.md`](HACKING.md) 15 | document. 16 | 17 | In addition to the above, if you use the presets file as instructed, then you 18 | should NOT check it into source control, just as the CMake documentation 19 | suggests. 20 | -------------------------------------------------------------------------------- /HACKING.md: -------------------------------------------------------------------------------- 1 | # Hacking 2 | 3 | Here are some instructions to help you build and test this project as a 4 | developer and potential contributor. 5 | 6 | Edgerunner is designed to support (cross-)compiling for many architectures and 7 | operating systems, including most dependencies. In order to take as much of the 8 | burden as possible away from end users, there is some complex logic to the build 9 | process. This document is a good starting point to get up and running with CPU 10 | support. The steps must be followed carefully, but, once properly configured, it 11 | allows for quick development iteration. 12 | 13 | The [examples document](/example/README.md) outlines steps to add GPU 14 | and NPU support, but should be attempted after successfully running the tests 15 | as described below. 16 | 17 | ## Developer mode 18 | 19 | Build system targets that are only useful for developers of this project are 20 | hidden if the `edgerunner_DEVELOPER_MODE` option is disabled. Enabling this 21 | option makes tests and other developer targets and options available. Not 22 | enabling this option means that you are a consumer of this project and thus you 23 | have no need for these targets and options. 24 | 25 | Developer mode is always set to on in CI workflows. 26 | 27 | ### Presets 28 | 29 | This project makes use of [presets][1] to simplify the process of configuring 30 | the project. As a developer, you are recommended to always have the [latest 31 | CMake version][2] installed to make use of the latest Quality-of-Life 32 | additions. 33 | 34 | You have a few options to pass `edgerunner_DEVELOPER_MODE` to the configure 35 | command, but this project prefers to use presets. 36 | 37 | As a developer, you should create a `CMakeUserPresets.json` file at the root of 38 | the project: 39 | 40 | ```json 41 | { 42 | "version": 2, 43 | "cmakeMinimumRequired": { 44 | "major": 3, 45 | "minor": 14, 46 | "patch": 0 47 | }, 48 | "configurePresets": [ 49 | { 50 | "name": "dev", 51 | "binaryDir": "${sourceDir}/build/dev", 52 | "inherits": ["dev-mode", "conan", "ci-<os>"], 53 | "cacheVariables": { 54 | "CMAKE_BUILD_TYPE": "Debug" 55 | } 56 | } 57 | ], 58 | "buildPresets": [ 59 | { 60 | "name": "dev", 61 | "configurePreset": "dev", 62 | "configuration": "Debug" 63 | } 64 | ], 65 | "testPresets": [ 66 | { 67 | "name": "dev", 68 | "configurePreset": "dev", 69 | "configuration": "Debug", 70 | "output": { 71 | "outputOnFailure": true 72 | } 73 | } 74 | ] 75 | } 76 | ``` 77 | 78 | You should replace `<os>` in your newly created presets file with the name of 79 | the operating system you have, which may be `win64`, `linux` or `darwin`. You 80 | can see what these correspond to in the 81 | [`CMakePresets.json`](CMakePresets.json) file.
82 | 83 | `CMakeUserPresets.json` is also the perfect place in which you can put all 84 | sorts of things that you would otherwise want to pass to the configure command 85 | in the terminal. 86 | 87 | > **Note** 88 | > Some editors are pretty greedy with how they open projects with presets. 89 | > Some just randomly pick a preset and start configuring without your consent, 90 | > which can be confusing. Make sure that your editor configures when you 91 | > actually want it to, for example in CLion you have to make sure only the 92 | > `dev-dev` preset has `Enable profile` ticked in 93 | > `File > Settings... > Build, Execution, Deployment > CMake` and in Visual 94 | > Studio you have to set the option `Never run configure step automatically` 95 | > in `Tools > Options > CMake` **prior to opening the project**, after which 96 | > you can manually configure using `Project > Configure Cache`. 97 | 98 | ### Dependency manager 99 | 100 | The above preset will make use of the [conan][conan] dependency manager. After 101 | installing it, make sure you have a [Conan profile][profile] set up, then 102 | download the dependencies and generate the necessary CMake files by running 103 | this command in the project root: 104 | 105 | ```sh 106 | conan install . -s build_type=Debug -b missing 107 | ``` 108 | 109 | Note that if your conan profile does not specify the same compiler, standard 110 | level, build type and runtime library as CMake, then that could potentially 111 | cause issues. See the link above for the profiles documentation. 112 | 113 | [conan]: https://conan.io/ 114 | [profile]: https://docs.conan.io/2/reference/config_files/profiles.html 115 | 116 | #### Android 117 | 118 | An example Android profile is bundled with this repository. It can be installed 119 | to your local conan prefix using: 120 | 121 | ```sh 122 | conan config install profiles -tf profiles 123 | ``` 124 | 125 | Use it by adding `-pr android` to your `conan install` invocation. 126 | 127 | #### GPU 128 | 129 | For GPU support, add `-o gpu=True` to the `conan install` invocation. 130 | > [!NOTE] 131 | > The tensorflow-lite conan package disables GPU by default and as such these 132 | steps will not work currently. I have patched the recipe locally to enable GPU 133 | support and will make this available on Conan Center or another repository 134 | soon. In the meantime, my custom recipe can be used as outlined 135 | [here](https://github.com/neuralize-ai/tensorflow-lite-conan). If you have 136 | previously `conan install`ed, remove the existing TFLite package(s) using 137 | `conan remove "tensorflow-lite"`. Make sure to create the TFLite package 138 | version that is required in [conanfile](/conanfile.py). 139 | 140 | GPU support requires a functioning OpenCL installation. Refer to your OS 141 | documentation for the steps to set this up correctly for your GPU vendor. 142 | 143 | #### NPU 144 | 145 | There is support for executing on Qualcomm NPUs (more hardware support is 146 | upcoming). Since this involves using Qualcomm's pre-compiled shared libraries, 147 | I have created a Conan recipe that must be used, available 148 | [here](https://github.com/neuralize-ai/qnn-conan). Follow the instructions on 149 | that repository and the steps above with `-o with_npu=True` supplied to the 150 | `conan install` invocation. Make sure to create the package version required 151 | in [conanfile](/conanfile.py).
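Putting the preceding options together, a single `conan install` invocation for an Android build with GPU and NPU support might look like the sketch below. This is an example only; combine just the profile and options you actually need, and note the package availability caveats above.

```sh
# Illustrative combination of the flags described in the sections above.
conan install . -pr android -s build_type=Release -o gpu=True -o with_npu=True -b missing
```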
152 | 153 | ### Configure, build and test 154 | 155 | If you followed the above instructions, then you can configure, build and test 156 | the project respectively with the following commands from the project root on 157 | any operating system with any build system: 158 | 159 | ```sh 160 | cmake --preset=dev 161 | cmake --build --preset=dev 162 | ctest --preset=dev 163 | ``` 164 | 165 | If you are using a compatible editor (e.g. VSCode) or IDE (e.g. CLion, VS), you 166 | will also be able to select the above created user presets for automatic 167 | integration. 168 | 169 | Please note that both the build and test commands accept a `-j` flag to specify 170 | the number of jobs to use, which should ideally be set to the number of 171 | threads your CPU has. You may also want to add that to your preset using the 172 | `jobs` property; see the [presets documentation][1] for more details. 173 | 174 | For Android, the above `ctest` approach does not work. Instead, provided that `conan install` is invoked with an appropriate Android profile and Android compatible presets are used, there will be an additional `test-android` target that can be executed with: 175 | 176 | ```sh 177 | cmake --build --preset=<preset> -t test-android 178 | ``` 179 | 180 | Ensure [adb](https://developer.android.com/tools/adb) is configured and a device 181 | with USB debugging enabled is connected. 182 | 183 | ### Developer mode targets 184 | 185 | These are targets you may invoke using the build command from above, with an 186 | additional `-t <target>` flag: 187 | 188 | #### `coverage` 189 | 190 | Available if `ENABLE_COVERAGE` is enabled. This target processes the output of 191 | the previously run tests when built with coverage configuration. The commands 192 | this target runs can be found in the `COVERAGE_TRACE_COMMAND` and 193 | `COVERAGE_HTML_COMMAND` cache variables. The trace command produces an info 194 | file by default, which can be submitted to services with CI integration. The 195 | HTML command uses the trace command's output to generate an HTML document to 196 | `<binary-dir>/coverage_html` by default. 197 | 198 | #### `docs` 199 | 200 | Available if `BUILD_MCSS_DOCS` is enabled. Builds the documentation using 201 | Doxygen and m.css. The output will go to `<binary-dir>/docs` by default 202 | (customizable using `DOXYGEN_OUTPUT_DIRECTORY`). 203 | 204 | #### `format-check` and `format-fix` 205 | 206 | These targets run the clang-format tool on the codebase to check errors and to 207 | fix them respectively. Customization available using the `FORMAT_PATTERNS` and 208 | `FORMAT_COMMAND` cache variables. 209 | 210 | #### `run-examples` 211 | 212 | Available if `-o examples=True` was supplied to the `conan install` invocation. 213 | This is because the examples may require additional dependencies for pre and post 214 | processing that we do not wish to bundle with the main project. 215 | 216 | Runs all the examples created by the `add_example` command. 217 | 218 | Individual examples can be executed using `run_<example>` (without the 219 | extension) instead of `run-examples`. 220 | 221 | See [examples](./example) for more details. 222 | 223 | #### `spell-check` and `spell-fix` 224 | 225 | These targets run the codespell tool on the codebase to check errors and to fix 226 | them respectively. Customization available using the `SPELL_COMMAND` cache 227 | variable.
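As a quick reference, and assuming the `dev` preset created earlier with the relevant options above enabled, the developer mode targets are invoked like so:

```sh
cmake --build --preset=dev -t coverage      # requires ENABLE_COVERAGE
cmake --build --preset=dev -t docs          # requires BUILD_MCSS_DOCS
cmake --build --preset=dev -t format-check  # or format-fix
cmake --build --preset=dev -t spell-check   # or spell-fix
```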
228 | 229 | [1]: https://cmake.org/cmake/help/latest/manual/cmake-presets.7.html 230 | [2]: https://cmake.org/download/ 231 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Neuralize Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

6 | Edgerunner 10 | Simplified AI runtime integration for mobile app development 14 | Website | Contact | Discord | Twitter | Docs
25 | 26 | ## 💡 Introduction 27 | 28 | The purpose of Edgerunner is to facilitate easy integration of 29 | common AI model formats and inference runtimes 30 | ([TFLite](https://ai.google.dev/edge/lite), [ONNX](https://github.com/onnx/onnx), 31 | [QNN](https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk), 32 | etc.) for 33 | consumer mobile devices (smartphones, laptops, tablets, wearables, etc.). 34 | 35 | Edgerunner removes the complexities of deploying an off-the-shelf model into 36 | your app, with NPU acceleration, regardless of the model format or target device. 37 | Platform-specific NPU SDKs are managed for you and can be leveraged with a 38 | device-agnostic API. Edgerunner exposes a boilerplate-free API with sane defaults. 39 | 40 | Kotlin bindings for creating AI applications on Android with Edgerunner can 41 | be found at 42 | [edgerunner-android](https://github.com/neuralize-ai/edgerunner-android). We are 43 | also creating specific use cases built upon Edgerunner (Llama, Stable 44 | Diffusion, etc.) which will come with their own Android bindings. 45 | 46 | Please request additional features or desired use cases through GitHub issues or 47 | on our [Discord](https://discord.gg/y9EzZEkwbR). 48 | 49 | ## 🔌 Support 50 | 51 | ### OS 52 | 53 | | Android | iOS | Linux | MacOS | Windows | 54 | |:--------:|:-----:|:-----:|:-------:|:-------:| 55 | | ✅ | ⏳ | ✅ | ⏳ | ⏳ | 56 | 57 | ### NPU 58 | 59 | | Apple | Qualcomm | MediaTek | Samsung | Intel | AMD | 60 | |:------:|:--------:|:--------:|:-------:|:-----:|:---:| 61 | | ⏳ | ✅ | ⏳ | ⏳ | ⏳ | ⏳ | 62 | 63 | ## 🛠 Building and installing 64 | 65 | Edgerunner is in its early development stages. Refer to the 66 | [HACKING](/HACKING.md) document to get set up. 67 | 68 | ## 🕹 Usage 69 | 70 | Edgerunner is designed around the following usage pattern: 71 | 72 | ```cpp 73 | #include <edgerunner/edgerunner.hpp> 74 | #include <edgerunner/model.hpp> 75 | 76 | auto model = edge::createModel("/path/to/model"); 77 | 78 | model->applyDelegate(edge::DELEGATE::NPU); 79 | 80 | auto input = model->getInput(0)->getTensorAs<float>(); 81 | 82 | /* overwrite input data */ 83 | 84 | model->execute(); 85 | 86 | auto output = model->getOutput(0)->getTensorAs<float>(); 87 | 88 | /* interpret output data */ 89 | ``` 90 | 91 | See [examples](example/README.md) for more detailed usage. 92 | 93 | See [model.hpp](/include/edgerunner/model.hpp) and 94 | [tensor.hpp](/include/edgerunner/tensor.hpp) for the complete API. 95 | 96 | ## 🏆 Contributing 97 | 98 | See the [CONTRIBUTING](CONTRIBUTING.md) document. 99 | 100 | Join our [Discord](https://discord.gg/y9EzZEkwbR) to discuss any issues. 101 | 102 | ## 📜 Licensing 103 | 104 | See the [LICENSING](LICENSE.txt) document.
105 | -------------------------------------------------------------------------------- /cmake/coverage.cmake: -------------------------------------------------------------------------------- 1 | # ---- Variables ---- 2 | 3 | # We use variables separate from what CTest uses, because those have 4 | # customization issues 5 | set( 6 | COVERAGE_TRACE_COMMAND 7 | lcov -c -q 8 | -o "${PROJECT_BINARY_DIR}/coverage.info" 9 | -d "${PROJECT_BINARY_DIR}" 10 | --include "${PROJECT_SOURCE_DIR}/*" 11 | CACHE STRING 12 | "; separated command to generate a trace for the 'coverage' target" 13 | ) 14 | 15 | set( 16 | COVERAGE_HTML_COMMAND 17 | genhtml --legend -f -q 18 | "${PROJECT_BINARY_DIR}/coverage.info" 19 | -p "${PROJECT_SOURCE_DIR}" 20 | -o "${PROJECT_BINARY_DIR}/coverage_html" 21 | CACHE STRING 22 | "; separated command to generate an HTML report for the 'coverage' target" 23 | ) 24 | 25 | # ---- Coverage target ---- 26 | 27 | add_custom_target( 28 | coverage 29 | COMMAND ${COVERAGE_TRACE_COMMAND} 30 | COMMAND ${COVERAGE_HTML_COMMAND} 31 | COMMENT "Generating coverage report" 32 | VERBATIM 33 | ) 34 | -------------------------------------------------------------------------------- /cmake/dev-mode.cmake: -------------------------------------------------------------------------------- 1 | include(cmake/folders.cmake) 2 | 3 | include(CTest) 4 | if(BUILD_TESTING) 5 | add_subdirectory(test) 6 | endif() 7 | 8 | option(BUILD_MCSS_DOCS "Build documentation using Doxygen and m.css" OFF) 9 | if(BUILD_MCSS_DOCS) 10 | include(cmake/docs.cmake) 11 | endif() 12 | 13 | option(ENABLE_COVERAGE "Enable coverage support separate from CTest's" OFF) 14 | if(ENABLE_COVERAGE) 15 | include(cmake/coverage.cmake) 16 | endif() 17 | 18 | include(cmake/lint-targets.cmake) 19 | include(cmake/spell-targets.cmake) 20 | 21 | add_folders(Project) 22 | -------------------------------------------------------------------------------- /cmake/docs-ci.cmake: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | 3 | foreach(var IN ITEMS PROJECT_BINARY_DIR PROJECT_SOURCE_DIR) 4 | if(NOT DEFINED "${var}") 5 | message(FATAL_ERROR "${var} must be defined") 6 | endif() 7 | endforeach() 8 | set(bin "${PROJECT_BINARY_DIR}") 9 | set(src "${PROJECT_SOURCE_DIR}") 10 | 11 | # ---- Dependencies ---- 12 | 13 | set(mcss_SOURCE_DIR "${bin}/docs/.ci") 14 | if(NOT IS_DIRECTORY "${mcss_SOURCE_DIR}") 15 | file(MAKE_DIRECTORY "${mcss_SOURCE_DIR}") 16 | file( 17 | DOWNLOAD 18 | https://github.com/friendlyanon/m.css/releases/download/release-1/mcss.zip 19 | "${mcss_SOURCE_DIR}/mcss.zip" 20 | STATUS status 21 | EXPECTED_MD5 00cd2757ebafb9bcba7f5d399b3bec7f 22 | ) 23 | if(NOT status MATCHES "^0;") 24 | message(FATAL_ERROR "Download failed with ${status}") 25 | endif() 26 | execute_process( 27 | COMMAND "${CMAKE_COMMAND}" -E tar xf mcss.zip 28 | WORKING_DIRECTORY "${mcss_SOURCE_DIR}" 29 | RESULT_VARIABLE result 30 | ) 31 | if(NOT result EQUAL "0") 32 | message(FATAL_ERROR "Extraction failed with ${result}") 33 | endif() 34 | file(REMOVE "${mcss_SOURCE_DIR}/mcss.zip") 35 | endif() 36 | 37 | find_program(Python3_EXECUTABLE NAMES python3 python) 38 | if(NOT Python3_EXECUTABLE) 39 | message(FATAL_ERROR "Python executable was not found") 40 | endif() 41 | 42 | # ---- Process project() call in CMakeLists.txt ---- 43 | 44 | file(READ "${src}/CMakeLists.txt" content) 45 | 46 | string(FIND "${content}" "project(" index) 47 | if(index EQUAL "-1") 48 | message(FATAL_ERROR "Could not find \"project(\"") 
49 | endif() 50 | string(SUBSTRING "${content}" "${index}" -1 content) 51 | 52 | string(FIND "${content}" "\n)\n" index) 53 | if(index EQUAL "-1") 54 | message(FATAL_ERROR "Could not find \"\\n)\\n\"") 55 | endif() 56 | string(SUBSTRING "${content}" 0 "${index}" content) 57 | 58 | file(WRITE "${bin}/docs-ci.project.cmake" "docs_${content}\n)\n") 59 | 60 | macro(list_pop_front list out) 61 | list(GET "${list}" 0 "${out}") 62 | list(REMOVE_AT "${list}" 0) 63 | endmacro() 64 | 65 | function(docs_project name) 66 | cmake_parse_arguments(PARSE_ARGV 1 "" "" "VERSION;DESCRIPTION;HOMEPAGE_URL" LANGUAGES) 67 | set(PROJECT_NAME "${name}" PARENT_SCOPE) 68 | if(DEFINED _VERSION) 69 | set(PROJECT_VERSION "${_VERSION}" PARENT_SCOPE) 70 | string(REGEX MATCH "^[0-9]+(\\.[0-9]+)*" versions "${_VERSION}") 71 | string(REPLACE . ";" versions "${versions}") 72 | set(suffixes MAJOR MINOR PATCH TWEAK) 73 | while(NOT versions STREQUAL "" AND NOT suffixes STREQUAL "") 74 | list_pop_front(versions version) 75 | list_pop_front(suffixes suffix) 76 | set("PROJECT_VERSION_${suffix}" "${version}" PARENT_SCOPE) 77 | endwhile() 78 | endif() 79 | if(DEFINED _DESCRIPTION) 80 | set(PROJECT_DESCRIPTION "${_DESCRIPTION}" PARENT_SCOPE) 81 | endif() 82 | if(DEFINED _HOMEPAGE_URL) 83 | set(PROJECT_HOMEPAGE_URL "${_HOMEPAGE_URL}" PARENT_SCOPE) 84 | endif() 85 | endfunction() 86 | 87 | include("${bin}/docs-ci.project.cmake") 88 | 89 | # ---- Generate docs ---- 90 | 91 | if(NOT DEFINED DOXYGEN_OUTPUT_DIRECTORY) 92 | set(DOXYGEN_OUTPUT_DIRECTORY "${bin}/docs") 93 | endif() 94 | set(out "${DOXYGEN_OUTPUT_DIRECTORY}") 95 | 96 | foreach(file IN ITEMS Doxyfile conf.py) 97 | configure_file("${src}/docs/${file}.in" "${bin}/docs/${file}" @ONLY) 98 | endforeach() 99 | 100 | set(mcss_script "${mcss_SOURCE_DIR}/documentation/doxygen.py") 101 | set(config "${bin}/docs/conf.py") 102 | 103 | file(REMOVE_RECURSE "${out}/html" "${out}/xml") 104 | 105 | execute_process( 106 | COMMAND "${Python3_EXECUTABLE}" "${mcss_script}" "${config}" 107 | WORKING_DIRECTORY "${bin}/docs" 108 | RESULT_VARIABLE result 109 | ) 110 | if(NOT result EQUAL "0") 111 | message(FATAL_ERROR "m.css returned with ${result}") 112 | endif() 113 | -------------------------------------------------------------------------------- /cmake/docs.cmake: -------------------------------------------------------------------------------- 1 | # ---- Dependencies ---- 2 | 3 | set(extract_timestamps "") 4 | if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24") 5 | set(extract_timestamps DOWNLOAD_EXTRACT_TIMESTAMP YES) 6 | endif() 7 | 8 | include(FetchContent) 9 | FetchContent_Declare( 10 | mcss 11 | GIT_REPOSITORY https://github.com/mosra/m.css 12 | GIT_TAG 523506668a61646603ed299e1b60b7f77a8ebd77 13 | SOURCE_DIR "${PROJECT_BINARY_DIR}/mcss" 14 | UPDATE_DISCONNECTED YES 15 | ${extract_timestamps} 16 | ) 17 | FetchContent_MakeAvailable(mcss) 18 | 19 | find_package(Python3 3.6 REQUIRED) 20 | 21 | # ---- Declare documentation target ---- 22 | 23 | set( 24 | DOXYGEN_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/docs" 25 | CACHE PATH "Path for the generated Doxygen documentation" 26 | ) 27 | 28 | set(working_dir "${PROJECT_BINARY_DIR}/docs") 29 | 30 | foreach(file IN ITEMS Doxyfile conf.py) 31 | configure_file("docs/${file}.in" "${working_dir}/${file}" @ONLY) 32 | endforeach() 33 | 34 | set(mcss_script "${mcss_SOURCE_DIR}/documentation/doxygen.py") 35 | set(config "${working_dir}/conf.py") 36 | 37 | add_custom_target( 38 | docs 39 | COMMAND "${CMAKE_COMMAND}" -E remove_directory 40 | 
"${DOXYGEN_OUTPUT_DIRECTORY}/html" 41 | "${DOXYGEN_OUTPUT_DIRECTORY}/xml" 42 | COMMAND "${Python3_EXECUTABLE}" "${mcss_script}" "${config}" 43 | COMMENT "Building documentation using Doxygen and m.css" 44 | WORKING_DIRECTORY "${working_dir}" 45 | VERBATIM 46 | ) 47 | -------------------------------------------------------------------------------- /cmake/folders.cmake: -------------------------------------------------------------------------------- 1 | set_property(GLOBAL PROPERTY USE_FOLDERS YES) 2 | 3 | # Call this function at the end of a directory scope to assign a folder to 4 | # targets created in that directory. Utility targets will be assigned to the 5 | # UtilityTargets folder, otherwise to the ${name}Targets folder. If a target 6 | # already has a folder assigned, then that target will be skipped. 7 | function(add_folders name) 8 | get_property(targets DIRECTORY PROPERTY BUILDSYSTEM_TARGETS) 9 | foreach(target IN LISTS targets) 10 | get_property(folder TARGET "${target}" PROPERTY FOLDER) 11 | if(DEFINED folder) 12 | continue() 13 | endif() 14 | set(folder Utility) 15 | get_property(type TARGET "${target}" PROPERTY TYPE) 16 | if(NOT type STREQUAL "UTILITY") 17 | set(folder "${name}") 18 | endif() 19 | set_property(TARGET "${target}" PROPERTY FOLDER "${folder}Targets") 20 | endforeach() 21 | endfunction() 22 | -------------------------------------------------------------------------------- /cmake/install-config.cmake: -------------------------------------------------------------------------------- 1 | include(CMakeFindDependencyMacro) 2 | find_dependency(span-lite) 3 | 4 | include("${CMAKE_CURRENT_LIST_DIR}/edgerunnerTargets.cmake") 5 | -------------------------------------------------------------------------------- /cmake/install-rules.cmake: -------------------------------------------------------------------------------- 1 | if(PROJECT_IS_TOP_LEVEL) 2 | set( 3 | CMAKE_INSTALL_INCLUDEDIR "include" 4 | CACHE STRING "" 5 | ) 6 | set_property(CACHE CMAKE_INSTALL_INCLUDEDIR PROPERTY TYPE PATH) 7 | endif() 8 | 9 | include(CMakePackageConfigHelpers) 10 | include(GNUInstallDirs) 11 | 12 | # find_package() call for consumers to find this project 13 | set(package edgerunner) 14 | 15 | install( 16 | DIRECTORY 17 | include/ 18 | "${PROJECT_BINARY_DIR}/export/" 19 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" 20 | COMPONENT edgerunner_Development 21 | ) 22 | 23 | install( 24 | TARGETS edgerunner_edgerunner 25 | EXPORT edgerunnerTargets 26 | RUNTIME # 27 | COMPONENT edgerunner_Runtime 28 | LIBRARY # 29 | COMPONENT edgerunner_Runtime 30 | NAMELINK_COMPONENT edgerunner_Development 31 | ARCHIVE # 32 | COMPONENT edgerunner_Development 33 | INCLUDES # 34 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" 35 | ) 36 | 37 | write_basic_package_version_file( 38 | "${package}ConfigVersion.cmake" 39 | COMPATIBILITY SameMajorVersion 40 | ) 41 | 42 | # Allow package maintainers to freely override the path for the configs 43 | set( 44 | edgerunner_INSTALL_CMAKEDIR "${CMAKE_INSTALL_LIBDIR}/cmake/${package}" 45 | CACHE STRING "CMake package config location relative to the install prefix" 46 | ) 47 | set_property(CACHE edgerunner_INSTALL_CMAKEDIR PROPERTY TYPE PATH) 48 | mark_as_advanced(edgerunner_INSTALL_CMAKEDIR) 49 | 50 | install( 51 | FILES cmake/install-config.cmake 52 | DESTINATION "${edgerunner_INSTALL_CMAKEDIR}" 53 | RENAME "${package}Config.cmake" 54 | COMPONENT edgerunner_Development 55 | ) 56 | 57 | install( 58 | FILES "${PROJECT_BINARY_DIR}/${package}ConfigVersion.cmake" 59 | DESTINATION 
"${edgerunner_INSTALL_CMAKEDIR}" 60 | COMPONENT edgerunner_Development 61 | ) 62 | 63 | install( 64 | EXPORT edgerunnerTargets 65 | NAMESPACE edgerunner:: 66 | DESTINATION "${edgerunner_INSTALL_CMAKEDIR}" 67 | COMPONENT edgerunner_Development 68 | ) 69 | 70 | if(PROJECT_IS_TOP_LEVEL) 71 | include(CPack) 72 | endif() 73 | -------------------------------------------------------------------------------- /cmake/lint-targets.cmake: -------------------------------------------------------------------------------- 1 | set( 2 | FORMAT_PATTERNS 3 | source/*.cpp source/*.hpp 4 | include/*.hpp 5 | test/*.cpp test/*.hpp 6 | example/*.cpp example/*.hpp 7 | CACHE STRING 8 | "; separated patterns relative to the project source dir to format" 9 | ) 10 | 11 | set(FORMAT_COMMAND clang-format CACHE STRING "Formatter to use") 12 | 13 | add_custom_target( 14 | format-check 15 | COMMAND "${CMAKE_COMMAND}" 16 | -D "FORMAT_COMMAND=${FORMAT_COMMAND}" 17 | -D "PATTERNS=${FORMAT_PATTERNS}" 18 | -P "${PROJECT_SOURCE_DIR}/cmake/lint.cmake" 19 | WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" 20 | COMMENT "Linting the code" 21 | VERBATIM 22 | ) 23 | 24 | add_custom_target( 25 | format-fix 26 | COMMAND "${CMAKE_COMMAND}" 27 | -D "FORMAT_COMMAND=${FORMAT_COMMAND}" 28 | -D "PATTERNS=${FORMAT_PATTERNS}" 29 | -D FIX=YES 30 | -P "${PROJECT_SOURCE_DIR}/cmake/lint.cmake" 31 | WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" 32 | COMMENT "Fixing the code" 33 | VERBATIM 34 | ) 35 | -------------------------------------------------------------------------------- /cmake/lint.cmake: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | 3 | macro(default name) 4 | if(NOT DEFINED "${name}") 5 | set("${name}" "${ARGN}") 6 | endif() 7 | endmacro() 8 | 9 | default(FORMAT_COMMAND clang-format) 10 | default( 11 | PATTERNS 12 | source/*.cpp source/*.hpp 13 | include/*.hpp 14 | test/*.cpp test/*.hpp 15 | example/*.cpp example/*.hpp 16 | ) 17 | default(FIX NO) 18 | 19 | set(flag --output-replacements-xml) 20 | set(args OUTPUT_VARIABLE output) 21 | if(FIX) 22 | set(flag -i) 23 | set(args "") 24 | endif() 25 | 26 | file(GLOB_RECURSE files ${PATTERNS}) 27 | set(badly_formatted "") 28 | set(output "") 29 | string(LENGTH "${CMAKE_SOURCE_DIR}/" path_prefix_length) 30 | 31 | foreach(file IN LISTS files) 32 | execute_process( 33 | COMMAND "${FORMAT_COMMAND}" --style=file "${flag}" "${file}" 34 | WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" 35 | RESULT_VARIABLE result 36 | ${args} 37 | ) 38 | if(NOT result EQUAL "0") 39 | message(FATAL_ERROR "'${file}': formatter returned with ${result}") 40 | endif() 41 | if(NOT FIX AND output MATCHES "\nDoxygen, making use of some useful 6 | * special commands. 
7 | */ 8 | -------------------------------------------------------------------------------- /example/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | 3 | project(edgerunnerExamples CXX) 4 | 5 | include(../cmake/project-is-top-level.cmake) 6 | include(../cmake/folders.cmake) 7 | 8 | add_library(examples_interface INTERFACE) 9 | 10 | if(PROJECT_IS_TOP_LEVEL) 11 | find_package(edgerunner REQUIRED) 12 | find_package(fmt REQUIRED) 13 | endif() 14 | 15 | target_link_libraries(examples_interface INTERFACE edgerunner::edgerunner) 16 | 17 | target_link_libraries(examples_interface INTERFACE fmt::fmt) 18 | 19 | find_package(OpenCV REQUIRED) 20 | target_link_libraries(examples_interface INTERFACE opencv::opencv) 21 | 22 | add_custom_target(run-examples) 23 | 24 | function(add_example NAME) 25 | add_executable("${NAME}" "${NAME}.cpp") 26 | target_link_libraries("${NAME}" PRIVATE examples_interface) 27 | target_compile_features("${NAME}" PRIVATE cxx_std_17) 28 | if(ANDROID) 29 | add_custom_target( 30 | "run_${NAME}" 31 | COMMAND "${CMAKE_SOURCE_DIR}/scripts/run_with_adb.sh" -b 32 | "${CMAKE_CURRENT_BINARY_DIR}" -e "${NAME}" 33 | VERBATIM 34 | ) 35 | else() 36 | add_custom_target( 37 | "run_${NAME}" 38 | COMMAND "${NAME}" 39 | VERBATIM 40 | ) 41 | endif() 42 | add_dependencies("run_${NAME}" "${NAME}") 43 | add_dependencies(run-examples "run_${NAME}") 44 | endfunction() 45 | 46 | # NOTE: for Android, adb push fails on symlinks, push directly manually instead 47 | if(ANDROID) 48 | foreach(dir ${CONAN_RUNTIME_LIB_DIRS}) 49 | file(GLOB_RECURSE shared_libs "${dir}/*.so") 50 | file(COPY ${shared_libs} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) 51 | endforeach() 52 | else() 53 | set(MODELS_DIR "${CMAKE_SOURCE_DIR}/models") 54 | set(MODELS_DEST_DIR "${CMAKE_CURRENT_BINARY_DIR}/models") 55 | set(IMAGES_DIR "${CMAKE_SOURCE_DIR}/images") 56 | set(IMAGES_DEST_DIR "${CMAKE_CURRENT_BINARY_DIR}/images") 57 | if(UNIX) 58 | execute_process(COMMAND ln -sfn ${MODELS_DIR} ${MODELS_DEST_DIR}) 59 | execute_process(COMMAND ln -sfn ${IMAGES_DIR} ${IMAGES_DEST_DIR}) 60 | elseif(WIN32) 61 | execute_process( 62 | COMMAND cmd.exe /c mklink ${MODELS_DEST_DIR} ${MODELS_DIR} 63 | ) 64 | execute_process( 65 | COMMAND cmd.exe /c mklink ${IMAGES_DEST_DIR} ${IMAGES_DIR} 66 | ) 67 | endif() 68 | endif() 69 | 70 | add_example(mobilenet_v3_small) 71 | 72 | add_folders(Example) 73 | -------------------------------------------------------------------------------- /example/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | Set up a [`CMakeUserPresets.json`](/HACKING.md#presets) and ensure 4 | [conan](/HACKING.md#dependency-manager) is configured. 5 | 6 | You may wish to add presets to build in `Release` mode. Add something like the 7 | following to your `CMakeUserPresets.json`: 8 | 9 | ```json 10 | { 11 | ... 12 | "configurePresets": [ 13 | ... 14 | { 15 | "name": "rel", 16 | "binaryDir": "${sourceDir}/build/rel", 17 | "inherits": ["conan"], 18 | "generator": "Unix Makefiles", 19 | "cacheVariables": { 20 | "CMAKE_BUILD_TYPE": "Release" 21 | } 22 | } 23 | ], 24 | "buildPresets": [ 25 | ... 26 | { 27 | "name": "rel", 28 | "configurePreset": "rel", 29 | "configuration": "Release", 30 | "jobs": "" 31 | } 32 | ] 33 | } 34 | ``` 35 | 36 | For MacOS, replace "Unix Makefiles" with "Xcode". 37 | 38 | > [!NOTE] 39 | > Examples require additional dependencies to the main library. 
As such, it is 40 | required to supply `-o examples=True` to the `conan install` command. 41 | 42 | Refer to [HACKING](/HACKING.md) for further configuration options. 43 | 44 | ## Unix 45 | 46 | Run all examples using one of the following methods from the project root directory. 47 | 48 | For `Debug`: 49 | 50 | ```bash 51 | conan install . -b missing -s build_type=Debug -o examples=True 52 | cmake --preset=dev 53 | cmake --build --preset=dev -t run-examples 54 | ``` 55 | 56 | For `Release`: 57 | 58 | ```bash 59 | conan install . -b missing -o examples=True 60 | cmake --preset=rel 61 | cmake --build --preset=rel -t run-examples 62 | ``` 63 | 64 | If a build already exists, you may need to run: 65 | 66 | ```bash 67 | cmake --preset=rel -DBUILD_EXAMPLES=ON 68 | ``` 69 | 70 | To run an individual example, execute: 71 | 72 | ```bash 73 | cmake --build --preset=rel -t run_<example_name> 74 | ``` 75 | 76 | where `example_name` is the example filename without the extension (e.g. `mobilenet_v3_small`). 77 | 78 | ## Android 79 | 80 | Ensure [adb](https://developer.android.com/tools/adb) is configured and a device 81 | with USB debugging enabled is connected. 82 | 83 | An Android [conan profile](https://docs.conan.io/2/reference/config_files/profiles.html) 84 | is required to build for Android. To use the Android profile provided with this 85 | repo, run 86 | 87 | ```bash 88 | conan config install profiles -tf profiles 89 | ``` 90 | 91 | from the project root directory. 92 | 93 | Using the above presets, run all examples using the following steps from the 94 | project root directory: 95 | 96 | ```bash 97 | conan install . -b missing -pr android -o examples=True 98 | cmake --preset=rel 99 | cmake --build --preset=rel -t run-examples 100 | ``` 101 | 102 | To run an individual example, execute: 103 | 104 | ```bash 105 | cmake --build --preset=rel -t run_<example_name> 106 | ``` 107 | 108 | where `example_name` is the example filename without the extension (e.g. `mobilenet_v3_small`).
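For instance, to build and run only the bundled `mobilenet_v3_small` image classification example with the `rel` preset used above:

```bash
cmake --build --preset=rel -t run_mobilenet_v3_small
```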
109 | -------------------------------------------------------------------------------- /example/imageClassifier.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "edgerunner/edgerunner.hpp" 17 | #include "edgerunner/model.hpp" 18 | #include "edgerunner/tensor.hpp" 19 | 20 | class ImageClassifier { 21 | public: 22 | ImageClassifier(const std::filesystem::path& modelPath, 23 | const std::filesystem::path& labelListPath); 24 | auto loadImage(const std::filesystem::path& imagePath) -> edge::STATUS; 25 | 26 | auto setDelegate(edge::DELEGATE delegate) -> edge::STATUS; 27 | 28 | auto predict(size_t numPredictions = 3) 29 | -> std::pair>, double>; 30 | 31 | private: 32 | void convertImage(cv::Mat& image) const; 33 | 34 | static void resize(cv::Mat& image, size_t size); 35 | 36 | static void centerCrop(cv::Mat& image, const std::vector& cropSize); 37 | 38 | static void normalize(cv::Mat& image); 39 | 40 | template 41 | void writeImageToInputBuffer(const cv::Mat& inputImage, 42 | nonstd::span& output); 43 | 44 | template 45 | void preprocess(cv::Mat& image, 46 | const std::vector& dimensions, 47 | nonstd::span& modelInput); 48 | 49 | template 50 | static auto softmax(const nonstd::span& elements) -> std::vector; 51 | 52 | template 53 | static auto topKIndices(const nonstd::span& elements, 54 | size_t numPredictions) -> std::vector; 55 | 56 | static auto loadLabelList(const std::filesystem::path& labelListPath) 57 | -> std::vector; 58 | 59 | template 60 | static void printPixel(const nonstd::span& image, 61 | const std::vector& dimensions, 62 | size_t hIndex, 63 | size_t wIndex); 64 | 65 | template 66 | static void printPixel(const cv::Mat& image, size_t hIndex, size_t wIndex); 67 | 68 | template 69 | static auto nextPowerOfTwo(T val) -> T { 70 | T power = 2; 71 | while (power < val) { 72 | power *= 2; 73 | } 74 | 75 | return power; 76 | } 77 | 78 | std::unique_ptr m_model; 79 | 80 | std::vector m_labelList; 81 | 82 | cv::Mat m_image; 83 | 84 | bool m_quantized {false}; 85 | }; 86 | 87 | inline ImageClassifier::ImageClassifier( 88 | const std::filesystem::path& modelPath, /* NOLINT */ 89 | const std::filesystem::path& labelListPath) 90 | : m_model(edge::createModel(modelPath)) 91 | , m_labelList(loadLabelList(labelListPath)) { 92 | if (m_model != nullptr) { 93 | m_quantized = (m_model->getPrecision() == edge::TensorType::UINT8); 94 | } 95 | } 96 | 97 | inline auto ImageClassifier::loadImage(const std::filesystem::path& imagePath) 98 | -> edge::STATUS { 99 | m_image = cv::imread(imagePath, cv::IMREAD_COLOR); 100 | 101 | if (m_image.empty()) { 102 | return edge::STATUS::FAIL; 103 | } 104 | 105 | convertImage(m_image); 106 | 107 | return edge::STATUS::SUCCESS; 108 | } 109 | 110 | inline auto ImageClassifier::setDelegate(const edge::DELEGATE delegate) 111 | -> edge::STATUS { 112 | return m_model->applyDelegate(delegate); 113 | } 114 | 115 | inline auto ImageClassifier::predict(const size_t numPredictions) 116 | -> std::pair>, double> { 117 | auto input = m_model->getInput(0); 118 | 119 | const auto inputDimensions = input->getDimensions(); 120 | 121 | if (m_quantized) { 122 | auto inputBuffer = input->getTensorAs(); 123 | preprocess(m_image, inputDimensions, inputBuffer); 124 | } else { 125 | auto inputBuffer = input->getTensorAs(); 126 | preprocess(m_image, inputDimensions, 
inputBuffer); 127 | } 128 | 129 | const auto start = std::chrono::high_resolution_clock::now(); 130 | if (m_model->execute() != edge::STATUS::SUCCESS) { 131 | return {}; 132 | } 133 | const auto end = std::chrono::high_resolution_clock::now(); 134 | 135 | const auto inferenceTime = 136 | std::chrono::duration(end - start).count(); 137 | 138 | std::vector probabilities; 139 | std::vector topIndices; 140 | 141 | if (m_quantized) { 142 | auto output = m_model->getOutput(0)->getTensorAs(); 143 | probabilities = softmax(output); 144 | topIndices = topKIndices(output, numPredictions); 145 | } else { 146 | auto output = m_model->getOutput(0)->getTensorAs(); 147 | probabilities = softmax(output); 148 | topIndices = topKIndices(output, numPredictions); 149 | } 150 | 151 | std::vector> topPredictions; 152 | topPredictions.reserve(topIndices.size()); 153 | 154 | for (const auto index : topIndices) { 155 | topPredictions.emplace_back(m_labelList[index + 1], 156 | probabilities[index]); 157 | } 158 | 159 | return {topPredictions, inferenceTime}; 160 | } 161 | 162 | inline void ImageClassifier::convertImage(cv::Mat& image) const { 163 | cv::cvtColor(image, image, cv::COLOR_BGR2RGB); 164 | 165 | if (m_quantized) { 166 | image.convertTo(image, CV_8UC3); 167 | } else { 168 | // Convert the image to float and scale it to [0, 1] range 169 | image.convertTo( 170 | image, CV_32FC3, 1.0 / std::numeric_limits::max()); 171 | } 172 | } 173 | 174 | inline void ImageClassifier::resize(cv::Mat& image, const size_t size) { 175 | const auto imageHeight = image.rows; 176 | const auto imageWidth = image.cols; 177 | 178 | const auto longDim = static_cast(std::max(imageHeight, imageWidth)); 179 | const auto shortDim = static_cast(std::min(imageHeight, imageWidth)); 180 | 181 | const auto newLong = 182 | static_cast(static_cast(size) * longDim / shortDim); 183 | 184 | const auto newHeight = 185 | static_cast((imageHeight > imageWidth) ? newLong : size); 186 | const auto newWidth = 187 | static_cast((imageHeight > imageWidth) ? 
size : newLong); 188 | 189 | cv::resize( 190 | image, image, cv::Size(newWidth, newHeight), 0, 0, cv::INTER_LINEAR); 191 | } 192 | 193 | inline void ImageClassifier::centerCrop(cv::Mat& image, 194 | const std::vector& cropSize) { 195 | auto imageHeight = image.rows; 196 | auto imageWidth = image.cols; 197 | 198 | const auto cropHeight = static_cast(cropSize[0]); 199 | const auto cropWidth = static_cast(cropSize[1]); 200 | 201 | if (cropHeight > imageWidth || cropWidth > imageHeight) { 202 | const auto padLeft = (cropHeight - imageWidth) / 2; 203 | const auto padTop = (cropWidth - imageHeight) / 2; 204 | const auto padRight = (cropHeight - imageWidth + 1) / 2; 205 | const auto padBottom = (cropWidth - imageHeight + 1) / 2; 206 | 207 | cv::copyMakeBorder(image, 208 | image, 209 | padTop, 210 | padBottom, 211 | padLeft, 212 | padRight, 213 | cv::BORDER_CONSTANT, 214 | cv::Scalar(0, 0, 0)); 215 | imageHeight = image.rows; 216 | imageWidth = image.cols; 217 | } 218 | 219 | const auto cropTop = 220 | static_cast(std::floor((imageHeight - cropWidth) / 2.0)); 221 | const auto cropLeft = 222 | static_cast(std::floor((imageWidth - cropHeight) / 2.0)); 223 | 224 | const cv::Rect cropRegion(cropLeft, cropTop, cropHeight, cropWidth); 225 | image = image(cropRegion); 226 | } 227 | 228 | inline void ImageClassifier::normalize(cv::Mat& image) { 229 | const cv::Scalar mean(0.485, 0.456, 0.406); 230 | const cv::Scalar std(0.229, 0.224, 0.225); 231 | 232 | cv::subtract(image, mean, image); 233 | cv::divide(image, std, image); 234 | } 235 | 236 | template 237 | inline void ImageClassifier::writeImageToInputBuffer(const cv::Mat& inputImage, 238 | nonstd::span& output) { 239 | const auto height = static_cast(inputImage.rows); 240 | const auto width = static_cast(inputImage.cols); 241 | 242 | const auto numChannels = static_cast(inputImage.channels()); 243 | const auto rowSize = width * numChannels; 244 | 245 | for (size_t i = 0; i < height; ++i) { 246 | const auto hOffset = i * rowSize; 247 | for (size_t j = 0; j < width; ++j) { 248 | const auto wOffset = hOffset + j * numChannels; 249 | if constexpr (std::is_same_v) { 250 | const auto& pixel = inputImage.at( 251 | static_cast(i), static_cast(j)); 252 | 253 | output[wOffset] = pixel[0]; 254 | output[wOffset + 1] = pixel[1]; 255 | output[wOffset + 2] = pixel[2]; 256 | } else { 257 | const auto& pixel = inputImage.at( 258 | static_cast(i), static_cast(j)); 259 | 260 | output[wOffset] = pixel[0]; 261 | output[wOffset + 1] = pixel[1]; 262 | output[wOffset + 2] = pixel[2]; 263 | } 264 | } 265 | } 266 | } 267 | 268 | template 269 | inline void ImageClassifier::preprocess(cv::Mat& image, 270 | const std::vector& dimensions, 271 | nonstd::span& modelInput) { 272 | const auto resizedSize = nextPowerOfTwo(dimensions[1]); 273 | resize(image, resizedSize); 274 | 275 | const std::vector cropDimensions = {dimensions[1], dimensions[2]}; 276 | centerCrop(image, cropDimensions); 277 | 278 | // normalize(image); 279 | 280 | writeImageToInputBuffer(image, modelInput); 281 | } 282 | 283 | template 284 | inline auto ImageClassifier::softmax(const nonstd::span& elements) 285 | -> std::vector { 286 | const float maxInput = 287 | *std::max_element(elements.cbegin(), elements.cend()); 288 | 289 | std::vector softmaxValues; 290 | softmaxValues.reserve(elements.size()); 291 | 292 | std::transform(elements.cbegin(), 293 | elements.cend(), 294 | std::back_inserter(softmaxValues), 295 | [maxInput](auto val) { return std::exp(val - maxInput); }); 296 | 297 | const auto expSum = 298 | 
std::accumulate(softmaxValues.begin(), softmaxValues.end(), 0.0F); 299 | 300 | std::transform(softmaxValues.begin(), 301 | softmaxValues.end(), 302 | softmaxValues.begin(), 303 | [expSum](auto val) { return val / expSum; }); 304 | return softmaxValues; 305 | } 306 | 307 | template 308 | inline auto ImageClassifier::topKIndices(const nonstd::span& elements, 309 | const size_t numPredictions) 310 | -> std::vector { 311 | std::vector indices(elements.size()); 312 | std::iota(indices.begin(), indices.end(), 0); 313 | std::partial_sort( 314 | indices.begin(), 315 | indices.begin() 316 | + static_cast::difference_type>(numPredictions), 317 | indices.end(), 318 | [&elements](size_t val1, size_t val2) { 319 | return elements[val1] > elements[val2]; 320 | }); 321 | indices.resize(numPredictions); 322 | return indices; 323 | } 324 | 325 | inline auto ImageClassifier::loadLabelList( 326 | const std::filesystem::path& labelListPath) -> std::vector { 327 | std::vector labels; 328 | std::ifstream file(labelListPath); 329 | std::string line; 330 | while (std::getline(file, line)) { 331 | labels.push_back(line); 332 | } 333 | return labels; 334 | } 335 | 336 | template 337 | inline void ImageClassifier::printPixel(const nonstd::span& image, 338 | const std::vector& dimensions, 339 | size_t hIndex, 340 | size_t wIndex) { 341 | const auto red = 342 | *(image.cbegin() + hIndex * dimensions[2] * 3 + wIndex * 3); 343 | const auto green = 344 | *(image.cbegin() + hIndex * dimensions[2] * 3 + wIndex * 3 + 1); 345 | const auto blue = 346 | *(image.cbegin() + hIndex * dimensions[2] * 3 + wIndex * 3 + 2); 347 | 348 | fmt::print(stderr, 349 | "pixel ({}, {}): [{}, {}, {}]\n", 350 | hIndex, 351 | wIndex, 352 | red, 353 | green, 354 | blue); 355 | } 356 | 357 | template 358 | inline void ImageClassifier::printPixel(const cv::Mat& image, 359 | size_t hIndex, 360 | size_t wIndex) { 361 | auto pixel = 362 | image.at(static_cast(hIndex), static_cast(wIndex)); 363 | auto red = pixel[0]; 364 | auto green = pixel[1]; 365 | auto blue = pixel[2]; 366 | 367 | fmt::print(stderr, 368 | "pixel ({}, {}): [{}, {}, {}]\n", 369 | hIndex, 370 | wIndex, 371 | red, 372 | green, 373 | blue); 374 | } 375 | -------------------------------------------------------------------------------- /example/mobilenet_v3_small.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include "edgerunner/model.hpp" 11 | #include "imageClassifier.hpp" 12 | 13 | auto main() -> int { 14 | const std::filesystem::path modelPath { 15 | "models/tflite/mobilenet_v3_small.tflite"}; 16 | const std::filesystem::path labelListPath { 17 | "models/common/imagenet_labels.txt"}; 18 | 19 | ImageClassifier imageClassifier(modelPath, labelListPath); 20 | 21 | /* use the best delegate available based on the build configuration */ 22 | #if defined(EDGERUNNER_QNN) 23 | imageClassifier.setDelegate(edge::DELEGATE::NPU); 24 | #elif defined(EDGERUNNER_GPU) 25 | imageClassifier.setDelegate(edge::DELEGATE::GPU); 26 | #endif 27 | 28 | const size_t numPredictions = 5; 29 | 30 | const std::vector imagePaths = { 31 | "images/keyboard.jpg", 32 | "images/dog.jpg", 33 | }; 34 | 35 | for (const auto& imagePath : imagePaths) { 36 | try { 37 | if (imageClassifier.loadImage(imagePath) != edge::STATUS::SUCCESS) { 38 | continue; 39 | } 40 | 41 | const auto start = std::chrono::high_resolution_clock::now(); 42 | const auto [predictions, inferenceTime] = 43 | 
imageClassifier.predict(numPredictions); 44 | const auto end = std::chrono::high_resolution_clock::now(); 45 | const auto predictionTime = 46 | std::chrono::duration(end - start).count(); 47 | 48 | fmt::print(stderr, 49 | fmt::fg(fmt::color::green), 50 | "predictions for {}:\n", 51 | imagePath.filename().string()); 52 | for (const auto& prediction : predictions) { 53 | fmt::print(stderr, 54 | fmt::fg(fmt::color::green), 55 | "\t{} ({:.2f}%)\n", 56 | prediction.first, 57 | 100.0F * prediction.second); 58 | } 59 | fmt::print(stderr, 60 | fmt::fg(fmt::color::yellow), 61 | "prediction time: {}ms\n", 62 | predictionTime); 63 | fmt::print(stderr, 64 | fmt::fg(fmt::color::yellow), 65 | "inference time: {}ms\n", 66 | inferenceTime); 67 | } catch (std::exception& ex) { 68 | fmt::print(stderr, 69 | fmt::fg(fmt::color::red), 70 | "{} example failed: {}\n", 71 | imagePath.stem().string(), 72 | ex.what()); 73 | } 74 | } 75 | 76 | return 0; 77 | } 78 | -------------------------------------------------------------------------------- /images/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuralize-ai/edgerunner/61978814d0d627ad44d5c7bcf9b205a43f5d2483/images/dog.jpg -------------------------------------------------------------------------------- /images/keyboard.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuralize-ai/edgerunner/61978814d0d627ad44d5c7bcf9b205a43f5d2483/images/keyboard.jpg -------------------------------------------------------------------------------- /images/large-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuralize-ai/edgerunner/61978814d0d627ad44d5c7bcf9b205a43f5d2483/images/large-logo.png -------------------------------------------------------------------------------- /include/edgerunner/edgerunner.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file edgerunner.hpp 3 | * @brief Header file for the Model Factory 4 | */ 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include "edgerunner/edgerunner_export.hpp" 12 | #include "model.hpp" 13 | 14 | namespace edge { 15 | 16 | /** 17 | * @brief Function to create a model from a given file path 18 | * 19 | * This function takes a file path to a model and creates a new Model object 20 | * from it. 21 | * 22 | * createModel() is the intended way to instantiate a Model using the edgerunner 23 | * library 24 | * 25 | * @param modelPath The file path to the model file 26 | * @return A unique pointer to the created Model object 27 | */ 28 | auto EDGERUNNER_EXPORT createModel(const std::filesystem::path& modelPath) 29 | -> std::unique_ptr; 30 | 31 | /** 32 | * @brief Function to create a model from a given buffer 33 | * 34 | * This function takes a buffer of a model and creates a new Model object 35 | * from it. 
36 | * 37 | * createModel() is the intended way to instantiate a Model using the edgerunner 38 | * library 39 | * 40 | * @param modelBuffer The buffer of the model file 41 | * @return A unique pointer to the created Model object 42 | */ 43 | auto EDGERUNNER_EXPORT createModel(const nonstd::span& modelBuffer, 44 | const std::string& modelExtension = "tflite") 45 | -> std::unique_ptr; 46 | 47 | } // namespace edge 48 | -------------------------------------------------------------------------------- /include/edgerunner/model.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file model.hpp 3 | * @brief Definition of the Model class, a base class for machine learning 4 | * models. 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include "edgerunner/edgerunner_export.hpp" 15 | #include "tensor.hpp" 16 | 17 | namespace edge { 18 | 19 | /** 20 | * @enum DELEGATE 21 | * @brief Enum class representing different types of delegates for model 22 | * execution. 23 | */ 24 | enum class DELEGATE : uint8_t { 25 | CPU, /**< CPU delegate */ 26 | GPU, /**< GPU delegate */ 27 | NPU /**< NPU delegate */ 28 | }; 29 | 30 | /** 31 | * @enum STATUS 32 | * @brief Enum class representing the status of an operation. 33 | */ 34 | enum class STATUS : uint8_t { 35 | SUCCESS, /**< Operation was successful */ 36 | FAIL /**< Operation failed */ 37 | }; 38 | 39 | /** 40 | * @class Model 41 | * @brief A base class for machine learning models. 42 | * 43 | * This class serves as a base class for machine learning models. It provides 44 | * common functionality such as loading a model, accessing inputs and outputs, 45 | * applying a delegate, and executing the model. 46 | */ 47 | class EDGERUNNER_EXPORT Model { 48 | public: 49 | /** 50 | * @brief Constructor for the Model class. 51 | * 52 | * This constructor initializes a Model object with the given model path. 53 | * 54 | * @param modelPath The path to the model file. 55 | */ 56 | explicit Model(const std::filesystem::path& modelPath) 57 | : m_name(modelPath.stem().string()) {} 58 | 59 | Model() = default; 60 | Model(const Model&) = default; 61 | Model(Model&&) = delete; 62 | auto operator=(const Model&) -> Model& = default; 63 | auto operator=(Model&&) -> Model& = delete; 64 | 65 | /** 66 | * @brief Virtual destructor for the Model class. 67 | */ 68 | virtual ~Model() = default; 69 | 70 | /** 71 | * @brief Pure virtual function to load a model from a file path. 72 | * 73 | * This function is a pure virtual function that must be implemented by any 74 | * derived classes. It is used to load a model from a file path. 75 | * 76 | * @param modelPath The path to the model file 77 | * @return STATUS The status of the model loading operation 78 | */ 79 | virtual auto loadModel(const std::filesystem::path& modelPath) 80 | -> STATUS = 0; 81 | 82 | /** 83 | * @brief Pure virtual function to load a model from a file buffer. 84 | * 85 | * This function is a pure virtual function that must be implemented by any 86 | * derived classes. It is used to load a model from a file buffer. 87 | * 88 | * @param modelBuffer The buffer containing the model 89 | * @return STATUS The status of the model loading operation 90 | */ 91 | virtual auto loadModel(const nonstd::span& modelBuffer) 92 | -> STATUS = 0; 93 | 94 | /** 95 | * @brief Get the number of input tensors in the model. 
96 | * 97 | * @return The number of input tensors 98 | */ 99 | auto getNumInputs() const -> size_t { return m_inputs.size(); } 100 | 101 | /** 102 | * @brief Get the number of output tensors in the model. 103 | * 104 | * @return The number of output tensors 105 | */ 106 | auto getNumOutputs() const -> size_t { return m_outputs.size(); } 107 | 108 | /** 109 | * @brief Get the input tensor at the specified index. 110 | * 111 | * @param index The index of the input tensor 112 | * @return The input tensor at the specified index, or nullptr if index is 113 | * out of bounds 114 | */ 115 | auto getInput(size_t index) const -> std::shared_ptr; 116 | 117 | /** 118 | * @brief Get the output tensor at the specified index. 119 | * 120 | * @param index The index of the output tensor 121 | * @return The output tensor at the specified index, or nullptr if index is 122 | * out of bounds 123 | */ 124 | auto getOutput(size_t index) const -> std::shared_ptr; 125 | 126 | /** 127 | * @brief Get the inputs of the model. 128 | * 129 | * This function returns a reference to a vector of shared pointers to 130 | * Tensor objects, which represent the inputs of the model. 131 | * 132 | * @return A reference to a vector of shared pointers to Tensor objects 133 | * representing the inputs of the model. 134 | */ 135 | auto getInputs() -> std::vector>& { 136 | return m_inputs; 137 | } 138 | 139 | /** 140 | * @brief Get the outputs of the model. 141 | * 142 | * This function returns a reference to a vector of shared pointers to 143 | * Tensor objects, which represent the outputs of the model. 144 | * 145 | * @return A reference to a vector of shared pointers to Tensor objects 146 | * representing the outputs of the model. 147 | */ 148 | auto getOutputs() -> std::vector>& { 149 | return m_outputs; 150 | } 151 | 152 | /** 153 | * @brief Get the current delegate used for model execution. 154 | * 155 | * @return The delegate currently set for model execution 156 | */ 157 | auto getDelegate() const -> DELEGATE { return m_delegate; } 158 | 159 | /** 160 | * @brief Apply a delegate for model execution. 161 | * 162 | * @param delegate The delegate to apply 163 | * @return The status of the operation 164 | */ 165 | virtual auto applyDelegate(const DELEGATE& delegate) -> STATUS = 0; 166 | 167 | /** 168 | * @brief Execute the model. 169 | * 170 | * @return The status of the operation 171 | */ 172 | virtual auto execute() -> STATUS = 0; 173 | 174 | /** 175 | * @brief Get the name of the model. 176 | * 177 | * @return The name of the model 178 | */ 179 | auto name() const -> const std::string& { return m_name; } 180 | 181 | /** 182 | * @brief Get the status of model creation. 183 | * 184 | * Verify that the model was created successfully 185 | * 186 | * @return The status of model creation 187 | */ 188 | auto getCreationStatus() const -> STATUS { return m_creationStatus; } 189 | 190 | /** 191 | * @brief Get the precision used for model execution. 192 | * 193 | * @return The pricsion used model execution 194 | */ 195 | auto getPrecision() const -> TensorType { return m_precision; } 196 | 197 | protected: 198 | /** 199 | * @brief Set the delegate for model execution. 200 | * 201 | * This method is used by derivatives to allow users to query the currently 202 | * set delegate 203 | * 204 | * @param delegate The delegate to set 205 | */ 206 | void setDelegate(const DELEGATE& delegate) { m_delegate = delegate; } 207 | 208 | /** 209 | * @brief Set the precision for model execution. 
210 | * 211 | * This method is used by derivatives to allow users to query the execution 212 | * precision 213 | * 214 | * @param delegate The delegate to set 215 | */ 216 | void setPrecision(const TensorType& precision) { m_precision = precision; } 217 | 218 | /** 219 | * @brief Set the status of model creation. 220 | * 221 | * This method is used by derivatives to allow querying of model creation 222 | * status 223 | * 224 | * @param status The status to set 225 | */ 226 | void setCreationStatus(const STATUS& status) { 227 | if (m_creationStatus == STATUS::SUCCESS) { 228 | m_creationStatus = status; 229 | } 230 | } 231 | 232 | private: 233 | EDGERUNNER_SUPPRESS_C4251 234 | std::string m_name; /**< Name of the model */ 235 | 236 | EDGERUNNER_SUPPRESS_C4251 237 | std::vector> 238 | m_inputs; /**< Input tensors of the model */ 239 | 240 | EDGERUNNER_SUPPRESS_C4251 241 | std::vector> 242 | m_outputs; /**< Output tensors of the model */ 243 | 244 | EDGERUNNER_SUPPRESS_C4251 245 | DELEGATE m_delegate = 246 | DELEGATE::CPU; /**< Delegate used for model execution */ 247 | 248 | EDGERUNNER_SUPPRESS_C4251 249 | TensorType m_precision = 250 | TensorType::FLOAT16; /**< Precision used for model execution */ 251 | 252 | EDGERUNNER_SUPPRESS_C4251 253 | STATUS m_creationStatus = STATUS::SUCCESS; /**< Status of model creation */ 254 | }; 255 | 256 | inline auto Model::getInput(size_t index) const -> std::shared_ptr { 257 | if (index < getNumInputs()) { 258 | return m_inputs[index]; 259 | } 260 | 261 | return nullptr; 262 | } 263 | 264 | inline auto Model::getOutput(size_t index) const -> std::shared_ptr { 265 | if (index < getNumOutputs()) { 266 | return m_outputs[index]; 267 | } 268 | 269 | return nullptr; 270 | } 271 | 272 | } // namespace edge 273 | -------------------------------------------------------------------------------- /include/edgerunner/qnn/backend.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Backend.h 3 | * @brief Definition of the Backend class for handling QNN backends. 4 | * 5 | * This class represents a backend for handling interfacing with QNN backend 6 | * libraries. It provides functionality for loading the backend, creating a 7 | * device, and initializing the backend 8 | * The Backend class is currently restricted to NPU inference support. 9 | */ 10 | 11 | #pragma once 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | /* TODO: move STATUS to dedicated header */ 23 | #include "edgerunner/model.hpp" 24 | 25 | namespace edge::qnn { 26 | 27 | /** 28 | * @class Backend 29 | * @brief Class for handling QNN backends. 30 | */ 31 | class Backend { 32 | public: 33 | /** 34 | * @brief Constructor for the Backend class. 35 | * @param delegate The delegate type for the backend (CPU, GPU, NPU). 36 | * binary. 37 | */ 38 | explicit Backend(DELEGATE delegate); 39 | 40 | Backend(const Backend&) = default; 41 | Backend(Backend&&) = delete; 42 | auto operator=(const Backend&) -> Backend& = delete; 43 | auto operator=(Backend&&) -> Backend& = delete; 44 | 45 | /** 46 | * @brief Destructor for the Backend class. 47 | */ 48 | ~Backend(); 49 | 50 | /** 51 | * @brief Get the status of backend creation 52 | * 53 | * Verify that the backend was created successfully 54 | * 55 | * @return The status of backend creation 56 | */ 57 | auto getCreationStatus() const -> STATUS { return m_creationStatus; } 58 | 59 | /** 60 | * @brief Get the backend handle. 
61 | * @return Reference to the backend handle. 62 | */ 63 | auto getHandle() -> auto& { return m_backendHandle; } 64 | /** 65 | * @brief Returns a reference to the device handle. 66 | * 67 | * This function returns a reference to the device handle, allowing access 68 | * to the underlying device handle object. 69 | * 70 | * @return Reference to the device handle. 71 | */ 72 | auto getDeviceHandle() -> auto& { return m_deviceHandle; } 73 | 74 | /** 75 | * @brief Get the QNN interface. 76 | * @return Reference to the QNN interface. 77 | */ 78 | auto getInterface() -> auto& { return m_qnnInterface; } 79 | 80 | /** 81 | * @brief Get the delegate type for the backend. 82 | * @return The delegate type. 83 | */ 84 | auto getDelegate() { return m_delegate; } 85 | 86 | /** 87 | * @brief Static callback function for logging. 88 | * @param fmtStr The format string for the log message. 89 | * @param level The log level. 90 | * @param timestamp The timestamp of the log message. 91 | * @param argp Additional arguments for the log message. 92 | */ 93 | static void logCallback(const char* fmtStr, 94 | QnnLog_Level_t level, 95 | uint64_t timestamp, 96 | va_list argp); 97 | 98 | private: 99 | void setCreationStatus(const STATUS& status) { 100 | if (m_creationStatus == STATUS::SUCCESS) { 101 | m_creationStatus = status; 102 | } 103 | } 104 | 105 | auto loadBackend() -> STATUS; 106 | 107 | auto createLogger() -> STATUS; 108 | 109 | auto initializeBackend() -> STATUS; 110 | 111 | auto createDevice() -> STATUS; 112 | 113 | auto setPowerConfig() -> STATUS; 114 | 115 | auto destroyPowerConfig() const -> STATUS; 116 | 117 | auto validateBackendId(uint32_t backendId) const -> STATUS; 118 | 119 | void* m_backendLibHandle {}; 120 | void* m_systemLibHandle {}; 121 | 122 | Qnn_BackendHandle_t m_backendHandle {}; 123 | QnnBackend_Config_t** m_backendConfig {}; 124 | 125 | Qnn_DeviceHandle_t m_deviceHandle {}; 126 | 127 | Qnn_LogHandle_t m_logHandle {}; 128 | 129 | uint32_t m_powerConfigId {}; 130 | 131 | QnnHtpDevice_PerfInfrastructure_t m_devicePerfInfrastructure {}; 132 | 133 | QNN_INTERFACE_VER_TYPE m_qnnInterface = QNN_INTERFACE_VER_TYPE_INIT; 134 | 135 | DELEGATE m_delegate; 136 | 137 | std::unordered_map m_backendLibrariesByDelegate { 138 | {DELEGATE::CPU, "libQnnCpu.so"}, 139 | {DELEGATE::GPU, "libQnnGpu.so"}, 140 | {DELEGATE::NPU, "libQnnHtp.so"}}; 141 | 142 | uint32_t m_deviceId {}; 143 | QnnHtpDevice_Arch_t m_htpArch {}; 144 | 145 | STATUS m_creationStatus = STATUS::SUCCESS; 146 | }; 147 | 148 | } // namespace edge::qnn 149 | -------------------------------------------------------------------------------- /include/edgerunner/qnn/config.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace edge::qnn { 6 | 7 | /** 8 | * @brief A class template for managing QNN backend configurations 9 | * 10 | * This class template allows for the management of configurations and custom 11 | * configurations. It provides methods for creating new configurations and 12 | * custom configurations, as well as retrieving pointers to QNN API compatible 13 | * configurations arrays. 14 | * 15 | * @tparam ConfigType The type of the configuration 16 | * @tparam CustomConfigType The type of the custom configuration 17 | */ 18 | template 19 | class Config { 20 | public: 21 | /** 22 | * @brief Constructor for Config class 23 | * 24 | * Initializes the default configuration and default custom configuration. 
25 | * 26 | * @param defaultConfig The default configuration 27 | * @param defaultCustomConfig The default custom configuration 28 | */ 29 | Config(ConfigType defaultConfig, CustomConfigType defaultCustomConfig) 30 | : m_defaultConfig(defaultConfig) 31 | , m_defaultCustomConfig(defaultCustomConfig) {} 32 | 33 | /** 34 | * @brief Creates a new configuration 35 | * 36 | * Creates a new configuration using the default configuration and adds it 37 | * to the list of configurations. 38 | * 39 | * @return A reference to the newly created configuration 40 | */ 41 | auto createConfig() -> auto& { 42 | m_configs.push_back(m_defaultConfig); 43 | return m_configs.back(); 44 | } 45 | 46 | /** 47 | * @brief Creates a new custom configuration 48 | * 49 | * Creates a new custom configuration using the default custom configuration 50 | * and adds it to the list of custom configurations. The returned custom 51 | * configuration needs to be assigned to a corresponding configuration. 52 | * 53 | * @return A reference to the newly created custom configuration 54 | */ 55 | auto createCustomConfig() -> auto& { 56 | m_customConfigs.push_back(m_defaultCustomConfig); 57 | return m_customConfigs.back(); 58 | } 59 | 60 | /** 61 | * @brief Retrieves pointers to the configurations 62 | * 63 | * Retrieves a null terminated array of pointers to all the configurations. 64 | * 65 | * @return An array of pointers to the configurations 66 | */ 67 | auto getPtr() -> const ConfigType** { 68 | m_configPtrs.clear(); 69 | m_configPtrs.reserve(m_configs.size() + 1); 70 | for (auto& config : m_configs) { 71 | m_configPtrs.push_back(&config); 72 | } 73 | m_configPtrs.push_back(nullptr); 74 | return m_configPtrs.data(); 75 | } 76 | 77 | private: 78 | ConfigType m_defaultConfig; /**< The default configuration */ 79 | CustomConfigType 80 | m_defaultCustomConfig; /**< The default custom configuration */ 81 | std::vector m_configs; /**< List of configurations */ 82 | std::vector 83 | m_customConfigs; /**< List of custom configurations */ 84 | std::vector 85 | m_configPtrs; /**< List of pointers to configurations */ 86 | }; 87 | 88 | } // namespace edge::qnn 89 | -------------------------------------------------------------------------------- /include/edgerunner/qnn/graph.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Graphs.hpp 3 | * @brief Header file for the Graphs class, which manages QNN graphs. 4 | */ 5 | 6 | #pragma once 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "edgerunner/model.hpp" 20 | 21 | namespace edge::qnn { 22 | 23 | /** 24 | * @brief Enum representing possible errors that can occur during graph 25 | * operations. 26 | */ 27 | using GraphErrorT = enum GraphError { 28 | GRAPH_NO_ERROR = 0, 29 | GRAPH_TENSOR_ERROR = 1, 30 | GRAPH_PARAMS_ERROR = 2, 31 | GRAPH_NODES_ERROR = 3, 32 | GRAPH_GRAPH_ERROR = 4, 33 | GRAPH_CONTEXT_ERROR = 5, 34 | GRAPH_GENERATION_ERROR = 6, 35 | GRAPH_SETUP_ERROR = 7, 36 | GRAPH_INVALID_ARGUMENT_ERROR = 8, 37 | GRAPH_FILE_ERROR = 9, 38 | GRAPH_MEMORY_ALLOCATE_ERROR = 10, 39 | // Value selected to ensure 32 bits. 40 | GRAPH_UNKNOWN_ERROR = 0x7FFFFFFF 41 | }; 42 | 43 | /** 44 | * @brief Struct representing information about a graph. 
45 | */ 46 | using GraphInfoT = struct GraphInfo { 47 | Qnn_GraphHandle_t graph; 48 | char* graphName; 49 | Qnn_Tensor_t* inputTensors; 50 | uint32_t numInputTensors; 51 | Qnn_Tensor_t* outputTensors; 52 | uint32_t numOutputTensors; 53 | }; 54 | 55 | /** 56 | * @brief Struct representing configuration information for a graph. 57 | */ 58 | using GraphConfigInfoT = struct GraphConfigInfo { 59 | char* graphName; 60 | const QnnGraph_Config_t** graphConfigs; 61 | }; 62 | 63 | /** 64 | * @brief Function pointer type for composing graphs. 65 | */ 66 | using ComposeGraphsFnHandleTypeT = GraphErrorT (*)(Qnn_BackendHandle_t, 67 | QNN_INTERFACE_VER_TYPE, 68 | Qnn_ContextHandle_t, 69 | const GraphConfigInfoT**, 70 | const uint32_t, 71 | GraphInfoT***, 72 | uint32_t*, 73 | bool, 74 | QnnLog_Callback_t, 75 | QnnLog_Level_t); 76 | 77 | /** 78 | * @brief Function pointer type for freeing graph information. 79 | */ 80 | using FreeGraphInfoFnHandleTypeT = GraphErrorT (*)(GraphInfoT***, uint32_t); 81 | 82 | /** 83 | * @brief Class for managing QNN graphs. 84 | */ 85 | class Graph { 86 | public: 87 | Graph() = default; 88 | 89 | Graph(const Graph&) = delete; 90 | Graph(Graph&&) = delete; 91 | auto operator=(const Graph&) -> Graph& = delete; 92 | auto operator=(Graph&&) -> Graph& = delete; 93 | 94 | ~Graph(); 95 | 96 | /** 97 | * @brief Get the input tensors for the current graph. 98 | * @return A span of input tensors. 99 | */ 100 | auto getInputs() -> nonstd::span { 101 | return {m_graphInfo->inputTensors, m_graphInfo->numInputTensors}; 102 | } 103 | 104 | /** 105 | * @brief Get the output tensors for the current graph. 106 | * @return A span of output tensors. 107 | */ 108 | auto getOutputs() -> nonstd::span { 109 | return {m_graphInfo->outputTensors, m_graphInfo->numOutputTensors}; 110 | } 111 | 112 | /** 113 | * Loads a model from a shared library located at the specified path. 114 | * 115 | * @param modelPath The path to the shared library containing the model. 116 | * @return STATUS The status of the operation (SUCCESS or ERROR). 117 | */ 118 | auto loadFromSharedLibrary(const std::filesystem::path& modelPath) 119 | -> STATUS; 120 | 121 | /** 122 | * Creates a context for the QNN interface with the specified backend and 123 | * device handles. 124 | * 125 | * Graph keeps a reference to the qnnInterface 126 | * 127 | * @param qnnInterface The handle of the QNN interface. 128 | * @param backendHandle The handle to the QNN backend. 129 | * @param deviceHandle The handle to the QNN device. 130 | * @return STATUS The status of the operation (SUCCESS or ERROR). 131 | */ 132 | auto createContext(QNN_INTERFACE_VER_TYPE& qnnInterface, 133 | Qnn_BackendHandle_t& backendHandle, 134 | Qnn_DeviceHandle_t& deviceHandle) -> STATUS; 135 | 136 | /** 137 | * Composes graphs using the specified QNN backend handle. 138 | * 139 | * @param qnnBackendHandle The handle to the QNN backend. 140 | * @return STATUS The status of the operation (SUCCESS or ERROR). 141 | */ 142 | auto composeGraphs(Qnn_BackendHandle_t& qnnBackendHandle) -> STATUS; 143 | 144 | /** 145 | * @brief Sets the configuration for the composed graphs. 146 | * @param delegate The delegate for the operation. 147 | * @param precision The precision of the operation. 148 | * @return The status of the operation. 149 | */ 150 | auto setGraphConfig(DELEGATE delegate, TensorType precision) -> STATUS; 151 | 152 | /** 153 | * @brief Finalizes the composed graphs. 154 | * @return The status of the operation. 
155 | */ 156 | auto finalizeGraphs() -> STATUS; 157 | 158 | /** 159 | * Loads the system library required for loading a cached context from a 160 | * binary buffer 161 | * 162 | * @return STATUS indicating the success or failure of loading the system 163 | * library. 164 | */ 165 | auto loadSystemLibrary() -> STATUS; 166 | 167 | /** 168 | * Loads the context from a binary model buffer. 169 | * 170 | * Graph keeps a reference to the qnnInterface 171 | * 172 | * @param qnnInterface The handle of the QNN interface. 173 | * @param backendHandle The handle to the QNN backend. 174 | * @param deviceHandle The handle to the QNN device. 175 | * @param modelBuffer The binary model buffer containing the model data. 176 | * @return STATUS indicating the success or failure of loading the context 177 | * from the binary model buffer. 178 | */ 179 | auto loadContextFromBinary(QNN_INTERFACE_VER_TYPE& qnnInterface, 180 | Qnn_BackendHandle_t& backendHandle, 181 | Qnn_DeviceHandle_t& deviceHandle, 182 | const nonstd::span& modelBuffer) 183 | -> STATUS; 184 | 185 | /** 186 | * @brief Saves the current context to a binary file. 187 | * @param binaryPath The path to save the context binary file. 188 | * @return The status of the operation. 189 | */ 190 | auto saveContextBinary(const std::filesystem::path& binaryPath) -> STATUS; 191 | 192 | /** 193 | * @brief Retrieves a graph from the current context. 194 | * 195 | * This function retrieves a graph from the current context and returns a 196 | * status code indicating the success or failure of the operation. 197 | * 198 | * @return STATUS - A status code indicating the success or failure of the 199 | * operation. 200 | */ 201 | auto retrieveGraphFromContext() -> STATUS; 202 | 203 | /** 204 | * @brief Executes the graph. 205 | * 206 | * This function executes the graph and returns a status code indicating the 207 | * success or failure of the operation. 208 | * 209 | * @return STATUS - A status code indicating the success or failure of the 210 | * operation. 
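 *
 * For a cached context binary the sequence is shorter, mirroring
 * ModelImpl::loadFromContextBinary() (illustrative sketch only; the handles
 * are assumed to come from a Backend instance and modelBuffer from the
 * caller, with error handling elided):
 *
 * @code
 * Graph graph;
 * graph.loadSystemLibrary();
 * graph.loadContextFromBinary(
 *     qnnInterface, backendHandle, deviceHandle, modelBuffer);
 * graph.retrieveGraphFromContext();
 * graph.execute();
 * @endcode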
211 | */ 212 | auto execute() -> STATUS; 213 | 214 | private: 215 | auto setGraph() { m_graphInfo = m_graphsInfo[0] /* NOLINT */; } 216 | 217 | auto setComposeGraphsFnHandle( 218 | ComposeGraphsFnHandleTypeT composeGraphsFnHandle) -> STATUS; 219 | 220 | auto setFreeGraphInfoFnHandle( 221 | FreeGraphInfoFnHandleTypeT freeGraphInfoFnHandle) -> STATUS; 222 | 223 | auto copyGraphsInfoV1(const QnnSystemContext_GraphInfoV1_t* graphInfoSrc, 224 | GraphInfoT* graphInfoDst) -> bool; 225 | 226 | auto copyGraphsInfo(const QnnSystemContext_GraphInfo_t* graphsInput, 227 | uint32_t numGraphs) -> bool; 228 | 229 | auto copyMetadataToGraphsInfo( 230 | const QnnSystemContext_BinaryInfo_t* binaryInfo) -> bool; 231 | 232 | std::vector m_graphs; 233 | std::vector m_graphPtrs; 234 | 235 | GraphInfoT* m_graphInfo {}; 236 | 237 | GraphInfoT** m_graphsInfo {}; 238 | uint32_t m_graphsCount {}; 239 | 240 | ComposeGraphsFnHandleTypeT m_composeGraphsFnHandle {}; 241 | FreeGraphInfoFnHandleTypeT m_freeGraphInfoFnHandle {}; 242 | 243 | void* m_libModelHandle {}; 244 | 245 | std::vector m_inputTensors; 246 | std::vector m_outputTensors; 247 | 248 | Qnn_ContextHandle_t m_context {}; 249 | 250 | QNN_INTERFACE_VER_TYPE m_qnnInterface {}; 251 | 252 | QNN_SYSTEM_INTERFACE_VER_TYPE m_qnnSystemInterface = 253 | QNN_SYSTEM_INTERFACE_VER_TYPE_INIT; 254 | }; 255 | 256 | } // namespace edge::qnn 257 | -------------------------------------------------------------------------------- /include/edgerunner/qnn/model.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file model.hpp 3 | * @brief Definition of the ModelImpl class, which implements the Model 4 | * interface for QNN models. 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "backend.hpp" 14 | #include "edgerunner/model.hpp" 15 | #include "graph.hpp" 16 | 17 | namespace edge::qnn { 18 | 19 | /** 20 | * @class ModelImpl 21 | * @brief Implementation of the Model interface for QNN models. 22 | */ 23 | class ModelImpl final : public Model { 24 | public: 25 | /** 26 | * @brief Constructor for ModelImpl. 27 | * @param modelPath The path to the QNN model file. 28 | */ 29 | explicit ModelImpl(const std::filesystem::path& modelPath); 30 | 31 | /** 32 | * @brief Constructor for ModelImpl. 33 | * @param modelPath The path to the QNN model file. 34 | */ 35 | explicit ModelImpl(const nonstd::span& modelBuffer); 36 | 37 | ModelImpl(const ModelImpl&) = delete; 38 | ModelImpl(ModelImpl&&) = delete; 39 | auto operator=(const ModelImpl&) -> ModelImpl& = delete; 40 | auto operator=(ModelImpl&&) -> ModelImpl& = delete; 41 | 42 | ~ModelImpl() final = default; 43 | 44 | /** 45 | * @brief Loads the QNN model from the specified path. 46 | * 47 | * This function loads a QNN model from the specified file path. 48 | * The model file should be in the QNN context binary format. 49 | * 50 | * @param modelPath The path to the QNN model file. 51 | * @return STATUS Returns a status indicating whether the model was 52 | * successfully loaded or not. 53 | */ 54 | auto loadModel(const std::filesystem::path& modelPath) -> STATUS final; 55 | 56 | /** 57 | * @brief Loads the QNN model from the specified buffer. 58 | * 59 | * This function loads a QNN model from the provided buffer. The 60 | * buffer should contain the raw data of the QNN model. 61 | * 62 | * @param modelBuffer The buffer containing the QNN model. 63 | * @return STATUS Returns a status indicating whether the model was 64 | * successfully loaded or not. 
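 *
 * Callers normally reach this path through the edge::createModel() factory
 * rather than constructing ModelImpl directly (illustrative sketch; error
 * handling elided):
 *
 * @code
 * std::ifstream file("mobilenet_v3_small.bin", std::ios::binary);
 * std::vector<uint8_t> buffer((std::istreambuf_iterator<char>(file)),
 *                             std::istreambuf_iterator<char>());
 * auto model = edge::createModel(buffer, "bin");
 * @endcode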
65 | */ 66 | auto loadModel(const nonstd::span& modelBuffer) -> STATUS final; 67 | 68 | /** 69 | * @brief Applies a delegate to the QNN backend. 70 | * @param delegate The delegate to apply. 71 | * @return The status of the operation. 72 | */ 73 | auto applyDelegate(const DELEGATE& delegate) -> STATUS final; 74 | 75 | /** 76 | * @brief Executes the QNN model. 77 | * @return The status of the operation. 78 | */ 79 | auto execute() -> STATUS final; 80 | 81 | private: 82 | /** 83 | * Loads a QNN model from a serialized binary buffer. 84 | * 85 | * This function takes a nonstd::span modelBuffer as input and 86 | * attempts to load a model from the binary data contained within it. 87 | * 88 | * @param modelBuffer A nonstd::span containing the binary data of 89 | * the model to be loaded. 90 | * 91 | * @return STATUS The status of the operation (SUCCESS or FAIL). 92 | */ 93 | auto loadFromContextBinary(const nonstd::span& modelBuffer) 94 | -> STATUS; 95 | 96 | /** 97 | * @brief Composes the graphs for the loaded QNN model. 98 | * 99 | * This function composes the graphs for the loaded QNN model based on the 100 | * model configuration. 101 | * 102 | * @return STATUS The status of the operation (SUCCESS or FAIL). 103 | */ 104 | auto composeGraphs() -> STATUS; 105 | 106 | /** 107 | * Detects graph operation precision 108 | * 109 | * This function queries the graph to detect what precision the graph should 110 | * be executed in. This is required in particular for QNN delegate 111 | */ 112 | auto detectPrecision() -> TensorType; 113 | 114 | /** 115 | * @brief Allocates input and output tensors 116 | * 117 | * This function allocates input and output tensors. Should be used before 118 | * executing. 119 | * 120 | * @return STATUS The status of the operation (SUCCESS or FAIL). 121 | */ 122 | auto allocate() -> STATUS; 123 | 124 | static auto initializeBackend() -> STATUS { 125 | if (m_backend == nullptr) { 126 | m_backend = std::make_unique(DELEGATE::NPU); 127 | } 128 | 129 | return m_backend->getCreationStatus(); 130 | } 131 | 132 | std::filesystem::path m_modelPath; ///< The path to the QNN model file 133 | 134 | static std::unique_ptr m_backend; 135 | 136 | Graph m_graph; 137 | 138 | bool m_loadCachedBinary {}; 139 | }; 140 | 141 | } // namespace edge::qnn 142 | -------------------------------------------------------------------------------- /include/edgerunner/qnn/tensor.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file tensor.hpp 3 | * @brief Definition of the TensorImpl class, a concrete implementation of the 4 | * Tensor interface. 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include "edgerunner/tensor.hpp" 15 | 16 | namespace edge::qnn { 17 | 18 | /** 19 | * @class TensorImpl 20 | * @brief Concrete implementation of the Tensor interface for QNN. 21 | */ 22 | class EDGERUNNER_EXPORT TensorImpl final : public Tensor { 23 | public: 24 | /** 25 | * @brief Constructor for TensorImpl. 26 | * @param qnnTensor Pointer to the QnnTensor object. 27 | * @param allocate Whether to allocate underlying memory for the Tensor. 
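 *
 * Passing allocate = false wraps the tensor without creating backing
 * storage, which is how ModelImpl::detectPrecision() inspects tensor types
 * cheaply (illustrative sketch; qnnTensor is an assumed Qnn_Tensor_t):
 *
 * @code
 * TensorImpl probe(&qnnTensor, false);  // no data allocation
 * const bool isFloat = probe.getType() == TensorType::FLOAT32
 *     || probe.getType() == TensorType::FLOAT16;
 * @endcode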
28 | */ 29 | explicit TensorImpl(Qnn_Tensor_t* qnnTensor = nullptr, 30 | bool allocate = true); 31 | 32 | TensorImpl(const TensorImpl& other) = default; 33 | TensorImpl(TensorImpl&&) = default; 34 | auto operator=(const TensorImpl&) -> TensorImpl& = default; 35 | auto operator=(TensorImpl&&) -> TensorImpl& = default; 36 | 37 | ~TensorImpl() final = default; 38 | 39 | /** 40 | * @brief Get the name of the tensor. 41 | * @return The name of the tensor as a string. 42 | */ 43 | auto getName() const -> std::string final; 44 | 45 | /** 46 | * @brief Get the type of the tensor. 47 | * @return The type of the tensor as a TensorType enum. 48 | */ 49 | auto getType() const -> TensorType final; 50 | 51 | /** 52 | * @brief Get the dimensions of the tensor. 53 | * @return A vector of size_t representing the dimensions of the tensor. 54 | */ 55 | auto getDimensions() const -> std::vector final; 56 | 57 | /** 58 | * @brief Get the total size of the tensor. 59 | * @return The total size of the tensor in number of elements. 60 | */ 61 | auto getSize() const -> size_t final; 62 | 63 | protected: 64 | /** 65 | * @brief Get a pointer to the data of the tensor. 66 | * @return A void pointer to the data of the tensor. 67 | */ 68 | auto getDataPtr() -> void* final; 69 | 70 | /** 71 | * @brief Get the number of bytes occupied by the tensor data. 72 | * @return The number of bytes occupied by the tensor data. 73 | */ 74 | auto getNumBytes() -> size_t final; 75 | 76 | private: 77 | /** 78 | * @brief Allocate underlying memory for the Tensor 79 | */ 80 | void allocate(); 81 | 82 | EDGERUNNER_SUPPRESS_C4251 83 | Qnn_Tensor_t* m_tensor; ///< The underlying QNN tensor 84 | 85 | EDGERUNNER_SUPPRESS_C4251 86 | std::vector 87 | m_data; ///< The underlying data backing the QNN tensor 88 | }; 89 | 90 | } // namespace edge::qnn 91 | -------------------------------------------------------------------------------- /include/edgerunner/qnn/tensorOps.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | namespace edge::qnn { 12 | 13 | using TensorVariant = std::variant, 14 | std::reference_wrapper>; 15 | 16 | using ConstTensorVariant = 17 | std::variant, 18 | std::reference_wrapper>; 19 | 20 | auto getTensorTypeVariant(Qnn_Tensor_t& tensor) -> TensorVariant; 21 | 22 | auto getTensorTypeVariant(const Qnn_Tensor_t& tensor) -> ConstTensorVariant; 23 | 24 | using TensorMemoryVariant = 25 | std::variant>; 26 | 27 | auto getTensorMemoryPtr(const Qnn_Tensor_t& tensor) -> void*; 28 | 29 | auto getQnnTensorId(const Qnn_Tensor_t& tensor) -> uint32_t; 30 | 31 | auto getQnnTensorName(const Qnn_Tensor_t& tensor) -> const char*; 32 | 33 | auto getQnnTensorType(const Qnn_Tensor_t& tensor) -> Qnn_TensorType_t; 34 | 35 | auto getQnnTensorDataFormat(const Qnn_Tensor_t& tensor) 36 | -> Qnn_TensorDataFormat_t; 37 | 38 | auto getQnnTensorDataType(const Qnn_Tensor_t& tensor) -> Qnn_DataType_t; 39 | 40 | auto getQnnTensorQuantParams(const Qnn_Tensor_t& tensor) 41 | -> Qnn_QuantizeParams_t; 42 | 43 | auto getQnnTensorRank(const Qnn_Tensor_t& tensor) -> uint32_t; 44 | 45 | auto getQnnTensorDimensions(const Qnn_Tensor_t& tensor) -> uint32_t*; 46 | 47 | auto getQnnTensorIsDynamicDimensions(const Qnn_Tensor_t& tensor) -> uint8_t*; 48 | 49 | auto getQnnTensorSparseParams(const Qnn_Tensor_t& tensor) -> Qnn_SparseParams_t; 50 | 51 | auto getQnnTensorMemType(const Qnn_Tensor_t& tensor) -> Qnn_TensorMemType_t; 52 | 53 
| auto getQnnTensorClientBuf(const Qnn_Tensor_t& tensor) -> Qnn_ClientBuffer_t; 54 | 55 | auto getQnnTensorMemHandle(const Qnn_Tensor_t& tensor) -> Qnn_MemHandle_t; 56 | 57 | void setQnnTensorId(Qnn_Tensor_t& tensor, uint32_t tensorId); 58 | 59 | void setQnnTensorName(Qnn_Tensor_t& tensor, const char* name); 60 | 61 | void setQnnTensorType(Qnn_Tensor_t& tensor, Qnn_TensorType_t type); 62 | 63 | void setQnnTensorDataFormat(Qnn_Tensor_t& tensor, 64 | Qnn_TensorDataFormat_t dataFormat); 65 | 66 | void setQnnTensorDataType(Qnn_Tensor_t& tensor, Qnn_DataType_t dataType); 67 | 68 | void setQnnTensorQuantParams(Qnn_Tensor_t& tensor, 69 | Qnn_QuantizeParams_t quantizeParams); 70 | 71 | void setQnnTensorRank(Qnn_Tensor_t& tensor, uint32_t rank); 72 | 73 | void setQnnTensorDimensions(Qnn_Tensor_t& tensor, 74 | uint32_t* const dimensions /* NOLINT */); 75 | 76 | void setQnnTensorIsDynamicDimensions(Qnn_Tensor_t& tensor, 77 | uint8_t* isDynamicDimensions); 78 | 79 | void setQnnTensorSparseParams(Qnn_Tensor_t& tensor, 80 | Qnn_SparseParams_t sparseParams); 81 | 82 | void setQnnTensorMemType(Qnn_Tensor_t& tensor, Qnn_TensorMemType_t memType); 83 | 84 | void setQnnTensorClientBuf(Qnn_Tensor_t& tensor, Qnn_ClientBuffer_t clientBuf); 85 | 86 | void setQnnTensorMemHandle(Qnn_Tensor_t& tensor, Qnn_MemHandle_t memHandle); 87 | 88 | void freeQnnTensor(Qnn_Tensor_t& tensor); 89 | 90 | auto deepCopyQnnTensorInfo(Qnn_Tensor_t& dst, const Qnn_Tensor_t& src) -> bool; 91 | 92 | auto createTensorsFromInfo(const Qnn_Tensor_t* tensorsInfoSrc, 93 | uint32_t tensorsCount) -> std::vector; 94 | 95 | } // namespace edge::qnn 96 | -------------------------------------------------------------------------------- /include/edgerunner/tensor.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file tensor.hpp 3 | * @brief Definition of the Tensor class, an opaque handler for model input and 4 | * output data 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | namespace edge { 16 | 17 | /** 18 | * @brief Enum class representing different types of tensors 19 | * 20 | * This enum class defines types of input and output Tensors for a Model. 21 | * Each type corresponds to a specific data type that the tensor can 22 | * hold. 23 | * 24 | * Possible values: 25 | * - UNSUPPORTED: Represents an unsupported tensor type 26 | * - NOTYPE: Represents a tensor with no specific data type 27 | * - FLOAT32: Represents a tensor with 32-bit floating point data type 28 | * - FLOAT16: Represents a tensor with 16-bit floating point data type 29 | * - INT32: Represents a tensor with 32-bit integer data type 30 | * - UINT32: Represents a tensor with unsigned 32-bit integer data type 31 | * - INT8: Represents a tensor with 8-bit integer data type 32 | * - UINT8: Represents a tensor with unsigned 8-bit integer data type 33 | * 34 | * @note This enum class is used to specify the data type of a Tensor 35 | */ 36 | enum class TensorType : uint8_t { 37 | UNSUPPORTED, 38 | NOTYPE, 39 | FLOAT16, 40 | FLOAT32, 41 | INT8, 42 | INT16, 43 | INT32, 44 | UINT8, 45 | UINT16, 46 | UINT32, 47 | }; 48 | 49 | /** 50 | * @brief A base class for representing a tensor object 51 | * 52 | * This class defines the interface for a tensor object, providing methods to 53 | * retrieve information about the tensor such as its name, type, dimensions, 54 | * size, and data. 
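 *
 * Typical access pattern, matching the usage in the test suite (the element
 * type passed to getTensorAs() should agree with getType()):
 *
 * @code
 * auto input = model->getInput(0);
 * if (input->getType() == TensorType::FLOAT32) {
 *     auto data = input->getTensorAs<float>();
 *     std::fill(data.begin(), data.end(), 0.0F);
 * }
 * @endcode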
55 | * 56 | * @note This class is meant to be used as a base class and should be subclassed 57 | * to provide concrete implementations. 58 | */ 59 | class Tensor { 60 | public: 61 | /** 62 | * @brief Default constructor for Tensor 63 | */ 64 | Tensor() = default; 65 | 66 | /** 67 | * @brief Copy constructor for Tensor 68 | */ 69 | Tensor(const Tensor&) = default; 70 | 71 | /** 72 | * @brief Move constructor for Tensor 73 | */ 74 | Tensor(Tensor&&) = default; 75 | 76 | /** 77 | * @brief Copy assignment operator for Tensor 78 | */ 79 | auto operator=(const Tensor&) -> Tensor& = default; 80 | 81 | /** 82 | * @brief Move assignment operator for Tensor 83 | */ 84 | auto operator=(Tensor&&) -> Tensor& = default; 85 | 86 | /** 87 | * @brief Virtual destructor for Tensor 88 | */ 89 | virtual ~Tensor() = default; 90 | 91 | /** 92 | * @brief Get the name of the tensor 93 | * 94 | * @return The name of the tensor as a string 95 | */ 96 | virtual auto getName() const -> std::string = 0; 97 | 98 | /** 99 | * @brief Get the type of the tensor 100 | * 101 | * @return The type of the tensor as a TensorType enum 102 | */ 103 | virtual auto getType() const -> TensorType = 0; 104 | 105 | /** 106 | * @brief Get the dimensions of the tensor 107 | * 108 | * @return A vector of size_t representing the dimensions of the tensor 109 | */ 110 | virtual auto getDimensions() const -> std::vector = 0; 111 | 112 | /** 113 | * @brief Get the total size of the tensor 114 | * 115 | * @return The total size of the tensor as a size_t 116 | */ 117 | virtual auto getSize() const -> size_t = 0; 118 | 119 | /** 120 | * @brief Get a non-owning span of the tensor data casted to type T 121 | * 122 | * Use getType() to ensure data is accessed correctly 123 | * 124 | * @tparam T The type to cast the tensor data to 125 | * @return A non-owning span of type T representing the tensor data 126 | */ 127 | template 128 | auto getTensorAs() -> nonstd::span; 129 | 130 | protected: 131 | /** 132 | * @brief Get a pointer to the raw data of the tensor 133 | * 134 | * @return A void pointer to the raw data of the tensor 135 | */ 136 | virtual auto getDataPtr() -> void* = 0; 137 | 138 | /** 139 | * @brief Get the total number of bytes in the tensor data 140 | * 141 | * @return The total number of bytes in the tensor data as a size_t 142 | */ 143 | virtual auto getNumBytes() -> size_t = 0; 144 | }; 145 | 146 | template 147 | auto Tensor::getTensorAs() -> nonstd::span { 148 | auto* dataPtr = getDataPtr(); 149 | 150 | if (dataPtr == nullptr) { 151 | return {}; 152 | } 153 | 154 | const auto numBytes = getNumBytes(); 155 | const auto numElementBytes = sizeof(T); 156 | 157 | const auto numElements = numBytes / numElementBytes; 158 | return {static_cast(dataPtr), numElements}; 159 | } 160 | 161 | } // namespace edge 162 | -------------------------------------------------------------------------------- /include/edgerunner/tflite/model.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file model.hpp 3 | * @brief Definition of the ModelImpl class, which implements the Model 4 | * interface for TensorFlow Lite models. 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "edgerunner/model.hpp" 16 | 17 | namespace edge::tflite { 18 | 19 | /** 20 | * @class ModelImpl 21 | * @brief Implementation of the Model interface for TensorFlow Lite models. 
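 *
 * Instances are normally obtained through the edge::createModel() factory,
 * which selects this backend for ".tflite" files (illustrative sketch;
 * return statuses should be checked):
 *
 * @code
 * auto model = edge::createModel("models/tflite/mobilenet_v3_small.tflite");
 * model->applyDelegate(DELEGATE::GPU);
 * model->execute();
 * @endcode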
22 | */ 23 | class ModelImpl final : public Model { 24 | public: 25 | /** 26 | * @brief Constructor for ModelImpl. 27 | * @param modelPath The path to the TensorFlow Lite model file. 28 | */ 29 | explicit ModelImpl(const std::filesystem::path& modelPath); 30 | 31 | /** 32 | * @brief Constructor for ModelImpl. 33 | * @param modelPath The path to the TensorFlow Lite model file. 34 | */ 35 | explicit ModelImpl(const nonstd::span& modelBuffer); 36 | 37 | ModelImpl(const ModelImpl&) = delete; 38 | ModelImpl(ModelImpl&&) = delete; 39 | auto operator=(const ModelImpl&) -> ModelImpl& = delete; 40 | auto operator=(ModelImpl&&) -> ModelImpl& = delete; 41 | 42 | /** 43 | * @brief Destructor for ModelImpl. 44 | */ 45 | ~ModelImpl() final; 46 | 47 | /** 48 | * @brief Loads the TensorFlow Lite model from the specified path. 49 | * 50 | * This function loads a TensorFlow Lite model from the specified file path. 51 | * The model file should be in the TensorFlow Lite format. 52 | * 53 | * @param modelPath The path to the TensorFlow Lite model file. 54 | * @return STATUS Returns a status indicating whether the model was 55 | * successfully loaded or not. 56 | */ 57 | auto loadModel(const std::filesystem::path& modelPath) -> STATUS final; 58 | 59 | /** 60 | * @brief Loads the TensorFlow Lite model from the specified buffer. 61 | * 62 | * This function loads a TensorFlow Lite model from the provided buffer. The 63 | * buffer should contain the raw data of the TensorFlow Lite model. 64 | * 65 | * @param modelBuffer The buffer containing the TensorFlow Lite model. 66 | * @return STATUS Returns a status indicating whether the model was 67 | * successfully loaded or not. 68 | */ 69 | auto loadModel(const nonstd::span& modelBuffer) -> STATUS final; 70 | 71 | /** 72 | * @brief Applies a delegate to the TensorFlow Lite interpreter. 73 | * @param delegate The delegate to apply. 74 | * @return The status of the operation. 75 | */ 76 | auto applyDelegate(const DELEGATE& delegate) -> STATUS final; 77 | 78 | /** 79 | * @brief Executes the TensorFlow Lite model. 80 | * @return The status of the operation. 81 | */ 82 | auto execute() -> STATUS final; 83 | 84 | private: 85 | /** 86 | * Creates a new interpreter object. 87 | * 88 | * This function initializes a new interpreter object and sets up any 89 | * necessary resources. 90 | * 91 | * @return The status of the operation. 92 | */ 93 | auto createInterpreter() -> STATUS; 94 | 95 | /** 96 | * Allocates memory for the interpreter. 97 | * 98 | * This function allocates memory for the interpreter, including input and 99 | * output tensors 100 | * 101 | * @return The status of the operation. 102 | */ 103 | auto allocate() -> STATUS; 104 | 105 | /** 106 | * Deletes the delegate object. 107 | * 108 | * This function deletes the delegate object and frees up any resources it 109 | * was using. 110 | */ 111 | void deleteDelegate(); 112 | 113 | /** 114 | * Detects graph operation precision 115 | * 116 | * This function queries the graph to detect what precision the graph should 117 | * be executed in. 
This is required in particular for QNN delegate 118 | */ 119 | auto detectPrecision() -> TensorType; 120 | 121 | std::filesystem::path 122 | m_modelPath; ///< The path to the TensorFlow Lite model file 123 | 124 | std::unique_ptr<::tflite::FlatBufferModel> 125 | m_modelBuffer; ///< The TensorFlow Lite model buffer 126 | 127 | std::unique_ptr<::tflite::Interpreter> 128 | m_interpreter; ///< The TensorFlow Lite interpreter 129 | 130 | TfLiteDelegate* m_delegate = nullptr; ///< The TensorFlow Lite delegate 131 | }; 132 | 133 | } // namespace edge::tflite 134 | -------------------------------------------------------------------------------- /include/edgerunner/tflite/tensor.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file tensor.hpp 3 | * @brief Definition of the TensorImpl class, a concrete implementation of the 4 | * Tensor interface. 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include "edgerunner/tensor.hpp" 15 | 16 | namespace edge::tflite { 17 | 18 | /** 19 | * @class TensorImpl 20 | * @brief Concrete implementation of the Tensor interface for TensorFlow Lite. 21 | */ 22 | class EDGERUNNER_EXPORT TensorImpl final : public Tensor { 23 | public: 24 | /** 25 | * @brief Constructor for TensorImpl. 26 | * @param tfLiteTensor Pointer to the TfLiteTensor object. 27 | */ 28 | explicit TensorImpl(TfLiteTensor* tfLiteTensor = nullptr) 29 | : m_tensor(tfLiteTensor) {} 30 | 31 | TensorImpl(const TensorImpl& other) = default; 32 | TensorImpl(TensorImpl&&) = default; 33 | auto operator=(const TensorImpl&) -> TensorImpl& = default; 34 | auto operator=(TensorImpl&&) -> TensorImpl& = default; 35 | 36 | ~TensorImpl() final = default; 37 | 38 | /** 39 | * @brief Get the name of the tensor. 40 | * @return The name of the tensor as a string. 41 | */ 42 | auto getName() const -> std::string final; 43 | 44 | /** 45 | * @brief Get the type of the tensor. 46 | * @return The type of the tensor as a TensorType enum. 47 | */ 48 | auto getType() const -> TensorType final; 49 | 50 | /** 51 | * @brief Get the dimensions of the tensor. 52 | * @return A vector of size_t representing the dimensions of the tensor. 53 | */ 54 | auto getDimensions() const -> std::vector final; 55 | 56 | /** 57 | * @brief Get the total size of the tensor. 58 | * @return The total size of the tensor in number of elements. 59 | */ 60 | auto getSize() const -> size_t final; 61 | 62 | protected: 63 | /** 64 | * @brief Get a pointer to the data of the tensor. 65 | * @return A void pointer to the data of the tensor. 66 | */ 67 | auto getDataPtr() -> void* final; 68 | 69 | /** 70 | * @brief Get the number of bytes occupied by the tensor data. 71 | * @return The number of bytes occupied by the tensor data. 
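 *
 * For TensorFlow Lite this is read directly from TfLiteTensor::bytes, so it
 * already accounts for the element width, whereas getSize() reports the
 * element count.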
72 | */ 73 | auto getNumBytes() -> size_t final; 74 | 75 | private: 76 | EDGERUNNER_SUPPRESS_C4251 77 | TfLiteTensor* m_tensor; ///< The underlying TFlite tensor 78 | }; 79 | 80 | } // namespace edge::tflite 81 | -------------------------------------------------------------------------------- /models/qnn/mobilenet_v3_large_quantized.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuralize-ai/edgerunner/61978814d0d627ad44d5c7bcf9b205a43f5d2483/models/qnn/mobilenet_v3_large_quantized.so -------------------------------------------------------------------------------- /models/qnn/mobilenet_v3_small.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuralize-ai/edgerunner/61978814d0d627ad44d5c7bcf9b205a43f5d2483/models/qnn/mobilenet_v3_small.bin -------------------------------------------------------------------------------- /models/qnn/mobilenet_v3_small.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuralize-ai/edgerunner/61978814d0d627ad44d5c7bcf9b205a43f5d2483/models/qnn/mobilenet_v3_small.so -------------------------------------------------------------------------------- /models/tflite/mobilenet_v3_large_quantized.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuralize-ai/edgerunner/61978814d0d627ad44d5c7bcf9b205a43f5d2483/models/tflite/mobilenet_v3_large_quantized.tflite -------------------------------------------------------------------------------- /models/tflite/mobilenet_v3_small.tflite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neuralize-ai/edgerunner/61978814d0d627ad44d5c7bcf9b205a43f5d2483/models/tflite/mobilenet_v3_small.tflite -------------------------------------------------------------------------------- /profiles/android: -------------------------------------------------------------------------------- 1 | include(default) 2 | 3 | [settings] 4 | os=Android 5 | os.api_level=24 6 | arch=armv8 7 | compiler=clang 8 | compiler.version=17 9 | compiler.libcxx=c++_static 10 | compiler.cppstd=17 11 | 12 | [tool_requires] 13 | android-ndk/r26d 14 | -------------------------------------------------------------------------------- /scripts/run_with_adb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" 4 | ROOT_DIR="${SCRIPT_DIR}/.." 5 | 6 | usage="Usage: $(basename "$0") [-h,--help] [-e,--exe ] [-b,--build-dir ] -- script to execute edgerunner executables on Android devices" 7 | 8 | EXE="" 9 | BUILD_DIR="" 10 | 11 | while [[ $# -gt 0 ]]; do 12 | case $1 in 13 | -h | --help) 14 | echo "$usage" 15 | exit 16 | ;; 17 | -e | --exe) 18 | shift 19 | if [[ -z "$1" || "$1" == --* ]]; then 20 | echo "Error: -e,--exe requires an argument." 21 | echo "$usage" 22 | exit 1 23 | fi 24 | EXE="$1" 25 | shift 26 | ;; 27 | -b | --build-dir) 28 | shift 29 | if [[ -z "$1" || "$1" == --* ]]; then 30 | echo "Error: -b,--build-dir requires an argument." 31 | echo "$usage" 32 | exit 1 33 | fi 34 | BUILD_DIR="$1" 35 | shift 36 | ;; 37 | *) 38 | echo "Unsupported option: $1" 39 | exit 1 40 | ;; 41 | esac 42 | done 43 | 44 | if [[ -z "$EXE" ]]; then 45 | echo "Error: -e,--exe is a mandatory argument." 
46 | echo "$usage" 47 | exit 1 48 | fi 49 | 50 | if [[ -z "$BUILD_DIR" ]]; then 51 | echo "Error: -b,--build-dir is a mandatory argument." 52 | echo "$usage" 53 | exit 1 54 | fi 55 | 56 | MODELS_DIR="${ROOT_DIR}/models" 57 | IMAGES_DIR="${ROOT_DIR}/images" 58 | 59 | APP_ROOT_DIR="/data/local/tmp/edgerunner" 60 | APP_BUILD_DIR="${APP_ROOT_DIR}/build" 61 | 62 | adb push --sync "${BUILD_DIR}" "${APP_BUILD_DIR}" 63 | adb push --sync "${MODELS_DIR}" "${APP_BUILD_DIR}/models" 64 | adb push --sync "${IMAGES_DIR}" "${APP_BUILD_DIR}/images" 65 | 66 | adb shell "cd ${APP_BUILD_DIR} && LD_LIBRARY_PATH=. ADSP_LIBRARY_PATH=. ./${EXE}" 67 | 68 | adb shell "rm -rf ${APP_ROOT_DIR}" 69 | -------------------------------------------------------------------------------- /source/edgerunner.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "edgerunner/edgerunner.hpp" 7 | 8 | #include 9 | 10 | #include "edgerunner/model.hpp" 11 | 12 | #ifdef EDGERUNNER_TFLITE 13 | # include "edgerunner/tflite/model.hpp" 14 | #endif 15 | 16 | #ifdef EDGERUNNER_QNN 17 | # include "edgerunner/qnn/model.hpp" 18 | #endif 19 | 20 | namespace edge { 21 | 22 | auto createModel(const std::filesystem::path& modelPath) 23 | -> std::unique_ptr { 24 | const auto modelExtension = modelPath.extension().string().substr(1); 25 | 26 | std::unique_ptr model; 27 | 28 | #ifdef EDGERUNNER_TFLITE 29 | if (modelExtension == "tflite") { 30 | model = std::make_unique(modelPath); 31 | } 32 | #endif 33 | 34 | #ifdef EDGERUNNER_QNN 35 | if (modelExtension == "so" || modelExtension == "bin") { 36 | model = std::make_unique(modelPath); 37 | } 38 | #endif 39 | 40 | if (model != nullptr && model->getCreationStatus() == STATUS::SUCCESS) { 41 | return model; 42 | } 43 | 44 | /* unsupported or failed */ 45 | return nullptr; 46 | } 47 | 48 | auto createModel(const nonstd::span& modelBuffer, 49 | const std::string& modelExtension) -> std::unique_ptr { 50 | std::unique_ptr model; 51 | 52 | #ifdef EDGERUNNER_TFLITE 53 | if (modelExtension == "tflite") { 54 | model = std::make_unique(modelBuffer); 55 | } 56 | #endif 57 | 58 | #ifdef EDGERUNNER_QNN 59 | if (modelExtension == "so" || modelExtension == "bin") { 60 | model = std::make_unique(modelBuffer); 61 | } 62 | #endif 63 | 64 | if (model != nullptr && model->getCreationStatus() == STATUS::SUCCESS) { 65 | return model; 66 | } 67 | 68 | /* unsupported or failed */ 69 | return nullptr; 70 | } 71 | 72 | } // namespace edge 73 | -------------------------------------------------------------------------------- /source/qnn/backend.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "edgerunner/qnn/backend.hpp" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "edgerunner/model.hpp" 24 | #include "edgerunner/qnn/config.hpp" 25 | 26 | namespace edge::qnn { 27 | 28 | using QnnInterfaceGetProvidersFnT = 29 | Qnn_ErrorHandle_t (*)(const QnnInterface_t***, uint32_t*); 30 | 31 | Backend::Backend(const DELEGATE delegate) 32 | : m_delegate(delegate) { 33 | setCreationStatus(loadBackend()); 34 | setCreationStatus(createLogger()); 35 | setCreationStatus(initializeBackend()); 36 | setCreationStatus(createDevice()); 37 | if (delegate == DELEGATE::NPU) { 38 | 
setCreationStatus(setPowerConfig()); 39 | } 40 | } 41 | 42 | Backend::~Backend() { 43 | destroyPowerConfig(); 44 | 45 | if (m_deviceHandle != nullptr && m_qnnInterface.deviceFree != nullptr) { 46 | m_qnnInterface.deviceFree(m_deviceHandle); 47 | } 48 | 49 | if (m_backendHandle != nullptr && m_qnnInterface.backendFree != nullptr) { 50 | m_qnnInterface.backendFree(m_backendHandle); 51 | } 52 | 53 | dlclose(m_backendLibHandle); 54 | } 55 | 56 | auto Backend::loadBackend() -> STATUS { 57 | m_backendLibHandle = 58 | dlopen(m_backendLibrariesByDelegate.at(m_delegate).c_str(), 59 | RTLD_NOW | RTLD_LOCAL); 60 | 61 | if (nullptr == m_backendLibHandle) { 62 | return STATUS::FAIL; 63 | } 64 | 65 | auto getInterfaceProviders = 66 | reinterpret_cast /* NOLINT */ ( 67 | dlsym(m_backendLibHandle, "QnnInterface_getProviders")); 68 | 69 | if (nullptr == getInterfaceProviders) { 70 | dlclose(m_backendLibHandle); 71 | return STATUS::FAIL; 72 | } 73 | 74 | QnnInterface_t** interfaceProvidersPtr {}; 75 | uint32_t numProviders {}; 76 | 77 | if (QNN_SUCCESS 78 | != getInterfaceProviders( 79 | const_cast(&interfaceProvidersPtr), 80 | &numProviders)) 81 | { 82 | dlclose(m_backendLibHandle); 83 | return STATUS::FAIL; 84 | } 85 | if (nullptr == interfaceProvidersPtr || 0 == numProviders) { 86 | dlclose(m_backendLibHandle); 87 | return STATUS::FAIL; 88 | } 89 | 90 | const nonstd::span interfaceProviders { 91 | interfaceProvidersPtr, numProviders}; 92 | 93 | uint32_t backendId = 0; 94 | for (const auto& interfaceProvider : interfaceProviders) { 95 | const auto& coreApiVersion = 96 | interfaceProvider->apiVersion.coreApiVersion; 97 | if (QNN_API_VERSION_MAJOR == coreApiVersion.major 98 | && QNN_API_VERSION_MINOR <= coreApiVersion.minor) 99 | { 100 | m_qnnInterface = interfaceProvider->QNN_INTERFACE_VER_NAME; 101 | backendId = interfaceProvider->backendId; 102 | } else { 103 | dlclose(m_backendLibHandle); 104 | return STATUS::FAIL; 105 | } 106 | } 107 | 108 | return validateBackendId(backendId); 109 | } 110 | 111 | void Backend::logCallback(const char* fmtStr, 112 | QnnLog_Level_t level, 113 | uint64_t timestamp, 114 | va_list argp) { 115 | std::string levelStr; 116 | 117 | switch (level) { 118 | case QNN_LOG_LEVEL_ERROR: 119 | levelStr = "ERROR"; 120 | break; 121 | case QNN_LOG_LEVEL_WARN: 122 | levelStr = "WARNING"; 123 | break; 124 | case QNN_LOG_LEVEL_INFO: 125 | levelStr = "INFO"; 126 | break; 127 | case QNN_LOG_LEVEL_DEBUG: 128 | levelStr = "DEBUG"; 129 | break; 130 | case QNN_LOG_LEVEL_VERBOSE: 131 | levelStr = "VERBOSE"; 132 | break; 133 | case QNN_LOG_LEVEL_MAX: 134 | levelStr = "UNKNOWN"; 135 | break; 136 | } 137 | 138 | /* NOLINTBEGIN */ 139 | std::fprintf(stderr, "%8.1lums [%-7s] ", timestamp, levelStr.c_str()); 140 | std::vfprintf(stderr, fmtStr, argp); 141 | std::fprintf(stderr, "\n"); 142 | /* NOLINTEND */ 143 | } 144 | 145 | auto Backend::createLogger() -> STATUS { 146 | if (QNN_SUCCESS 147 | != m_qnnInterface.logCreate( 148 | logCallback, QNN_LOG_LEVEL_ERROR, &m_logHandle)) 149 | { 150 | return STATUS::FAIL; 151 | } 152 | 153 | return STATUS::SUCCESS; 154 | } 155 | 156 | auto Backend::initializeBackend() -> STATUS { 157 | const auto status = m_qnnInterface.backendCreate( 158 | m_logHandle, 159 | const_cast(m_backendConfig), 160 | &m_backendHandle); 161 | if (QNN_BACKEND_NO_ERROR != status) { 162 | return STATUS::FAIL; 163 | } 164 | 165 | return STATUS::SUCCESS; 166 | } 167 | 168 | auto Backend::createDevice() -> STATUS { 169 | const auto& propertyHasCapability = m_qnnInterface.propertyHasCapability; 170 
| if (nullptr != propertyHasCapability) { 171 | auto status = propertyHasCapability(QNN_PROPERTY_GROUP_DEVICE); 172 | if (QNN_PROPERTY_ERROR_UNKNOWN_KEY == status) { 173 | return STATUS::FAIL; 174 | } 175 | } 176 | 177 | Config deviceConfig {QNN_DEVICE_CONFIG_INIT, {}}; 178 | 179 | if (nullptr != m_qnnInterface.deviceCreate) { 180 | auto qnnStatus = m_qnnInterface.deviceCreate( 181 | m_logHandle, deviceConfig.getPtr(), &m_deviceHandle); 182 | if (QNN_SUCCESS != qnnStatus) { 183 | return STATUS::FAIL; 184 | } 185 | } else { 186 | return STATUS::FAIL; 187 | } 188 | 189 | return STATUS::SUCCESS; 190 | } 191 | 192 | auto Backend::setPowerConfig() -> STATUS { 193 | if (m_delegate != DELEGATE::NPU) { 194 | return STATUS::FAIL; 195 | } 196 | 197 | QnnDevice_Infrastructure_t deviceInfrastructure = nullptr; 198 | if (QNN_SUCCESS 199 | != m_qnnInterface.deviceGetInfrastructure(&deviceInfrastructure)) 200 | { 201 | return STATUS::FAIL; 202 | } 203 | 204 | const auto* htpDeviceInfraStructure = 205 | static_cast(deviceInfrastructure); 206 | m_devicePerfInfrastructure = htpDeviceInfraStructure->perfInfra; // NOLINT 207 | ; 208 | if (QNN_SUCCESS 209 | != m_devicePerfInfrastructure.createPowerConfigId( 210 | 0, 0, &m_powerConfigId)) 211 | { 212 | return STATUS::FAIL; 213 | } 214 | 215 | QnnHtpPerfInfrastructure_PowerConfig_t powerConfig = 216 | QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIG_INIT; 217 | 218 | powerConfig.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3; 219 | 220 | auto& dcvsConfig = powerConfig.dcvsV3Config; // NOLINT 221 | 222 | dcvsConfig.dcvsEnable = 0; 223 | dcvsConfig.setDcvsEnable = 1; 224 | dcvsConfig.contextId = m_powerConfigId; 225 | 226 | dcvsConfig.powerMode = 227 | QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE; 228 | dcvsConfig.setSleepLatency = 1; 229 | dcvsConfig.setBusParams = 1; 230 | dcvsConfig.setCoreParams = 1; 231 | dcvsConfig.sleepDisable = 1; 232 | dcvsConfig.setSleepDisable = 1; 233 | 234 | /* 10-65535 us */ 235 | static constexpr uint32_t SleepLatency = 40; 236 | dcvsConfig.sleepLatency = SleepLatency; 237 | 238 | // set Bus Clock Parameters (refer QnnHtpPerfInfrastructure.h) 239 | dcvsConfig.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; 240 | dcvsConfig.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; 241 | dcvsConfig.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; 242 | 243 | // set Core Clock Parameters (refer QnnHtpPerfInfrastructure.h) 244 | dcvsConfig.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; 245 | dcvsConfig.coreVoltageCornerTarget = 246 | DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; 247 | dcvsConfig.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; 248 | 249 | // Set power config with different performance parameters 250 | std::array powerConfigs = 251 | {&powerConfig, NULL}; 252 | 253 | if (QNN_SUCCESS 254 | != m_devicePerfInfrastructure.setPowerConfig(m_powerConfigId, 255 | powerConfigs.data())) 256 | { 257 | return STATUS::FAIL; 258 | } 259 | 260 | return STATUS::SUCCESS; 261 | } 262 | 263 | auto Backend::destroyPowerConfig() const -> STATUS { 264 | if (QNN_SUCCESS 265 | != m_devicePerfInfrastructure.destroyPowerConfigId(m_powerConfigId)) 266 | { 267 | return STATUS::FAIL; 268 | } 269 | return STATUS::SUCCESS; 270 | } 271 | 272 | auto Backend::validateBackendId(const uint32_t backendId) const -> STATUS { 273 | switch (backendId) { 274 | case QNN_BACKEND_ID_CPU: 275 | return m_delegate == DELEGATE::CPU ? 
STATUS::SUCCESS : STATUS::FAIL; 276 | case QNN_BACKEND_ID_GPU: 277 | return m_delegate == DELEGATE::GPU ? STATUS::SUCCESS : STATUS::FAIL; 278 | case QNN_BACKEND_ID_HTP: 279 | return m_delegate == DELEGATE::NPU ? STATUS::SUCCESS : STATUS::FAIL; 280 | default: 281 | return STATUS::FAIL; 282 | } 283 | } 284 | 285 | } // namespace edge::qnn 286 | -------------------------------------------------------------------------------- /source/qnn/model.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "edgerunner/model.hpp" 11 | 12 | #include 13 | 14 | #include "edgerunner/qnn/backend.hpp" 15 | #include "edgerunner/qnn/model.hpp" 16 | #include "edgerunner/qnn/tensor.hpp" 17 | #include "edgerunner/tensor.hpp" 18 | 19 | namespace edge::qnn { 20 | 21 | std::unique_ptr ModelImpl::m_backend = nullptr; 22 | 23 | ModelImpl::ModelImpl(const std::filesystem::path& modelPath) 24 | : Model(modelPath) { 25 | const auto modelExtension = modelPath.extension().string().substr(1); 26 | m_loadCachedBinary = modelExtension == "bin"; 27 | 28 | setCreationStatus(initializeBackend()); 29 | if (getCreationStatus() == STATUS::FAIL) { 30 | return; 31 | } 32 | 33 | if (!m_loadCachedBinary) { 34 | setCreationStatus(loadModel(modelPath)); 35 | setCreationStatus(composeGraphs()); 36 | setPrecision(detectPrecision()); 37 | setCreationStatus( 38 | m_graph.setGraphConfig(m_backend->getDelegate(), getPrecision())); 39 | setCreationStatus(m_graph.finalizeGraphs()); 40 | 41 | // m_graphInfo.saveContextBinary(name() + ".bin"); 42 | } else { 43 | setCreationStatus(m_graph.loadSystemLibrary()); 44 | 45 | std::ifstream file(modelPath, std::ios::binary); 46 | if (!file) { 47 | setCreationStatus(STATUS::FAIL); 48 | return; 49 | } 50 | 51 | const auto bufferSize = std::filesystem::file_size(modelPath); 52 | 53 | std::vector modelBuffer(bufferSize); 54 | 55 | if (!file.read( 56 | reinterpret_cast /* NOLINT */ (modelBuffer.data()), 57 | static_cast(modelBuffer.size()))) 58 | { 59 | setCreationStatus(STATUS::FAIL); 60 | return; 61 | } 62 | 63 | setCreationStatus(loadModel(modelBuffer)); 64 | } 65 | 66 | setCreationStatus(allocate()); 67 | } 68 | 69 | ModelImpl::ModelImpl(const nonstd::span& modelBuffer) { 70 | setCreationStatus(loadModel(modelBuffer)); 71 | } 72 | 73 | auto ModelImpl::loadModel(const std::filesystem::path& modelPath) -> STATUS { 74 | return m_graph.loadFromSharedLibrary(modelPath); 75 | } 76 | 77 | auto ModelImpl::loadModel(const nonstd::span& modelBuffer) -> STATUS { 78 | return loadFromContextBinary(modelBuffer); 79 | } 80 | 81 | auto ModelImpl::applyDelegate(const DELEGATE& delegate) -> STATUS { 82 | if (delegate != DELEGATE::NPU) { 83 | return STATUS::FAIL; 84 | } 85 | 86 | setDelegate(delegate); 87 | 88 | return STATUS::SUCCESS; 89 | } 90 | 91 | auto ModelImpl::execute() -> STATUS { 92 | return m_graph.execute(); 93 | } 94 | 95 | auto ModelImpl::loadFromContextBinary(const nonstd::span& modelBuffer) 96 | -> STATUS { 97 | auto& qnnInterface = m_backend->getInterface(); 98 | auto& backendHandle = m_backend->getHandle(); 99 | auto& deviceHandle = m_backend->getDeviceHandle(); 100 | 101 | if (m_graph.loadContextFromBinary( 102 | qnnInterface, backendHandle, deviceHandle, modelBuffer) 103 | != STATUS::SUCCESS) 104 | { 105 | return STATUS::FAIL; 106 | } 107 | 108 | return m_graph.retrieveGraphFromContext(); 109 | } 110 | 111 | auto ModelImpl::composeGraphs() -> STATUS { 112 
| auto& qnnInterface = m_backend->getInterface(); 113 | auto& qnnBackendHandle = m_backend->getHandle(); 114 | auto& qnnDeviceHandle = m_backend->getDeviceHandle(); 115 | 116 | m_graph.createContext(qnnInterface, qnnBackendHandle, qnnDeviceHandle); 117 | 118 | return m_graph.composeGraphs(qnnBackendHandle); 119 | } 120 | 121 | auto ModelImpl::detectPrecision() -> TensorType { 122 | const auto inputTensorSpecs = m_graph.getInputs(); 123 | 124 | std::vector inputs; 125 | inputs.reserve(inputTensorSpecs.size()); 126 | for (auto& inputTensorSpec : inputTensorSpecs) { 127 | inputs.emplace_back(&inputTensorSpec, false); 128 | } 129 | 130 | for (auto& input : inputs) { 131 | const auto type = input.getType(); 132 | 133 | if (type == TensorType::FLOAT16 || type == TensorType::FLOAT32) { 134 | return TensorType::FLOAT16; 135 | } 136 | } 137 | 138 | return TensorType::UINT8; 139 | } 140 | 141 | auto ModelImpl::allocate() -> STATUS { 142 | auto& inputs = getInputs(); 143 | auto& outputs = getOutputs(); 144 | 145 | inputs.clear(); 146 | outputs.clear(); 147 | 148 | const auto inputTensorSpecs = m_graph.getInputs(); 149 | const auto outputTensorSpecs = m_graph.getOutputs(); 150 | 151 | if (inputTensorSpecs.data() == nullptr 152 | || outputTensorSpecs.data() == nullptr) 153 | { 154 | return STATUS::FAIL; 155 | } 156 | 157 | inputs.reserve(inputTensorSpecs.size()); 158 | for (auto& inputTensorSpec : inputTensorSpecs) { 159 | inputs.emplace_back(std::make_shared(&inputTensorSpec)); 160 | } 161 | 162 | outputs.reserve(outputTensorSpecs.size()); 163 | for (auto& outputTensorSpec : outputTensorSpecs) { 164 | outputs.emplace_back(std::make_shared(&outputTensorSpec)); 165 | } 166 | 167 | return STATUS::SUCCESS; 168 | } 169 | 170 | } // namespace edge::qnn 171 | -------------------------------------------------------------------------------- /source/qnn/tensor.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "edgerunner/tensor.hpp" 10 | 11 | #include 12 | #include 13 | 14 | #include "edgerunner/qnn/tensor.hpp" 15 | #include "edgerunner/qnn/tensorOps.hpp" 16 | 17 | namespace edge::qnn { 18 | 19 | TensorImpl::TensorImpl(Qnn_Tensor_t* qnnTensor, const bool allocate) 20 | : m_tensor(qnnTensor) { 21 | if (!allocate) { 22 | return; 23 | } 24 | 25 | /* TODO: use memhandle */ 26 | setQnnTensorMemType(*m_tensor, QNN_TENSORMEMTYPE_RAW); 27 | 28 | Qnn_ClientBuffer_t clientBuffer = QNN_CLIENT_BUFFER_INIT; 29 | 30 | const auto numBytes = getNumBytes(); 31 | 32 | m_data.resize(numBytes); 33 | 34 | clientBuffer.data = m_data.data(); 35 | clientBuffer.dataSize = static_cast(numBytes); 36 | 37 | setQnnTensorClientBuf(*m_tensor, clientBuffer); 38 | } 39 | 40 | auto TensorImpl::getName() const -> std::string { 41 | if (m_tensor == nullptr) { 42 | return ""; 43 | } 44 | 45 | auto tensorVariant = getTensorTypeVariant(*m_tensor); 46 | return std::visit([](auto&& tensor) { return tensor.get().name; }, 47 | tensorVariant); 48 | } 49 | 50 | auto TensorImpl::getType() const -> TensorType { 51 | if (m_tensor == nullptr) { 52 | return TensorType::NOTYPE; 53 | } 54 | 55 | auto tensorVariant = getTensorTypeVariant(*m_tensor); 56 | const auto qnnDataType = std::visit( 57 | [](auto&& tensor) { return tensor.get().dataType; }, tensorVariant); 58 | 59 | switch (qnnDataType) { 60 | case QNN_DATATYPE_FLOAT_16: 61 | return TensorType::FLOAT16; 62 | case QNN_DATATYPE_FLOAT_32: 63 | return 
TensorType::FLOAT32; 64 | case QNN_DATATYPE_INT_8: 65 | return TensorType::INT8; 66 | case QNN_DATATYPE_INT_16: 67 | return TensorType::INT16; 68 | case QNN_DATATYPE_INT_32: 69 | return TensorType::INT32; 70 | case QNN_DATATYPE_UINT_8: 71 | return TensorType::UINT8; 72 | case QNN_DATATYPE_UINT_16: 73 | return TensorType::UINT16; 74 | case QNN_DATATYPE_UINT_32: 75 | return TensorType::UINT32; 76 | case QNN_DATATYPE_SFIXED_POINT_8: 77 | return TensorType::INT8; 78 | case QNN_DATATYPE_SFIXED_POINT_16: 79 | return TensorType::INT16; 80 | case QNN_DATATYPE_SFIXED_POINT_32: 81 | return TensorType::INT32; 82 | case QNN_DATATYPE_UFIXED_POINT_8: 83 | return TensorType::UINT8; 84 | case QNN_DATATYPE_UFIXED_POINT_16: 85 | return TensorType::UINT16; 86 | case QNN_DATATYPE_UFIXED_POINT_32: 87 | return TensorType::UINT32; 88 | default: 89 | return TensorType::UNSUPPORTED; 90 | } 91 | } 92 | 93 | auto TensorImpl::getDimensions() const -> std::vector { 94 | if (m_tensor == nullptr) { 95 | return {}; 96 | } 97 | 98 | auto tensorVariant = getTensorTypeVariant(*m_tensor); 99 | 100 | const auto qnnDimensions = std::visit( 101 | [](auto&& tensor) { 102 | return nonstd::span {tensor.get().dimensions, 103 | tensor.get().rank}; 104 | }, 105 | tensorVariant); 106 | 107 | return {qnnDimensions.cbegin(), qnnDimensions.cend()}; 108 | } 109 | 110 | auto TensorImpl::getSize() const -> size_t { 111 | if (m_tensor == nullptr) { 112 | return {}; 113 | } 114 | 115 | const auto dimensions = getDimensions(); 116 | return static_cast(std::accumulate( 117 | dimensions.cbegin(), dimensions.cend(), 1, std::multiplies<>())); 118 | } 119 | 120 | auto TensorImpl::getDataPtr() -> void* { 121 | if (m_tensor == nullptr) { 122 | return nullptr; 123 | } 124 | 125 | return getTensorMemoryPtr(*m_tensor); 126 | } 127 | 128 | auto TensorImpl::getNumBytes() -> size_t { 129 | if (m_tensor == nullptr) { 130 | return {}; 131 | } 132 | 133 | size_t numBytes = 0; 134 | 135 | const auto type = getType(); 136 | 137 | switch (type) { 138 | case TensorType::FLOAT16: 139 | numBytes = 2; 140 | break; 141 | case TensorType::FLOAT32: 142 | numBytes = sizeof(float); 143 | break; 144 | case TensorType::INT8: 145 | numBytes = sizeof(int8_t); 146 | break; 147 | case TensorType::INT16: 148 | numBytes = sizeof(int16_t); 149 | break; 150 | case TensorType::INT32: 151 | numBytes = sizeof(int32_t); 152 | break; 153 | case TensorType::UINT8: 154 | numBytes = sizeof(uint8_t); 155 | break; 156 | case TensorType::UINT16: 157 | numBytes = sizeof(uint16_t); 158 | break; 159 | case TensorType::UINT32: 160 | numBytes = sizeof(uint32_t); 161 | break; 162 | default: 163 | return {}; 164 | } 165 | 166 | return numBytes * getSize(); 167 | } 168 | 169 | } // namespace edge::qnn 170 | -------------------------------------------------------------------------------- /source/tflite/model.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "edgerunner/model.hpp" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "edgerunner/tensor.hpp" 15 | #include "edgerunner/tflite/model.hpp" 16 | #include "edgerunner/tflite/tensor.hpp" 17 | 18 | #ifdef EDGERUNNER_GPU 19 | # include 20 | #endif 21 | 22 | #ifdef EDGERUNNER_QNN 23 | # include 24 | #endif 25 | 26 | namespace edge::tflite { 27 | 28 | ModelImpl::ModelImpl(const std::filesystem::path& modelPath) 29 | : Model(modelPath) { 30 | setCreationStatus(loadModel(modelPath)); 31 | 
setCreationStatus(createInterpreter()); 32 | setCreationStatus(allocate()); 33 | setPrecision(detectPrecision()); 34 | } 35 | 36 | ModelImpl::ModelImpl(const nonstd::span& modelBuffer) { 37 | setCreationStatus(loadModel(modelBuffer)); 38 | setCreationStatus(createInterpreter()); 39 | setCreationStatus(allocate()); 40 | setPrecision(detectPrecision()); 41 | } 42 | 43 | auto ModelImpl::loadModel(const std::filesystem::path& modelPath) -> STATUS { 44 | m_modelBuffer = ::tflite::FlatBufferModel::BuildFromFile(modelPath.c_str()); 45 | 46 | if (m_modelBuffer == nullptr) { 47 | return STATUS::FAIL; 48 | } 49 | 50 | return STATUS::SUCCESS; 51 | } 52 | 53 | auto ModelImpl::loadModel(const nonstd::span& modelBuffer) -> STATUS { 54 | m_modelBuffer = ::tflite::FlatBufferModel::BuildFromBuffer( 55 | reinterpret_cast /* NOLINT */ (modelBuffer.data()), 56 | modelBuffer.size()); 57 | 58 | if (m_modelBuffer == nullptr) { 59 | return STATUS::FAIL; 60 | } 61 | 62 | return STATUS::SUCCESS; 63 | } 64 | 65 | auto ModelImpl::createInterpreter() -> STATUS { 66 | const ::tflite::ops::builtin::BuiltinOpResolver opResolver; 67 | if (m_modelBuffer == nullptr 68 | || ::tflite::InterpreterBuilder(*m_modelBuffer, 69 | opResolver)(&m_interpreter) 70 | != kTfLiteOk) 71 | { 72 | return STATUS::FAIL; 73 | } 74 | 75 | return STATUS::SUCCESS; 76 | } 77 | 78 | auto ModelImpl::allocate() -> STATUS { 79 | if (m_interpreter == nullptr 80 | || m_interpreter->AllocateTensors() != kTfLiteOk) 81 | { 82 | return STATUS::FAIL; 83 | } 84 | 85 | const auto numInputs = m_interpreter->inputs().size(); 86 | 87 | auto& inputs = getInputs(); 88 | inputs.clear(); 89 | inputs.reserve(numInputs); 90 | 91 | for (size_t i = 0; i < numInputs; ++i) { 92 | inputs.emplace_back( 93 | std::make_shared(m_interpreter->input_tensor(i))); 94 | } 95 | 96 | const auto numOutputs = m_interpreter->outputs().size(); 97 | 98 | auto& outputs = getOutputs(); 99 | outputs.clear(); 100 | outputs.reserve(numOutputs); 101 | 102 | for (size_t i = 0; i < numOutputs; ++i) { 103 | outputs.emplace_back( 104 | std::make_shared(m_interpreter->output_tensor(i))); 105 | } 106 | 107 | return STATUS::SUCCESS; 108 | } 109 | 110 | auto ModelImpl::detectPrecision() -> TensorType { 111 | auto& inputs = getInputs(); 112 | 113 | /* NOTE: mostly for QNN delegate, if an inputs are float, use fp16 precision 114 | */ 115 | for (auto& input : inputs) { 116 | const auto type = input->getType(); 117 | if (type == TensorType::FLOAT16 || type == TensorType::FLOAT32) { 118 | return TensorType::FLOAT16; 119 | } 120 | } 121 | 122 | return TensorType::UINT8; 123 | } 124 | 125 | auto ModelImpl::applyDelegate(const DELEGATE& delegate) -> STATUS { 126 | /* undo any previous delegate */ 127 | if (createInterpreter() != STATUS::SUCCESS) { 128 | return STATUS::FAIL; 129 | } 130 | 131 | /* cannot apply delegate on top of existing delegate */ 132 | deleteDelegate(); 133 | 134 | STATUS status = STATUS::SUCCESS; 135 | if (delegate == DELEGATE::CPU) { 136 | setDelegate(delegate); 137 | } else if (delegate == DELEGATE::GPU) { 138 | #ifdef EDGERUNNER_GPU 139 | m_delegate = TfLiteGpuDelegateV2Create(nullptr); 140 | 141 | if (m_interpreter->ModifyGraphWithDelegate(m_delegate) != kTfLiteOk) { 142 | status = STATUS::FAIL; 143 | setDelegate(DELEGATE::CPU); 144 | } else { 145 | setDelegate(delegate); 146 | } 147 | #else 148 | status = STATUS::FAIL; 149 | #endif 150 | } else if (delegate == DELEGATE::NPU) { 151 | #ifdef EDGERUNNER_QNN 152 | TfLiteQnnDelegateOptions options = TfLiteQnnDelegateOptionsDefault(); 153 | 
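// Configure the QNN HTP (NPU) delegate: select the Hexagon backend, silence
// delegate logging, request FP16 arithmetic only when the graph holds
// floating point tensors, and use burst performance mode to minimise
// inference latency.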
154 | options.backend_type = kHtpBackend; 155 | options.log_level = kLogOff; 156 | if (getPrecision() == TensorType::FLOAT16) { 157 | options.htp_options.precision = kHtpFp16; 158 | } 159 | options.htp_options.performance_mode = kHtpBurst; 160 | 161 | m_delegate = TfLiteQnnDelegateCreate(&options); 162 | 163 | if (m_interpreter->ModifyGraphWithDelegate(m_delegate) != kTfLiteOk) { 164 | status = STATUS::FAIL; 165 | setDelegate(DELEGATE::CPU); 166 | } else { 167 | setDelegate(delegate); 168 | } 169 | #else 170 | status = STATUS::FAIL; 171 | #endif 172 | } 173 | 174 | allocate(); 175 | 176 | return status; 177 | } 178 | 179 | auto ModelImpl::execute() -> STATUS { 180 | if (m_interpreter->Invoke() != kTfLiteOk) { 181 | return STATUS::FAIL; 182 | } 183 | 184 | return STATUS::SUCCESS; 185 | } 186 | 187 | void ModelImpl::deleteDelegate() { 188 | if (m_delegate != nullptr) { 189 | #ifdef EDGERUNNER_GPU 190 | if (getDelegate() == DELEGATE::GPU) { 191 | TfLiteGpuDelegateV2Delete(m_delegate); 192 | } 193 | #endif 194 | 195 | if (getDelegate() == DELEGATE::NPU) { 196 | #ifdef EDGERUNNER_QNN 197 | TfLiteQnnDelegateDelete(m_delegate); 198 | #endif 199 | } 200 | } 201 | } 202 | 203 | ModelImpl::~ModelImpl() { 204 | deleteDelegate(); 205 | } 206 | 207 | } // namespace edge::tflite 208 | -------------------------------------------------------------------------------- /source/tflite/tensor.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "edgerunner/tensor.hpp" 8 | 9 | #include 10 | 11 | #include "edgerunner/tflite/tensor.hpp" 12 | 13 | namespace edge::tflite { 14 | 15 | auto TensorImpl::getName() const -> std::string { 16 | if (m_tensor == nullptr) { 17 | return ""; 18 | } 19 | return m_tensor->name; 20 | } 21 | 22 | auto TensorImpl::getType() const -> TensorType { 23 | if (m_tensor == nullptr) { 24 | return TensorType::NOTYPE; 25 | } 26 | 27 | switch (m_tensor->type) { 28 | case kTfLiteFloat16: 29 | return TensorType::FLOAT16; 30 | case kTfLiteFloat32: 31 | return TensorType::FLOAT32; 32 | case kTfLiteInt8: 33 | return TensorType::INT8; 34 | case kTfLiteInt16: 35 | return TensorType::INT16; 36 | case kTfLiteInt32: 37 | return TensorType::INT32; 38 | case kTfLiteUInt8: 39 | return TensorType::UINT8; 40 | case kTfLiteUInt16: 41 | return TensorType::UINT16; 42 | case kTfLiteUInt32: 43 | return TensorType::UINT32; 44 | 45 | default: 46 | return TensorType::UNSUPPORTED; 47 | } 48 | } 49 | 50 | auto TensorImpl::getDimensions() const -> std::vector { 51 | if (m_tensor == nullptr) { 52 | return {}; 53 | } 54 | 55 | std::vector dimensions; 56 | dimensions.reserve(static_cast(m_tensor->dims->size)); 57 | for (int i = 0; i < m_tensor->dims->size; ++i) { 58 | dimensions.push_back(static_cast(m_tensor->dims->data[i])); 59 | } 60 | 61 | return dimensions; 62 | } 63 | 64 | auto TensorImpl::getSize() const -> size_t { 65 | if (m_tensor == nullptr) { 66 | return {}; 67 | } 68 | 69 | const auto dimensions = getDimensions(); 70 | return static_cast(std::accumulate( 71 | dimensions.cbegin(), dimensions.cend(), 1, std::multiplies<>())); 72 | } 73 | 74 | auto TensorImpl::getDataPtr() -> void* { 75 | if (m_tensor == nullptr) { 76 | return nullptr; 77 | } 78 | return m_tensor->data.data; 79 | } 80 | 81 | auto TensorImpl::getNumBytes() -> size_t { 82 | if (m_tensor == nullptr) { 83 | return {}; 84 | } 85 | 86 | return m_tensor->bytes; 87 | } 88 | 89 | } // namespace edge::tflite 90 | 
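// Illustrative note: callers should choose the span element type from the
// reported TensorType; the quantized MobileNetV3 QNN model in this
// repository, for example, exposes UINT8 inputs and outputs:
//
//     auto output = model->getOutput(0);
//     if (output->getType() == edge::TensorType::UINT8) {
//         auto logits = output->getTensorAs<uint8_t>();
//     }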
-------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | 3 | project(edgerunnerTests LANGUAGES CXX) 4 | 5 | include(../cmake/project-is-top-level.cmake) 6 | include(../cmake/folders.cmake) 7 | 8 | # ---- Dependencies ---- 9 | 10 | if(PROJECT_IS_TOP_LEVEL) 11 | find_package(edgerunner REQUIRED) 12 | enable_testing() 13 | endif() 14 | 15 | find_package(Catch2 REQUIRED) 16 | include(Catch) 17 | 18 | # ---- Test data ---- 19 | 20 | if(ANDROID) 21 | foreach(dir ${CONAN_RUNTIME_LIB_DIRS}) 22 | file(GLOB_RECURSE shared_libs "${dir}/*.so") 23 | file(COPY ${shared_libs} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) 24 | endforeach() 25 | else() 26 | set(MODELS_DIR "${CMAKE_SOURCE_DIR}/models") 27 | set(MODELS_DEST_DIR "${CMAKE_CURRENT_BINARY_DIR}/models") 28 | if(UNIX) 29 | execute_process(COMMAND ln -sfn ${MODELS_DIR} ${MODELS_DEST_DIR}) 30 | elseif(WIN32) 31 | execute_process( 32 | COMMAND cmd.exe /c mklink ${MODELS_DEST_DIR} ${MODELS_DIR} 33 | ) 34 | endif() 35 | endif() 36 | 37 | # ---- Tests ---- 38 | 39 | set(TEST_SOURCES source/bad_model_test.cpp) 40 | 41 | if(edgerunner_ENABLE_TFLITE) 42 | list(APPEND TEST_SOURCES source/tflite_test.cpp 43 | source/tflite_from_buffer_test.cpp source/tflite_delegate_test.cpp 44 | source/tflite_quantized_test.cpp 45 | ) 46 | if(edgerunner_ENABLE_GPU) 47 | list(APPEND TEST_SOURCES source/tflite_gpu_test.cpp) 48 | endif() 49 | if(edgerunner_ENABLE_NPU) 50 | list(APPEND TEST_SOURCES source/tflite_npu_test.cpp) 51 | endif() 52 | endif() 53 | 54 | if(edgerunner_ENABLE_NPU) 55 | list(APPEND TEST_SOURCES source/qnn_shared_library_npu_test.cpp 56 | source/qnn_context_binary_npu_test.cpp source/qnn_quantized_test.cpp 57 | source/qnn_multiple_models_test.cpp 58 | ) 59 | endif() 60 | 61 | add_executable(edgerunner_test ${TEST_SOURCES}) 62 | target_link_libraries( 63 | edgerunner_test PRIVATE edgerunner::edgerunner Catch2::Catch2WithMain 64 | ) 65 | target_compile_features(edgerunner_test PRIVATE cxx_std_17) 66 | 67 | if(ANDROID) 68 | add_custom_target( 69 | test-android 70 | COMMAND "${CMAKE_SOURCE_DIR}/scripts/run_with_adb.sh" -b 71 | "${CMAKE_CURRENT_BINARY_DIR}" -e "edgerunner_test" 72 | VERBATIM 73 | ) 74 | add_dependencies(test-android edgerunner_test) 75 | else() 76 | catch_discover_tests(edgerunner_test) 77 | endif() 78 | 79 | # ---- End-of-file commands ---- 80 | 81 | add_folders(Test) 82 | -------------------------------------------------------------------------------- /test/source/bad_model_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #include "edgerunner/edgerunner.hpp" 12 | #include "utils.hpp" 13 | 14 | TEST_CASE("Bad model", "[model][misuse]") { 15 | const std::string badPath = "test.bin"; 16 | auto badPathModel = edge::createModel(badPath); 17 | REQUIRE(badPathModel == nullptr); 18 | 19 | const std::string wrongFormatModelPath = 20 | "models/tflite/imagenet_labels.txt"; 21 | std::ifstream wrongFormatFile(wrongFormatModelPath, std::ios::binary); 22 | std::vector wrongFormatModelBuffer( 23 | (std::istreambuf_iterator(wrongFormatFile)), 24 | std::istreambuf_iterator()); 25 | 26 | auto wrongFormatModel = edge::createModel(wrongFormatModelBuffer, "txt"); 27 | REQUIRE(wrongFormatModel == nullptr); 28 | 29 | constexpr size_t 
ModelBufferSize {10}; 30 | std::vector badModelBuffer(ModelBufferSize); 31 | 32 | auto badBufferModel = edge::createModel(badModelBuffer); 33 | REQUIRE(badBufferModel == nullptr); 34 | 35 | const std::filesystem::path badModelPath {"badModel.tflite"}; 36 | std::ofstream badModelFile(badModelPath, std::ios::binary); 37 | for (const auto& element : badModelPath) { 38 | badModelFile << element; 39 | } 40 | 41 | auto badFileModel = edge::createModel(badModelPath); 42 | REQUIRE(badFileModel == nullptr); 43 | } 44 | -------------------------------------------------------------------------------- /test/source/qnn_context_binary_npu_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "edgerunner/edgerunner.hpp" 12 | #include "edgerunner/model.hpp" 13 | #include "edgerunner/tensor.hpp" 14 | #include "utils.hpp" 15 | 16 | TEST_CASE("QNN context binary NPU runtime", "[qnn][context][npu]") { 17 | const std::string modelPath = "models/qnn/mobilenet_v3_small.bin"; 18 | 19 | auto model = edge::createModel(modelPath); 20 | REQUIRE(model != nullptr); 21 | REQUIRE(std::string {"mobilenet_v3_small"} == model->name()); 22 | REQUIRE(model->getDelegate() == edge::DELEGATE::CPU); 23 | 24 | /* ensure CPU and NPU inference have the same inputs */ 25 | auto cpuInputData = model->getInput(0)->getTensorAs(); 26 | std::fill(cpuInputData.begin(), cpuInputData.end(), 0); 27 | 28 | auto executionStatus = model->execute(); 29 | CHECK(executionStatus == edge::STATUS::SUCCESS); 30 | 31 | const auto cpuOutput = model->getOutput(0)->getTensorAs(); 32 | 33 | /* applying a new delegate releases memory, so need to copy CPU output to 34 | * compare later */ 35 | std::vector cpuResult; 36 | cpuResult.reserve(cpuOutput.size()); 37 | std::copy( 38 | cpuOutput.cbegin(), cpuOutput.cend(), std::back_inserter(cpuResult)); 39 | 40 | const auto delegateStatus = model->applyDelegate(edge::DELEGATE::NPU); 41 | REQUIRE(delegateStatus == edge::STATUS::SUCCESS); 42 | REQUIRE(model->getDelegate() == edge::DELEGATE::NPU); 43 | 44 | const auto numInputs = model->getNumInputs(); 45 | REQUIRE(numInputs == 1); 46 | 47 | const auto numOutputs = model->getNumOutputs(); 48 | REQUIRE(numOutputs == 1); 49 | 50 | auto input = model->getInput(0); 51 | REQUIRE(input->getName() == "image_tensor"); 52 | REQUIRE(input->getDimensions() == std::vector {1, 224, 224, 3}); 53 | REQUIRE(input->getType() == edge::TensorType::FLOAT32); 54 | 55 | auto inputData = input->getTensorAs(); 56 | REQUIRE(inputData.size() == input->getSize()); 57 | 58 | /* ensure CPU and NPU inference have the same inputs */ 59 | std::fill(inputData.begin(), inputData.end(), 0); 60 | 61 | executionStatus = model->execute(); 62 | REQUIRE(executionStatus == edge::STATUS::SUCCESS); 63 | 64 | BENCHMARK("execution") { 65 | return model->execute(); 66 | }; 67 | 68 | auto output = model->getOutput(0); 69 | REQUIRE(output->getName() == "class_logits"); 70 | REQUIRE(output->getDimensions() == std::vector {1, 1000}); 71 | REQUIRE(output->getType() == edge::TensorType::FLOAT32); 72 | 73 | auto outputData = output->getTensorAs(); 74 | REQUIRE(outputData.size() == output->getSize()); 75 | 76 | const auto mse = meanSquaredError(cpuResult, outputData); 77 | CAPTURE(mse); 78 | REQUIRE(mse < MseThreshold); 79 | } 80 | -------------------------------------------------------------------------------- /test/source/qnn_multiple_models_test.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "edgerunner/edgerunner.hpp" 6 | #include "edgerunner/model.hpp" 7 | #include "utils.hpp" 8 | 9 | TEST_CASE("QNN multiple models", "[qnn][multiple]") { 10 | const std::string modelPath1 = "models/qnn/mobilenet_v3_small.bin"; 11 | const std::string modelPath2 = "models/qnn/mobilenet_v3_small.so"; 12 | 13 | auto model1 = edge::createModel(modelPath1); 14 | REQUIRE(model1 != nullptr); 15 | 16 | auto model2 = edge::createModel(modelPath2); 17 | REQUIRE(model2 != nullptr); 18 | 19 | auto executionStatus = model1->execute(); 20 | REQUIRE(executionStatus == edge::STATUS::SUCCESS); 21 | 22 | executionStatus = model2->execute(); 23 | REQUIRE(executionStatus == edge::STATUS::SUCCESS); 24 | } 25 | -------------------------------------------------------------------------------- /test/source/qnn_quantized_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "edgerunner/edgerunner.hpp" 9 | #include "edgerunner/model.hpp" 10 | #include "edgerunner/tensor.hpp" 11 | #include "utils.hpp" 12 | 13 | TEST_CASE("QNN runtime quantized (NPU)", "[qnn][npu][quantized]") { 14 | const std::string modelPath = "models/qnn/mobilenet_v3_large_quantized.so"; 15 | 16 | auto model = edge::createModel(modelPath); 17 | REQUIRE(model != nullptr); 18 | REQUIRE(std::string {"mobilenet_v3_large_quantized"} == model->name()); 19 | 20 | model->applyDelegate(edge::DELEGATE::CPU); 21 | REQUIRE(model->getDelegate() == edge::DELEGATE::CPU); 22 | 23 | REQUIRE(model->getPrecision() == edge::TensorType::UINT8); 24 | 25 | const auto inputs = model->getInputs(); 26 | const auto numInputs = model->getNumInputs(); 27 | REQUIRE(numInputs == 1); 28 | REQUIRE(numInputs == inputs.size()); 29 | 30 | const auto outputs = model->getOutputs(); 31 | const auto numOutputs = model->getNumOutputs(); 32 | REQUIRE(numOutputs == 1); 33 | REQUIRE(numOutputs == outputs.size()); 34 | 35 | auto input = model->getInput(0); 36 | REQUIRE(input->getName() == "image_tensor"); 37 | REQUIRE(input->getDimensions() == std::vector {1, 224, 224, 3}); 38 | REQUIRE(input->getType() == edge::TensorType::UINT8); 39 | REQUIRE(input.get() == inputs[0].get()); 40 | 41 | auto inputData = input->getTensorAs(); 42 | REQUIRE(inputData.size() == input->getSize()); 43 | 44 | auto badInput = model->getInput(1); 45 | REQUIRE(badInput == nullptr); 46 | 47 | auto output = model->getOutput(0); 48 | REQUIRE(output->getName() == "_668"); 49 | REQUIRE(output->getDimensions() == std::vector {1, 1000}); 50 | REQUIRE(output->getType() == edge::TensorType::UINT8); 51 | REQUIRE(output.get() == outputs[0].get()); 52 | 53 | auto outputBuffer = output->getTensorAs(); 54 | REQUIRE(outputBuffer.size() == output->getSize()); 55 | 56 | auto badOutput = model->getOutput(1); 57 | REQUIRE(badOutput == nullptr); 58 | 59 | const auto executionStatus = model->execute(); 60 | REQUIRE(executionStatus == edge::STATUS::SUCCESS); 61 | 62 | /* verify output buffer is persistent across execution */ 63 | const auto newOutputBuffer = model->getOutput(0)->getTensorAs(); 64 | REQUIRE(outputBuffer.data() == newOutputBuffer.data()); 65 | REQUIRE(outputBuffer.size() == newOutputBuffer.size()); 66 | } 67 | -------------------------------------------------------------------------------- /test/source/qnn_shared_library_npu_test.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "edgerunner/edgerunner.hpp" 12 | #include "edgerunner/model.hpp" 13 | #include "edgerunner/tensor.hpp" 14 | #include "utils.hpp" 15 | 16 | TEST_CASE("QNN shared library NPU runtime", "[qnn][shared][npu]") { 17 | const std::string modelPath = "models/qnn/mobilenet_v3_small.so"; 18 | 19 | auto model = edge::createModel(modelPath); 20 | REQUIRE(model != nullptr); 21 | REQUIRE(std::string {"mobilenet_v3_small"} == model->name()); 22 | REQUIRE(model->getDelegate() == edge::DELEGATE::CPU); 23 | 24 | /* ensure CPU and NPU inference have the same inputs */ 25 | auto cpuInputData = model->getInput(0)->getTensorAs(); 26 | std::fill(cpuInputData.begin(), cpuInputData.end(), 0); 27 | 28 | auto executionStatus = model->execute(); 29 | CHECK(executionStatus == edge::STATUS::SUCCESS); 30 | 31 | const auto cpuOutput = model->getOutput(0)->getTensorAs(); 32 | 33 | /* applying a new delegate releases memory, so need to copy CPU output to 34 | * compare later */ 35 | std::vector cpuResult; 36 | cpuResult.reserve(cpuOutput.size()); 37 | std::copy( 38 | cpuOutput.cbegin(), cpuOutput.cend(), std::back_inserter(cpuResult)); 39 | 40 | const auto delegateStatus = model->applyDelegate(edge::DELEGATE::NPU); 41 | REQUIRE(delegateStatus == edge::STATUS::SUCCESS); 42 | REQUIRE(model->getDelegate() == edge::DELEGATE::NPU); 43 | 44 | const auto numInputs = model->getNumInputs(); 45 | REQUIRE(numInputs == 1); 46 | 47 | const auto numOutputs = model->getNumOutputs(); 48 | REQUIRE(numOutputs == 1); 49 | 50 | auto input = model->getInput(0); 51 | REQUIRE(input->getName() == "image_tensor"); 52 | REQUIRE(input->getDimensions() == std::vector {1, 224, 224, 3}); 53 | REQUIRE(input->getType() == edge::TensorType::FLOAT32); 54 | 55 | auto inputData = input->getTensorAs(); 56 | REQUIRE(inputData.size() == input->getSize()); 57 | 58 | /* ensure CPU and NPU inference have the same inputs */ 59 | std::fill(inputData.begin(), inputData.end(), 0); 60 | 61 | executionStatus = model->execute(); 62 | REQUIRE(executionStatus == edge::STATUS::SUCCESS); 63 | 64 | BENCHMARK("execution") { 65 | return model->execute(); 66 | }; 67 | 68 | auto output = model->getOutput(0); 69 | REQUIRE(output->getName() == "class_logits"); 70 | REQUIRE(output->getDimensions() == std::vector {1, 1000}); 71 | REQUIRE(output->getType() == edge::TensorType::FLOAT32); 72 | 73 | auto outputData = output->getTensorAs(); 74 | REQUIRE(outputData.size() == output->getSize()); 75 | 76 | const auto mse = meanSquaredError(cpuResult, outputData); 77 | CAPTURE(mse); 78 | REQUIRE(mse < MseThreshold); 79 | } 80 | -------------------------------------------------------------------------------- /test/source/tflite_delegate_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "edgerunner/edgerunner.hpp" 6 | #include "edgerunner/model.hpp" 7 | #include "utils.hpp" 8 | 9 | TEST_CASE("Tflite delegate ", "[tflite][delegate]") { 10 | const std::string modelPath = "models/tflite/mobilenet_v3_small.tflite"; 11 | 12 | auto model = edge::createModel(modelPath); 13 | REQUIRE(model != nullptr); 14 | REQUIRE(std::string {"mobilenet_v3_small"} == model->name()); 15 | REQUIRE(model->getDelegate() == edge::DELEGATE::CPU); 16 | 17 | auto delegateStatus = model->applyDelegate(edge::DELEGATE::GPU); 18 | 19 | 
#ifdef EDGERUNNER_GPU 20 | REQUIRE(delegateStatus == edge::STATUS::SUCCESS); 21 | REQUIRE(model->getDelegate() == edge::DELEGATE::GPU); 22 | #else 23 | REQUIRE(delegateStatus == edge::STATUS::FAIL); 24 | REQUIRE(model->getDelegate() == edge::DELEGATE::CPU); 25 | #endif 26 | 27 | model->applyDelegate(edge::DELEGATE::CPU); 28 | REQUIRE(model->getDelegate() == edge::DELEGATE::CPU); 29 | 30 | delegateStatus = model->applyDelegate(edge::DELEGATE::NPU); 31 | 32 | #ifdef EDGERUNNER_QNN 33 | REQUIRE(delegateStatus == edge::STATUS::SUCCESS); 34 | REQUIRE(model->getDelegate() == edge::DELEGATE::NPU); 35 | #else 36 | REQUIRE(delegateStatus == edge::STATUS::FAIL); 37 | REQUIRE(model->getDelegate() == edge::DELEGATE::CPU); 38 | #endif 39 | } 40 | -------------------------------------------------------------------------------- /test/source/tflite_from_buffer_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include "edgerunner/edgerunner.hpp" 13 | #include "edgerunner/model.hpp" 14 | #include "edgerunner/tensor.hpp" 15 | 16 | TEST_CASE("Tflite from buffer default runtime (CPU)", "[tflite][buffer][cpu]") { 17 | const std::string modelPath = "models/tflite/mobilenet_v3_small.tflite"; 18 | std::ifstream file(modelPath, std::ios::binary); 19 | std::vector modelBuffer((std::istreambuf_iterator(file)), 20 | std::istreambuf_iterator()); 21 | 22 | auto model = edge::createModel(modelBuffer, "tflite"); 23 | REQUIRE(model != nullptr); 24 | REQUIRE(model->getDelegate() == edge::DELEGATE::CPU); 25 | 26 | const auto inputs = model->getInputs(); 27 | const auto numInputs = model->getNumInputs(); 28 | REQUIRE(numInputs == 1); 29 | REQUIRE(numInputs == inputs.size()); 30 | 31 | const auto outputs = model->getOutputs(); 32 | const auto numOutputs = model->getNumOutputs(); 33 | REQUIRE(numOutputs == 1); 34 | REQUIRE(numOutputs == outputs.size()); 35 | 36 | auto input = model->getInput(0); 37 | REQUIRE(input->getName() == "image_tensor"); 38 | REQUIRE(input->getDimensions() == std::vector {1, 224, 224, 3}); 39 | REQUIRE(input->getType() == edge::TensorType::FLOAT32); 40 | REQUIRE(input.get() == inputs[0].get()); 41 | 42 | auto inputData = input->getTensorAs(); 43 | REQUIRE(inputData.size() == input->getSize()); 44 | 45 | auto output = model->getOutput(0); 46 | REQUIRE(output->getName() == "output_0"); 47 | REQUIRE(output->getDimensions() == std::vector {1, 1000}); 48 | REQUIRE(output->getType() == edge::TensorType::FLOAT32); 49 | REQUIRE(output.get() == outputs[0].get()); 50 | 51 | auto outputBuffer = output->getTensorAs(); 52 | REQUIRE(outputBuffer.size() == output->getSize()); 53 | 54 | const auto executionStatus = model->execute(); 55 | REQUIRE(executionStatus == edge::STATUS::SUCCESS); 56 | 57 | BENCHMARK("execution") { 58 | return model->execute(); 59 | }; 60 | 61 | /* verify output buffer is persistent across execution */ 62 | const auto newOutputBuffer = model->getOutput(0)->getTensorAs(); 63 | REQUIRE(outputBuffer.data() == newOutputBuffer.data()); 64 | REQUIRE(outputBuffer.size() == newOutputBuffer.size()); 65 | } 66 | -------------------------------------------------------------------------------- /test/source/tflite_gpu_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 
"edgerunner/edgerunner.hpp" 12 | #include "edgerunner/model.hpp" 13 | #include "edgerunner/tensor.hpp" 14 | #include "utils.hpp" 15 | 16 | TEST_CASE("Tflite GPU runtime", "[tflite][gpu]") { 17 | const std::string modelPath = "models/tflite/mobilenet_v3_small.tflite"; 18 | 19 | auto model = edge::createModel(modelPath); 20 | REQUIRE(model != nullptr); 21 | REQUIRE(std::string {"mobilenet_v3_small"} == model->name()); 22 | REQUIRE(model->getDelegate() == edge::DELEGATE::CPU); 23 | 24 | /* ensure CPU and GPU inference have the same inputs */ 25 | auto cpuInputData = model->getInput(0)->getTensorAs(); 26 | std::fill(cpuInputData.begin(), cpuInputData.end(), 0); 27 | 28 | auto executionStatus = model->execute(); 29 | CHECK(executionStatus == edge::STATUS::SUCCESS); 30 | 31 | const auto cpuOutput = model->getOutput(0)->getTensorAs(); 32 | 33 | /* applying a new delegate releases memory, so need to copy CPU output to 34 | * compare later */ 35 | std::vector cpuResult; 36 | cpuResult.reserve(cpuOutput.size()); 37 | std::copy( 38 | cpuOutput.cbegin(), cpuOutput.cend(), std::back_inserter(cpuResult)); 39 | 40 | const auto delegateStatus = model->applyDelegate(edge::DELEGATE::GPU); 41 | REQUIRE(delegateStatus == edge::STATUS::SUCCESS); 42 | REQUIRE(model->getDelegate() == edge::DELEGATE::GPU); 43 | 44 | const auto numInputs = model->getNumInputs(); 45 | REQUIRE(numInputs == 1); 46 | 47 | const auto numOutputs = model->getNumOutputs(); 48 | REQUIRE(numOutputs == 1); 49 | 50 | auto input = model->getInput(0); 51 | REQUIRE(input->getName() == "image_tensor"); 52 | REQUIRE(input->getDimensions() == std::vector {1, 224, 224, 3}); 53 | REQUIRE(input->getType() == edge::TensorType::FLOAT32); 54 | 55 | auto inputData = input->getTensorAs(); 56 | REQUIRE(inputData.size() == input->getSize()); 57 | 58 | /* ensure CPU and GPU inference have the same inputs */ 59 | std::fill(inputData.begin(), inputData.end(), 0); 60 | 61 | executionStatus = model->execute(); 62 | REQUIRE(executionStatus == edge::STATUS::SUCCESS); 63 | 64 | BENCHMARK("execution") { 65 | return model->execute(); 66 | }; 67 | 68 | auto output = model->getOutput(0); 69 | REQUIRE(output->getName() == "output_0"); 70 | REQUIRE(output->getDimensions() == std::vector {1, 1000}); 71 | REQUIRE(output->getType() == edge::TensorType::FLOAT32); 72 | 73 | auto outputData = output->getTensorAs(); 74 | REQUIRE(outputData.size() == output->getSize()); 75 | 76 | const auto mse = meanSquaredError(cpuResult, outputData); 77 | CAPTURE(mse); 78 | REQUIRE(mse < MseThreshold); 79 | } 80 | -------------------------------------------------------------------------------- /test/source/tflite_npu_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "edgerunner/edgerunner.hpp" 12 | #include "edgerunner/model.hpp" 13 | #include "edgerunner/tensor.hpp" 14 | #include "utils.hpp" 15 | 16 | TEST_CASE("Tflite NPU runtime", "[tflite][npu]") { 17 | const std::string modelPath = "models/tflite/mobilenet_v3_small.tflite"; 18 | 19 | auto model = edge::createModel(modelPath); 20 | REQUIRE(model != nullptr); 21 | REQUIRE(std::string {"mobilenet_v3_small"} == model->name()); 22 | REQUIRE(model->getDelegate() == edge::DELEGATE::CPU); 23 | 24 | /* ensure CPU and NPU inference have the same inputs */ 25 | auto cpuInputData = model->getInput(0)->getTensorAs(); 26 | std::fill(cpuInputData.begin(), cpuInputData.end(), 0); 27 | 28 | 
auto executionStatus = model->execute(); 29 | CHECK(executionStatus == edge::STATUS::SUCCESS); 30 | 31 | const auto cpuOutput = model->getOutput(0)->getTensorAs(); 32 | 33 | /* applying a new delegate releases memory, so need to copy CPU output to 34 | * compare later */ 35 | std::vector cpuResult; 36 | cpuResult.reserve(cpuOutput.size()); 37 | std::copy( 38 | cpuOutput.cbegin(), cpuOutput.cend(), std::back_inserter(cpuResult)); 39 | 40 | const auto delegateStatus = model->applyDelegate(edge::DELEGATE::NPU); 41 | REQUIRE(delegateStatus == edge::STATUS::SUCCESS); 42 | REQUIRE(model->getDelegate() == edge::DELEGATE::NPU); 43 | 44 | const auto numInputs = model->getNumInputs(); 45 | REQUIRE(numInputs == 1); 46 | 47 | const auto numOutputs = model->getNumOutputs(); 48 | REQUIRE(numOutputs == 1); 49 | 50 | auto input = model->getInput(0); 51 | REQUIRE(input->getName() == "image_tensor"); 52 | REQUIRE(input->getDimensions() == std::vector {1, 224, 224, 3}); 53 | REQUIRE(input->getType() == edge::TensorType::FLOAT32); 54 | 55 | auto inputData = input->getTensorAs(); 56 | REQUIRE(inputData.size() == input->getSize()); 57 | 58 | /* ensure CPU and NPU inference have the same inputs */ 59 | std::fill(inputData.begin(), inputData.end(), 0); 60 | 61 | executionStatus = model->execute(); 62 | REQUIRE(executionStatus == edge::STATUS::SUCCESS); 63 | 64 | BENCHMARK("execution") { 65 | return model->execute(); 66 | }; 67 | 68 | auto output = model->getOutput(0); 69 | REQUIRE(output->getName() == "output_0"); 70 | REQUIRE(output->getDimensions() == std::vector {1, 1000}); 71 | REQUIRE(output->getType() == edge::TensorType::FLOAT32); 72 | 73 | auto outputData = output->getTensorAs(); 74 | REQUIRE(outputData.size() == output->getSize()); 75 | 76 | const auto mse = meanSquaredError(cpuResult, outputData); 77 | CAPTURE(mse); 78 | REQUIRE(mse < MseThreshold); 79 | } 80 | -------------------------------------------------------------------------------- /test/source/tflite_quantized_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "edgerunner/edgerunner.hpp" 9 | #include "edgerunner/model.hpp" 10 | #include "edgerunner/tensor.hpp" 11 | #include "utils.hpp" 12 | 13 | TEST_CASE("Tflite default runtime quantized (CPU)", 14 | "[tflite][cpu][quantized]") { 15 | const std::string modelPath = 16 | "models/tflite/mobilenet_v3_large_quantized.tflite"; 17 | 18 | auto model = edge::createModel(modelPath); 19 | REQUIRE(model != nullptr); 20 | REQUIRE(std::string {"mobilenet_v3_large_quantized"} == model->name()); 21 | 22 | model->applyDelegate(edge::DELEGATE::CPU); 23 | REQUIRE(model->getDelegate() == edge::DELEGATE::CPU); 24 | 25 | REQUIRE(model->getPrecision() == edge::TensorType::UINT8); 26 | 27 | const auto inputs = model->getInputs(); 28 | const auto numInputs = model->getNumInputs(); 29 | REQUIRE(numInputs == 1); 30 | REQUIRE(numInputs == inputs.size()); 31 | 32 | const auto outputs = model->getOutputs(); 33 | const auto numOutputs = model->getNumOutputs(); 34 | REQUIRE(numOutputs == 1); 35 | REQUIRE(numOutputs == outputs.size()); 36 | 37 | auto input = model->getInput(0); 38 | REQUIRE(input->getName() == "image_tensor"); 39 | REQUIRE(input->getDimensions() == std::vector {1, 224, 224, 3}); 40 | REQUIRE(input->getType() == edge::TensorType::UINT8); 41 | REQUIRE(input.get() == inputs[0].get()); 42 | 43 | auto inputData = input->getTensorAs(); 44 | REQUIRE(inputData.size() == input->getSize()); 45 
| 46 | auto badInput = model->getInput(1); 47 | REQUIRE(badInput == nullptr); 48 | 49 | auto output = model->getOutput(0); 50 | REQUIRE(output->getName() == "class_logits"); 51 | REQUIRE(output->getDimensions() == std::vector {1, 1000}); 52 | REQUIRE(output->getType() == edge::TensorType::UINT8); 53 | REQUIRE(output.get() == outputs[0].get()); 54 | 55 | auto outputBuffer = output->getTensorAs(); 56 | REQUIRE(outputBuffer.size() == output->getSize()); 57 | 58 | auto badOutput = model->getOutput(1); 59 | REQUIRE(badOutput == nullptr); 60 | 61 | const auto executionStatus = model->execute(); 62 | REQUIRE(executionStatus == edge::STATUS::SUCCESS); 63 | 64 | /* verify output buffer is persistent across execution */ 65 | const auto newOutputBuffer = model->getOutput(0)->getTensorAs(); 66 | REQUIRE(outputBuffer.data() == newOutputBuffer.data()); 67 | REQUIRE(outputBuffer.size() == newOutputBuffer.size()); 68 | } 69 | -------------------------------------------------------------------------------- /test/source/tflite_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include "edgerunner/edgerunner.hpp" 9 | #include "edgerunner/model.hpp" 10 | #include "edgerunner/tensor.hpp" 11 | #include "utils.hpp" 12 | 13 | TEST_CASE("Tflite default runtime (CPU)", "[tflite][cpu]") { 14 | const std::string modelPath = "models/tflite/mobilenet_v3_small.tflite"; 15 | 16 | auto model = edge::createModel(modelPath); 17 | REQUIRE(model != nullptr); 18 | REQUIRE(std::string {"mobilenet_v3_small"} == model->name()); 19 | 20 | model->applyDelegate(edge::DELEGATE::CPU); 21 | REQUIRE(model->getDelegate() == edge::DELEGATE::CPU); 22 | 23 | REQUIRE(model->getPrecision() == edge::TensorType::FLOAT16); 24 | 25 | const auto inputs = model->getInputs(); 26 | const auto numInputs = model->getNumInputs(); 27 | REQUIRE(numInputs == 1); 28 | REQUIRE(numInputs == inputs.size()); 29 | 30 | const auto outputs = model->getOutputs(); 31 | const auto numOutputs = model->getNumOutputs(); 32 | REQUIRE(numOutputs == 1); 33 | REQUIRE(numOutputs == outputs.size()); 34 | 35 | auto input = model->getInput(0); 36 | REQUIRE(input->getName() == "image_tensor"); 37 | REQUIRE(input->getDimensions() == std::vector {1, 224, 224, 3}); 38 | REQUIRE(input->getType() == edge::TensorType::FLOAT32); 39 | REQUIRE(input.get() == inputs[0].get()); 40 | 41 | auto inputData = input->getTensorAs(); 42 | REQUIRE(inputData.size() == input->getSize()); 43 | 44 | auto badInput = model->getInput(1); 45 | REQUIRE(badInput == nullptr); 46 | 47 | auto output = model->getOutput(0); 48 | REQUIRE(output->getName() == "output_0"); 49 | REQUIRE(output->getDimensions() == std::vector {1, 1000}); 50 | REQUIRE(output->getType() == edge::TensorType::FLOAT32); 51 | REQUIRE(output.get() == outputs[0].get()); 52 | 53 | auto outputBuffer = output->getTensorAs(); 54 | REQUIRE(outputBuffer.size() == output->getSize()); 55 | 56 | auto badOutput = model->getOutput(1); 57 | REQUIRE(badOutput == nullptr); 58 | 59 | const auto executionStatus = model->execute(); 60 | REQUIRE(executionStatus == edge::STATUS::SUCCESS); 61 | 62 | BENCHMARK("execution") { 63 | return model->execute(); 64 | }; 65 | 66 | /* verify output buffer is persistent across execution */ 67 | const auto newOutputBuffer = model->getOutput(0)->getTensorAs(); 68 | REQUIRE(outputBuffer.data() == newOutputBuffer.data()); 69 | REQUIRE(outputBuffer.size() == newOutputBuffer.size()); 70 | } 71 | 
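The GPU and NPU delegate tests above (tflite_gpu_test, tflite_npu_test, and the QNN tests) share one skeleton: run a zero-filled input on the CPU, copy the output before switching delegates (applying a new delegate releases tensor memory), re-run the same input under the delegate, and require the mean squared error between the two outputs to stay below MseThreshold. The sketch below condenses that skeleton into a helper; compareWithCpu is hypothetical and not a function in this repository, and it assumes the getTensorAs<float> accessor and a float-returning meanSquaredError from utils.hpp (shown next), with the tests' status checks omitted for brevity.

/* Hypothetical helper distilling the CPU-vs-delegate comparison pattern. */
#include <algorithm>
#include <string>
#include <vector>

#include "edgerunner/edgerunner.hpp"
#include "edgerunner/model.hpp"

#include "utils.hpp"

inline auto compareWithCpu(const std::string& modelPath,
                           edge::DELEGATE delegate) -> float {
    auto model = edge::createModel(modelPath);

    /* zero-filled input for the CPU reference run */
    auto cpuInput = model->getInput(0)->getTensorAs<float>();
    std::fill(cpuInput.begin(), cpuInput.end(), 0.0F);
    model->execute();

    /* applying a new delegate releases memory, so copy the CPU output first */
    const auto cpuOutput = model->getOutput(0)->getTensorAs<float>();
    const std::vector<float> cpuResult(cpuOutput.cbegin(), cpuOutput.cend());

    model->applyDelegate(delegate);

    /* identical zero-filled input for the accelerated run */
    auto input = model->getInput(0)->getTensorAs<float>();
    std::fill(input.begin(), input.end(), 0.0F);
    model->execute();

    const auto output = model->getOutput(0)->getTensorAs<float>();
    return meanSquaredError(cpuResult, output);
}

A test could then assert, for example, REQUIRE(compareWithCpu(modelPath, edge::DELEGATE::NPU) < MseThreshold);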
-------------------------------------------------------------------------------- /test/source/utils.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | constexpr float MseThreshold = 1.0; 4 | 5 | template 6 | auto meanSquaredError(const C1& input1, const C2& input2) -> T { 7 | return std::transform_reduce(input1.cbegin(), 8 | input1.cend(), 9 | input2.cbegin(), 10 | static_cast(0), 11 | std::plus<>(), 12 | [](auto val1, auto val2) { 13 | const auto error = val1 - val2; 14 | return error * error; 15 | }) 16 | / static_cast(input1.size()); 17 | } 18 | -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 0.1.2 2 | --------------------------------------------------------------------------------
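The template header and cast arguments of meanSquaredError in utils.hpp appear above without their angle-bracketed parameters. One plausible way to write the utility out in full is sketched below; the parameter order, the float default for T, and the <numeric>/<functional> headers are assumptions inferred from the function body and from how the tests call it, not quoted source.

/* Possible full reading of test/source/utils.hpp (assumptions noted above). */
#include <functional>
#include <numeric>

constexpr float MseThreshold = 1.0;

template<typename T = float, typename C1, typename C2>
auto meanSquaredError(const C1& input1, const C2& input2) -> T {
    return std::transform_reduce(input1.cbegin(),
                                 input1.cend(),
                                 input2.cbegin(),
                                 static_cast<T>(0),
                                 std::plus<>(),
                                 [](auto val1, auto val2) {
                                     const auto error = val1 - val2;
                                     return error * error;
                                 })
        / static_cast<T>(input1.size());
}

With a defaulted T, the tests' call meanSquaredError(cpuResult, outputData) deduces only the container types and yields a float that is compared against MseThreshold.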