├── .gitignore
├── .idea
│   ├── compiler.xml
│   ├── copyright
│   │   └── profiles_settings.xml
│   ├── gradle.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── runConfigurations.xml
│   └── vcs.xml
├── README.md
├── app
│   ├── .gitignore
│   ├── CMakeLists.txt
│   ├── build.gradle
│   ├── proguard-rules.pro
│   └── src
│       ├── androidTest
│       │   └── java
│       │       └── com
│       │           └── lanytek
│       │               └── deepsensev3
│       │                   └── ExampleInstrumentedTest.java
│       ├── main
│       │   ├── AndroidManifest.xml
│       │   ├── assets
│       │   │   └── deepsense.cl
│       │   ├── cpp
│       │   │   ├── basic_functions.cpp
│       │   │   ├── classifier.cpp
│       │   │   ├── clio.cpp
│       │   │   ├── deepsense.cpp
│       │   │   ├── deepsense_lib.cpp
│       │   │   ├── include
│       │   │   │   ├── basic_functions.hpp
│       │   │   │   ├── classifier.hpp
│       │   │   │   ├── clio.hpp
│       │   │   │   ├── deepsense_internal_lib.hpp
│       │   │   │   ├── deepsense_lib.hpp
│       │   │   │   ├── layers
│       │   │   │   │   ├── conv_layer.hpp
│       │   │   │   │   ├── fully_connected.hpp
│       │   │   │   │   ├── lrn.hpp
│       │   │   │   │   ├── maxpool.hpp
│       │   │   │   │   └── softmax.hpp
│       │   │   │   ├── predefine.hpp
│       │   │   │   └── utilities.hpp
│       │   │   ├── layers
│       │   │   │   ├── conv_layer.cpp
│       │   │   │   ├── fully_connected.cpp
│       │   │   │   ├── lrn.cpp
│       │   │   │   ├── maxpool.cpp
│       │   │   │   └── softmax.cpp
│       │   │   └── utilities.cpp
│       │   ├── java
│       │   │   └── com
│       │   │       └── lanytek
│       │   │           └── deepsensev3
│       │   │               ├── MainActivity.java
│       │   │               └── Utilities.java
│       │   └── res
│       │       ├── layout
│       │       │   └── activity_main.xml
│       │       ├── mipmap-hdpi
│       │       │   └── ic_launcher.png
│       │       ├── mipmap-mdpi
│       │       │   └── ic_launcher.png
│       │       ├── mipmap-xhdpi
│       │       │   └── ic_launcher.png
│       │       ├── mipmap-xxhdpi
│       │       │   └── ic_launcher.png
│       │       ├── mipmap-xxxhdpi
│       │       │   └── ic_launcher.png
│       │       ├── values-w820dp
│       │       │   └── dimens.xml
│       │       └── values
│       │           ├── colors.xml
│       │           ├── dimens.xml
│       │           ├── strings.xml
│       │           └── styles.xml
│       └── test
│           └── java
│               └── com
│                   └── lanytek
│                       └── deepsensev3
│                           └── ExampleUnitTest.java
├── build.gradle
├── distribution
│   └── opencl
│       ├── include
│       │   └── CL
│       │       ├── cl.h
│       │       ├── cl.hpp
│       │       ├── cl_ext.h
│       │       ├── cl_ext_qcom.h
│       │       ├── cl_gl.h
│       │       ├── cl_gl_ext.h
│       │       ├── cl_perf_monitor_qcom.h
│       │       ├── cl_platform.h
│       │       └── opencl.h
│       └── lib
│           └── armeabi-v7a
│               ├── Adreno-Android5
│               │   ├── libOpenCL.so
│               │   └── libllvm-qcom.so
│               ├── Adreno-Android6
│               │   ├── libOpenCL.so
│               │   └── libllvm-qcom.so
│               ├── libGLES_mali.so
│               ├── libOpenCL.so
│               └── libllvm-qcom.so
├── gradle.properties
├── gradle
│   └── wrapper
│       ├── gradle-wrapper.jar
│       └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
└── settings.gradle
/.gitignore:
--------------------------------------------------------------------------------
1 | *.iml
2 | .gradle
3 | /local.properties
4 | /.idea/workspace.xml
5 | /.idea/libraries
6 | .DS_Store
7 | /build
8 | /captures
9 | .externalNativeBuild
10 |
--------------------------------------------------------------------------------
/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/.idea/gradle.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
18 |
19 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/.idea/runConfigurations.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeepSense
2 |
3 | Download the models below, extract them, copy them onto the mobile device, and set the path in the DeepSense app so it can load them.
4 |
5 | VGG-F Link: https://drive.google.com/file/d/0B_GMfaURPvQDQk9sU3FHdU1sUzA/view?usp=sharing
6 |
7 | Yolo Tiny Link: https://drive.google.com/file/d/0B_GMfaURPvQDZVVFMnBXQUU3X2s/view?usp=sharing
8 |
9 | ## The app is configured for the Samsung Galaxy S7 (Mali GPU). To run it on Adreno-based devices:
10 | - 1) Copy the appropriate shared libraries (libllvm-qcom.so and libOpenCL.so) from distribution/opencl/lib/armeabi-v7a/Adreno-Android5 OR distribution/opencl/lib/armeabi-v7a/Adreno-Android6 into distribution/opencl/lib/armeabi-v7a
11 | - 2) In app/CMakeLists.txt, comment out the Mali shared library (libGLES_mali.so) and uncomment the Adreno shared library instead
12 |
13 | ## To run the app
14 | - 1) Download and extract the model
15 | - 2) Copy the whole model directory onto the device's storage
16 | - 3) Change the model path in MainActivity.java to point to that directory
17 | - 4) Run :)
18 |
19 | Enjoy DeepSense
20 |
21 |
22 |
--------------------------------------------------------------------------------
/app/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 |
--------------------------------------------------------------------------------
/app/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Sets the minimum version of CMake required to build the native
2 | # library. You should either keep the default value or only pass a
3 | # value of 3.4.0 or lower.
4 |
5 | cmake_minimum_required(VERSION 3.4.1)
6 |
7 | # configure import libs
8 | set(distribution_DIR ${CMAKE_SOURCE_DIR}/../distribution)
9 | set(source_DIR ${CMAKE_SOURCE_DIR}/src/main/cpp)
10 |
11 | add_library(lib_opencl SHARED IMPORTED)
12 | set_target_properties( lib_opencl PROPERTIES IMPORTED_LOCATION ${distribution_DIR}/opencl/lib/${ANDROID_ABI}/libGLES_mali.so )
13 | #set_target_properties( lib_opencl PROPERTIES IMPORTED_LOCATION ${distribution_DIR}/opencl/lib/${ANDROID_ABI}/libllvm-qcom.so )
14 | #set_target_properties( lib_opencl PROPERTIES IMPORTED_LOCATION ${distribution_DIR}/opencl/lib/${ANDROID_ABI}/libOpenCL.so )
15 |
16 | # Creates and names a library, sets it as either STATIC
17 | # or SHARED, and provides the relative paths to its source code.
18 | # You can define multiple libraries, and CMake builds them for you.
19 | # Gradle automatically packages shared libraries with your APK.
20 |
21 | # build application's shared lib
22 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xlinker --no-warn-mismatch -O2 -mfpu=vfpv3-d16 -mhard-float -D_NDK_MATH_NO_SOFTFP=1 -march=armv7-a -mfloat-abi=hard")
23 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xlinker --no-warn-mismatch -O2 -mfpu=vfpv3-d16 -mhard-float -D_NDK_MATH_NO_SOFTFP=1 -march=armv7-a -mfloat-abi=hard -std=gnu++11")
24 | set(STLPORT_FORCE_REBUILD "true")
25 |
26 | add_library( # Sets the name of the library.
27 | deepsense
28 |
29 | # Sets the library as a shared library.
30 | SHARED
31 |
32 | # Provides a relative path to your source file(s).
33 | # Associated headers in the same location as their source
34 | # file are automatically included.
35 | src/main/cpp/deepsense.cpp
36 | ${source_DIR}/deepsense_lib.cpp
37 | ${source_DIR}/clio.cpp
38 | ${source_DIR}/basic_functions.cpp
39 | ${source_DIR}/utilities.cpp
40 | ${source_DIR}/classifier.cpp
41 | ${source_DIR}/layers/conv_layer.cpp
42 | ${source_DIR}/layers/maxpool.cpp
43 | ${source_DIR}/layers/fully_connected.cpp
44 | ${source_DIR}/layers/softmax.cpp
45 | ${source_DIR}/layers/lrn.cpp)
46 |
47 | # Searches for a specified prebuilt library and stores the path as a
48 | # variable. Because system libraries are included in the search path by
49 | # default, you only need to specify the name of the public NDK library
50 | # you want to add. CMake verifies that the library exists before
51 | # completing its build.
52 |
53 | target_include_directories(deepsense PRIVATE ${distribution_DIR}/opencl/include ${distribution_DIR}/clblast/include)
54 | include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/include)
55 |
56 | find_library( # Sets the name of the path variable.
57 | log-lib
58 |
59 | # Specifies the name of the NDK library that
60 | # you want CMake to locate.
61 | log
62 | m_hard)
63 |
64 | # Specifies libraries CMake should link to your target library. You
65 | # can link multiple libraries, such as libraries you define in the
66 | # build script, prebuilt third-party libraries, or system libraries.
67 |
68 | target_link_libraries( # Specifies the target library.
69 | deepsense
70 |
71 | lib_opencl
72 |
73 | # Links the target library to the log library
74 | # included in the NDK.
75 | ${log-lib} )
76 |
--------------------------------------------------------------------------------
/app/build.gradle:
--------------------------------------------------------------------------------
1 | apply plugin: 'com.android.application'
2 |
3 | android {
4 | compileSdkVersion 22
5 | buildToolsVersion "22.0.0"
6 | defaultConfig {
7 | applicationId "com.lanytek.deepsensev3"
8 | minSdkVersion 19
9 | targetSdkVersion 21
10 | versionCode 1
11 | versionName "1.0"
12 | testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
13 | externalNativeBuild {
14 | cmake {
15 | cppFlags "-std=c++11"
16 | }
17 | }
18 | ndk {
19 | // Specifies the ABI configurations of your native
20 | // libraries Gradle should build and package with your APK.
21 | abiFilters 'armeabi-v7a'
22 | }
23 | }
24 | buildTypes {
25 | release {
26 | minifyEnabled false
27 | proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
28 | }
29 | }
30 | sourceSets {
31 | main {
32 | // let gradle pack the shared library into apk
33 | jniLibs.srcDirs = ['../distribution/opencl/lib']
34 | }
35 | }
36 | externalNativeBuild {
37 | cmake {
38 | path "CMakeLists.txt"
39 | }
40 | }
41 | }
42 |
43 | dependencies {
44 | compile fileTree(dir: 'libs', include: ['*.jar'])
45 | androidTestCompile('com.android.support.test.espresso:espresso-core:2.2.2', {
46 | exclude group: 'com.android.support', module: 'support-annotations'
47 | })
48 | compile 'com.android.support:appcompat-v7:22+'
49 | testCompile 'junit:junit:4.12'
50 | compile 'com.squareup.picasso:picasso:2.5.2'
51 | }
52 |
--------------------------------------------------------------------------------
/app/proguard-rules.pro:
--------------------------------------------------------------------------------
1 | # Add project specific ProGuard rules here.
2 | # By default, the flags in this file are appended to flags specified
3 | # in /home/JC1DA/Android/Sdk/tools/proguard/proguard-android.txt
4 | # You can edit the include path and order by changing the proguardFiles
5 | # directive in build.gradle.
6 | #
7 | # For more details, see
8 | # http://developer.android.com/guide/developing/tools/proguard.html
9 |
10 | # Add any project specific keep options here:
11 |
12 | # If your project uses WebView with JS, uncomment the following
13 | # and specify the fully qualified class name to the JavaScript interface
14 | # class:
15 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview {
16 | # public *;
17 | #}
18 |
--------------------------------------------------------------------------------
/app/src/androidTest/java/com/lanytek/deepsensev3/ExampleInstrumentedTest.java:
--------------------------------------------------------------------------------
1 | package com.lanytek.deepsensev3;
2 |
3 | import android.content.Context;
4 | import android.support.test.InstrumentationRegistry;
5 | import android.support.test.runner.AndroidJUnit4;
6 |
7 | import org.junit.Test;
8 | import org.junit.runner.RunWith;
9 |
10 | import static org.junit.Assert.*;
11 |
12 | /**
13 | * Instrumentation test, which will execute on an Android device.
14 | *
15 | * @see Testing documentation
16 | */
17 | @RunWith(AndroidJUnit4.class)
18 | public class ExampleInstrumentedTest {
19 | @Test
20 | public void useAppContext() throws Exception {
21 | // Context of the app under test.
22 | Context appContext = InstrumentationRegistry.getTargetContext();
23 |
24 | assertEquals("com.lanytek.deepsensev3", appContext.getPackageName());
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/app/src/main/AndroidManifest.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
7 |
8 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/app/src/main/assets/deepsense.cl:
--------------------------------------------------------------------------------
1 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable
2 |
3 | static inline int getIndexFrom3D(int d1, int d2, int d3, int i1, int i2, int i3) {
4 | return i1 * (d2 * d3) + i2 * d3 + i3;
5 | }
6 |
7 | static inline int getIndexFrom4D(int d1, int d2, int d3, int d4, int i1, int i2, int i3, int i4) {
8 | return i1 * (d2 * d3 * d4) + i2 * (d3 * d4) + i3 * d4 + i4;
9 | }
10 |
11 | kernel void convertFloatToHalf(
12 | global const float *input,
13 | global half *output) {
14 | int idx = get_global_id(0);
15 | vstore_half(input[idx], 0, &output[idx]);
16 | }
17 |
18 | kernel void convertHalfToFloat(
19 | global const half *input,
20 | global float *output) {
21 | int idx = get_global_id(0);
22 | //output[idx] = convert_float(input[idx]);
23 | output[idx] = (float)input[idx];
24 | }
25 |
26 | __kernel void conv_kernel_half(
27 | __global const half *input,
28 | const int input_w,
29 | const int input_h,
30 | const int input_c,
31 | __global const half *conv_weight,
32 | __global const half *bias,
33 | const int conv_w,
34 | const int conv_h,
35 | const int conv_c,
36 | const int conv_n,
37 | const int stride_w,
38 | const int stride_h,
39 | const int pad_left,
40 | const int pad_right,
41 | const int pad_top,
42 | const int pad_bot,
43 | __global half *output,
44 | const int output_w,
45 | const int output_h,
46 | const int output_c) {
47 | int x,y,z,n,i,j;
48 |
49 | int threadId_x = get_global_id(0);
50 | int threadId_y = get_global_id(1);
51 | int threadId_z = get_global_id(2);
52 |
53 | int useBase3 = (input_c % 3 == 0) ? 1 : 0;
54 |
55 | for(n = threadId_z ; n < output_c ; n += get_global_size(2)) {
56 | for(j = threadId_y ; j < output_h ; j += get_global_size(1)) {
57 | for(i = threadId_x ; i < output_w ; i += get_global_size(0)) {
58 | half result = 0.0f;
59 | for(y = 0 ; y < conv_h ; y++) {
60 | int global_input_y = j * stride_h - pad_top + y;
61 | for(x = 0 ; x < conv_w ; x++) {
62 | int global_input_x = i * stride_w - pad_left + x;
63 | if(global_input_x >= 0 && global_input_y >= 0 && global_input_x < input_w && global_input_y < input_h) {
64 | if(useBase3 == 1) {
65 | for(z = 0 ; z < conv_c ; z += 3) {
66 | int global_filter_index = getIndexFrom4D(conv_n, conv_h, conv_w, conv_c, n, y, x, z);
67 | int global_input_index = getIndexFrom3D(input_h, input_w, input_c, global_input_y, global_input_x, z);
68 |
69 | half2 tmp_input = vload2(0, &input[global_input_index]);
70 | half2 tmp_weight = vload2(0, &conv_weight[global_filter_index]);
71 | result += dot(tmp_input, tmp_weight);
72 |
73 | result += input[global_input_index + 2] * conv_weight[global_filter_index + 2];
74 | }
75 | } else {
76 | for(z = 0 ; z < conv_c ; z += 16) {
77 | int global_filter_index = getIndexFrom4D(conv_n, conv_h, conv_w, conv_c, n, y, x, z);
78 | int global_input_index = getIndexFrom3D(input_h, input_w, input_c, global_input_y, global_input_x, z);
79 |
80 | half16 tmp_input = vload16(0, &input[global_input_index]);
81 | half16 tmp_weight = vload16(0, &conv_weight[global_filter_index]);
82 |
83 | result += dot(tmp_input.s0123, tmp_weight.s0123);
84 | result += dot(tmp_input.s4567, tmp_weight.s4567);
85 | result += dot(tmp_input.s89ab, tmp_weight.s89ab);
86 | result += dot(tmp_input.scdef, tmp_weight.scdef);
87 | }
88 | }
89 | }
90 | }
91 | }
92 |
93 | result += bias[n];
94 |
95 | output[getIndexFrom3D(output_h, output_w, output_c, j, i, n)] = result;
96 | }
97 | }
98 | }
99 | }
100 |
101 | __kernel void conv_kernel_float(
102 | __global const float *input,
103 | const int input_w,
104 | const int input_h,
105 | const int input_c,
106 | __global const float *conv_weight,
107 | __global const float *bias,
108 | const int conv_w,
109 | const int conv_h,
110 | const int conv_c,
111 | const int conv_n,
112 | const int stride_w,
113 | const int stride_h,
114 | const int pad_left,
115 | const int pad_right,
116 | const int pad_top,
117 | const int pad_bot,
118 | __global float *output,
119 | const int output_w,
120 | const int output_h,
121 | const int output_c) {
122 | int x,y,z,n,i,j;
123 |
124 | int threadId_x = get_global_id(0);
125 | int threadId_y = get_global_id(1);
126 | int threadId_z = get_global_id(2);
127 |
128 | int useBase3 = (input_c % 3 == 0) ? 1 : 0;
129 |
130 | for(n = threadId_z ; n < output_c ; n += get_global_size(2)) {
131 | for(j = threadId_y ; j < output_h ; j += get_global_size(1)) {
132 | for(i = threadId_x ; i < output_w ; i += get_global_size(0)) {
133 | float result = 0.0f;
134 | for(y = 0 ; y < conv_h ; y++) {
135 | int global_input_y = j * stride_h - pad_top + y;
136 | for(x = 0 ; x < conv_w ; x++) {
137 | int global_input_x = i * stride_w - pad_left + x;
138 | if(global_input_x >= 0 && global_input_y >= 0 && global_input_x < input_w && global_input_y < input_h) {
139 | if(useBase3 == 1) {
140 | for(z = 0 ; z < conv_c ; z += 3) {
141 | int global_filter_index = getIndexFrom4D(conv_n, conv_h, conv_w, conv_c, n, y, x, z);
142 | int global_input_index = getIndexFrom3D(input_h, input_w, input_c, global_input_y, global_input_x, z);
143 |
144 | float2 tmp_input = vload2(0, &input[global_input_index]);
145 | float2 tmp_weight = vload2(0, &conv_weight[global_filter_index]);
146 | result += dot(tmp_input, tmp_weight);
147 |
148 | result += input[global_input_index + 2] * conv_weight[global_filter_index + 2];
149 | }
150 | } else {
151 | for(z = 0 ; z < conv_c ; z += 16) {
152 | int global_filter_index = getIndexFrom4D(conv_n, conv_h, conv_w, conv_c, n, y, x, z);
153 | int global_input_index = getIndexFrom3D(input_h, input_w, input_c, global_input_y, global_input_x, z);
154 |
155 | float16 tmp_input = vload16(0, &input[global_input_index]);
156 | float16 tmp_weight = vload16(0, &conv_weight[global_filter_index]);
157 |
158 | result += dot(tmp_input.s0123, tmp_weight.s0123);
159 | result += dot(tmp_input.s4567, tmp_weight.s4567);
160 | result += dot(tmp_input.s89ab, tmp_weight.s89ab);
161 | result += dot(tmp_input.scdef, tmp_weight.scdef);
162 | }
163 | }
164 | }
165 | }
166 | }
167 |
168 | result += bias[n];
169 |
170 | output[getIndexFrom3D(output_h, output_w, output_c, j, i, n)] = result;
171 | }
172 | }
173 | }
174 | }
175 |
176 | __kernel void conv_fc_kernel_half(
177 | __global const half *input,
178 | const int input_w,
179 | const int input_h,
180 | const int input_c,
181 | __global const half *conv_weight,
182 | __global const half *bias,
183 | const int conv_w,
184 | const int conv_h,
185 | const int conv_c,
186 | const int conv_n,
187 | const int stride_w,
188 | const int stride_h,
189 | const int pad_left,
190 | const int pad_right,
191 | const int pad_top,
192 | const int pad_bot,
193 | __global half *output,
194 | const int output_w,
195 | const int output_h,
196 | const int output_c
197 | ) {
198 | for(int threadId_x = get_global_id(0) ; threadId_x < output_c ; threadId_x += get_global_size(0)) {
199 | int i;
200 | int weight_start_index = getIndexFrom4D(conv_n, conv_h, conv_w, conv_c, threadId_x, 0, 0, 0);
201 | float result = 0.0f;
202 |
203 | int remaining = conv_w * conv_h * conv_c;
204 | i = 0;
205 | while(remaining > 0 && remaining / 16 > 0) {
206 | half16 tmp_input = vload16(0, &input[i]);
207 | half16 tmp_weight = vload16(0, &conv_weight[weight_start_index + i]);
208 |
209 | result += dot(tmp_input.s0123, tmp_weight.s0123);
210 | result += dot(tmp_input.s4567, tmp_weight.s4567);
211 | result += dot(tmp_input.s89ab, tmp_weight.s89ab);
212 | result += dot(tmp_input.scdef, tmp_weight.scdef);
213 |
214 | remaining -= 16;
215 | i += 16;
216 | }
217 |
218 | while(remaining > 0 && remaining / 4 > 0) {
219 | half4 tmp_input = vload4(0, &input[i]);
220 | half4 tmp_weight = vload4(0, &conv_weight[weight_start_index + i]);
221 |
222 | result += dot(tmp_input, tmp_weight);
223 |
224 | remaining -= 4;
225 | i += 4;
226 | }
227 |
228 | while(remaining > 0) {
229 | result += input[i] * conv_weight[weight_start_index + i];
230 |
231 | remaining--;
232 | i++;
233 | }
234 |
235 | result += bias[threadId_x];
236 |
237 | output[threadId_x] = result;
238 | //vstore_half(result, 0, &output[threadId_x]);
239 | }
240 | }
241 |
242 | __kernel void conv_fc_kernel_float(
243 | __global const float *input,
244 | const int input_w,
245 | const int input_h,
246 | const int input_c,
247 | __global const float *conv_weight,
248 | __global const float *bias,
249 | const int conv_w,
250 | const int conv_h,
251 | const int conv_c,
252 | const int conv_n,
253 | const int stride_w,
254 | const int stride_h,
255 | const int pad_left,
256 | const int pad_right,
257 | const int pad_top,
258 | const int pad_bot,
259 | __global float *output,
260 | const int output_w,
261 | const int output_h,
262 | const int output_c
263 | ) {
264 | for(int threadId_x = get_global_id(0) ; threadId_x < output_c ; threadId_x += get_global_size(0)) {
265 | int weight_start_index = getIndexFrom4D(conv_n, conv_h, conv_w, conv_c, threadId_x, 0, 0, 0);
266 | float result = 0.0f;
267 |
268 | int remaining = conv_w * conv_h * conv_c;
269 | int i = 0;
270 | while(remaining > 0 && remaining / 16 > 0) {
271 | float16 tmp_input = vload16(0, &input[i]);
272 | float16 tmp_weight = vload16(0, &conv_weight[weight_start_index + i]);
273 |
274 | result += dot(tmp_input.s0123, tmp_weight.s0123);
275 | result += dot(tmp_input.s4567, tmp_weight.s4567);
276 | result += dot(tmp_input.s89ab, tmp_weight.s89ab);
277 | result += dot(tmp_input.scdef, tmp_weight.scdef);
278 |
279 | remaining -= 16;
280 | i += 16;
281 | }
282 |
283 | while(remaining > 0 && remaining / 4 > 0) {
284 | float4 tmp_input = vload4(0, &input[i]);
285 | float4 tmp_weight = vload4(0, &conv_weight[weight_start_index + i]);
286 |
287 | result += dot(tmp_input, tmp_weight);
288 |
289 | remaining -= 4;
290 | i += 4;
291 | }
292 |
293 | while(remaining > 0) {
294 | result += input[i] * conv_weight[weight_start_index + i];
295 |
296 | remaining--;
297 | i++;
298 | }
299 |
300 | result += bias[threadId_x];
301 |
302 | output[threadId_x] = result;
303 | }
304 | }
305 |
306 | kernel void fully_connected_kernel_half(
307 | global const half *input_frame,
308 | const int input_w,
309 | const int input_h,
310 | const int input_d,
311 | global const half *layer_W,
312 | global const half *layer_bias,
313 | global half *output_frame,
314 | const int output_size
315 | ) {
316 | int thrIdx = get_global_id(0);
317 | int maxThreads = get_global_size(0);
318 |
319 | for(int n = thrIdx; n < output_size ; n += maxThreads) {
320 | float result = 0.0f;
321 |
322 | int input_idx = 0;
323 | int filter_idx = n * input_h * input_w * input_d;
324 |
325 | int idx_remaining = input_h * input_w * input_d;
326 |
327 | while(idx_remaining >= 4) {
328 | half4 tmp1 = vload4(0, &input_frame[input_idx]);
329 | half4 tmp2 = vload4(0, &layer_W[filter_idx]);
330 | result += dot(tmp1,tmp2);
331 |
332 | input_idx += 4;
333 | filter_idx += 4;
334 | idx_remaining -= 4;
335 | }
336 |
337 | while(idx_remaining >= 2) {
338 | half2 tmp1 = vload2(0, &input_frame[input_idx]);
339 | half2 tmp2 = vload2(0, &layer_W[filter_idx]);
340 | result += dot(tmp1,tmp2);
341 |
342 | input_idx += 2;
343 | filter_idx += 2;
344 | idx_remaining -= 2;
345 | }
346 |
347 | while(idx_remaining > 0) {
348 | half tmp1 = input_frame[input_idx];
349 | half tmp2 = layer_W[filter_idx];
350 | result += tmp1 * tmp2;
351 |
352 | idx_remaining -= 1;
353 | }
354 |
355 | result += layer_bias[n];
356 |
357 | output_frame[n] = result;
358 | }
359 | }
360 |
361 | kernel void fully_connected_kernel_float(
362 | global const float *input_frame,
363 | const int input_w,
364 | const int input_h,
365 | const int input_d,
366 | global const float *layer_W,
367 | global const float *layer_bias,
368 | global float *output_frame,
369 | const int output_size
370 | ) {
371 | int thrIdx = get_global_id(0);
372 | int maxThreads = get_global_size(0);
373 |
374 | for(int n = thrIdx; n < output_size ; n += maxThreads) {
375 | float result = 0.0f;
376 |
377 | int input_idx = 0;
378 | int filter_idx = n * input_h * input_w * input_d;
379 |
380 | int idx_remaining = input_h * input_w * input_d;
381 |
382 | while(idx_remaining >= 4) {
383 | float4 tmp1 = vload4(0, &input_frame[input_idx]);
384 | float4 tmp2 = vload4(0, &layer_W[filter_idx]);
385 | result += dot(tmp1,tmp2);
386 |
387 | input_idx += 4;
388 | filter_idx += 4;
389 | idx_remaining -= 4;
390 | }
391 |
392 | while(idx_remaining > 0) {
393 | float tmp1 = input_frame[input_idx];
394 | float tmp2 = layer_W[filter_idx];
395 | result += tmp1 * tmp2;
396 |
397 | idx_remaining -= 1;
398 | }
399 |
400 | result += layer_bias[n];
401 |
402 | output_frame[n] = result;
403 | }
404 | }
405 |
406 | __kernel void maxpool_kernel_half(
407 | __global const half *input_frame,
408 | const int input_w,
409 | const int input_h,
410 | const int input_d,
411 | const int size,
412 | const int stride_1,
413 | const int stride_2,
414 | const int pad_1,
415 | const int pad_2,
416 | const int pad_3,
417 | const int pad_4,
418 | __global half *output_frame,
419 | const int output_w,
420 | const int output_h,
421 | const int output_d) {
422 |
423 | int thrId_i = get_global_id(0);
424 | int thrId_j = get_global_id(1);
425 | int thrId_k = get_global_id(2);
426 |
427 | int max_i = get_global_size(0);
428 | int max_j = get_global_size(1);
429 | int max_k = get_global_size(2);
430 |
431 | int i,j,k;
432 | int x,y;
433 |
434 | for(i = thrId_i ; i < output_w ; i += max_i) {
435 | for(j = thrId_j ; j < output_h ; j += max_j) {
436 | for(k = thrId_k ; k < output_d ; k += max_k) {
437 | half max = -9999.9f;
438 | for(x = 0 ; x < size ; x++) {
439 | for(y = 0 ; y < size ; y++) {
440 | int x_ = i * stride_1 + x - pad_1;
441 | int y_ = j * stride_2 + y - pad_3;
442 | int valid = (x_ >= 0 && x_ < input_w && y_ >= 0 && y_ < input_h);
443 | //float val = (valid != 0) ? input_frame[getIndexFrom3D(input_h, input_w, input_d, y_, x_, k)] : -999999.9f;
444 | half val = (valid != 0) ? input_frame[getIndexFrom3D(input_h, input_w, input_d, y_, x_, k)] : 0.0f;
445 | max = (val > max) ? val : max;
446 | }
447 | }
448 | output_frame[getIndexFrom3D(output_h, output_w, output_d, j, i, k)] = max;
449 | //vstore_half(max, 0, &output_frame[getIndexFrom3D(output_h, output_w, output_d, j, i, k)]);
450 | }
451 | }
452 | }
453 | }
454 |
455 | __kernel void maxpool_kernel_float(
456 | __global const float *input_frame,
457 | const int input_w,
458 | const int input_h,
459 | const int input_d,
460 | const int size,
461 | const int stride_1,
462 | const int stride_2,
463 | const int pad_1,
464 | const int pad_2,
465 | const int pad_3,
466 | const int pad_4,
467 | __global float *output_frame,
468 | const int output_w,
469 | const int output_h,
470 | const int output_c) {
471 |
472 | int thrId_i = get_global_id(0);
473 | int thrId_j = get_global_id(1);
474 | int thrId_k = get_global_id(2);
475 |
476 | int max_i = get_global_size(0);
477 | int max_j = get_global_size(1);
478 | int max_k = get_global_size(2);
479 |
480 | int i,j,k;
481 | int x,y;
482 |
483 | for(i = thrId_i ; i < output_w ; i += max_i) {
484 | for(j = thrId_j ; j < output_h ; j += max_j) {
485 | for(k = thrId_k ; k < output_c ; k += max_k) {
486 | float max = -9999.9f;
487 | for(x = 0 ; x < size ; x++) {
488 | for(y = 0 ; y < size ; y++) {
489 | int x_ = i * stride_1 + x - pad_1;
490 | int y_ = j * stride_2 + y - pad_3;
491 | int valid = (x_ >= 0 && x_ < input_w && y_ >= 0 && y_ < input_h);
492 | //float val = (valid != 0) ? input_frame[getIndexFrom3D(input_h, input_w, input_d, y_, x_, k)] : -999999.9f;
493 | float val = (valid != 0) ? input_frame[getIndexFrom3D(input_h, input_w, input_d, y_, x_, k)] : 0.0f;
494 | max = (val > max) ? val : max;
495 | }
496 | }
497 | output_frame[getIndexFrom3D(output_h, output_w, output_c, j, i, k)] = max;
498 | }
499 | }
500 | }
501 | }
502 |
503 | __kernel void cross_channels_lrn_kernel_half(
504 | __global const half *in, //[h x w x c]
505 | const int channels,
506 | const int height,
507 | const int width,
508 | const int k,
509 | const int size,
510 | const float alpha_over_size,
511 | const float beta,
512 | __global half *out) {
513 |
514 | half beta_half = 0.0f;
515 | vstore_half(beta, 0, &beta_half);
516 |
517 | for(int w = get_global_id(0) ; w < width ; w += get_global_size(0)) {
518 | for(int h = get_global_id(1) ; h < height ; h += get_global_size(1)) {
519 | int offset = (h * width + w) * channels;
520 | int head = 0;
521 | int pre_pad = (size - 1) / 2;
522 | int post_pad = size - pre_pad - 1;
523 | half accum_scale = 0;
524 |
525 | while (head < post_pad) {
526 | half data = in[offset + head];
527 | accum_scale += data * data;
528 | head++;
529 | }
530 |
531 | while (head < size) {
532 | half data = in[offset + head];
533 | accum_scale += data * data;
534 | half scale = k + accum_scale * alpha_over_size;
535 | out[offset + head - post_pad] = in[offset + head - post_pad] * pow(scale, -beta_half);
536 | head++;
537 | }
538 |
539 | while (head < channels) {
540 | half data = in[offset + head];
541 | accum_scale += data * data;
542 | data = in[offset + head - size];
543 | accum_scale -= data * data;
544 | half scale = k + accum_scale * alpha_over_size;
545 | out[offset + head - post_pad] = in[offset + head - post_pad] * pow(scale, -beta_half);
546 | head++;
547 | }
548 |
549 | while (head < channels + post_pad) {
550 | half data = in[offset + head - size];
551 | accum_scale -= data * data;
552 | half scale = k + accum_scale * alpha_over_size;
553 | out[offset + head - post_pad] = in[offset + head - post_pad] * pow(scale, -beta_half);
554 | head++;
555 | }
556 | }
557 | }
558 | }
559 |
560 | __kernel void cross_channels_lrn_kernel_float(
561 | __global const float *input, //[h x w x c]
562 | const int channels,
563 | const int height,
564 | const int width,
565 | const int k,
566 | const int size,
567 | const float alpha_over_size,
568 | const float beta,
569 | __global float *output) {
570 |
571 | for(int w = get_global_id(0) ; w < width ; w += get_global_size(0)) {
572 | for(int h = get_global_id(1) ; h < height ; h += get_global_size(1)) {
573 | int offset = getIndexFrom3D(height, width, channels, h, w, 0);
574 | int head = 0;
575 | int pre_pad = (size - 1) / 2;
576 | int post_pad = size - pre_pad - 1;
577 | float accum_scale = 0;
578 |
579 | const __global float *in = input + offset;
580 | __global float *out = output + offset;
581 |
582 | while (head < post_pad) {
583 | float data = in[head];
584 | accum_scale += data * data;
585 | head++;
586 | }
587 |
588 | while (head < size) {
589 | float data = in[head];
590 | accum_scale += data * data;
591 | float scale = k + accum_scale * alpha_over_size;
592 | out[head - post_pad] = in[head - post_pad] * pow(scale, -beta);
593 | head++;
594 | }
595 |
596 | while (head < channels) {
597 | float data = in[head];
598 | accum_scale += data * data;
599 | data = in[head - size];
600 | accum_scale -= data * data;
601 | float scale = k + accum_scale * alpha_over_size;
602 | out[head - post_pad] = in[head - post_pad] * pow(scale, -beta);
603 | head++;
604 | }
605 |
606 | while (head < channels + post_pad) {
607 | float data = in[head - size];
608 | accum_scale -= data * data;
609 | float scale = k + accum_scale * alpha_over_size;
610 | out[ head - post_pad] = in[head - post_pad] * pow(scale, -beta);
611 | head++;
612 | }
613 | }
614 | }
615 | }
616 |
617 | __kernel void activation_kernel_half(
618 | __global half *data,
619 | const int activation) {
620 | half result = data[get_global_id(0)];
621 |
622 | switch(activation) {
623 | case 0:
624 | //no activation
625 | break;
626 | case 1:
627 | //RAMP
628 | result = result * (result > 0) + 0.1 * result;
629 | break;
630 | case 2:
631 | //LOGISTIC
632 | result = 1.0 / (1.0 + exp(-result));
633 | break;
634 | case 3:
635 | //LEAKY
636 | result = (result > 0) ? result : 0.1 * result;
637 | break;
638 | case 4:
639 | //LINEAR
640 | break;
641 | case 5:
642 | //RELU
643 | result = (result > 0) ? result : 0.0f;
644 | break;
645 | }
646 |
647 | data[get_global_id(0)] = result;
648 | }
649 |
650 | __kernel void activation_kernel_float(
651 | __global float *data,
652 | const int activation) {
653 | float result = data[get_global_id(0)];
654 |
655 | switch(activation) {
656 | case 0:
657 | //no activation
658 | break;
659 | case 1:
660 | //RAMP
661 | result = result * (result > 0) + 0.1 * result;
662 | break;
663 | case 2:
664 | //LOGISTIC
665 | result = 1.0 / (1.0 + exp(-result));
666 | break;
667 | case 3:
668 | //LEAKY
669 | result = (result > 0) ? result : 0.1 * result;
670 | break;
671 | case 4:
672 | //LINEAR
673 | break;
674 | case 5:
675 | //RELU
676 | result = (result > 0) ? result : 0.0f;
677 | break;
678 | }
679 |
680 | data[get_global_id(0)] = result;
681 | }
--------------------------------------------------------------------------------
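The kernels in deepsense.cl share two conventions: activations are stored in HWC order and weights in NHWC order (see getIndexFrom3D/getIndexFrom4D), and every kernel uses grid-stride loops so any global work size covers the whole output volume. In the convolution kernels the channel loop is vectorized with vload2 plus a scalar tap in steps of 3 when the input channel count is a multiple of 3, and with vload16 in steps of 16 otherwise. Stripped of that vectorization, each output element reduces to the plain multiply-accumulate below; this is a reference-only C++ sketch of what conv_kernel_float computes for one output coordinate (i, j, n), not code taken from the repo.

```cpp
// Reference-only sketch: one output element of conv_kernel_float in plain C++.
// input  is HWC  [input_h][input_w][input_c]
// weight is NHWC [conv_n][conv_h][conv_w][conv_c]
float conv_single_output(const float *input, int input_w, int input_h, int input_c,
                         const float *weight, const float *bias,
                         int conv_w, int conv_h, int conv_c, int conv_n,
                         int stride_w, int stride_h, int pad_left, int pad_top,
                         int i, int j, int n) {
    float result = 0.0f;
    for (int y = 0; y < conv_h; y++) {
        int gy = j * stride_h - pad_top + y;           // input row for this filter tap
        for (int x = 0; x < conv_w; x++) {
            int gx = i * stride_w - pad_left + x;      // input column for this filter tap
            if (gx < 0 || gy < 0 || gx >= input_w || gy >= input_h)
                continue;                              // zero padding: skip out-of-bounds taps
            for (int z = 0; z < conv_c; z++) {
                int wi = ((n * conv_h + y) * conv_w + x) * conv_c + z;  // getIndexFrom4D
                int ii = (gy * input_w + gx) * input_c + z;             // getIndexFrom3D
                result += input[ii] * weight[wi];
            }
        }
    }
    return result + bias[n];   // the kernel stores this at getIndexFrom3D(output_h, output_w, output_c, j, i, n)
}
```

The cross_channels_lrn kernels use the same layout and compute out[c] = in[c] * (k + alpha_over_size * S(c))^(-beta), where S(c) is the sum of squares over a sliding window of `size` channels that the kernels maintain incrementally in accum_scale.
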
/app/src/main/cpp/basic_functions.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | timestamp_t get_timestamp () {
11 | struct timeval now;
12 | gettimeofday (&now, NULL);
13 | return now.tv_usec + (timestamp_t)now.tv_sec * 1000000;
14 | }
15 |
16 | int getIndexFrom4D(int d1, int d2, int d3, int d4, int i1, int i2, int i3, int i4) {
17 | return i1 * (d2 * d3 * d4) + i2 * (d3 * d4) + i3 * d4 + i4;
18 | }
19 |
20 | float getDataFrom4D(float *data, int d1, int d2, int d3, int d4, int i1, int i2, int i3, int i4) {
21 | int index = i1 * (d2 * d3 * d4) + i2 * (d3 * d4) + i3 * d4 + i4;
22 | return data[index];
23 | }
24 |
25 | int getIndexFrom3D(int d1, int d2, int d3, int i1, int i2, int i3) {
26 | return i1 * (d2 * d3) + i2 * d3 + i3;
27 | }
28 |
29 | float getDataFrom3D(float *data, int d1, int d2, int d3, int i1, int i2, int i3) {
30 | int index = i1 * (d2 * d3) + i2 * d3 + i3;
31 | return data[index];
32 | }
33 |
34 | cnn_frame *activate_RAMP(cnn_frame *frame) {
35 | int i;
36 | for(i = 0 ; i < frame->c * frame->h * frame->w ; i++) {
37 | float x = frame->data[i];
38 | frame->data[i] = x * (x > 0) + 0.1 * x;
39 | }
40 | return frame;
41 | }
42 |
43 | cnn_frame *activate_LOGISTIC(cnn_frame *frame) {
44 | int i;
45 | for(i = 0 ; i < frame->c * frame->h * frame->w ; i++) {
46 | float x = frame->data[i];
47 | frame->data[i] = 1./(1. + exp(-x));
48 | }
49 | return frame;
50 | }
51 |
52 | cnn_frame *activate_RELU(cnn_frame *frame) {
53 | int i;
54 | for(i = 0 ; i < frame->c * frame->h * frame->w ; i++) {
55 | float x = frame->data[i];
56 | frame->data[i] =(x > 0) ? x : 0;
57 | }
58 | return frame;
59 | }
60 |
61 | cnn_frame *activate_LEAKY(cnn_frame *frame) {
62 | int i;
63 | for(i = 0 ; i < frame->c * frame->h * frame->w ; i++) {
64 | float x = frame->data[i];
65 | frame->data[i] =(x > 0) ? x : 0.1 * x;
66 | }
67 | return frame;
68 | }
69 |
70 | cnn_frame *doFeedForward_Activation(cnn_frame *frame, int activation) {
71 | if(activation == NO_ACTIVATION)
72 | return frame;
73 |
74 | if(!frame->useGPU) {
75 | switch(activation) {
76 | case LOGISTIC:
77 | activate_LOGISTIC(frame);
78 | break;
79 | case RAMP:
80 | activate_RAMP(frame);
81 | break;
82 | case LEAKY:
83 | activate_LEAKY(frame);
84 | break;
85 | case RELU:
86 | activate_RELU(frame);
87 | break;
88 | }
89 | } else {
90 | OpenCLObjects *openCLObjects = getOpenClObject();
91 | cl_int err = CL_SUCCESS;
92 | int i = 0;
93 |
94 | cl_kernel kernel = (frame->useHalf) ? openCLObjects->activation_kernel.kernel : openCLObjects->activation_kernel_float.kernel;
95 | err = clSetKernelArg(kernel, i++, sizeof(cl_mem), &frame->cl_data);
96 | err |= clSetKernelArg(kernel, i++, sizeof(int), &activation);
97 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
98 |
99 | size_t globalSize[1] = {(size_t)(frame->w * frame->h * frame->c)};
100 |
101 | err = clEnqueueNDRangeKernel(
102 | openCLObjects->queue,
103 | kernel,
104 | 1,
105 | 0,
106 | globalSize,
107 | 0,
108 | 0, 0, 0
109 | );
110 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
111 |
112 | err |= clFinish(openCLObjects->queue);
113 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
114 | }
115 |
116 | return frame;
117 | }
118 |
119 | cnn_frame *frame_init(int w, int h, int c) {
120 | cnn_frame *frame = (cnn_frame *)calloc(1, sizeof(cnn_frame));
121 | frame->w = w;
122 | frame->h = h;
123 | frame->c = c;
124 | frame->data = (float *)calloc(w * h * c, sizeof(float));
125 | frame->useGPU = 0;
126 | frame->useHalf = 0;
127 | return frame;
128 | }
129 |
130 | cnn_frame *frame_init_gpu(int w, int h, int c) {
131 | cnn_frame *frame = (cnn_frame *)calloc(1, sizeof(cnn_frame));
132 | frame->w = w;
133 | frame->h = h;
134 | frame->c = c;
135 | frame->useGPU = 1;
136 | frame->useHalf = 0;
137 |
138 | cl_int err;
139 | OpenCLObjects *openCLObjects = getOpenClObject();
140 |
141 | frame->cl_data = clCreateBuffer(
142 | openCLObjects->context,
143 | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
144 | frame->w * frame->h * frame->c * sizeof(float), //size in bytes
145 | NULL,//buffer of data
146 | &err);
147 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
148 |
149 | if(err == CL_SUCCESS)
150 | return frame;
151 | else {
152 | free(frame);
153 | return NULL;
154 | }
155 | }
156 |
157 | cnn_frame *frame_init_gpu_half(int w, int h, int c) {
158 | cnn_frame *frame = (cnn_frame *)calloc(1, sizeof(cnn_frame));
159 | frame->w = w;
160 | frame->h = h;
161 | frame->c = c;
162 | frame->useGPU = 1;
163 | frame->useHalf = 1;
164 |
165 | cl_int err;
166 | OpenCLObjects *openCLObjects = getOpenClObject();
167 |
168 | frame->cl_data = clCreateBuffer(
169 | openCLObjects->context,
170 | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
171 | frame->w * frame->h * frame->c * sizeof(cl_half), //size in bytes
172 | NULL,//buffer of data
173 | &err);
174 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
175 |
176 | if(err == CL_SUCCESS)
177 | return frame;
178 | else {
179 | free(frame);
180 | return NULL;
181 | }
182 | }
183 |
184 | cnn_frame * frame_clone(cnn_frame *src) {
185 | if(!src->useGPU) {
186 | cnn_frame *frame = frame_init(src->w, src->h, src->c);
187 | memcpy(frame->data, src->data, frame->w * frame->h * frame->c * sizeof(float));
188 | return frame;
189 | } else {
190 | cl_int err = CL_SUCCESS;
191 | cnn_frame *frame = NULL;
192 | if(src->useHalf == 0)
193 | frame = frame_init_gpu(src->w, src->h, src->c);
194 | else
195 | frame = frame_init_gpu_half(src->w, src->h, src->c);
196 |
197 | if(frame == NULL)
198 | return NULL;
199 |
200 | int mapped_size = src->w * src->h * src->c * sizeof(float);
201 | if(src->useHalf == 1)
202 | mapped_size = src->w * src->h * src->c * sizeof(cl_half);
203 |
204 | OpenCLObjects *openCLObjects = getOpenClObject();
205 | float *buf_src = (float *)clEnqueueMapBuffer(openCLObjects->queue, \
206 | src->cl_data, \
207 | CL_TRUE, CL_MAP_READ, \
208 | 0, \
209 | mapped_size, \
210 | 0, NULL, NULL, &err);
211 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
212 |
213 | float *buf_dst = (float *)clEnqueueMapBuffer(openCLObjects->queue, \
214 | frame->cl_data, \
215 | CL_TRUE, CL_MAP_WRITE, \
216 | 0, \
217 | mapped_size, \
218 | 0, NULL, NULL, &err);
219 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
220 |
221 | memcpy((void*)buf_dst, (void*)buf_src, mapped_size);
222 |
223 | clEnqueueUnmapMemObject(openCLObjects->queue, \
224 | src->cl_data, \
225 | buf_src, \
226 | 0, NULL, NULL);
227 |
228 | clEnqueueUnmapMemObject(openCLObjects->queue, \
229 | frame->cl_data, \
230 | buf_dst, \
231 | 0, NULL, NULL);
232 | return frame;
233 | }
234 | }
235 |
236 | cnn_frame* frame_convert_to_gpu_float(cnn_frame *frame) {
237 | if(frame->useGPU && !frame->useHalf)
238 | return frame;
239 |
240 | OpenCLObjects *openCLObjects = getOpenClObject();
241 | cnn_frame *output = frame_init_gpu(frame->w, frame->h, frame->c);
242 | int err = CL_SUCCESS;
243 |
244 | if(!frame->useGPU) {
245 | //CPU-mode
246 | float *buf_dest = (float *)clEnqueueMapBuffer(openCLObjects->queue, \
247 | output->cl_data, \
248 | CL_TRUE, CL_MAP_WRITE, \
249 | 0, \
250 | output->w * output->h * output->c * sizeof(cl_float), \
251 | 0, NULL, NULL, &err);
252 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
253 |
254 | memcpy((void *)buf_dest, frame->data, output->w * output->h * output->c * sizeof(cl_float));
255 |
256 | clEnqueueUnmapMemObject(openCLObjects->queue, \
257 | output->cl_data, \
258 | buf_dest, \
259 | 0, NULL, NULL);
260 | } else {
261 | //GPU-half-mode
262 | cl_kernel kernel = openCLObjects->convert_half_to_float_kernel.kernel;
263 | err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &frame->cl_data);
264 | err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output->cl_data);
265 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
266 |
267 | size_t convertSize[1] = {(size_t) output->w * output->h * output->c};
268 | err = clEnqueueNDRangeKernel(
269 | openCLObjects->queue,
270 | kernel,
271 | 1,
272 | 0,
273 | convertSize,
274 | 0,
275 | 0, 0, 0
276 | );
277 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
278 |
279 | err = clFinish(openCLObjects->queue);
280 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
281 | }
282 |
283 | frame_free(frame);
284 |
285 | //test
286 | {
287 | float *buf_dest = (float *)clEnqueueMapBuffer(openCLObjects->queue, \
288 | output->cl_data, \
289 | CL_TRUE, CL_MAP_READ, \
290 | 0, \
291 | output->w * output->h * output->c * sizeof(cl_float), \
292 | 0, NULL, NULL, &err);
293 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
294 |
295 | clEnqueueUnmapMemObject(openCLObjects->queue, \
296 | output->cl_data, \
297 | buf_dest, \
298 | 0, NULL, NULL);
299 | }
300 |
301 | return output;
302 | }
303 |
304 | cnn_frame* frame_convert_to_gpu_half(cnn_frame *frame) {
305 | if(frame->useGPU && frame->useHalf)
306 | return frame;
307 |
308 | cnn_frame *output = frame_init_gpu_half(frame->w, frame->h, frame->c);
309 | OpenCLObjects *openCLObjects = getOpenClObject();
310 | int err = CL_SUCCESS;
311 |
312 | cl_mem cl_data = NULL;
313 |
314 | if(!frame->useGPU) {
315 | //cpu-mode
316 | cl_data = clCreateBuffer(
317 | openCLObjects->context,
318 | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
319 | frame->w * frame->h * frame->c * sizeof(float), //size in bytes
320 | frame->data,//buffer of data
321 | &err);
322 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
323 | } else {
324 | //gpu-float-mode
325 | cl_data = frame->cl_data;
326 | }
327 |
328 | cl_kernel kernel = openCLObjects->convert_float_to_half_kernel.kernel;
329 | err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_data);
330 | err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output->cl_data);
331 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
332 |
333 | size_t convertSize[1] = {(size_t) output->w * output->h * output->c};
334 | err = clEnqueueNDRangeKernel(
335 | openCLObjects->queue,
336 | kernel,
337 | 1,
338 | 0,
339 | convertSize,
340 | 0,
341 | 0, 0, 0
342 | );
343 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
344 |
345 | err = clFinish(openCLObjects->queue);
346 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
347 |
348 | if(!frame->useGPU)
349 | clReleaseMemObject(cl_data);
350 |
351 | frame_free(frame);
352 |
353 | //test
354 | {
355 | float *buf_dest = (float *)clEnqueueMapBuffer(openCLObjects->queue, \
356 | output->cl_data, \
357 | CL_TRUE, CL_MAP_READ, \
358 | 0, \
359 | output->w * output->h * output->c * sizeof(cl_half), \
360 | 0, NULL, NULL, &err);
361 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
362 |
363 | clEnqueueUnmapMemObject(openCLObjects->queue, \
364 | output->cl_data, \
365 | buf_dest, \
366 | 0, NULL, NULL);
367 | }
368 |
369 | return output;
370 | }
371 |
372 | cnn_frame * frame_convert_to_cpu(cnn_frame *frame) {
373 | if(!frame->useGPU)
374 | return frame;
375 |
376 | cnn_frame *output = frame_init(frame->w, frame->h, frame->c);
377 | OpenCLObjects *openCLObjects = getOpenClObject();
378 | int err = CL_SUCCESS;
379 |
380 | //convert half to float first
381 | if(frame->useHalf) {
382 |
383 | cnn_frame *tmp = frame_init_gpu(frame->w, frame->h, frame->c);
384 |
385 | cl_kernel kernel = openCLObjects->convert_half_to_float_kernel.kernel;
386 | err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &frame->cl_data);
387 | err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &tmp->cl_data);
388 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
389 |
390 | size_t convertSize[1] = {(size_t) tmp->w * tmp->h * tmp->c};
391 | err = clEnqueueNDRangeKernel(
392 | openCLObjects->queue,
393 | kernel,
394 | 1,
395 | 0,
396 | convertSize,
397 | 0,
398 | 0, 0, 0
399 | );
400 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
401 |
402 | err = clFinish(openCLObjects->queue);
403 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
404 |
405 | frame_free(frame);
406 | frame = tmp;
407 | }
408 |
409 | //map gpu-mem to cpu-mem and copy data
410 | float *buf_src = (float *)clEnqueueMapBuffer(openCLObjects->queue, \
411 | frame->cl_data, \
412 | CL_TRUE, CL_MAP_READ, \
413 | 0, \
414 | frame->w * frame->h * frame->c * sizeof(cl_float), \
415 | 0, NULL, NULL, &err);
416 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
417 |
418 | memcpy((void*)output->data, (void*)buf_src, frame->w * frame->h * frame->c * sizeof(cl_float));
419 |
420 | clEnqueueUnmapMemObject(openCLObjects->queue, \
421 | frame->cl_data, \
422 | buf_src, \
423 | 0, NULL, NULL);
424 |
425 | frame_free(frame);
426 |
427 | return output;
428 | }
429 |
430 | void frame_free(cnn_frame *frame) {
431 | if(frame->useGPU == 0)
432 | free(frame->data);
433 | else {
434 | clReleaseMemObject(frame->cl_data);
435 | }
436 | free(frame);
437 | }
--------------------------------------------------------------------------------
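basic_functions.cpp is essentially the cnn_frame toolbox: frame_init allocates a host-side float buffer in HWC order, frame_init_gpu and frame_init_gpu_half allocate cl_mem buffers, the frame_convert_to_* helpers move a frame between representations (freeing the frame they are given and returning a new one), and frame_free releases either form. A minimal host-side usage sketch follows; the 224x224x3 shape is invented purely for illustration, and the calls are the helpers defined above.

```cpp
// Illustrative sketch only: builds a zero-filled CPU frame and converts it to a
// GPU half-precision frame using the helpers from basic_functions.cpp.
cnn_frame *make_gpu_input_frame() {
    cnn_frame *frame = frame_init(224, 224, 3);        // CPU float buffer, HWC layout
    for (int y = 0; y < frame->h; y++)
        for (int x = 0; x < frame->w; x++)
            for (int z = 0; z < frame->c; z++)
                frame->data[getIndexFrom3D(frame->h, frame->w, frame->c, y, x, z)] = 0.0f;

    // The conversion frees the frame it is given and returns a new one, so only the
    // returned pointer may be used afterwards; the data now lives in cl_data as cl_half.
    return frame_convert_to_gpu_half(frame);
}
```

Whoever consumes the returned frame is responsible for calling frame_free on it once the GPU work is done.
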
/app/src/main/cpp/classifier.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | float * cnn_doClassification(cnn_frame *frame, cnn *model) {
10 | cnn_frame *result = frame;
11 |
12 | double totalTime = 0;
13 | double t0,t1;
14 | double global_t0 = get_timestamp();
15 |
16 | OpenCLObjects *openCLObjects = getOpenClObject();
17 | cl_int err;
18 | cl_event event;
19 |
20 | for(int i = 0 ; i < model->nLayers ; i++) {
21 | cnn_layer *layer = &model->layers[i];
22 |
23 | t0 = get_timestamp();
24 |
25 | result = layer->doFeedForward(result, layer);
26 |
27 | if(result->useGPU) {
28 | int size = result->w * result->h * result->c * (layer->useHalf ? sizeof(cl_half) : sizeof(cl_float));
29 | float *buf_dest = (float *)clEnqueueMapBuffer(openCLObjects->queue, \
30 | result->cl_data, \
31 | CL_TRUE, CL_MAP_READ, \
32 | 0, \
33 | size, \
34 | 0, NULL, NULL, &err);
35 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
36 |
37 | if(!result->useHalf)
38 | LOGD("1st data: %f", buf_dest[0]);
39 |
40 | clEnqueueUnmapMemObject(openCLObjects->queue, \
41 | result->cl_data, \
42 | buf_dest, \
43 | 0, NULL, NULL);
44 | }
45 |
46 | t1 = get_timestamp();
47 | double milsecs = (t1 - t0) / 1000.0L;
48 |
49 | LOGD("Processed layer %d in %f ms\n", (i + 1), milsecs);
50 | }
51 |
52 | if(result != NULL && result->useGPU) {
53 |
54 | result = frame_convert_to_gpu_float(result);
55 |
56 | result->data = (float *)malloc(result->w * result->h * result->c * sizeof(float));
57 |
58 | err = clEnqueueReadBuffer (openCLObjects->queue,
59 | result->cl_data,
60 | true,
61 | 0,
62 | result->w * result->h * result->c * sizeof(float),
63 | result->data,
64 | 0,
65 | 0,
66 | 0);
67 |
68 | err |= clFinish(openCLObjects->queue);
69 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
70 |
71 | err = clReleaseMemObject(result->cl_data);
72 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
73 |
74 | result->useGPU = 0;
75 | }
76 |
77 | timestamp_t global_t1 = get_timestamp();
78 |
79 | totalTime = (global_t1 - global_t0) / 1000.0L;
80 |
81 | LOGD("CNN finished in %f ms\n", totalTime);
82 |
83 | float *output = (result == NULL) ? NULL : result->data;
84 |
85 | if(result != NULL)
86 | { result->data = NULL; frame_free(result); } // detach the data buffer so it is not freed here; the pointer returned below stays valid
87 |
88 | return output;
89 | }
90 |
--------------------------------------------------------------------------------
/app/src/main/cpp/clio.cpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | const char* opencl_error_to_str (cl_int error) {
4 | #define CASE_CL_CONSTANT(NAME) case NAME: return #NAME;
5 |
6 | // Suppose that no combinations are possible.
7 | switch(error) {
8 | CASE_CL_CONSTANT(CL_SUCCESS)
9 | CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND)
10 | CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE)
11 | CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE)
12 | CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE)
13 | CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES)
14 | CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY)
15 | CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE)
16 | CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP)
17 | CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH)
18 | CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED)
19 | CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE)
20 | CASE_CL_CONSTANT(CL_MAP_FAILURE)
21 | CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET)
22 | CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
23 | CASE_CL_CONSTANT(CL_INVALID_VALUE)
24 | CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE)
25 | CASE_CL_CONSTANT(CL_INVALID_PLATFORM)
26 | CASE_CL_CONSTANT(CL_INVALID_DEVICE)
27 | CASE_CL_CONSTANT(CL_INVALID_CONTEXT)
28 | CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES)
29 | CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE)
30 | CASE_CL_CONSTANT(CL_INVALID_HOST_PTR)
31 | CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT)
32 | CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
33 | CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE)
34 | CASE_CL_CONSTANT(CL_INVALID_SAMPLER)
35 | CASE_CL_CONSTANT(CL_INVALID_BINARY)
36 | CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS)
37 | CASE_CL_CONSTANT(CL_INVALID_PROGRAM)
38 | CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE)
39 | CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME)
40 | CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION)
41 | CASE_CL_CONSTANT(CL_INVALID_KERNEL)
42 | CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX)
43 | CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE)
44 | CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE)
45 | CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS)
46 | CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION)
47 | CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE)
48 | CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE)
49 | CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET)
50 | CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST)
51 | CASE_CL_CONSTANT(CL_INVALID_EVENT)
52 | CASE_CL_CONSTANT(CL_INVALID_OPERATION)
53 | CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT)
54 | CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE)
55 | CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL)
56 | CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE)
57 | CASE_CL_CONSTANT(CL_INVALID_PROPERTY)
58 |
59 | default:
60 | return "UNKNOWN ERROR CODE";
61 | }
62 |
63 | #undef CASE_CL_CONSTANT
64 | }
--------------------------------------------------------------------------------
/app/src/main/cpp/deepsense.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | cnn *model = NULL;
11 | OpenCLObjects openCLObjects;
12 |
13 | OpenCLObjects *getOpenClObject() {
14 | return &openCLObjects;
15 | }
16 |
17 | cnn *getModel() {
18 | return model;
19 | }
20 |
21 | extern "C" void Java_com_lanytek_deepsensev3_MainActivity_InitGPU(
22 | JNIEnv* env,
23 | jobject thiz,
24 | jstring model_dir_path,
25 | jstring packageName
26 | ) {
27 | //init GPU first
28 | const char *packageNameStr = env->GetStringUTFChars(packageName, 0);
29 | init_OpenCL(CL_DEVICE_TYPE_GPU, openCLObjects, packageNameStr);
30 | env->ReleaseStringUTFChars(packageName, packageNameStr);
31 |
32 | //init model
33 | const char *modelPath = env->GetStringUTFChars(model_dir_path, 0);
34 | if(model != NULL) {
35 | cnn_free(model);
36 | }
37 |
38 | model = cnn_loadModel(modelPath, 1);
39 |
40 | env->ReleaseStringUTFChars(model_dir_path, modelPath);
41 | }
42 |
43 | extern "C" jfloatArray Java_com_lanytek_deepsensev3_MainActivity_GetInferrence(
44 | JNIEnv* env,
45 | jobject thisObject,
46 | jfloatArray input
47 | ) {
48 | if(model == NULL)
49 | return NULL;
50 |
51 | cnn_frame *frame = frame_init(model->input_w, model->input_h, model->input_c);
52 | jfloat* data = env->GetFloatArrayElements(input, 0);
53 | memcpy(frame->data, data, model->input_w * model->input_h * model->input_c * sizeof(float));
54 | env->ReleaseFloatArrayElements(input, data, 0);
55 |
56 | float *result = cnn_doClassification(frame, model);
57 |
58 | if(result != NULL) {
59 | int outputSize = model->layers[model->nLayers - 1].output_w * model->layers[model->nLayers - 1].output_h * model->layers[model->nLayers - 1].output_c;
60 | jfloatArray resultArr = env->NewFloatArray(outputSize);
61 | env->SetFloatArrayRegion(resultArr, 0, outputSize, result);
62 | free(result); //release the heap buffer handed back by cnn_doClassification once it has been copied into the Java array
63 | return resultArr;
64 | } else
65 | return NULL;
66 | }
67 |
68 |
--------------------------------------------------------------------------------
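The two JNI entry points above are thin wrappers: InitGPU builds the OpenCL context for the package and loads the model directory, while GetInferrence copies the Java float array into a cnn_frame and runs cnn_doClassification. The same flow can be exercised from native code directly; the sketch below is hypothetical (the model path and package name are invented) and simply mirrors what the two wrappers do with their JNI arguments.

```cpp
// Hypothetical native-side sketch mirroring InitGPU/GetInferrence above.
void run_deepsense_once() {
    // Same call InitGPU makes, using the accessor instead of the file-local global.
    init_OpenCL(CL_DEVICE_TYPE_GPU, *getOpenClObject(), "com.lanytek.deepsensev3");

    cnn *net = cnn_loadModel("/sdcard/deepsense/vgg-f", 1 /* useGPU */);

    cnn_frame *frame = frame_init(net->input_w, net->input_h, net->input_c);
    // ... fill frame->data with the preprocessed image in HWC order ...

    float *scores = cnn_doClassification(frame, net);   // the frame is not freed by the caller (mirrors GetInferrence)
    // scores holds output_w * output_h * output_c floats of the final layer.
    free(scores);                                        // release the buffer handed back by the classifier
}
```
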
/app/src/main/cpp/deepsense_lib.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by JC1DA on 6/3/16.
3 | //
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 |
19 | static inline int CMP_OPTION(char *str, const char *option) {
20 | int ret = strncmp(str, option, strlen(option)) == 0 ? 1 : 0;
21 | return ret;
22 | }
23 |
24 | static inline int PARSE_ACTIVATION(char *line) {
25 | char buf[32];
26 | sscanf(line,"ACTIVATION: %s\n",buf);
27 | if(CMP_OPTION(buf, "RAMP"))
28 | return RAMP;
29 | else if(CMP_OPTION(buf, "LOGISTIC"))
30 | return LOGISTIC;
31 | else if(CMP_OPTION(buf, "LEAKY"))
32 | return LEAKY;
33 | else if(CMP_OPTION(buf, "RELU"))
34 | return RELU;
35 | return NO_ACTIVATION;
36 | }
37 |
38 | cnn *cnn_loadModel(const char *modelDirPath, int useGPU) {
39 | cnn *model = (cnn *)calloc(1, sizeof(cnn));
40 | model->useGPU = useGPU;
41 |
42 | {
43 | /* read number of layers */
44 | char fileNameBuf[256];
45 | char line[256];
46 | sprintf(fileNameBuf,"%s/model",modelDirPath);
47 |
48 | FILE *fp = fopen(fileNameBuf,"r");
49 | //PLEASE FILL IN NEW FORMAT
50 | while(fgets(line, sizeof(line), fp)) {
51 | if(CMP_OPTION(line, "NUMLAYERS"))
52 | sscanf(line, "NUMLAYERS: %d\n", &model->nLayers);
53 | else if(CMP_OPTION(line, "W"))
54 | sscanf(line, "W: %d\n", &model->input_w);
55 | else if(CMP_OPTION(line, "H"))
56 | sscanf(line, "H: %d\n", &model->input_h);
57 | else if(CMP_OPTION(line, "C"))
58 | sscanf(line, "C: %d\n", &model->input_c);
59 | }
60 | fclose(fp);
61 | }
62 |
63 | model->layers = (cnn_layer *)calloc(model->nLayers, sizeof(cnn_layer));
64 | cnn_layer *layers = model->layers;
65 |
66 | for(int i = 1 ; i <= model->nLayers ; i++) {
67 | char fileNameBuf[256];
68 | char line[256];
69 |
70 | sprintf(fileNameBuf,"%s/l_%d",modelDirPath,i);
71 |
72 | cnn_layer *layer = &layers[i - 1];
73 | layer->index = i - 1;
74 | layer->useGPU = model->useGPU;
75 | layer->type = LAYER_TYPE_UNKNOWN;
76 | layer->activation = NO_ACTIVATION;
77 |
78 | LOGD("Loading layer %d\n", i);
79 |
80 | FILE *layerfp = fopen(fileNameBuf,"r");
81 | while (fgets(line, sizeof(line), layerfp)) {
82 | if(layer->type == LAYER_TYPE_UNKNOWN) {
83 | if(CMP_OPTION(line, "CONV")) {
84 | layer->type = LAYER_TYPE_CONV;
85 | layer->conv_layer = (cnn_layer_conv *) calloc(1, sizeof(cnn_layer_conv));
86 | if (!layer->useGPU)
87 | layer->doFeedForward = doFeedForward_CONV;
88 | else {
89 | layer->doFeedForward = doFeedForward_CONV_GPU;
90 | }
91 | layer->conv_layer->group = 1;
92 | } else if(CMP_OPTION(line, "FULLY_CONNECTED")) {
93 | layer->type = LAYER_TYPE_FULLY_CONNECTED;
94 | layer->connected_layer = (cnn_layer_fully_connected *) calloc(1,
95 | sizeof(cnn_layer_fully_connected));
96 | layer->connected_layer->need_reshape = 0;
97 | if (!layer->useGPU)
98 | layer->doFeedForward = doFeedForward_FULLY_CONNECTED;
99 | else
100 | layer->doFeedForward = doFeedForward_FULLY_CONNECTED_GPU;
101 | } else if(CMP_OPTION(line, "MAXPOOL")) {
102 | layer->type = LAYER_TYPE_MAXPOOL;
103 | layer->maxpool_layer = (cnn_layer_maxpool *) calloc(1,
104 | sizeof(cnn_layer_maxpool));
105 | if (!layer->useGPU)
106 | layer->doFeedForward = doFeedForward_MAXPOOL;
107 | else {
108 | layer->doFeedForward = doFeedForward_MAXPOOL_GPU;
109 | }
110 | } else if(CMP_OPTION(line, "SOFTMAX")) {
111 | layer->type = LAYER_TYPE_SOFTMAX;
112 | layer->doFeedForward = doFeedForward_SOFTMAX;
113 | } else if(CMP_OPTION(line, "LRN_NORM")) {
114 | layer->type = LAYER_TYPE_LRN_NORMALIZE;
115 | layer->lrn_layer = (cnn_layer_lrn *) malloc(sizeof(cnn_layer_lrn));
116 | layer->lrn_layer->k = 1;
117 | if (!layer->useGPU)
118 | layer->doFeedForward = doFeedForward_LRN;
119 | else
120 | layer->doFeedForward = doFeedForward_LRN_GPU;
121 | }
122 | } else {
123 |
124 | if(CMP_OPTION(line, "USE_HALF")) {
125 | sscanf(line,"USE_HALF: %d", &layer->useHalf);
126 | if(layer->useHalf != 0)
127 | layer->useHalf = 1;
128 | }
129 |
130 |
131 | switch(layer->type) {
132 | case LAYER_TYPE_CONV:
133 | if(CMP_OPTION(line, "STRIDE")) {
134 | sscanf(line, "STRIDE: %d %d\n", \
135 | &layer->conv_layer->stride[0], \
136 | &layer->conv_layer->stride[1]);
137 | } else if(CMP_OPTION(line, "PAD")) {
138 | sscanf(line,"PAD: %d %d %d %d\n", \
139 | &layer->conv_layer->pad[0], \
140 | &layer->conv_layer->pad[1], \
141 | &layer->conv_layer->pad[2], \
142 | &layer->conv_layer->pad[3]);
143 | } else if(CMP_OPTION(line, "WIDTH")) {
144 | sscanf(line,"WIDTH: %d\n",&layer->conv_layer->w);
145 | } else if(CMP_OPTION(line, "HEIGHT")) {
146 | sscanf(line,"HEIGHT: %d\n",&layer->conv_layer->h);
147 | } else if(CMP_OPTION(line, "IN_CHANNELS")) {
148 | sscanf(line,"IN_CHANNELS: %d\n",&layer->conv_layer->c);
149 | } else if(CMP_OPTION(line, "OUT_CHANNELS")) {
150 | sscanf(line,"OUT_CHANNELS: %d\n",&layer->conv_layer->n);
151 | } else if(CMP_OPTION(line, "ACTIVATION")) {
152 | layer->activation = PARSE_ACTIVATION(line);
153 | } else if(CMP_OPTION(line, "GROUP")) {
154 | sscanf(line,"GROUP: %d\n",&layer->conv_layer->group);
155 | }
156 | break;
157 | case LAYER_TYPE_FULLY_CONNECTED:
158 | if(CMP_OPTION(line, "INPUTSIZE")) {
159 | sscanf(line, "INPUTSIZE: %d\n", &layer->connected_layer->inputSize);
160 | } else if(CMP_OPTION(line, "OUTPUTSIZE")) {
161 | sscanf(line,"OUTPUTSIZE: %d\n", &layer->connected_layer->outputSize);
162 | } else if(CMP_OPTION(line, "ACTIVATION")) {
163 | layer->activation = PARSE_ACTIVATION(line);
164 | } else if(CMP_OPTION(line, "RESHAPE")) {
165 | sscanf(line,"RESHAPE: %d\n",&layer->connected_layer->need_reshape);
166 | }
167 | break;
168 | case LAYER_TYPE_MAXPOOL:
169 | if(CMP_OPTION(line, "SIZE")) {
170 | sscanf(line,"SIZE: %d\n", &layer->maxpool_layer->size);
171 | } else if(CMP_OPTION(line, "STRIDE")) {
172 | sscanf(line,"STRIDE: %d %d\n", &layer->maxpool_layer->stride[0], &layer->maxpool_layer->stride[1]);
173 | } else if(CMP_OPTION(line, "PAD")) {
174 | sscanf(line,"PAD: %d %d %d %d\n", &layer->maxpool_layer->pad[0], &layer->maxpool_layer->pad[1], \
175 | &layer->maxpool_layer->pad[2], &layer->maxpool_layer->pad[3]);
176 | }
177 | break;
178 | case LAYER_TYPE_LRN_NORMALIZE:
179 | if(CMP_OPTION(line, "SIZE")) {
180 | sscanf(line,"SIZE: %d\n", &layer->lrn_layer->size);
181 | } else if(CMP_OPTION(line, "ALPHA")) {
182 | sscanf(line,"ALPHA: %f\n", &layer->lrn_layer->alpha);
183 | } else if(CMP_OPTION(line, "BETA")) {
184 | sscanf(line,"BETA: %f\n", &layer->lrn_layer->beta);
185 | }
186 | break;
187 | case LAYER_TYPE_SOFTMAX:
188 | break;
189 | case LAYER_TYPE_UNKNOWN:
190 | break;
191 | }
192 | }
193 | }
194 | fclose(layerfp);
195 |
196 | if(layer->type == LAYER_TYPE_CONV) {
197 | //determine output size
198 | if(layer->index == 0) {
199 | layer->output_w = (model->input_w + \
200 | layer->conv_layer->pad[0] + \
201 | layer->conv_layer->pad[1] - \
202 | layer->conv_layer->w) / \
203 | layer->conv_layer->stride[0] + 1;
204 | layer->output_h = (model->input_h + \
205 | layer->conv_layer->pad[2] + \
206 | layer->conv_layer->pad[3] - \
207 | layer->conv_layer->h) / \
208 | layer->conv_layer->stride[1] + 1;
209 | layer->output_c = layer->conv_layer->n;
210 | } else {
211 | layer->output_w = (layers[layer->index - 1].output_w + \
212 | layer->conv_layer->pad[0] + \
213 | layer->conv_layer->pad[1] - \
214 | layer->conv_layer->w) / \
215 | layer->conv_layer->stride[0] + 1;
216 | layer->output_h = (layers[layer->index - 1].output_h + \
217 | layer->conv_layer->pad[2] + \
218 | layer->conv_layer->pad[3] - \
219 | layer->conv_layer->h) / \
220 | layer->conv_layer->stride[1] + 1;
221 | layer->output_c = layer->conv_layer->n;
222 | }
223 |
224 | //switch to another kernel if this conv layer is equivalent to fully connected layer
225 | if(layer->output_h == 1 && layer->output_w == 1 && model->useGPU) {
226 | layer->doFeedForward = doFeedForward_CONV_FC_GPU;
227 | }
228 |
229 | //LOAD BIAS & WEIGHTS DATA
230 | char biasFilePath[256];
231 | strcpy(biasFilePath, fileNameBuf);
232 | strcat(biasFilePath, "_bias");
233 | FILE *biasfp = fopen(biasFilePath, "r");
234 | if(!layer->useGPU) {
235 | layer->conv_layer->bias = (float *)calloc(layer->conv_layer->n, sizeof(float));
236 | fread(layer->conv_layer->bias, sizeof(float), layer->conv_layer->n, biasfp);
237 | } else {
238 | cl_int err;
239 | OpenCLObjects *openCLObjects = getOpenClObject();
240 |
241 | layer->conv_layer->cl_bias = clCreateBuffer(
242 | openCLObjects->context,
243 | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
244 | layer->conv_layer->n * sizeof(float), //size in bytes
245 | NULL,//buffer of data
246 | &err);
247 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
248 |
249 | float *mappedBuffer = (float *)clEnqueueMapBuffer(openCLObjects->queue, \
250 | layer->conv_layer->cl_bias, \
251 | CL_TRUE, CL_MAP_WRITE, \
252 | 0, \
253 | layer->conv_layer->n * sizeof(float), \
254 | 0, NULL, NULL, NULL);
255 |
256 | fread(mappedBuffer, sizeof(float), layer->conv_layer->n, biasfp);
257 |
258 | clEnqueueUnmapMemObject(openCLObjects->queue, \
259 | layer->conv_layer->cl_bias, \
260 | mappedBuffer, \
261 | 0, NULL, NULL);
262 |
263 | if(layer->useHalf) {
264 | cl_mem cl_bias_half = clCreateBuffer(
265 | openCLObjects->context,
266 | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
267 | layer->conv_layer->n * sizeof(cl_half), //size in bytes
268 | NULL,//buffer of data
269 | &err);
270 |
271 | err = clSetKernelArg(openCLObjects->convert_float_to_half_kernel.kernel, 0, sizeof(cl_mem), &layer->conv_layer->cl_bias);
272 | err |= clSetKernelArg(openCLObjects->convert_float_to_half_kernel.kernel, 1, sizeof(cl_mem), &cl_bias_half);
273 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
274 |
275 | size_t convertSize[1] = {(size_t) layer->conv_layer->n};
276 | err = clEnqueueNDRangeKernel(
277 | openCLObjects->queue,
278 | openCLObjects->convert_float_to_half_kernel.kernel,
279 | 1,
280 | 0,
281 | convertSize,
282 | 0,
283 | 0, 0, 0
284 | );
285 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
286 | err = clFinish(openCLObjects->queue);
287 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
288 |
289 | clReleaseMemObject(layer->conv_layer->cl_bias);
290 |
291 | layer->conv_layer->cl_bias = cl_bias_half;
292 | }
293 | }
294 | fclose(biasfp);
295 |
296 | char wFilePath[256];
297 | strcpy(wFilePath, fileNameBuf);
298 | strcat(wFilePath, "_weight");
299 | FILE *wfp = fopen(wFilePath, "r");
300 | if(!layer->useGPU) {
301 | layer->conv_layer->W = (float *) calloc(\
302 | layer->conv_layer->w * \
303 | layer->conv_layer->h * \
304 | layer->conv_layer->c * \
305 | layer->conv_layer->n / layer->conv_layer->group, sizeof(float));
306 |
307 | /*
308 | * Our old model format is [n x c x h x w]
309 | * We change memory layout from [n x c x h x w] into [n x h x w x c]
310 | */
311 |
312 | float *buffer = (float *)malloc(layer->conv_layer->h * layer->conv_layer->w * sizeof(float));
313 | for(int k = 0 ; k < layer->conv_layer->n / layer->conv_layer->group ; k++) {
314 | for(int c = 0 ; c < layer->conv_layer->c ; c++) {
315 | fread(buffer, sizeof(float), layer->conv_layer->h * layer->conv_layer->w, wfp);
316 | for(int h = 0 ; h < layer->conv_layer->h ; h++) {
317 | for(int w = 0 ; w < layer->conv_layer->w ; w++) {
318 | int buf_idx = h * layer->conv_layer->w + w;
319 | int new_idx = getIndexFrom4D(
320 | layer->conv_layer->n,
321 | layer->conv_layer->h,
322 | layer->conv_layer->w,
323 | layer->conv_layer->c,
324 | k, h, w, c
325 | );
326 | layer->conv_layer->W[new_idx] = buffer[buf_idx];
327 | }
328 | }
329 | }
330 | }
331 | free(buffer);
332 | } else {
333 | cl_int err;
334 | OpenCLObjects *openCLObjects = getOpenClObject();
335 |
336 | layer->conv_layer->cl_W = clCreateBuffer(
337 | openCLObjects->context,
338 | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
339 | layer->conv_layer->w * layer->conv_layer->h * layer->conv_layer->c * layer->conv_layer->n / layer->conv_layer->group * sizeof(float), //size in bytes
340 | NULL,//buffer of data
341 | &err);
342 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
343 |
344 | float *mappedBuffer = (float *)clEnqueueMapBuffer(openCLObjects->queue, \
345 | layer->conv_layer->cl_W, \
346 | CL_TRUE, CL_MAP_WRITE, \
347 | 0, \
348 | layer->conv_layer->w * layer->conv_layer->h * layer->conv_layer->c * layer->conv_layer->n / layer->conv_layer->group * sizeof(float), \
349 | 0, NULL, NULL, NULL);
350 |
351 | float *buffer = (float *)malloc(layer->conv_layer->h * layer->conv_layer->w * sizeof(float));
352 | for(int k = 0 ; k < layer->conv_layer->n / layer->conv_layer->group ; k++) {
353 | for(int c = 0 ; c < layer->conv_layer->c ; c++) {
354 | fread(buffer, sizeof(float), layer->conv_layer->h * layer->conv_layer->w, wfp);
355 | for(int h = 0 ; h < layer->conv_layer->h ; h++) {
356 | for(int w = 0 ; w < layer->conv_layer->w ; w++) {
357 | int buf_idx = h * layer->conv_layer->w + w;
358 | int new_idx = getIndexFrom4D(
359 | layer->conv_layer->n,
360 | layer->conv_layer->h,
361 | layer->conv_layer->w,
362 | layer->conv_layer->c,
363 | k, h, w, c
364 | );
365 | mappedBuffer[new_idx] = buffer[buf_idx];
366 | }
367 | }
368 | }
369 | }
370 |
371 | free(buffer);
372 |
373 | clEnqueueUnmapMemObject(openCLObjects->queue, \
374 | layer->conv_layer->cl_W, \
375 | mappedBuffer, \
376 | 0, NULL, NULL);
377 |
378 | if(layer->useHalf == 1) {
379 | cl_mem cl_W_half = clCreateBuffer(
380 | openCLObjects->context,
381 | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
382 | layer->conv_layer->w * layer->conv_layer->h * layer->conv_layer->c * layer->conv_layer->n / layer->conv_layer->group * sizeof(cl_half), //size in bytes
383 | NULL,//buffer of data
384 | &err);
385 |
386 | err = clSetKernelArg(openCLObjects->convert_float_to_half_kernel.kernel, 0, sizeof(cl_mem), &layer->conv_layer->cl_W);
387 | err |= clSetKernelArg(openCLObjects->convert_float_to_half_kernel.kernel, 1, sizeof(cl_mem), &cl_W_half);
388 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
389 |
390 | size_t convertSize[1] = {(size_t)layer->conv_layer->w * layer->conv_layer->h * layer->conv_layer->c * layer->conv_layer->n / layer->conv_layer->group};
391 | err = clEnqueueNDRangeKernel(
392 | openCLObjects->queue,
393 | openCLObjects->convert_float_to_half_kernel.kernel,
394 | 1,
395 | 0,
396 | convertSize,
397 | 0,
398 | 0, 0, 0
399 | );
400 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
401 |
402 | err = clFinish(openCLObjects->queue);
403 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
404 |
405 | clReleaseMemObject(layer->conv_layer->cl_W);
406 |
407 | layer->conv_layer->cl_W = cl_W_half;
408 | }
409 | }
410 | fclose(wfp);
411 | }
412 |
413 | if(layer->type == LAYER_TYPE_FULLY_CONNECTED) {
414 | layer->output_w = 1;
415 | layer->output_h = 1;
416 | layer->output_c = layer->connected_layer->outputSize;
417 |
418 | layer->connected_layer->weightSize = layer->connected_layer->inputSize * layer->connected_layer->outputSize;
419 |
420 | //LOAD BIAS AND WEIGHTS DATA
421 | char biasFilePath[256];
422 | strcpy(biasFilePath, fileNameBuf);
423 | strcat(biasFilePath, "_bias");
424 | FILE *biasfp = fopen(biasFilePath, "r");
425 | if(!layer->useGPU) {
426 | layer->connected_layer->bias = (float *)calloc(layer->connected_layer->outputSize, sizeof(float));
427 | fread(layer->connected_layer->bias, sizeof(float), layer->connected_layer->outputSize, biasfp);
428 | } else {
429 | cl_int err;
430 | OpenCLObjects *openCLObjects = getOpenClObject();
431 |
432 | layer->connected_layer->cl_bias = clCreateBuffer(
433 | openCLObjects->context,
434 | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
435 | layer->connected_layer->outputSize * sizeof(float), //size in bytes
436 | NULL,//buffer of data
437 | &err);
438 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
439 |
440 | float *mappedBuffer = (float *)clEnqueueMapBuffer(openCLObjects->queue, \
441 | layer->connected_layer->cl_bias, \
442 | CL_TRUE, CL_MAP_WRITE, \
443 | 0, \
444 | layer->connected_layer->outputSize * sizeof(float), \
445 | 0, NULL, NULL, NULL);
446 |
447 | fread(mappedBuffer, sizeof(float), layer->connected_layer->outputSize, biasfp);
448 |
449 | clEnqueueUnmapMemObject(openCLObjects->queue, \
450 | layer->connected_layer->cl_bias, \
451 | mappedBuffer, \
452 | 0, NULL, NULL);
453 |
454 | if(layer->useHalf) {
455 | cl_mem cl_bias_half = clCreateBuffer(
456 | openCLObjects->context,
457 | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
458 | layer->connected_layer->outputSize * sizeof(cl_half), //size in bytes
459 | NULL,//buffer of data
460 | &err);
461 |
462 | err = clSetKernelArg(openCLObjects->convert_float_to_half_kernel.kernel, 0, sizeof(cl_mem), &layer->connected_layer->cl_bias);
463 | err |= clSetKernelArg(openCLObjects->convert_float_to_half_kernel.kernel, 1, sizeof(cl_mem), &cl_bias_half);
464 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
465 |
466 | size_t convertSize[1] = {(size_t) layer->connected_layer->outputSize};
467 | err = clEnqueueNDRangeKernel(
468 | openCLObjects->queue,
469 | openCLObjects->convert_float_to_half_kernel.kernel,
470 | 1,
471 | 0,
472 | convertSize,
473 | 0,
474 | 0, 0, 0
475 | );
476 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
477 |
478 | err = clFinish(openCLObjects->queue);
479 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
480 |
481 | clReleaseMemObject(layer->connected_layer->cl_bias);
482 |
483 | layer->connected_layer->cl_bias = cl_bias_half;
484 | }
485 | }
486 | fclose(biasfp);
487 |
488 | char wFilePath[256];
489 | strcpy(wFilePath, fileNameBuf);
490 | strcat(wFilePath, "_weight");
491 | FILE *wfp = fopen(wFilePath, "r");
492 | if(!layer->useGPU) {
493 | layer->connected_layer->W = (float *) calloc(\
494 | layer->connected_layer->weightSize, sizeof(float));
495 | fread(layer->connected_layer->W, sizeof(float),
496 | layer->connected_layer->weightSize,
497 | wfp);
498 | } else {
499 | cl_int err;
500 | OpenCLObjects *openCLObjects = getOpenClObject();
501 |
502 | layer->connected_layer->cl_W = clCreateBuffer(
503 | openCLObjects->context,
504 | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
505 | layer->connected_layer->weightSize * sizeof(float), //size in bytes
506 | NULL,//buffer of data
507 | &err);
508 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
509 |
510 | float *mappedBuffer = (float *)clEnqueueMapBuffer(openCLObjects->queue, \
511 | layer->connected_layer->cl_W, \
512 | CL_TRUE, CL_MAP_WRITE, \
513 | 0, \
514 | layer->connected_layer->weightSize * sizeof(float), \
515 | 0, NULL, NULL, NULL);
516 |
517 | if(!layer->connected_layer->need_reshape) {
518 | //file is formatted [(c x h x w) x outputsize]
519 | //this is for LRCN
520 | float * buffer = (float *)malloc(layer->connected_layer->outputSize * sizeof(float));
521 | int input_h = (layer->index == 0) ? model->input_h : layers[layer->index - 1].output_h;
522 | int input_w = (layer->index == 0) ? model->input_w : layers[layer->index - 1].output_w;
523 | int input_c = (layer->index == 0) ? model->input_c : layers[layer->index - 1].output_c;
524 |
525 | for(int c = 0 ; c < input_c ; c++) {
526 | for(int h = 0 ; h < input_h ; h++) {
527 | for(int w = 0 ; w < input_w ; w++) {
528 | fread(buffer, sizeof(float), layer->connected_layer->outputSize, wfp);
529 | for(int n = 0 ; n < layer->connected_layer->outputSize ; n++) {
530 | int idx = getIndexFrom4D(layer->connected_layer->outputSize, input_h, input_w, input_c, n, h, w, c);
531 | mappedBuffer[idx] = buffer[n];
532 | }
533 | }
534 | }
535 | }
536 | free(buffer);
537 | } else {
538 | //file is formatted [outputsize x (c x h x w)]
539 | int input_h = (layer->index == 0) ? model->input_h : layers[layer->index - 1].output_h;
540 | int input_w = (layer->index == 0) ? model->input_w : layers[layer->index - 1].output_w;
541 | int input_c = (layer->index == 0) ? model->input_c : layers[layer->index - 1].output_c;
542 |
543 | int size = input_h * input_w * input_c;
544 | float *buffer = (float *)malloc(size * sizeof(float));
545 | for(int n = 0 ; n < layer->connected_layer->outputSize ; n++) {
546 | fread(buffer, sizeof(float), size, wfp); //[c x h x w]
547 | //need to convert to h x w x c
548 | int f_idx = 0;
549 | for(int c = 0 ; c < input_c ; c++) {
550 | for(int h = 0 ; h < input_h ; h++) {
551 | for(int w = 0 ; w < input_w ; w++) {
552 | int idx = getIndexFrom4D(layer->connected_layer->outputSize, input_h, input_w, input_c, n, h, w, c);
553 | mappedBuffer[idx] = buffer[f_idx];
554 | f_idx++;
555 | }
556 | }
557 | }
558 | }
559 | free(buffer);
560 | }
561 |
562 | clEnqueueUnmapMemObject(openCLObjects->queue, \
563 | layer->connected_layer->cl_W, \
564 | mappedBuffer, \
565 | 0, NULL, NULL);
566 |
567 | if(layer->useHalf) {
568 | cl_mem cl_W_half = clCreateBuffer(
569 | openCLObjects->context,
570 | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
571 | layer->connected_layer->weightSize * sizeof(cl_half), //size in bytes
572 | NULL,//buffer of data
573 | &err);
574 |
575 | err = clSetKernelArg(openCLObjects->convert_float_to_half_kernel.kernel, 0, sizeof(cl_mem), &layer->connected_layer->cl_W);
576 | err |= clSetKernelArg(openCLObjects->convert_float_to_half_kernel.kernel, 1, sizeof(cl_mem), &cl_W_half);
577 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
578 |
579 | size_t convertSize[1] = {(size_t) layer->connected_layer->weightSize};
580 | err = clEnqueueNDRangeKernel(
581 | openCLObjects->queue,
582 | openCLObjects->convert_float_to_half_kernel.kernel,
583 | 1,
584 | 0,
585 | convertSize,
586 | 0,
587 | 0, 0, 0
588 | );
589 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
590 |
591 | err = clFinish(openCLObjects->queue);
592 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
593 |
594 | clReleaseMemObject(layer->connected_layer->cl_W);
595 |
596 | layer->connected_layer->cl_W = cl_W_half;
597 | }
598 | }
599 | fclose(wfp);
600 | }
601 |
602 | if(layer->type == LAYER_TYPE_MAXPOOL) {
603 | layer->output_w = 1 + (layers[layer->index - 1].output_w + layer->maxpool_layer->pad[0] + layer->maxpool_layer->pad[1] - layer->maxpool_layer->size) / layer->maxpool_layer->stride[0];
604 | layer->output_h = 1 + (layers[layer->index - 1].output_h + layer->maxpool_layer->pad[2] + layer->maxpool_layer->pad[3] - layer->maxpool_layer->size) / layer->maxpool_layer->stride[1];
605 | layer->output_c = layers[layer->index - 1].output_c;
606 | }
607 |
608 | if(layer->type == LAYER_TYPE_SOFTMAX) {
609 | layer->output_w = 1;
610 | layer->output_h = 1;
611 | layer->output_c = layers[layer->index - 1].output_c;
612 | }
613 |
614 | if(layer->type == LAYER_TYPE_LRN_NORMALIZE) {
615 | layer->output_w = layers[layer->index - 1].output_w;
616 | layer->output_h = layers[layer->index - 1].output_h;
617 | layer->output_c = layers[layer->index - 1].output_c;
618 | }
619 |
620 | int input_w = (i == 1) ? model->input_w : layers[i - 2].output_w;
621 | int input_h = (i == 1) ? model->input_h : layers[i - 2].output_h;
622 | int input_c = (i == 1) ? model->input_c : layers[i - 2].output_c;
623 |
624 | LOGD("Layer %d has input[%d %d %d] and output [%d %d %d]",(i), \
625 | input_c, input_h, input_w,
626 | layer->output_c, layer->output_h, layer->output_w);
627 | }
628 |
629 | return model;
630 | }
631 |
632 | void cnn_free(cnn *model) {
633 | int i;
634 | for(i = 0 ; i < model->nLayers ; i++) {
635 | cnn_layer *layer = &model->layers[i];
636 | if(layer->type == LAYER_TYPE_CONV) {
637 | if(!model->useGPU) {
638 | free(layer->conv_layer->bias);
639 | free(layer->conv_layer->W);
640 | } else {
641 | clReleaseMemObject(layer->conv_layer->cl_W);
642 | clReleaseMemObject(layer->conv_layer->cl_bias);
643 | }
644 | free(layer->conv_layer);
645 | } else if(layer->type == LAYER_TYPE_FULLY_CONNECTED) {
646 | if(!model->useGPU) {
647 | free(layer->connected_layer->bias);
648 | free(layer->connected_layer->W);
649 | } else {
650 | clReleaseMemObject(layer->connected_layer->cl_W);
651 | clReleaseMemObject(layer->connected_layer->cl_bias);
652 | }
653 | free(layer->connected_layer);
654 | } else if(layer->type == LAYER_TYPE_MAXPOOL) {
655 | free(layer->maxpool_layer);
656 | } else if(layer->type == LAYER_TYPE_LRN_NORMALIZE) {
657 | free(layer->lrn_layer);
658 | }
659 | }
660 |
661 | if(model->averageImage != NULL)
662 | free(model->averageImage);
663 |
664 | free(model->layers);
665 | free(model);
666 | }
--------------------------------------------------------------------------------
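For orientation, cnn_loadModel expects a plain-text model directory: one model file with the global settings and one l_<i> file per layer (1-based), with each conv or fully-connected layer accompanied by raw-float l_<i>_bias and l_<i>_weight files. The keywords below are exactly the ones the parser above recognizes; the concrete values are hypothetical:

    model:
        NUMLAYERS: 2
        W: 227
        H: 227
        C: 3

    l_1:
        CONV
        USE_HALF: 0
        STRIDE: 4 4
        PAD: 0 0 0 0
        WIDTH: 11
        HEIGHT: 11
        IN_CHANNELS: 3
        OUT_CHANNELS: 96
        ACTIVATION: RELU
        GROUP: 1

    l_2:
        SOFTMAX

    l_1_bias    - 96 raw floats
    l_1_weight  - 11*11*3*96 raw floats, stored [n x c x h x w] and re-laid-out to [n x h x w x c] at load time

Note that the layer-type line (CONV, MAXPOOL, ...) must come first in each l_<i> file, since the remaining options are only parsed once the type is known.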
/app/src/main/cpp/include/basic_functions.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __BASIC_FUNCTIONS_HPP__
2 | #define __BASIC_FUNCTIONS_HPP__
3 |
4 | #include "deepsense_lib.hpp"
5 |
6 | int getIndexFrom4D(int d1, int d2, int d3, int d4, int i1, int i2, int i3, int i4);
7 | float getDataFrom4D(float *data, int d1, int d2, int d3, int d4, int i1, int i2, int i3, int i4);
8 | int getIndexFrom3D(int d1, int d2, int d3, int i1, int i2, int i3);
9 | float getDataFrom3D(float *data, int d1, int d2, int d3, int i1, int i2, int i3);
10 |
11 | cnn_frame *activate_RAMP(cnn_frame *frame);
12 | cnn_frame *activate_LOGISTIC(cnn_frame *frame);
13 | cnn_frame *activate_RELU(cnn_frame *frame);
14 | cnn_frame *activate_LEAKY(cnn_frame *frame);
15 | cnn_frame *doFeedForward_Activation(cnn_frame *frame, int activation);
16 |
17 | cnn_frame * frame_init(int w, int h, int c);
18 | cnn_frame * frame_init_gpu(int w, int h, int c);
19 | cnn_frame * frame_init_gpu_half(int w, int h, int c);
20 | cnn_frame * frame_clone(cnn_frame *src);
21 | cnn_frame * frame_convert_to_gpu_float(cnn_frame *frame);
22 | cnn_frame * frame_convert_to_gpu_half(cnn_frame *frame);
23 | cnn_frame * frame_convert_to_cpu(cnn_frame *frame);
24 | void frame_free(cnn_frame *frame);
25 |
26 | #endif
27 |
--------------------------------------------------------------------------------
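The two index helpers declared above are used throughout the loaders and layers with the convention [d1 x d2 x d3 (x d4)], last dimension fastest. A minimal sketch of what they are assumed to compute (the real definitions live in basic_functions.cpp and may differ in detail):

    /* Row-major flattening: i3 (or i4) varies fastest. */
    int getIndexFrom3D(int d1, int d2, int d3, int i1, int i2, int i3) {
        return (i1 * d2 + i2) * d3 + i3;
    }

    int getIndexFrom4D(int d1, int d2, int d3, int d4, int i1, int i2, int i3, int i4) {
        return ((i1 * d2 + i2) * d3 + i3) * d4 + i4;
    }

This is consistent with calls such as getIndexFrom3D(frame->h, frame->w, frame->c, h, w, c), i.e. frames are stored h-major, then w, then channel ([h x w x c]).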
/app/src/main/cpp/include/classifier.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __CLASSIFIER_HPP__
2 | #define __CLASSIFIER_HPP__
3 |
4 | #include "deepsense_lib.hpp"
5 |
6 | float * cnn_doClassification(cnn_frame *frame, cnn *model);
7 | #endif
8 |
--------------------------------------------------------------------------------
/app/src/main/cpp/include/clio.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __CLIO_HPP__
2 | #define __CLIO_HPP__
3 |
4 | #include
5 | #include
6 | #include
7 |
8 | // Commonly-defined shortcuts for LogCat output from native C applications.
9 | #define LOG_TAG PROGRAM_NAME
10 | #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)
11 | #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
12 |
13 | /* This function helps to create informative messages in
14 | * case when OpenCL errors occur. The function returns a string
15 | * representation for an OpenCL error code.
16 | * For example, "CL_DEVICE_NOT_FOUND" instead of "-1".
17 | */
18 | const char* opencl_error_to_str (cl_int error);
19 |
20 | #define SAMPLE_CHECK_ERRORS(ERR) \
21 | if(ERR != CL_SUCCESS) \
22 | { \
23 | LOGD \
24 | ( \
25 | "OpenCL error with code %s happened in file %s at line %d. Exiting.\n", \
26 | opencl_error_to_str(ERR), __FILE__, __LINE__ \
27 | ); \
28 | \
29 | return; \
30 | }
31 |
32 | #define SAMPLE_CHECK_ERRORS_WITH_RETURN(ERR) \
33 | if(ERR != CL_SUCCESS) \
34 | { \
35 | LOGD \
36 | ( \
37 | "OpenCL error with code %s happened in file %s at line %d. Exiting.\n", \
38 | opencl_error_to_str(ERR), __FILE__, __LINE__ \
39 | ); \
40 | \
41 | return ERR; \
42 | }
43 |
44 | #define SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(ERR) \
45 | if(ERR != CL_SUCCESS) \
46 | { \
47 | LOGD \
48 | ( \
49 | "OpenCL error with code %s happened in file %s at line %d. Exiting.\n", \
50 | opencl_error_to_str(ERR), __FILE__, __LINE__ \
51 | ); \
52 | \
53 | return NULL; \
54 | }
55 |
56 | #endif
57 |
--------------------------------------------------------------------------------
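The three SAMPLE_CHECK_ERRORS* macros differ only in what they return on failure (nothing, the cl_int error code, or NULL), so each must be used inside a function with the matching return type. A small, hypothetical usage sketch:

    static cl_mem create_buffer_or_null(OpenCLObjects *ocl, size_t bytes) {
        cl_int err;
        cl_mem buf = clCreateBuffer(ocl->context, CL_MEM_READ_WRITE, bytes, NULL, &err);
        SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);  /* logs and returns NULL on any OpenCL error */
        return buf;
    }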
/app/src/main/cpp/include/deepsense_internal_lib.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __LNN_INTERNAL_LIB_HPP__
2 | #define __LNN_INTERNAL_LIB_HPP__
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | #include
9 |
10 | typedef unsigned long long timestamp_t;
11 | timestamp_t get_timestamp();
12 |
13 | OpenCLObjects *getOpenClObject();
14 | cnn *getModel();
15 |
16 | #ifdef __cplusplus
17 | }
18 | #endif
19 |
20 | #endif
21 |
--------------------------------------------------------------------------------
/app/src/main/cpp/include/deepsense_lib.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __LNN_LIB_HPP__
2 | #define __LNN_LIB_HPP__
3 |
4 | #include
5 |
6 | /*
7 | * This structure acts as both input and output to CNN Layer
8 | */
9 | typedef struct {
10 | int w;
11 | int h;
12 | int c;
13 | float *data;
14 | int useGPU;
15 | int useHalf;
16 | cl_mem cl_data;
17 | } cnn_frame;
18 |
19 | typedef enum {
20 | NO_ACTIVATION,
21 | RAMP,
22 | LOGISTIC,
23 | LEAKY,
24 | LINEAR,
25 | RELU
26 | } activation_function;
27 |
28 | typedef enum {
29 | LAYER_TYPE_UNKNOWN = 0,
30 | LAYER_TYPE_CONV,
31 | LAYER_TYPE_FULLY_CONNECTED,
32 | LAYER_TYPE_MAXPOOL,
33 | LAYER_TYPE_SOFTMAX,
34 | LAYER_TYPE_LRN_NORMALIZE,
35 | LAYER_TYPE_LSTM
36 | } layer_type;
37 |
38 | typedef struct {
39 | int clip;
40 | int clip_count;
41 | int input_size;
42 | int output_size;
43 | float *W_x; //[4*output_size x input_size]
44 | float *W_h;
45 | float *bias;
46 | cl_mem cl_W_x;
47 | cl_mem cl_W_h;
48 | cl_mem cl_bias;
49 | //internal states
50 | float *prev_H;
51 | float *prev_C;
52 | cl_mem cl_prev_H;
53 | cl_mem cl_prev_C;
54 | int forward_temp_data;
55 | int need_reshape;
56 | } cnn_layer_lstm;
57 |
58 | typedef struct {
59 | int k;
60 | int size;
61 | float alpha;
62 | float beta;
63 | } cnn_layer_lrn;
64 |
65 | typedef struct {
66 | int stride[2];
67 | int pad[4];
68 | int w; //width
69 | int h; //height
70 | int c; //channel
71 | int n; //number of neurons
72 | int group;
73 | float *W;
74 | float *bias;
75 | cl_mem cl_W;
76 | cl_mem cl_bias;
77 | } cnn_layer_conv;
78 |
79 | typedef struct {
80 | int weightSize;
81 | int inputSize;
82 | int outputSize;
83 | float *W;
84 | float *bias;
85 | cl_mem cl_W;
86 | cl_mem cl_bias;
87 | int need_reshape;
88 | } cnn_layer_fully_connected;
89 |
90 | typedef struct {
91 | int size;
92 | int stride[2];
93 | int pad[4];
94 | } cnn_layer_maxpool;
95 |
96 | typedef struct {
97 | int index;
98 | int useGPU;
99 | int useHalf;
100 | int output_w;
101 | int output_h;
102 | int output_c;
103 | layer_type type;
104 | cnn_layer_conv *conv_layer;
105 | cnn_layer_fully_connected *connected_layer;
106 | cnn_layer_maxpool *maxpool_layer;
107 | cnn_layer_lrn *lrn_layer;
108 | cnn_layer_lstm *lstm_layer;
109 | cnn_frame *(*doFeedForward)(cnn_frame *frame, void *layer);
110 | int activation;
111 | } cnn_layer;
112 |
113 | typedef struct {
114 | int nLayers;
115 | int useGPU;
116 | int useHalf;
117 | int input_w;
118 | int input_h;
119 | int input_c;
120 | float *averageImage;
121 | cnn_layer *layers;
122 | } cnn;
123 |
124 | cnn * cnn_loadModel(const char *modelDirPath, int useGPU);
125 | void cnn_free(cnn *model);
126 |
127 | #endif
128 |
--------------------------------------------------------------------------------
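Putting the public API above together, a minimal host-side sketch of one load/classify/free cycle (the model path and the way the input is filled are hypothetical; frame_init and cnn_doClassification come from basic_functions.hpp and classifier.hpp):

    cnn *model = cnn_loadModel("/path/to/model_dir", 1 /* useGPU */);

    cnn_frame *frame = frame_init(model->input_w, model->input_h, model->input_c);
    /* fill frame->data with input_w * input_h * input_c floats here */

    float *scores = cnn_doClassification(frame, model);  /* the pipeline consumes the frame */
    /* scores holds output_w * output_h * output_c values of the last layer */

    cnn_free(model);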
/app/src/main/cpp/include/layers/conv_layer.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __CONV_LAYER__
2 | #define __CONV_LAYER__
3 |
4 | #include
5 |
6 | cnn_frame *doFeedForward_CONV(cnn_frame *frame, void *layer);
7 | cnn_frame *doFeedForward_CONV_GPU(cnn_frame *frame, void *layer);
8 | cnn_frame *doFeedForward_CONV_FC_GPU(cnn_frame *frame, void *layer);
9 |
10 | #endif
11 |
--------------------------------------------------------------------------------
/app/src/main/cpp/include/layers/fully_connected.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __FULLY_CONNECTED_HPP__
2 | #define __FULLY_CONNECTED_HPP__
3 |
4 | #include
5 |
6 | cnn_frame *doFeedForward_FULLY_CONNECTED(cnn_frame *frame, void *layer);
7 | cnn_frame *doFeedForward_FULLY_CONNECTED_GPU(cnn_frame *frame, void *layer);
8 |
9 | #endif
10 |
--------------------------------------------------------------------------------
/app/src/main/cpp/include/layers/lrn.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __LRN_HPP__
2 | #define __LRN_HPP__
3 |
4 | #include
5 |
6 | cnn_frame *doFeedForward_LRN(cnn_frame *frame, void *layer);
7 | cnn_frame *doFeedForward_LRN_GPU(cnn_frame *frame, void *layer);
8 |
9 | #endif
10 |
--------------------------------------------------------------------------------
/app/src/main/cpp/include/layers/maxpool.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __MAXPOOL_HPP__
2 | #define __MAXPOOL_HPP__
3 |
4 | #include
5 |
6 | cnn_frame *doFeedForward_MAXPOOL(cnn_frame *frame, void *layer);
7 | cnn_frame *doFeedForward_MAXPOOL_GPU(cnn_frame *frame, void *layer);
8 |
9 | #endif
10 |
--------------------------------------------------------------------------------
/app/src/main/cpp/include/layers/softmax.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __SOFTMAX_HPP__
2 | #define __SOFTMAX_HPP__
3 |
4 | #include
5 |
6 | cnn_frame *doFeedForward_SOFTMAX(cnn_frame *frame, void *layer);
7 |
8 | #endif
9 |
10 |
--------------------------------------------------------------------------------
/app/src/main/cpp/include/predefine.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __PREDEFINE_H
2 | #define __PREDEFINE_H
3 |
4 | #include
5 |
6 | #define PROGRAM_NAME "DEEPSENSE"
7 | #define PROGRAM_KERNEL_NAME "deepsense.cl"
8 |
9 | typedef struct {
10 | cl_kernel kernel;
11 | size_t kernel_max_workgroup_size;
12 | } lany_kernel;
13 |
14 | struct OpenCLObjects {
15 | // Regular OpenCL objects:
16 | cl_platform_id platform;
17 | cl_device_id device;
18 | cl_context context;
19 | cl_command_queue queue; //we use single queue only
20 | cl_program program;
21 |
22 | //half kernels
23 | lany_kernel conv_kernel;
24 | lany_kernel conv_fc_kernel;
25 | lany_kernel fully_connected_kernel;
26 | lany_kernel maxpool_kernel;
27 | lany_kernel lrn_kernel;
28 | lany_kernel activation_kernel;
29 |
30 | //float kernels
31 | lany_kernel conv_kernel_float;
32 | lany_kernel conv_fc_kernel_float;
33 | lany_kernel fully_connected_kernel_float;
34 | lany_kernel maxpool_kernel_float;
35 | lany_kernel lrn_kernel_float;
36 | lany_kernel activation_kernel_float;
37 |
38 | lany_kernel convert_float_to_half_kernel;
39 | lany_kernel convert_half_to_float_kernel;
40 | };
41 |
42 | #endif
43 |
--------------------------------------------------------------------------------
/app/src/main/cpp/include/utilities.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __UTILITIES_HPP__
2 | #define __UTILITIES_HPP__
3 |
4 | #include
5 | #include
6 |
7 | void init_OpenCL(
8 | cl_device_type required_device_type,
9 | OpenCLObjects& openCLObjects,
10 | const char *packageName);
11 |
12 | void shutdown_OpenCL (OpenCLObjects& openCLObjects);
13 |
14 | #endif
15 |
--------------------------------------------------------------------------------
/app/src/main/cpp/layers/conv_layer.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 |
6 | cnn_frame *doFeedForward_CONV(cnn_frame *frame, void *layer) {
7 | LOGD("Running function %s", __PRETTY_FUNCTION__);
8 |
9 | frame = frame_convert_to_cpu(frame);
10 |
11 | cnn_layer_conv *conv_layer = ((cnn_layer *)layer)->conv_layer;
12 |
13 | cnn_frame *output = frame_init(\
14 | (frame->w + conv_layer->pad[0] + conv_layer->pad[1] - conv_layer->w) / conv_layer->stride[0] + 1, \
15 | (frame->h + conv_layer->pad[2] + conv_layer->pad[3] - conv_layer->h) / conv_layer->stride[1] + 1, \
16 | conv_layer->n);
17 |
18 | int i, j, k, x, y, z;
19 | for(i = 0 ; i < output->c; i++) {
20 | for(j = 0 ; j < output->h ; j++) {
21 | for(k = 0 ; k < output->w ; k++) {
22 | float result = 0.0f;
23 | for(x = 0 ; x < conv_layer->c; x++) {
24 | for(y = 0 ; y < conv_layer->h; y++) {
25 | for(z = 0 ; z < conv_layer->w ; z++) {
26 | int w = k * conv_layer->stride[0] - conv_layer->pad[0] + z;
27 | int h = j * conv_layer->stride[1] - conv_layer->pad[2] + y;
28 | if(w < 0 || w >= frame->w)
29 | continue;
30 | if(h < 0 || h >= frame->h)
31 | continue;
32 |
33 | float tmp1 = getDataFrom3D(frame->data, frame->h, frame->w, frame->c, h, w, x);
34 | float tmp2 = getDataFrom4D(conv_layer->W, conv_layer->n, conv_layer->h, conv_layer->w, conv_layer->c, i, y, z, x);
35 | result += tmp1 * tmp2;
36 | }
37 | }
38 | }
39 |
40 | result += conv_layer->bias[i];
41 | output->data[getIndexFrom3D(output->c, output->h, output->w, i, j, k)] = result;
42 | }
43 | }
44 | }
45 |
46 | frame_free(frame);
47 |
48 | doFeedForward_Activation(output, ((cnn_layer *)layer)->activation);
49 |
50 | return output;
51 | }
52 |
53 | cnn_frame *doFeedForward_CONV_GPU(cnn_frame *frame, void *layer) {
54 | LOGD("Running function %s", __PRETTY_FUNCTION__);
55 |
56 | OpenCLObjects *openCLObjects = getOpenClObject();
57 | cl_int err = CL_SUCCESS;
58 |
59 | frame = ((cnn_layer *)layer)->useHalf ? frame_convert_to_gpu_half(frame) : frame_convert_to_gpu_float(frame);
60 |
61 | cnn_layer_conv *conv_layer = ((cnn_layer *)layer)->conv_layer;
62 |
63 | int output_w = (frame->w + conv_layer->pad[0] + conv_layer->pad[1] - conv_layer->w) / conv_layer->stride[0] + 1;
64 | int output_h = (frame->h + conv_layer->pad[2] + conv_layer->pad[3] - conv_layer->h) / conv_layer->stride[1] + 1;
65 | int output_c = conv_layer->n;
66 |
67 | cnn_frame *output = ((cnn_layer *)layer)->useHalf ? frame_init_gpu_half(output_w, output_h, output_c) : frame_init_gpu(output_w, output_h, output_c);
68 |
69 | cl_mem cl_frame = frame->cl_data;
70 | cl_mem cl_result = output->cl_data;
71 |
72 | int i = 0;
73 | cl_kernel kernel = ((cnn_layer *)layer)->useHalf ? openCLObjects->conv_kernel.kernel : openCLObjects->conv_kernel_float.kernel;
74 |
75 | err = clSetKernelArg(kernel, i++, sizeof(cl_mem), &cl_frame);
76 | err |= clSetKernelArg(kernel, i++, sizeof(int), &frame->w);
77 | err |= clSetKernelArg(kernel, i++, sizeof(int), &frame->h);
78 | err |= clSetKernelArg(kernel, i++, sizeof(int), &frame->c);
79 | err |= clSetKernelArg(kernel, i++, sizeof(cl_mem),&conv_layer->cl_W);
80 | err |= clSetKernelArg(kernel, i++, sizeof(cl_mem),&conv_layer->cl_bias);
81 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->w);
82 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->h);
83 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->c);
84 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->n);
85 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->stride[0]);
86 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->stride[1]);
87 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->pad[0]);
88 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->pad[1]);
89 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->pad[2]);
90 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->pad[3]);
91 | err |= clSetKernelArg(kernel, i++, sizeof(cl_mem), &cl_result);
92 | err |= clSetKernelArg(kernel, i++, sizeof(int), &output_w);
93 | err |= clSetKernelArg(kernel, i++, sizeof(int), &output_h);
94 | err |= clSetKernelArg(kernel, i++, sizeof(int), &output_c);
95 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
96 |
97 | size_t localSize[3] = {8 , 8, 1};
98 | int global_x = ((output->w - 1) / localSize[0] + 1) * localSize[0];
99 | int global_y = ((output->h - 1) / localSize[1] + 1) * localSize[1];
100 |
101 | int divide = 8;
102 | int gs3 = output_c % divide == 0 ? output_c / divide : output_c;
103 |
104 | size_t globalSize[3] = {(size_t)global_x, (size_t)global_y, (size_t)gs3};
105 |
106 | err = clEnqueueNDRangeKernel(
107 | openCLObjects->queue,
108 | kernel,
109 | 3,
110 | 0,
111 | globalSize,
112 | localSize,
113 | 0, 0, 0
114 | );
115 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
116 |
117 | err |= clFinish(openCLObjects->queue);
118 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
119 |
120 | doFeedForward_Activation(output, ((cnn_layer *)layer)->activation);
121 |
122 | frame_free(frame);
123 |
124 | return output;
125 | }
126 |
127 | cnn_frame *doFeedForward_CONV_FC_GPU(cnn_frame *frame, void *layer) {
128 | LOGD("Running function %s", __PRETTY_FUNCTION__);
129 |
130 | frame = ((cnn_layer *)layer)->useHalf ? frame_convert_to_gpu_half(frame) : frame_convert_to_gpu_float(frame);
131 |
132 | cl_int err = CL_SUCCESS;
133 |
134 | cnn_layer_conv *conv_layer = ((cnn_layer *)layer)->conv_layer;
135 | OpenCLObjects *openCLObjects = getOpenClObject();
136 |
137 | int output_w = (frame->w + conv_layer->pad[0] + conv_layer->pad[1] - conv_layer->w) / conv_layer->stride[0] + 1;
138 | int output_h = (frame->h + conv_layer->pad[2] + conv_layer->pad[3] - conv_layer->h) / conv_layer->stride[1] + 1;
139 | int output_c = conv_layer->n;
140 |
141 | cnn_frame *output = ((cnn_layer *)layer)->useHalf ? frame_init_gpu_half(output_w, output_h, output_c) : frame_init_gpu(output_w, output_h, output_c);
142 |
143 | cl_mem cl_frame = frame->cl_data;
144 | cl_mem cl_result = output->cl_data;
145 |
146 | int i = 0;
147 | cl_kernel kernel = ((cnn_layer *)layer)->useHalf ? openCLObjects->conv_fc_kernel.kernel : openCLObjects->conv_fc_kernel_float.kernel;
148 | err = clSetKernelArg(kernel, i++, sizeof(cl_mem), &cl_frame);
149 | err |= clSetKernelArg(kernel, i++, sizeof(int), &frame->w);
150 | err |= clSetKernelArg(kernel, i++, sizeof(int), &frame->h);
151 | err |= clSetKernelArg(kernel, i++, sizeof(int), &frame->c);
152 | err |= clSetKernelArg(kernel, i++, sizeof(cl_mem), &conv_layer->cl_W);
153 | err |= clSetKernelArg(kernel, i++, sizeof(cl_mem), &conv_layer->cl_bias);
154 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->w);
155 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->h);
156 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->c);
157 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->n);
158 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->stride[0]);
159 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->stride[1]);
160 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->pad[0]);
161 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->pad[1]);
162 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->pad[2]);
163 | err |= clSetKernelArg(kernel, i++, sizeof(int), &conv_layer->pad[3]);
164 | err |= clSetKernelArg(kernel, i++, sizeof(cl_mem), &cl_result);
165 | err |= clSetKernelArg(kernel, i++, sizeof(int), &output_w);
166 | err |= clSetKernelArg(kernel, i++, sizeof(int), &output_h);
167 | err |= clSetKernelArg(kernel, i++, sizeof(int), &output_c);
168 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
169 |
170 | //size_t globalSize[1] = {(size_t)output->c};
171 | size_t globalSize[1] = {(size_t)(output->c % 256 == 0 ? 256 : output->c)};
172 |
173 | err = clEnqueueNDRangeKernel(
174 | openCLObjects->queue,
175 | kernel,
176 | 1,
177 | 0,
178 | globalSize,
179 | 0,
180 | 0, 0, 0
181 | );
182 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
183 |
184 | err |= clFinish(openCLObjects->queue);
185 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
186 |
187 | frame_free(frame);
188 |
189 | doFeedForward_Activation(output, ((cnn_layer *)layer)->activation);
190 |
191 | return output;
192 | }
193 |
194 |
--------------------------------------------------------------------------------
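A note on the launch geometry in doFeedForward_CONV_GPU: the local size is fixed at 8x8x1, so the first two global dimensions are rounded up to the next multiple of 8, while the third dimension is shrunk to output_c / 8 only when output_c divides evenly by 8 (the kernel then presumably covers several channels per work-item). For example, with a hypothetical 13x13x384 output: global_x = global_y = ceil(13/8)*8 = 16 and gs3 = 384/8 = 48, giving globalSize = {16, 16, 48} against localSize = {8, 8, 1}.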
/app/src/main/cpp/layers/fully_connected.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | cnn_frame *doFeedForward_FULLY_CONNECTED(cnn_frame *frame, void *layer) {
8 | LOGD("Running function %s", __PRETTY_FUNCTION__);
9 |
10 | frame = frame_convert_to_cpu(frame);
11 |
12 | cnn_layer_fully_connected *connected_layer = ((cnn_layer *)layer)->connected_layer;
13 |
14 | cnn_frame *output = frame_init(1, 1, connected_layer->outputSize);
15 |
16 | for(int n = 0 ; n < connected_layer->outputSize ; n++) {
17 | output->data[n] = 0;
18 | for(int i = 0 ; i < frame->c ; i++) {
19 | for(int j = 0 ; j < frame->h ; j++) {
20 | for(int k = 0 ; k < frame->w ; k++) {
21 | int index = getIndexFrom3D(frame->c, frame->h, frame->w, i , j , k);
22 | output->data[n] += frame->data[index] * connected_layer->W[index * connected_layer->outputSize + n];
23 | }
24 | }
25 | }
26 |
27 | output->data[n] += connected_layer->bias[n];
28 | }
29 |
30 | doFeedForward_Activation(output, ((cnn_layer *)layer)->activation);
31 |
32 | frame_free(frame);
33 |
34 | return output;
35 | }
36 |
37 | cnn_frame *doFeedForward_FULLY_CONNECTED_GPU(cnn_frame *frame, void *layer) {
38 | LOGD("Running function %s", __PRETTY_FUNCTION__);
39 |
40 | frame = ((cnn_layer *)layer)->useHalf ? frame_convert_to_gpu_half(frame) : frame_convert_to_gpu_float(frame);
41 |
42 | cl_int err;
43 | cnn_layer_fully_connected *connected_layer = ((cnn_layer *)layer)->connected_layer;
44 | OpenCLObjects *openCLObjects = getOpenClObject();
45 |
46 | cnn_frame *output = ((cnn_layer *)layer)->useHalf ? frame_init_gpu_half(1, 1, connected_layer->outputSize) : frame_init_gpu(1, 1, connected_layer->outputSize);
47 |
48 | int i = 0;
49 | cl_kernel kernel = ((cnn_layer *)layer)->useHalf ? openCLObjects->fully_connected_kernel.kernel : openCLObjects->fully_connected_kernel_float.kernel;
50 | err = clSetKernelArg(kernel, i++, sizeof(cl_mem), &frame->cl_data);
51 | err |= clSetKernelArg(kernel, i++, sizeof(int), &frame->w);
52 | err |= clSetKernelArg(kernel, i++, sizeof(int), &frame->h);
53 | err |= clSetKernelArg(kernel, i++, sizeof(int), &frame->c);
54 | err |= clSetKernelArg(kernel, i++, sizeof(cl_mem), &connected_layer->cl_W);
55 | err |= clSetKernelArg(kernel, i++, sizeof(cl_mem), &connected_layer->cl_bias);
56 | err |= clSetKernelArg(kernel, i++, sizeof(cl_mem), &output->cl_data);
57 | err |= clSetKernelArg(kernel, i++, sizeof(int), &connected_layer->outputSize);
58 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
59 |
60 | //size_t globalSize[1] = {(size_t)connected_layer->outputSize};
61 | size_t globalSize[1] = {(size_t)256};
62 |
63 | err = clEnqueueNDRangeKernel(
64 | openCLObjects->queue,
65 | kernel,
66 | 1,
67 | 0,
68 | globalSize,
69 | 0,
70 | 0, 0, 0
71 | );
72 | err |= clFinish(openCLObjects->queue);
73 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
74 |
75 | doFeedForward_Activation(output, ((cnn_layer *)layer)->activation);
76 |
77 | frame_free(frame);
78 |
79 | return output;
80 | }
--------------------------------------------------------------------------------
/app/src/main/cpp/layers/lrn.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | cnn_frame *doFeedForward_LRN(cnn_frame *frame, void *layer) {
8 | LOGD("Running function %s", __PRETTY_FUNCTION__);
9 |
10 | frame = frame_convert_to_cpu(frame);
11 |
12 | cnn_layer_lrn *lrn_layer = ((cnn_layer *)layer)->lrn_layer;
13 |
14 | int channels = frame->c;
15 | int width = frame->w;
16 | int height = frame->h;
17 |
18 | cnn_frame *output = frame_init(width, height, channels);
19 |
20 | float alpha_over_size = lrn_layer->alpha / lrn_layer->size;
21 | int size = lrn_layer->size;
22 | int k = lrn_layer->k;
23 | float beta = lrn_layer->beta;
24 |
25 | float *in = frame->data;
26 | float *out = output->data;
27 |
28 | for(int w = 0 ; w < width ; w++) {
29 | for(int h = 0 ; h < height ; h++) {
30 | int offset = (h * width + w) * channels;
31 | int head = 0;
32 | int pre_pad = (size - 1) / 2;
33 | int post_pad = size - pre_pad - 1;
34 | float accum_scale = 0;
35 |
36 | while (head < post_pad) {
37 | float data = in[offset + head];
38 | accum_scale += data * data;
39 | head++;
40 | }
41 |
42 | while (head < size) {
43 | float data = in[offset + head];
44 | accum_scale += data * data;
45 | float scale = k + accum_scale * alpha_over_size;
46 | out[offset + head - post_pad] = in[offset + head - post_pad] * pow(scale, -beta);
47 | head++;
48 | }
49 |
50 | while (head < channels) {
51 | float data = in[offset + head];
52 | accum_scale += data * data;
53 | data = in[offset + head - size];
54 | accum_scale -= data * data;
55 | float scale = k + accum_scale * alpha_over_size;
56 | out[offset + head - post_pad] = in[offset + head - post_pad] * pow(scale, -beta);
57 | head++;
58 | }
59 |
60 | while (head < channels + post_pad) {
61 | float data = in[offset + head - size];
62 | accum_scale -= data * data;
63 | float scale = k + accum_scale * alpha_over_size;
64 | out[offset + head - post_pad] = in[offset + head - post_pad] * pow(scale, -beta);
65 | head++;
66 | }
67 | }
68 | }
69 |
70 | frame_free(frame);
71 | return output;
72 | }
73 |
74 | cnn_frame *doFeedForward_LRN_GPU(cnn_frame *frame, void *layer) {
75 | LOGD("Running function %s", __PRETTY_FUNCTION__);
76 |
77 | cnn_layer_lrn *lrn_layer = ((cnn_layer *)layer)->lrn_layer;
78 |
79 | cl_int err;
80 | OpenCLObjects *openCLObjects = getOpenClObject();
81 |
82 | int channels = frame->c;
83 | int width = frame->w;
84 | int height = frame->h;
85 |
86 | cnn_frame *output = ((cnn_layer *)layer)->useHalf ? frame_init_gpu_half(width, height, channels) : frame_init_gpu(width, height, channels);
87 |
88 | cl_mem cl_frame = frame->cl_data;
89 | cl_mem cl_result = output->cl_data;
90 |
91 | float alpha_over_size = lrn_layer->alpha / lrn_layer->size;
92 |
93 | int i = 0;
94 | cl_kernel kernel = ((cnn_layer *)layer)->useHalf ? openCLObjects->lrn_kernel.kernel : openCLObjects->lrn_kernel_float.kernel;
95 |
96 | err = clSetKernelArg(kernel, i++, sizeof(cl_mem), &cl_frame);
97 | err |= clSetKernelArg(kernel, i++, sizeof(int), &channels);
98 | err |= clSetKernelArg(kernel, i++, sizeof(int), &height);
99 | err |= clSetKernelArg(kernel, i++, sizeof(int), &width);
100 | err |= clSetKernelArg(kernel, i++, sizeof(int), &lrn_layer->k);
101 | err |= clSetKernelArg(kernel, i++, sizeof(int), &lrn_layer->size);
102 | err |= clSetKernelArg(kernel, i++, sizeof(float), &alpha_over_size);
103 | err |= clSetKernelArg(kernel, i++, sizeof(float), &lrn_layer->beta);
104 | err |= clSetKernelArg(kernel, i++, sizeof(cl_mem), &cl_result);
105 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
106 |
107 | size_t globalSize[2] = {(size_t)width, (size_t)height};
108 |
109 | err = clEnqueueNDRangeKernel(
110 | openCLObjects->queue,
111 | kernel,
112 | 2,
113 | 0,
114 | globalSize,
115 | 0,
116 | 0, 0, 0
117 | );
118 | err |= clFinish(openCLObjects->queue);
119 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
120 |
121 | frame_free(frame);
122 |
123 | return output;
124 | }
--------------------------------------------------------------------------------
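For reference, the sliding-window loop in doFeedForward_LRN implements the usual cross-channel local response normalization; with the parameters read at load time it corresponds to

    out(w,h,c) = in(w,h,c) * ( k + (alpha/size) * sum over c' of in(w,h,c')^2 )^(-beta)

where the sum runs over a window of size channels around c ((size-1)/2 channels before c, the rest after, clipped to the valid channel range).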
/app/src/main/cpp/layers/maxpool.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 |
6 | cnn_frame *doFeedForward_MAXPOOL(cnn_frame *frame, void *layer) {
7 | LOGD("Running function %s", __PRETTY_FUNCTION__);
8 |
9 | frame = frame_convert_to_cpu(frame);
10 |
11 | cnn_layer_maxpool *maxpool_layer = ((cnn_layer *)layer)->maxpool_layer;
12 |
13 | int w = 1 + (frame->w + maxpool_layer->pad[0] + maxpool_layer->pad[1] - maxpool_layer->size) / maxpool_layer->stride[0];
14 | int h = 1 + (frame->h + maxpool_layer->pad[2] + maxpool_layer->pad[3] - maxpool_layer->size) / maxpool_layer->stride[1];
15 | int d = frame->c;
16 | cnn_frame *output = frame_init(w, h, d);
17 |
18 | for(int k = 0 ; k < output->c ; k++) {
19 | for(int h = 0; h < output->h; h++) {
20 | for(int w = 0; w < output->w ; w++) {
21 | float max = -999999.9f;
22 | for(int x = 0 ; x < maxpool_layer->size ; x++) {
23 | for(int y = 0 ; y < maxpool_layer->size ; y++) {
24 | int x_ = w * maxpool_layer->stride[0] + x - maxpool_layer->pad[0];
25 | int y_ = h * maxpool_layer->stride[1] + y - maxpool_layer->pad[2];
26 | int valid = (x_ >= 0 && x_ < frame->w && y_ >= 0 && y_ < frame->h);
27 | float val = (valid != 0) ? frame->data[getIndexFrom3D(frame->h, frame->w, frame->c, y_, x_, k)] : -999999.9f;
28 | max = (val > max) ? val : max;
29 | }
30 | }
31 | output->data[getIndexFrom3D(output->h, output->w, output->c, h, w, k)] = max;
32 | }
33 | }
34 | }
35 |
36 | frame_free(frame);
37 |
38 | return output;
39 | }
40 |
41 | cnn_frame *doFeedForward_MAXPOOL_GPU(cnn_frame *frame, void *layer) {
42 | LOGD("Running function %s", __PRETTY_FUNCTION__);
43 |
44 | frame = ((cnn_layer *)layer)->useHalf ? frame_convert_to_gpu_half(frame) : frame_convert_to_gpu_float(frame);
45 |
46 | cnn_layer_maxpool *maxpool_layer = ((cnn_layer *)layer)->maxpool_layer;
47 |
48 | cl_int err;
49 | OpenCLObjects *openCLObjects = getOpenClObject();
50 |
51 | int input_w = frame->w;
52 | int input_h = frame->h;
53 | int input_d = frame->c;
54 |
55 | //prepare output
56 | int output_w = 1 + (frame->w + maxpool_layer->pad[0] + maxpool_layer->pad[1] - maxpool_layer->size) / maxpool_layer->stride[0];
57 | int output_h = 1 + (frame->h + maxpool_layer->pad[2] + maxpool_layer->pad[3] - maxpool_layer->size) / maxpool_layer->stride[1];
58 | int output_c = frame->c;
59 |
60 | cnn_frame *output = ((cnn_layer *)layer)->useHalf ? frame_init_gpu_half(output_w, output_h, output_c) : frame_init_gpu(output_w, output_h, output_c);
61 |
62 | cl_mem cl_frame = frame->cl_data;
63 | cl_mem cl_result = output->cl_data;
64 |
65 | int i = 0;
66 |
67 | cl_kernel kernel = ((cnn_layer *)layer)->useHalf ? openCLObjects->maxpool_kernel.kernel : openCLObjects->maxpool_kernel_float.kernel;
68 |
69 | err = clSetKernelArg(kernel, i++, sizeof(cl_mem), &cl_frame);
70 | err |= clSetKernelArg(kernel, i++, sizeof(int), &input_w);
71 | err |= clSetKernelArg(kernel, i++, sizeof(int), &input_h);
72 | err |= clSetKernelArg(kernel, i++, sizeof(int), &input_d);
73 | err |= clSetKernelArg(kernel, i++, sizeof(int), &maxpool_layer->size);
74 | err |= clSetKernelArg(kernel, i++, sizeof(int), &maxpool_layer->stride[0]);
75 | err |= clSetKernelArg(kernel, i++, sizeof(int), &maxpool_layer->stride[1]);
76 | err |= clSetKernelArg(kernel, i++, sizeof(int), &maxpool_layer->pad[0]);
77 | err |= clSetKernelArg(kernel, i++, sizeof(int), &maxpool_layer->pad[1]);
78 | err |= clSetKernelArg(kernel, i++, sizeof(int), &maxpool_layer->pad[2]);
79 | err |= clSetKernelArg(kernel, i++, sizeof(int), &maxpool_layer->pad[3]);
80 | err |= clSetKernelArg(kernel, i++, sizeof(cl_mem), &cl_result);
81 | err |= clSetKernelArg(kernel, i++, sizeof(int), &output_w);
82 | err |= clSetKernelArg(kernel, i++, sizeof(int), &output_h);
83 | err |= clSetKernelArg(kernel, i++, sizeof(int), &output_c);
84 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
85 |
86 | size_t globalSize[3] = {(size_t)output_w, (size_t)output_h, (size_t)output_c};
87 |
88 | err = clEnqueueNDRangeKernel(
89 | openCLObjects->queue,
90 | kernel,
91 | 3,
92 | 0,
93 | globalSize,
94 | 0,
95 | 0, 0, 0
96 | );
97 | err |= clFinish(openCLObjects->queue);
98 | SAMPLE_CHECK_ERRORS_WITH_NULL_RETURN(err);
99 |
100 | frame_free(frame);
101 |
102 | return output;
103 | }
104 |
--------------------------------------------------------------------------------
/app/src/main/cpp/layers/softmax.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | cnn_frame *doFeedForward_SOFTMAX(cnn_frame *frame, void *layer) {
8 | LOGD("Running function %s", __PRETTY_FUNCTION__);
9 |
10 | double dsum = 0;
11 | int i;
12 |
13 | frame = frame_convert_to_cpu(frame);
14 |
15 | for(i = 0 ; i < frame->c ; i++) {
16 | dsum += exp((double)frame->data[i]);
17 | }
18 |
19 | for(i = 0 ; i < frame->c ; i++) {
20 | frame->data[i] = (float)(exp((double)frame->data[i]) / dsum);
21 | }
22 |
23 | return frame;
24 | }
25 |
--------------------------------------------------------------------------------
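doFeedForward_SOFTMAX exponentiates the raw activations directly, which can overflow exp() for large logits. A common, mathematically equivalent alternative subtracts the channel maximum first; a sketch of that variant (not the project's implementation):

    float maxv = frame->data[0];
    for(i = 1 ; i < frame->c ; i++)
        if(frame->data[i] > maxv) maxv = frame->data[i];

    double dsum = 0;
    for(i = 0 ; i < frame->c ; i++)
        dsum += exp((double)(frame->data[i] - maxv));

    for(i = 0 ; i < frame->c ; i++)
        frame->data[i] = (float)(exp((double)(frame->data[i] - maxv)) / dsum);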
/app/src/main/cpp/utilities.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 |
8 | #include
9 | #include
10 | #include
11 |
12 | char packageNameBuf[256];
13 |
14 | inline char* load_Program(const char *inputPath) {
15 | FILE *fp = fopen(inputPath,"r");
16 | int fd = fileno(fp);
17 | struct stat buf;
18 | fstat(fd, &buf);
19 | int size = buf.st_size;
20 |
21 | char *buffer = (char *)malloc(size + 1);
22 | buffer[size] = '\0';
23 | fread(buffer, size, 1, fp);
24 | fclose(fp);
25 |
26 | return buffer;
27 | }
28 |
29 | cl_int compile_Program(OpenCLObjects& openCLObjects, const char* kernelFileName) {
30 |
31 | std::string kernelPath;
32 | cl_int err = CL_SUCCESS;
33 |
34 | kernelPath.append("/data/data/");
35 | kernelPath.append(packageNameBuf);
36 | kernelPath.append("/app_execdir/");
37 | kernelPath.append(kernelFileName);
38 |
39 | char* tmp = load_Program(kernelPath.c_str());
40 | std::string tmpStr(tmp);
41 | const char *kernelSource = tmpStr.c_str();
42 | free(tmp);
43 |
44 | openCLObjects.program =
45 | clCreateProgramWithSource (
46 | openCLObjects.context,
47 | 1,
48 | &kernelSource,
49 | 0,
50 | &err
51 | );
52 | SAMPLE_CHECK_ERRORS_WITH_RETURN(err);
53 |
54 | err = clBuildProgram(openCLObjects.program, 0, 0, "-O3 -cl-mad-enable -cl-fast-relaxed-math", 0, 0);
55 |
56 | if(err == CL_BUILD_PROGRAM_FAILURE) {
57 | size_t log_length = 0;
58 |
59 | err = clGetProgramBuildInfo(
60 | openCLObjects.program,
61 | openCLObjects.device,
62 | CL_PROGRAM_BUILD_LOG,
63 | 0,
64 | 0,
65 | &log_length);
66 | SAMPLE_CHECK_ERRORS_WITH_RETURN(err);
67 |
68 | //vector log(log_length);
69 | char* logbuf = (char*)malloc(log_length);
70 |
71 | err = clGetProgramBuildInfo(
72 | openCLObjects.program,
73 | openCLObjects.device,
74 | CL_PROGRAM_BUILD_LOG,
75 | log_length,
76 | (void*)logbuf,
77 | 0);
78 | SAMPLE_CHECK_ERRORS_WITH_RETURN(err);
79 |
80 | LOGE("Error happened during the build of OpenCL program.\nBuild log:%s", logbuf);
81 |
82 | free(logbuf);
83 | }
84 |
85 | //SAMPLE_CHECK_ERRORS_WITH_RETURN(err);
86 | return CL_SUCCESS;
87 | }
88 |
89 | void init_OpenCL(
90 | cl_device_type required_device_type,
91 | OpenCLObjects& openCLObjects,
92 | const char *packageName) {
93 |
94 | using namespace std;
95 | cl_int err = CL_SUCCESS;
96 |
97 | LOGD("init_OpenCL: Initializing GPU\n");
98 |
99 | strcpy(packageNameBuf, packageName);
100 |
101 | cl_uint num_of_platforms = 0;
102 | err = clGetPlatformIDs(0, 0, &num_of_platforms);
103 | SAMPLE_CHECK_ERRORS(err);
104 |
105 | vector<cl_platform_id> platforms(num_of_platforms);
106 | // Get IDs for all platforms.
107 | err = clGetPlatformIDs(num_of_platforms, &platforms[0], 0);
108 | SAMPLE_CHECK_ERRORS(err);
109 |
110 | cl_uint i = 0;
111 | size_t platform_name_length = 0;
112 | err = clGetPlatformInfo(
113 | platforms[i],
114 | CL_PLATFORM_NAME,
115 | 0,
116 | 0,
117 | &platform_name_length
118 | );
119 | SAMPLE_CHECK_ERRORS(err);
120 |
121 | vector<char> platform_name(platform_name_length);
122 | err = clGetPlatformInfo(
123 | platforms[i],
124 | CL_PLATFORM_NAME,
125 | platform_name_length,
126 | &platform_name[0],
127 | 0
128 | );
129 | SAMPLE_CHECK_ERRORS(err);
130 |
131 | openCLObjects.platform = platforms[0];
132 |
133 | cl_context_properties context_props[] = {
134 | CL_CONTEXT_PLATFORM,
135 | cl_context_properties(openCLObjects.platform),
136 | 0};
137 |
138 | openCLObjects.context = clCreateContextFromType(
139 | context_props,
140 | required_device_type,
141 | 0,
142 | 0,
143 | &err
144 | );
145 | SAMPLE_CHECK_ERRORS(err);
146 |
147 | err = clGetContextInfo(
148 | openCLObjects.context,
149 | CL_CONTEXT_DEVICES,
150 | sizeof(openCLObjects.device),
151 | &openCLObjects.device,
152 | 0);
153 | SAMPLE_CHECK_ERRORS(err);
154 |
155 | openCLObjects.queue = clCreateCommandQueue (
156 | openCLObjects.context,
157 | openCLObjects.device,
158 | 0, // Creating queue properties, refer to the OpenCL specification for details.
159 | &err);
160 | SAMPLE_CHECK_ERRORS(err);
161 |
162 | err = compile_Program(openCLObjects, PROGRAM_KERNEL_NAME);
163 | SAMPLE_CHECK_ERRORS(err);
164 |
165 | cl_device_local_mem_type local_mem_type;
166 | clGetDeviceInfo(
167 | openCLObjects.device,
168 | CL_DEVICE_LOCAL_MEM_TYPE,
169 | sizeof(cl_device_local_mem_type),
170 | &local_mem_type,
171 | NULL
172 | );
173 | LOGD("CL_DEVICE_LOCAL_MEM_TYPE %u", local_mem_type);
174 |
175 | openCLObjects.conv_kernel.kernel = clCreateKernel(openCLObjects.program, "conv_kernel_half", &err);
176 | SAMPLE_CHECK_ERRORS(err);
177 | clGetKernelWorkGroupInfo(
178 | openCLObjects.conv_kernel.kernel,
179 | openCLObjects.device,
180 | CL_KERNEL_WORK_GROUP_SIZE,
181 | sizeof(size_t),
182 | &openCLObjects.conv_kernel.kernel_max_workgroup_size,
183 | NULL
184 | );
185 |
186 | openCLObjects.conv_kernel_float.kernel = clCreateKernel(openCLObjects.program, "conv_kernel_float", &err);
187 | SAMPLE_CHECK_ERRORS(err);
188 | clGetKernelWorkGroupInfo(
189 | openCLObjects.conv_kernel_float.kernel,
190 | openCLObjects.device,
191 | CL_KERNEL_WORK_GROUP_SIZE,
192 | sizeof(size_t),
193 | &openCLObjects.conv_kernel_float.kernel_max_workgroup_size,
194 | NULL
195 | );
196 |
197 | openCLObjects.maxpool_kernel.kernel = clCreateKernel(openCLObjects.program, "maxpool_kernel_half", &err);
198 | SAMPLE_CHECK_ERRORS(err);
199 | clGetKernelWorkGroupInfo(
200 | openCLObjects.maxpool_kernel.kernel,
201 | openCLObjects.device,
202 | CL_KERNEL_WORK_GROUP_SIZE,
203 | sizeof(size_t),
204 | &openCLObjects.maxpool_kernel.kernel_max_workgroup_size,
205 | NULL
206 | );
207 |
208 | openCLObjects.maxpool_kernel_float.kernel = clCreateKernel(openCLObjects.program, "maxpool_kernel_float", &err);
209 | SAMPLE_CHECK_ERRORS(err);
210 | clGetKernelWorkGroupInfo(
211 | openCLObjects.maxpool_kernel_float.kernel,
212 | openCLObjects.device,
213 | CL_KERNEL_WORK_GROUP_SIZE,
214 | sizeof(size_t),
215 | &openCLObjects.maxpool_kernel_float.kernel_max_workgroup_size,
216 | NULL
217 | );
218 |
219 | openCLObjects.conv_fc_kernel.kernel = clCreateKernel(openCLObjects.program, "conv_fc_kernel_half", &err);
220 | SAMPLE_CHECK_ERRORS(err);
221 | clGetKernelWorkGroupInfo(
222 | openCLObjects.conv_fc_kernel.kernel,
223 | openCLObjects.device,
224 | CL_KERNEL_WORK_GROUP_SIZE,
225 | sizeof(size_t),
226 | &openCLObjects.conv_fc_kernel.kernel_max_workgroup_size,
227 | NULL
228 | );
229 |
230 | openCLObjects.conv_fc_kernel_float.kernel = clCreateKernel(openCLObjects.program, "conv_fc_kernel_float", &err);
231 | SAMPLE_CHECK_ERRORS(err);
232 | clGetKernelWorkGroupInfo(
233 | openCLObjects.conv_fc_kernel_float.kernel,
234 | openCLObjects.device,
235 | CL_KERNEL_WORK_GROUP_SIZE,
236 | sizeof(size_t),
237 | &openCLObjects.conv_fc_kernel_float.kernel_max_workgroup_size,
238 | NULL
239 | );
240 |
241 | openCLObjects.fully_connected_kernel.kernel = clCreateKernel(openCLObjects.program, "fully_connected_kernel_half", &err);
242 | SAMPLE_CHECK_ERRORS(err);
243 | clGetKernelWorkGroupInfo(
244 | openCLObjects.fully_connected_kernel.kernel,
245 | openCLObjects.device,
246 | CL_KERNEL_WORK_GROUP_SIZE,
247 | sizeof(size_t),
248 | &openCLObjects.fully_connected_kernel.kernel_max_workgroup_size,
249 | NULL
250 | );
251 |
252 | openCLObjects.fully_connected_kernel_float.kernel = clCreateKernel(openCLObjects.program, "fully_connected_kernel_float", &err);
253 | SAMPLE_CHECK_ERRORS(err);
254 | clGetKernelWorkGroupInfo(
255 | openCLObjects.fully_connected_kernel_float.kernel,
256 | openCLObjects.device,
257 | CL_KERNEL_WORK_GROUP_SIZE,
258 | sizeof(size_t),
259 | &openCLObjects.fully_connected_kernel_float.kernel_max_workgroup_size,
260 | NULL
261 | );
262 |
263 | openCLObjects.lrn_kernel.kernel = clCreateKernel(openCLObjects.program, "cross_channels_lrn_kernel_half", &err);
264 | clGetKernelWorkGroupInfo(
265 | openCLObjects.lrn_kernel.kernel,
266 | openCLObjects.device,
267 | CL_KERNEL_WORK_GROUP_SIZE,
268 | sizeof(size_t),
269 | &openCLObjects.lrn_kernel.kernel_max_workgroup_size,
270 | NULL
271 | );
272 | SAMPLE_CHECK_ERRORS(err);
273 |
274 | openCLObjects.lrn_kernel_float.kernel = clCreateKernel(openCLObjects.program, "cross_channels_lrn_kernel_float", &err);
275 | clGetKernelWorkGroupInfo(
276 | openCLObjects.lrn_kernel_float.kernel,
277 | openCLObjects.device,
278 | CL_KERNEL_WORK_GROUP_SIZE,
279 | sizeof(size_t),
280 | &openCLObjects.lrn_kernel_float.kernel_max_workgroup_size,
281 | NULL
282 | );
283 | SAMPLE_CHECK_ERRORS(err);
284 |
285 | openCLObjects.activation_kernel.kernel = clCreateKernel(openCLObjects.program, "activation_kernel_half", &err);
286 | SAMPLE_CHECK_ERRORS(err);
287 |
288 | openCLObjects.activation_kernel_float.kernel = clCreateKernel(openCLObjects.program, "activation_kernel_float", &err);
289 | SAMPLE_CHECK_ERRORS(err);
290 |
291 | openCLObjects.convert_float_to_half_kernel.kernel = clCreateKernel(openCLObjects.program, "convertFloatToHalf", &err);
292 | SAMPLE_CHECK_ERRORS(err);
293 |
294 | openCLObjects.convert_half_to_float_kernel.kernel = clCreateKernel(openCLObjects.program, "convertHalfToFloat", &err);
295 | SAMPLE_CHECK_ERRORS(err);
296 |
297 | LOGD("initOpenCL finished successfully");
298 | }
299 |
300 | void shutdown_OpenCL (OpenCLObjects& openCLObjects) {
301 | cl_int err = CL_SUCCESS;
302 |
303 | err = clReleaseKernel(openCLObjects.conv_kernel.kernel);
304 | SAMPLE_CHECK_ERRORS(err);
305 |
306 | err = clReleaseKernel(openCLObjects.conv_fc_kernel.kernel);
307 | SAMPLE_CHECK_ERRORS(err);
308 |
309 | err = clReleaseKernel(openCLObjects.maxpool_kernel.kernel);
310 | SAMPLE_CHECK_ERRORS(err);
311 |
312 | err = clReleaseKernel(openCLObjects.lrn_kernel.kernel);
313 | SAMPLE_CHECK_ERRORS(err);
314 |
315 | err = clReleaseKernel(openCLObjects.fully_connected_kernel.kernel);
316 | SAMPLE_CHECK_ERRORS(err);
317 |
318 | err = clReleaseKernel(openCLObjects.conv_kernel_float.kernel);
319 | SAMPLE_CHECK_ERRORS(err);
320 |
321 | err = clReleaseKernel(openCLObjects.conv_fc_kernel_float.kernel);
322 | SAMPLE_CHECK_ERRORS(err);
323 |
324 | err = clReleaseKernel(openCLObjects.maxpool_kernel_float.kernel);
325 | SAMPLE_CHECK_ERRORS(err);
326 |
327 | err = clReleaseKernel(openCLObjects.lrn_kernel_float.kernel);
328 | SAMPLE_CHECK_ERRORS(err);
329 |
330 | err = clReleaseKernel(openCLObjects.fully_connected_kernel_float.kernel);
331 | SAMPLE_CHECK_ERRORS(err);
332 |
333 | err = clReleaseKernel(openCLObjects.convert_float_to_half_kernel.kernel);
334 | SAMPLE_CHECK_ERRORS(err);
335 |
336 | err = clReleaseKernel(openCLObjects.convert_half_to_float_kernel.kernel);
337 | SAMPLE_CHECK_ERRORS(err);
338 |
339 | err = clReleaseProgram(openCLObjects.program);
340 | SAMPLE_CHECK_ERRORS(err);
341 |
342 | err = clReleaseCommandQueue(openCLObjects.queue);
343 | SAMPLE_CHECK_ERRORS(err);
344 |
345 | err = clReleaseContext(openCLObjects.context);
346 | SAMPLE_CHECK_ERRORS(err);
347 |
348 | LOGD("shutdownOpenCL finished successfully");
349 | }
--------------------------------------------------------------------------------
/app/src/main/java/com/lanytek/deepsensev3/MainActivity.java:
--------------------------------------------------------------------------------
1 | package com.lanytek.deepsensev3;
2 |
3 | import android.app.Activity;
4 | import android.content.Intent;
5 | import android.database.Cursor;
6 | import android.graphics.Bitmap;
7 | import android.graphics.Canvas;
8 | import android.graphics.Color;
9 | import android.graphics.Paint;
10 | import android.net.Uri;
11 | import android.os.AsyncTask;
12 | import android.os.Environment;
13 | import android.provider.MediaStore;
14 | import android.support.v7.app.AppCompatActivity;
15 | import android.os.Bundle;
16 | import android.util.Log;
17 | import android.view.View;
18 | import android.widget.Button;
19 | import android.widget.ImageView;
20 | import android.widget.TextView;
21 |
22 | import com.squareup.picasso.Picasso;
23 |
24 | import java.io.BufferedReader;
25 | import java.io.File;
26 | import java.io.FileNotFoundException;
27 | import java.io.FileReader;
28 | import java.io.IOException;
29 | import java.util.ArrayList;
30 | import java.util.List;
31 | import java.util.concurrent.ExecutorService;
32 | import java.util.concurrent.Executors;
33 | import java.util.concurrent.TimeUnit;
34 |
35 | public class MainActivity extends AppCompatActivity {
36 | public static String TAG = "DeepSense";
37 |
38 | private List<String> img_recognition_descriptions = new ArrayList<>();
39 | private static final String [] yolo_descriptions = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
40 |
41 | private static String model_yolo_tiny = (new File(Environment.getExternalStorageDirectory(), "YoloModels/Yolo-Tiny-New-Format")).getAbsolutePath();
42 | private static String model_img_recognition = (new File(Environment.getExternalStorageDirectory(), "ImageNetModels/Vgg_F-New-Format")).getAbsolutePath();
43 |
44 | private Activity activity = this;
45 |
46 | private ImageView iv;
47 |
48 | private Button btn_loadModelGPU;
49 | private Button btn_processImage;
50 | private TextView tv_runtime, tv_desc;
51 |
52 | private static final int SELECT_PICTURE = 9999;
53 | private String selectedImagePath = null;
54 |
55 | // Used to load the 'deepsense' native library on application startup.
56 | static {
57 | System.loadLibrary("deepsense");
58 | }
59 |
60 | @Override
61 | protected void onCreate(Bundle savedInstanceState) {
62 | super.onCreate(savedInstanceState);
63 | setContentView(R.layout.activity_main);
64 |
65 | iv = (ImageView) findViewById(R.id.iv_image);
66 | btn_loadModelGPU = (Button) findViewById(R.id.btn_loadModelGPU);
67 | btn_processImage = (Button) findViewById(R.id.btn_processImage);
68 | tv_runtime = (TextView) findViewById(R.id.tv_runTime);
69 | tv_desc = (TextView) findViewById(R.id.tv_desc);
70 |
71 | new async_copy_kernel_code().execute("deepsense.cl");
72 |
73 | btn_loadModelGPU.setOnClickListener(new View.OnClickListener() {
74 | @Override
75 | public void onClick(View v) {
76 | new async_loadModel().execute();
77 | }
78 | });
79 |
80 | btn_processImage.setOnClickListener(new View.OnClickListener() {
81 | @Override
82 | public void onClick(View v) {
83 | new async_processImage_yolo().execute();
84 | //new async_processImage_img_recognition().execute();
85 | }
86 | });
87 |
88 | iv.setOnClickListener(new View.OnClickListener() {
89 | @Override
90 | public void onClick(View v) {
91 | Intent intent = new Intent();
92 | intent.setType("image/*");
93 | intent.setAction(Intent.ACTION_GET_CONTENT);
94 | startActivityForResult(Intent.createChooser(intent, "Select Picture"), SELECT_PICTURE);
95 | }
96 | });
97 | }
98 |
99 | private void setButtons(boolean isEnabled) {
100 | //btn_loadModelCPU.setEnabled(isEnabled);
101 | btn_loadModelGPU.setEnabled(isEnabled);
102 | btn_processImage.setEnabled(isEnabled);
103 | }
104 |
105 | private class async_copy_kernel_code extends AsyncTask<String, Void, Void> {
106 |
107 | @Override
108 | protected void onPreExecute() {
109 | super.onPreExecute();
110 | setButtons(false);
111 | }
112 |
113 | @Override
114 | protected Void doInBackground(String... params) {
115 | for(String p : params) {
116 | Utilities.copyFile(activity, p);
117 | }
118 | return null;
119 | }
120 |
121 | @Override
122 | protected void onPostExecute(Void aVoid) {
123 | super.onPostExecute(aVoid);
124 | setButtons(true);
125 | }
126 | }
127 |
128 | private class async_loadModel extends AsyncTask<Void, Void, Void> {
129 |
130 | @Override
131 | protected void onPreExecute() {
132 | setButtons(false);
133 | super.onPreExecute();
134 | }
135 |
136 | @Override
137 | protected Void doInBackground(Void... params) {
138 | if(new File(model_img_recognition + "/description").exists()) {
139 | try {
140 | img_recognition_descriptions.clear();
141 | BufferedReader br = new BufferedReader(new FileReader(new File(model_img_recognition + "/description")));
142 | String line;
143 | while((line = br.readLine()) != null) {
144 | img_recognition_descriptions.add(line);
145 | }
146 | br.close();
147 | } catch (FileNotFoundException e) {
148 | e.printStackTrace();
149 | } catch (IOException e) {
150 | e.printStackTrace();
151 | }
152 | }
153 |
154 | InitGPU(model_yolo_tiny, activity.getPackageName());
155 | return null;
156 | }
157 |
158 | @Override
159 | protected void onPostExecute(Void aVoid) {
160 | super.onPostExecute(aVoid);
161 | setButtons(true);
162 | }
163 | }
164 |
165 | private class async_processImage_img_recognition extends AsyncTask<Void, Void, Void> {
166 |
167 | private double t1,t2;
168 | private double cnn_runtime;
169 | private float [] result;
170 | private Bitmap bm = null;
171 | private int best_idx = -1;
172 |
173 | @Override
174 | protected void onPreExecute() {
175 | btn_processImage.setEnabled(false);
176 | tv_runtime.setText("------");
177 | tv_desc.setText("...");
178 | t1 = System.currentTimeMillis();
179 | super.onPreExecute();
180 | }
181 |
182 | @Override
183 | protected void onPostExecute(Void aVoid) {
184 | super.onPostExecute(aVoid);
185 | t2 = System.currentTimeMillis();
186 | double runtime = t2 - t1;
187 | btn_processImage.setEnabled(true);
188 | tv_runtime.setText(cnn_runtime + " / " + runtime + " ms");
189 | tv_desc.setText(img_recognition_descriptions.get(best_idx));
190 | }
191 |
192 | @Override
193 | protected Void doInBackground(Void... voids) {
194 |
195 | if(selectedImagePath != null) {
196 | final int IMG_X = 224;
197 | final int IMG_Y = 224;
198 | final int IMG_C = 3;
199 |
200 | final float [] bitmapArray = new float[IMG_X * IMG_Y * IMG_C];
201 |
202 | try {
203 | bm = Picasso.with(activity)
204 | .load(new File(selectedImagePath))
205 | .config(Bitmap.Config.ARGB_8888)
206 | .resize(448,448)
207 | .get();
208 | } catch (IOException e) {
209 | e.printStackTrace();
210 | }
211 |
212 | if(bm != null) {
213 | ExecutorService executor = Executors.newFixedThreadPool(8);
214 |
215 | final double scaleX = (double)IMG_X / (double)bm.getWidth();
216 | final double scaleY = (double)IMG_Y / (double)bm.getHeight();
217 |
218 | for(int i = 0 ; i < 224 ; i++) {
219 | final int finalI = i;
220 | executor.execute(new Runnable() {
221 | @Override
222 | public void run() {
223 | for(int j = 0 ; j < IMG_Y ; j++) {
224 | int pixel = bm.getPixel((int)Math.ceil(1/scaleX * finalI),(int)Math.ceil(1/scaleY * j));
225 | float b = (float)(pixel & 0x000000ff);
226 | float g = (float)((pixel >> 8) & 0x000000ff);
227 | float r = (float)((pixel >> 16) & 0x000000ff);
228 | int index = finalI * IMG_Y + j;
229 | bitmapArray[index * 3] = r - 122.803f;
230 | bitmapArray[index * 3 + 1] = g - 114.885f;
231 | bitmapArray[index * 3 + 2] = b - 101.572f;
232 | }
233 | }
234 | });
235 | }
236 |
237 | executor.shutdown();
238 | try {
239 | executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
240 | } catch (InterruptedException e) {
241 | e.printStackTrace();
242 | }
243 |
244 | double x1 = System.currentTimeMillis();
245 | float [] result = GetInferrence(bitmapArray);
246 | double x2 = System.currentTimeMillis();
247 | cnn_runtime = x2 - x1;
248 | Log.d(TAG,"CNN RUNTIME: " + cnn_runtime + "ms");
249 |
250 | //get top-1
251 | float best_prob = 0;
252 | for(int i = 0 ; i < 1000 ; i ++) {
253 | if(best_prob < result[i]) {
254 | best_idx = i;
255 | best_prob = result[i];
256 | }
257 | }
258 |
259 | Log.d(TAG,"Image classified as : " + img_recognition_descriptions.get(best_idx));
260 | }
261 | }
262 |
263 | return null;
264 | }
265 | }
266 |
267 | private class async_processImage_yolo extends AsyncTask<Void, Void, Void> {
268 |
269 | private double t1,t2;
270 | private double cnn_runtime;
271 | private float [] result;
272 | private Bitmap bm = null;
273 |
274 | @Override
275 | protected void onPreExecute() {
276 | btn_processImage.setEnabled(false);
277 | tv_runtime.setText("------");
278 | t1 = System.currentTimeMillis();
279 | super.onPreExecute();
280 | }
281 |
282 | @Override
283 | protected Void doInBackground(Void... params) {
284 |
285 | if(selectedImagePath != null) {
286 | final int IMG_X = 448;
287 | final int IMG_Y = 448;
288 | final int IMG_C = 3;
289 |
290 | final float [] bitmapArray = new float[IMG_X * IMG_Y * IMG_C];
291 |
292 | try {
293 | bm = Picasso.with(activity)
294 | .load(new File(selectedImagePath))
295 | .config(Bitmap.Config.ARGB_8888)
296 | .resize(IMG_X,IMG_Y)
297 | .get();
298 | } catch (IOException e) {
299 | e.printStackTrace();
300 | }
301 |
302 | if(bm != null) {
303 | /*ExecutorService executor = Executors.newFixedThreadPool(8);
304 |
305 | for(int w = 0 ; w < bm.getWidth() ; w++) {
306 | final int finalW = w;
307 | executor.execute(new Runnable() {
308 | @Override
309 | public void run() {
310 | for(int h = 0 ; h < bm.getHeight() ; h++) {
311 | int pixel = bm.getPixel(finalW, h);
312 | for(int c = 0 ; c < 3 ; c++) {
313 | bitmapArray[h * IMG_X * IMG_C + finalW * IMG_C + c] = getColorPixel(pixel, c);
314 | }
315 | }
316 | }
317 | });
318 | }
319 |
320 | executor.shutdown();
321 | try {
322 | executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
323 | } catch (InterruptedException e) {
324 | e.printStackTrace();
325 | }*/
326 |
327 | for(int w = 0 ; w < bm.getWidth() ; w++) {
328 | for(int h = 0 ; h < bm.getHeight() ; h++) {
329 | int pixel = bm.getPixel(w, h);
330 | for(int c = 0 ; c < 3 ; c++) {
331 | bitmapArray[h * IMG_X * IMG_C + w * IMG_C + c] = Utilities.getColorPixel(pixel, c);
332 | }
333 | }
334 | }
335 | }
336 |
337 | double x1 = System.currentTimeMillis();
338 | float [] result = GetInferrence(bitmapArray);
339 | double x2 = System.currentTimeMillis();
340 | cnn_runtime = x2 - x1;
341 | Log.d(TAG,"CNN RUNTIME: " + cnn_runtime + "ms");
342 |
343 | int classes = 20;
344 | int side = 7;
345 | int num = 2;
346 | float thresh = 0.15f;
347 |
348 | //process result first
349 | float [][] probs = new float[side * side * num][classes];
350 | Utilities.box[] boxes = new Utilities.box[side * side * num];
351 | for(int j = 0 ; j < boxes.length ; j++)
352 | boxes[j] = new Utilities.box();
353 |
354 | Utilities.convert_yolo_detections(result, classes, num, 1, side, 1, 1, thresh, probs, boxes, 0);
355 | Utilities.do_nms_sort(boxes, probs, side * side * num, classes, 0.5f);
356 |
357 | //do box drawing
358 | final Bitmap mutableBitmap = Bitmap.createScaledBitmap(
359 | bm, 512, 512, false).copy(bm.getConfig(), true);
360 | final Canvas canvas = new Canvas(mutableBitmap);
361 |
362 | for(int i = 0; i < side * side * num; ++i){
363 |
364 | int classid = -1;
365 | float maxprob = -100000.0f;
366 | for(int j = 0 ; j < classes ; j++) {
367 | if(probs[i][j] > maxprob) {
368 | classid = j;
369 | maxprob = probs[i][j];
370 | }
371 | }
372 |
373 | if(classid < 0)
374 | continue;
375 |
376 | float prob = probs[i][classid];
377 | if(prob > thresh){
378 | Utilities.box b = boxes[i];
379 |
380 | int left = (int) ((b.x-b.w/2.) * mutableBitmap.getWidth());
381 | int right = (int) ((b.x+b.w/2.) * mutableBitmap.getWidth());
382 | int top = (int) ((b.y-b.h/2.) * mutableBitmap.getHeight());
383 | int bot = (int) ((b.y+b.h/2.) * mutableBitmap.getHeight());
384 |
385 | if(left < 0) left = 0;
386 | if(right > mutableBitmap.getWidth() - 1) right = mutableBitmap.getWidth() - 1;
387 | if(top < 0) top = 0;
388 | if(bot > mutableBitmap.getHeight() - 1) bot = mutableBitmap.getHeight() - 1;
389 |
390 | Paint p = new Paint();
391 | p.setStrokeWidth(p.getStrokeWidth() * 3);
392 | p.setColor(Color.RED);
393 | canvas.drawLine(left, top, right, top, p);
394 | canvas.drawLine(left, top, left, bot, p);
395 | canvas.drawLine(left, bot, right, bot, p);
396 | canvas.drawLine(right, top, right, bot, p);
397 |
398 | p.setTextSize(48f);
399 | p.setColor(Color.BLUE);
400 | canvas.drawText("" + yolo_descriptions[classid],left + (right - left)/2,top + (bot - top)/2,p);
401 | }
402 | }
403 |
404 | activity.runOnUiThread(new Runnable() {
405 | @Override
406 | public void run() {
407 | iv.setImageBitmap(mutableBitmap);
408 | }
409 | });
410 | }
411 |
412 | return null;
413 | }
414 |
415 | @Override
416 | protected void onPostExecute(Void aVoid) {
417 | super.onPostExecute(aVoid);
418 | t2 = System.currentTimeMillis();
419 | double runtime = t2 - t1;
420 | btn_processImage.setEnabled(true);
421 | tv_runtime.setText(cnn_runtime + " / " + runtime + " ms");
422 | }
423 | }
424 |
425 | public String getPath(Uri uri) {
426 | String[] projection = { MediaStore.Images.Media.DATA };
427 | Cursor cursor = managedQuery(uri, projection, null, null, null);
428 | int column_index = cursor.getColumnIndexOrThrow(MediaStore.Images.Media.DATA);
429 | cursor.moveToFirst();
430 | return cursor.getString(column_index);
431 | }
432 |
433 | public void onActivityResult(int requestCode, int resultCode, Intent data) {
434 | if (resultCode == RESULT_OK) {
435 | if (requestCode == SELECT_PICTURE) {
436 | Uri selectedImageUri = data.getData();
437 | selectedImagePath = getPath(selectedImageUri);
438 | if(selectedImagePath != null)
439 | iv.setImageURI(selectedImageUri);
440 | }
441 | }
442 | }
443 |
444 | /**
445 | * Native methods implemented by the 'deepsense' native library,
446 | * which is packaged with this application.
447 | */
448 | public native void InitGPU(String model_dir_path, String packageName);
449 | public native float [] GetInferrence(float [] input);
450 |
451 | }
452 |
--------------------------------------------------------------------------------
/app/src/main/java/com/lanytek/deepsensev3/Utilities.java:
--------------------------------------------------------------------------------
1 | package com.lanytek.deepsensev3;
2 |
3 | import android.app.Activity;
4 |
5 | import java.io.File;
6 | import java.io.FileOutputStream;
7 | import java.io.IOException;
8 | import java.io.InputStream;
9 | import java.io.OutputStream;
10 | import java.util.Arrays;
11 | import java.util.Comparator;
12 | import java.util.concurrent.ExecutorService;
13 | import java.util.concurrent.Executors;
14 | import java.util.concurrent.TimeUnit;
15 |
16 | /**
17 | * Created by JC1DA on 3/16/16.
18 | */
19 | public class Utilities {
20 | public static void copyFile(Activity activity, final String f) {
21 | InputStream in;
22 | try {
23 | in = activity.getAssets().open(f);
24 | final File of = new File(activity.getDir("execdir", activity.MODE_PRIVATE), f);
25 |
26 | final OutputStream out = new FileOutputStream(of);
27 |
28 | final byte b[] = new byte[65535];
29 | int sz = 0;
30 | while ((sz = in.read(b)) > 0) {
31 | out.write(b, 0, sz);
32 | }
33 | in.close();
34 | out.close();
35 | } catch (IOException e) {
36 | e.printStackTrace();
37 | }
38 | }
39 |
40 | public static class box {
41 | public float x,y,w,h;
42 | }
43 |
44 | public static class sortable_bbox {
45 | public int index;
46 | public int classid;
47 | public float [][] probs;
48 | }
49 |
50 | public static float getColorPixel(int pixel, int color) {
51 | float value = 0;
52 |
53 | switch (color) {
54 | case 0:
55 | value = (float)((pixel >> 16) & 0x000000ff) / 255.0f;
56 | break;
57 | case 1:
58 | value = (float)((pixel >> 8) & 0x000000ff) / 255.0f;
59 | break;
60 | case 2:
61 | value = (float)(pixel & 0x000000ff) / 255.0f;
62 | break;
63 | }
64 |
65 | return value;
66 | }
67 |
68 | public static float colors[][] = { {1.0f,0.0f,1.0f} , {0.0f,0.0f,1.0f} , {0.0f,1.0f,1.0f} , {0.0f,1.0f,0.0f} , {1.0f,1.0f,0.0f} , {1.0f,0.0f,0.0f} };
69 | public static float get_color(int c, int x, int max)
70 | {
71 | float ratio = ((float)x/max)*5;
72 | int i = (int) Math.floor(ratio);
73 | int j = (int) Math.ceil(ratio);
74 | ratio -= i;
75 | float r = (1-ratio) * colors[i][c] + ratio*colors[j][c];
76 | //printf("%f\n", r);
77 | return r;
78 | }
79 |
80 | public static void convert_yolo_detections(float [] predictions, int classes, int num, int square, int side, int w, int h, float thresh, float [][] probs, box [] boxes, int only_objectness)
81 | {
82 | int i,j,n;
83 | //int per_cell = 5*num+classes;
84 | for (i = 0; i < side * side; ++i){
85 | int row = i / side;
86 | int col = i % side;
87 | for(n = 0; n < num; ++n){
88 | int index = i*num + n;
89 | int p_index = side*side*classes + i*num + n;
90 | float scale = predictions[p_index];
91 | int box_index = side*side*(classes + num) + (i*num + n)*4;
92 | boxes[index].x = (predictions[box_index + 0] + col) / side * w;
93 | boxes[index].y = (predictions[box_index + 1] + row) / side * h;
94 | boxes[index].w = (float) (Math.pow(predictions[box_index + 2], ((square != 0) ? 2 : 1)) * w);
95 | boxes[index].h = (float) (Math.pow(predictions[box_index + 3], ((square != 0) ? 2 : 1)) * h);
96 | for(j = 0; j < classes; ++j){
97 | int class_index = i*classes;
98 | float prob = scale*predictions[class_index+j];
99 | probs[index][j] = (prob > thresh) ? prob : 0;
100 | }
101 | if(only_objectness != 0){
102 | probs[index][0] = scale;
103 | }
104 | }
105 | }
106 | }
107 |
108 | public static void convert_yolo_detections_mt(final float [] predictions, final int classes, final int num, final int square, final int side, final int w, final int h, final float thresh, final float [][] probs, final box [] boxes, final int only_objectness)
109 | {
110 |
111 | ExecutorService executor = Executors.newFixedThreadPool(4);
112 | for (int idx = 0; idx < side * side; ++idx){
113 | final int i = idx;
114 | Runnable worker = new Runnable() {
115 | @Override
116 | public void run() {
117 | int row = i / side;
118 | int col = i % side;
119 | for(int n = 0; n < num; ++n){
120 | int index = i*num + n;
121 | int p_index = side*side*classes + i*num + n;
122 | float scale = predictions[p_index];
123 | int box_index = side*side*(classes + num) + (i*num + n)*4;
124 | boxes[index].x = (predictions[box_index + 0] + col) / side * w;
125 | boxes[index].y = (predictions[box_index + 1] + row) / side * h;
126 | boxes[index].w = (float) (Math.pow(predictions[box_index + 2], ((square != 0) ? 2 : 1)) * w);
127 | boxes[index].h = (float) (Math.pow(predictions[box_index + 3], ((square != 0) ? 2 : 1)) * h);
128 | for(int j = 0; j < classes; ++j){
129 | int class_index = i*classes;
130 | float prob = scale*predictions[class_index+j];
131 | probs[index][j] = (prob > thresh) ? prob : 0;
132 | }
133 | if(only_objectness != 0){
134 | probs[index][0] = scale;
135 | }
136 | }
137 | }
138 | };
139 | executor.execute(worker);
140 | }
141 |
142 | executor.shutdown();
143 | try {
144 | executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
145 | } catch (InterruptedException e) {
146 | e.printStackTrace();
147 | }
148 | }
149 |
150 | public static float overlap(float x1, float w1, float x2, float w2)
151 | {
152 | float l1 = x1 - w1/2;
153 | float l2 = x2 - w2/2;
154 | float left = l1 > l2 ? l1 : l2;
155 | float r1 = x1 + w1/2;
156 | float r2 = x2 + w2/2;
157 | float right = r1 < r2 ? r1 : r2;
158 | return right - left;
159 | }
160 |
161 | public static float box_intersection(box a, box b)
162 | {
163 | float w = overlap(a.x, a.w, b.x, b.w);
164 | float h = overlap(a.y, a.h, b.y, b.h);
165 | if(w < 0 || h < 0) return 0;
166 | float area = w*h;
167 | return area;
168 | }
169 |
170 | public static float box_union(box a, box b)
171 | {
172 | float i = box_intersection(a, b);
173 | float u = a.w*a.h + b.w*b.h - i;
174 | return u;
175 | }
176 |
177 | public static float box_iou(box a, box b)
178 | {
179 | return box_intersection(a, b)/box_union(a, b);
180 | }
181 |
182 | public static void do_nms_sort(box [] boxes, float [][] probs, int total, int classes, float thresh)
183 | {
184 | int i, j, k;
185 | sortable_bbox [] s = new sortable_bbox[total];
186 | for(i = 0 ; i < s.length ; i++)
187 | s[i] = new sortable_bbox();
188 |
189 | for(i = 0; i < total; ++i){
190 | s[i].index = i;
191 | s[i].classid = 0;
192 | s[i].probs = probs;
193 | }
194 |
195 | for(k = 0; k < classes; ++k){
196 | for(i = 0; i < total; ++i){
197 | s[i].classid = k;
198 | }
199 |
200 | Arrays.sort(s, new Comparator<sortable_bbox>() {
201 | @Override
202 | public int compare(sortable_bbox a, sortable_bbox b) {
203 | float diff = a.probs[a.index][a.classid] - b.probs[b.index][b.classid];
204 | if (diff < 0) return 1;
205 | else if (diff > 0) return -1;
206 | return 0;
207 | }
208 | });
209 |
210 | for(i = 0; i < total; ++i){
211 | if(probs[s[i].index][k] == 0) continue;
212 | box a = boxes[s[i].index];
213 | for(j = i+1; j < total; ++j){
214 | box b = boxes[s[j].index];
215 | if (box_iou(a, b) > thresh){
216 | probs[s[j].index][k] = 0;
217 | }
218 | }
219 | }
220 | }
221 | }
222 |
223 | }
224 |
--------------------------------------------------------------------------------
/app/src/main/res/layout/activity_main.xml:
--------------------------------------------------------------------------------
1 | <!-- The layout markup was not preserved in this listing. The layout defines the views
2 |      referenced by MainActivity: @+id/iv_image (ImageView), @+id/btn_loadModelGPU (Button),
3 |      @+id/btn_processImage (Button), @+id/tv_runTime (TextView), @+id/tv_desc (TextView). -->
--------------------------------------------------------------------------------
/app/src/main/res/mipmap-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/app/src/main/res/mipmap-hdpi/ic_launcher.png
--------------------------------------------------------------------------------
/app/src/main/res/mipmap-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/app/src/main/res/mipmap-mdpi/ic_launcher.png
--------------------------------------------------------------------------------
/app/src/main/res/mipmap-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/app/src/main/res/mipmap-xhdpi/ic_launcher.png
--------------------------------------------------------------------------------
/app/src/main/res/mipmap-xxhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/app/src/main/res/mipmap-xxhdpi/ic_launcher.png
--------------------------------------------------------------------------------
/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png
--------------------------------------------------------------------------------
/app/src/main/res/values-w820dp/dimens.xml:
--------------------------------------------------------------------------------
1 | <resources>
2 |     <!-- Example customization of dimensions originally defined in res/values/dimens.xml
3 |          (such as screen margins) for screens with more than 820dp of available width. This
4 |          would include 7" and 10" devices in landscape (~960dp and ~1280dp respectively). -->
5 |     <dimen name="activity_horizontal_margin">64dp</dimen>
6 | </resources>
7 |
--------------------------------------------------------------------------------
/app/src/main/res/values/colors.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <resources>
3 |     <color name="colorPrimary">#3F51B5</color>
4 |     <color name="colorPrimaryDark">#303F9F</color>
5 |     <color name="colorAccent">#FF4081</color>
6 | </resources>
7 |
--------------------------------------------------------------------------------
/app/src/main/res/values/dimens.xml:
--------------------------------------------------------------------------------
1 | <resources>
2 |     <!-- Default screen margins, per the Android Design guidelines. -->
3 |     <dimen name="activity_horizontal_margin">16dp</dimen>
4 |     <dimen name="activity_vertical_margin">16dp</dimen>
5 | </resources>
6 |
--------------------------------------------------------------------------------
/app/src/main/res/values/strings.xml:
--------------------------------------------------------------------------------
1 | <resources>
2 |     <string name="app_name">DeepSensev3</string>
3 | </resources>
4 |
--------------------------------------------------------------------------------
/app/src/main/res/values/styles.xml:
--------------------------------------------------------------------------------
1 | <resources>
2 |
3 |     <!-- Base application theme. -->
4 |     <style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
5 |         <!-- Customize your theme here. -->
6 |         <item name="colorPrimary">@color/colorPrimary</item>
7 |         <item name="colorPrimaryDark">@color/colorPrimaryDark</item>
8 |         <item name="colorAccent">@color/colorAccent</item>
9 |     </style>
10 |
11 | </resources>
12 |
--------------------------------------------------------------------------------
/app/src/test/java/com/lanytek/deepsensev3/ExampleUnitTest.java:
--------------------------------------------------------------------------------
1 | package com.lanytek.deepsensev3;
2 |
3 | import org.junit.Test;
4 |
5 | import static org.junit.Assert.*;
6 |
7 | /**
8 | * Example local unit test, which will execute on the development machine (host).
9 | *
10 | * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
11 | */
12 | public class ExampleUnitTest {
13 | @Test
14 | public void addition_isCorrect() throws Exception {
15 | assertEquals(4, 2 + 2);
16 | }
17 | }
--------------------------------------------------------------------------------
/build.gradle:
--------------------------------------------------------------------------------
1 | // Top-level build file where you can add configuration options common to all sub-projects/modules.
2 |
3 | buildscript {
4 | repositories {
5 | jcenter()
6 | }
7 | dependencies {
8 | classpath 'com.android.tools.build:gradle:2.2.3'
9 |
10 | // NOTE: Do not place your application dependencies here; they belong
11 | // in the individual module build.gradle files
12 | }
13 | }
14 |
15 | allprojects {
16 | repositories {
17 | jcenter()
18 | }
19 | }
20 |
21 | task clean(type: Delete) {
22 | delete rootProject.buildDir
23 | }
24 |
--------------------------------------------------------------------------------
/distribution/opencl/include/CL/cl_ext.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2010 The Khronos Group Inc.
3 | *
4 | * Permission is hereby granted, free of charge, to any person obtaining a
5 | * copy of this software and/or associated documentation files (the
6 | * "Materials"), to deal in the Materials without restriction, including
7 | * without limitation the rights to use, copy, modify, merge, publish,
8 | * distribute, sublicense, and/or sell copies of the Materials, and to
9 | * permit persons to whom the Materials are furnished to do so, subject to
10 | * the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included
13 | * in all copies or substantial portions of the Materials.
14 | *
15 | * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 | * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
22 | ******************************************************************************/
23 |
24 |
25 | /* cl_ext.h contains OpenCL extensions which don't have external */
26 | /* (OpenGL, D3D) dependencies. */
27 |
28 | #ifndef __CL_EXT_H
29 | #define __CL_EXT_H
30 |
31 | #ifdef __cplusplus
32 | extern "C" {
33 | #endif
34 |
35 | #ifdef __APPLE__
36 | #include <OpenCL/cl.h>
37 | #include <AvailabilityMacros.h>
38 | #else
39 | #include <CL/cl.h>
40 | #endif
41 |
42 | /* cl_khr_fp64 extension - no extension #define since it has no functions */
43 | #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
44 |
45 | /* cl_khr_fp16 extension - no extension #define since it has no functions */
46 | #define CL_DEVICE_HALF_FP_CONFIG 0x1033
47 |
48 | /* Memory object destruction
49 | *
50 | * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
51 | *
52 | * Registers a user callback function that will be called when the memory object is deleted and its resources
53 | * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
54 | * stack associated with memobj. The registered user callback functions are called in the reverse order in
55 | * which they were registered. The user callback functions are called and then the memory object is deleted
56 | * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
57 | * notified when the memory referenced by host_ptr, specified when the memory object is created and used as
58 | * the storage bits for the memory object, can be reused or freed.
59 | *
60 | * The application may not call CL api's with the cl_mem object passed to the pfn_notify.
61 | *
62 | * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
63 | * before using.
64 | */
65 | #define cl_APPLE_SetMemObjectDestructor 1
66 | cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
67 | void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
68 | void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
69 |
70 |
71 | /* Context Logging Functions
72 | *
73 | * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
74 | * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
75 | * before using.
76 | *
77 | * clLogMessagesToSystemLog forwards all log messages to the Apple System Logger
78 | */
79 | #define cl_APPLE_ContextLoggingFunctions 1
80 | extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
81 | const void * /* private_info */,
82 | size_t /* cb */,
83 | void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
84 |
85 | /* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
86 | extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
87 | const void * /* private_info */,
88 | size_t /* cb */,
89 | void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
90 |
91 | /* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
92 | extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
93 | const void * /* private_info */,
94 | size_t /* cb */,
95 | void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
96 |
97 |
98 | /************************
99 | * cl_khr_icd extension *
100 | ************************/
101 | #define cl_khr_icd 1
102 |
103 | /* cl_platform_info */
104 | #define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
105 |
106 | /* Additional Error Codes */
107 | #define CL_PLATFORM_NOT_FOUND_KHR -1001
108 |
109 | extern CL_API_ENTRY cl_int CL_API_CALL
110 | clIcdGetPlatformIDsKHR(cl_uint /* num_entries */,
111 | cl_platform_id * /* platforms */,
112 | cl_uint * /* num_platforms */);
113 |
114 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
115 | cl_uint /* num_entries */,
116 | cl_platform_id * /* platforms */,
117 | cl_uint * /* num_platforms */);
118 |
119 |
120 | /******************************************
121 | * cl_nv_device_attribute_query extension *
122 | ******************************************/
123 | /* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
124 | #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
125 | #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
126 | #define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
127 | #define CL_DEVICE_WARP_SIZE_NV 0x4003
128 | #define CL_DEVICE_GPU_OVERLAP_NV 0x4004
129 | #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
130 | #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
131 |
132 |
133 | /*********************************
134 | * cl_amd_device_attribute_query *
135 | *********************************/
136 | #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
137 |
138 |
139 | #ifdef CL_VERSION_1_1
140 | /***********************************
141 | * cl_ext_device_fission extension *
142 | ***********************************/
143 | #define cl_ext_device_fission 1
144 |
145 | extern CL_API_ENTRY cl_int CL_API_CALL
146 | clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
147 |
148 | typedef CL_API_ENTRY cl_int
149 | (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
150 |
151 | extern CL_API_ENTRY cl_int CL_API_CALL
152 | clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
153 |
154 | typedef CL_API_ENTRY cl_int
155 | (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
156 |
157 | typedef cl_ulong cl_device_partition_property_ext;
158 | extern CL_API_ENTRY cl_int CL_API_CALL
159 | clCreateSubDevicesEXT( cl_device_id /*in_device*/,
160 | const cl_device_partition_property_ext * /* properties */,
161 | cl_uint /*num_entries*/,
162 | cl_device_id * /*out_devices*/,
163 | cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
164 |
165 | typedef CL_API_ENTRY cl_int
166 | ( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
167 | const cl_device_partition_property_ext * /* properties */,
168 | cl_uint /*num_entries*/,
169 | cl_device_id * /*out_devices*/,
170 | cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
171 |
172 | /* cl_device_partition_property_ext */
173 | #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
174 | #define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
175 | #define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
176 | #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
177 |
178 | /* clDeviceGetInfo selectors */
179 | #define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
180 | #define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
181 | #define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
182 | #define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
183 | #define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
184 |
185 | /* error codes */
186 | #define CL_DEVICE_PARTITION_FAILED_EXT -1057
187 | #define CL_INVALID_PARTITION_COUNT_EXT -1058
188 | #define CL_INVALID_PARTITION_NAME_EXT -1059
189 |
190 | /* CL_AFFINITY_DOMAINs */
191 | #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
192 | #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
193 | #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
194 | #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
195 | #define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
196 | #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
197 |
198 | /* cl_device_partition_property_ext list terminators */
199 | #define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
200 | #define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
201 | #define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
202 |
203 |
204 |
205 | #endif /* CL_VERSION_1_1 */
206 |
207 | #ifdef __cplusplus
208 | }
209 | #endif
210 |
211 |
212 | #endif /* __CL_EXT_H */
213 |
--------------------------------------------------------------------------------
/distribution/opencl/include/CL/cl_ext_qcom.h:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2009-2012 QUALCOMM Incorporated.
2 | * All Rights Reserved. QUALCOMM Proprietary and Confidential.
3 | */
4 |
5 | #ifndef __OPENCL_CL_EXT_QCOM_H
6 | #define __OPENCL_CL_EXT_QCOM_H
7 |
8 | #ifdef __cplusplus
9 | extern "C" {
10 | #endif
11 |
12 |
13 | /*********************************
14 | * cl_perf_monitor_qcom extension *
15 | **********************************/
16 | typedef struct _cl_perf_monitor_qcom * cl_perf_monitor_qcom;
17 | typedef struct _cl_perf_monitor_group_id_qcom * cl_perf_monitor_group_id_qcom;
18 | typedef struct _cl_perf_monitor_counter_id_qcom * cl_perf_monitor_counter_id_qcom;
19 |
20 | typedef cl_uint cl_perf_monitor_group_info_qcom;
21 | typedef cl_uint cl_perf_monitor_counter_info_qcom;
22 | typedef cl_uint cl_perf_monitor_info_qcom;
23 | typedef cl_uint cl_perf_monitor_counter_type;
24 |
25 |
26 | extern CL_API_ENTRY cl_int CL_API_CALL
27 | clGetPerfMonitorGroupInfoQCOM(cl_device_id /* device */,
28 | cl_perf_monitor_group_id_qcom /* group */,
29 | cl_perf_monitor_group_info_qcom /* param_name */,
30 | size_t /* param_value_size */,
31 | void * /* param_value */,
32 | size_t * /* param_value_size_ret */);
33 |
34 | extern CL_API_ENTRY cl_int CL_API_CALL
35 | clGetPerfMonitorCounterInfoQCOM(cl_device_id /* device */,
36 | cl_perf_monitor_counter_id_qcom /* counter*/,
37 | cl_perf_monitor_counter_info_qcom /* param_name */,
38 | size_t /* param_value_size */,
39 | void * /* param_value */,
40 | size_t * /* param_value_size_ret */);
41 |
42 | extern CL_API_ENTRY cl_perf_monitor_qcom CL_API_CALL
43 | clCreatePerfMonitorQCOM(cl_context /* context */,
44 | cl_device_id /* device */,
45 | cl_int /* num_counters */,
46 | cl_perf_monitor_counter_id_qcom * /* counter_list */,
47 | cl_int * /* errcode_ret */);
48 |
49 | extern CL_API_ENTRY cl_int CL_API_CALL
50 | clRetainPerfMonitorQCOM(cl_perf_monitor_qcom /* monitor */);
51 |
52 | extern CL_API_ENTRY cl_int CL_API_CALL
53 | clReleasePerfMonitorQCOM(cl_perf_monitor_qcom /* monitor */);
54 |
55 | extern CL_API_ENTRY cl_int CL_API_CALL
56 | clEnqueueBeginPerfMonitorQCOM(cl_command_queue /* command_queue */,
57 | cl_perf_monitor_qcom /* monitor */,
58 | cl_uint /* num_events_in_wait_list */,
59 | const cl_event * /* event_wait_list */,
60 | cl_event * /* event */);
61 |
62 | extern CL_API_ENTRY cl_int CL_API_CALL
63 | clEnqueueEndPerfMonitorQCOM(cl_command_queue /* command_queue */,
64 | cl_perf_monitor_qcom /* monitor */,
65 | cl_uint /* num_events_in_wait_list */,
66 | const cl_event * /* event_wait_list */,
67 | cl_event * /* event */);
68 |
69 | extern CL_API_ENTRY cl_int CL_API_CALL
70 | clEnqueueReadPerfMonitorQCOM(cl_command_queue /* command_queue */,
71 | cl_perf_monitor_qcom /* monitor */,
72 | cl_bool /* blocking_read */,
73 | void * /* ptr */,
74 | cl_uint /* num_events_in_wait_list */,
75 | const cl_event * /* event_wait_list */,
76 | cl_event * /* event */);
77 |
78 | extern CL_API_ENTRY cl_int CL_API_CALL
79 | clGetPerfMonitorInfoQCOM(cl_perf_monitor_qcom /* monitor */,
80 | cl_perf_monitor_info_qcom /* param_name */,
81 | size_t /* param_value_size */,
82 | void * /* param_value */,
83 | size_t * /* param_value_size_ret */);
84 |
85 | /* cl_device_info */
86 | #define CL_DEVICE_MAX_PERF_MONITOR_GROUPS_QCOM 0xA000
87 | #define CL_DEVICE_PERF_MONITOR_GROUP_LIST_QCOM 0xA001
88 |
89 | /* cl_perf_monitor_group_info_qcom */
90 | #define CL_PERF_MONITOR_MAX_COUNTERS_QCOM 0xA100
91 | #define CL_PERF_MONITOR_MAX_ACTIVE_COUNTERS_QCOM 0xA101
92 | #define CL_PERF_MONITOR_GROUP_NAME_QCOM 0xA102
93 | #define CL_PERF_MONITOR_COUNTER_LIST_QCOM 0xA103
94 |
95 | /* cl_perf_monitor_counter_info_qcom */
96 | #define CL_PERF_MONITOR_COUNTER_NAME_QCOM 0xA200
97 | #define CL_PERF_MONITOR_COUNTER_GROUP_QCOM 0xA201
98 | #define CL_PERF_MONITOR_COUNTER_ID_QCOM 0xA202
99 | #define CL_PERF_MONITOR_COUNTER_TYPE_QCOM 0xA203
100 | #define CL_PERF_MONITOR_COUNTER_RANGE_QCOM 0xA204
101 |
102 |
103 | /* cl_perf_monitor_counter_type */
104 | #define CL_UNSIGNED_INT64_QCOM 0xA300
105 | #define CL_PERCENTAGE_QCOM 0xA301
106 |
107 | /* cl_perf_monitor_info_qcom */
108 | #define CL_PERF_MONITOR_RESULT_SIZE_QCOM 0xA400
109 | #define CL_PERF_MONITOR_CONTEXT_QCOM 0xA401
110 | #define CL_PERF_MONITOR_REFERENCE_COUNT_QCOM 0xA402
111 |
112 | /* New error codes */
113 | #define CL_INVALID_PERF_MONITOR_QCOM 0xA500
114 | #define CL_INVALID_PERF_MONITOR_GROUP_QCOM 0xA501
115 | #define CL_INVALID_PERF_MONITOR_COUNTER_QCOM 0xA502
116 | #define CL_PERF_MONITOR_RESULT_NOT_AVAILABLE 0xA503
117 |
118 | /* cl_command_type */
119 | #define CL_COMMAND_BEGIN_PERF_MONITOR_QCOM 0xA600
120 | #define CL_COMMAND_END_PERF_MONITOR_QCOM 0xA601
121 | #define CL_COMMAND_READ_PERF_MONITOR_QCOM 0xA602
122 |
123 |
124 | /*********************************
125 | * cl_qcom_extended_queries_1 extension *
126 | **********************************/
127 |
128 | /* Accepted by clGetPlatformInfo */
129 | #define CL_PLATFORM_NUM_OBJECTS_QCOM 0xA700
130 |
131 | /* Accepted by clGetDeviceInfo */
132 | #define CL_DEVICE_NUM_32BIT_GPRS_PER_COMPUTE_UNIT_QCOM 0xA800
133 | #define CL_DEVICE_NUM_WAVE_SIZES_QCOM 0xA801
134 | #define CL_DEVICE_SUPPORTED_WAVE_SIZES_QCOM 0xA802
135 | #define CL_DEVICE_INSTRUCTION_STORAGE_SIZE_QCOM 0xA803
136 |
137 | /* Accepted by clGetProgramBuildInfo */
138 | #define CL_PROGRAM_WAVE_SIZE_QCOM 0xA900
139 | #define CL_PROGRAM_INSTRUCTION_SIZE_QCOM 0xA901
140 | #define CL_PROGRAM_DISASSEMBLY_QCOM 0xA902
141 |
142 | /* Accepted by clGetKernelWorkGroupInfo */
143 | #define CL_KERNEL_NUM_32BIT_GPRS_PER_WORK_ITEM_QCOM 0xAA00
144 | #define CL_KERNEL_STACK_SIZE_PER_WORK_ITEM_QCOM 0xAA01
145 |
146 | /* Accepted by clGetKernelArgInfo */
147 | #define CL_KERNEL_ARG_VALUE_QCOM 0xAB00
148 |
149 | #ifdef __cplusplus
150 | }
151 | #endif
152 |
153 | #endif /* __OPENCL_CL_EXT_QCOM_H */
154 |
--------------------------------------------------------------------------------
/distribution/opencl/include/CL/cl_gl.h:
--------------------------------------------------------------------------------
1 | /**********************************************************************************
2 | * Copyright (c) 2011 The Khronos Group Inc.
3 | *
4 | * Permission is hereby granted, free of charge, to any person obtaining a
5 | * copy of this software and/or associated documentation files (the
6 | * "Materials"), to deal in the Materials without restriction, including
7 | * without limitation the rights to use, copy, modify, merge, publish,
8 | * distribute, sublicense, and/or sell copies of the Materials, and to
9 | * permit persons to whom the Materials are furnished to do so, subject to
10 | * the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included
13 | * in all copies or substantial portions of the Materials.
14 | *
15 | * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 | * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
22 | **********************************************************************************/
23 |
24 | #ifndef __OPENCL_CL_GL_H
25 | #define __OPENCL_CL_GL_H
26 |
27 | #ifdef __APPLE__
28 | #include <OpenCL/cl.h>
29 | #else
30 | #include <CL/cl.h>
31 | #endif
32 |
33 | #ifdef __cplusplus
34 | extern "C" {
35 | #endif
36 |
37 | typedef cl_uint cl_gl_object_type;
38 | typedef cl_uint cl_gl_texture_info;
39 | typedef cl_uint cl_gl_platform_info;
40 | typedef struct __GLsync *cl_GLsync;
41 |
42 | /* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
43 | #define CL_GL_OBJECT_BUFFER 0x2000
44 | #define CL_GL_OBJECT_TEXTURE2D 0x2001
45 | #define CL_GL_OBJECT_TEXTURE3D 0x2002
46 | #define CL_GL_OBJECT_RENDERBUFFER 0x2003
47 | #define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
48 | #define CL_GL_OBJECT_TEXTURE1D 0x200F
49 | #define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
50 | #define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
51 |
52 | /* cl_gl_texture_info */
53 | #define CL_GL_TEXTURE_TARGET 0x2004
54 | #define CL_GL_MIPMAP_LEVEL 0x2005
55 |
56 |
57 | extern CL_API_ENTRY cl_mem CL_API_CALL
58 | clCreateFromGLBuffer(cl_context /* context */,
59 | cl_mem_flags /* flags */,
60 | cl_GLuint /* bufobj */,
61 | int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
62 |
63 | extern CL_API_ENTRY cl_mem CL_API_CALL
64 | clCreateFromGLTexture(cl_context /* context */,
65 | cl_mem_flags /* flags */,
66 | cl_GLenum /* target */,
67 | cl_GLint /* miplevel */,
68 | cl_GLuint /* texture */,
69 | cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
70 |
71 | extern CL_API_ENTRY cl_mem CL_API_CALL
72 | clCreateFromGLRenderbuffer(cl_context /* context */,
73 | cl_mem_flags /* flags */,
74 | cl_GLuint /* renderbuffer */,
75 | cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
76 |
77 | extern CL_API_ENTRY cl_int CL_API_CALL
78 | clGetGLObjectInfo(cl_mem /* memobj */,
79 | cl_gl_object_type * /* gl_object_type */,
80 | cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
81 |
82 | extern CL_API_ENTRY cl_int CL_API_CALL
83 | clGetGLTextureInfo(cl_mem /* memobj */,
84 | cl_gl_texture_info /* param_name */,
85 | size_t /* param_value_size */,
86 | void * /* param_value */,
87 | size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
88 |
89 | extern CL_API_ENTRY cl_int CL_API_CALL
90 | clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
91 | cl_uint /* num_objects */,
92 | const cl_mem * /* mem_objects */,
93 | cl_uint /* num_events_in_wait_list */,
94 | const cl_event * /* event_wait_list */,
95 | cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
96 |
97 | extern CL_API_ENTRY cl_int CL_API_CALL
98 | clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
99 | cl_uint /* num_objects */,
100 | const cl_mem * /* mem_objects */,
101 | cl_uint /* num_events_in_wait_list */,
102 | const cl_event * /* event_wait_list */,
103 | cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
104 |
105 |
106 | #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
107 | extern CL_API_ENTRY cl_mem CL_API_CALL
108 | clCreateFromGLTexture2D(cl_context /* context */,
109 | cl_mem_flags /* flags */,
110 | cl_GLenum /* target */,
111 | cl_GLint /* miplevel */,
112 | cl_GLuint /* texture */,
113 | cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
114 |
115 | extern CL_API_ENTRY cl_mem CL_API_CALL
116 | clCreateFromGLTexture3D(cl_context /* context */,
117 | cl_mem_flags /* flags */,
118 | cl_GLenum /* target */,
119 | cl_GLint /* miplevel */,
120 | cl_GLuint /* texture */,
121 | cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
122 | #endif /* CL_USE_DEPRECATED_OPENCL_1_1_APIS */
123 |
124 | /* cl_khr_gl_sharing extension */
125 |
126 | #define cl_khr_gl_sharing 1
127 |
128 | typedef cl_uint cl_gl_context_info;
129 |
130 | /* Additional Error Codes */
131 | #define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
132 |
133 | /* cl_gl_context_info */
134 | #define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
135 | #define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
136 |
137 | /* Additional cl_context_properties */
138 | #define CL_GL_CONTEXT_KHR 0x2008
139 | #define CL_EGL_DISPLAY_KHR 0x2009
140 | #define CL_GLX_DISPLAY_KHR 0x200A
141 | #define CL_WGL_HDC_KHR 0x200B
142 | #define CL_CGL_SHAREGROUP_KHR 0x200C
143 |
144 | extern CL_API_ENTRY cl_int CL_API_CALL
145 | clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
146 | cl_gl_context_info /* param_name */,
147 | size_t /* param_value_size */,
148 | void * /* param_value */,
149 | size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
150 |
151 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
152 | const cl_context_properties * properties,
153 | cl_gl_context_info param_name,
154 | size_t param_value_size,
155 | void * param_value,
156 | size_t * param_value_size_ret);
157 |
158 | #ifdef __cplusplus
159 | }
160 | #endif
161 |
162 | #endif /* __OPENCL_CL_GL_H */
163 |
--------------------------------------------------------------------------------
/distribution/opencl/include/CL/cl_gl_ext.h:
--------------------------------------------------------------------------------
1 | /**********************************************************************************
2 | * Copyright (c) 2008-2010 The Khronos Group Inc.
3 | *
4 | * Permission is hereby granted, free of charge, to any person obtaining a
5 | * copy of this software and/or associated documentation files (the
6 | * "Materials"), to deal in the Materials without restriction, including
7 | * without limitation the rights to use, copy, modify, merge, publish,
8 | * distribute, sublicense, and/or sell copies of the Materials, and to
9 | * permit persons to whom the Materials are furnished to do so, subject to
10 | * the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included
13 | * in all copies or substantial portions of the Materials.
14 | *
15 | * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 | * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
22 | **********************************************************************************/
23 |
24 |
25 | /* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
26 | /* OpenGL dependencies. */
27 |
28 | #ifndef __OPENCL_CL_GL_EXT_H
29 | #define __OPENCL_CL_GL_EXT_H
30 |
31 | #ifdef __cplusplus
32 | extern "C" {
33 | #endif
34 |
35 | #ifdef __APPLE__
36 | #include <OpenCL/cl_gl.h>
37 | #else
38 | #include <CL/cl_gl.h>
39 | #endif
40 |
41 | /*
42 | * For each extension, follow this template
43 | * cl_VEN_extname extension */
44 | /* #define cl_VEN_extname 1
45 | * ... define new types, if any
46 | * ... define new tokens, if any
47 | * ... define new APIs, if any
48 | *
49 | * If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
50 | * This allows us to avoid having to decide whether to include GL headers or GLES here.
51 | */
52 |
53 | /*
54 | * cl_khr_gl_event extension
55 | * See section 9.9 in the OpenCL 1.1 spec for more information
56 | */
57 | #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
58 |
59 | extern CL_API_ENTRY cl_event CL_API_CALL
60 | clCreateEventFromGLsyncKHR(cl_context /* context */,
61 | cl_GLsync /* cl_GLsync */,
62 | cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
63 |
64 | #ifdef __cplusplus
65 | }
66 | #endif
67 |
68 | #endif /* __OPENCL_CL_GL_EXT_H */
69 |
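Usage note: the cl_khr_gl_event extension above lets a command queue wait on a GL fence object directly, avoiding a full glFinish() before clEnqueueAcquireGLObjects. A minimal C sketch under the assumption that the vendor library exports clCreateEventFromGLsyncKHR as declared here (otherwise it would be fetched through the extension-function-address mechanism); acquire_after_gl is an illustrative helper name:

#include <CL/cl.h>
#include <CL/cl_gl.h>
#include <CL/cl_gl_ext.h>

/* Enqueue the acquire of a shared GL object only after the given GL fence fires. */
static cl_int acquire_after_gl(cl_context ctx, cl_command_queue queue,
                               cl_mem shared_mem, cl_GLsync fence)
{
    cl_int err = CL_SUCCESS;
    cl_event gl_done = clCreateEventFromGLsyncKHR(ctx, fence, &err);
    if (err != CL_SUCCESS)
        return err;
    err = clEnqueueAcquireGLObjects(queue, 1, &shared_mem, 1, &gl_done, NULL);
    clReleaseEvent(gl_done);
    return err;
}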
--------------------------------------------------------------------------------
/distribution/opencl/include/CL/cl_perf_monitor_qcom.h:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2009-2011 QUALCOMM Incorporated.
2 | * All Rights Reserved. QUALCOMM Proprietary and Confidential.
3 | */
4 |
5 | /* This file is deprecated. It's only provided for backwards compatibility with older applications.
6 | * New applications are strongly recommended to include cl_ext_qcom.h instead.
7 | */
8 | #include <CL/cl_ext_qcom.h>
9 |
10 |
--------------------------------------------------------------------------------
/distribution/opencl/include/CL/opencl.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2010 The Khronos Group Inc.
3 | *
4 | * Permission is hereby granted, free of charge, to any person obtaining a
5 | * copy of this software and/or associated documentation files (the
6 | * "Materials"), to deal in the Materials without restriction, including
7 | * without limitation the rights to use, copy, modify, merge, publish,
8 | * distribute, sublicense, and/or sell copies of the Materials, and to
9 | * permit persons to whom the Materials are furnished to do so, subject to
10 | * the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included
13 | * in all copies or substantial portions of the Materials.
14 | *
15 | * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 | * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
22 | ******************************************************************************/
23 |
24 |
25 | #ifndef __OPENCL_H
26 | #define __OPENCL_H
27 |
28 | #ifdef __cplusplus
29 | extern "C" {
30 | #endif
31 |
32 | #ifdef __APPLE__
33 |
34 | #include <OpenCL/cl.h>
35 | #include <OpenCL/cl_gl.h>
36 | #include <OpenCL/cl_gl_ext.h>
37 | #include <OpenCL/cl_ext.h>
38 |
39 | #else
40 |
41 | #include <CL/cl.h>
42 | #include <CL/cl_gl.h>
43 | #include <CL/cl_gl_ext.h>
44 | #include <CL/cl_ext.h>
45 |
46 | #endif
47 |
48 | #ifdef __cplusplus
49 | }
50 | #endif
51 |
52 | #endif /* __OPENCL_H */
53 |
54 |
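opencl.h is just the umbrella header: including it pulls in cl.h plus the GL-sharing and extension headers in one line. A minimal sanity-check program (a sketch, not part of this project) that compiles against these headers and links the bundled libOpenCL.so:

#include <CL/opencl.h>
#include <stdio.h>

int main(void)
{
    cl_uint num_platforms = 0;
    /* Count the platforms exposed by the vendor driver. */
    if (clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS || num_platforms == 0) {
        printf("no OpenCL platform found\n");
        return 1;
    }
    printf("%u OpenCL platform(s) available\n", num_platforms);
    return 0;
}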
--------------------------------------------------------------------------------
/distribution/opencl/lib/armeabi-v7a/Adreno-Android5/libOpenCL.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/distribution/opencl/lib/armeabi-v7a/Adreno-Android5/libOpenCL.so
--------------------------------------------------------------------------------
/distribution/opencl/lib/armeabi-v7a/Adreno-Android5/libllvm-qcom.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/distribution/opencl/lib/armeabi-v7a/Adreno-Android5/libllvm-qcom.so
--------------------------------------------------------------------------------
/distribution/opencl/lib/armeabi-v7a/Adreno-Android6/libOpenCL.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/distribution/opencl/lib/armeabi-v7a/Adreno-Android6/libOpenCL.so
--------------------------------------------------------------------------------
/distribution/opencl/lib/armeabi-v7a/Adreno-Android6/libllvm-qcom.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/distribution/opencl/lib/armeabi-v7a/Adreno-Android6/libllvm-qcom.so
--------------------------------------------------------------------------------
/distribution/opencl/lib/armeabi-v7a/libGLES_mali.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/distribution/opencl/lib/armeabi-v7a/libGLES_mali.so
--------------------------------------------------------------------------------
/distribution/opencl/lib/armeabi-v7a/libOpenCL.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/distribution/opencl/lib/armeabi-v7a/libOpenCL.so
--------------------------------------------------------------------------------
/distribution/opencl/lib/armeabi-v7a/libllvm-qcom.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/distribution/opencl/lib/armeabi-v7a/libllvm-qcom.so
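These are prebuilt vendor OpenCL driver blobs (Adreno builds for Android 5 and 6, plus Mali's libGLES_mali.so), bundled because the Android NDK does not ship an OpenCL runtime. The project presumably links against the matching library at build time; an alternative sketch loads the driver at runtime with dlopen/dlsym (the library name and this standalone program are illustrative, not taken from this repo):

#include <CL/cl.h>
#include <dlfcn.h>
#include <stdio.h>

typedef cl_int (*clGetPlatformIDs_fn)(cl_uint, cl_platform_id *, cl_uint *);

int main(void)
{
    /* The library is typically picked up from the APK's native lib dir or the vendor partition. */
    void *handle = dlopen("libOpenCL.so", RTLD_NOW | RTLD_LOCAL);
    if (handle == NULL) {
        fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return 1;
    }
    clGetPlatformIDs_fn getPlatforms =
        (clGetPlatformIDs_fn) dlsym(handle, "clGetPlatformIDs");
    cl_uint n = 0;
    if (getPlatforms != NULL && getPlatforms(0, NULL, &n) == CL_SUCCESS)
        printf("%u OpenCL platform(s)\n", n);
    dlclose(handle);
    return 0;
}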
--------------------------------------------------------------------------------
/gradle.properties:
--------------------------------------------------------------------------------
1 | # Project-wide Gradle settings.
2 |
3 | # IDE (e.g. Android Studio) users:
4 | # Gradle settings configured through the IDE *will override*
5 | # any settings specified in this file.
6 |
7 | # For more details on how to configure your build environment visit
8 | # http://www.gradle.org/docs/current/userguide/build_environment.html
9 |
10 | # Specifies the JVM arguments used for the daemon process.
11 | # The setting is particularly useful for tweaking memory settings.
12 | org.gradle.jvmargs=-Xmx1536m
13 |
14 | # When configured, Gradle will run in incubating parallel mode.
15 | # This option should only be used with decoupled projects. More details, visit
16 | # http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
17 | # org.gradle.parallel=true
18 |
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JC1DA/DeepSense/d3703856286c8d20b49be26eeb8764f0ea25c511/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Mon Dec 28 10:00:20 PST 2015
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.14.1-all.zip
7 |
--------------------------------------------------------------------------------
/gradlew:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | ##############################################################################
4 | ##
5 | ## Gradle start up script for UN*X
6 | ##
7 | ##############################################################################
8 |
9 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10 | DEFAULT_JVM_OPTS=""
11 |
12 | APP_NAME="Gradle"
13 | APP_BASE_NAME=`basename "$0"`
14 |
15 | # Use the maximum available, or set MAX_FD != -1 to use that value.
16 | MAX_FD="maximum"
17 |
18 | warn ( ) {
19 | echo "$*"
20 | }
21 |
22 | die ( ) {
23 | echo
24 | echo "$*"
25 | echo
26 | exit 1
27 | }
28 |
29 | # OS specific support (must be 'true' or 'false').
30 | cygwin=false
31 | msys=false
32 | darwin=false
33 | case "`uname`" in
34 | CYGWIN* )
35 | cygwin=true
36 | ;;
37 | Darwin* )
38 | darwin=true
39 | ;;
40 | MINGW* )
41 | msys=true
42 | ;;
43 | esac
44 |
45 | # Attempt to set APP_HOME
46 | # Resolve links: $0 may be a link
47 | PRG="$0"
48 | # Need this for relative symlinks.
49 | while [ -h "$PRG" ] ; do
50 | ls=`ls -ld "$PRG"`
51 | link=`expr "$ls" : '.*-> \(.*\)$'`
52 | if expr "$link" : '/.*' > /dev/null; then
53 | PRG="$link"
54 | else
55 | PRG=`dirname "$PRG"`"/$link"
56 | fi
57 | done
58 | SAVED="`pwd`"
59 | cd "`dirname \"$PRG\"`/" >/dev/null
60 | APP_HOME="`pwd -P`"
61 | cd "$SAVED" >/dev/null
62 |
63 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
64 |
65 | # Determine the Java command to use to start the JVM.
66 | if [ -n "$JAVA_HOME" ] ; then
67 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
68 | # IBM's JDK on AIX uses strange locations for the executables
69 | JAVACMD="$JAVA_HOME/jre/sh/java"
70 | else
71 | JAVACMD="$JAVA_HOME/bin/java"
72 | fi
73 | if [ ! -x "$JAVACMD" ] ; then
74 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
75 |
76 | Please set the JAVA_HOME variable in your environment to match the
77 | location of your Java installation."
78 | fi
79 | else
80 | JAVACMD="java"
81 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
82 |
83 | Please set the JAVA_HOME variable in your environment to match the
84 | location of your Java installation."
85 | fi
86 |
87 | # Increase the maximum file descriptors if we can.
88 | if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
89 | MAX_FD_LIMIT=`ulimit -H -n`
90 | if [ $? -eq 0 ] ; then
91 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
92 | MAX_FD="$MAX_FD_LIMIT"
93 | fi
94 | ulimit -n $MAX_FD
95 | if [ $? -ne 0 ] ; then
96 | warn "Could not set maximum file descriptor limit: $MAX_FD"
97 | fi
98 | else
99 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
100 | fi
101 | fi
102 |
103 | # For Darwin, add options to specify how the application appears in the dock
104 | if $darwin; then
105 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
106 | fi
107 |
108 | # For Cygwin, switch paths to Windows format before running java
109 | if $cygwin ; then
110 | APP_HOME=`cygpath --path --mixed "$APP_HOME"`
111 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
112 | JAVACMD=`cygpath --unix "$JAVACMD"`
113 |
114 | # We build the pattern for arguments to be converted via cygpath
115 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
116 | SEP=""
117 | for dir in $ROOTDIRSRAW ; do
118 | ROOTDIRS="$ROOTDIRS$SEP$dir"
119 | SEP="|"
120 | done
121 | OURCYGPATTERN="(^($ROOTDIRS))"
122 | # Add a user-defined pattern to the cygpath arguments
123 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then
124 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
125 | fi
126 | # Now convert the arguments - kludge to limit ourselves to /bin/sh
127 | i=0
128 | for arg in "$@" ; do
129 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
130 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
131 |
132 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
133 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
134 | else
135 | eval `echo args$i`="\"$arg\""
136 | fi
137 | i=$((i+1))
138 | done
139 | case $i in
140 | (0) set -- ;;
141 | (1) set -- "$args0" ;;
142 | (2) set -- "$args0" "$args1" ;;
143 | (3) set -- "$args0" "$args1" "$args2" ;;
144 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
145 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
146 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
147 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
148 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
149 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
150 | esac
151 | fi
152 |
153 | # Split up the JVM_OPTS and GRADLE_OPTS values into an array, following the shell quoting and substitution rules
154 | function splitJvmOpts() {
155 | JVM_OPTS=("$@")
156 | }
157 | eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
158 | JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
159 |
160 | exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
161 |
--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
1 | @if "%DEBUG%" == "" @echo off
2 | @rem ##########################################################################
3 | @rem
4 | @rem Gradle startup script for Windows
5 | @rem
6 | @rem ##########################################################################
7 |
8 | @rem Set local scope for the variables with windows NT shell
9 | if "%OS%"=="Windows_NT" setlocal
10 |
11 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12 | set DEFAULT_JVM_OPTS=
13 |
14 | set DIRNAME=%~dp0
15 | if "%DIRNAME%" == "" set DIRNAME=.
16 | set APP_BASE_NAME=%~n0
17 | set APP_HOME=%DIRNAME%
18 |
19 | @rem Find java.exe
20 | if defined JAVA_HOME goto findJavaFromJavaHome
21 |
22 | set JAVA_EXE=java.exe
23 | %JAVA_EXE% -version >NUL 2>&1
24 | if "%ERRORLEVEL%" == "0" goto init
25 |
26 | echo.
27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28 | echo.
29 | echo Please set the JAVA_HOME variable in your environment to match the
30 | echo location of your Java installation.
31 |
32 | goto fail
33 |
34 | :findJavaFromJavaHome
35 | set JAVA_HOME=%JAVA_HOME:"=%
36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37 |
38 | if exist "%JAVA_EXE%" goto init
39 |
40 | echo.
41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42 | echo.
43 | echo Please set the JAVA_HOME variable in your environment to match the
44 | echo location of your Java installation.
45 |
46 | goto fail
47 |
48 | :init
49 | @rem Get command-line arguments, handling Windows variants
50 |
51 | if not "%OS%" == "Windows_NT" goto win9xME_args
52 | if "%@eval[2+2]" == "4" goto 4NT_args
53 |
54 | :win9xME_args
55 | @rem Slurp the command line arguments.
56 | set CMD_LINE_ARGS=
57 | set _SKIP=2
58 |
59 | :win9xME_args_slurp
60 | if "x%~1" == "x" goto execute
61 |
62 | set CMD_LINE_ARGS=%*
63 | goto execute
64 |
65 | :4NT_args
66 | @rem Get arguments from the 4NT Shell from JP Software
67 | set CMD_LINE_ARGS=%$
68 |
69 | :execute
70 | @rem Setup the command line
71 |
72 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73 |
74 | @rem Execute Gradle
75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76 |
77 | :end
78 | @rem End local scope for the variables with windows NT shell
79 | if "%ERRORLEVEL%"=="0" goto mainEnd
80 |
81 | :fail
82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83 | rem the _cmd.exe /c_ return code!
84 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85 | exit /b 1
86 |
87 | :mainEnd
88 | if "%OS%"=="Windows_NT" endlocal
89 |
90 | :omega
91 |
--------------------------------------------------------------------------------
/settings.gradle:
--------------------------------------------------------------------------------
1 | include ':app'
2 |
--------------------------------------------------------------------------------