├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── CONTRIBUTING.md ├── Info.plist ├── LICENSE.txt ├── README.md ├── arm.toolchain.cmake ├── benchmark ├── CMakeLists.txt ├── README.md ├── benchncnn-int8.cpp └── benchncnn.cpp ├── build.sh ├── compiler.sh ├── examples ├── CMakeLists.txt ├── Python_NCNN │ ├── Makefile │ ├── ncnn.cpp │ ├── ncnn.h │ ├── ncnn2python.cpp │ └── readme ├── Python_NCNN_JRfacessd │ ├── Makefile │ ├── ncnn.cpp │ ├── ncnn.h │ ├── ncnn2python.cpp │ └── readme ├── fasterrcnn.cpp ├── mobilenetssd.cpp ├── mobilenetv2ssdlite.cpp ├── pose_demo.cpp ├── rfcn.cpp ├── shufflenetv2.cpp ├── squeezencnn │ ├── AndroidManifest.xml │ ├── ant.properties │ ├── assets │ │ └── synset_words.txt │ ├── build.xml │ ├── jni │ │ ├── Android.mk │ │ ├── Application.mk │ │ ├── squeezencnn_jni.cpp │ │ └── squeezenet_v1.1.id.h │ ├── local.properties │ ├── proguard-project.txt │ ├── project.properties │ ├── res │ │ ├── layout │ │ │ └── main.xml │ │ └── values │ │ │ └── strings.xml │ └── src │ │ └── com │ │ └── tencent │ │ └── squeezencnn │ │ ├── MainActivity.java │ │ └── SqueezeNcnn.java ├── squeezenet.cpp ├── squeezenetssd.cpp ├── synset_words.txt ├── yolov2.cpp └── yolov3.cpp ├── images ├── 128-ncnn.png ├── 16-ncnn.png ├── 256-ncnn.png ├── 32-ncnn.png └── 64-ncnn.png ├── package.sh ├── src ├── CMakeLists.txt ├── allocator.cpp ├── allocator.h ├── benchmark.cpp ├── benchmark.cpp.tq ├── benchmark.h ├── blob.cpp ├── blob.h ├── command.cpp ├── command.h ├── cpu.cpp ├── cpu.h ├── gpu.cpp ├── gpu.h ├── help.cpp ├── help.h ├── layer.cpp ├── layer.h ├── layer │ ├── absval.cpp │ ├── absval.h │ ├── argmax.cpp │ ├── argmax.h │ ├── arm │ │ ├── absval_arm.cpp │ │ ├── absval_arm.h │ │ ├── batchnorm_arm.cpp │ │ ├── batchnorm_arm.h │ │ ├── bias_arm.cpp │ │ ├── bias_arm.h │ │ ├── clip_arm.cpp │ │ ├── clip_arm.h │ │ ├── convolution_1x1.h │ │ ├── convolution_1x1_int8.h │ │ ├── convolution_2x2.h │ │ ├── convolution_3x3.h │ │ ├── convolution_3x3_e2e_int8.h │ │ ├── convolution_3x3_int8.h │ │ 
├── convolution_4x4.h │ │ ├── convolution_5x5.h │ │ ├── convolution_5x5_int8.h │ │ ├── convolution_7x7.h │ │ ├── convolution_7x7_int8.h │ │ ├── convolution_arm.cpp │ │ ├── convolution_arm.h │ │ ├── convolution_sgemm_int8.h │ │ ├── convolutiondepthwise_3x3.h │ │ ├── convolutiondepthwise_3x3_int8.h │ │ ├── convolutiondepthwise_5x5.h │ │ ├── convolutiondepthwise_arm.cpp │ │ ├── convolutiondepthwise_arm.h │ │ ├── deconvolution_3x3.h │ │ ├── deconvolution_4x4.h │ │ ├── deconvolution_arm.cpp │ │ ├── deconvolution_arm.h │ │ ├── deconvolutiondepthwise_arm.cpp │ │ ├── deconvolutiondepthwise_arm.h │ │ ├── dequantize_arm.cpp │ │ ├── dequantize_arm.h │ │ ├── eltwise_arm.cpp │ │ ├── eltwise_arm.h │ │ ├── innerproduct_arm.cpp │ │ ├── innerproduct_arm.h │ │ ├── interp_arm.cpp │ │ ├── interp_arm.h │ │ ├── lrn_arm.cpp │ │ ├── lrn_arm.h │ │ ├── neon_mathfun.h │ │ ├── pooling_2x2.h │ │ ├── pooling_3x3.h │ │ ├── pooling_arm.cpp │ │ ├── pooling_arm.h │ │ ├── prelu_arm.cpp │ │ ├── prelu_arm.h │ │ ├── quantize_arm.cpp │ │ ├── quantize_arm.h │ │ ├── relu_arm.cpp │ │ ├── relu_arm.h │ │ ├── requantize_arm.cpp │ │ ├── requantize_arm.h │ │ ├── scale_arm.cpp │ │ ├── scale_arm.h │ │ ├── sigmoid_arm.cpp │ │ ├── sigmoid_arm.h │ │ ├── softmax_arm.cpp │ │ └── softmax_arm.h │ ├── batchnorm.cpp │ ├── batchnorm.h │ ├── bias.cpp │ ├── bias.h │ ├── binaryop.cpp │ ├── binaryop.h │ ├── bnll.cpp │ ├── bnll.h │ ├── cast.cpp │ ├── cast.h │ ├── clip.cpp │ ├── clip.h │ ├── concat.cpp │ ├── concat.h │ ├── convolution.cpp │ ├── convolution.h │ ├── convolutiondepthwise.cpp │ ├── convolutiondepthwise.h │ ├── crop.cpp │ ├── crop.h │ ├── deconvolution.cpp │ ├── deconvolution.h │ ├── deconvolutiondepthwise.cpp │ ├── deconvolutiondepthwise.h │ ├── dequantize.cpp │ ├── dequantize.h │ ├── detectionoutput.cpp │ ├── detectionoutput.h │ ├── dropout.cpp │ ├── dropout.h │ ├── eltwise.cpp │ ├── eltwise.h │ ├── elu.cpp │ ├── elu.h │ ├── embed.cpp │ ├── embed.h │ ├── exp.cpp │ ├── exp.h │ ├── expanddims.cpp │ ├── expanddims.h │ 
├── flatten.cpp │ ├── flatten.h │ ├── innerproduct.cpp │ ├── innerproduct.h │ ├── input.cpp │ ├── input.h │ ├── instancenorm.cpp │ ├── instancenorm.h │ ├── interp.cpp │ ├── interp.h │ ├── log.cpp │ ├── log.h │ ├── lrn.cpp │ ├── lrn.h │ ├── lstm.cpp │ ├── lstm.h │ ├── memorydata.cpp │ ├── memorydata.h │ ├── mvn.cpp │ ├── mvn.h │ ├── normalize.cpp │ ├── normalize.h │ ├── packing.cpp │ ├── packing.h │ ├── padding.cpp │ ├── padding.h │ ├── permute.cpp │ ├── permute.h │ ├── pooling.cpp │ ├── pooling.h │ ├── power.cpp │ ├── power.h │ ├── prelu.cpp │ ├── prelu.h │ ├── priorbox.cpp │ ├── priorbox.h │ ├── proposal.cpp │ ├── proposal.h │ ├── psroipooling.cpp │ ├── psroipooling.h │ ├── quantize.cpp │ ├── quantize.h │ ├── reduction.cpp │ ├── reduction.h │ ├── relu.cpp │ ├── relu.h │ ├── reorg.cpp │ ├── reorg.h │ ├── requantize.cpp │ ├── requantize.h │ ├── reshape.cpp │ ├── reshape.h │ ├── rnn.cpp │ ├── rnn.h │ ├── roialign.cpp │ ├── roialign.h │ ├── roipooling.cpp │ ├── roipooling.h │ ├── scale.cpp │ ├── scale.h │ ├── shader │ │ ├── absval.comp │ │ ├── absval_pack4.comp │ │ ├── batchnorm.comp │ │ ├── batchnorm_pack4.comp │ │ ├── binaryop.comp │ │ ├── binaryop_pack4.comp │ │ ├── cast_fp16_to_fp32.comp │ │ ├── cast_fp16_to_fp32_pack4.comp │ │ ├── cast_fp32_to_fp16.comp │ │ ├── cast_fp32_to_fp16_pack4.comp │ │ ├── clip.comp │ │ ├── clip_pack4.comp │ │ ├── concat.comp │ │ ├── concat_pack4.comp │ │ ├── concat_pack4to1.comp │ │ ├── convolution.comp │ │ ├── convolution_1x1s1d1.comp │ │ ├── convolution_pack1to4.comp │ │ ├── convolution_pack4.comp │ │ ├── convolution_pack4to1.comp │ │ ├── convolutiondepthwise.comp │ │ ├── convolutiondepthwise_group.comp │ │ ├── convolutiondepthwise_group_pack1to4.comp │ │ ├── convolutiondepthwise_group_pack4.comp │ │ ├── convolutiondepthwise_group_pack4to1.comp │ │ ├── convolutiondepthwise_pack4.comp │ │ ├── crop.comp │ │ ├── crop_pack4.comp │ │ ├── deconvolution.comp │ │ ├── deconvolution_pack1to4.comp │ │ ├── deconvolution_pack4.comp │ │ ├── 
deconvolution_pack4to1.comp │ │ ├── deconvolutiondepthwise.comp │ │ ├── deconvolutiondepthwise_group.comp │ │ ├── deconvolutiondepthwise_group_pack1to4.comp │ │ ├── deconvolutiondepthwise_group_pack4.comp │ │ ├── deconvolutiondepthwise_group_pack4to1.comp │ │ ├── deconvolutiondepthwise_pack4.comp │ │ ├── dropout.comp │ │ ├── dropout_pack4.comp │ │ ├── eltwise.comp │ │ ├── eltwise_pack4.comp │ │ ├── flatten.comp │ │ ├── flatten_pack4.comp │ │ ├── innerproduct.comp │ │ ├── innerproduct_pack1to4.comp │ │ ├── innerproduct_pack4.comp │ │ ├── innerproduct_pack4to1.comp │ │ ├── interp.comp │ │ ├── interp_bicubic.comp │ │ ├── interp_bicubic_coeffs.comp │ │ ├── interp_bicubic_pack4.comp │ │ ├── interp_pack4.comp │ │ ├── lrn_norm.comp │ │ ├── lrn_norm_across_channel_pack4.comp │ │ ├── lrn_norm_within_channel_pack4.comp │ │ ├── lrn_square_pad.comp │ │ ├── lrn_square_pad_across_channel_pack4.comp │ │ ├── lrn_square_pad_within_channel_pack4.comp │ │ ├── packing_1to4.comp │ │ ├── packing_4to1.comp │ │ ├── padding.comp │ │ ├── padding_pack4.comp │ │ ├── permute.comp │ │ ├── permute_pack4to1.comp │ │ ├── pooling.comp │ │ ├── pooling_global.comp │ │ ├── pooling_global_pack4.comp │ │ ├── pooling_pack4.comp │ │ ├── prelu.comp │ │ ├── prelu_pack4.comp │ │ ├── priorbox.comp │ │ ├── priorbox_mxnet.comp │ │ ├── relu.comp │ │ ├── relu_pack4.comp │ │ ├── reorg.comp │ │ ├── reorg_pack1to4.comp │ │ ├── reorg_pack4.comp │ │ ├── reshape.comp │ │ ├── reshape_pack1to4.comp │ │ ├── reshape_pack4.comp │ │ ├── reshape_pack4to1.comp │ │ ├── scale.comp │ │ ├── scale_pack4.comp │ │ ├── shufflechannel.comp │ │ ├── shufflechannel_pack4.comp │ │ ├── sigmoid.comp │ │ ├── sigmoid_pack4.comp │ │ ├── softmax_div_sum.comp │ │ ├── softmax_div_sum_pack4.comp │ │ ├── softmax_exp_sub_max.comp │ │ ├── softmax_exp_sub_max_pack4.comp │ │ ├── softmax_reduce_max.comp │ │ ├── softmax_reduce_max_pack4.comp │ │ ├── softmax_reduce_sum.comp │ │ ├── softmax_reduce_sum_pack4.comp │ │ ├── tanh.comp │ │ ├── tanh_pack4.comp │ │ 
├── unaryop.comp │ │ └── unaryop_pack4.comp │ ├── shufflechannel.cpp │ ├── shufflechannel.h │ ├── sigmoid.cpp │ ├── sigmoid.h │ ├── slice.cpp │ ├── slice.h │ ├── softmax.cpp │ ├── softmax.h │ ├── split.cpp │ ├── split.h │ ├── spp.cpp │ ├── spp.h │ ├── squeeze.cpp │ ├── squeeze.h │ ├── tanh.cpp │ ├── tanh.h │ ├── threshold.cpp │ ├── threshold.h │ ├── tile.cpp │ ├── tile.h │ ├── unaryop.cpp │ ├── unaryop.h │ ├── x86 │ │ ├── avx_mathfun.h │ │ ├── convolution_1x1.h │ │ ├── convolution_1x1_int8.h │ │ ├── convolution_3x3.h │ │ ├── convolution_3x3_int8.h │ │ ├── convolution_5x5.h │ │ ├── convolution_5x5_int8.h │ │ ├── convolution_7x7_int8.h │ │ ├── convolution_sgemm_int8.h │ │ ├── convolution_x86.cpp │ │ ├── convolution_x86.h │ │ ├── convolutiondepthwise_3x3.h │ │ ├── convolutiondepthwise_3x3_int8.h │ │ ├── convolutiondepthwise_x86.cpp │ │ ├── convolutiondepthwise_x86.h │ │ └── sse_mathfun.h │ ├── yolodetectionoutput.cpp │ ├── yolodetectionoutput.h │ ├── yolov3detectionoutput.cpp │ └── yolov3detectionoutput.h ├── layer_declaration.h.in ├── layer_registry.h.in ├── layer_shader_registry.h.in ├── layer_shader_spv_data.h.in ├── layer_type.h ├── layer_type_enum.h.in ├── mat.cpp ├── mat.h ├── mat_pixel.cpp ├── mat_pixel_resize.cpp ├── modelbin.cpp ├── modelbin.h ├── net.cpp ├── net.h ├── opencv.cpp ├── opencv.h ├── paramdict.cpp ├── paramdict.h ├── pipeline.cpp ├── pipeline.h └── platform.h.in ├── toolchains ├── aarch64-linux-gnu.toolchain.cmake ├── arm-linux-gnueabi.toolchain.cmake ├── arm-linux-gnueabihf.toolchain.cmake ├── himix100.toolchain.cmake ├── hisiv300.toolchain.cmake ├── hisiv500.toolchain.cmake ├── host.gcc.toolchain.cmake ├── ios.toolchain.cmake ├── iossimxc-x64.toolchain.cmake ├── iossimxc.toolchain.cmake ├── iosxc-arm64.toolchain.cmake ├── iosxc.toolchain.cmake └── pi3.toolchain.cmake └── tools ├── CMakeLists.txt ├── caffe ├── CMakeLists.txt ├── caffe.proto └── caffe2ncnn.cpp ├── darknet └── readme.txt ├── mxnet ├── CMakeLists.txt └── mxnet2ncnn.cpp ├── 
ncnn2mem.cpp ├── onnx ├── CMakeLists.txt ├── onnx.proto └── onnx2ncnn.cpp ├── plugin ├── ImageWatchNCNN.natvis ├── README.md └── snapshot.png ├── pytorch └── readme.txt └── tensorflow ├── CMakeLists.txt ├── attr_value.proto ├── function.proto ├── graph.proto ├── node_def.proto ├── op_def.proto ├── resource_handle.proto ├── tensor.proto ├── tensor_shape.proto ├── tensorflow2ncnn.cpp ├── types.proto └── versions.proto /.gitignore: -------------------------------------------------------------------------------- 1 | # CMake build directory 2 | *build*/ 3 | 4 | # Backup files. 5 | *~ 6 | 7 | # Prerequisites 8 | *.d 9 | 10 | # Compiled Object files 11 | *.slo 12 | *.lo 13 | *.o 14 | *.obj 15 | 16 | # Precompiled Headers 17 | *.gch 18 | *.pch 19 | 20 | # Compiled Dynamic libraries 21 | *.so 22 | *.dylib 23 | *.dll 24 | 25 | # Fortran module files 26 | *.mod 27 | *.smod 28 | 29 | # Compiled Static libraries 30 | *.lai 31 | *.la 32 | *.a 33 | *.lib 34 | 35 | # Executables 36 | *.exe 37 | *.out 38 | *.app 39 | 40 | # vscode 41 | .vscode 42 | 43 | # model file 44 | *.param 45 | *.bin 46 | *.caffemodel 47 | *.prototxt 48 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | dist: trusty 3 | 4 | language: cpp 5 | 6 | compiler: 7 | - g++ 8 | 9 | addons: 10 | apt: 11 | sources: 12 | - ubuntu-toolchain-r-test 13 | packages: 14 | - cmake 15 | 16 | install: 17 | - wget https://github.com/google/protobuf/archive/v3.5.1.tar.gz 18 | - tar -xzvf v3.5.1.tar.gz 19 | - pushd protobuf-3.5.1 && ./autogen.sh && ./configure --prefix=/usr && make -j2 && sudo make install && sudo ldconfig && popd 20 | 21 | script: 22 | - mkdir build 23 | - cd build 24 | - cmake .. 
25 | - make -j2 26 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | ## Acknowledgements 3 | - Thanks to bug1989 [https://github.com/bug1989] for contributing the initial quantized int8 inference code and a large variety of device benchmark 4 | -------------------------------------------------------------------------------- /Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleName 6 | ncnn 7 | CFBundleIdentifier 8 | com.tencent.ncnn 9 | CFBundleVersion 10 | 1.0 11 | CFBundleShortVersionString 12 | 1.0 13 | CFBundleSignature 14 | ???? 15 | CFBundlePackageType 16 | FMWK 17 | 18 | 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ncnn-int8-e2e 2 | 3 | ncnn-int8-e2e 是基于ncnn int8社区版本低比特(小于8bit)量化魔改版 4 | 5 | 6 | # 目标 7 | 8 | * 支持activation 7bit、weight 6bit量化 9 | * inference time缩减为float32的50% 10 | * x86 simulator int8 inference 11 | * arm runtime int8 inference 12 | 13 | 14 | --- 15 | 16 | ### x86平台编译 17 | ``` 18 | ./compiler.sh linux 19 | 20 | ``` 21 | 22 | ### caffe2ncnn使用说明 23 | 24 | 主要区分float32与int8网络模型转换时的差异,以pose_hrnet101为例子 25 | 26 | #### float32的ncnn模型 27 | ``` 28 | ./caffe2ncnn pose_hrnet101.prototxt pose_hrnet101.caffemodel pose_hrnet101-fp32.param pose_hrnet101-fp32.bin 29 | ``` 30 | 31 | #### int8的ncnn模型 32 | 其中的pose_hrnet101.table文件由PyTorch int8 e2e量化工具生成 33 | ``` 34 | ./caffe2ncnn pose_hrnet101.prototxt pose_hrnet101.caffemodel pose_hrnet101-int8.param pose_hrnet101-int8.bin 0 pose_hrnet101.table 35 | ``` 36 | 37 | #### pose example 38 | ``` 39 | ./compiler.sh linux 40 | 41 | ``` 42 | 编译完成后,pose位于dg-ncnn/build-linux/install/bin/ 43 | 运行float32的模型 44 | ``` 45 | $ ./pose pose.jpg pose_hrnet101-fp32.param 
pose_hrnet101-fp32.bin 1 1 2 46 | --- DeepGlint ncnn post demo --- 10:20:05 Apr 22 2019 47 | iter cost 0/1: 192.42602539ms 48 | 40 40 7 49 | now value: 0.855324(8,6) 50 | now value: 0.746584(16,7) 51 | now value: 0.488715(4,13) 52 | now value: 0.600376(24,11) 53 | now value: 0.573444(4,22) 54 | now value: 0.556145(30,13) 55 | now value: 0.147505(6,28) 56 | 57 | ``` 58 | 运行int8的模型 59 | ``` 60 | $ ./pose pose.jpg pose_hrnet101-int8.param pose_hrnet101-int8.bin 1 1 2 61 | --- DeepGlint ncnn post demo --- 10:20:05 Apr 22 2019 62 | iter cost 0/1: 283.76904297ms 63 | 40 40 7 64 | now value: 0.848378(8,6) 65 | now value: 0.720524(16,7) 66 | now value: 0.485292(4,13) 67 | now value: 0.537115(24,11) 68 | now value: 0.571759(4,22) 69 | now value: 0.555397(30,13) 70 | now value: 0.175053(7,28) 71 | 72 | ``` 73 | 74 | 75 | -------------------------------------------------------------------------------- /arm.toolchain.cmake: -------------------------------------------------------------------------------- 1 | SET( CMAKE_SYSTEM_NAME Linux ) 2 | SET( CMAKE_BUILD_TYPE "Release" ) 3 | SET( ANDROID true) 4 | SET( CMAKE_C_COMPILER "arm-hisiv500-linux-gcc" ) 5 | SET( CMAKE_CXX_COMPILER "arm-hisiv500-linux-g++" ) 6 | SET( CMAKE_SYSTEM_PROCESSOR "armv7-a" ) 7 | SET( ARM_C_CXX_FLAGS "-O3 -mfloat-abi=softfp -mfpu=neon-vfpv4 -mcpu=cortex-a17.cortex-a7 -fopenmp" ) 8 | SET( ARM_C_CXX_FLAGS "${ARM_C_CXX_FLAGS} -ffast-math -ftree-vectorize" ) 9 | SET( CMAKE_CXX_FLAGS "${ARM_C_CXX_FLAGS} " CACHE STRING "c++ flags" ) 10 | SET( CMAKE_C_FLAGS "${ARM_C_CXX_FLAGS} " CACHE STRING "c flags" ) -------------------------------------------------------------------------------- /benchmark/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../src) 3 | include_directories(${CMAKE_CURRENT_BINARY_DIR}/../src) 4 | 5 | add_executable(benchncnn benchncnn.cpp) 6 | set_property(TARGET benchncnn PROPERTY COMPILE_FLAGS 
"-fpie") 7 | set_property(TARGET benchncnn PROPERTY LINK_FLAGS "-pie") 8 | target_link_libraries(benchncnn PRIVATE ncnn) 9 | 10 | if(NCNN_VULKAN) 11 | target_link_libraries(benchncnn PRIVATE ${Vulkan_LIBRARY}) 12 | endif() 13 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs) 3 | if(NOT OpenCV_FOUND) 4 | find_package(OpenCV REQUIRED COMPONENTS core highgui imgproc) 5 | endif() 6 | 7 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../src) 8 | include_directories(${CMAKE_CURRENT_BINARY_DIR}/../src) 9 | 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -std=c++11" ) 11 | set(NCNN_EXAMPLE_LINK_LIBRARIES ncnn ${OpenCV_LIBS}) 12 | if(NCNN_VULKAN) 13 | list(APPEND NCNN_EXAMPLE_LINK_LIBRARIES ${Vulkan_LIBRARY}) 14 | endif() 15 | 16 | add_executable(pose pose_demo.cpp) 17 | target_link_libraries(pose ${NCNN_EXAMPLE_LINK_LIBRARIES}) 18 | 19 | install(TARGETS 20 | pose 21 | RUNTIME DESTINATION bin 22 | ) -------------------------------------------------------------------------------- /examples/Python_NCNN/Makefile: -------------------------------------------------------------------------------- 1 | BOOST_INCLUDE_PATH=/usr/include 2 | BOOST_LIB_PATH=/usr/lib/x86_64-linux-gpu 3 | NCNN_INCLUDE_PATH=/train/trainset/1/docker_run/dg-ncnn-ncnn-int8-e2e-20190507/build/install/include 4 | NCNN_LIB_PATH=/train/trainset/1/docker_run/dg-ncnn-ncnn-int8-e2e-20190507/build/install/lib 5 | PYTHON_LIB_PATH=/usr/lib/x86_64-linux-gpu 6 | PYTHON_INCLUDE_PATH=/usr/include/python2.7 7 | NUMPY_INCLUDE_PATH=/usr/local/lib/python2.7/dist-packages/numpy/core/include 8 | 9 | all: 10 | g++ -fPIC -fpermissive -I ${BOOST_INCLUDE_PATH} -I ${PYTHON_INCLUDE_PATH} -I ${NUMPY_INCLUDE_PATH} -I ${NCNN_INCLUDE_PATH} -c ncnn.cpp 11 | g++ -fPIC -fpermissive -I ${BOOST_INCLUDE_PATH} -I 
${PYTHON_INCLUDE_PATH} -I ${NUMPY_INCLUDE_PATH} -I ${NCNN_INCLUDE_PATH} -c ncnn2python.cpp 12 | g++ -shared -o ncnn.so ncnn.o ncnn2python.o -L ${BOOST_LIB_PATH} -lboost_python -L ${PYTHON_LIB_PATH} -lpython2.7 -L ${NCNN_LIB_PATH} -lncnn 13 | cp ncnn.so /usr/local/lib/python2.7/dist-packages/ 14 | 15 | 16 | clean: 17 | rm ncnn.so ncnn.o ncnn2python.o 18 | -------------------------------------------------------------------------------- /examples/Python_NCNN/ncnn.h: -------------------------------------------------------------------------------- 1 | #include "net.h" 2 | #include "cpu.h" 3 | #include "benchmark.h" 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace boost::python; 9 | namespace bp = boost::python; 10 | using namespace std; 11 | 12 | /* Wrapper class that holds an ncnn::Net as a private member; its public methods are the ones registered for Python in the BOOST_PYTHON_MODULE(ncnn) block of ncnn2python.cpp. Method bodies live in ncnn.cpp (not shown here). */ 13 | class Net{ 14 | public: 15 | Net(); 16 | ~Net(); 17 | int load_param(const char * paramPath); 18 | int load_model(const char * modelPath); 19 | void setInputBlobName(string name); 20 | void setOutputBlobName(string name); 21 | int inference(object & input_object,object & output_object,int inputHeight,int inputWidth); 22 | int inference_debug_writeOutputBlob2File(object & input_object,object & output_object,int inputHeight,int inputWidth); 23 | private: 24 | ncnn::Net net; 25 | string inputBlobNmae; /* NOTE(review): "Nmae" is presumably a typo for "Name"; renaming it would also require updating ncnn.cpp, which is not visible here. */ 26 | string outputBlobName; 27 | 28 | }; -------------------------------------------------------------------------------- /examples/Python_NCNN/ncnn2python.cpp: -------------------------------------------------------------------------------- 1 | #include "ncnn.h" 2 | /* Registers the wrapper class under the Python name "net" in a module named ncnn, exposing each public method under its C++ name. */ 3 | BOOST_PYTHON_MODULE(ncnn) 4 | { 5 | class_("net",init<>()) 6 | .def("load_param",&Net::load_param) 7 | .def("load_model",&Net::load_model) 8 | .def("setInputBlobName",&Net::setInputBlobName) 9 | .def("setOutputBlobName",&Net::setOutputBlobName) 10 | .def("inference",&Net::inference) 11 | .def("inference_debug_writeOutputBlob2File",&Net::inference_debug_writeOutputBlob2File) 12 | ; 13 | } 
-------------------------------------------------------------------------------- /examples/Python_NCNN/readme: -------------------------------------------------------------------------------- 1 | 1. 安装依赖包 2 | sudo apt-get install libboost-python-dev 3 | 4 | 2. 修改Makefile开头的变量路径 5 | 6 | 7 | 3. 执行make命令编译ncnn.so 8 | 9 | 4. 把ncnn.so放到site-packages目录下(/home/zyy/anaconda3/lib/python3.7/site-packages),在python 环境下import ncnn 进行测试,不报错即加载成功 10 | 11 | -------------------------------------------------------------------------------- /examples/Python_NCNN_JRfacessd/Makefile: -------------------------------------------------------------------------------- 1 | BOOST_INCLUDE_PATH=/usr/include 2 | BOOST_LIB_PATH=/usr/lib 3 | NCNN_INCLUDE_PATH=../../build-linux/install/include 4 | NCNN_LIB_PATH=../../build-linux/install/lib 5 | PYTHON_INCLUDE_PATH=/root/anaconda3/include/python3.6m 6 | PYTHON_LIB_PATH=/root/anaconda3/lib 7 | 8 | all: 9 | g++ -fPIC -std=c++11 -fpermissive -I ${BOOST_INCLUDE_PATH} -I ${PYTHON_INCLUDE_PATH} -I ${NCNN_INCLUDE_PATH} -c ncnn.cpp 10 | g++ -fPIC -std=c++11 -fpermissive -I ${BOOST_INCLUDE_PATH} -I ${PYTHON_INCLUDE_PATH} -I ${NCNN_INCLUDE_PATH} -c ncnn2python.cpp 11 | g++ -shared -std=c++11 -o ncnn.so ncnn.o ncnn2python.o -L ${BOOST_LIB_PATH} -lboost_python-py35 -L ${PYTHON_LIB_PATH} -lpython3.6m -L ${NCNN_LIB_PATH} -lncnn 12 | cp ncnn.so /root/anaconda3/lib/python3.6/site-packages/ 13 | 14 | 15 | clean: 16 | rm ncnn.so ncnn.o ncnn2python.o 17 | -------------------------------------------------------------------------------- /examples/Python_NCNN_JRfacessd/ncnn.h: -------------------------------------------------------------------------------- 1 | #include "net.h" 2 | #include "cpu.h" 3 | #include "benchmark.h" 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace boost::python; 9 | namespace bp = boost::python; 10 | using namespace std; 11 | 12 | /* Wrapper class that holds an ncnn::Net as a private member; its public methods are the ones registered for Python in the BOOST_PYTHON_MODULE(ncnn) block of ncnn2python.cpp. Method bodies live in ncnn.cpp (not shown here). */ 13 | class Net{ 14 | public: 15 | Net(); 16 | ~Net(); 17 | int load_param(const char * paramPath); 18 | 
int load_model(const char * modelPath); 19 | void setInputBlobName(string name); 20 | void setOutputBlobName(string name); 21 | int inference(object & input_object,object & output_object,int inputHeight,int inputWidth); 22 | int inferenceJRfaceDet(object & input_object,object & output_object,int inputHeight,int inputWidth); /* For testing the Jingrun (JR) face-detection model. Declared without the redundant Net:: qualifier: an extra qualification on a member declaration inside its own class is ill-formed and was only accepted because the Makefile builds with -fpermissive. */ 23 | int inference_debug_writeOutputBlob2File(object &input_object,int inputHeight, int inputWidth,string typeName,int beginLayerIndex,int endLayerIndex,string imagename); 24 | private: 25 | ncnn::Net net; 26 | string inputBlobNmae; /* NOTE(review): "Nmae" is presumably a typo for "Name"; renaming it would also require updating ncnn.cpp, which is not visible here. */ 27 | string outputBlobName; 28 | 29 | }; -------------------------------------------------------------------------------- /examples/Python_NCNN_JRfacessd/ncnn2python.cpp: -------------------------------------------------------------------------------- 1 | #include "ncnn.h" 2 | /* Registers the wrapper class under the Python name "net" in a module named ncnn, exposing each public method under its C++ name. */ 3 | BOOST_PYTHON_MODULE(ncnn) 4 | { 5 | class_("net",init<>()) 6 | .def("load_param",&Net::load_param) 7 | .def("load_model",&Net::load_model) 8 | .def("setInputBlobName",&Net::setInputBlobName) 9 | .def("setOutputBlobName",&Net::setOutputBlobName) 10 | .def("inference",&Net::inference) 11 | .def("inferenceJRfaceDet",&Net::inferenceJRfaceDet) 12 | .def("inference_debug_writeOutputBlob2File",&Net::inference_debug_writeOutputBlob2File) 13 | ; 14 | } -------------------------------------------------------------------------------- /examples/Python_NCNN_JRfacessd/readme: -------------------------------------------------------------------------------- 1 | 1. 安装依赖包 2 | sudo apt-get install libboost-python-dev 3 | 4 | 2. 修改Makefile开头的变量路径 5 | 6 | 7 | 3. 执行make命令编译ncnn.so 8 | 9 | 4. 
把ncnn.so放到site-packages目录下(/home/zyy/anaconda3/lib/python3.7/site-packages),在python 环境下import ncnn 进行测试,不报错即加载成功 10 | 11 | -------------------------------------------------------------------------------- /examples/squeezencnn/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /examples/squeezencnn/ant.properties: -------------------------------------------------------------------------------- 1 | # This file is used to override default values used by the Ant build system. 2 | # 3 | # This file must be checked into Version Control Systems, as it is 4 | # integral to the build system of your project. 5 | 6 | # This file is only used by the Ant script. 7 | 8 | # You can use this to override default values such as 9 | # 'source.dir' for the location of your java source folder and 10 | # 'out.dir' for the location of your output folder. 11 | 12 | # You can also use it to define how the release builds are signed by declaring 13 | # the following properties: 14 | # 'key.store' for the location of your keystore and 15 | # 'key.alias' for the name of the key to use. 16 | # The password will be asked during the build when you use the 'release' target. 
17 | 18 | key.store=/home/nihui/osd/nihuini-release-key.keystore 19 | key.alias=nihuini 20 | key.store.password=nihuini 21 | key.alias.password=nihuini 22 | -------------------------------------------------------------------------------- /examples/squeezencnn/assets/synset_words.txt: -------------------------------------------------------------------------------- 1 | ../../synset_words.txt -------------------------------------------------------------------------------- /examples/squeezencnn/jni/Android.mk: -------------------------------------------------------------------------------- 1 | LOCAL_PATH := $(call my-dir) 2 | 3 | # change this folder path to yours 4 | NCNN_INSTALL_PATH := /home/nihui/osd/ncnn-release/ncnn-android-vulkan-lib 5 | 6 | include $(CLEAR_VARS) 7 | LOCAL_MODULE := ncnn 8 | LOCAL_SRC_FILES := $(NCNN_INSTALL_PATH)/$(TARGET_ARCH_ABI)/libncnn.a 9 | include $(PREBUILT_STATIC_LIBRARY) 10 | 11 | include $(CLEAR_VARS) 12 | 13 | LOCAL_MODULE := squeezencnn 14 | LOCAL_SRC_FILES := squeezencnn_jni.cpp 15 | 16 | LOCAL_C_INCLUDES := $(NCNN_INSTALL_PATH)/include 17 | 18 | LOCAL_STATIC_LIBRARIES := ncnn 19 | 20 | LOCAL_CFLAGS := -O2 -fvisibility=hidden -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math 21 | LOCAL_CPPFLAGS := -O2 -fvisibility=hidden -fvisibility-inlines-hidden -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math 22 | LOCAL_LDFLAGS += -Wl,--gc-sections 23 | 24 | LOCAL_CFLAGS += -fopenmp 25 | LOCAL_CPPFLAGS += -fopenmp 26 | LOCAL_LDFLAGS += -fopenmp 27 | 28 | LOCAL_LDLIBS := -lz -llog -ljnigraphics -lvulkan 29 | 30 | include $(BUILD_SHARED_LIBRARY) 31 | -------------------------------------------------------------------------------- /examples/squeezencnn/jni/Application.mk: -------------------------------------------------------------------------------- 1 | APP_STL := c++_static 2 | APP_ABI := armeabi-v7a arm64-v8a 3 | APP_PLATFORM := android-24 4 | 
-------------------------------------------------------------------------------- /examples/squeezencnn/local.properties: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by Android Tools. 2 | # Do not modify this file -- YOUR CHANGES WILL BE ERASED! 3 | # 4 | # This file must *NOT* be checked into Version Control Systems, 5 | # as it contains information specific to your local configuration. 6 | 7 | # location of the SDK. This is only used by Ant 8 | # For customization when using a Version Control System, please read the 9 | # header note. 10 | sdk.dir=/home/nihui/osd/android-sdk-linux 11 | -------------------------------------------------------------------------------- /examples/squeezencnn/proguard-project.txt: -------------------------------------------------------------------------------- 1 | # To enable ProGuard in your project, edit project.properties 2 | # to define the proguard.config property as described in that file. 3 | # 4 | # Add project specific ProGuard rules here. 5 | # By default, the flags in this file are appended to flags specified 6 | # in ${sdk.dir}/tools/proguard/proguard-android.txt 7 | # You can edit the include path and order by changing the ProGuard 8 | # include property in project.properties. 
9 | # 10 | # For more details, see 11 | # http://developer.android.com/guide/developing/tools/proguard.html 12 | 13 | # Add any project specific keep options here: 14 | 15 | # If your project uses WebView with JS, uncomment the following 16 | # and specify the fully qualified class name to the JavaScript interface 17 | # class: 18 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 19 | # public *; 20 | #} 21 | -------------------------------------------------------------------------------- /examples/squeezencnn/project.properties: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by Android Tools. 2 | # Do not modify this file -- YOUR CHANGES WILL BE ERASED! 3 | # 4 | # This file must be checked in Version Control Systems. 5 | # 6 | # To customize properties used by the Ant build system edit 7 | # "ant.properties", and override values to adapt the script to your 8 | # project structure. 9 | # 10 | # To enable ProGuard to shrink and obfuscate your code, uncomment this (available properties: sdk.dir, user.home): 11 | #proguard.config=${sdk.dir}/tools/proguard/proguard-android.txt:proguard-project.txt 12 | 13 | # Project target. 14 | target=android-24 15 | -------------------------------------------------------------------------------- /examples/squeezencnn/res/layout/main.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 11 | 12 |