├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── README.md
├── cccc-cuda
│   ├── CMakeLists.txt
│   ├── cccc-cuda.vcxproj
│   ├── cccc-cuda.vcxproj.user
│   ├── cuda_functions.cu
│   └── cuda_functions.h
├── cccc-hip
│   ├── CMakeLists.txt
│   ├── cccc-hip.vcxproj
│   ├── cccc-hip.vcxproj.filters
│   ├── cccc-hip.vcxproj.user
│   ├── hip_functions.h
│   └── hip_functions.hip
├── cccc-lite.sln
├── cccc-mnist
│   ├── CMakeLists.txt
│   ├── Cifar10Reader.cpp
│   ├── Cifar10Reader.h
│   ├── DataPreparerMnist.cpp
│   ├── DataPreparerMnist.h
│   ├── MnistReader.cpp
│   ├── MnistReader.h
│   ├── build.sh
│   ├── cccc-mnist.vcxproj
│   ├── cccc-mnist.vcxproj.filters
│   ├── cccc-mnist.vcxproj.user
│   ├── cccc_extension.h
│   └── extension.cpp
├── cccc-windows
│   ├── Application.cpp
│   ├── Application.h
│   ├── CMakeLists.txt
│   ├── cccc-windows.vcxproj
│   ├── cccc-windows.vcxproj.filters
│   ├── cccc-windows.vcxproj.user
│   └── windows.cpp
├── cccc
│   ├── Activer.cpp
│   ├── Activer.h
│   ├── AdditionalCost.cpp
│   ├── AdditionalCost.h
│   ├── CMakeLists.txt
│   ├── DataPreparer.cpp
│   ├── DataPreparer.h
│   ├── DataPreparerFactory.cpp
│   ├── DataPreparerFactory.h
│   ├── DataPreparerImage.cpp
│   ├── DataPreparerImage.h
│   ├── DataPreparerTxt.cpp
│   ├── DataPreparerTxt.h
│   ├── GpuControl.cpp
│   ├── GpuControl.h
│   ├── Layer.cpp
│   ├── Layer.h
│   ├── Log.cpp
│   ├── Log.h
│   ├── MainProcess.cpp
│   ├── MainProcess.h
│   ├── Matrix.cpp
│   ├── Matrix.h
│   ├── MatrixData.cpp
│   ├── MatrixData.h
│   ├── MatrixEx.cpp
│   ├── MatrixEx.h
│   ├── MatrixOp.cpp
│   ├── MatrixOp.h
│   ├── Net.cpp
│   ├── Net.h
│   ├── NetCifa.cpp
│   ├── NetCifa.h
│   ├── NetLayer.cpp
│   ├── NetLayer.h
│   ├── Option.cpp
│   ├── Option.h
│   ├── Solver.cpp
│   ├── Solver.h
│   ├── TensorDesc.cpp
│   ├── TensorDesc.h
│   ├── VectorMath.h
│   ├── blas_types.h
│   ├── cblas_real.h
│   ├── cccc.vcxproj
│   ├── cccc.vcxproj.filters
│   ├── cccc.vcxproj.user
│   ├── cublas_real.h
│   ├── cuda_half_hack.h
│   ├── cuda_libs.inc
│   ├── dll_export.h
│   ├── gpu_lib.cpp
│   ├── gpu_lib.h
│   ├── hip_libs.inc
│   ├── math_supplement.h
│   ├── predict.cpp
│   ├── resource.h
│   ├── rocblas_real.h
│   └── types.h
├── include
│   ├── cblas.h
│   ├── cuda
│   │   └── cudnn2.h
│   ├── cuda_driver_types.h
│   ├── cuda_type_fake.h
│   ├── hip_runtime_api_part.h
│   ├── hip_runtime_type_part.h
│   └── hip_type_fake.h
├── logo.png
└── work
    └── mnist-lenet.ini

/.gitignore:
--------------------------------------------------------------------------------
1 | *.bin
2 | *.bmp
3 | *.d
4 | *.db
5 | *.deps
6 | *.diagsession
7 | *.dll
8 | *.enc
9 | *.exe
10 | *.exp
11 | *.html
12 | *.idb
13 | *.ilk
14 | *.iobj
15 | *.ipdb
16 | *.ipynb
17 | *.lastcodeanalysissucceeded
18 | *.log
19 | *.o
20 | *.obj
21 | *.opendb
22 | *.pdb
23 | *.pdf
24 | *.pyc
25 | *.pyd
26 | *.res
27 | *.sdf
28 | *.tlog
29 | *.vspx
30 | *.xml
31 | *.zip
32 | .vs/
33 | .vscode/
34 | lib/x64/
35 | work/*.txt
36 | work/openblas_haswell_lib/
37 | work/save/
38 | work/*.py
39 | x64/
40 | *ubyte
41 | will-python/will.py
42 | will-python/will_wrap.cxx
43 | .vs/
44 | build/
45 | *.so
46 | *.gz
47 | will-windows/x64/
48 | will/x64/
49 | will-cuda/x64/
50 | will-mnist/x64/
51 | will-image/x64/
52 | will-predict/x64/
53 | will-python/x64/
54 | will-test/x64/
55 | *.onnx
56 | *.pb
57 | *.json
58 | *.aps
59 | *.bak
60 | cccc-hip/Debug/
61 | cccc-hip/Release/
62 | *.patch
63 | *.bat
64 | 
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scarsty/cccc-lite/8a82b51444927af362edc2060af9fa5c045e14e8/.gitmodules
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | project(libcccc)
2 | cmake_minimum_required(VERSION 2.6)
3 | 
4 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
5 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
6 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
7 | 
8 | option(CCCC_CPU "CPU support only" OFF)
9 | 
10 | if(NOT CCCC_CPU)
11 | add_subdirectory(cccc-cuda)
12 | else()
13 | add_definitions(-D NO_CUDA=1)
14 | endif()
15 | add_subdirectory(cccc)
16 | add_subdirectory(cccc-windows)
17 | add_subdirectory(cccc-mnist)
18 | 
19 | if(WITH_PYTHON)
20 | add_subdirectory(cccc-python)
21 | endif()
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CCCC
2 | 
3 | 
4 | cccc is the core component of libwill and handles the training and inference of neural networks.
5 | 
6 | The English codename "will" means "wish".
7 | 
8 | ## Introduction
9 | 
10 | cccc is a neural network toolkit that supports both first-generation layer-based and second-generation operator-based network definitions, yet it is far simpler to install and use than comparable tools.
11 | 
12 | cccc officially supports Windows and uses Windows as its main development platform. It can run GPUs in parallel without requiring NCCL.
13 | 
14 | The design supports both dynamic and static computation graphs.
15 | 
16 | cccc supports NVIDIA and AMD GPUs at the same time, with no need to build separate binaries for the two platforms. You can even install both kinds of GPU in one machine and compute on them in parallel (not that we recommend doing so).
17 | 
18 | cccc offers fewer features than other open-source frameworks; its strengths are simple configuration and single-machine speed, so use it where appropriate.
19 | 
20 | ## Build instructions
21 | 
22 | ### Building on Windows
23 | 
24 | - Any Visual Studio with C++20 support and a CUDA toolkit that works with it can be used; newer versions are recommended.
25 | - Download the cuDNN development package and copy its header, lib, and dll files into the include, lib/x64, and bin directories of the CUDA toolkit. Copying them to a directory of your own choosing also works, but you may then need to set the environment variables yourself.
26 | - Check the value of the CUDA_PATH environment variable; it is usually "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vxx.x" (the trailing number is the version and may differ).
27 | - The cccc-cuda.vcxproj file references the CUDA version number in two places, similar to "CUDA xx.x.targets/props"; edit them by hand to match your installation.
28 | - Install the linear algebra library OpenBLAS (vcpkg or msys2 is recommended) and put its library files where the linker can find them.
29 | - For AMD GPU support, download and install AMD's development packages, and check the corresponding macros in gpu_lib.h. Note that MIOpen only has unofficial Windows builds, and convolution is currently very slow there.
30 | - Check the ENABLE_CUDA and ENABLE_HIP settings at the top of gpu_lib.h; edit them there or in the preprocessor section of the project settings to turn each platform on or off.
31 | - Some dll files, including openblas.dll, are not on the PATH by default; copy them manually into the work directory or into a directory on the PATH.
32 | - Download and extract the MNIST files into the work/mnist directory; the file names should be t10k-images.idx3-ubyte, t10k-labels.idx1-ubyte, train-images.idx3-ubyte, and train-labels.idx1-ubyte. Some archivers turn the inner "." into "-" when extracting, so rename the files if necessary.
33 | - Build the Visual Studio solution; if you only need the core functionality, build just cccc-windows. Run the following command to test; normally the accuracy should be above 99%.
34 | ```shell
35 | cccc-windows -c mnist-lenet.ini
36 | ```
37 | - You can also test with FashionMNIST; the mnist_path option selects a different data directory. Accuracy on its test set typically reaches about 91%.
38 | 
39 | ### Building on Linux
40 | 
41 | #### x86_64
42 | - Install and configure CUDA, HIP (if needed), and OpenBLAS yourself, preferably the versions installed automatically by your system's package manager. Depending on the installed versions, the cmake files may need modification.
43 | - The default CUDA install directory should be /usr/local/cuda, but some Linux distributions install it elsewhere; in that case adjust the include and link directories in CMakeLists.txt.
44 | - Download cuDNN and place it under /usr/local/cuda; note that the lib directory may contain a "64" suffix.
45 | - Run ```cmake .``` in the neural directory to generate the Makefile.
46 | - Build with ```make```; adding -j speeds it up.
47 | - The resulting executables are in the bin folder.
48 | - Creating a dedicated build directory is recommended, for example:
49 | ```shell
50 | mkdir build
51 | cd build
52 | cmake ..
53 | make
54 | ```
55 | - The work directory contains a sample script that builds all the components directly; it is worth consulting.
56 | - Extensions must be built separately.
57 | 
58 | ### mlcc
59 | 
60 | This project depends on a common utility library written by the author; fetch the latest version and place it at the same level as this directory (cccc-lite).
61 | 
62 | ### logo
63 | 
64 | The logo was designed by Dr. Cheng ZD.
65 | 
66 | The old logo was designed by Mr. Zou YB.
67 | 
68 | ### About the lite edition
69 | 
70 | The lite edition supports only a few basic activation functions and basic connections such as convolution and pooling.
71 | 
72 | Only a single GPU can be used for training.
73 | 
74 | Half precision is not supported.
75 | 
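Editor's note: the command-line options of cccc-windows map one-to-one onto the public members of cccc::Application (see cccc-windows/Application.h below), so the trainer can also be embedded from C++. A minimal sketch based on that header; the add-option key and the replacement pair are illustrative, not taken from this repository:

```cpp
// Minimal embedding sketch, mirroring what cccc-windows/windows.cpp does with -c, -a, and -r.
// Assumes the cccc library is linked and Application.h is on the include path.
#include "Application.h"

int main()
{
    cccc::Application app;
    app.ini_file_ = "mnist-lenet.ini";              //ini file, as passed with -c
    app.add_option_string_ = "[train]key1=v1";      //extra options, same mini-format as -a (illustrative key)
    app.replace_string_ = "mnist:fashion_mnist";    //old:new replacement pairs, as passed with -r (illustrative pair)
    app.run();
    return 0;
}
```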
--------------------------------------------------------------------------------
/cccc-cuda/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | project(cccc-cuda)
2 | cmake_minimum_required(VERSION 2.6)
3 | find_package(CUDA QUIET REQUIRED)
4 | 
5 | set(CUDA_NVCC_FLAGS "-O3 --generate-code arch=compute_75,code=sm_75 --generate-code arch=compute_61,code=sm_61")
6 | if (CMAKE_COMPILER_IS_GNUCC)
7 | execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
8 | OUTPUT_VARIABLE GCC_VERSION)
9 | string(REGEX MATCHALL "[0-9]+" GCC_VERSION_COMPONENTS ${GCC_VERSION})
10 | #list(GET GCC_VERSION_COMPONENTS 0 GCC_MAJOR)
11 | #list(GET GCC_VERSION_COMPONENTS 1 GCC_MINOR)
12 | endif()
13 | 
14 | if(GCC_VERSION VERSION_GREATER 10)
15 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " -ccbin gcc-10")
16 | endif()
17 | 
18 | SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/../lib)
19 | cuda_add_library(cccc-cuda SHARED cuda_functions.cu)
20 | 
21 | install(TARGETS cccc-cuda LIBRARY DESTINATION lib)
22 | 
--------------------------------------------------------------------------------
/cccc-cuda/cccc-cuda.vcxproj:
--------------------------------------------------------------------------------
1 |  
2 | 
3 | 
4 | 
5 | Debug
6 | x64
7 | 
8 | 
9 | Release
10 | x64
11 | 
12 | 
13 | 
14 | 
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 
23 | {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}
24 | will_cuda
25 | 
26 | 
27 | 
28 | StaticLibrary
29 | true
30 | MultiByte
31 | v143
32 | 
33 | 
34 | StaticLibrary
35 | false
36 | true
37 | MultiByte
38 | v143
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 
51 | 
52 | true
53 | 
54 | 
55 | 
56 | Level3
57 | Disabled
58 | WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
59 | 
60 | 
61 | true
62 | Console
63 | %(AdditionalDependencies)
64 | 
65 | 
66 | 
67 | 
68 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
69 | 
70 | 
71 | 64
72 | compute_89,sm_89;compute_86,sm_86;compute_75,sm_75;compute_61,sm_61
73 | 
74 | 
75 | Static
76 | 
77 | 
78 | 
79 | 
80 | Level3
81 | MaxSpeed
82 | true
83 | true
84 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
85 | 
86 | 
87 | true
88 | true
89 | true
90 | Console
91 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
92 | 
93 | 
94 | 
95 | 
96 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
97 | 
98 | 
99 | 64
100 | 
101 | 
102 | compute_89,sm_89;compute_86,sm_86;compute_75,sm_75;compute_61,sm_61
103 | 
104 | 
105 | cudart_static.lib
106 | $(CUDA_PATH)/lib/x64
107 | 
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
--------------------------------------------------------------------------------
/cccc-cuda/cccc-cuda.vcxproj.user:
--------------------------------------------------------------------------------
1 |  
2 | 
3 | 
4 | 
--------------------------------------------------------------------------------
/cccc-cuda/cuda_functions.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #ifndef C_EXPORT
4 | #ifdef _WIN32
5 | #define C_EXPORT extern "C" __declspec(dllexport)
6 | #else
7 | #define C_EXPORT extern "C"
8 | #endif
9 | #endif
10 | 
11 | C_EXPORT int cuda_half2float(void* p1, void* p2, unsigned int size);
12 | 
13 | C_EXPORT int cuda_reciprocal(int type, void* A, void* B, unsigned int size, float scale, 
float epsilon); 14 | C_EXPORT int cuda_addnumber(int type, void* A, void* R, unsigned int size, float number, float scale); 15 | C_EXPORT int cuda_pow(int type, void* A, void* R, unsigned int size, float bias, float a2); 16 | C_EXPORT int cuda_sparse(int type, void* p1, void* p2, unsigned int size, float rou, float beta); 17 | C_EXPORT int cuda_sign(int type, void* A, void* R, unsigned int size, float v, float section); 18 | C_EXPORT int cuda_cross_entropy(int type, void* A, void* B, void* R, unsigned int size, float a, float scale); 19 | C_EXPORT int cuda_cross_entropy2(int type, void* A, void* B, void* R, unsigned int size, float a, float scale); 20 | 21 | C_EXPORT int cuda_div(int type, void* A, void* B, void* R, unsigned int size, float a, float b, float scale); 22 | C_EXPORT int cuda_sectionlimit(int type, void* p1, void* p2, void* p3, unsigned int size, float v0, float v1); 23 | C_EXPORT int cuda_ada_delta_update(int type, void* mean_d2, void* mean_ada_d2, void* d, void* ada_d, unsigned int size, float rou, float epsilon); 24 | C_EXPORT int cuda_adam_update(int type, void* mean_d, void* mean_d2, void* d, void* ada_d, unsigned int size, float beta1, float beta2, float epsilon, float t); 25 | C_EXPORT int cuda_rms_prop_update(int type, void* mead_d2, void* d, void* ada_d, unsigned int size, float rou, float epsilon); 26 | 27 | C_EXPORT int cuda_sin(int type, void* A, void* R, unsigned int size, float a, float b); 28 | C_EXPORT int cuda_cos(int type, void* A, void* R, unsigned int size, float a, float b); 29 | 30 | C_EXPORT int cuda_zigzag(int type, void* A, void* R, unsigned int size, float a1, float a2); 31 | C_EXPORT int cuda_zigzagb(int type, void* A, void* dA, void* R, void* dR, unsigned int size, float a1, float a2); 32 | 33 | C_EXPORT int cuda_step(int type, void* A, void* R, unsigned int size, float unuse1, float unuse2); 34 | 35 | C_EXPORT int cuda_leaky_relu(int type, void* A, void* R, unsigned int size, float leak, float a2, float a3); 36 | C_EXPORT int cuda_leaky_relub(int type, void* A, void* dA, void* R, void* dR, unsigned int size, float leak, float a2, float a3); 37 | 38 | C_EXPORT int cuda_max(int type, void* A, void* B, void* R, unsigned int size, float unuse1, float unuse2, float unuse3); 39 | C_EXPORT int cuda_maxb(int type, void* A, void* dA, void* B, void* dB, void* R, void* dR, unsigned int size, float alpha, float beta_a, float beta_b); 40 | -------------------------------------------------------------------------------- /cccc-hip/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(cccc-hip) 2 | cmake_minimum_required(VERSION 2.6) 3 | 4 | 5 | set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA") 6 | 7 | enable_language(${GPU_RUNTIME}) 8 | set(CMAKE_${GPU_RUNTIME}_STANDARD 17) 9 | set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF) 10 | set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON) 11 | 12 | if(WIN32) 13 | set(ROCM_ROOT "$ENV{HIP_PATH}" CACHE PATH "Root directory of the ROCm installation") 14 | else() 15 | set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation") 16 | endif() 17 | 18 | list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") 19 | 20 | add_library(cccc-hip SHARED hip_functions.hip) 21 | # Make example runnable using ctest 22 | set(include_dirs "../../Common") 23 | if(GPU_RUNTIME STREQUAL "CUDA") 24 | list(APPEND include_dirs "${ROCM_ROOT}/include") 25 | endif() 26 | 27 | #target_include_directories(${project_name} PRIVATE ${include_dirs}) 28 | 
set_source_files_properties(hip_functions.hip PROPERTIES LANGUAGE ${GPU_RUNTIME}) 29 | 30 | install(TARGETS cccc-hip) 31 | 32 | 33 | -------------------------------------------------------------------------------- /cccc-hip/cccc-hip.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | Release 10 | x64 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | Document 19 | 20 | 21 | 22 | 17.0 23 | 6.1 24 | {0C83E964-B8EF-4EB9-BEA5-8446F6A7A965} 25 | Win32Proj 26 | cccc-hip 27 | 10.0 28 | 29 | 30 | 31 | DynamicLibrary 32 | true 33 | HIP clang 6.1 34 | Unicode 35 | 36 | 37 | DynamicLibrary 38 | false 39 | HIP clang 6.1 40 | Unicode 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | true 58 | $(SolutionDir)$(Platform)\$(Configuration)\ 59 | 60 | 61 | false 62 | $(SolutionDir)$(Platform)\$(Configuration)\ 63 | 64 | 65 | 66 | Level1 67 | __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 68 | 69 | 70 | Console 71 | true 72 | 73 | 74 | 75 | 76 | __CUDACC__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 77 | 78 | 79 | 80 | 81 | true 82 | 83 | 84 | 85 | 86 | Level2 87 | true 88 | __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 89 | stdcpp20 90 | 91 | 92 | Console 93 | true 94 | true 95 | Enabled 96 | 97 | 98 | 99 | 100 | true 101 | __CUDACC__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 102 | 103 | 104 | UseLinkTimeCodeGeneration 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /cccc-hip/cccc-hip.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {332db0f2-c13e-4842-8592-6345ed4d8674} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu 7 | 8 | 9 | {9cc8b9fc-1c76-4eba-abd5-c65784a2acd8} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh 11 | 12 | 13 | {db4d8471-8ce1-4d38-b219-69467c4c6d52} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Header Files 20 | 21 | 22 | 23 | 24 | Source Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /cccc-hip/cccc-hip.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /cccc-hip/hip_functions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef C_EXPORT 4 | #ifdef _WIN32 5 | #define C_EXPORT extern "C" __declspec(dllexport) 6 | #else 7 | #define C_EXPORT extern "C" 8 | #endif 9 | #endif 10 | 11 | C_EXPORT int hip_half2float(void* p1, void* p2, unsigned int size); 12 | 13 | C_EXPORT int hip_reciprocal(int type, void* A, void* B, unsigned int size, float scale, float epsilon); 14 | C_EXPORT int hip_addnumber(int type, void* A, void* R, unsigned int size, float number, float scale); 15 | C_EXPORT int hip_pow(int type, void* A, void* R, unsigned int size, float bias, float a2); 16 | C_EXPORT int hip_sparse(int type, void* p1, void* p2, unsigned int size, float rou, float beta); 17 | C_EXPORT int hip_sign(int type, void* A, void* R, unsigned int size, float v, float section); 18 | C_EXPORT int hip_cross_entropy(int type, void* A, void* B, void* R, unsigned int size, float a, float scale); 19 | C_EXPORT int hip_cross_entropy2(int type, void* A, void* B, void* R, unsigned int size, float a, float 
scale); 20 | 21 | C_EXPORT int hip_div(int type, void* A, void* B, void* R, unsigned int size, float a, float b, float scale); 22 | C_EXPORT int hip_sectionlimit(int type, void* p1, void* p2, void* p3, unsigned int size, float v0, float v1); 23 | C_EXPORT int hip_ada_delta_update(int type, void* mean_d2, void* mean_ada_d2, void* d, void* ada_d, unsigned int size, float rou, float epsilon); 24 | C_EXPORT int hip_adam_update(int type, void* mean_d, void* mean_d2, void* d, void* ada_d, unsigned int size, float beta1, float beta2, float epsilon, float t); 25 | C_EXPORT int hip_rms_prop_update(int type, void* mead_d2, void* d, void* ada_d, unsigned int size, float rou, float epsilon); 26 | 27 | C_EXPORT int hip_sin(int type, void* A, void* R, unsigned int size, float a, float b); 28 | C_EXPORT int hip_cos(int type, void* A, void* R, unsigned int size, float a, float b); 29 | 30 | C_EXPORT int hip_zigzag(int type, void* A, void* R, unsigned int size, float a1, float a2); 31 | C_EXPORT int hip_zigzagb(int type, void* A, void* dA, void* R, void* dR, unsigned int size, float a1, float a2); 32 | 33 | C_EXPORT int hip_step(int type, void* A, void* R, unsigned int size, float unuse1, float unuse2); 34 | 35 | C_EXPORT int hip_leaky_relu(int type, void* A, void* R, unsigned int size, float leak, float a2, float a3); 36 | C_EXPORT int hip_leaky_relub(int type, void* A, void* dA, void* R, void* dR, unsigned int size, float leak, float a2, float a3); 37 | 38 | C_EXPORT int hip_max(int type, void* A, void* B, void* R, unsigned int size, float unuse1, float unuse2, float unuse3); 39 | C_EXPORT int hip_maxb(int type, void* A, void* dA, void* B, void* dB, void* R, void* dR, unsigned int size, float alpha, float beta_a, float beta_b); 40 | 41 | C_EXPORT int hip_addbias(float* m, float* b, float* r, unsigned int size_m, unsigned int size_mchannel, unsigned int size_b, float a1, float a2); 42 | C_EXPORT int hip_addbiasb(float* bd, float* rd, unsigned int size_m, unsigned int size_mchannel, unsigned int size_b, float a1, float a2); 43 | C_EXPORT int hip_softmax(float* x, float* y, unsigned int size, unsigned int channel, float a1, float a2); 44 | 45 | //only support 2D, square window, stride = window, no padding 46 | C_EXPORT int hip_pool(float* x, float* y, unsigned int w0, unsigned int h0, unsigned int c, unsigned n, unsigned int w1, unsigned int h1, unsigned int size_win, int type, float a1, float a2); 47 | C_EXPORT int hip_poolb(float* x, float* dx, float* y, float* dy, unsigned int w0, unsigned int h0, unsigned int c, unsigned n, unsigned int w1, unsigned int h1, unsigned int size_win, int type, float a1, float a2); 48 | 49 | C_EXPORT int hip_conv2d(float* x, float* w, float* y, int w0, int h0, int c0, int n, int w1, int h1, int c1, int winw, int winh, int stride, int padding, float a1, float a2); 50 | C_EXPORT int hip_conv2db_d(float* dx, float* w, float* dy, int w0, int h0, int c0, int n, int w1, int h1, int c1, int winw, int winh, int stride, int padding, float a1, float a2); 51 | C_EXPORT int hip_conv2db_w(float* x, float* dw, float* dy, int w0, int h0, int c0, int n, int w1, int h1, int c1, int winw, int winh, int stride, int padding, float a1, float a2); 52 | -------------------------------------------------------------------------------- /cccc-lite.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.0.31903.59 5 | MinimumVisualStudioVersion = 
10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cccc", "cccc\cccc.vcxproj", "{271C3640-1916-4736-B07F-DABF0F3914DF}" 7 | ProjectSection(ProjectDependencies) = postProject 8 | {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0} = {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0} 9 | EndProjectSection 10 | EndProject 11 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cccc-windows", "cccc-windows\cccc-windows.vcxproj", "{4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F}" 12 | ProjectSection(ProjectDependencies) = postProject 13 | {271C3640-1916-4736-B07F-DABF0F3914DF} = {271C3640-1916-4736-B07F-DABF0F3914DF} 14 | {A5C5A5AE-E816-4189-9518-23190FDAC9AE} = {A5C5A5AE-E816-4189-9518-23190FDAC9AE} 15 | EndProjectSection 16 | EndProject 17 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cccc-cuda", "cccc-cuda\cccc-cuda.vcxproj", "{41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}" 18 | EndProject 19 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cccc-mnist", "cccc-mnist\cccc-mnist.vcxproj", "{A5C5A5AE-E816-4189-9518-23190FDAC9AE}" 20 | ProjectSection(ProjectDependencies) = postProject 21 | {271C3640-1916-4736-B07F-DABF0F3914DF} = {271C3640-1916-4736-B07F-DABF0F3914DF} 22 | EndProjectSection 23 | EndProject 24 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cccc-hip", "cccc-hip\cccc-hip.vcxproj", "{0C83E964-B8EF-4EB9-BEA5-8446F6A7A965}" 25 | EndProject 26 | Global 27 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 28 | Debug|x64 = Debug|x64 29 | Release|x64 = Release|x64 30 | EndGlobalSection 31 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 32 | {271C3640-1916-4736-B07F-DABF0F3914DF}.Debug|x64.ActiveCfg = Debug|x64 33 | {271C3640-1916-4736-B07F-DABF0F3914DF}.Debug|x64.Build.0 = Debug|x64 34 | {271C3640-1916-4736-B07F-DABF0F3914DF}.Release|x64.ActiveCfg = Release|x64 35 | {271C3640-1916-4736-B07F-DABF0F3914DF}.Release|x64.Build.0 = Release|x64 36 | {4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F}.Debug|x64.ActiveCfg = Debug|x64 37 | {4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F}.Debug|x64.Build.0 = Debug|x64 38 | {4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F}.Release|x64.ActiveCfg = Release|x64 39 | {4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F}.Release|x64.Build.0 = Release|x64 40 | {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}.Debug|x64.ActiveCfg = Debug|x64 41 | {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}.Debug|x64.Build.0 = Debug|x64 42 | {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}.Release|x64.ActiveCfg = Release|x64 43 | {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}.Release|x64.Build.0 = Release|x64 44 | {A5C5A5AE-E816-4189-9518-23190FDAC9AE}.Debug|x64.ActiveCfg = Debug|x64 45 | {A5C5A5AE-E816-4189-9518-23190FDAC9AE}.Debug|x64.Build.0 = Debug|x64 46 | {A5C5A5AE-E816-4189-9518-23190FDAC9AE}.Release|x64.ActiveCfg = Release|x64 47 | {A5C5A5AE-E816-4189-9518-23190FDAC9AE}.Release|x64.Build.0 = Release|x64 48 | {0C83E964-B8EF-4EB9-BEA5-8446F6A7A965}.Debug|x64.ActiveCfg = Debug|x64 49 | {0C83E964-B8EF-4EB9-BEA5-8446F6A7A965}.Debug|x64.Build.0 = Debug|x64 50 | {0C83E964-B8EF-4EB9-BEA5-8446F6A7A965}.Release|x64.ActiveCfg = Release|x64 51 | {0C83E964-B8EF-4EB9-BEA5-8446F6A7A965}.Release|x64.Build.0 = Release|x64 52 | EndGlobalSection 53 | GlobalSection(SolutionProperties) = preSolution 54 | HideSolutionNode = FALSE 55 | EndGlobalSection 56 | GlobalSection(ExtensibilityGlobals) = postSolution 57 | SolutionGuid = {9BF9770D-70D3-4C2A-B800-4BA6FD0EF3B6} 58 | EndGlobalSection 59 | EndGlobal 60 | -------------------------------------------------------------------------------- /cccc-mnist/CMakeLists.txt: 
--------------------------------------------------------------------------------
1 | project(cccc-mnist)
2 | 
3 | set(CMAKE_VERBOSE_MAKEFILE on)
4 | set(CMAKE_CXX_COMPILER $ENV{CXX})
5 | set(CMAKE_CXX_FLAGS "-O3 -std=c++17 -pthread -fopenmp")
6 | 
7 | aux_source_directory(./ SRC_LIST)
8 | 
9 | include_directories(../include /usr/include /usr/local/include /usr/local/cuda/include ../src ../../mlcc ../cccc-cuda ../cccc)
10 | link_directories(.)
11 | 
12 | add_library(cccc-mnist SHARED ${SRC_LIST})
13 | target_link_libraries(cccc-mnist cccc)
14 | 
--------------------------------------------------------------------------------
/cccc-mnist/Cifar10Reader.cpp:
--------------------------------------------------------------------------------
1 | #include "Cifar10Reader.h"
2 | #include "Log.h"
3 | #include "filefunc.h"
4 | 
5 | namespace cccc
6 | {
7 | 
8 | Cifar10Reader::Cifar10Reader()
9 | {
10 | }
11 | 
12 | static void readData(Matrix& X, Matrix& Y, const std::string& filename, int begin)
13 | {
14 |     std::vector<uint8_t> arr;    //element type restored by inference: the file is read as raw bytes and scaled by 255.0 below
15 |     filefunc::readFileToVector(filename, arr);
16 |     int count = begin - 1;
17 |     int k = 0;
18 |     for (int i = 0; i < arr.size(); i++)
19 |     {
20 |         if (i % 3073 == 0)
21 |         {
22 |             count++;
23 |             Y.setData(arr[i], count, 1);
24 |             k = 0;
25 |         }
26 |         else
27 |         {
28 |             X.setData(k++, count, arr[i] / 255.0);
29 |         }
30 |     }
31 |     //for (int in = begin; in < begin+10000; in++)
32 |     //{
33 |     //    for (int ic = 0; ic < 3; ic++)
34 |     //    {
35 |     //        real min1 = 1, max1 = 0;
36 |     //        for (int iw = 0; iw < 32; iw++)
37 |     //        {
38 |     //            for (int ih = 0; ih < 32; ih++)
39 |     //            {
40 |     //                auto v = X.getData(iw, ih, ic, in);
41 |     //                min1 = std::min(v,min1);
42 |     //                max1 = std::max(v, max1);
43 |     //            }
44 |     //        }
45 |     //        for (int iw = 0; iw < 32; iw++)
46 |     //        {
47 |     //            for (int ih = 0; ih < 32; ih++)
48 |     //            {
49 |     //                auto& v = X.getData(iw, ih, ic, in);
50 |     //                v /= max1 - min1;
51 |     //            }
52 |     //        }
53 |     //    }
54 |     //}
55 | }
56 | 
57 | void Cifar10Reader::load(Matrix& X, Matrix& Y, std::string path /*= "cifa10"*/, int data_type /*= 1*/)
58 | {
59 |     if (path.back() != '/' && path.back() != '\\')    //was ||, which is always true; append the separator only when missing
60 |     {
61 |         path += '/';
62 |     }
63 |     if (data_type == 1)
64 |     {
65 |         X.resize(32, 32, 3, 50000);
66 |         Y.resize(10, 50000);
67 |         Y.fillData(0);
68 |         LOG("Loading Cifar10 train data... ");
69 |         readData(X, Y, path + "data_batch_1.bin", 0);
70 |         readData(X, Y, path + "data_batch_2.bin", 10000);
71 |         readData(X, Y, path + "data_batch_3.bin", 20000);
72 |         readData(X, Y, path + "data_batch_4.bin", 30000);
73 |         readData(X, Y, path + "data_batch_5.bin", 40000);
74 |         LOG("done\n");
75 |     }
76 |     else if (data_type == 2)
77 |     {
78 |         X.resize(32, 32, 3, 10000);
79 |         Y.resize(10, 10000);
80 |         Y.fillData(0);
81 |         LOG("Loading Cifar10 test data... ");
82 |         readData(X, Y, path + "test_batch.bin", 0);
83 |         LOG("done\n");
84 |     }
85 |     else
86 |     {
87 |         LOG("Please check Cifar10 type: 1 - train set (50000), 2 - test set (10000)\n");
88 |         return;
89 |     }
90 |     //X.message();
91 |     //Y.message();
92 | }
93 | 
94 | } // namespace cccc
95 | 
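Editor's note: readData() above walks the raw CIFAR-10 binary layout, where every record is 3073 bytes — one label byte followed by 32x32x3 pixel bytes — which is where the `% 3073` test comes from. A self-contained sketch of that record layout, assuming a standard data_batch_1.bin in the current directory:

```cpp
// Standalone sketch of the CIFAR-10 record layout that readData() iterates over.
// Each record is 3073 bytes: 1 label byte, then 3072 pixel bytes (32*32 per channel, 3 channels).
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
    const size_t record_size = 3073;    // 1 + 32 * 32 * 3
    std::vector<uint8_t> buf(record_size);
    FILE* f = fopen("data_batch_1.bin", "rb");    //path relative to the CIFAR-10 data directory
    if (f && fread(buf.data(), 1, record_size, f) == record_size)
    {
        uint8_t label = buf[0];                  //class index 0..9, becomes a one-hot column of Y
        float first_pixel = buf[1] / 255.0f;     //pixels are scaled to [0,1], as in readData()
        printf("label=%d, first pixel=%f\n", (int)label, first_pixel);
    }
    if (f) { fclose(f); }
    return 0;
}
```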
"); 82 | readData(X, Y, path + "test_batch.bin", 0); 83 | LOG("done\n"); 84 | } 85 | else 86 | { 87 | LOG("Please check Cifar10 type: 1 - train set (50000), 2 - test set (10000)\n"); 88 | return; 89 | } 90 | //X.message(); 91 | //Y.message(); 92 | } 93 | 94 | } // namespace cccc 95 | -------------------------------------------------------------------------------- /cccc-mnist/Cifar10Reader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scarsty/cccc-lite/8a82b51444927af362edc2060af9fa5c045e14e8/cccc-mnist/Cifar10Reader.h -------------------------------------------------------------------------------- /cccc-mnist/DataPreparerMnist.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scarsty/cccc-lite/8a82b51444927af362edc2060af9fa5c045e14e8/cccc-mnist/DataPreparerMnist.cpp -------------------------------------------------------------------------------- /cccc-mnist/DataPreparerMnist.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "DataPreparerImage.h" 3 | #include "Random.h" 4 | 5 | namespace cccc 6 | { 7 | 8 | class DataPreparerMnist : public DataPreparerImage 9 | { 10 | private: 11 | int type_ = 1; 12 | int random_diff_ = 0; 13 | int remove59_ = 0; 14 | int reverse_color_ = 0; 15 | Random rand_; 16 | 17 | public: 18 | DataPreparerMnist(); 19 | virtual ~DataPreparerMnist(); 20 | 21 | void initData() override; 22 | void init2() override; 23 | void fillData0() override; 24 | void transOne(Matrix& X1, Matrix& Y1) override; 25 | }; 26 | 27 | } // namespace cccc -------------------------------------------------------------------------------- /cccc-mnist/MnistReader.cpp: -------------------------------------------------------------------------------- 1 | #include "MnistReader.h" 2 | #include "Log.h" 3 | #include "filefunc.h" 4 | 5 | namespace cccc 6 | { 7 | 8 | MnistReader::MnistReader() 9 | { 10 | } 11 | 12 | void MnistReader::getDataSize(const std::string& file_image, int* w, int* h, int* n) 13 | { 14 | auto content = filefunc::readFile(file_image, 16); 15 | reverse(content.data() + 4, 4); 16 | reverse(content.data() + 8, 4); 17 | reverse(content.data() + 12, 4); 18 | if (n) 19 | { 20 | *n = *(int*)(content.data() + 4); 21 | } 22 | if (w) 23 | { 24 | *w = *(int*)(content.data() + 8); 25 | } 26 | if (h) 27 | { 28 | *h = *(int*)(content.data() + 12); 29 | } 30 | } 31 | 32 | void MnistReader::readLabelFile(const std::string& filename, void* y_data, DataType data_type) 33 | { 34 | int s = label_; 35 | auto content = filefunc::readFile(filename); 36 | reverse(content.data() + 4, 4); 37 | int n = *(int*)(content.data() + 4); 38 | memset(y_data, 0, MatrixData::getDataTypeSize(data_type) * n * s); 39 | for (int i = 0; i < n; i++) 40 | { 41 | int pos = *(content.data() + 8 + i); 42 | MatrixData::setData((char*)y_data + MatrixData::getDataTypeSize(data_type) * (i * s + pos % s), data_type, 1); 43 | //y_data[i * s + pos % s] = 1; 44 | } 45 | } 46 | 47 | void MnistReader::readImageFile(const std::string& filename, void* x_data, DataType data_type) 48 | { 49 | auto content = filefunc::readFile(filename); 50 | reverse(content.data() + 4, 4); 51 | reverse(content.data() + 8, 4); 52 | reverse(content.data() + 12, 4); 53 | int n = *(int*)(content.data() + 4); 54 | int w = *(int*)(content.data() + 8); 55 | int h = *(int*)(content.data() + 12); 56 | int size = n * w * h; 57 | memset(x_data, 0, 
--------------------------------------------------------------------------------
/cccc-mnist/MnistReader.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scarsty/cccc-lite/8a82b51444927af362edc2060af9fa5c045e14e8/cccc-mnist/MnistReader.h
--------------------------------------------------------------------------------
/cccc-mnist/build.sh:
--------------------------------------------------------------------------------
1 | cd ..
2 | mkdir build
3 | cd build
4 | cmake ..
5 | make -j
6 | cd ../extension
7 | cp -p ../build/lib/*.so .
8 | cmake . 
9 | make 10 | cp -p *.so ../work/ 11 | cp -p ../build/bin/* ../work/ 12 | 13 | -------------------------------------------------------------------------------- /cccc-mnist/cccc-mnist.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | Release 10 | x64 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | {A5C5A5AE-E816-4189-9518-23190FDAC9AE} 32 | Win32Proj 33 | neural 34 | cccc-mnist 35 | 10.0 36 | 37 | 38 | 39 | DynamicLibrary 40 | true 41 | v143 42 | Unicode 43 | 44 | 45 | DynamicLibrary 46 | false 47 | v143 48 | true 49 | Unicode 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | true 65 | 66 | 67 | false 68 | 69 | 70 | 71 | 72 | 73 | Level3 74 | Disabled 75 | _DEBUG;_WINDOWS;%(PreprocessorDefinitions) 76 | ../include;../include/cuda;../../mlcc;../cccc;$(VCPKG_LIB_PATH)\x64-windows\include 77 | false 78 | true 79 | true 80 | 81 | 82 | Console 83 | true 84 | ../lib/x64;$(OutDir) 85 | cccc.lib;%(AdditionalDependencies) 86 | %(ForceSymbolReferences) 87 | true 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | Level3 97 | 98 | 99 | MaxSpeed 100 | true 101 | true 102 | NNO_CUDA;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) 103 | ../include;../include/cuda;../../mlcc;../cccc;$(VCPKG_LIB_PATH)\x64-windows\include 104 | true 105 | true 106 | false 107 | Precise 108 | false 109 | true 110 | stdcpplatest 111 | 112 | 113 | Console 114 | true 115 | true 116 | true 117 | lib/x64;$(OutDir) 118 | cccc.lib;%(AdditionalDependencies) 119 | %(ForceSymbolReferences) 120 | 121 | 122 | true 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /cccc-mnist/cccc-mnist.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 6 | h;hh;hpp;hxx;hm;inl;inc;xsd 7 | 8 | 9 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 10 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 11 | 12 | 13 | 14 | 15 | headers 16 | 17 | 18 | headers 19 | 20 | 21 | headers 22 | 23 | 24 | headers 25 | 26 | 27 | 28 | 29 | sources 30 | 31 | 32 | sources 33 | 34 | 35 | sources 36 | 37 | 38 | sources 39 | 40 | 41 | sources 42 | 43 | 44 | sources 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /cccc-mnist/cccc-mnist.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /cccc-mnist/cccc_extension.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "dll_export.h" 3 | 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | 9 | DLL_EXPORT void* dp_ext(); 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | -------------------------------------------------------------------------------- /cccc-mnist/extension.cpp: -------------------------------------------------------------------------------- 1 | #include "DataPreparerMnist.h" 2 | #include "cccc_extension.h" 3 | 4 | DLL_EXPORT void* dp_ext() 5 | { 6 | return new cccc::DataPreparerMnist(); 7 | } 8 | -------------------------------------------------------------------------------- /cccc-windows/Application.cpp: -------------------------------------------------------------------------------- 1 | #include "Application.h" 2 | 
#include "MainProcess.h" 3 | #include "filefunc.h" 4 | #include "strfunc.h" 5 | 6 | namespace cccc 7 | { 8 | 9 | void Application::run() 10 | { 11 | MainProcess mp; 12 | 13 | std::map replace_pairs; 14 | auto replace_strings = strfunc::splitString(replace_string_, ";"); 15 | for (auto& r : replace_strings) 16 | { 17 | auto rs = strfunc::splitString(r, ":"); 18 | if (rs.size() >= 1) 19 | { 20 | rs.resize(2); 21 | replace_pairs[rs[0]] = rs[1]; 22 | } 23 | } 24 | 25 | auto filenames = strfunc::splitString(ini_file_); 26 | for (auto filename : filenames) 27 | { 28 | if (!filefunc::fileExist(filename)) 29 | { 30 | LOG_ERR("{} doesn't exist!\n", filename.c_str()); 31 | } 32 | auto ini_str = filefunc::readFileToString(filename); 33 | //替换掉一些字符 34 | for (auto rp : replace_pairs) 35 | { 36 | strfunc::replaceAllSubStringRef(ini_str, rp.first, rp.second); 37 | } 38 | mp.getOption()->loadString(ini_str); 39 | } 40 | auto load_filenames = strfunc::splitString(mp.getOption()->getString("train", "load_ini"), ","); 41 | for (auto filename : load_filenames) 42 | { 43 | if (filename != "") 44 | { 45 | mp.getOption()->loadFile(filename); 46 | } 47 | } 48 | 49 | //format the string into ini style by inserting '\n' 50 | strfunc::replaceAllSubStringRef(add_option_string_, "[", "\n["); 51 | strfunc::replaceAllSubStringRef(add_option_string_, "]", "]\n"); 52 | strfunc::replaceAllSubStringRef(add_option_string_, ";", "\n"); 53 | mp.getOption()->loadString(add_option_string_); 54 | 55 | if (mp.init() != 0) 56 | { 57 | return; 58 | } 59 | loop_ = true; 60 | while (loop_) 61 | { 62 | mp.run(); 63 | loop_ = false; 64 | } 65 | } 66 | 67 | } // namespace cccc -------------------------------------------------------------------------------- /cccc-windows/Application.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace cccc 5 | { 6 | 7 | class Application 8 | { 9 | private: 10 | bool loop_ = true; 11 | 12 | public: 13 | std::string ini_file_; 14 | std::string add_option_string_; 15 | std::string replace_string_; 16 | 17 | public: 18 | void run(); 19 | }; 20 | 21 | } // namespace cccc -------------------------------------------------------------------------------- /cccc-windows/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(cccc-windows) 2 | cmake_minimum_required(VERSION 2.6) 3 | 4 | aux_source_directory(./ SRC_LIST) 5 | 6 | set(CMAKE_VERBOSE_MAKEFILE on) 7 | set(CMAKE_CXX_COMPILER $ENV{CXX}) 8 | set(CMAKE_CXX_FLAGS "-O3 -std=c++17 -pthread -fopenmp -DNDEBUG") 9 | 10 | include_directories(/usr/include /usr/local/include /usr/local/cuda/include /usr/local/cuda/include/crt ../../mlcc ../cccc-cuda ../include ../cccc /opt/rocm/include) 11 | link_directories(/usr/local/cuda/lib64) 12 | 13 | add_executable(cccc-windows ${SRC_LIST}) 14 | target_link_libraries(cccc-windows cccc) 15 | 16 | install(TARGETS cccc-windows RUNTIME DESTINATION bin) 17 | -------------------------------------------------------------------------------- /cccc-windows/cccc-windows.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | Release 10 | x64 11 | 12 | 13 | 14 | {4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F} 15 | Win32Proj 16 | neural 17 | cccc-windows 18 | 10.0 19 | 20 | 21 | 22 | Application 23 | true 24 | v143 25 | Unicode 26 | 27 | 28 | Application 29 | false 30 | v143 31 | true 32 | Unicode 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 
--------------------------------------------------------------------------------
/cccc-windows/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | project(cccc-windows)
2 | cmake_minimum_required(VERSION 2.6)
3 | 
4 | aux_source_directory(./ SRC_LIST)
5 | 
6 | set(CMAKE_VERBOSE_MAKEFILE on)
7 | set(CMAKE_CXX_COMPILER $ENV{CXX})
8 | set(CMAKE_CXX_FLAGS "-O3 -std=c++17 -pthread -fopenmp -DNDEBUG")
9 | 
10 | include_directories(/usr/include /usr/local/include /usr/local/cuda/include /usr/local/cuda/include/crt ../../mlcc ../cccc-cuda ../include ../cccc /opt/rocm/include)
11 | link_directories(/usr/local/cuda/lib64)
12 | 
13 | add_executable(cccc-windows ${SRC_LIST})
14 | target_link_libraries(cccc-windows cccc)
15 | 
16 | install(TARGETS cccc-windows RUNTIME DESTINATION bin)
--------------------------------------------------------------------------------
/cccc-windows/cccc-windows.vcxproj:
--------------------------------------------------------------------------------
1 |  
2 | 
3 | 
4 | 
5 | Debug
6 | x64
7 | 
8 | 
9 | Release
10 | x64
11 | 
12 | 
13 | 
14 | {4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F}
15 | Win32Proj
16 | neural
17 | cccc-windows
18 | 10.0
19 | 
20 | 
21 | 
22 | Application
23 | true
24 | v143
25 | Unicode
26 | 
27 | 
28 | Application
29 | false
30 | v143
31 | true
32 | Unicode
33 | 
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | true
48 | 
49 | 
50 | false
51 | 
52 | 
53 | 
54 | 
55 | 
56 | Level3
57 | Disabled
58 | _DEBUG;_WINDOWS;%(PreprocessorDefinitions)
59 | ../include;include/cuda;../../mlcc;../cccc;$(VCPKG_LIB_PATH)\x64-windows\include
60 | false
61 | false
62 | true
63 | stdcpplatest
64 | 
65 | 
66 | Console
67 | true
68 | ../lib/x64;$(SolutionDir)$(Platform)\$(Configuration)\
69 | cccc.lib;%(AdditionalDependencies)
70 | %(ForceSymbolReferences)
71 | true
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 
78 | 
79 | 
80 | Level3
81 | 
82 | 
83 | MaxSpeed
84 | true
85 | true
86 | NDEBUG;_WINDOWS;%(PreprocessorDefinitions)
87 | ../include;../include/cuda;../../mlcc;../cccc;$(VCPKG_LIB_PATH)\x64-windows\include
88 | true
89 | true
90 | false
91 | Precise
92 | false
93 | true
94 | stdcpplatest
95 | 
96 | 
97 | Console
98 | true
99 | true
100 | true
101 | ../lib/x64;$(SolutionDir)$(Platform)\$(Configuration)\
102 | cccc.lib;%(AdditionalDependencies)
103 | %(ForceSymbolReferences)
104 | 
105 | 
106 | true
107 | 
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 
121 | 
122 | 
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 
--------------------------------------------------------------------------------
/cccc-windows/cccc-windows.vcxproj.filters:
--------------------------------------------------------------------------------
1 |  
2 | 
3 | 
4 | 
5 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
6 | h;hh;hpp;hxx;hm;inl;inc;xsd
7 | 
8 | 
9 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
10 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
11 | 
12 | 
13 | 
14 | 
15 | sources
16 | 
17 | 
18 | sources
19 | 
20 | 
21 | sources
22 | 
23 | 
24 | sources
25 | 
26 | 
27 | 
28 | 
29 | headers
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
--------------------------------------------------------------------------------
/cccc-windows/cccc-windows.vcxproj.user:
--------------------------------------------------------------------------------
1 |  
2 | 
3 | 
4 | -c mnist-lenet.ini
5 | $(SolutionDir)work
6 | WindowsLocalDebugger
7 | $(TargetPath)
8 | 
9 | 
10 | -c mnist-lenet.ini
11 | $(SolutionDir)work
12 | WindowsLocalDebugger
13 | 
14 | 
--------------------------------------------------------------------------------
/cccc-windows/windows.cpp:
--------------------------------------------------------------------------------
1 | #include "Application.h"
2 | #include "Log.h"
3 | #include "cmdline.h"
4 | #include <string>    //include target lost in extraction; restored by inference
5 | 
6 | #ifdef _MSC_VER
7 | #include <windows.h>
8 | #endif
9 | 
10 | int main(int argc, char* argv[])
11 | {
12 |     cmdline::parser cmd;
13 | 
14 |     cmd.add<std::string>("config", 'c', "config file (ini format) of the net", false, "will.ini");
15 |     cmd.add<std::string>("add-config", 'a', "additional config string ([sec1]key1=v1;key2=v2[sec2]key1=v1...)", false, "");
16 |     cmd.add<std::string>("replace", 'r', "replace strings before loading ini (old1:new1;old2:new2...)", false, "");
17 |     cmd.add("version", 'v', "version information");
18 | 
19 | #ifdef _MSC_VER
20 |     cmd.parse_check(GetCommandLineA());
21 | #else
22 |     cmd.parse_check(argc, argv);
23 | #endif
24 | 
25 |     if (cmd.exist("config"))
26 |     {
27 |         cccc::Application app;
28 |         app.ini_file_ = cmd.get<std::string>("config");
29 |         app.add_option_string_ = cmd.get<std::string>("add-config");
30 |         app.replace_string_ = cmd.get<std::string>("replace");
31 |         app.run();
32 |     }
33 |     else if (cmd.exist("version"))
34 |     {
35 |         cccc::LOG("CCCC (A Deep Neural Net library) command line interface\n");
36 |         cccc::LOG("Built with ");
37 | #if defined(_MSC_VER)
38 |         cccc::LOG("Microsoft Visual Studio {}\n", _MSC_VER);
39 | #elif defined(__clang__)
40 |         cccc::LOG("Clang {}\n", __clang_version__);
41 | #elif defined(__GNUC__)
42 |         cccc::LOG("GNU C {}\n", __VERSION__);
43 | #else
44 |         cccc::LOG("Unknown compiler\n");
45 | #endif
46 |         cccc::LOG("Commemorating my great teacher and friend Dr. Yu Wang\n");
47 |     }
48 |     else
49 |     {
50 |         if (argc >= 1)
51 |         {
52 |             cmd.parse_check({ argv[0], "--help" });
53 |         }
54 |     }
55 | 
56 |     return 0;
57 | }
58 | 
cccc::LOG("GNU C {}\n", __VERSION__); 43 | #else 44 | cccc::LOG("Unknown complier\n"); 45 | #endif 46 | cccc::LOG("Commemorating my great teacher and friend Dr. Yu Wang\n"); 47 | } 48 | else 49 | { 50 | if (argc >= 1) 51 | { 52 | cmd.parse_check({ argv[0], "--help" }); 53 | } 54 | } 55 | 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /cccc/Activer.cpp: -------------------------------------------------------------------------------- 1 | #include "Activer.h" 2 | 3 | namespace cccc 4 | { 5 | 6 | Activer::Activer() 7 | { 8 | } 9 | 10 | Activer::~Activer() 11 | { 12 | } 13 | 14 | void Activer::init(Option* op, std::string section, LayerConnectionType ct) 15 | { 16 | active_function_ = op->getEnum(section, "active", ACTIVE_FUNCTION_NONE); 17 | cost_function_ = op->getEnum(section, "cost", COST_FUNCTION_CROSS_ENTROPY); //cost似乎是应该属于net的,待查 18 | cost_rate_ = op->getVector(section, "cost_rate"); 19 | 20 | real learn_rate_base = op->getReal(section, "learn_rate_base", 1e-2); 21 | switch (active_function_) 22 | { 23 | case ACTIVE_FUNCTION_CLIPPED_RELU: 24 | real_vector_ = { op->get(section, "clipped_relu", 0.5) }; 25 | break; 26 | case ACTIVE_FUNCTION_DROPOUT: 27 | { 28 | //Dropout会重设两个矩阵 29 | //2 int: work_phase, seed 30 | int_vector_ = 31 | { 32 | int(ACTIVE_PHASE_TRAIN), 33 | int(op->getReal(section, "dropout_seed", INT_MAX * random_generator_.rand())) 34 | }; 35 | random_generator_.set_seed(); 36 | random_generator_.set_random_type(RANDOM_UNIFORM); 37 | random_generator_.set_parameter(0, 1); 38 | //1 real: dropout_rate 39 | real_vector_ = 40 | { 41 | op->get(section, "dropout_rate", 0.5) 42 | }; 43 | break; 44 | } 45 | case ACTIVE_FUNCTION_LOCAL_RESPONSE_NORMALIZATION: 46 | //1 int: lrn_n 47 | int_vector_ = 48 | { 49 | op->getInt(section, "lrn_n", 5) 50 | }; 51 | //3 real: lrn_alpha, lrn_beta, lrn_k 52 | real_vector_ = 53 | { 54 | op->get(section, "lrn_alpha", 1e-4), 55 | op->get(section, "lrn_beta", 0.75), 56 | op->get(section, "lrn_k", 2.0) 57 | }; 58 | break; 59 | case ACTIVE_FUNCTION_LOCAL_CONSTRAST_NORMALIZATION: 60 | //1 int: lcn_n 61 | int_vector_ = 62 | { 63 | op->getInt(section, "lcn_n", 5) 64 | }; 65 | //3 real: lcn_alpha, lcn_beta, lcn_k (not sure these are useful) 66 | real_vector_ = 67 | { 68 | op->get(section, "lcn_alpha", 1), 69 | op->get(section, "lcn_beta", 0.5), 70 | op->get(section, "lcn_k", 1e-5) 71 | }; 72 | break; 73 | case ACTIVE_FUNCTION_DIVISIVE_NORMALIZATION: 74 | int_vector_ = 75 | { 76 | int(op->get(section, "dn_n", 5)) 77 | }; 78 | //same to lcn, real[3] is learn rate of means 79 | real_vector_ = 80 | { 81 | op->get(section, "dn_alpha", 1), 82 | op->get(section, "dn_beta", 0.5), 83 | op->get(section, "dn_k", 1e-5), 84 | op->get(section, "dn_rate", learn_rate_base) 85 | }; 86 | break; 87 | case ACTIVE_FUNCTION_BATCH_NORMALIZATION: 88 | { 89 | auto bt = op->getEnum(section, "bn_type", BATCH_NORMALIZATION_AUTO); 90 | if (bt == BATCH_NORMALIZATION_AUTO) 91 | { 92 | if (ct == LAYER_CONNECTION_CONVOLUTION) 93 | { 94 | bt = BATCH_NORMALIZATION_SPATIAL; 95 | } 96 | else 97 | { 98 | bt = BATCH_NORMALIZATION_PER_ACTIVATION; 99 | } 100 | } 101 | //2 int: work_phase, batch_normalization 102 | int_vector_ = 103 | { 104 | int(ACTIVE_PHASE_TRAIN), 105 | int(bt) 106 | }; 107 | //3 real: train_rate, exp_aver_factor, epsilon 108 | real_vector_ = 109 | { 110 | op->get(section, "bn_rate", learn_rate_base), 111 | op->get(section, "bn_exp_aver_factor", 1), 112 | op->get(section, "bn_epsilon", 1e-5) 113 | }; 114 | break; 115 | } 
--------------------------------------------------------------------------------
/cccc/Activer.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "Matrix.h"
3 | #include "MatrixEx.h"
4 | #include "Option.h"
5 | #include "Random.h"
6 | 
7 | namespace cccc
8 | {
9 | 
10 | class Activer
11 | {
12 | public:
13 |     Activer();
14 |     virtual ~Activer();
15 |     Activer(const Activer&) = delete;
16 |     Activer& operator=(const Activer&) = delete;
17 | 
18 | protected:
19 |     //activation function related
20 |     ActiveFunctionType active_function_ = ACTIVE_FUNCTION_NONE;
21 |     //bool need_active_ex_ = false;
22 |     std::vector<real> real_vector_;
23 |     std::vector<int> int_vector_;
24 |     std::vector<Matrix> matrix_vector_;
25 |     //cost function
26 |     CostFunctionType cost_function_ = COST_FUNCTION_CROSS_ENTROPY;
27 |     std::vector<real> cost_rate_;    //scales some classes' share of the loss by channel; think of it as emphasizing the correctness of certain classes
28 |     Matrix matrix_cost_rate_;
29 | 
30 |     Random random_generator_;
31 |     ActivePhaseType active_phase_ = ACTIVE_PHASE_TRAIN;
32 | 
33 | public:
34 |     void forward(Matrix& X, Matrix& A)
35 |     {
36 |         activePrepare(active_phase_);
37 |         MatrixEx::activeForward(X, A, active_function_, int_vector_, real_vector_, matrix_vector_);
38 |     }
39 | 
40 |     void backward(Matrix& A, Matrix& X)
41 |     {
42 |         MatrixEx::activeBackward(X, A, active_function_, int_vector_, real_vector_, matrix_vector_);
43 |     }
44 | 
45 |     bool isNone() { return active_function_ == ACTIVE_FUNCTION_NONE; }
46 |     void init(Option* op, std::string section, LayerConnectionType ct);
47 |     void activePrepare(ActivePhaseType ap);
48 |     void initBuffer(Matrix& X);
49 |     void setCostFunction(CostFunctionType cf) { cost_function_ = cf; }
50 | 
51 |     void backwardCost(Matrix& A, Matrix& X, Matrix& Y, Matrix& dA, Matrix& dX);
52 |     real calCostValue(Matrix& A, Matrix& dA, Matrix& Y, Matrix& dY);
53 | 
54 |     ActiveFunctionType getActiveFunction() { return active_function_; }
55 |     void setActivePhase(ActivePhaseType ap) { active_phase_ = ap; }
56 | 
57 |     CostFunctionType getCostFunction_() { return cost_function_; }
58 | 
59 | private:
60 |     bool needSave(int i);
61 | };
62 | 
63 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/AdditionalCost.cpp:
--------------------------------------------------------------------------------
1 | #include "AdditionalCost.h"
2 | #include "MatrixEx.h"
3 | 
4 | namespace cccc
5 | {
6 | 
7 | AdditionalCost::AdditionalCost()
8 | {
9 | }
10 | 
11 | AdditionalCost::~AdditionalCost()
12 | {
13 |     destroy();
14 | }
15 | 
16 | void AdditionalCost::init(Option* op, std::string section, Matrix& A)
17 | {
18 |     batch_ = A.getNumber();
19 | 
20 |     sparse_beta_ = op->getReal(section, "sparse_beta", 0);
21 |     if (sparse_beta_ != 0)
22 |     {
23 |         sparse_rou_ = op->getReal(section, "sparse_rou", 0.1);
24 |         sparse_rou_hat_.resize(A);
25 |         sparse_rou_hat_vector_.resize(A.getRow(), 1);
26 |         as_sparse_.resize(batch_, 1);
27 |         as_sparse_.fillData(1);
28 |     }
29 | 
30 |     diverse_beta_ = op->getReal(section, "diverse_beta", 0);
31 |     if (diverse_beta_ != 0)
32 |     {
33 |         diverse_epsilon_ = op->getReal(section, "diverse_epsilon", 1e-8);
34 |         diverse_aver_.resize(A);
35 |         diverse_aver2_.resize(A);
36 |         diverse_aver3_.resize(A);
37 |         as_diverse_aver_.resize(1, 1, A.getChannel() * A.getNumber(), 1);
38 |         as_diverse_aver_.fillData(1);
39 |         diverse_A_ = A.createShared();
40 |         diverse_A_.resize(1, 1, A.getChannel() * A.getNumber(), 1);
41 |         diverse_workspace_.resize(1, int(1e6));
42 |     }
43 | }
44 | 
45 | void AdditionalCost::destroy()
46 | {
47 | }
48 | 
49 | //the extra-cost function often modifies dA with A
50 | void AdditionalCost::modifyDA(Matrix& A, Matrix& dA)
51 | {
52 |     //sparse
53 |     if (sparse_beta_ != 0)
54 |     {
55 |         Matrix::mulVector(A, as_sparse_, sparse_rou_hat_vector_, 1.0 / batch_);
56 |         MatrixEx::sparse(sparse_rou_hat_, sparse_rou_hat_, sparse_rou_, sparse_beta_);
57 |         Matrix::copyData(sparse_rou_hat_vector_, sparse_rou_hat_);
58 |         sparse_rou_hat_.repeat();
59 |         Matrix::add(dA, sparse_rou_hat_, dA);
60 |     }
61 |     //diverse, dropped
62 |     if (false && diverse_beta_ != 0)
63 |     {
64 |         //use convolution to calculate average cross channel and number
65 |         //int cn = A->getChannel() * A->getNumber();
66 |         //diverse_aver_->resize(A->getWidth(), A->getHeight(), 1, 1);
67 |         //A->resize(A->getWidth(), A->getHeight(), cn, 1);
68 |         //MatrixExtend::convolutionForward(A, diverse_aver_, as_diverse_aver_, diverse_workspace_, diverse_workspace2_, 1, 1, 1.0 / cn);
69 |         //A->resize(dA);
70 |         //diverse_aver_->resize(A->getWidth(), A->getHeight(), 1, cn);
71 |         //diverse_aver_->repeat();
72 | 
73 |         //Matrix::add(A, diverse_aver_, diverse_aver_, 1, -1);
74 | 
75 |         //Matrix::add(dA, diverse_aver_, dA, 1, -diverse_beta_);
76 | 
77 |         /*diverse_aver_->resize(A->getWidth(), A->getHeight(), 1, 1);
78 |         A->resize(A->getWidth(), A->getHeight(), cn, 1);
79 |         MatrixExtend::convolutionForward(A, diverse_aver_, as_diverse_aver_, diverse_workspace_, diverse_workspace2_, 1, 1, 1.0 / cn);
80 |         A->resize(dA);
81 |         diverse_aver_->resize(A->getWidth(), A->getHeight(), 1, cn);
82 |         diverse_aver_->repeat();
83 |         Matrix::elementPow(diverse_aver_, diverse_aver2_, 2);
84 |         Matrix::elementPow(diverse_aver_, diverse_aver3_, 3);
85 |         Matrix::elementPow(A, diverse_aver_, 2);
86 |         Matrix::add(diverse_aver_, diverse_aver2_, diverse_aver_, 1, -1);
87 |         Matrix::elementDiv(diverse_aver_, diverse_aver3_, diverse_aver_, 0, diverse_epsilon_);
88 |         Matrix::add(dA, diverse_aver_, dA);*/
89 |     }
90 | }
91 | 
92 | } // namespace cccc
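Editor's note: the sparse branch of modifyDA() above follows the usual sparse-autoencoder recipe: sparse_rou_hat_ holds the batch-mean activation $\hat\rho$, and MatrixEx::sparse (defined outside this section) appears to add the derivative of a KL penalty to dA. Assuming that reading, with target sparsity $\rho$ = sparse_rou_ and weight $\beta$ = sparse_beta_, the usual form is:

```latex
\beta\,\mathrm{KL}\!\left(\rho \,\Vert\, \hat\rho_j\right)
  = \beta \left( \rho \log\frac{\rho}{\hat\rho_j}
    + (1-\rho)\log\frac{1-\rho}{1-\hat\rho_j} \right),
\qquad
\frac{\partial}{\partial \hat\rho_j}\,\beta\,\mathrm{KL}
  = \beta \left( -\frac{\rho}{\hat\rho_j} + \frac{1-\rho}{1-\hat\rho_j} \right)
```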
--------------------------------------------------------------------------------
/cccc/AdditionalCost.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "Matrix.h"
3 | #include "Option.h"
4 | 
5 | namespace cccc
6 | {
7 | 
8 | class AdditionalCost
9 | {
10 | public:
11 |     AdditionalCost();
12 |     virtual ~AdditionalCost();
13 |     AdditionalCost(const AdditionalCost&) = delete;
14 |     AdditionalCost& operator=(const AdditionalCost&) = delete;
15 | 
16 | private:
17 |     real sparse_beta_ = 0;
18 |     real sparse_rou_ = 0.1;
19 |     Matrix sparse_rou_hat_;
20 |     Matrix sparse_rou_hat_vector_;
21 |     Matrix as_sparse_;
22 |     int batch_ = 0;
23 | 
24 |     real diverse_beta_ = 0;
25 |     real diverse_epsilon_;
26 |     Matrix diverse_aver_;
27 |     Matrix diverse_aver2_;
28 |     Matrix diverse_aver3_;
29 |     Matrix as_diverse_aver_;
30 |     Matrix diverse_workspace_;
31 |     Matrix diverse_A_;
32 |     std::vector diverse_workspace2_;
33 | 
34 | public:
35 |     void init(Option* op, std::string section, Matrix& A);
36 | 
37 | private:
38 |     void destroy();
39 | 
40 | public:
41 |     void modifyDA(Matrix& A, Matrix& dA);
42 | };
43 | 
44 | } // namespace cccc
./layer ../cccc-cuda ../cccc-hip) 20 | link_directories(/usr/local/cuda/lib64) 21 | 22 | set(LINK_LIBRARIES cudnn cudart cublas openblas) 23 | if(NOT CCCC_CPU) 24 | include_directories(../cccc-cuda) 25 | list(APPEND LINK_LIBRARIES cccc-cuda) 26 | endif() 27 | 28 | add_library(cccc SHARED ${SRC_LIST}) 29 | target_link_libraries(cccc LINK_PRIVATE ${LINK_LIBRARIES}) 30 | 31 | install(TARGETS cccc LIBRARY DESTINATION lib) 32 |
-------------------------------------------------------------------------------- /cccc/DataPreparer.cpp: --------------------------------------------------------------------------------
1 | #include "DataPreparer.h" 2 | #include "filefunc.h" 3 | #include <algorithm> 4 | #include <thread> 5 | 6 | namespace cccc 7 | { 8 | 9 | DataPreparer::DataPreparer() 10 | : X(DataType::CURRENT, UnitType::CPU), 11 | Y(DataType::CURRENT, UnitType::CPU), 12 | X0(DataType::CURRENT, UnitType::CPU), 13 | Y0(DataType::CURRENT, UnitType::CPU), 14 | LW(DataType::CURRENT, UnitType::CPU), 15 | LW0(DataType::CURRENT, UnitType::CPU) 16 | { 17 | //rand_.set_seed(); 18 | rand_.set_parameter(0, 1); 19 | } 20 | 21 | DataPreparer::~DataPreparer() 22 | { 23 | } 24 | 25 | void DataPreparer::init() 26 | { 27 | OPTION_GET_INT(shuffle_); 28 | OPTION_GET_INT(trans_); 29 | if (create_by_dll_ != "") 30 | { 31 | fill_ = 1; 32 | } 33 | OPTION_GET_INT(fill_); 34 | OPTION_GET_INT(fill_group_); 35 | init2(); 36 | LOG("Initialize dataset\n"); 37 | resetDataDim(); 38 | initData(); 39 | } 40 | 41 | //initialize the training set preparer 42 | int DataPreparer::getFillGroup() 43 | { 44 | if (fill_) 45 | { 46 | return fill_group_; 47 | } 48 | return -1; 49 | } 50 | 51 | void DataPreparer::initData() 52 | { 53 | X.resizeNumber(fill_group_); 54 | Y.resizeNumber(fill_group_); 55 | X0.resizeNumber(fill_group_); 56 | Y0.resizeNumber(fill_group_); 57 | 58 | queue_origin_.resize(X.getNumber()); 59 | for (int i = 0; i < queue_origin_.size(); i++) 60 | { 61 | queue_origin_[i] = i; 62 | } 63 | } 64 | 65 | void DataPreparer::shuffleQueue(std::vector<int>& train_queue) 66 | { 67 | std::random_device rd; 68 | std::mt19937 g(rd()); 69 | std::shuffle(train_queue.begin(), train_queue.end(), g); 70 | } 71 | 72 | //the data preparer shuffles the raw data, transforms it, then uploads it to data 73 | //for validation the preceding steps are somewhat redundant 74 | void DataPreparer::prepareData(int epoch, const std::string& info) 75 | { 76 | //re-collect the untransformed data 77 | if (fill_) 78 | { 79 | LOG("Fill data for epoch {}\n", epoch + 1); 80 | fillData0(); 81 | } 82 | 83 | int concurrency = std::thread::hardware_concurrency() / 2; 84 | 85 | //may include transforms of the training data 86 | if (X.getDataPtr() != X0.getDataPtr() && Y.getDataPtr() != Y0.getDataPtr()) 87 | { 88 | if (shuffle_ == 0) 89 | { 90 | Matrix::copyData(X0, X); 91 | Matrix::copyData(Y0, Y); 92 | Matrix::copyData(LW0, LW); 93 | } 94 | else 95 | { 96 | LOG("Shuffle data for epoch {}\n", epoch + 1); 97 | shuffleQueue(queue_origin_); 98 | 99 | //#pragma omp parallel 100 | // { 101 | // auto x = X0.cloneSharedCol(0), y = Y0.cloneSharedCol(0); 102 | // auto dim = x.getDim(); 103 | // //std::swap(dim[dim.size() - 1], dim[dim.size() - 2]); 104 | // //x.resize(dim); 105 | // //dim = y.getDim(); 106 | // //std::swap(dim[dim.size() - 1], dim[dim.size() - 2]); 107 | // //y.resize(dim); 108 | //#pragma omp for 109 | // for (int i = 0; i < queue_origin_.size(); i++) 110 | // { 111 | // Matrix::copyRows(X0, queue_origin_[i], X, i, 1); 112 | // Matrix::copyRows(Y0, queue_origin_[i], Y, i, 1); 113 | // Matrix::copyRows(LW0, queue_origin_[i], LW, i, 1); 114 | // x.shareData(X, 0, i); 115 | // y.shareData(Y, 0, i); 116 | // transOne(x, y); 117 | // } 118 | // } 119 | std::vector<std::thread> threads(concurrency); 120 | int count_th = (queue_origin_.size() + concurrency - 1) / concurrency; 121 | int i_th = 0; 122 | for (auto& t : threads) 123 | { 124 | t = std::thread{ [i_th, count_th, this]() 125 | { 126 | auto x = X0.createSharedCol(0), y = Y0.createSharedCol(0); 127 | auto dim = x.getDim(); 128 | for (int i = i_th * count_th; i < std::min(i_th * count_th + count_th, int(queue_origin_.size())); i++) 129 | { 130 | Matrix::copyRows(X0, queue_origin_[i], X, i, 1); 131 | Matrix::copyRows(Y0, queue_origin_[i], Y, i, 1); 132 | Matrix::copyRows(LW0, queue_origin_[i], LW, i, 1); 133 | if (trans_) 134 | { 135 | x.shareData(X, 0, i); 136 | y.shareData(Y, 0, i); 137 | transOne(x, y); 138 | } 139 | } 140 | } }; 141 | i_th++; 142 | } 143 | for (auto& t : threads) 144 | { 145 | t.join(); 146 | } 147 | } 148 | } 149 | LOG("Data prepared for epoch {}\n", epoch + 1); 150 | } 151 | 152 | //create one according to the network's input and output, to avoid possible failures 153 | void DataPreparer::resetDataDim() 154 | { 155 | X.resizeKeepNumber(dim0_); 156 | Y.resizeKeepNumber(dim1_); 157 | X0.resizeKeepNumber(dim0_); 158 | Y0.resizeKeepNumber(dim1_); 159 | } 160 | 161 | std::string DataPreparer::getMessage(int i) 162 | { 163 | if (i >= 0 && i < message_.size()) 164 | { 165 | return message_[i]; 166 | } 167 | return "Error."; 168 | } 169 | 170 | } // namespace cccc
-------------------------------------------------------------------------------- /cccc/DataPreparer.h: --------------------------------------------------------------------------------
1 | #pragma once 2 | #include "Matrix.h" 3 | #include "Option.h" 4 | #include "Random.h" 5 | 6 | namespace cccc 7 | { 8 | 9 | class DLL_EXPORT DataPreparer 10 | { 11 | public: 12 | Matrix X; //the data after transforms, e.g. shuffling, brightness/contrast changes, added noise 13 | Matrix Y; 14 | Matrix LW; //weights controlling the loss function 15 | 16 | Matrix X0; //the raw state of the data, usually as read directly from files 17 | Matrix Y0; 18 | Matrix LW0; 19 | 20 | public: 21 | friend class DataPreparerFactory; 22 | 23 | public: 24 | DataPreparer(); 25 | virtual ~DataPreparer(); 26 | DataPreparer(const DataPreparer&) = delete; 27 | DataPreparer& operator=(const DataPreparer&) = delete; 28 | 29 | protected: 30 | void init(); 31 | virtual void init2() {} 32 | int getFillGroup(); 33 | //virtual void destroy() {} 34 | 35 | public: 36 | virtual void initData(); 37 | virtual void fillData0() {} 38 | virtual void transOne(Matrix& X1, Matrix& Y1) {} 39 | virtual void showOneData(int number) {} 40 | 41 | protected: 42 | std::string getSection() { return section_; } 43 | 44 | private: 45 | std::string create_by_dll_; 46 | 47 | public: 48 | void shuffleQueue(std::vector<int>& train_queue); 49 | void prepareData(int epoch, const std::string& info); 50 | 51 | void resetDataDim(); 52 | 53 | protected: 54 | int shuffle_ = 1; //whether to shuffle the training set 55 | int trans_ = 0; //whether to apply transforms 56 | int fill_ = 0; //whether to fill 57 | int fill_group_ = 0; //number of samples to fill 58 | 59 | std::string section_ = "data_preparer"; 60 | Option* option_; 61 | 62 | //for macro in Option.h 63 | std::string& section = section_; 64 | Option*& option = option_; 65 | 66 | //data dimensions 67 | std::vector<int> dim0_, dim1_; 68 | 69 | std::vector<int> queue_origin_; //fill order 70 | 71 | private: 72 | std::vector<std::string> message_; 73 | Random rand_; 74 | 75 | public: 76 | std::string getMessage(int i); 77 | int isFill() { return fill_; } 78 | 79 | }; 80 | 81 | //template 82 | //void parallel_for_each(std::function fn, int concurrency) 83 | //{ 84 | // std::vector<std::thread> threads(concurrency); 85 | // int count_th = (queue_origin_.size() + concurrency - 1) / concurrency; 86 | // int i_th = 0; 87 | // for (auto& t : threads) 88 | // { 89 | // t = std::thread{[](){fn();}}; 90 | // i_th++; 91 | // } 92 | // for (auto& t : threads) 93 | // { 94 | // t.join(); 95 | // } 96 | //} 97 | 98 | 99 | } // namespace cccc
-------------------------------------------------------------------------------- /cccc/DataPreparerFactory.cpp: --------------------------------------------------------------------------------
1 | #include "DataPreparerFactory.h" 2 | #include "DataPreparerImage.h" 3 | #include "DataPreparerTxt.h" 4 | #include "DynamicLibrary.h" 5 | #include "filefunc.h" 6 | 7 | namespace cccc 8 | { 9 | 10 | DataPreparer* DataPreparerFactory::create(Option* op, const std::string& section, const std::vector<int>& dim0, const std::vector<int>& dim1) 11 | { 12 | DataPreparer* dp = nullptr; 13 | std::string library_name = op->getString(section, "library"); 14 | #ifdef _WIN32 15 | #ifdef _DEBUG 16 | library_name = op->getString(section, "library_dlld", library_name); 17 | #else 18 | library_name = op->getString(section, "library_dll", library_name); 19 | #endif 20 | #else 21 | library_name = op->getString(section, "library_so", library_name); 22 | if (strfunc::toLowerCase(filefunc::getFileExt(library_name)) == "dll") 23 | { 24 | library_name = filefunc::getFilenameWithoutPath(filefunc::getFileMainname(library_name)) + ".so"; 25 | if (library_name.find("lib") != 0) 26 | { 27 | library_name = "lib" + library_name; 28 | } 29 | } 30 | #endif 31 | auto function_name = op->getString(section, "function", "dp_ext"); 32 | 33 | LOG("Try to create data preparer with section \"{}\"\n", section); 34 | if (!library_name.empty() && !function_name.empty()) 35 | { 36 | //note: on 64-bit systems the dll function name has the same form as __cdecl; deep learning is memory-hungry, so 32-bit systems are no longer handled here 37 | using MYFUNC = void* (*)(); 38 | MYFUNC func = nullptr; 39 | func = (MYFUNC)DynamicLibrary::getFunction(library_name, function_name); 40 | if (func) 41 | { 42 | dp = (DataPreparer*)func(); 43 | LOG("Create from {} in {}\n", function_name, library_name); 44 | dp->create_by_dll_ = library_name; 45 | } 46 | else 47 | { 48 | LOG("Failed to load {} in {}\n", function_name, library_name); 49 | } 50 | } 51 | if (dp == nullptr) 52 | { 53 | auto mode = op->getString(section, "mode", "image"); 54 | if (op->hasSection(section)) 55 | { 56 | mode = ""; 57 | } 58 | if (mode == "image") 59 | { 60 | dp = new DataPreparerImage(); 61 | LOG("Create default image data preparer\n"); 62 | } 63 | else if (mode == "txt") 64 | { 65 | dp = new DataPreparerTxt(); 66 | LOG("Create default txt data preparer\n"); 67 | } 68 | else 69 | { 70 | dp = new DataPreparer(); 71 | LOG("Create default data preparer\n"); 72 | } 73 | } 74 | 75 | dp->option_ = op; 76 | dp->section_ = section; 77 | dp->dim0_ = dim0; 78 | dp->dim1_ = dim1; 79 | dp->init(); 80 | 81 | return dp; 82 | } 83 | 84 | } // namespace cccc
-------------------------------------------------------------------------------- /cccc/DataPreparerFactory.h: --------------------------------------------------------------------------------
1 | #pragma once 2 | #include "DataPreparer.h" 3 | 4 | namespace cccc 5 | { 6 | 7 | struct DataPreparerFactory : public DataPreparer 8 | { 9 | private: 10 | static DataPreparer* create(Option* op, const std::string& section, const std::vector<int>& dim0, const std::vector<int>& dim1); 11 | 12 | public: 13 | using UniquePtr = std::unique_ptr<DataPreparer>; 14 | inline static UniquePtr makeUniquePtr(Option* op, const std::string& section, const std::vector<int>& dim0, const std::vector<int>& dim1) 15 | { 16 | UniquePtr p(create(op, section, dim0, dim1)); 17 | return p; 18 | } 19 | }; 20 | 21 | } // namespace cccc
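For reference, an extension library only needs to export a parameterless creator with C linkage whose name matches the "function" key (default "dp_ext"); the factory above casts the returned pointer back to DataPreparer*, injects the option, section and dimensions, and then calls init(). A minimal hypothetical sketch of such an extension follows (the class name is illustrative; the example actually shipped with the repository is cccc-mnist/extension.cpp):

//hypothetical extension source; see cccc-mnist/extension.cpp for the real one
#include "DataPreparer.h"

class MyPreparer : public cccc::DataPreparer
{
protected:
    void init2() override {}     //read extra keys of the ini section here
public:
    void fillData0() override {} //fill X0 and Y0 with raw samples here
};

extern "C"
#ifdef _WIN32
__declspec(dllexport)
#endif
void* dp_ext()
{
    //return the object through its DataPreparer base pointer, since the factory
    //casts the void* back to DataPreparer* and wraps it in a UniquePtr
    cccc::DataPreparer* dp = new MyPreparer();
    return dp;
}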
-------------------------------------------------------------------------------- /cccc/DataPreparerImage.cpp: -------------------------------------------------------------------------------- 1 | #include "DataPreparerImage.h" 2 | #include "Timer.h" 3 | #include "filefunc.h" 4 | #include "strfunc.h" 5 | #include 6 | #include 7 | #include 8 | 9 | #define _USE_MATH_DEFINES 10 | #include 11 | 12 | namespace cccc 13 | { 14 | 15 | DataPreparerImage::DataPreparerImage() 16 | { 17 | } 18 | 19 | DataPreparerImage::~DataPreparerImage() 20 | { 21 | } 22 | 23 | void DataPreparerImage::init2() 24 | { 25 | OPTION_GET_INT(flip_); 26 | OPTION_GET_INT(transpose_); 27 | 28 | OPTION_GET_INT(d_channel_); 29 | OPTION_GET_REAL(d_noise_); 30 | 31 | OPTION_GET_NUMVECTOR(d_contrast_, 2, 0); 32 | OPTION_GET_NUMVECTOR(d_brightness_, 2, 0); 33 | 34 | //LOG("Options for image processing {} end\n\n", section_); 35 | } 36 | 37 | //变换一张图 38 | //此处将原始数据进行一些干扰,适应不同的情况 39 | void DataPreparerImage::transOne(Matrix& X1, Matrix& Y1) 40 | { 41 | bool need_limit = false; //图片的值需要限制到0~1 42 | 43 | if (flip_ != 0) 44 | { 45 | //flip -1, 0, 1, other value means do nothing 46 | int f = floor(rand_.rand() * 4) - 1; 47 | X1.flip(f); 48 | } 49 | if (transpose_ != 0) 50 | { 51 | //transpose 0, 1 52 | int t = floor(rand_.rand() * 2) - 1; 53 | if (t) 54 | { 55 | X1.transpose(); 56 | } 57 | } 58 | 59 | //噪点 60 | if (d_noise_ != 0 && X1.getDeviceType() == UnitType::CPU) 61 | { 62 | need_limit = true; 63 | for (int i = 0; i < X1.getDataSize(); i++) 64 | { 65 | X1.setData(i, 0, X1.getData(i, 0) + rand_.rand() * d_noise_ * 2 - d_noise_); 66 | } 67 | } 68 | 69 | //亮度和对比度连y一起变换 70 | //亮度 71 | if (d_brightness_[1] >= d_brightness_[0] && d_brightness_[0] != 0) 72 | { 73 | need_limit = true; 74 | if (d_channel_ == 0) 75 | { 76 | auto b = d_brightness_[0] + (d_brightness_[1] - d_brightness_[0]) * rand_.rand(); 77 | X1.addNumber(b); 78 | } 79 | else 80 | { 81 | for (int i = 0; i < X1.getNumber(); i++) 82 | { 83 | auto b = d_brightness_[0] + (d_brightness_[1] - d_brightness_[0]) * rand_.rand(); 84 | X1.addNumberCol(b, 1, i); 85 | } 86 | } 87 | } 88 | //对比度 89 | if (d_contrast_[1] >= d_contrast_[0] && d_contrast_[0] != 0) 90 | { 91 | need_limit = true; 92 | if (d_channel_ == 0) 93 | { 94 | auto c = 1 + d_contrast_[0] + (d_contrast_[1] - d_contrast_[0]) * rand_.rand(); 95 | X1.scale(c); 96 | } 97 | else 98 | { 99 | for (int i = 0; i < X1.getNumber(); i++) 100 | { 101 | auto c = 1 + d_contrast_[0] + (d_contrast_[1] - d_contrast_[0]) * rand_.rand(); 102 | X1.scaleCol(c, i); 103 | } 104 | } 105 | } 106 | if (need_limit) 107 | { 108 | X1.sectionLimit(0, 1); 109 | } 110 | } 111 | 112 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/DataPreparerImage.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "DataPreparer.h" 3 | 4 | namespace cccc 5 | { 6 | 7 | class DLL_EXPORT DataPreparerImage : public DataPreparer 8 | { 9 | protected: 10 | int flip_ = 0; //翻转 11 | int transpose_ = 0; //转置 12 | std::vector d_contrast_; //变换对比度 13 | std::vector d_brightness_; //变换亮度 14 | int d_channel_ = 0; //是否通道分别变换 15 | double d_noise_ = 0; //增加随机噪声 16 | 17 | public: 18 | DataPreparerImage(); 19 | virtual ~DataPreparerImage(); 20 | 21 | void init2() override; 22 | void transOne(Matrix& X1, Matrix& Y1) override; 23 | 24 | private: 25 | Random rand_; 26 | }; 27 | 28 | } // namespace cccc 
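The image transforms above are driven entirely by the ini file. A hypothetical [data_preparer] section exercising them might look like the following; the key names are inferred from the OPTION_GET_* macros (member flip_ presumably maps to key "flip", and so on), so the exact spelling and list syntax should be checked against Option.h and work/mnist-lenet.ini:

[data_preparer]
flip = 1
transpose = 1
d_noise = 0.05
d_brightness = -0.1,0.1
d_contrast = -0.2,0.2
d_channel = 0

With these values each sample may be randomly flipped and/or transposed, is perturbed by uniform noise in [-0.05, 0.05] (CPU data only), then shifted by a brightness offset drawn from [-0.1, 0.1] and scaled by a contrast factor 1 + c with c drawn from [-0.2, 0.2]; afterwards the pixel values are clamped back to [0, 1]. A nonzero d_channel draws a separate brightness/contrast value per channel instead of one per image.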
-------------------------------------------------------------------------------- /cccc/DataPreparerTxt.cpp: -------------------------------------------------------------------------------- 1 | #include "DataPreparerTxt.h" 2 | #include "VectorMath.h" 3 | #include "filefunc.h" 4 | 5 | namespace cccc 6 | { 7 | 8 | DataPreparerTxt::DataPreparerTxt() 9 | { 10 | } 11 | 12 | DataPreparerTxt::~DataPreparerTxt() 13 | { 14 | } 15 | 16 | void DataPreparerTxt::init2() 17 | { 18 | input_ = VectorMath::multiply(dim0_, dim0_.size() - 1); 19 | output_ = VectorMath::multiply(dim1_, dim1_.size() - 1); 20 | std::string filename = option_->getString(section_, "file", "file.txt"); 21 | content_ = filefunc::readFileToString(filename); 22 | } 23 | 24 | void DataPreparerTxt::fillData0() 25 | { 26 | rand_.set_seed(); 27 | 28 | for (int index = 0; index < X.getNumber(); index++) 29 | { 30 | int r = rand_.rand() * (content_.size() / 2 - input_ - output_); 31 | for (int i = 0; i < input_; i++) 32 | { 33 | X.setData(i, index, getContent(r + i)); 34 | } 35 | for (int i = 0; i < output_; i++) 36 | { 37 | Y.setData(i, index, getContent(r + i + input_)); 38 | } 39 | } 40 | } 41 | 42 | float DataPreparerTxt::getContent(int i) 43 | { 44 | auto p = (uint16_t*)(&content_[i * 2]); 45 | return (*p) / 65536.0; 46 | } 47 | 48 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/DataPreparerTxt.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "DataPreparer.h" 3 | #include 4 | 5 | namespace cccc 6 | { 7 | 8 | class DataPreparerTxt : public DataPreparer 9 | { 10 | private: 11 | int input_; 12 | int output_; 13 | std::string content_; 14 | 15 | public: 16 | DataPreparerTxt(); 17 | virtual ~DataPreparerTxt(); 18 | 19 | void init2() override; 20 | void fillData0() override; 21 | 22 | private: 23 | float getContent(int i); 24 | 25 | private: 26 | Random rand_; 27 | }; 28 | 29 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/GpuControl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "TensorDesc.h" 3 | #include "types.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | struct cudnnContext; 10 | struct miopenHandle; 11 | 12 | namespace cccc 13 | { 14 | class Rocblas; 15 | class Cublas; 16 | 17 | enum ApiType 18 | { 19 | API_UNKNOWN = 0, 20 | API_CUDA, //NVIDIA 21 | API_HIP, //AMD 22 | }; 23 | 24 | //GPU架构 25 | enum CudaArch 26 | { 27 | ARCH_UNKNOWN = 0, 28 | ARCH_FERMI = 2, 29 | ARCH_KEPLER = 3, 30 | ARCH_MAXWELL = 5, 31 | ARCH_PASCAL = 6, 32 | //where is ARCH_VOLTA?? 33 | ARCH_TURING = 7, 34 | ARCH_AMPERE = 8, 35 | }; 36 | 37 | enum memcpyKind 38 | { 39 | HostToHost = 0, /**< Host -> Host */ 40 | HostToDevice = 1, /**< Host -> Device */ 41 | DeviceToHost = 2, /**< Device -> Host */ 42 | DeviceToDevice = 3, /**< Device -> Device */ 43 | Default = 4 /**< Direction of the transfer is inferred from the pointer values. 
Requires unified virtual addressing */ 44 | }; 45 | 46 | //该类包含一些cuda的基本参数,例如cublas和cudnn的handle 47 | //此类型不能被随机创建,而仅能从已知的对象中选择一个 48 | class DLL_EXPORT GpuControl 49 | { 50 | public: 51 | GpuControl(); 52 | ~GpuControl(); 53 | 54 | GpuControl(GpuControl&) = delete; 55 | GpuControl& operator=(GpuControl&) = delete; 56 | GpuControl(GpuControl&&) = default; 57 | GpuControl& operator=(GpuControl&&) = default; 58 | 59 | private: 60 | //以下静态变量用于记录设备全局信息,相关过程应在主进程中进行 61 | static int device_count_; 62 | static int device_count_c_; 63 | static int device_count_h_; 64 | static std::vector gpu_devices_turn_; 65 | static std::atomic auto_choose_turn_; 66 | 67 | public: 68 | static void checkDevices(); 69 | static int getDeviceCount(); 70 | static GpuControl* getCurrentCuda(); 71 | //static void setCurrentCuda(GpuControl* gpu); 72 | static void setUseCPU(); 73 | 74 | static UnitType getGlobalCudaType(); 75 | static void evaluateDevices(); 76 | static std::vector gpuDevicesTurn(); 77 | 78 | void setAsCurrent(); 79 | int getDeviceID() const { return gpu_id_; } 80 | void getFreeMemory(size_t& free, size_t& total) const; 81 | int init(int dev_id = -1); 82 | ApiType getApiType() { return api_type_; } 83 | void destroy(); 84 | 85 | int malloc(void** p, size_t size); 86 | int free(void* p); 87 | int memcpy(void* dst, const void* src, size_t count, memcpyKind kind); 88 | 89 | private: 90 | static int autoChooseId(); 91 | 92 | private: 93 | bool inited_ = false; 94 | int gpu_id_ = -1; //从0开始,为总计数 95 | ApiType api_type_{ API_UNKNOWN }; 96 | 97 | int api_id_ = -1; //cuda或hip的id,从0开始 98 | 99 | CudaArch micro_arch_{ ARCH_UNKNOWN }; 100 | 101 | public: 102 | Cublas* cublas_ = nullptr; 103 | cudnnContext* cudnn_handle_ = nullptr; 104 | 105 | Rocblas* rocblas_ = nullptr; 106 | miopenHandle* miopen_handle_ = nullptr; 107 | 108 | size_t memory_used_ = 0; 109 | 110 | public: 111 | OtherDesc other_desc_; 112 | template 113 | T getDesc() 114 | { 115 | return other_desc_.getDesc(); 116 | } 117 | //设置激活函数,用于简化代码 118 | static void setActivationDesc(void* activation, int mode, double v); 119 | 120 | public: 121 | ActivePhaseType active_phase_ = ACTIVE_PHASE_TRAIN; 122 | void setActivePhase(ActivePhaseType ap) { active_phase_ = ap; } 123 | static std::string lastCudnnErrorString(); 124 | }; 125 | 126 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/Layer.cpp: -------------------------------------------------------------------------------- 1 | #include "Layer.h" 2 | #include "MatrixEx.h" 3 | #include "Random.h" 4 | #include "VectorMath.h" 5 | 6 | namespace cccc 7 | { 8 | 9 | Layer::Layer() 10 | { 11 | } 12 | 13 | Layer::~Layer() 14 | { 15 | } 16 | 17 | //设置名字 18 | void Layer::setName(const std::string& name) 19 | { 20 | layer_name_ = name; 21 | } 22 | 23 | //这个会检查前一层 24 | LayerConnectionType Layer::getConnection2() 25 | { 26 | auto ct = connetion_type_; 27 | if (connetion_type_ == LAYER_CONNECTION_DIRECT) 28 | { 29 | ct = prev_layer_->connetion_type_; 30 | } 31 | return ct; 32 | } 33 | 34 | //必须先设置option和layer_name 35 | void Layer::message() 36 | { 37 | //激活函数相关 38 | auto string_vector_int = [](const std::vector& v) 39 | { 40 | std::string str; 41 | for (auto i : v) 42 | { 43 | str += std::to_string(i) + ", "; 44 | } 45 | if (str.size() >= 2) 46 | { 47 | str.resize(str.size() - 2); 48 | } 49 | return str; 50 | }; 51 | 52 | //输出本层信息 53 | LOG(" name: {}\n", layer_name_); 54 | LOG(" type: {}\n", option_->getStringFromEnum(connetion_type_)); 55 | LOG(" active: {}\n", 
option_->getStringFromEnum(option_->getEnum(layer_name_, "active", ACTIVE_FUNCTION_NONE))); 56 | //LOG(" solver: {}\n", option_->getStringFromEnum(solver_->getSolverType())); 57 | auto dim = A_->getDim(); 58 | dim.pop_back(); 59 | LOG(" out nodes: {} {}\n", A_->getRow(), dim); 60 | if (W_->getDataSize() > 0) 61 | { 62 | LOG(" weight size: {} {}\n", W_->getDataSize(), W_->getDim()); 63 | } 64 | if (b_->getDataSize() > 0) 65 | { 66 | LOG(" bias size: {}\n", b_->getDataSize()); 67 | } 68 | LOG(" x data size: {} [{}, batch = {}]\n", A_->getDataSize(), A_->getRow(), A_->getNumber()); 69 | //LOG(" have bias: %d\n", need_bias_); 70 | 71 | auto string_layer_names = [](std::vector& ls) 72 | { 73 | std::string str; 74 | for (auto l : ls) 75 | { 76 | str += l->layer_name_ + ", "; 77 | } 78 | if (str.size() >= 2) 79 | { 80 | str.resize(str.size() - 2); 81 | } 82 | return str; 83 | }; 84 | 85 | if (!prev_layers_.empty()) 86 | { 87 | LOG(" prev layer(s): {}\n", string_layer_names(prev_layers_)); 88 | if (connetion_type_ != LAYER_CONNECTION_COMBINE && prev_layers_.size() > 1) 89 | { 90 | LOG("Warning: only {} is effective!", prev_layer_->getName().c_str()); 91 | } 92 | } 93 | if (!next_layers_.empty()) 94 | { 95 | LOG(" next layer(s): {}\n", string_layer_names(next_layers_)); 96 | } 97 | } 98 | 99 | void Layer::makeMatrixOp(std::vector& op_queue) 100 | { 101 | int batch = option_->getInt("train", "batch", 1); 102 | switch (connetion_type_) 103 | { 104 | case LAYER_CONNECTION_FULLCONNECT: 105 | { 106 | MatrixOp op; 107 | auto dim = option_->getVector(layer_name_, "node"); 108 | int out_channel = option_->getInt(layer_name_, "channel", 0); 109 | if (out_channel > 0) 110 | { 111 | dim.push_back(out_channel); 112 | } 113 | W_->resize(VectorMath::multiply(dim), prev_layer_->A_->getRow()); 114 | dim.push_back(prev_layer_->A_->getNumber()); 115 | op.as_mul(W_, prev_layer_->A_, A_, 1, dim); 116 | op_queue.push_back(op); 117 | if (option_->getInt(layer_name_, "need_bias", 1)) 118 | { 119 | MatrixOp op; 120 | b_->resize(A_->getChannel(), 1); 121 | op.as_addBias(A_, b_, A_); 122 | op_queue.push_back(op); 123 | } 124 | break; 125 | } 126 | case LAYER_CONNECTION_CONVOLUTION: 127 | case LAYER_CONNECTION_CORRELATION: 128 | { 129 | MatrixOp op; 130 | int node = option_->getInt(layer_name_, "node", 1); 131 | int out_channel = option_->getInt(layer_name_, "channel", node); 132 | auto prev_dim = prev_layer_->A_->getDim(); 133 | auto prev_dim_window = prev_dim; 134 | int window_dim_size = prev_dim.size() - 2; 135 | auto window = option_->getVector(layer_name_, "window"); 136 | auto stride = option_->getVector(layer_name_, "stride"); 137 | auto padding = option_->getVector(layer_name_, "padding"); 138 | VectorMath::force_resize(window, window_dim_size, 1); 139 | VectorMath::force_resize(stride, window_dim_size, 1); 140 | VectorMath::force_resize(padding, window_dim_size, 0); 141 | auto weight_dim = window; 142 | weight_dim.push_back(prev_layer_->A_->getChannel()); 143 | weight_dim.push_back(out_channel); 144 | W_->resize(weight_dim); 145 | int t = 0; 146 | if (connetion_type_ == LAYER_CONNECTION_CONVOLUTION) 147 | { 148 | t = 0; 149 | } 150 | else if (connetion_type_ == LAYER_CONNECTION_CORRELATION) 151 | { 152 | t = 1; 153 | } 154 | op.as_conv(prev_layer_->A_, W_, A_, stride, padding, t); 155 | op_queue.push_back(op); 156 | if (option_->getInt(layer_name_, "need_bias", 1)) 157 | { 158 | MatrixOp op; 159 | b_->resize(A_->getChannel(), 1); 160 | op.as_addBias(A_, b_, A_); 161 | op_queue.push_back(op); 162 | } 163 | break; 164 
| } 165 | case LAYER_CONNECTION_POOLING: 166 | { 167 | MatrixOp op; 168 | auto prev_dim = prev_layer_->A_->getDim(); 169 | auto prev_dim_window = prev_dim; 170 | int window_dim_size = prev_dim.size() - 2; 171 | auto window = option_->getVector(layer_name_, "window"); 172 | auto stride = option_->getVector(layer_name_, "stride"); 173 | auto padding = option_->getVector(layer_name_, "padding"); 174 | VectorMath::force_resize(window, window_dim_size, 1); 175 | VectorMath::force_resize(stride, window_dim_size, -1); 176 | for (int i = 0; i < window_dim_size; i++) 177 | { 178 | if (stride[i] < 0) 179 | { 180 | stride[i] = window[i]; 181 | } 182 | } 183 | VectorMath::force_resize(padding, window_dim_size, 0); 184 | auto reverse_type = POOLING_NOT_REVERSE; 185 | if (option_->getInt(layer_name_, "reverse", 0)) 186 | { 187 | reverse_type = POOLING_REVERSE; 188 | } 189 | auto pool_type = option_->getEnum(layer_name_, "pool_type", POOLING_MAX); 190 | op.as_pool(prev_layer_->A_, A_, pool_type, reverse_type, window, stride, padding); 191 | op_queue.push_back(op); 192 | break; 193 | } 194 | case LAYER_CONNECTION_DIRECT: 195 | { 196 | //layer = new LayerDirect(); 197 | break; 198 | } 199 | case LAYER_CONNECTION_COMBINE: 200 | { 201 | MatrixOp op; 202 | std::vector in; 203 | for (auto l : prev_layers_) 204 | { 205 | in.push_back(l->A_); 206 | } 207 | if (option_->getEnum(layer_name_, "combine_type") == COMBINE_CONCAT) 208 | { 209 | op.as_concat(in, A_); 210 | } 211 | else 212 | { 213 | op.as_add(in, A_); 214 | } 215 | op_queue.push_back(op); 216 | break; 217 | } 218 | case LAYER_CONNECTION_NONE: 219 | { 220 | auto dim = option_->getVector(layer_name_, "data"); 221 | int out_channel = option_->getInt(layer_name_, "channel", 0); 222 | if (out_channel > 0) 223 | { 224 | dim.push_back(out_channel); 225 | } 226 | dim.push_back(batch); 227 | A_->resize(dim); 228 | break; 229 | } 230 | } 231 | auto active = option_->getEnum(layer_name_, "active", ACTIVE_FUNCTION_NONE); 232 | if (active != ACTIVE_FUNCTION_NONE) 233 | { 234 | MatrixOp op; 235 | auto Ap = A_; 236 | A_ = makeMatrixSP(); 237 | float coef = option_->getReal(layer_name_, "coef", 0.2); 238 | op.as_active(Ap, A_, active, {}, { coef }, {}); 239 | op_queue.push_back(op); 240 | } 241 | } 242 | 243 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/Layer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "MatrixOp.h" 3 | #include "Option.h" 4 | #include 5 | #include 6 | 7 | namespace cccc 8 | { 9 | 10 | //神经层 11 | //下面凡是有两个函数的,在无后缀函数中有公共部分,在带后缀函数中是各自子类的功能 12 | class Layer 13 | { 14 | public: 15 | Layer(); 16 | virtual ~Layer(); 17 | Layer(const Layer&) = delete; 18 | Layer& operator=(const Layer&) = delete; 19 | 20 | protected: 21 | int id_; 22 | std::string layer_name_; //本层的名字 23 | 24 | Layer* prev_layer_ = nullptr; //仅合并数据层可以有多个前层,其余层若多于一个会报警 25 | std::vector prev_layers_; //除了合并数据层外,该向量仅用于生成网络结构,计算时无用 26 | std::vector next_layers_; //后层可以有多个,它们获取到的是同样的数据 27 | 28 | LayerVisibleType visible_type_ = LAYER_VISIBLE_HIDDEN; 29 | LayerConnectionType connetion_type_ = LAYER_CONNECTION_NONE; //这个字段仅为标记,实际连接方式是虚继承 30 | 31 | Option* option_; 32 | 33 | public: 34 | //前面一些字段的设置 35 | void setID(int id) { id_ = id; } 36 | int getBatchSize(); 37 | void setVisible(LayerVisibleType vt) { visible_type_ = vt; } 38 | LayerConnectionType getConnection() { return connetion_type_; } 39 | LayerConnectionType getConnection2(); 40 | void 
setConnection(LayerConnectionType ct) { connetion_type_ = ct; } 41 | //void getOutputSize(int& width, int& height, int& channel) { width = out_width_; height = out_height_; channel = out_channel_; } 42 | const std::string& getName() { return layer_name_; } 43 | void setName(const std::string& name); 44 | 45 | void setOption(Option* op) { option_ = op; } 46 | //static void setEpochCount(int ec) { epoch_count_ = ec; } 47 | 48 | public: 49 | void addPrevLayers(Layer* layer) 50 | { 51 | prev_layer_ = layer; 52 | prev_layers_.push_back(layer); 53 | } 54 | void addNextLayers(Layer* layer) { next_layers_.push_back(layer); } 55 | 56 | Layer* getPrevLayer() { return prev_layer_; } 57 | std::vector<Layer*> getPrevLayers() { return prev_layers_; } 58 | std::vector<Layer*> getNextLayers() { return next_layers_; } 59 | void clearConnect() 60 | { 61 | prev_layer_ = nullptr; 62 | prev_layers_.clear(); 63 | next_layers_.clear(); 64 | } 65 | int getNextLayersCount() { return next_layers_.size(); } 66 | Layer* getNextLayer(int i) { return next_layers_[i]; } 67 | 68 | public: 69 | //these matrices share the same shape; computation order: X, A, ..., dA, dX 70 | //when this layer has no activation function, A and X point to the same object, as do dA and dX 71 | //MatrixSP X_; //X collects the previous layer's output 72 | MatrixSP A_ = makeMatrixSP(); //A is this layer's output after the activation function; the input layer sets A directly 73 | MatrixSP W_ = makeMatrixSP(); 74 | MatrixSP b_ = makeMatrixSP(); 75 | 76 | public: 77 | void message(); 78 | void makeMatrixOp(std::vector<MatrixOp>& op_queue); 79 | }; 80 | 81 | } // namespace cccc
-------------------------------------------------------------------------------- /cccc/Log.cpp: --------------------------------------------------------------------------------
1 | #include "Log.h" 2 | 3 | #ifdef _WIN32 4 | #define NOMINMAX 5 | #include <windows.h> 6 | #include <cstdlib> 7 | #endif 8 | 9 | namespace cccc 10 | { 11 | void showMessageBox(const std::string& str) 12 | { 13 | #ifdef _WIN32 14 | MessageBoxA(NULL, str.c_str(), "cccc", MB_ICONERROR); 15 | #endif 16 | } 17 | 18 | void fatalError(const std::string& str) 19 | { 20 | LOG_ERR("{}", str); 21 | showMessageBox(str); 22 | exit(0); 23 | } 24 | } //namespace cccc 25 |
-------------------------------------------------------------------------------- /cccc/Log.h: --------------------------------------------------------------------------------
1 | #pragma once 2 | #include "fmt1.h" 3 | #include <iostream> 4 | 5 | namespace cccc 6 | { 7 | static inline int errorCount; 8 | 9 | template <typename... Args> 10 | void LOG(Args&&... args) 11 | { 12 | std::cout << fmt1::format(std::forward<Args>(args)...); 13 | fflush(stdout); 14 | } 15 | 16 | template <typename... Args> 17 | void LOG_ERR(Args&&... args) 18 | { 19 | if (errorCount++ >= 2000) 20 | { 21 | std::cerr << "Too many errors, exit program.\n"; 22 | fflush(stderr); 23 | exit(0); 24 | } 25 | std::cerr << fmt1::format(std::forward<Args>(args)...); 26 | fflush(stderr); 27 | } 28 | 29 | void showMessageBox(const std::string& str); 30 | void fatalError(const std::string& str); 31 | } // namespace cccc
-------------------------------------------------------------------------------- /cccc/MainProcess.h: --------------------------------------------------------------------------------
1 | #pragma once 2 | #include "DataPreparerFactory.h" 3 | #include "Net.h" 4 | #include "Timer.h" 5 | #include <atomic> 6 | #include <functional> 7 | 8 | namespace cccc 9 | { 10 | 11 | //overall scheduler 12 | class DLL_EXPORT MainProcess 13 | { 14 | public: 15 | MainProcess(); 16 | virtual ~MainProcess(); 17 | MainProcess(const MainProcess&) = delete; 18 | MainProcess& operator=(const MainProcess&) = delete; 19 | 20 | public: 21 | int brain_id_ = 0; 22 | 23 | protected: 24 | int batch_ = 0; 25 | std::function running_callback_ = nullptr; //callback function 26 | Option option_; 27 | Timer timer_total_; 28 | int MP_count_ = 1; 29 | std::vector<std::unique_ptr<Net>> nets_; 30 | std::vector gpus_; 31 | WorkModeType work_mode_ = WORK_MODE_NORMAL; 32 | 33 | public: 34 | void setCallback(std::function f) { running_callback_ = f; } 35 | 36 | Option* getOption() { return &option_; } 37 | 38 | protected: 39 | //epoch and iteration statistics 40 | int epoch_count_ = 0; 41 | int iter_count_ = 0; 42 | 43 | public: 44 | int getIterCount() { return iter_count_; } 45 | 46 | void setIterCount(int ic) { iter_count_ = ic; } 47 | 48 | public: 49 | DataPreparerFactory::UniquePtr data_preparer_; //training set preparer 50 | 51 | DataPreparerFactory::UniquePtr& getDataPreparer() { return data_preparer_; } 52 | 53 | DataPreparerFactory::UniquePtr data_preparer2_; //test set preparer 54 | 55 | public: 56 | //note the order here 57 | //after the nets are initialized, they can be initialized again as long as the in/out data structures are unchanged, i.e. swapping the network structure, solver, etc. mid-run is possible 58 | int init(const std::string& ini = ""); 59 | int loadIni(const std::string& ini = ""); 60 | int testGPUDevice(); 61 | int initNets(); 62 | int initData(); 63 | 64 | virtual void initDataPreparer(); 65 | void initTrainData(); 66 | void initTestData(); 67 | std::string makeSaveName(const std::string& save_format, int epoch_count); 68 | std::string makeSaveSign(); 69 | 70 | public: 71 | //the following forms a set of scheduling examples 72 | void train(std::vector<std::unique_ptr<Net>>& nets, DataPreparer* data_preparer, int total_epochs); 73 | //void train(std::vector<std::unique_ptr<Net>>& net, DataPreparer* data_preparer, int epochs) { train({ net }, data_preparer, epochs); } 74 | 75 | private: 76 | //data is prepared on the main thread; training and testing run on worker threads 77 | struct TrainInfo 78 | { 79 | std::atomic<int> data_prepared; //0 not ready, 1 cpu ready, 2 gpu ready 80 | std::atomic<int> data_distributed; //number of threads that have finished copying data 81 | std::atomic<int> stop; //stop signal 82 | std::atomic<int> trained; //number of nets that have finished training 83 | std::atomic<int> dweight_uncollected; //gradient synchronization finished 84 | 85 | //std::atomic<int> need_reset; //signal that the solver needs to be reset 86 | 87 | TrainInfo() { reset(); } 88 | 89 | void reset() 90 | { 91 | data_prepared = 0; 92 | data_distributed = 0; 93 | stop = 0; 94 | trained = 0; 95 | dweight_uncollected = 0; 96 | } 97 | }; 98 | 99 | void trainOneNet(std::vector<std::unique_ptr<Net>>& nets, int net_id, TrainInfo& ti, int total_epochs); 100 | 101 | bool checkTestEffective(std::vector<float>& resultv_max, std::vector<float>& resultv); 102 | bool checkTrainHealth(const std::vector<float>& resultvp, const std::vector<float>& resultv, float l1p, float l2p, float l1, float l2, double increase_limited, int effective_epoch_count); 103 | 104 | public: 105 | //when fetching a net, mind the current gpu 106 | Net* getNet(int i = 0) const 107 | { 108 | if (nets_.size() > i) 109 | { 110 | return
nets_[i].get(); 111 | } 112 | return nullptr; 113 | } 114 | 115 | const std::vector>& getNets() { return nets_; } 116 | 117 | public: 118 | void run(int train_epochs = -1); 119 | void testData(Net* net, int force_output = 0, int test_type = 0); 120 | void extraTest(Net* net, const std::string& section, int force_output = 0, int test_type = 0); 121 | 122 | public: 123 | int testExternalData(void* x, void* y, void* a, int n, int attack_times = 0, double* error = nullptr); 124 | }; 125 | 126 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/MatrixData.cpp: -------------------------------------------------------------------------------- 1 | #include "MatrixData.h" 2 | #include "Log.h" 3 | #include "Matrix.h" 4 | #include "gpu_lib.h" 5 | 6 | namespace cccc 7 | { 8 | //此处有问题,目前来看只能在同设备中resize 9 | 10 | void* MatrixData::resize(int64_t size, DataType data_type, bool reserve_data, bool force) 11 | { 12 | data_type_ = data_type; 13 | if (size <= occupy_data_size_ && !force) 14 | { 15 | return data_; 16 | } 17 | float* new_data = nullptr; 18 | auto device_type = UnitType::GPU; 19 | if (!reserve_data) //数据不需保留可以先行释放,可以节省显存的使用,否则峰值占用是新旧尺寸之和 20 | { 21 | release(); 22 | } 23 | if (gpu_) 24 | { 25 | gpu_->setAsCurrent(); 26 | if (gpu_->malloc((void**)&new_data, size * getDataTypeSize(data_type_)) == 0) 27 | { 28 | gpu_->memory_used_ += size * getDataTypeSize(data_type_); 29 | //LOG("Device {} MALLOC {:g}, memory used {:g}!\n", cuda_->getDeviceID(), 1.0 * size * sizeof(real), 1.0 * cuda_->memory_used_); 30 | //if (size * sizeof(real) > 3e8) 31 | //{ 32 | // LOG(2, "Very big!\n"); 33 | //} 34 | } 35 | else 36 | { 37 | LOG("Device {} FAIL TO MALLOC {:g}!\n", gpu_->getDeviceID(), 1.0 * size * getDataTypeSize(data_type_)); 38 | } 39 | } 40 | else 41 | { 42 | device_type = UnitType::CPU; 43 | new_data = new float[size]; 44 | } 45 | if (reserve_data) 46 | { 47 | copy(getApiType(), data_, getApiType(), new_data, std::min(size, occupy_data_size_), data_type_); 48 | release(); 49 | } 50 | occupy_data_size_ = size; 51 | return data_ = new_data; 52 | } 53 | 54 | void MatrixData::release() 55 | { 56 | if (data_ == nullptr) 57 | { 58 | occupy_data_size_ = 0; 59 | return; 60 | } 61 | if (gpu_) 62 | { 63 | auto current_gpu = GpuControl::getCurrentCuda(); 64 | if (current_gpu != gpu_) 65 | { 66 | gpu_->setAsCurrent(); 67 | } 68 | auto status = gpu_->free(data_); 69 | if (status == 0) 70 | { 71 | gpu_->memory_used_ -= occupy_data_size_ * getDataTypeSize(data_type_); 72 | //LOG("Device {} FREE {:g}, memory used {:g}!\n", gpu_->getDeviceID(), 1.0 * occupy_data_size_ * getDataTypeSize(data_type_), 1.0 * gpu_->memory_used_); 73 | } 74 | else 75 | { 76 | //LOG("Device {} FAIL TO FREE {:g}!\n", gpu_->getDeviceID(), 1.0 * occupy_data_size_ * getDataTypeSize(data_type_)); 77 | } 78 | if (current_gpu != gpu_) 79 | { 80 | current_gpu->setAsCurrent(); 81 | } 82 | } 83 | else 84 | { 85 | delete[] data_; 86 | } 87 | occupy_data_size_ = 0; 88 | data_ = nullptr; 89 | } 90 | 91 | int64_t MatrixData::copy(ApiType dt_src, const void* src, ApiType dt_dst, void* dst, int64_t size, DataType dt) 92 | { 93 | return copyByByte(dt_src, src, dt_dst, dst, size * getDataTypeSize(dt)); 94 | } 95 | 96 | int64_t MatrixData::copyByByte(ApiType dt_src, const void* src, ApiType dt_dst, void* dst, int64_t size_in_byte) 97 | { 98 | if (src == nullptr || dst == nullptr || src == dst) 99 | { 100 | return 0; 101 | } 102 | int state = 0; 103 | //cuda 104 | if (dt_dst == API_CUDA && dt_src == API_CUDA) 
105 | { 106 | state = cudaMemcpy(dst, src, size_in_byte, cudaMemcpyDeviceToDevice); 107 | } 108 | else if (dt_dst == API_CUDA && dt_src == API_UNKNOWN) 109 | { 110 | state = cudaMemcpy(dst, src, size_in_byte, cudaMemcpyHostToDevice); 111 | } 112 | else if (dt_dst == API_UNKNOWN && dt_src == API_CUDA) 113 | { 114 | state = cudaMemcpy(dst, src, size_in_byte, cudaMemcpyDeviceToHost); 115 | } 116 | //hip 117 | else if (dt_dst == API_HIP && dt_src == API_HIP) 118 | { 119 | state = hipMemcpy(dst, src, size_in_byte, hipMemcpyDeviceToDevice); 120 | } 121 | else if (dt_dst == API_HIP && dt_src == API_UNKNOWN) 122 | { 123 | state = hipMemcpy(dst, src, size_in_byte, hipMemcpyHostToDevice); 124 | } 125 | else if (dt_dst == API_UNKNOWN && dt_src == API_HIP) 126 | { 127 | state = hipMemcpy(dst, src, size_in_byte, hipMemcpyDeviceToHost); 128 | } 129 | //cuda&hip 130 | else if (dt_dst == API_CUDA && dt_src == API_HIP) 131 | { 132 | auto temp = new char[size_in_byte]; 133 | state += hipMemcpy(temp, src, size_in_byte, hipMemcpyDeviceToHost); 134 | state += cudaMemcpy(dst, temp, size_in_byte, cudaMemcpyHostToDevice); 135 | delete[] temp; 136 | } 137 | else if (dt_dst == API_HIP && dt_src == API_CUDA) 138 | { 139 | auto temp = new char[size_in_byte]; 140 | state += cudaMemcpy(temp, src, size_in_byte, cudaMemcpyDeviceToHost); 141 | state += hipMemcpy(dst, temp, size_in_byte, hipMemcpyHostToDevice); 142 | delete[] temp; 143 | } 144 | //other 145 | else 146 | { 147 | memcpy(dst, src, size_in_byte); 148 | } 149 | if (state != 0) 150 | { 151 | LOG_ERR("Memcpy error: {}, size in byte {:g}\n", state, 1.0 * size_in_byte); 152 | } 153 | return size_in_byte; 154 | } 155 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/MatrixData.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "GpuControl.h" 3 | #include 4 | 5 | namespace cccc 6 | { 7 | 8 | //因存在多设备可能,数据须同时保存其设备指针,需以共享指针使用此类 9 | struct MatrixData 10 | { 11 | GpuControl* gpu_ = nullptr; //为空表示在cpu中,也即默认情况 12 | void* data_ = nullptr; 13 | int64_t occupy_data_size_ = 0; //实际占用的数据长度,当重设置尺寸不大于此值的时候不会重新分配内存 14 | DataType data_type_ = DataType::FLOAT; 15 | 16 | public: 17 | MatrixData() = default; 18 | ~MatrixData() { release(); } 19 | MatrixData(const MatrixData&) = delete; 20 | MatrixData& operator=(const MatrixData) = delete; 21 | void setCuda(GpuControl* gpu) { gpu_ = gpu; } 22 | void setCudaAsCurrent() { gpu_ = GpuControl::getCurrentCuda(); } 23 | void setDataType(DataType dt) { data_type_ = dt; } 24 | DataType getDataType() const { return data_type_; } 25 | size_t getDataTypeSize() const { return getDataTypeSize(data_type_); } 26 | size_t size() const { return occupy_data_size_; } 27 | size_t sizeInByte() const { return occupy_data_size_ * getDataTypeSize(); } 28 | void* resize(int64_t size, DataType data_type, bool reserve_data = true, bool force = false); //resize前应setCuda 29 | std::shared_ptr make_shared_data() { return std::make_shared(data_); } 30 | void release(); 31 | ApiType getApiType() const 32 | { 33 | if (gpu_) { return gpu_->getApiType(); } 34 | return API_UNKNOWN; 35 | } 36 | void* getDataPtr(int i) const { return (char*)data_ + i * getDataTypeSize(); } 37 | float getData(int i) const 38 | { 39 | auto p = getDataPtr(i); 40 | switch (getDataType()) 41 | { 42 | case DataType::FLOAT: 43 | return *(float*)p; 44 | case DataType::DOUBLE: 45 | return *(double*)p; 46 | case DataType::HALF: 47 | return *(half*)p; 48 | default: 49 | return 0; 
50 | } 51 | } 52 | template 53 | static void setData(void* p, DataType data_type, T v) 54 | { 55 | switch (data_type) 56 | { 57 | case DataType::FLOAT: 58 | *(float*)p = v; 59 | break; 60 | case DataType::DOUBLE: 61 | *(double*)p = v; 62 | break; 63 | case DataType::HALF: 64 | *(half*)p = v; 65 | break; 66 | } 67 | } 68 | template 69 | void setData(int i, T v) { setData(getDataPtr(i), getDataType(), v); } 70 | void setData(int i, void* p, DataType data_type) //data_type为p的数据类型 71 | { 72 | switch (data_type) 73 | { 74 | case DataType::FLOAT: 75 | setData(i, *(float*)p); 76 | break; 77 | case DataType::DOUBLE: 78 | setData(i, *(double*)p); 79 | break; 80 | case DataType::HALF: 81 | setData(i, *(half*)p); 82 | break; 83 | } 84 | } 85 | 86 | public: 87 | static int64_t copy(ApiType dt_src, const void* src, ApiType dt_dst, void* dst, int64_t size, DataType dt); 88 | static int64_t copyByByte(ApiType dt_src, const void* src, ApiType dt_dst, void* dst, int64_t size_in_byte); 89 | static size_t getDataTypeSize(DataType dt) 90 | { 91 | switch (dt) 92 | { 93 | case DataType::FLOAT: 94 | return sizeof(float); 95 | case DataType::DOUBLE: 96 | return sizeof(double); 97 | case DataType::HALF: 98 | return sizeof(half); 99 | default: 100 | return 0; 101 | } 102 | } 103 | }; 104 | 105 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/MatrixEx.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Matrix.h" 3 | 4 | namespace cccc 5 | { 6 | 7 | //该类中均不是矩阵基本计算,全部为静态函数 8 | class DLL_EXPORT MatrixEx : public Matrix 9 | { 10 | private: 11 | MatrixEx() = delete; 12 | 13 | public: 14 | //以下函数不属于矩阵基本运算 15 | 16 | //按channel加偏置 17 | static void addBias(const Matrix& X, const Matrix& bias, Matrix& Y, float a = 1, float b = 1); 18 | static void addBiasBackward(const Matrix& X, Matrix& bias, const Matrix& Y, float a = 1, float b = 1); 19 | 20 | // the function is private for concat the data and append the data 21 | static void concatByChannel(const std::vector& X_vector, Matrix& Y); 22 | static void concatByChannelBackward(std::vector& X_vector, const Matrix& Y); 23 | static void splitByChannel(const Matrix& X, std::vector& Y_vector); 24 | 25 | static void activeBufferInit(const Matrix& X, ActiveFunctionType af, std::vector& int_vector, std::vector& real_vector, std::vector& matrix_vector); 26 | 27 | //激活的实际计算 28 | //激活和反向激活中,输入和输出矩阵都是同维度 29 | //请注意反向的情况,常数a和r的含义与正向的对应关系不同 30 | static void activeForward(const Matrix& X, Matrix& Y, ActiveFunctionType af, 31 | std::vector& int_vector, std::vector& real_vector, std::vector& matrix_vector, float a = 1, float r = 0); 32 | static void activeBackward(Matrix& X, const Matrix& Y, ActiveFunctionType af, 33 | std::vector& int_vector, std::vector& real_vector, std::vector& matrix_vector, float a = 1, float r = 0); 34 | 35 | static void activeForwardSimple(const Matrix& X, Matrix& Y, ActiveFunctionType af, float a = 1, float r = 0); 36 | static void activeBackwardSimple(Matrix& X, const Matrix& Y, ActiveFunctionType af, float a = 1, float r = 0); 37 | 38 | static void poolingForward(const Matrix& X, Matrix& Y, PoolingType pooling_type, PoolingReverseType reverse_type, 39 | const std::vector& window, const std::vector& stride, const std::vector& padding, 40 | float a = 1, float r = 0, Matrix* workspace = nullptr); 41 | static void poolingBackward(Matrix& X, const Matrix& Y, PoolingType pooling_type, PoolingReverseType reverse_type, 42 | const std::vector& window, 
const std::vector& stride, const std::vector& padding, 43 | float a = 1, float r = 0, Matrix* workspace = nullptr); 44 | 45 | struct ConvMethod 46 | { 47 | int algo = -1, math_type = -1, group_number = 0; 48 | }; 49 | 50 | static const int conv_method_count = 8; 51 | static void convolutionForward(const Matrix& X, const Matrix& W, Matrix& Y, 52 | const std::vector& stride, const std::vector& padding, 53 | float a = 1, float r = 0, ConvMethod* method = nullptr, Matrix* workspace = nullptr); 54 | static void convolutionBackward(Matrix& X, Matrix& W, const Matrix& Y, 55 | const std::vector& stride, const std::vector& padding, 56 | float a = 1, float rx = 0, float aw = 1, float rw = 0, 57 | ConvMethod* method_dx = nullptr, ConvMethod* method_dw = nullptr, Matrix* workspace_dx = nullptr, Matrix* workspace_dw = nullptr); 58 | static void convolutionBackwardDX(Matrix& X, const Matrix& W, const Matrix& Y, 59 | const std::vector& stride, const std::vector& padding, 60 | float a = 1, float r = 0, ConvMethod* method_dx = nullptr, Matrix* workspace_dx = nullptr); 61 | static void convolutionBackwardDW(const Matrix& X, Matrix& W, const Matrix& Y, 62 | const std::vector& stride, const std::vector& padding, 63 | float a = 1, float r = 0, ConvMethod* method_dw = nullptr, Matrix* workspace_dw = nullptr); 64 | 65 | static void dropoutForward(const Matrix& X, Matrix& Y, ActivePhaseType work_phase, float v, int seed, Matrix& rg_stat, Matrix& reverse_space); 66 | static void dropoutBackward(Matrix& X, const Matrix& Y, float v, int seed, Matrix& rg_stat, Matrix& reverse_space); 67 | 68 | //GPU only ---------------------------------------------------------------------------------------------------- 69 | 70 | //以下带有可以训练调节的参数 71 | static void batchNormalizationForward(const Matrix& X, Matrix& Y, ActivePhaseType work_phase, BatchNormalizationType bn_type, 72 | float& exp_aver_factor, float epsilon, Matrix& scale, Matrix& bias, Matrix& result_running_mean, Matrix& result_running_variance, 73 | Matrix& result_save_mean, Matrix& result_save_inv_variance); 74 | static void batchNormalizationBackward(Matrix& X, const Matrix& Y, ActivePhaseType work_phase, BatchNormalizationType bn_type, 75 | float epsilon, float rate, Matrix& scale, Matrix& bias, Matrix& saved_mean, Matrix& saved_inv_variance, Matrix& result_dscale, Matrix& result_dbias); 76 | 77 | //GPU only ---------------------------------------------------------------------------------------------------- 78 | 79 | //此处计算出的ada_d才是实际的更新梯度,之后应将其加在参数上 80 | static void adaDeltaUpdate(Matrix& mean_d2, Matrix& mean_ada_d2, Matrix& d, Matrix& ada_d, float rou, float epsilon); 81 | static void adamUpdate(Matrix& mean_d, Matrix& mean_d2, Matrix& d, Matrix& ada_d, float beta1, float beta2, float epsilon, float t); 82 | static void adaRMSPropUpdate(Matrix& mean_d2, Matrix& d, Matrix& ada_d, float rou, float epsilon); 83 | static void sparse(Matrix& rou_hat, Matrix& R, float rou, float beta); 84 | 85 | static void fill(Matrix& m, RandomFillType random_type, int in, int out); 86 | 87 | static void sin(const Matrix& X, Matrix& Y, float a = 1); 88 | static void cos(const Matrix& X, Matrix& Y, float a = 1); 89 | static void zigzag(const Matrix& X, Matrix& Y); 90 | static void zigzagb(Matrix& X, const Matrix& Y); 91 | 92 | static void step(const Matrix& X, Matrix& Y); 93 | 94 | static void leaky_relu(const Matrix& X, Matrix& Y, float l, float a = 1, float b = 0); 95 | static void leaky_relub(Matrix& X, const Matrix& Y, float l, float a = 1, float b = 0); 96 | 97 | static void 
correlationForward(const Matrix& X, const Matrix& W, Matrix& Y, std::vector<ConvMethod>& methods, std::vector<Matrix>& workspaces, 98 | const std::vector<int>& stride, const std::vector<int>& padding, float a = 1, float r = 0); 99 | static void correlationBackward(Matrix& X, Matrix& W, const Matrix& Y, std::vector<ConvMethod>& methods, std::vector<Matrix>& workspaces, 100 | const std::vector<int>& stride, const std::vector<int>& padding, float a = 1, float rx = 0, float rw = 0); 101 | static void matrix_max(const Matrix& X1, const Matrix& X2, Matrix& Y); 102 | static void matrix_maxb(Matrix& X1, Matrix& X2, const Matrix& Y, float a1, float a2, float r); 103 | }; 104 | 105 | } // namespace cccc
-------------------------------------------------------------------------------- /cccc/MatrixOp.h: --------------------------------------------------------------------------------
1 | #pragma once 2 | #include <any> 3 | 4 | #include "Log.h" 5 | #include "Matrix.h" 6 | 7 | namespace cccc 8 | { 9 | 10 | //matrix operations; by convention the network forward pass may only use the computations in this file 11 | 12 | enum class MatrixOpType 13 | { 14 | NONE, 15 | ADD, 16 | MUL, 17 | ELE_MUL, 18 | ADD_BIAS, 19 | CONCAT, 20 | ACTIVE, 21 | POOL, 22 | CONV, 23 | CORR, 24 | RESHAPE, 25 | MAX, 26 | LOSS, 27 | L2, 28 | }; 29 | 30 | struct Any; 31 | template <typename T> 32 | concept notParaAny = !std::is_same_v<std::decay_t<T>, Any>; 33 | 34 | struct Any : std::any 35 | { 36 | template <typename T> 37 | const T& to() const { return std::any_cast<const T&>(*this); } 38 | 39 | template <typename T> 40 | T& to() { return std::any_cast<T&>(*this); } 41 | 42 | template <notParaAny T> 43 | Any(T&& t) : 44 | std::any(std::forward<T>(t)) 45 | { 46 | } 47 | }; 48 | 49 | using VectorAny = std::vector<Any>; 50 | 51 | class DLL_EXPORT MatrixOp 52 | { 53 | private: 54 | MatrixOpType type_ = MatrixOpType::NONE; 55 | std::vector<MatrixSP> in_; //input data 56 | std::vector<MatrixSP> wb_; //parameters, usually weights and biases 57 | std::vector<MatrixSP> out_; //output data 58 | //the following parameters are generally invisible to the outside 59 | //commonly used types 60 | std::vector<float> a_, b_; 61 | std::vector<int> window_, stride_, padding_; 62 | std::vector<Matrix> workspace_; 63 | //unknown or variable types 64 | VectorAny anys_; 65 | 66 | //float dw_scale_ = 1; 67 | 68 | public: 69 | MatrixOp() = default; 70 | 71 | void set(MatrixOpType t, const std::vector<MatrixSP>& m_in, const std::vector<MatrixSP>& m_wb, const std::vector<MatrixSP>& m_out, std::vector<float>&& a = {}, std::vector<float>&& b = {}, VectorAny&& pv = {}, std::vector<Matrix>&& workspace = {}, 72 | std::vector<int>&& window = {}, std::vector<int>&& stride = {}, std::vector<int>&& padding = {}) 73 | { 74 | type_ = t; 75 | in_ = m_in; 76 | wb_ = m_wb; 77 | out_ = m_out; 78 | 79 | a_ = a; 80 | b_ = b; 81 | a_.resize(in_.size(), 1); 82 | b_.resize(out_.size(), 0); 83 | 84 | anys_ = pv; 85 | 86 | window_ = window; 87 | stride_ = stride; 88 | padding_ = padding; 89 | workspace_ = workspace; 90 | 91 | if (out_.size() > 0 && out_[0]->getDataSize() == 0) 92 | { 93 | LOG_ERR("Error: output is empty!\n"); 94 | } 95 | } 96 | 97 | static void forward(std::vector<MatrixOp>& op_queue); 98 | static void backward(std::vector<MatrixOp>& op_queue, std::vector<MatrixOp>& loss, bool clear_d); 99 | void forwardData(); 100 | void backwardDataWeight(); 101 | void backwardLoss(); 102 | 103 | static std::string ir(const std::vector<MatrixOp>& op_queue); 104 | std::string print() const; 105 | 106 | MatrixOpType getType() { return type_; } 107 | 108 | std::vector<MatrixSP>& getMatrixIn() { return in_; } 109 | 110 | std::vector<MatrixSP>& getMatrixWb() { return wb_; } 111 | 112 | std::vector<MatrixSP>& getMatrixOut() { return out_; } 113 | 114 | ActiveFunctionType getActiveType() const; 115 | int setActiveType(ActiveFunctionType af); 116 | 117 | static void simpleQueue(std::vector<MatrixOp>& op_queue, Matrix& X, Matrix& A); //keep only the part of the compute graph connected to X and A 118 | 119 | public: 120 | static void getDefaultStridePadding(MatrixOpType type,
const std::vector& dim, std::vector& stride, std::vector& padding); 121 | 122 | void setNeedReverse(bool r) 123 | { 124 | for (auto& m : in_) 125 | { 126 | m->setNeedBack(r); 127 | } 128 | for (auto& m : wb_) 129 | { 130 | m->setNeedBack(r); 131 | } 132 | } 133 | 134 | //void setDWScale(float s) { dw_scale_ = s; } 135 | 136 | public: 137 | //下面这些函数会设置这个op的参数,并自动计算Y的尺寸返回 138 | void as_scale(const MatrixSP& X, const MatrixSP& Y, float r); 139 | void as_mul(const MatrixSP& X1, const MatrixSP& X2, const MatrixSP& Y, float a = 1, std::vector dim = {}); 140 | void as_elementMul(const MatrixSP& X1, const MatrixSP& X2, const MatrixSP& Y, float a = 1); 141 | void as_add(const MatrixSP& X1, const MatrixSP& X2, const MatrixSP& Y, float a = 1, float b = 1); 142 | void as_add(const std::vector& X_vector, const MatrixSP& Y); 143 | void as_addBias(const MatrixSP& X, const MatrixSP& bias, const MatrixSP& Y, float a = 1, float b = 1); 144 | void as_concat(const std::vector& X_vector, const MatrixSP& Y); 145 | void as_active(const MatrixSP& X, const MatrixSP& Y, ActiveFunctionType af); 146 | void as_active(const MatrixSP& X, const MatrixSP& Y, ActiveFunctionType af, std::vector&& int_vector, std::vector&& real_vector, std::vector&& matrix_vector); 147 | void as_pool(const MatrixSP& X, const MatrixSP& Y, PoolingType pooling_type, PoolingReverseType reverse_type, std::vector window, std::vector stride, std::vector padding, float a = 1); 148 | void as_conv(const MatrixSP& X, const MatrixSP& W, const MatrixSP& Y, std::vector stride, std::vector padding, int conv_algo, float a = 1); 149 | void as_reshape(const MatrixSP& X, const MatrixSP& Y, std::vector& dim); 150 | void as_max(const MatrixSP& X1, const MatrixSP& X2, const MatrixSP& Y); 151 | 152 | //以下专为处理损失函数 153 | private: 154 | double value_ = 0; 155 | double scale_ = 1; 156 | 157 | friend std::vector operator+(const std::vector& A, const std::vector& B); 158 | friend std::vector operator*(const std::vector& A, double v); 159 | friend std::vector operator*(double v, const std::vector& A); 160 | friend std::vector crossEntropy(MatrixSP& A, MatrixSP& Y); 161 | friend std::vector L2(MatrixSP& A); 162 | 163 | public: 164 | double calc(const MatrixOp& op) { return op.value_ * op.scale_; } 165 | 166 | double calc(const std::vector& ops) 167 | { 168 | double sum = 0; 169 | for (auto& op : ops) 170 | { 171 | sum += calc(op); 172 | } 173 | return sum; 174 | } 175 | }; 176 | 177 | //基础运算结束 178 | 179 | //以下为处理损失函数 180 | std::vector operator+(const std::vector& A, const std::vector& B); 181 | std::vector& operator+=(std::vector& A, const std::vector& B); 182 | std::vector operator*(const std::vector& A, double v); 183 | std::vector operator*(double v, const std::vector& A); 184 | 185 | std::vector crossEntropy(const MatrixSP& A, const MatrixSP& Y); 186 | std::vector L2(const MatrixSP& A); 187 | std::vector L2(const std::vector& v); 188 | 189 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/Net.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "MatrixOp.h" 3 | #include "Solver.h" 4 | 5 | namespace cccc 6 | { 7 | 8 | class DLL_EXPORT Net 9 | { 10 | public: 11 | Net(); 12 | virtual ~Net(); 13 | Net(const Net&) = delete; 14 | Net& operator=(const Net&) = delete; 15 | 16 | protected: 17 | Option* option_; 18 | 19 | Matrix all_weights_; 20 | 21 | Solver solver_; 22 | 23 | std::vector op_queue_; 24 | std::vector loss_; 25 | 26 | std::vector weights_; 27 | 
std::vector solvers_for_weight_; 28 | 29 | MatrixSP X_, A_; 30 | MatrixSP Y_ = makeMatrixSP(); 31 | MatrixSP loss_weight_ = makeMatrixSP(); //该变量用以强调或忽视某些样本 32 | 33 | GpuControl* gpu_; 34 | 35 | int seperate_update_weight_ = 0; 36 | 37 | public: 38 | int init(); 39 | virtual int init2() = 0; 40 | 41 | void setGpu(GpuControl* gpu) { gpu_ = gpu; } 42 | 43 | GpuControl* getGpu() { return gpu_; } 44 | 45 | void setDeviceSelf() { gpu_->setAsCurrent(); } 46 | 47 | Matrix& getAllWeights() { return all_weights_; } 48 | 49 | void setOption(Option* op) { option_ = op; } 50 | 51 | int getBatch() { return getX().getNumber(); } 52 | 53 | public: 54 | Matrix& getX() { return *X_; } 55 | 56 | Matrix& getY() { return *Y_; } 57 | 58 | Matrix& getA() { return *A_; } 59 | 60 | Matrix& getLossWeight() { return *loss_weight_; } 61 | 62 | void initLossWeight() { loss_weight_->resize(Y_->getDim()); } 63 | 64 | public: 65 | void active(Matrix* X, Matrix* Y, Matrix* A, bool back, float* error); 66 | 67 | void updateWeight(); 68 | 69 | virtual int saveWeight(const std::string& filename, const std::string& sign = ""); 70 | int loadWeight(const std::string& str, int load_mode = 0); 71 | 72 | int weightDataSize() const; 73 | float weightSumAbs() const; 74 | float weightNorm2() const; 75 | void calNorm(int& n, float& l1, float& l2) const; 76 | void outputNorm() const; 77 | 78 | private: 79 | int resetBatchSize(int n); 80 | std::vector getTestGroup(); 81 | 82 | public: 83 | int test(Matrix* X, Matrix* Y, Matrix* A, const std::string& info = "", 84 | int output_group = 0, int test_type = 0, int attack_times = 0, 85 | double* result_ptr = nullptr, std::vector>* resultv_ptr = nullptr); 86 | 87 | protected: 88 | void combineWeights(std::vector& weights, Matrix& result); 89 | void initWeights(); 90 | std::string ir(); 91 | 92 | public: 93 | void attack(Matrix* X, Matrix* Y); 94 | void groupAttack(Matrix* X, Matrix* Y, int attack_times); 95 | 96 | public: 97 | Solver& getSolver() { return solver_; } 98 | 99 | public: 100 | virtual void doSomeThing() {} 101 | 102 | //Net* clone(int clone_data = 0); 103 | }; 104 | 105 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/NetCifa.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Cifa.h" 3 | #include "MatrixOp.h" 4 | #include "Net.h" 5 | 6 | namespace cccc 7 | { 8 | 9 | class DLL_EXPORT NetCifa : public Net 10 | { 11 | public: 12 | NetCifa(); 13 | ~NetCifa(); 14 | 15 | private: 16 | cifa::Cifa cifa_; 17 | 18 | using Loss = std::vector; 19 | using MatrixGroup = std::vector; 20 | static std::vector getVector(cifa::ObjectVector& v, int index); 21 | static std::vector getIntVector(cifa::ObjectVector& v, int index); 22 | static std::vector getRealVector(cifa::ObjectVector& v, int index); 23 | 24 | std::string message_; 25 | 26 | public: 27 | virtual int init2() override; 28 | 29 | int runScript(const std::string& script); 30 | int registerFunctions(); 31 | 32 | private: 33 | void setXA(const MatrixSP& X, const MatrixSP& A) 34 | { 35 | X_ = X; 36 | A_ = A; 37 | } 38 | }; 39 | 40 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/NetLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "NetLayer.h" 2 | #include "VectorMath.h" 3 | #include "strfunc.h" 4 | 5 | namespace cccc 6 | { 7 | 8 | NetLayer::NetLayer() 9 | { 10 | } 11 | 12 | NetLayer::~NetLayer() 13 | { 14 
| } 15 | 16 | //Note: the names of the input and output layers can be customized, so one ini may describe several nets 17 | int NetLayer::init2() 18 | { 19 | //batch is used at the very beginning to set the initial size of the net 20 | int state = createAndConnectLayers(); 21 | if (state) 22 | { 23 | LOG("Net structure is wrong!\n"); 24 | return 1; 25 | } 26 | Y_->resize(A_->getDim()); 27 | return 0; 28 | } 29 | 30 | int NetLayer::createAndConnectLayers() 31 | { 32 | Layer *layer_in = nullptr, *layer_out = nullptr; 33 | std::string layer_in_name = "layer_in"; 34 | std::string layer_out_name = "layer_out"; 35 | std::vector<Layer*> all_layer_vector; //all layers found, including some redundant ones 36 | std::vector<Layer*> layer_vector; //the order of this vector is the computation order 37 | 38 | //lambda: resolve the previous and next layers 39 | auto connect_layers = [this]() 40 | { 41 | for (auto& name_layer : all_layer_map_) 42 | { 43 | auto& l = name_layer.second; 44 | auto nexts = strfunc::splitString(option_->getString(l->getName(), "next"), ","); 45 | for (auto& next : nexts) 46 | { 47 | strfunc::replaceAllSubStringRef(next, " ", ""); 48 | if (next != "" && all_layer_map_.count(next) > 0) 49 | { 50 | l->addNextLayers(all_layer_map_[next].get()); 51 | all_layer_map_[next]->addPrevLayers(l.get()); 52 | } 53 | } 54 | } 55 | }; 56 | //lambda: whether a layer is already in the vector 57 | auto contains = [&](std::vector<Layer*>& v, Layer* l) -> bool 58 | { 59 | return std::find(v.begin(), v.end(), l) != v.end(); 60 | }; 61 | //lambda: recursively push layers onto the vector 62 | //if the last parameter is false only the existence of connections is checked; if true the strict propagation order is computed 63 | std::function<void(Layer*, int, std::vector<Layer*>&, bool)> push_cal_stack = [&](Layer* layer, int direct, std::vector<Layer*>& stack, bool turn) 64 | { 65 | //layer connections must not form a cycle 66 | if (layer == nullptr || contains(stack, layer)) 67 | { 68 | return; 69 | } 70 | std::vector<Layer*> connect0, connect1; 71 | connect1 = layer->getNextLayers(); 72 | connect0 = layer->getPrevLayers(); 73 | 74 | if (direct < 0) 75 | { 76 | std::swap(connect0, connect1); 77 | } 78 | //push this layer only after all layers in front of it have been pushed 79 | bool contain_all0 = true; 80 | for (auto& l : connect0) 81 | { 82 | if (!contains(stack, l)) 83 | { 84 | contain_all0 = false; 85 | break; 86 | } 87 | } 88 | if (!turn || (!contains(stack, layer) && contain_all0)) 89 | { 90 | stack.push_back(layer); 91 | } 92 | else 93 | { 94 | return; 95 | } 96 | for (auto& l : connect1) 97 | { 98 | push_cal_stack(l, direct, stack, turn); 99 | } 100 | }; 101 | 102 | //find all sections that define a layer 103 | auto sections = option_->getAllSections(); 104 | 105 | //create all the layers first 106 | for (auto& section : sections) 107 | { 108 | if (strfunc::toLowerCase(section).find("layer") == 0) 109 | { 110 | //LOG("Found layer {}\n", section); 111 | auto ct = option_->getEnum(section, "type", LAYER_CONNECTION_NONE); 112 | auto l = std::make_shared<Layer>(); 113 | l->setConnection(ct); 114 | l->setOption(option_); 115 | l->setName(section); 116 | 117 | //check here whether some layers actually produce no output; this is not strict, the correctness of the net should be verified by the user 118 | auto dim = option_->getVector<int>(section, "node"); 119 | dim.push_back(option_->getInt(section, "channel", 1)); 120 | if (VectorMath::multiply(dim) <= 0) 121 | { 122 | option_->setKey(section, "next", ""); 123 | } 124 | all_layer_vector.push_back(l.get()); 125 | if (l->getName() == layer_in_name) 126 | { 127 | layer_in = l.get(); 128 | l->setVisible(LAYER_VISIBLE_IN); 129 | } 130 | if (l->getName() == layer_out_name) 131 | { 132 | layer_out = l.get(); 133 | l->setVisible(LAYER_VISIBLE_OUT); 134 | } 135 | all_layer_map_[l->getName()] = l; 136 | } 137 | } 138 | //connect, compute the connections in both directions, remove useless items, then reconnect 139 | connect_layers(); 140 | std::vector<Layer*> forward, backward; 141 | //computation order of the layers 142 | push_cal_stack(layer_in, 1, forward, false); 143 | push_cal_stack(layer_out, -1, backward, false); 144 | 145 | //layers not reachable in both directions are all discarded 146 | std::vector<std::pair<std::string, std::shared_ptr<Layer>>> temp; 147 | for (auto& name_layer : all_layer_map_) 148 | { 149 | auto l = 
name_layer.second.get(); 150 | if (!(contains(forward, l) && contains(backward, l))) 151 | { 152 | temp.push_back(name_layer); 153 | } 154 | } 155 | for (auto& name_layer : temp) 156 | { 157 | all_layer_map_.erase(name_layer.first); 158 | //safe_delete(name_layer.second); 159 | //LOG("Remove bad layer {}\n", name_layer.first); 160 | } 161 | for (auto& l : all_layer_vector) 162 | { 163 | l->clearConnect(); 164 | } 165 | connect_layers(); 166 | forward.clear(); 167 | backward.clear(); 168 | push_cal_stack(layer_in, 1, layer_vector, true); 169 | for (int i = 0; i < layer_vector.size(); i++) 170 | { 171 | layer_vector[i]->setID(i); 172 | } 173 | //push_cal_stack(layer_out, -1, backward, true); 174 | 175 | //it is an error if the out layer is not included 176 | if (!contains(layer_vector, layer_out)) 177 | { 178 | return 1; 179 | } 180 | 181 | //initialization 182 | for (auto& layer : layer_vector) 183 | { 184 | layer->makeMatrixOp(op_queue_); 185 | } 186 | int index = 0; 187 | if (option_->getInt("train", "output_net", 1)) 188 | { 189 | for (auto& layer : layer_vector) 190 | { 191 | LOG("---------- Layer {:3} ----------\n", index++); 192 | layer->message(); 193 | } 194 | } 195 | X_ = layer_vector.front()->A_; 196 | A_ = layer_vector.back()->A_; 197 | 198 | all_layer_map_.clear(); 199 | 200 | auto loss_weight_values = strfunc::findNumbers(option_->getString(layer_out_name, "loss_weight")); 201 | if (loss_weight_values.size() > 0) 202 | { 203 | LOG("Loss weight {}: {}\n", loss_weight_values.size(), loss_weight_values); 204 | } 205 | loss_weight_->resize(loss_weight_values.size(), 1); 206 | loss_weight_->importData(loss_weight_values.data(), loss_weight_values.size()); 207 | loss_weight_->resizeNumber(A_->getNumber()); 208 | loss_weight_->repeat(1); 209 | return 0; 210 | } 211 | 212 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/NetLayer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Layer.h" 3 | #include "Net.h" 4 | 5 | namespace cccc 6 | { 7 | 8 | class NetLayer : public Net 9 | { 10 | public: 11 | NetLayer(); 12 | virtual ~NetLayer(); 13 | 14 | private: 15 | //net structure 16 | 17 | std::map<std::string, std::shared_ptr<Layer>> all_layer_map_; //layers indexed by name 18 | 19 | public: 20 | virtual int init2() override; 21 | 22 | private: 23 | int createAndConnectLayers(); 24 | }; 25 | 26 | } //namespace cccc -------------------------------------------------------------------------------- /cccc/Option.cpp: -------------------------------------------------------------------------------- 1 | #include "Option.h" 2 | #include "strfunc.h" 3 | 4 | namespace cccc 5 | { 6 | 7 | Option::Option() 8 | { 9 | initEnums(); 10 | } 11 | 12 | Option::Option(const std::string& filename) : Option() 13 | { 14 | loadFile(filename); 15 | } 16 | 17 | //void Option::setKeys(const std::string& section, const std::string& pairs) 18 | //{ 19 | // setKeys(section, strfunc::splitString(pairs, ";")); 20 | //} 21 | // 22 | //void Option::setKeys(const std::string& section, const std::vector<std::string>& pairs) 23 | //{ 24 | // for (auto pair : pairs) 25 | // { 26 | // strfunc::replaceAllSubStringRef(pair, " ", ""); 27 | // auto p = pair.find("="); 28 | // if (p != std::string::npos) 29 | // { 30 | // auto key = pair.substr(0, p); 31 | // auto value = pair.substr(p + 1); 32 | // setKey(section, key, value); 33 | // } 34 | // } 35 | //} 36 | 37 | std::string Option::dealString(std::string str, bool allow_path) 38 | { 39 | size_t p = 0; 40 | while ((p = str.find("{", p)) != std::string::npos) 41 | { 42 | auto p0 = 
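// (descriptive note) p0 remembers where the '{' starts, so that the whole
// "{section::key}" span can be replaced in one step further below.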
p; 43 | p++; 44 | std::string section, key; 45 | auto p1 = str.find("::", p); 46 | if (p1 != std::string::npos) 47 | { 48 | section = str.substr(p, p1 - p); 49 | p = p1 + 2; 50 | } 51 | auto p2 = str.find("}", p); 52 | { 53 | key = str.substr(p, p2 - p); 54 | p = p2 + 1; 55 | } 56 | if (hasKey(section, key)) 57 | { 58 | std::string sub = getString(section, key); 59 | if (!allow_path) 60 | { 61 | #ifdef _WIN32 62 | strfunc::replaceAllSubStringRef(sub, "\\", ""); 63 | #endif 64 | strfunc::replaceAllSubStringRef(sub, "/", ""); 65 | } 66 | if (sub.length() > 50) 67 | { 68 | sub = sub.substr(0, 50); 69 | } 70 | strfunc::replaceAllSubStringRef(str, str.substr(p0, p - p0), sub); 71 | p = p0 + sub.size(); 72 | } 73 | } 74 | return str; 75 | } 76 | 77 | //初始化map,注意一些设置是有别名的 78 | //所有字符小写!!!! 79 | //第一项通常是默认项 80 | void Option::initEnums() 81 | { 82 | registerEnum( 83 | { 84 | { "float", DataType::FLOAT }, 85 | { "double", DataType::DOUBLE }, 86 | { "half", DataType::HALF }, 87 | }); 88 | 89 | registerEnum( 90 | { 91 | { "cpu", UnitType::CPU }, 92 | { "gpu", UnitType::GPU }, 93 | }); 94 | 95 | registerEnum( 96 | { 97 | { "none", ACTIVE_FUNCTION_NONE }, 98 | { "sigmoid", ACTIVE_FUNCTION_SIGMOID }, 99 | { "relu", ACTIVE_FUNCTION_RELU }, 100 | { "tanh", ACTIVE_FUNCTION_TANH }, 101 | { "clipped_relu", ACTIVE_FUNCTION_CLIPPED_RELU }, 102 | { "elu", ACTIVE_FUNCTION_ELU }, 103 | { "softmax", ACTIVE_FUNCTION_SOFTMAX }, 104 | { "softmax_fast", ACTIVE_FUNCTION_SOFTMAX_FAST }, 105 | { "softmax_log", ACTIVE_FUNCTION_SOFTMAX_LOG }, 106 | { "dropout", ACTIVE_FUNCTION_DROPOUT }, 107 | { "recurrent", ACTIVE_FUNCTION_RECURRENT }, 108 | { "softplus", ACTIVE_FUNCTION_SOFTPLUS }, 109 | { "local_response_normalization", ACTIVE_FUNCTION_LOCAL_RESPONSE_NORMALIZATION }, 110 | { "lrn", ACTIVE_FUNCTION_LOCAL_RESPONSE_NORMALIZATION }, 111 | { "local_constrast_normalization", ACTIVE_FUNCTION_LOCAL_CONSTRAST_NORMALIZATION }, 112 | { "lcn", ACTIVE_FUNCTION_LOCAL_CONSTRAST_NORMALIZATION }, 113 | { "divisive_normalization", ACTIVE_FUNCTION_DIVISIVE_NORMALIZATION }, 114 | { "dn", ACTIVE_FUNCTION_DIVISIVE_NORMALIZATION }, 115 | { "batch_normalization", ACTIVE_FUNCTION_BATCH_NORMALIZATION }, 116 | { "bn", ACTIVE_FUNCTION_BATCH_NORMALIZATION }, 117 | { "spatial_transformer", ACTIVE_FUNCTION_SPATIAL_TRANSFORMER }, 118 | { "sum_max", ACTIVE_FUNCTION_SUMMAX }, 119 | { "zero_channel", ACTIVE_FUNCTION_ZERO_CHANNEL }, 120 | { "sigmoid_ce", ACTIVE_FUNCTION_SIGMOID_CE }, 121 | { "softmax_ce", ACTIVE_FUNCTION_SOFTMAX_CE }, 122 | { "softmax_fast_ce", ACTIVE_FUNCTION_SOFTMAX_FAST_CE }, 123 | { "sin", ACTIVE_FUNCTION_SIN }, 124 | { "zigzag", ACTIVE_FUNCTION_ZIGZAG }, 125 | { "leaky_relu", ACTIVE_FUNCTION_LEAKY_RELU }, 126 | { "lrelu", ACTIVE_FUNCTION_LEAKY_RELU }, 127 | { "selu", ACTIVE_FUNCTION_SELU }, 128 | { "square", ACTIVE_FUNCTION_SQUARE }, 129 | { "abs", ACTIVE_FUNCTION_ABS }, 130 | { "sinp", ACTIVE_FUNCTION_SIN_PLUS }, 131 | { "silu", ACTIVE_FUNCTION_SILU }, 132 | { "sigmoid3", ACTIVE_FUNCTION_SIGMOID3 }, 133 | { "softmax3", ACTIVE_FUNCTION_SOFTMAX3 }, 134 | }); 135 | 136 | registerEnum( 137 | { 138 | { "none", LAYER_CONNECTION_NONE }, 139 | { "null", LAYER_CONNECTION_NONE }, 140 | { "full_connect", LAYER_CONNECTION_FULLCONNECT }, 141 | { "full", LAYER_CONNECTION_FULLCONNECT }, 142 | { "fc", LAYER_CONNECTION_FULLCONNECT }, 143 | { "convolution", LAYER_CONNECTION_CONVOLUTION }, 144 | { "conv", LAYER_CONNECTION_CONVOLUTION }, 145 | { "pooling", LAYER_CONNECTION_POOLING }, 146 | { "pool", LAYER_CONNECTION_POOLING }, 147 | { "direct", 
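// (descriptive note) several strings may map to the same enum value, e.g.
// "conv2" below is an alias of "correlation"; and because the map uses
// CompareNoUnderline, lookups also ignore case and underscores, so
// "full_connect" and "FullConnect" resolve identically.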
LAYER_CONNECTION_DIRECT }, 148 | { "correlation", LAYER_CONNECTION_CORRELATION }, 149 | { "corr", LAYER_CONNECTION_CORRELATION }, 150 | { "conv2", LAYER_CONNECTION_CORRELATION }, 151 | { "combine", LAYER_CONNECTION_COMBINE }, 152 | { "extract", LAYER_CONNECTION_EXTRACT }, 153 | { "roteigen", LAYER_CONNECTION_ROTATE_EIGEN }, 154 | { "norm2", LAYER_CONNECTION_NORM2 }, 155 | { "transpose", LAYER_CONNECTION_TRANSPOSE }, 156 | { "nac", LAYER_CONNECTION_NAC }, 157 | }); 158 | 159 | registerEnum( 160 | { 161 | { "rmse", COST_FUNCTION_RMSE }, 162 | { "cross_entropy", COST_FUNCTION_CROSS_ENTROPY }, 163 | }); 164 | 165 | registerEnum( 166 | { 167 | { "max", POOLING_MAX }, 168 | { "average", POOLING_AVERAGE_NOPADDING }, 169 | { "average_no_padding", POOLING_AVERAGE_NOPADDING }, 170 | { "average_padding", POOLING_AVERAGE_PADDING }, 171 | { "ma", POOLING_MA }, 172 | }); 173 | 174 | registerEnum( 175 | { 176 | { "concat", COMBINE_CONCAT }, 177 | { "add", COMBINE_ADD }, 178 | }); 179 | 180 | registerEnum( 181 | { 182 | { "xavier", RANDOM_FILL_XAVIER }, 183 | { "constant", RANDOM_FILL_CONSTANT }, 184 | { "gaussian", RANDOM_FILL_GAUSSIAN }, 185 | { "msra", RANDOM_FILL_GAUSSIAN }, 186 | { "lecun", RANDOM_FILL_LECUN }, 187 | }); 188 | 189 | registerEnum( 190 | { 191 | { "fixed", ADJUST_LEARN_RATE_FIXED }, 192 | { "scale_inter", ADJUST_LEARN_RATE_SCALE_INTER }, 193 | { "linear_inter", ADJUST_LEARN_RATE_LINEAR_INTER }, 194 | { "steps", ADJUST_LEARN_RATE_STEPS }, 195 | { "steps_warm", ADJUST_LEARN_RATE_STEPS_WARM }, 196 | { "steps_auto", ADJUST_LEARN_RATE_STEPS_AUTO }, 197 | }); 198 | 199 | registerEnum( 200 | { 201 | { "per_active", BATCH_NORMALIZATION_PER_ACTIVATION }, 202 | { "spatial", BATCH_NORMALIZATION_SPATIAL }, 203 | { "auto", BATCH_NORMALIZATION_AUTO }, 204 | }); 205 | 206 | registerEnum( 207 | { 208 | { "relu", RECURRENT_RELU }, 209 | { "tanh", RECURRENT_TANH }, 210 | { "lstm", RECURRENT_LSTM }, 211 | { "gru", RECURRENT_GRU }, 212 | }); 213 | 214 | registerEnum( 215 | { 216 | { "uni", RECURRENT_DIRECTION_UNI }, 217 | { "bi", RECURRENT_DIRECTION_BI }, 218 | }); 219 | 220 | registerEnum( 221 | { 222 | { "linear", RECURRENT_INPUT_LINEAR }, 223 | { "skip", RECURRENT_INPUT_SKIP }, 224 | }); 225 | 226 | registerEnum( 227 | { 228 | { "standard", RECURRENT_ALGO_STANDARD }, 229 | { "static", RECURRENT_ALGO_PERSIST_STATIC }, 230 | { "dynamic", RECURRENT_ALGO_PERSIST_DYNAMIC }, 231 | }); 232 | 233 | registerEnum( 234 | { 235 | { "sgd", SOLVER_SGD }, 236 | { "nag", SOLVER_NAG }, 237 | { "ada_delta", SOLVER_ADA_DELTA }, 238 | { "adam", SOLVER_ADAM }, 239 | { "rms_prop", SOLVER_RMS_PROP }, 240 | }); 241 | 242 | registerEnum( 243 | { 244 | { "normal", WORK_MODE_NORMAL }, 245 | { "gan", WORK_MODE_GAN }, 246 | { "prune", WORK_MODE_PRUNE }, 247 | }); 248 | 249 | registerEnum( 250 | { 251 | { "active", PRUNE_ACTIVE }, 252 | { "weight", PRUNE_WEIGHT }, 253 | }); 254 | } 255 | 256 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/Option.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "ConsoleControl.h" 3 | #include "Log.h" 4 | #include "types.h" 5 | #include 6 | #include 7 | #define VIRTUAL_GET_STRING 8 | #include "INIReader.h" 9 | 10 | namespace cccc 11 | { 12 | 13 | //该类用于读取配置文件,并转换其中的字串设置为枚举 14 | //注意实数只获取双精度数,如果是单精度模式会包含隐式转换 15 | //获取整数的时候,先获取双精度数并强制转换 16 | 17 | class Option : public INIReaderNoUnderline 18 | { 19 | public: 20 | Option(); 21 | Option(const std::string& filename); 22 | 23 | 
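// Usage sketch for dealString() (illustrative values, not from the project):
//   Option opt("mnist-lenet.ini");  // suppose [train] contains save_sign=MNIST
//   auto s = opt.dealString("save_{train::save_sign}.txt", false);
//   // s == "save_MNIST.txt"; with allow_path == false any '/' or '\\' in the
//   // substituted value is stripped and the value is clipped to 50 characters.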
public: 24 | std::string dealString(std::string str, bool allow_path); 25 | 26 | //enum handling below 27 | private: 28 | struct CompareNoUnderline 29 | { 30 | bool operator()(const std::string& l, const std::string& r) const 31 | { 32 | auto l1 = l; 33 | auto r1 = r; 34 | auto replaceAllString = [](std::string& s, const std::string& oldstring, const std::string& newstring) 35 | { 36 | int pos = s.find(oldstring); 37 | while (pos >= 0) 38 | { 39 | s.erase(pos, oldstring.length()); 40 | s.insert(pos, newstring); 41 | pos = s.find(oldstring, pos + newstring.length()); 42 | } 43 | }; 44 | replaceAllString(l1, "_", ""); 45 | replaceAllString(r1, "_", ""); 46 | std::transform(l1.begin(), l1.end(), l1.begin(), ::tolower); 47 | std::transform(r1.begin(), r1.end(), r1.begin(), ::tolower); 48 | return l1 < r1; 49 | } 50 | }; 51 | 52 | std::map<std::string, std::map<std::string, int, CompareNoUnderline>> enum_map_; 53 | std::map<std::string, std::map<int, std::string>> enum_map_reverse_; 54 | 55 | //register enum values 56 | template <typename T> 57 | void registerEnum(std::vector<std::pair<std::string, T>> members) 58 | { 59 | for (auto m : members) 60 | { 61 | enum_map_[typeid(T).name()][m.first] = int(m.second); 62 | //the reverse map keeps only the first entry, so mind the order when registering 63 | if (enum_map_reverse_[typeid(T).name()].count(int(m.second)) == 0) 64 | { 65 | enum_map_reverse_[typeid(T).name()][int(m.second)] = m.first; 66 | } 67 | } 68 | } 69 | 70 | void initEnums(); 71 | 72 | mutable std::map<std::string, std::map<std::string, int>> outputed_keys_; 73 | 74 | private: 75 | template <typename T> 76 | void outputValue(const std::string& section, const std::string& key, const T& value, const T& default_value) const 77 | { 78 | int count = outputed_keys_[section][key]++; 79 | if (output_ == 0 80 | || (output_ == 2 && value == default_value)) 81 | { 82 | return; 83 | } 84 | if (count == 0) 85 | { 86 | ConsoleControl::setColor(CONSOLE_COLOR_GREEN); 87 | LOG(" [\"{}\"][\"{}\"] = {}\n", section, key, value); 88 | ConsoleControl::setColor(CONSOLE_COLOR_NONE); 89 | } 90 | } 91 | 92 | int output_ = 1; //0: no output, 1: output, 2: output only if different from the default 93 | 94 | public: 95 | void setOutput(int output) 96 | { 97 | output_ = output; 98 | } 99 | 100 | std::string getString(const std::string& section, const std::string& key, const std::string& default_value = "") const 101 | { 102 | auto value = INIReaderNoUnderline::getString(section, key, default_value); 103 | outputValue(section, key, value.substr(0, value.find_first_of(";\n")), default_value); 104 | return value; 105 | } 106 | 107 | int getInt(const std::string& section, const std::string& key, int default_value = 0) const 108 | { 109 | auto value = INIReaderNoUnderline::getInt(section, key, default_value); 110 | outputValue(section, key, value, default_value); 111 | return value; 112 | } 113 | 114 | float getReal(const std::string& section, const std::string& key, float default_value = 0) const 115 | { 116 | float value = INIReaderNoUnderline::getReal(section, key, default_value); 117 | outputValue(section, key, value, default_value); 118 | return value; 119 | } 120 | 121 | template <typename T> 122 | std::vector<T> getVector(const std::string& section, const std::string& key, const std::string& split_chars = ",", const std::vector<T>& default_v = {}) const 123 | { 124 | auto value = INIReaderNoUnderline::getVector<T>(section, key, split_chars, default_v); 125 | outputValue(section, key, value, default_v); 126 | return value; 127 | } 128 | 129 | //convert a string to an enum value 130 | template <typename T> 131 | T getEnumFromString(const std::string& value_str) 132 | { 133 | return T(enum_map_[typeid(T).name()][value_str]); 134 | } 135 | 136 | //read an enum value directly from the configuration 137 | //by C++ convention the last default should be T{}, but swig cannot parse that correctly 138 | template <typename T> 139 | T getEnum(const std::string& section, const std::string& key, T
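// Usage sketch (illustrative): once initEnums() has filled the maps,
//   getEnumFromString<SolverType>("adam")  // == SOLVER_ADAM
//   getStringFromEnum(SOLVER_ADAM)         // == "adam"
// The reverse map keeps only the first name registered for a value, which is
// why the canonical spelling is registered before its aliases.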
default_value = T(0)) 140 | { 141 | std::string value_str = INIReaderNoUnderline::getString(section, key); 142 | T v = default_value; 143 | if (enum_map_[typeid(T).name()].count(value_str) > 0) 144 | { 145 | v = T(enum_map_[typeid(T).name()][value_str]); 146 | } 147 | else 148 | { 149 | if (!value_str.empty()) 150 | { 151 | LOG("Warning: undefined value \"{}\" for {}, set to {}!\n", value_str, key, getStringFromEnum(T(0))); 152 | } 153 | } 154 | outputValue(section, key, getStringFromEnum(v), getStringFromEnum(default_value)); 155 | return v; 156 | } 157 | 158 | //reverse-lookup an enum value as its string 159 | template <typename T> 160 | std::string getStringFromEnum(T e) 161 | { 162 | return enum_map_reverse_[typeid(T).name()][int(e)]; 163 | } 164 | 165 | public: 166 | //strip the trailing underscore to make the output a little nicer 167 | static std::string removeEndUnderline(const std::string& str) 168 | { 169 | if (!str.empty() && str.back() == '_') 170 | { 171 | return str.substr(0, str.size() - 1); 172 | } 173 | return str; 174 | } 175 | }; 176 | 177 | //the macros below simplify reading parameters and must not be used for anything else; option and section must be defined beforehand 178 | #define NAME_STR(a) (Option::removeEndUnderline(#a).c_str()) 179 | #define OPTION_GET_INT(a) \ 180 | do { \ 181 | a = option->getInt(section, NAME_STR(a), a); \ 182 | } while (0) 183 | #define OPTION_GET_REAL(a) \ 184 | do { \ 185 | a = option->getReal(section, NAME_STR(a), a); \ 186 | } while (0) 187 | #define OPTION_GET_STRING(a) \ 188 | do { \ 189 | a = option->getString(section, NAME_STR(a), a); \ 190 | } while (0) 191 | #define OPTION_GET_NUMVECTOR(v, size, fill) \ 192 | do { \ 193 | v.resize(size, fill); \ 194 | v = option->getVector(section, NAME_STR(v), ",", v); \ 195 | } while (0) 196 | #define OPTION_GET_STRINGVECTOR(v) \ 197 | do { \ 198 | v = option->getVector(section, NAME_STR(v), ",", v); \ 199 | } while (0) 200 | 201 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/Solver.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Matrix.h" 3 | #include "Option.h" 4 | 5 | namespace cccc 6 | { 7 | 8 | //strategy for updating the parameters 9 | class DLL_EXPORT Solver 10 | { 11 | public: 12 | Solver(); 13 | virtual ~Solver(); 14 | Solver(const Solver&) = delete; 15 | Solver& operator=(const Solver&) = delete; 16 | 17 | protected: 18 | //int batch_; //size of each batch 19 | 20 | Matrix W_sign_; //adjustment for the L1 norm 21 | 22 | float weight_decay_ = 0, weight_decay_l1_ = 0; //regularization parameters, i.e. decay of the weights 23 | 24 | float momentum_ = 0.9; //how much of the previous dWeight is kept, i.e. momentum 25 | int momentum_clear_epoch_ = -1; //clear the accumulated momentum every this many epochs 26 | 27 | //learning rate adjustment 28 | AdjustLearnRateType lr_adjust_method_ = ADJUST_LEARN_RATE_FIXED; 29 | 30 | int lr_keep_count_ = 0; //count of the learning rate staying unchanged 31 | int lr_change_count_ = 0; //count of learning rate changes 32 | 33 | float learn_rate_base_ = 0.01; //base learning rate 34 | float learn_rate_ = 0.01; //learning rate 35 | 36 | int lr_steps_ = 3; 37 | float lr_step_decay_ = 0.1; 38 | int lr_test_stable_time_ = 20; //number of checks used to judge numerical stability 39 | double lr_test_std_limit_ = 0.01; //standard deviation threshold used to judge numerical stability 40 | 41 | std::vector<int> lr_inter_epoch_; 42 | std::vector<float> lr_inter_rate_; 43 | 44 | float lr_weight_scale_ = 1; //learning scale of the weights; multiplied by the learning rate it gives the effective rate 45 | float lr_bias_scale_ = 2; //learning scale of the biases 46 | 47 | //solver related 48 | SolverType solver_type_ = SOLVER_SGD; 49 | float time_step_ = 0; 50 | int switch_sgd_epoch_ = -1; 51 | float switch_sgd_random_ = 0; 52 | int switch_solver_ = 0; 53 | 54 | std::vector<float> restrict_dweight_{ 0, 0 }; //whether the gradient needs restricting, by L1: after dividing by batch it should not exceed this value times the weight 55 | std::vector<int> restrict_dweight_count_{ 0, 0 }; //counters for the operation above 56 | 57 | //special parameters needed by the solvers 58 | std::vector<float> real_vector_; 59 | std::vector<int> int_vector_; 60 | std::vector<Matrix> 
W_vector_; 61 | 62 | Matrix dW_; //the weight gradient actually used for the update 63 | 64 | float dw_scale_ = 1e-4; //scale factor of dW; a smaller value helps avoid some numerical overflow 65 | 66 | public: 67 | SolverType getSolverType() const { return solver_type_; } 68 | 69 | float getMomentum() const; 70 | 71 | void setMomentum(float m) { momentum_ = m; } 72 | 73 | void setLearnRateBase(float lrb) { learn_rate_base_ = lrb; } 74 | 75 | float getLearnRateBase() const { return learn_rate_base_; } 76 | 77 | float getLearnRate() const { return learn_rate_; } 78 | 79 | void resetTimeStep() { time_step_ = 0; } 80 | 81 | std::vector<Matrix>& getWVector() { return W_vector_; } 82 | 83 | float getDWScale() const { return dw_scale_; } 84 | 85 | void setDWScale(float s) { dw_scale_ = s; } 86 | 87 | //int getTrainEpochs() const { return train_epochs_; } 88 | 89 | public: 90 | void init(Option* option, std::string section, Matrix& W); 91 | int adjustLearnRate(int epoch, int total_epoch); 92 | int adjustLearnRate2(int epoch, int total_epoch, const std::vector<std::vector<TestInfo>>& test_info); 93 | void updateWeightBiasPre(Matrix& W); 94 | void updateWeights(Matrix& W, int batch); 95 | void reset(); 96 | void cancelRestrictDWeight(); 97 | 98 | void outputState() const; 99 | 100 | private: 101 | void destory(); 102 | 103 | int findInter(int x, std::vector<int>& xv); 104 | }; 105 | 106 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/TensorDesc.cpp: -------------------------------------------------------------------------------- 1 | #include "TensorDesc.h" 2 | #include "Log.h" 3 | #include "gpu_lib.h" 4 | #include "types.h" 5 | #include <algorithm> 6 | 7 | namespace cccc 8 | { 9 | 10 | TensorDesc::TensorDesc(uint32_t flag) 11 | { 12 | #if ENABLE_CUDA 13 | if ((flag & 1) && cudnnCreateTensorDescriptor) 14 | { 15 | cudnnCreateTensorDescriptor(&cudnn_tensor_desc_); 16 | } 17 | #endif 18 | #if ENABLE_HIP 19 | if ((flag & 2) && miopenCreateTensorDescriptor) 20 | { 21 | miopenCreateTensorDescriptor(&miopen_tensor_desc_); 22 | } 23 | #endif 24 | } 25 | 26 | TensorDesc::~TensorDesc() 27 | { 28 | #if ENABLE_CUDA 29 | if (cudnn_tensor_desc_) 30 | { 31 | cudnnDestroyTensorDescriptor(cudnn_tensor_desc_); 32 | } 33 | #endif 34 | #if ENABLE_HIP 35 | if (miopen_tensor_desc_) 36 | { 37 | miopenDestroyTensorDescriptor(miopen_tensor_desc_); 38 | } 39 | #endif 40 | } 41 | 42 | void TensorDesc::setDesc4D(DataType data_type, int w, int h, int c, int n) 43 | { 44 | if (n * c * h * w > 0) 45 | { 46 | if (cudnn_tensor_desc_) 47 | { 48 | 49 | auto r = cudnnSetTensor4dDescriptor(cudnn_tensor_desc_, CUDNN_TENSOR_NCHW, toCudnnDataType(data_type), n, c, h, w); 50 | if (r) 51 | { 52 | cccc::LOG_ERR("Set tensor failed!\n"); 53 | } 54 | } 55 | if (miopen_tensor_desc_) 56 | { 57 | auto r = miopenSet4dTensorDescriptor(miopen_tensor_desc_, toMiopenDataType(data_type), n, c, h, w); 58 | if (r) 59 | { 60 | cccc::LOG_ERR("Set tensor failed!\n"); 61 | } 62 | } 63 | } 64 | } 65 | 66 | void TensorDesc::setDescND(DataType data_type, std::vector<int> dim) 67 | { 68 | std::vector<int> dim1, stride; 69 | int size = dim.size(); 70 | 71 | dim1 = dim; 72 | stride.resize(size); 73 | std::reverse(dim1.begin(), dim1.end()); 74 | int s = 1; 75 | for (int i = 0; i < size; i++) 76 | { 77 | stride[size - 1 - i] = s; 78 | s *= dim1[size - 1 - i]; 79 | } 80 | if (cudnn_tensor_desc_) 81 | { 82 | if (size > CUDNN_DIM_MAX) 83 | { 84 | LOG_ERR("Error: wrong dimensions of tensor!\n"); 85 | return; 86 | } 87 | cudnnSetTensorNdDescriptor(cudnn_tensor_desc_, toCudnnDataType(data_type), size, dim1.data(), stride.data()); 88 | } 89 | if 
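// (illustrative) worked example for the stride computation above: with
// dim = {5, 4, 3, 2} in the library's w,h,c,n order (mirroring setDesc4D),
// dim1 becomes {2, 3, 4, 5} (n,c,h,w) and the packed strides come out as
// {60, 20, 5, 1}, i.e. a contiguous NCHW tensor.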
(miopen_tensor_desc_) 90 | { 91 | miopenSetTensorDescriptor(miopen_tensor_desc_, toMiopenDataType(data_type), size, dim1.data(), stride.data()); 92 | } 93 | } 94 | #if ENABLE_CUDA 95 | #define CREATE_CUDNN_DESC(type) \ 96 | template <> \ 97 | void OtherDesc::create(void** p) \ 98 | { \ 99 | cudnnCreate##type((cudnn##type##_t*)p); \ 100 | } 101 | template <> 102 | void OtherDesc::create(void** p) 103 | { 104 | } 105 | CREATE_CUDNN_DESC(PoolingDescriptor) 106 | #endif 107 | #if ENABLE_HIP 108 | #define CREATE_MIOPEN_DESC(type) \ 109 | template <> \ 110 | void OtherDesc::create(void** p) \ 111 | { \ 112 | miopenCreate##type((miopen##type##_t*)p); \ 113 | } 114 | 115 | CREATE_MIOPEN_DESC(ConvolutionDescriptor) 116 | CREATE_MIOPEN_DESC(ActivationDescriptor) 117 | CREATE_MIOPEN_DESC(PoolingDescriptor) 118 | #endif 119 | #if !ENABLE_CUDA && !ENABLE_HIP 120 | template <> 121 | void OtherDesc::create(void** p) 122 | { 123 | } 124 | #endif 125 | } //namespace cccc -------------------------------------------------------------------------------- /cccc/TensorDesc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "types.h" 3 | #include 4 | #include 5 | #include 6 | 7 | struct cudnnTensorStruct; 8 | struct miopenTensorDescriptor; 9 | 10 | namespace cccc 11 | { 12 | 13 | class TensorDesc 14 | { 15 | private: 16 | cudnnTensorStruct* cudnn_tensor_desc_ = nullptr; 17 | miopenTensorDescriptor* miopen_tensor_desc_ = nullptr; 18 | 19 | public: 20 | TensorDesc(uint32_t flag = 3); //1: cudnn, 2: miopen, 3: both 21 | ~TensorDesc(); 22 | 23 | cudnnTensorStruct* cudnnDesc() { return cudnn_tensor_desc_; } 24 | miopenTensorDescriptor* miopenDesc() { return miopen_tensor_desc_; } 25 | void setDesc4D(DataType data_type, int w, int h, int c, int n); 26 | void setDescND(DataType data_type, std::vector dim); 27 | }; 28 | 29 | class OtherDesc 30 | { 31 | std::unordered_map desc_; 32 | template 33 | static void create(void** p); 34 | 35 | public: 36 | template 37 | T getDesc() 38 | { 39 | auto it = desc_.find(typeid(T).hash_code()); 40 | if (it != desc_.end()) 41 | { 42 | return T(it->second); 43 | } 44 | else 45 | { 46 | auto& p = desc_[typeid(T).hash_code()]; 47 | create(&p); 48 | return T(p); 49 | } 50 | } 51 | }; 52 | 53 | } //namespace cccc -------------------------------------------------------------------------------- /cccc/VectorMath.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace cccc 5 | { 6 | 7 | namespace VectorMath 8 | { 9 | //向量数学类,全部是模板函数 10 | //cpu计算只使用float 11 | #define VECTOR(fv, f) \ 12 | inline void fv(const void* Av, void* Rv, int size, float a = 1, float r = 0) \ 13 | { \ 14 | auto A = (const float*)Av; \ 15 | auto R = (float*)Rv; \ 16 | for (int i = 0; i < size; i++) { R[i] = a * f(A[i]) + r * R[i]; } \ 17 | } 18 | #define VECTOR_B(fv, content) \ 19 | inline void fv(const void* Av, const void* DAv, const void* Rv, void* DRv, int size, float a = 1, float r = 0) \ 20 | { \ 21 | auto A = (const float*)Av; \ 22 | auto DA = (const float*)DAv; \ 23 | auto R = (const float*)Rv; \ 24 | auto DR = (float*)DRv; \ 25 | for (int i = 0; i < size; i++) { DR[i] = a * (content) + r * DR[i]; } \ 26 | } 27 | 28 | template 29 | inline T sigmoid(T x) 30 | { 31 | return 1 / (1 + exp(-x)); 32 | } 33 | template 34 | inline T softplus(T x) { return log(1 + exp(x)); } 35 | template 36 | inline T relu(T x) { return x > T(0) ? 
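// (descriptive note) the VECTOR/VECTOR_B macros above generate elementwise
// kernels of the form R[i] = a * f(A[i]) + r * R[i]; for example
// VECTOR(sigmoid_v, sigmoid) expands to
//   inline void sigmoid_v(const void* Av, void* Rv, int size, float a = 1, float r = 0)
//   { auto A = (const float*)Av; auto R = (float*)Rv;
//     for (int i = 0; i < size; i++) { R[i] = a * sigmoid(A[i]) + r * R[i]; } }
// The a/r pair follows the alpha/beta blending convention of cuBLAS/cuDNN.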
x : T(0); } 37 | 38 | VECTOR(log_v, log); 39 | VECTOR(exp_v, exp); 40 | 41 | VECTOR(sigmoid_v, sigmoid); 42 | VECTOR(relu_v, relu); 43 | VECTOR(tanh_v, tanh); 44 | VECTOR(softplus_v, softplus); 45 | template 46 | void linear_v(T* x, T* a, int size) { memcpy(a, x, sizeof(T) * size); } 47 | 48 | inline void clipped_relu_v(const void* Av, void* Rv, float v, int size, float a = 1, float r = 0) 49 | { 50 | auto A = (const float*)Av; 51 | auto R = (float*)Rv; 52 | for (int i = 0; i < size; i++) 53 | { 54 | if (A[i] > v) 55 | { 56 | R[i] = a * v + r * R[i]; 57 | } 58 | else if (A[i] < 0) 59 | { 60 | R[i] = r * R[i]; 61 | } 62 | else 63 | { 64 | R[i] = a * A[i] + r * R[i]; 65 | } 66 | } 67 | } 68 | 69 | VECTOR_B(exp_vb, A[i]); 70 | VECTOR_B(sigmoid_vb, A[i] * (1 - A[i]) * DA[i]); //sigmoid导数直接使用a计算 71 | VECTOR_B(relu_vb, R[i] > 0 ? DA[i] : 0); 72 | VECTOR_B(tanh_vb, (1 - A[i] * A[i]) * DA[i]); 73 | VECTOR_B(softplus_vb, sigmoid(R[i])); 74 | VECTOR_B(linear_vb, 1); 75 | 76 | inline void clipped_relu_vb(const void* Av, const void* DAv, const void* Rv, void* DRv, float v, int size, float a = 1, float r = 0) 77 | { 78 | auto A = (const float*)Av; 79 | auto DA = (const float*)DAv; 80 | auto R = (const float*)Rv; 81 | auto DR = (float*)DRv; 82 | for (int i = 0; i < size; i++) 83 | { 84 | DR[i] = a * ((R[i] > 0) && (R[i] < v) ? DA[i] : 0) + r * DR[i]; 85 | } 86 | } 87 | 88 | //下面3个都是softmax用的 89 | inline void minus_max(void* xv, int size) 90 | { 91 | auto x = (float*)xv; 92 | auto m = x[0]; 93 | for (int i = 1; i < size; i++) 94 | { 95 | m = std::max(x[i], m); 96 | } 97 | for (int i = 0; i < size; i++) 98 | { 99 | x[i] -= m; 100 | } 101 | } 102 | 103 | inline void softmax_vb_sub(const void* av, const void* dav, float v, void* dxv, int size) 104 | { 105 | auto a = (const float*)av; 106 | auto da = (const float*)dav; 107 | auto dx = (float*)dxv; 108 | for (int i = 0; i < size; i++) 109 | { 110 | dx[i] = a[i] * (da[i] - v); 111 | } 112 | } 113 | 114 | inline void softmaxlog_vb_sub(const void* av, const void* dav, float v, void* dxv, int size) 115 | { 116 | auto a = (const float*)av; 117 | auto da = (const float*)dav; 118 | auto dx = (float*)dxv; 119 | for (int i = 0; i < size; i++) 120 | { 121 | dx[i] = da[i] - v * exp(a[i]); 122 | } 123 | } 124 | 125 | template 126 | bool inbox(T _x, T _y, T x, T y, T w, T h) 127 | { 128 | return _x >= x && _y >= y && _x < x + h && _y < y + h; 129 | } 130 | 131 | template 132 | T sum(T* x, int size) 133 | { 134 | T sum = 0; 135 | for (int i = 0; i < size; i++) 136 | { 137 | sum += x[i]; 138 | } 139 | return sum; 140 | } 141 | 142 | //please use this in "do while" loop 143 | inline int get_next_coord(std::vector& r, const std::vector& dim) 144 | { 145 | int ret = 0; 146 | for (int i = 0; i < r.size(); i++) 147 | { 148 | if (r[i] < dim[i] - 1) 149 | { 150 | ret = 1; 151 | r[i]++; 152 | break; 153 | } 154 | else 155 | { 156 | r[i] = 0; 157 | } 158 | } 159 | return ret; 160 | } 161 | 162 | //inline std::vector> get_all_coords(const std::vector& dim) 163 | //{ 164 | // std::vector> r; 165 | // std::vector a(dim.size(), 0); 166 | // r.push_back(a); 167 | // while (get_next_coord(a, dim)) 168 | // { 169 | // r.push_back(a); 170 | // } 171 | // return r; 172 | //} 173 | 174 | template 175 | void force_resize(std::vector& vec, int size, T v) 176 | { 177 | if (vec.size() == 0) 178 | { 179 | vec.resize(size, v); 180 | } 181 | else 182 | { 183 | vec.resize(size, vec.back()); 184 | } 185 | } 186 | 187 | template 188 | T multiply(const std::vector& vec, int size = -1) 189 | { 190 | T 
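// (illustrative) multiply() returns the product of the leading `size` entries,
// e.g. multiply(std::vector<int>{ 28, 28, 1 }) == 784; NetLayer uses
// multiply(dim) <= 0 to detect layers that produce no output.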
ret = 1; 191 | if (size < 0) 192 | { 193 | size = vec.size(); 194 | } 195 | else 196 | { 197 | size = (std::min)(int(vec.size()), size); 198 | } 199 | for (int i = 0; i < size; i++) 200 | { 201 | ret *= vec[i]; 202 | } 203 | return ret; 204 | } 205 | 206 | #undef VECTOR 207 | #undef VECTOR_B 208 | 209 | template 210 | bool vector_have(const std::vector& ops, const T& op) 211 | { 212 | for (auto& o : ops) 213 | { 214 | if (op == o) 215 | { 216 | return true; 217 | } 218 | } 219 | return false; 220 | } 221 | } // namespace VectorMath 222 | 223 | //极端情况使用vector可能过于臃肿 224 | //通常没必要 225 | /* 226 | template 227 | class SimpleVector 228 | { 229 | private: 230 | T* data_ = nullptr; 231 | int size_ = 0; 232 | public: 233 | SimpleVector() {} 234 | SimpleVector(int n) 235 | { 236 | data_ = new T[n]; 237 | size_ = n; 238 | } 239 | ~SimpleVector() { delete[] data_; } 240 | int size() { return size_; } 241 | void resize(int n) 242 | { 243 | if (data_) { delete[] data_; } 244 | data_ = new T[n]; 245 | } 246 | T& operator [](int i) { return data_[i]; } 247 | T& getData(int i) { return data_[i]; } 248 | void init(T t) { for (int i=0; i < size_; i++) { data_[i]=t; } } 249 | }; 250 | */ 251 | 252 | } // namespace cccc -------------------------------------------------------------------------------- /cccc/cccc.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {90a7b465-28b4-4b3e-8b54-a57d71d3d271} 6 | 7 | 8 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 9 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 10 | 11 | 12 | {e6a60d5c-664d-4817-bcf1-85ab3ff8dc78} 13 | 14 | 15 | {2a3933f8-115f-45e7-834d-0367a65f965c} 16 | Data*.* 17 | 18 | 19 | {6625ebba-819f-41bf-8501-72e44f10b3a5} 20 | Layer*.* 21 | 22 | 23 | {b1000c3a-726a-497c-b4f3-7bde70c0545f} 24 | 25 | 26 | {7d95de28-1607-45df-8a8e-d11c06a3cadf} 27 | Matrix*.* 28 | 29 | 30 | {3b64f97b-b7f4-4b47-8fe8-199b9b17969a} 31 | 32 | 33 | {8b6e00fa-12c4-4110-8076-633dc67bf76c} 34 | 35 | 36 | {5fef8200-bbaa-48fe-9413-48c583ea301f} 37 | 38 | 39 | {701564cd-bff1-4f2a-9973-02b5596c730e} 40 | 41 | 42 | {3f9ecb0c-a7d1-4b0c-b70d-8e209e326922} 43 | 44 | 45 | {e12d9f9d-dbb5-4ea1-a636-3ce3e6908db5} 46 | 47 | 48 | {81cb7be8-6daa-48f2-99dd-17f39bfde5bc} 49 | 50 | 51 | {0b817fea-8808-42c4-8adb-6c357148dd45} 52 | 53 | 54 | 55 | 56 | data 57 | 58 | 59 | matrix 60 | 61 | 62 | mainprocess 63 | 64 | 65 | option 66 | 67 | 68 | data 69 | 70 | 71 | data 72 | 73 | 74 | solver 75 | 76 | 77 | data 78 | 79 | 80 | net 81 | 82 | 83 | net 84 | 85 | 86 | net 87 | 88 | 89 | gpu 90 | 91 | 92 | layer 93 | 94 | 95 | matrix 96 | 97 | 98 | matrix 99 | 100 | 101 | matrix 102 | 103 | 104 | gpu 105 | 106 | 107 | predict 108 | 109 | 110 | mlcc 111 | 112 | 113 | mlcc 114 | 115 | 116 | matrix 117 | 118 | 119 | log 120 | 121 | 122 | mlcc 123 | 124 | 125 | 126 | 127 | data 128 | 129 | 130 | matrix 131 | 132 | 133 | math 134 | 135 | 136 | mainprocess 137 | 138 | 139 | base 140 | 141 | 142 | option 143 | 144 | 145 | data 146 | 147 | 148 | data 149 | 150 | 151 | solver 152 | 153 | 154 | data 155 | 156 | 157 | math 158 | 159 | 160 | math 161 | 162 | 163 | base 164 | 165 | 166 | net 167 | 168 | 169 | net 170 | 171 | 172 | net 173 | 174 | 175 | gpu 176 | 177 | 178 | layer 179 | 180 | 181 | matrix 182 | 183 | 184 | matrix 185 | 186 | 187 | matrix 188 | 189 | 190 | gpu 191 | 192 | 193 | log 194 | 195 | 196 | predict 197 | 198 | 199 | mlcc 200 | 201 | 202 | mlcc 203 | 204 | 205 | mlcc 206 | 207 | 208 | mlcc 209 | 210 | 211 | mlcc 212 | 213 | 214 | mlcc 215 | 216 | 
217 | mlcc 218 | 219 | 220 | mlcc 221 | 222 | 223 | mlcc 224 | 225 | 226 | mlcc 227 | 228 | 229 | math 230 | 231 | 232 | mlcc 233 | 234 | 235 | gpu 236 | 237 | 238 | gpu 239 | 240 | 241 | gpu 242 | 243 | 244 | matrix 245 | 246 | 247 | mlcc 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | gpu 258 | 259 | 260 | -------------------------------------------------------------------------------- /cccc/cccc.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | WindowsLocalDebugger 9 | 10 | 11 | 12 | 13 | 14 | 15 | WindowsLocalDebugger 16 | 17 | -------------------------------------------------------------------------------- /cccc/cuda_libs.inc: -------------------------------------------------------------------------------- 1 | IMPORT(cublasCreate_v2) 2 | IMPORT(cublasDestroy_v2) 3 | IMPORT(cublasGetVersion) 4 | IMPORT(cublasIsamax_v2) 5 | IMPORT(cublasSasum_v2) 6 | IMPORT(cublasSdot_v2) 7 | IMPORT(cublasSetMathMode) 8 | IMPORT(cublasSgemm_v2) 9 | IMPORT(cublasSgemv_v2) 10 | IMPORT(cublasSscal_v2) 11 | IMPORT(cublasScalEx ) 12 | IMPORT(cublasIdamax_v2) 13 | IMPORT(cublasHgemm) 14 | IMPORT(cublasDscal_v2) 15 | IMPORT(cublasDotEx) 16 | IMPORT(cublasDgemv_v2) 17 | IMPORT(cublasDgemm_v2) 18 | IMPORT(cublasDdot_v2) 19 | IMPORT(cublasDasum_v2) 20 | 21 | IMPORT(cudnnActivationBackward) 22 | IMPORT(cudnnActivationForward) 23 | IMPORT(cudnnAddTensor) 24 | IMPORT(cudnnBatchNormalizationBackward) 25 | IMPORT(cudnnBatchNormalizationForwardInference) 26 | IMPORT(cudnnBatchNormalizationForwardTraining) 27 | IMPORT(cudnnConvolutionBackwardBias) 28 | IMPORT(cudnnConvolutionBackwardData) 29 | IMPORT(cudnnConvolutionBackwardFilter) 30 | IMPORT(cudnnConvolutionForward) 31 | IMPORT(cudnnCreate) 32 | IMPORT(cudnnCreateActivationDescriptor) 33 | IMPORT(cudnnCreateConvolutionDescriptor) 34 | IMPORT(cudnnCreateDropoutDescriptor) 35 | IMPORT(cudnnCreateFilterDescriptor) 36 | IMPORT(cudnnCreateLRNDescriptor) 37 | IMPORT(cudnnCreateOpTensorDescriptor) 38 | IMPORT(cudnnCreatePoolingDescriptor) 39 | IMPORT(cudnnCreateRNNDescriptor) 40 | IMPORT(cudnnCreateSpatialTransformerDescriptor) 41 | IMPORT(cudnnCreateTensorDescriptor) 42 | IMPORT(cudnnDeriveBNTensorDescriptor) 43 | IMPORT(cudnnDestroy) 44 | IMPORT(cudnnDestroyActivationDescriptor) 45 | IMPORT(cudnnDestroyConvolutionDescriptor) 46 | IMPORT(cudnnDestroyDropoutDescriptor) 47 | IMPORT(cudnnDestroyFilterDescriptor) 48 | IMPORT(cudnnDestroyLRNDescriptor) 49 | IMPORT(cudnnDestroyOpTensorDescriptor) 50 | IMPORT(cudnnDestroyPoolingDescriptor) 51 | IMPORT(cudnnDestroyRNNDescriptor) 52 | IMPORT(cudnnDestroySpatialTransformerDescriptor) 53 | IMPORT(cudnnDestroyTensorDescriptor) 54 | IMPORT(cudnnDropoutBackward) 55 | IMPORT(cudnnDropoutForward) 56 | IMPORT(cudnnDropoutGetReserveSpaceSize) 57 | IMPORT(cudnnDropoutGetStatesSize) 58 | IMPORT(cudnnFindConvolutionBackwardDataAlgorithm) 59 | IMPORT(cudnnFindConvolutionBackwardFilterAlgorithm) 60 | IMPORT(cudnnFindConvolutionForwardAlgorithm) 61 | IMPORT(cudnnGetConvolutionBackwardDataWorkspaceSize) 62 | IMPORT(cudnnGetConvolutionBackwardFilterWorkspaceSize) 63 | IMPORT(cudnnGetConvolutionForwardWorkspaceSize) 64 | IMPORT(cudnnGetErrorString) 65 | IMPORT(cudnnGetTensor4dDescriptor) 66 | IMPORT(cudnnGetTensorNdDescriptor) 67 | IMPORT(cudnnGetVersion) 68 | IMPORT(cudnnLRNCrossChannelBackward) 69 | IMPORT(cudnnLRNCrossChannelForward) 70 | IMPORT(cudnnOpTensor) 71 | IMPORT(cudnnPoolingBackward) 72 | IMPORT(cudnnPoolingForward) 73 | 
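// (descriptive note) this .inc is expanded twice with different definitions of
// IMPORT(): gpu_lib.h turns each entry into a typed function-pointer
// declaration, roughly
//   using cudnnSoftmaxForward_t = decltype(&cudnnSoftmaxForward);
//   extern cudnnSoftmaxForward_t cudnnSoftmaxForward;
// and gpu_lib.cpp expands it again to define the pointers and resolve them by
// name from the DLL list at startup.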
IMPORT(cudnnSetActivationDescriptor) 74 | IMPORT(cudnnSetConvolution2dDescriptor) 75 | IMPORT(cudnnSetConvolutionMathType) 76 | IMPORT(cudnnSetConvolutionNdDescriptor) 77 | IMPORT(cudnnSetDropoutDescriptor) 78 | IMPORT(cudnnSetFilter4dDescriptor) 79 | IMPORT(cudnnSetFilterNdDescriptor) 80 | IMPORT(cudnnSetLRNDescriptor) 81 | IMPORT(cudnnSetOpTensorDescriptor) 82 | IMPORT(cudnnSetPooling2dDescriptor) 83 | IMPORT(cudnnSetPoolingNdDescriptor) 84 | IMPORT(cudnnSetTensor) 85 | IMPORT(cudnnSetTensor4dDescriptor) 86 | IMPORT(cudnnSetTensorNdDescriptor) 87 | IMPORT(cudnnSoftmaxBackward) 88 | IMPORT(cudnnSoftmaxForward) 89 | IMPORT(cudnnGetLastErrorString) 90 | 91 | IMPORT(cudaMemset) 92 | IMPORT(cudaGetDeviceProperties_v2, "cudaGetDeviceProperties") 93 | IMPORT(cudaGetErrorString) 94 | IMPORT(cudaMalloc) 95 | IMPORT(cudaSetDevice) 96 | IMPORT(cudaDeviceGetPCIBusId) 97 | IMPORT(cudaMemGetInfo) 98 | IMPORT(cudaFree) 99 | IMPORT(cudaRuntimeGetVersion) 100 | IMPORT(cudaGetDeviceCount) 101 | IMPORT(cudaDriverGetVersion) 102 | IMPORT(cudaMemcpy) 103 | IMPORT(cudaMemcpyPeer) 104 | 105 | UNIMPORT_OR_BLANKTEMP(cuda_reciprocal) 106 | UNIMPORT_OR_BLANKTEMP(cuda_addnumber) 107 | UNIMPORT_OR_BLANKTEMP(cuda_pow) 108 | UNIMPORT_OR_BLANKTEMP(cuda_sparse) 109 | UNIMPORT_OR_BLANKTEMP(cuda_sign) 110 | UNIMPORT_OR_BLANKTEMP(cuda_cross_entropy) 111 | UNIMPORT_OR_BLANKTEMP(cuda_cross_entropy2) 112 | UNIMPORT_OR_BLANKTEMP(cuda_add) 113 | UNIMPORT_OR_BLANKTEMP(cuda_mul) 114 | UNIMPORT_OR_BLANKTEMP(cuda_div) 115 | UNIMPORT_OR_BLANKTEMP(cuda_sectionlimit) 116 | UNIMPORT_OR_BLANKTEMP(cuda_ada_update) 117 | UNIMPORT_OR_BLANKTEMP(cuda_ada_delta_update) 118 | UNIMPORT_OR_BLANKTEMP(cuda_adam_update) 119 | UNIMPORT_OR_BLANKTEMP(cuda_rms_prop_update) 120 | UNIMPORT_OR_BLANKTEMP(cuda_sin) 121 | UNIMPORT_OR_BLANKTEMP(cuda_cos) 122 | UNIMPORT_OR_BLANKTEMP(cuda_zigzag) 123 | UNIMPORT_OR_BLANKTEMP(cuda_zigzagb) 124 | UNIMPORT_OR_BLANKTEMP(cuda_step) 125 | UNIMPORT_OR_BLANKTEMP(cuda_leaky_relu) 126 | UNIMPORT_OR_BLANKTEMP(cuda_leaky_relub) 127 | UNIMPORT_OR_BLANKTEMP(cuda_max) 128 | UNIMPORT_OR_BLANKTEMP(cuda_maxb) -------------------------------------------------------------------------------- /cccc/dll_export.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef DLL_EXPORT 4 | #ifdef _WIN32 5 | #ifdef _WINDLL 6 | #define DLL_EXPORT __declspec(dllexport) 7 | #else 8 | #define DLL_EXPORT 9 | #endif 10 | #else 11 | #define DLL_EXPORT __attribute__((visibility("default"))) 12 | #endif 13 | #endif 14 | 15 | #ifdef __cplusplus 16 | #define EXTERN_C_BEGIN \ 17 | extern "C" \ 18 | { 19 | #define EXTERN_C_END } 20 | #else 21 | #define EXTERN_C_BEGIN 22 | #define EXTERN_C_END 23 | #endif -------------------------------------------------------------------------------- /cccc/gpu_lib.cpp: -------------------------------------------------------------------------------- 1 | #include "gpu_lib.h" 2 | #include "DynamicLibrary.h" 3 | #include "Log.h" 4 | #include 5 | #include 6 | 7 | namespace cccc 8 | { 9 | #if defined(_WIN32) && defined(AUTO_LOAD_GPU_FUNCTIONS) 10 | 11 | #define UNIMPORT_OR_BLANKTEMP(func) 12 | 13 | #define IMPORT(func, ...) func##_t func = nullptr; 14 | #if ENABLE_CUDA 15 | #include "cuda_libs.inc" 16 | #endif 17 | #if ENABLE_HIP 18 | #include "hip_libs.inc" 19 | #endif 20 | #undef IMPORT 21 | 22 | class gpu_assist_class_t 23 | { 24 | public: 25 | gpu_assist_class_t() 26 | { 27 | std::vector strs; 28 | #define IMPORT(func, ...) 
\ 29 | strs = \ 30 | { \ 31 | #func, \ 32 | __VA_ARGS__ \ 33 | }; \ 34 | for (auto& lib : libs) \ 35 | { \ 36 | for (auto str : strs) \ 37 | { \ 38 | func = (func##_t)DynamicLibrary::getFunction(lib, str); \ 39 | if (func) \ 40 | { \ 41 | if (std::string(#func) == str) \ 42 | { /*LOG(stderr, "Found {} in {}\n", #func, lib);*/ \ 43 | libs_used[lib].push_back(#func); \ 44 | } \ 45 | else \ 46 | { \ 47 | } \ 48 | break; \ 49 | } \ 50 | } \ 51 | if (func) { break; } \ 52 | } \ 53 | IMPORT_2(func) 54 | #define IMPORT_2(func) \ 55 | if (func) { func_c.push_back(#func); } \ 56 | else { func_c_failed.push_back(#func); } 57 | #if ENABLE_CUDA 58 | #include "cuda_libs.inc" 59 | #endif 60 | #undef IMPORT_2 61 | #define IMPORT_2(func) \ 62 | if (func) { func_h.push_back(#func); } \ 63 | else { func_h_failed.push_back(#func); } 64 | #if ENABLE_HIP 65 | #include "hip_libs.inc" 66 | #endif 67 | #undef IMPORT 68 | int sum = 0; 69 | for (auto& lu : libs_used) 70 | { 71 | //LOG("Loaded dynamic library {} for {} functions\n", lu.first, lu.second.size()); 72 | sum += lu.second.size(); 73 | } 74 | LOG("Found {} functions, {} failed\n", sum, func_c_failed.size() + func_h_failed.size()); 75 | #if ENABLE_CUDA 76 | if (func_c.size() > 0 && func_c_failed.size() > 0) 77 | { 78 | LOG("Some CUDA functions are lost: {}\n", func_c_failed); 79 | } 80 | if (func_c.size() == 0) 81 | { 82 | LOG("No CUDA libraries!\n"); 83 | } 84 | #endif 85 | #if ENABLE_HIP 86 | if (func_h.size() > 0 && func_h_failed.size() > 0) 87 | { 88 | LOG("Some HIP functions are lost: {}\n", func_h_failed); 89 | } 90 | if (func_h.size() == 0) 91 | { 92 | LOG("No HIP libraries!\n"); 93 | } 94 | #endif 95 | } 96 | 97 | private: 98 | std::vector libs = { 99 | "cudart64_12", 100 | "cudart64_110", 101 | "cudart64_100", 102 | "cublas64_12", 103 | "cublas64_11", 104 | "cublas64_100", 105 | "cudnn_graph64_9", 106 | "cudnn_adv64_9", 107 | "cudnn_cnn64_9", 108 | "cudnn_ops64_9", 109 | "cudnn_adv_infer64_8", 110 | "cudnn_adv_train64_8", 111 | "cudnn_cnn_infer64_8", 112 | "cudnn_cnn_train64_8", 113 | "cudnn_ops_infer64_8", 114 | "cudnn_ops_train64_8", 115 | "cudnn64_7", 116 | "amdhip64_6", //rocm 6.x: amdhip64_6.dll, before rocm 5.x: amdhip64.dll 117 | "rocblas", 118 | "miopen", 119 | "cccc-hip", 120 | }; 121 | std::map> libs_used; 122 | std::vector func_c, func_h, func_c_failed, func_h_failed; 123 | }; 124 | 125 | void find_gpu_functions() 126 | { 127 | static gpu_assist_class_t gpu_assist_class; 128 | } 129 | 130 | #else 131 | void find_gpu_functions() 132 | { 133 | } 134 | #endif 135 | } //namespace cccc 136 | -------------------------------------------------------------------------------- /cccc/gpu_lib.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ENABLE_CUDA 4 | #define ENABLE_CUDA 1 5 | #endif 6 | #ifndef ENABLE_HIP 7 | #ifdef _WIN32 8 | #define ENABLE_HIP 1 9 | #else 10 | #define ENABLE_HIP 0 11 | #endif 12 | #endif 13 | 14 | #if ENABLE_CUDA 15 | #include "cuda_runtime_api.h" 16 | #define __CUDA_RUNTIME_H__ 17 | #include "cublas_v2.h" 18 | #include "cuda_functions.h" 19 | #include "cudnn.h" 20 | #endif 21 | 22 | #if (ENABLE_CUDA) && (ENABLE_HIP) 23 | #include "hip_runtime_api_part.h" 24 | #endif 25 | 26 | #if (ENABLE_CUDA) && !(ENABLE_HIP) 27 | #include "hip_runtime_type_part.h" 28 | #include "hip_type_fake.h" 29 | #endif 30 | 31 | #if !(ENABLE_CUDA) && (ENABLE_HIP) 32 | #define __HIP_PLATFORM_AMD__ 33 | #define __HIP_DISABLE_CPP_FUNCTIONS__ 34 | #include "cuda_type_fake.h" 35 | #include 
"hip/hip_runtime_api.h" 36 | #endif 37 | 38 | #if ENABLE_HIP 39 | #include "hip_functions.h" 40 | #include "miopen/miopen.h" 41 | #include "rocblas/rocblas.h" 42 | #endif 43 | 44 | #ifndef _DEBUG 45 | #ifndef AUTO_LOAD_GPU_FUNCTIONS 46 | #define AUTO_LOAD_GPU_FUNCTIONS 47 | #endif 48 | #endif 49 | 50 | namespace cccc 51 | { 52 | #if defined(_WIN32) && defined(AUTO_LOAD_GPU_FUNCTIONS) 53 | #define IMPORT(func, ...) \ 54 | using func##_t = decltype(&func); \ 55 | extern func##_t func; 56 | #define UNIMPORT_OR_BLANKTEMP(func) 57 | #if ENABLE_CUDA 58 | #include "cuda_libs.inc" 59 | #endif 60 | #if ENABLE_HIP 61 | #include "hip_libs.inc" 62 | #endif 63 | #undef IMPORT 64 | #undef UNIMPORT_OR_BLANKTEMP 65 | #endif 66 | 67 | #define IMPORT(func, ...) \ 68 | template \ 69 | inline int func(Args&&... args) { return 0; } 70 | #define UNIMPORT_OR_BLANKTEMP(func) IMPORT(func) 71 | #if !ENABLE_CUDA 72 | #include "cuda_libs.inc" 73 | #endif 74 | #if !ENABLE_HIP 75 | #include "hip_libs.inc" 76 | #endif 77 | #undef IMPORT 78 | #undef UNIMPORT_OR_BLANKTEMP 79 | 80 | }; //namespace cccc 81 | 82 | //After defined of the function ptrs 83 | #include "cublas_real.h" 84 | #include "rocblas_real.h" 85 | 86 | #include "types.h" 87 | 88 | namespace cccc 89 | { 90 | 91 | inline cudnnDataType_t toCudnnDataType(DataType dt) 92 | { 93 | const cudnnDataType_t t[] = { CUDNN_DATA_FLOAT, CUDNN_DATA_DOUBLE, CUDNN_DATA_HALF }; 94 | return t[int(dt)]; 95 | } 96 | 97 | inline miopenDataType_t toMiopenDataType(DataType dt) 98 | { 99 | const miopenDataType_t t[] = { miopenFloat, miopenDouble, miopenHalf }; 100 | return t[int(dt)]; 101 | } 102 | 103 | #if ENABLE_CUDA 104 | #define CUDNN_DESC(T) \ 105 | class cudnn##T##Desc \ 106 | { \ 107 | private: \ 108 | cudnn##T##Descriptor_t desc_; \ 109 | \ 110 | public: \ 111 | cudnn##T##Desc() { cudnnCreate##T##Descriptor(&desc_); } \ 112 | ~cudnn##T##Desc() { cudnnDestroy##T##Descriptor(desc_); } \ 113 | cudnn##T##Descriptor_t operator()() { return (cudnn##T##Descriptor_t)desc_; } \ 114 | cudnn##T##Desc(const cudnn##T##Desc&) = delete; \ 115 | cudnn##T##Desc& operator=(const cudnn##T##Desc&) = delete; \ 116 | }; 117 | #endif 118 | #define CUDNN_DESC2(T) \ 119 | class cudnn##T##Desc \ 120 | { \ 121 | private: \ 122 | int desc_[64]{}; \ 123 | \ 124 | public: \ 125 | cudnn##T##Descriptor_t operator()() { return (cudnn##T##Descriptor_t)desc_; } \ 126 | }; 127 | 128 | class cudnnConvolutionDesc 129 | { 130 | private: 131 | int desc_[64]{}; 132 | 133 | public: 134 | cudnnConvolutionDesc() 135 | { 136 | desc_[23] = 1; //此值从cudnn的create结果推断得到,原理不负责 137 | } 138 | 139 | cudnnConvolutionDescriptor_t operator()() { return (cudnnConvolutionDescriptor_t)desc_; } 140 | }; 141 | CUDNN_DESC2(Tensor) 142 | CUDNN_DESC2(Filter) 143 | CUDNN_DESC2(Pooling) 144 | CUDNN_DESC2(Activation) 145 | CUDNN_DESC2(Dropout) 146 | CUDNN_DESC2(OpTensor) 147 | CUDNN_DESC2(LRN) 148 | CUDNN_DESC2(SpatialTransformer) 149 | CUDNN_DESC2(CTCLoss) 150 | 151 | //CUDNN_DESC(Convolution); 152 | #if ENABLE_HIP 153 | #define MIOPEN_DESC(T) \ 154 | class miopen##T##Desc \ 155 | { \ 156 | private: \ 157 | miopen##T##Descriptor_t desc_; \ 158 | \ 159 | public: \ 160 | miopen##T##Desc() { miopenCreate##T##Descriptor(&desc_); } \ 161 | ~miopen##T##Desc() { miopenDestroy##T##Descriptor(desc_); } \ 162 | miopen##T##Descriptor_t operator()() { return (cudnn##T##Descriptor_t)desc_; } \ 163 | miopen##T##Desc(const miopen##T##Desc&) = delete; \ 164 | miopen##T##Desc& operator=(const miopen##T##Desc&) = delete; \ 165 | }; 166 | #endif 167 | 
#define MIOPEN_DESC2(T) \ 168 | class miopen##T##Desc \ 169 | { \ 170 | private: \ 171 | int desc_[64]{}; \ 172 | \ 173 | public: \ 174 | miopen##T##Descriptor_t operator()() { return (miopen##T##Descriptor_t)desc_; } \ 175 | }; 176 | 177 | MIOPEN_DESC2(Tensor) 178 | MIOPEN_DESC2(Convolution) 179 | MIOPEN_DESC2(Pooling) 180 | MIOPEN_DESC2(Activation) 181 | MIOPEN_DESC2(Dropout) 182 | MIOPEN_DESC2(RNN) 183 | MIOPEN_DESC2(LRN) 184 | 185 | void find_gpu_functions(); 186 | 187 | } //namespace cccc -------------------------------------------------------------------------------- /cccc/hip_libs.inc: -------------------------------------------------------------------------------- 1 | IMPORT(rocblas_create_handle) 2 | IMPORT(rocblas_destroy_handle) 3 | IMPORT(rocblas_isamax) 4 | IMPORT(rocblas_sasum) 5 | IMPORT(rocblas_sdot) 6 | IMPORT(rocblas_sgemm) 7 | IMPORT(rocblas_sgemv) 8 | IMPORT(rocblas_sscal) 9 | IMPORT(rocblas_sgeam) 10 | 11 | IMPORT(hipMemset) 12 | IMPORT(hipGetDeviceProperties) 13 | IMPORT(hipMalloc) 14 | IMPORT(hipSetDevice) 15 | IMPORT(hipDeviceGetPCIBusId) 16 | IMPORT(hipMemGetInfo) 17 | IMPORT(hipFree) 18 | IMPORT(hipRuntimeGetVersion) 19 | IMPORT(hipGetDeviceCount) 20 | IMPORT(hipDriverGetVersion) 21 | IMPORT(hipMemcpy) 22 | 23 | IMPORT(hip_addnumber) 24 | IMPORT(hip_ada_delta_update) 25 | IMPORT(hip_adam_update) 26 | IMPORT(hip_addbias) 27 | IMPORT(hip_pow) 28 | IMPORT(hip_div) 29 | IMPORT(hip_sign) 30 | IMPORT(hip_reciprocal) 31 | IMPORT(hip_cross_entropy) 32 | IMPORT(hip_cross_entropy2) 33 | 34 | IMPORT(miopenActivationBackward) 35 | IMPORT(miopenActivationForward) 36 | IMPORT(miopenConvolutionBackwardBias) 37 | IMPORT(miopenConvolutionBackwardData) 38 | IMPORT(miopenConvolutionBackwardDataGetWorkSpaceSize) 39 | IMPORT(miopenConvolutionBackwardWeights) 40 | IMPORT(miopenConvolutionBackwardWeightsGetWorkSpaceSize) 41 | IMPORT(miopenConvolutionForward) 42 | IMPORT(miopenConvolutionForwardBias) 43 | IMPORT(miopenConvolutionForwardGetWorkSpaceSize) 44 | IMPORT(miopenCreate) 45 | IMPORT(miopenCreateActivationDescriptor) 46 | IMPORT(miopenCreateConvolutionDescriptor) 47 | IMPORT(miopenCreatePoolingDescriptor) 48 | IMPORT(miopenCreateTensorDescriptor) 49 | IMPORT(miopenDestroy) 50 | IMPORT(miopenDestroyActivationDescriptor) 51 | IMPORT(miopenDestroyConvolutionDescriptor) 52 | IMPORT(miopenDestroyPoolingDescriptor) 53 | IMPORT(miopenDestroyTensorDescriptor) 54 | IMPORT(miopenFindConvolutionBackwardDataAlgorithm) 55 | IMPORT(miopenFindConvolutionBackwardWeightsAlgorithm) 56 | IMPORT(miopenFindConvolutionForwardAlgorithm) 57 | IMPORT(miopenGet4dTensorDescriptor) 58 | IMPORT(miopenGetErrorString) 59 | IMPORT(miopenInitConvolutionDescriptor) 60 | IMPORT(miopenInitConvolutionNdDescriptor) 61 | IMPORT(miopenOpTensor) 62 | IMPORT(miopenPoolingBackward) 63 | IMPORT(miopenPoolingForward) 64 | IMPORT(miopenPoolingGetWorkSpaceSize) 65 | IMPORT(miopenSet2dPoolingDescriptor) 66 | IMPORT(miopenSet4dTensorDescriptor) 67 | IMPORT(miopenSetActivationDescriptor) 68 | IMPORT(miopenSetTensorDescriptor) 69 | IMPORT(miopenSoftmaxBackward_V2) 70 | IMPORT(miopenSoftmaxForward_V2) 71 | 72 | 73 | -------------------------------------------------------------------------------- /cccc/math_supplement.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace cccc 5 | { 6 | template 7 | T linear_inter(int x, int x1, int x2, T y1, T y2) 8 | { 9 | T y = y1 + (y2 - y1) / (x2 - x1) * (x - x1); 10 | return y; 11 | } 12 | 13 | template 14 | T 
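// (illustrative) scale_inter() interpolates geometrically where linear_inter()
// interpolates linearly: with x1 = 0, x2 = 10, y1 = 1e-2, y2 = 1e-4,
// scale_inter(5, ...) returns 1e-3 while linear_inter(5, ...) returns about
// 5.05e-3. These back the scale_inter / linear_inter learning-rate schedules.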
scale_inter(int x, int x1, int x2, T y1, T y2) 15 | { 16 | T p = pow(y2 / y1, 1.0 * (x - x1) / (x2 - x1)); 17 | T y = y1 * p; 18 | return y; 19 | } 20 | 21 | template <typename T> 22 | T clip(T x, T y1, T y2) 23 | { 24 | if (x < y1) { return y1; } 25 | if (x > y2) { return y2; } 26 | return x; 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /cccc/predict.cpp: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /cccc/resource.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scarsty/cccc-lite/8a82b51444927af362edc2060af9fa5c045e14e8/cccc/resource.h -------------------------------------------------------------------------------- /cccc/types.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "dll_export.h" 4 | #include 5 | #include 6 | #include 7 | 8 | #define VAR_NAME(a) #a 9 | 10 | #define CCCC_NAMESPACE_BEGIN \ 11 | namespace cccc \ 12 | { 13 | #define CCCC_NAMESPACE_END \ 14 | } 15 | 16 | namespace cccc 17 | { 18 | using half = float; 19 | 20 | //data type; since it is mostly used for Matrix, a strict enum class is used to avoid overload conflicts 21 | enum class DataType 22 | { 23 | FLOAT = 0, 24 | DOUBLE = 1, 25 | HALF = 2, 26 | CURRENT = 65535, 27 | }; 28 | 29 | //type of the device in use; mainly decides where the data lives, a strict enum class as above 30 | enum class UnitType 31 | { 32 | CPU = 0, 33 | GPU, 34 | }; 35 | 36 | //kinds of activation functions 37 | //note: values that mirror cuDNN must be written in the same order 38 | enum ActiveFunctionType 39 | { 40 | ACTIVE_FUNCTION_NONE = -1, 41 | ACTIVE_FUNCTION_SIGMOID = 0, 42 | ACTIVE_FUNCTION_RELU = 1, 43 | ACTIVE_FUNCTION_TANH = 2, 44 | ACTIVE_FUNCTION_CLIPPED_RELU = 3, 45 | ACTIVE_FUNCTION_ELU = 4, //only GPU 46 | ACTIVE_FUNCTION_SOFTMAX, 47 | ACTIVE_FUNCTION_SOFTMAX_FAST, 48 | ACTIVE_FUNCTION_SOFTMAX_LOG, 49 | ACTIVE_FUNCTION_ABSMAX, 50 | ACTIVE_FUNCTION_DROPOUT, 51 | ACTIVE_FUNCTION_RECURRENT, 52 | ACTIVE_FUNCTION_SOFTPLUS, //only CPU 53 | ACTIVE_FUNCTION_LOCAL_RESPONSE_NORMALIZATION, 54 | ACTIVE_FUNCTION_LOCAL_CONSTRAST_NORMALIZATION, 55 | ACTIVE_FUNCTION_DIVISIVE_NORMALIZATION, 56 | ACTIVE_FUNCTION_BATCH_NORMALIZATION, 57 | ACTIVE_FUNCTION_SPATIAL_TRANSFORMER, 58 | ACTIVE_FUNCTION_SQUARE, 59 | ACTIVE_FUNCTION_SUMMAX, 60 | ACTIVE_FUNCTION_ZERO_CHANNEL, 61 | ACTIVE_FUNCTION_SIGMOID_CE, //CE means cross entropy: in backward the error is passed back unchanged, for nets with multiple outputs; same below 62 | ACTIVE_FUNCTION_SOFTMAX_CE, 63 | ACTIVE_FUNCTION_SOFTMAX_FAST_CE, 64 | ACTIVE_FUNCTION_SIN, 65 | ACTIVE_FUNCTION_ZIGZAG, 66 | ACTIVE_FUNCTION_SIN_STEP, 67 | ACTIVE_FUNCTION_LEAKY_RELU, 68 | ACTIVE_FUNCTION_SELU, 69 | ACTIVE_FUNCTION_ABS, 70 | ACTIVE_FUNCTION_SIN_PLUS, 71 | ACTIVE_FUNCTION_SILU, 72 | ACTIVE_FUNCTION_SIGMOID3, //CE means cross entropy: in backward the error is passed back unchanged, for nets with multiple outputs; same below 73 | ACTIVE_FUNCTION_SOFTMAX3, 74 | }; 75 | 76 | enum ActivePhaseType 77 | { 78 | ACTIVE_PHASE_TRAIN, //training 79 | ACTIVE_PHASE_TEST, //testing 80 | ACTIVE_PHASE_ONLY_TEST, //test only, meaning intermediate results need not be kept 81 | }; 82 | 83 | //pooling types, correspond directly to cuDNN and can be cast 84 | enum PoolingType 85 | { 86 | POOLING_MAX = 0, 87 | POOLING_AVERAGE_PADDING = 1, 88 | POOLING_AVERAGE_NOPADDING = 2, 89 | POOLING_MA, //experimental: max in forward, average in backward 90 | }; 91 | 92 | //whether the pooling is reversed (unpooling) 93 | enum PoolingReverseType 94 | { 95 | POOLING_NOT_REVERSE = 0, 96 | POOLING_REVERSE = 1, 97 | }; 98 | 99 | //combine types 100 | enum CombineType 101 | { 102 | COMBINE_CONCAT, 103 | COMBINE_ADD, 104 | }; 105 | 106 | //cost function types 107 | enum CostFunctionType 108 | { 109 | COST_FUNCTION_RMSE, 110 | COST_FUNCTION_CROSS_ENTROPY, 111 | }; 112 | 113 | //for layer 114 
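// (descriptive note) the enums in this file are what Option::getEnum()
// resolves INI strings to; e.g. "type = conv" in a layer section yields
// LAYER_CONNECTION_CONVOLUTION through the "conv" alias in Option::initEnums().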
| //hidden, input, output 115 | enum LayerVisibleType 116 | { 117 | LAYER_VISIBLE_HIDDEN, 118 | LAYER_VISIBLE_IN, 119 | LAYER_VISIBLE_OUT, 120 | }; 121 | 122 | //connection types 123 | enum LayerConnectionType 124 | { 125 | LAYER_CONNECTION_NONE, //no connection, for the input layer, needs no special settings 126 | LAYER_CONNECTION_FULLCONNECT, //fully connected 127 | LAYER_CONNECTION_CONVOLUTION, //convolution 128 | LAYER_CONNECTION_POOLING, //pooling 129 | LAYER_CONNECTION_DIRECT, //direct link 130 | LAYER_CONNECTION_CORRELATION, //correlation 131 | LAYER_CONNECTION_COMBINE, //combine 132 | LAYER_CONNECTION_EXTRACT, //extract 133 | LAYER_CONNECTION_ROTATE_EIGEN, //rotation 134 | LAYER_CONNECTION_NORM2, //computes the norm of each group of data 135 | LAYER_CONNECTION_TRANSPOSE, //NCHW2NHWC 136 | LAYER_CONNECTION_NAC, //NAC 137 | }; 138 | 139 | //for net 140 | 141 | //weight initialization modes 142 | enum RandomFillType 143 | { 144 | RANDOM_FILL_CONSTANT, 145 | RANDOM_FILL_XAVIER, 146 | RANDOM_FILL_GAUSSIAN, 147 | RANDOM_FILL_MSRA, 148 | RANDOM_FILL_LECUN, 149 | }; 150 | 151 | //learning rate adjustment modes 152 | enum AdjustLearnRateType 153 | { 154 | ADJUST_LEARN_RATE_FIXED, 155 | ADJUST_LEARN_RATE_SCALE_INTER, 156 | ADJUST_LEARN_RATE_LINEAR_INTER, 157 | ADJUST_LEARN_RATE_STEPS, 158 | ADJUST_LEARN_RATE_STEPS_WARM, 159 | ADJUST_LEARN_RATE_STEPS_AUTO, 160 | }; 161 | 162 | enum BatchNormalizationType 163 | { 164 | BATCH_NORMALIZATION_PER_ACTIVATION = 0, 165 | BATCH_NORMALIZATION_SPATIAL = 1, 166 | BATCH_NORMALIZATION_AUTO, 167 | }; 168 | 169 | enum RecurrentType 170 | { 171 | RECURRENT_RELU = 0, 172 | RECURRENT_TANH = 1, 173 | RECURRENT_LSTM = 2, 174 | RECURRENT_GRU = 3, 175 | }; 176 | 177 | enum RecurrentDirectionType 178 | { 179 | RECURRENT_DIRECTION_UNI = 0, 180 | RECURRENT_DIRECTION_BI = 1, 181 | }; 182 | 183 | enum RecurrentInputType 184 | { 185 | RECURRENT_INPUT_LINEAR = 0, 186 | RECURRENT_INPUT_SKIP = 1, 187 | }; 188 | 189 | enum RecurrentAlgoType 190 | { 191 | RECURRENT_ALGO_STANDARD = 0, 192 | RECURRENT_ALGO_PERSIST_STATIC = 1, 193 | RECURRENT_ALGO_PERSIST_DYNAMIC = 2, 194 | }; 195 | 196 | enum SolverType 197 | { 198 | SOLVER_SGD, 199 | SOLVER_NAG, 200 | SOLVER_ADA_DELTA, 201 | SOLVER_ADAM, 202 | SOLVER_RMS_PROP, 203 | }; 204 | 205 | enum WorkModeType 206 | { 207 | WORK_MODE_NORMAL, 208 | WORK_MODE_PRUNE, 209 | WORK_MODE_GAN, 210 | }; 211 | 212 | enum PruneType 213 | { 214 | PRUNE_ACTIVE, 215 | PRUNE_WEIGHT, 216 | }; 217 | 218 | struct TestInfo 219 | { 220 | double accuracy = 0; 221 | int64_t right = 0, total = 0; 222 | }; 223 | 224 | } // namespace cccc -------------------------------------------------------------------------------- /include/hip_type_fake.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | enum 4 | { 5 | miopenFloat, 6 | miopenDouble, 7 | miopenHalf, 8 | miopenPoolingMax, 9 | miopenPoolingAverage, 10 | miopenPoolingAverageInclusive, 11 | MIOPEN_SOFTMAX_ACCURATE, 12 | MIOPEN_SOFTMAX_FAST, 13 | MIOPEN_SOFTMAX_LOG, 14 | MIOPEN_SOFTMAX_MODE_INSTANCE, 15 | miopenActivationLOGISTIC, 16 | miopenActivationRELU, 17 | miopenActivationTANH, 18 | miopenConvolution, 19 | 20 | }; 21 | 22 | #define CUDNN_BN_MIN_EPSILON 1e-5 23 | 24 | struct Cheat 25 | { 26 | size_t memory; 27 | int algo, mathType; 28 | }; 29 | 30 | using hipError = int; 31 | using miopenActivationDescriptor_t = void*; 32 | using miopenActivationMode_t = int; 33 | using miopenBatchNormMode_t = int; 34 | using miopenConvolutionBwdDataAlgo_t = int; 35 | using miopenConvolutionBwdDataAlgoPerf_t = Cheat; 36 | using miopenConvolutionBwdFilterAlgo_t = int; 37 | using miopenConvolutionBwdFilterAlgoPerf_t = Cheat; 38 | using miopenConvolutionDescriptor_t = 
--------------------------------------------------------------------------------
/include/hip_type_fake.h:
--------------------------------------------------------------------------------
#pragma once

#include <cstddef>    // size_t, used by Cheat and miopenConvAlgoPerf_t below

enum
{
    miopenFloat,
    miopenDouble,
    miopenHalf,
    miopenPoolingMax,
    miopenPoolingAverage,
    miopenPoolingAverageInclusive,
    MIOPEN_SOFTMAX_ACCURATE,
    MIOPEN_SOFTMAX_FAST,
    MIOPEN_SOFTMAX_LOG,
    MIOPEN_SOFTMAX_MODE_INSTANCE,
    miopenActivationLOGISTIC,
    miopenActivationRELU,
    miopenActivationTANH,
    miopenConvolution,
};

#define CUDNN_BN_MIN_EPSILON 1e-5

// Stand-in for MIOpen's algorithm-performance structs; aliased below.
struct Cheat
{
    size_t memory;
    int algo, mathType;
};

using hipError = int;
using miopenActivationDescriptor_t = void*;
using miopenActivationMode_t = int;
using miopenBatchNormMode_t = int;
using miopenConvolutionBwdDataAlgo_t = int;
using miopenConvolutionBwdDataAlgoPerf_t = Cheat;
using miopenConvolutionBwdFilterAlgo_t = int;
using miopenConvolutionBwdFilterAlgoPerf_t = Cheat;
using miopenConvolutionDescriptor_t = void*;
using miopenConvolutionFwdAlgo_t = int;
using miopenConvolutionFwdAlgoPerf_t = Cheat;
using miopenDataType_t = int;
using miopenDropoutDescriptor_t = void*;
using miopenFilterDescriptor_t = void*;
using miopenHandle_t = void*;
using miopenLRNDescriptor_t = void*;
using miopenMathType_t = int;
using miopenOpTensorDescriptor_t = void*;
using miopenPoolingDescriptor_t = void*;
using miopenPoolingMode_t = int;
using miopenRNNDescriptor_t = void*;
using miopenSpatialTransformerDescriptor_t = void*;
using miopenStatus_t = int;
using miopenConvFwdAlgorithm_t = int;
using miopenConvBwdDataAlgorithm_t = int;
using miopenConvBwdWeightsAlgorithm_t = int;

struct miopenTensorStruct
{
};
using miopenTensorDescriptor_t = miopenTensorStruct*;

struct miopenConvAlgoPerf_t
{
    union
    {
        miopenConvFwdAlgorithm_t fwd_algo;
        miopenConvBwdWeightsAlgorithm_t bwd_weights_algo;
        miopenConvBwdDataAlgorithm_t bwd_data_algo;
    };
    float time;
    size_t memory;
};
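Editor's note, not part of the repository: a header like hip_type_fake.h lets translation units that name MIOpen/HIP types compile on machines with no ROCm toolchain; handles collapse to void* and status/algorithm enums to int, which is enough for declarations even though nothing can execute. A minimal sketch of the same pattern, with hypothetical names:

// Sketch of the "fake type" pattern (hypothetical names, not this repo's API).
#if defined(HAS_REAL_BACKEND)
    #include <real_backend.h>    // the real definitions, when available
#else
    using backendHandle_t = void*;    // opaque handle stands in for the real struct pointer
    using backendStatus_t = int;      // status enum collapses to int
#endif

// Code that only names the types compiles either way:
struct DeviceContext
{
    backendHandle_t handle = nullptr;
    backendStatus_t lastStatus = 0;
};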
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scarsty/cccc-lite/8a82b51444927af362edc2060af9fa5c045e14e8/logo.png
--------------------------------------------------------------------------------
/work/mnist-lenet.ini:
--------------------------------------------------------------------------------
[train]
LoadFile = save/save-25.txt
save_sign=MNIST
LoadNet = 0

Batch = 100
WorkType = 0
save_epoch=5

test_test = 1
test_train = 1
test_train_origin=0
test_test_origin=0

Trainepochs = 10
OutIter = 200
Testepoch = 1

LearnRateBase = 1e-2
weight_decay = 5e-4
Momentum = 0.9

lr_adjust_method = fixed
lr_inter_set=300 1, 500 1
lr_scale_epoch=5,50
lr_scale_rate=1,0.01

Testmax = 1
USE_CUDA = 1

solver=sgd
#force_sgd_epoch=5

generate=0
diverse_beta=0.0
output_log=1
output_net=1

mp=1

cifa=1

[data_preparer]
library_dll=..\x64\Release\cccc-mnist.dll
library_dlld=..\x64\Debug\cccc-mnist.dll
library=cccc-mnist
trans = 0
type=1
transpose = 0
random_diff=1
d_noise=0
remove59=0
flip=0
transpose=0
d_contrast=-0.2,0.2
d_brightness=-0.2,0.2
#fill_group=100

[data_preparer2]
library_dll=..\x64\Release\cccc-mnist.dll
library_dlld=..\x64\Debug\cccc-mnist.dll
library=cccc-mnist
trans = 1
#path=qmnist
type=2
transpose = 0
remove59=0
#fill_group=100

[cifa]
structure="
batch=100;
act = active_relu;
X = Matrix(28, 28, 1, batch);
A1 = active(maxpool(conv(X, Matrix(5, 5, 1, 50)) + Matrix(1, 1, 50, 1), { 2, 2 }), act);
A2 = active(maxpool(conv(A1, Matrix(5, 5, 50, 50)) + Matrix(1, 1, 50, 1), { 2, 2 }), act);
A = sigmoid_ce(Matrix(10, 256) * active(Matrix(256, A2.row()) * A2 + Matrix(256, 1), act) + Matrix(10, 1));
setXY(X, A);
//addLoss(crossEntropy(A, Y));"

structure1=
auto batch=100;
auto act = active_relu;
auto X = Matrix(28, 28, 1, batch);
auto W3 = Matrix(256, X.row());
auto b3 = Matrix(256, 1);
auto W4 = Matrix(10, 256);
auto b4 = Matrix(10, 1);
auto A = softmax_ce(W4 * active(W3 * X + b3, act) + b4);
auto Y = Matrix(1, 1, 10, batch);
setXY(X, A);
//addWeight(W3, b3, W4, b4);
//addLoss(crossEntropy(A, Y));
--------------------------------------------------------------------------------
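Editor's shape check, not part of the repository: the [cifa] structure above is a LeNet-style network whose dimensions can be verified by hand, assuming valid (no-padding) convolution and non-overlapping 2x2 max pooling, which is what the script's conv/maxpool calls appear to use:

#include <cstdio>

// Hypothetical helpers mirroring the assumed conv/pool shape rules.
constexpr int convOut(int in, int k) { return in - k + 1; }    // valid convolution
constexpr int poolOut(int in, int w) { return in / w; }        // non-overlapping pooling

int main()
{
    int a1 = poolOut(convOut(28, 5), 2);    // X 28x28 -> conv 5x5 -> 24 -> pool -> 12
    int a2 = poolOut(convOut(a1, 5), 2);    // 12 -> conv 5x5 -> 8 -> pool -> 4
    // A2 is 4x4x50 per sample, so A2.row() = 800: the hidden weight
    // Matrix(256, A2.row()) is 256 x 800, feeding the 10-way output.
    printf("A2.row() = %d\n", a2 * a2 * 50);    // 800
    return 0;
}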