├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── README.md
├── cccc-cuda
│   ├── CMakeLists.txt
│   ├── cccc-cuda.vcxproj
│   ├── cccc-cuda.vcxproj.user
│   ├── cuda_functions.cu
│   └── cuda_functions.h
├── cccc-hip
│   ├── CMakeLists.txt
│   ├── cccc-hip.vcxproj
│   ├── cccc-hip.vcxproj.filters
│   ├── cccc-hip.vcxproj.user
│   ├── hip_functions.h
│   └── hip_functions.hip
├── cccc-lite.sln
├── cccc-mnist
│   ├── CMakeLists.txt
│   ├── Cifar10Reader.cpp
│   ├── Cifar10Reader.h
│   ├── DataPreparerMnist.cpp
│   ├── DataPreparerMnist.h
│   ├── MnistReader.cpp
│   ├── MnistReader.h
│   ├── build.sh
│   ├── cccc-mnist.vcxproj
│   ├── cccc-mnist.vcxproj.filters
│   ├── cccc-mnist.vcxproj.user
│   ├── cccc_extension.h
│   └── extension.cpp
├── cccc-windows
│   ├── Application.cpp
│   ├── Application.h
│   ├── CMakeLists.txt
│   ├── cccc-windows.vcxproj
│   ├── cccc-windows.vcxproj.filters
│   ├── cccc-windows.vcxproj.user
│   └── windows.cpp
├── cccc
│   ├── Activer.cpp
│   ├── Activer.h
│   ├── AdditionalCost.cpp
│   ├── AdditionalCost.h
│   ├── CMakeLists.txt
│   ├── DataPreparer.cpp
│   ├── DataPreparer.h
│   ├── DataPreparerFactory.cpp
│   ├── DataPreparerFactory.h
│   ├── DataPreparerImage.cpp
│   ├── DataPreparerImage.h
│   ├── DataPreparerTxt.cpp
│   ├── DataPreparerTxt.h
│   ├── GpuControl.cpp
│   ├── GpuControl.h
│   ├── Layer.cpp
│   ├── Layer.h
│   ├── Log.cpp
│   ├── Log.h
│   ├── MainProcess.cpp
│   ├── MainProcess.h
│   ├── Matrix.cpp
│   ├── Matrix.h
│   ├── MatrixData.cpp
│   ├── MatrixData.h
│   ├── MatrixEx.cpp
│   ├── MatrixEx.h
│   ├── MatrixOp.cpp
│   ├── MatrixOp.h
│   ├── Net.cpp
│   ├── Net.h
│   ├── NetCifa.cpp
│   ├── NetCifa.h
│   ├── NetLayer.cpp
│   ├── NetLayer.h
│   ├── Option.cpp
│   ├── Option.h
│   ├── Solver.cpp
│   ├── Solver.h
│   ├── TensorDesc.cpp
│   ├── TensorDesc.h
│   ├── VectorMath.h
│   ├── blas_types.h
│   ├── cblas_real.h
│   ├── cccc.vcxproj
│   ├── cccc.vcxproj.filters
│   ├── cccc.vcxproj.user
│   ├── cublas_real.h
│   ├── cuda_half_hack.h
│   ├── cuda_libs.inc
│   ├── dll_export.h
│   ├── gpu_lib.cpp
│   ├── gpu_lib.h
│   ├── hip_libs.inc
│   ├── math_supplement.h
│   ├── predict.cpp
│   ├── resource.h
│   ├── rocblas_real.h
│   └── types.h
├── include
│   ├── cblas.h
│   ├── cuda
│   │   └── cudnn2.h
│   ├── cuda_driver_types.h
│   ├── cuda_type_fake.h
│   ├── hip_runtime_api_part.h
│   ├── hip_runtime_type_part.h
│   └── hip_type_fake.h
├── logo.png
└── work
    └── mnist-lenet.ini
/.gitignore:
--------------------------------------------------------------------------------
1 | *.bin
2 | *.bmp
3 | *.d
4 | *.db
5 | *.deps
6 | *.diagsession
7 | *.dll
8 | *.enc
9 | *.exe
10 | *.exp
11 | *.html
12 | *.idb
13 | *.ilk
14 | *.iobj
15 | *.ipdb
16 | *.ipynb
17 | *.lastcodeanalysissucceeded
18 | *.log
19 | *.o
20 | *.obj
21 | *.opendb
22 | *.pdb
23 | *.pdf
24 | *.pyc
25 | *.pyd
26 | *.res
27 | *.sdf
28 | *.tlog
29 | *.vspx
30 | *.xml
31 | *.zip
32 | .vs/
33 | .vscode/
34 | lib/x64/
35 | work/*.txt
36 | work/openblas_haswell_lib/
37 | work/save/
38 | work/*.py
39 | x64/
40 | *ubyte
41 | will-python/will.py
42 | will-python/will_wrap.cxx
43 | .vs/
44 | build/
45 | *.so
46 | *.gz
47 | will-windows/x64/
48 | will/x64/
49 | will-cuda/x64/
50 | will-mnist/x64/
51 | will-image/x64/
52 | will-predict/x64/
53 | will-python/x64/
54 | will-test/x64/
55 | *.onnx
56 | *.pb
57 | *.json
58 | *.aps
59 | *.bak
60 | cccc-hip/Debug/
61 | cccc-hip/Release/
62 | *.patch
63 | *.bat
64 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scarsty/cccc-lite/8a82b51444927af362edc2060af9fa5c045e14e8/.gitmodules
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.6)
2 | project(libcccc)
3 |
4 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
5 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
6 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
7 |
8 | option(CCCC_CPU "CPU support only" OFF)
9 |
10 | if(NOT CCCC_CPU)
11 | add_subdirectory(cccc-cuda)
12 | else()
13 | add_definitions(-D NO_CUDA=1)
14 | endif()
15 | add_subdirectory(cccc)
16 | add_subdirectory(cccc-windows)
17 | add_subdirectory(cccc-mnist)
18 |
19 | if(WITH_PYTHON)
20 | add_subdirectory(cccc-python)
21 | endif()
22 |
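# A configure sketch: CCCC_CPU=ON skips the cccc-cuda subproject and defines
# NO_CUDA as above; WITH_PYTHON additionally enables the cccc-python subproject.
#   cmake -DCCCC_CPU=ON ..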
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CCCC
2 |
3 |
4 | cccc is the core part of libwill, responsible for training and inference of neural networks.
5 |
6 | The English codename "will" means "wish".
7 |
8 | ## Introduction
9 |
10 | cccc is a neural network toolkit that supports both first-generation layer-based and second-generation operation-based networks, yet it is far simpler to install and use than comparable tools.
11 |
12 | cccc officially supports Windows and uses Windows as its primary development platform. It can train in parallel without NCCL.
13 |
14 | cccc is designed to support both dynamic and static computation graphs.
15 |
16 | cccc supports both NVIDIA and AMD GPUs, with no need to build two versions for the two GPU platforms. You can even install both kinds of GPUs in one machine and compute on them in parallel (not that we recommend doing so).
17 |
18 | cccc's feature set does not match other open-source frameworks; its strengths are simple configuration and single-machine speed, so use it where that fits.
19 |
20 | ## Building
21 |
22 | ### Building on Windows
23 |
24 | - Any Visual Studio supporting C++20 or later, together with a CUDA release that works with it, will do; newer versions are recommended.
25 | - Download the cuDNN development package and copy its header, lib, and dll files into the include, lib/x64, and bin directories of the CUDA toolkit. You may copy them to a directory of your own choosing instead, but you may then need to set environment variables yourself.
26 | - Check the CUDA_PATH environment variable; its value is usually "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vxx.x" (the trailing version number may differ).
27 | - The cccc-cuda.vcxproj file references the CUDA version number in two places, in lines like "CUDA xx.x.targets/props"; edit them by hand to match your installation.
28 | - Install the linear algebra library OpenBLAS (vcpkg or msys2 is recommended) and put its library files where the linker can find them.
29 | - For AMD GPU support, download and install AMD's development packages and check the corresponding macros in gpu_lib.h. Note that MIOpen has only unofficial Windows builds, and its convolutions are currently very slow.
30 | - Check the ENABLE_CUDA and ENABLE_HIP settings at the top of gpu_lib.h; change them there, or in the preprocessor settings of the project configuration, to enable or disable each platform.
31 | - Some dll files, including openblas.dll, are not on PATH by default; copy them by hand into the work directory or into a directory on PATH.
32 | - Download and extract the MNIST files into the work/mnist directory. The file names should be t10k-images.idx3-ubyte, t10k-labels.idx1-ubyte, train-images.idx3-ubyte, and train-labels.idx1-ubyte. Some archivers turn the middle "." into "-" on extraction; rename the files if so.
33 | - Build the Visual Studio projects. If you only need the core functionality, build only cccc-windows. Run the following command to test; normally the accuracy should be above 99%.
34 | ```shell
35 | cccc-windows -c mnist-lenet.ini
36 | ```
37 | - You can also test with FashionMNIST; the mnist_path option selects the directory containing the data files. Accuracy on the test set typically reaches about 91%. A command-line sketch follows below.
38 |
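As a sketch, the data directory can also be switched from the command line through the add-config option; the `[data]` section name below is an assumption and must match whichever section actually holds `mnist_path` in your ini:

```shell
cccc-windows -c mnist-lenet.ini -a "[data]mnist_path=fashion-mnist"
```
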
39 | ### Building on Linux
40 |
41 | #### x86_64
42 | - Install and configure CUDA, HIP (if needed), and OpenBLAS yourself, preferably using the versions your system's package manager installs automatically. Depending on the installed versions, the cmake files may need modification.
43 | - CUDA's default installation directory should be /usr/local/cuda, but some Linux distributions install it elsewhere; in that case modify the include and link directories in CMakeLists.txt.
44 | - Download cuDNN and place it under /usr/local/cuda; note that the lib directory may be named lib64.
45 | - Run ```cmake .``` in the neural directory to generate a Makefile.
46 | - Build with ```make```; add -j to speed it up.
47 | - The resulting executables are placed in the bin folder.
48 | - Setting up a dedicated build directory is recommended, for example:
49 | ```shell
50 | mkdir build
51 | cd build
52 | cmake ..
53 | make
54 | ```
55 | - A sample script in the work directory builds all the components directly and is worth consulting.
56 | - Extensions must be built separately; see the combined sketch below.
57 |
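A combined sketch of the steps above, adapted from cccc-mnist/build.sh (the paths and the CPU-only flag are illustrative):

```shell
mkdir build && cd build
cmake ..                        # or: cmake -DCCCC_CPU=ON .. for a CPU-only build
make -j
cp -p lib/*.so bin/* ../work/   # run the binaries from work/, next to the ini files
```
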
58 | ### mlcc
59 |
60 | This project depends on mlcc, a common utility library written by the author; obtain the latest version and place it at the same level as this directory (cccc-lite).
61 |
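A sketch of the expected layout (the original does not give a download location for mlcc; the build files reference it through relative paths such as ../../mlcc):

```
parent/
├── cccc-lite/
└── mlcc/
```
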
62 | ### logo
63 |
64 | The logo was designed by Dr. Cheng ZD.
65 |
66 | The old logo was designed by Mr. Zou YB.
67 |
68 | ### About the lite version
69 |
70 | The lite version supports only a few basic activation functions and basic connections such as convolution and pooling.
71 |
72 | Only a single GPU can be used for training.
73 |
74 | Half precision is not supported.
75 |
--------------------------------------------------------------------------------
/cccc-cuda/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.6)
2 | project(cccc-cuda)
3 | find_package(CUDA QUIET REQUIRED)
4 |
5 | set(CUDA_NVCC_FLAGS "-O3 --generate-code arch=compute_75,code=sm_75 --generate-code arch=compute_61,code=sm_61")
6 | if (CMAKE_COMPILER_IS_GNUCC)
7 | execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
8 | OUTPUT_VARIABLE GCC_VERSION)
9 | string(REGEX MATCHALL "[0-9]+" GCC_VERSION_COMPONENTS ${GCC_VERSION})
10 | #list(GET GCC_VERSION_COMPONENTS 0 GCC_MAJOR)
11 | #list(GET GCC_VERSION_COMPONENTS 1 GCC_MINOR)
12 | endif()
13 |
14 | if(GCC_VERSION VERSION_GREATER 10)
15 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " -ccbin gcc-10")
16 | endif()
17 |
18 | SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/../lib)
19 | cuda_add_library(cccc-cuda SHARED cuda_functions.cu)
20 |
21 | install(TARGETS cccc-cuda LIBRARY DESTINATION lib)
22 |
--------------------------------------------------------------------------------
/cccc-cuda/cccc-cuda.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | x64
7 |
8 |
9 | Release
10 | x64
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}
24 | will_cuda
25 |
26 |
27 |
28 | StaticLibrary
29 | true
30 | MultiByte
31 | v143
32 |
33 |
34 | StaticLibrary
35 | false
36 | true
37 | MultiByte
38 | v143
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 | true
53 |
54 |
55 |
56 | Level3
57 | Disabled
58 | WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
59 |
60 |
61 | true
62 | Console
63 | %(AdditionalDependencies)
64 |
65 |
66 |
67 |
68 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
69 |
70 |
71 | 64
72 | compute_89,sm_89;compute_86,sm_86;compute_75,sm_75;compute_61,sm_61
73 |
74 |
75 | Static
76 |
77 |
78 |
79 |
80 | Level3
81 | MaxSpeed
82 | true
83 | true
84 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
85 |
86 |
87 | true
88 | true
89 | true
90 | Console
91 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
92 |
93 |
94 |
95 |
96 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
97 |
98 |
99 | 64
100 |
101 |
102 | compute_89,sm_89;compute_86,sm_86;compute_75,sm_75;compute_61,sm_61
103 |
104 |
105 | cudart_static.lib
106 | $(CUDA_PATH)/lib/x64
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
--------------------------------------------------------------------------------
/cccc-cuda/cccc-cuda.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/cccc-cuda/cuda_functions.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #ifndef C_EXPORT
4 | #ifdef _WIN32
5 | #define C_EXPORT extern "C" __declspec(dllexport)
6 | #else
7 | #define C_EXPORT extern "C"
8 | #endif
9 | #endif
10 |
11 | C_EXPORT int cuda_half2float(void* p1, void* p2, unsigned int size);
12 |
13 | C_EXPORT int cuda_reciprocal(int type, void* A, void* B, unsigned int size, float scale, float epsilon);
14 | C_EXPORT int cuda_addnumber(int type, void* A, void* R, unsigned int size, float number, float scale);
15 | C_EXPORT int cuda_pow(int type, void* A, void* R, unsigned int size, float bias, float a2);
16 | C_EXPORT int cuda_sparse(int type, void* p1, void* p2, unsigned int size, float rou, float beta);
17 | C_EXPORT int cuda_sign(int type, void* A, void* R, unsigned int size, float v, float section);
18 | C_EXPORT int cuda_cross_entropy(int type, void* A, void* B, void* R, unsigned int size, float a, float scale);
19 | C_EXPORT int cuda_cross_entropy2(int type, void* A, void* B, void* R, unsigned int size, float a, float scale);
20 |
21 | C_EXPORT int cuda_div(int type, void* A, void* B, void* R, unsigned int size, float a, float b, float scale);
22 | C_EXPORT int cuda_sectionlimit(int type, void* p1, void* p2, void* p3, unsigned int size, float v0, float v1);
23 | C_EXPORT int cuda_ada_delta_update(int type, void* mean_d2, void* mean_ada_d2, void* d, void* ada_d, unsigned int size, float rou, float epsilon);
24 | C_EXPORT int cuda_adam_update(int type, void* mean_d, void* mean_d2, void* d, void* ada_d, unsigned int size, float beta1, float beta2, float epsilon, float t);
25 | C_EXPORT int cuda_rms_prop_update(int type, void* mean_d2, void* d, void* ada_d, unsigned int size, float rou, float epsilon);
26 |
27 | C_EXPORT int cuda_sin(int type, void* A, void* R, unsigned int size, float a, float b);
28 | C_EXPORT int cuda_cos(int type, void* A, void* R, unsigned int size, float a, float b);
29 |
30 | C_EXPORT int cuda_zigzag(int type, void* A, void* R, unsigned int size, float a1, float a2);
31 | C_EXPORT int cuda_zigzagb(int type, void* A, void* dA, void* R, void* dR, unsigned int size, float a1, float a2);
32 |
33 | C_EXPORT int cuda_step(int type, void* A, void* R, unsigned int size, float unuse1, float unuse2);
34 |
35 | C_EXPORT int cuda_leaky_relu(int type, void* A, void* R, unsigned int size, float leak, float a2, float a3);
36 | C_EXPORT int cuda_leaky_relub(int type, void* A, void* dA, void* R, void* dR, unsigned int size, float leak, float a2, float a3);
37 |
38 | C_EXPORT int cuda_max(int type, void* A, void* B, void* R, unsigned int size, float unuse1, float unuse2, float unuse3);
39 | C_EXPORT int cuda_maxb(int type, void* A, void* dA, void* B, void* dB, void* R, void* dR, unsigned int size, float alpha, float beta_a, float beta_b);
40 |
--------------------------------------------------------------------------------
/cccc-hip/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.6)
2 | project(cccc-hip)
3 |
4 |
5 | set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
6 |
7 | enable_language(${GPU_RUNTIME})
8 | set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
9 | set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
10 | set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
11 |
12 | if(WIN32)
13 | set(ROCM_ROOT "$ENV{HIP_PATH}" CACHE PATH "Root directory of the ROCm installation")
14 | else()
15 | set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
16 | endif()
17 |
18 | list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}")
19 |
20 | add_library(cccc-hip SHARED hip_functions.hip)
21 | # Make example runnable using ctest
22 | set(include_dirs "../../Common")
23 | if(GPU_RUNTIME STREQUAL "CUDA")
24 | list(APPEND include_dirs "${ROCM_ROOT}/include")
25 | endif()
26 |
27 | #target_include_directories(${project_name} PRIVATE ${include_dirs})
28 | set_source_files_properties(hip_functions.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
29 |
30 | install(TARGETS cccc-hip)
31 |
32 |
33 |
--------------------------------------------------------------------------------
/cccc-hip/cccc-hip.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | x64
7 |
8 |
9 | Release
10 | x64
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | Document
19 |
20 |
21 |
22 | 17.0
23 | 6.1
24 | {0C83E964-B8EF-4EB9-BEA5-8446F6A7A965}
25 | Win32Proj
26 | cccc-hip
27 | 10.0
28 |
29 |
30 |
31 | DynamicLibrary
32 | true
33 | HIP clang 6.1
34 | Unicode
35 |
36 |
37 | DynamicLibrary
38 | false
39 | HIP clang 6.1
40 | Unicode
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 | true
58 | $(SolutionDir)$(Platform)\$(Configuration)\
59 |
60 |
61 | false
62 | $(SolutionDir)$(Platform)\$(Configuration)\
63 |
64 |
65 |
66 | Level1
67 | __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
68 |
69 |
70 | Console
71 | true
72 |
73 |
74 |
75 |
76 | __CUDACC__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
77 |
78 |
79 |
80 |
81 | true
82 |
83 |
84 |
85 |
86 | Level2
87 | true
88 | __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
89 | stdcpp20
90 |
91 |
92 | Console
93 | true
94 | true
95 | Enabled
96 |
97 |
98 |
99 |
100 | true
101 | __CUDACC__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
102 |
103 |
104 | UseLinkTimeCodeGeneration
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
--------------------------------------------------------------------------------
/cccc-hip/cccc-hip.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {332db0f2-c13e-4842-8592-6345ed4d8674}
6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
7 |
8 |
9 | {9cc8b9fc-1c76-4eba-abd5-c65784a2acd8}
10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
11 |
12 |
13 | {db4d8471-8ce1-4d38-b219-69467c4c6d52}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Header Files
20 |
21 |
22 |
23 |
24 | Source Files
25 |
26 |
27 |
--------------------------------------------------------------------------------
/cccc-hip/cccc-hip.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/cccc-hip/hip_functions.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #ifndef C_EXPORT
4 | #ifdef _WIN32
5 | #define C_EXPORT extern "C" __declspec(dllexport)
6 | #else
7 | #define C_EXPORT extern "C"
8 | #endif
9 | #endif
10 |
11 | C_EXPORT int hip_half2float(void* p1, void* p2, unsigned int size);
12 |
13 | C_EXPORT int hip_reciprocal(int type, void* A, void* B, unsigned int size, float scale, float epsilon);
14 | C_EXPORT int hip_addnumber(int type, void* A, void* R, unsigned int size, float number, float scale);
15 | C_EXPORT int hip_pow(int type, void* A, void* R, unsigned int size, float bias, float a2);
16 | C_EXPORT int hip_sparse(int type, void* p1, void* p2, unsigned int size, float rou, float beta);
17 | C_EXPORT int hip_sign(int type, void* A, void* R, unsigned int size, float v, float section);
18 | C_EXPORT int hip_cross_entropy(int type, void* A, void* B, void* R, unsigned int size, float a, float scale);
19 | C_EXPORT int hip_cross_entropy2(int type, void* A, void* B, void* R, unsigned int size, float a, float scale);
20 |
21 | C_EXPORT int hip_div(int type, void* A, void* B, void* R, unsigned int size, float a, float b, float scale);
22 | C_EXPORT int hip_sectionlimit(int type, void* p1, void* p2, void* p3, unsigned int size, float v0, float v1);
23 | C_EXPORT int hip_ada_delta_update(int type, void* mean_d2, void* mean_ada_d2, void* d, void* ada_d, unsigned int size, float rou, float epsilon);
24 | C_EXPORT int hip_adam_update(int type, void* mean_d, void* mean_d2, void* d, void* ada_d, unsigned int size, float beta1, float beta2, float epsilon, float t);
25 | C_EXPORT int hip_rms_prop_update(int type, void* mean_d2, void* d, void* ada_d, unsigned int size, float rou, float epsilon);
26 |
27 | C_EXPORT int hip_sin(int type, void* A, void* R, unsigned int size, float a, float b);
28 | C_EXPORT int hip_cos(int type, void* A, void* R, unsigned int size, float a, float b);
29 |
30 | C_EXPORT int hip_zigzag(int type, void* A, void* R, unsigned int size, float a1, float a2);
31 | C_EXPORT int hip_zigzagb(int type, void* A, void* dA, void* R, void* dR, unsigned int size, float a1, float a2);
32 |
33 | C_EXPORT int hip_step(int type, void* A, void* R, unsigned int size, float unuse1, float unuse2);
34 |
35 | C_EXPORT int hip_leaky_relu(int type, void* A, void* R, unsigned int size, float leak, float a2, float a3);
36 | C_EXPORT int hip_leaky_relub(int type, void* A, void* dA, void* R, void* dR, unsigned int size, float leak, float a2, float a3);
37 |
38 | C_EXPORT int hip_max(int type, void* A, void* B, void* R, unsigned int size, float unuse1, float unuse2, float unuse3);
39 | C_EXPORT int hip_maxb(int type, void* A, void* dA, void* B, void* dB, void* R, void* dR, unsigned int size, float alpha, float beta_a, float beta_b);
40 |
41 | C_EXPORT int hip_addbias(float* m, float* b, float* r, unsigned int size_m, unsigned int size_mchannel, unsigned int size_b, float a1, float a2);
42 | C_EXPORT int hip_addbiasb(float* bd, float* rd, unsigned int size_m, unsigned int size_mchannel, unsigned int size_b, float a1, float a2);
43 | C_EXPORT int hip_softmax(float* x, float* y, unsigned int size, unsigned int channel, float a1, float a2);
44 |
45 | //only support 2D, square window, stride = window, no padding
46 | C_EXPORT int hip_pool(float* x, float* y, unsigned int w0, unsigned int h0, unsigned int c, unsigned n, unsigned int w1, unsigned int h1, unsigned int size_win, int type, float a1, float a2);
47 | C_EXPORT int hip_poolb(float* x, float* dx, float* y, float* dy, unsigned int w0, unsigned int h0, unsigned int c, unsigned n, unsigned int w1, unsigned int h1, unsigned int size_win, int type, float a1, float a2);
48 |
49 | C_EXPORT int hip_conv2d(float* x, float* w, float* y, int w0, int h0, int c0, int n, int w1, int h1, int c1, int winw, int winh, int stride, int padding, float a1, float a2);
50 | C_EXPORT int hip_conv2db_d(float* dx, float* w, float* dy, int w0, int h0, int c0, int n, int w1, int h1, int c1, int winw, int winh, int stride, int padding, float a1, float a2);
51 | C_EXPORT int hip_conv2db_w(float* x, float* dw, float* dy, int w0, int h0, int c0, int n, int w1, int h1, int c1, int winw, int winh, int stride, int padding, float a1, float a2);
52 |
--------------------------------------------------------------------------------
/cccc-lite.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.0.31903.59
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cccc", "cccc\cccc.vcxproj", "{271C3640-1916-4736-B07F-DABF0F3914DF}"
7 | ProjectSection(ProjectDependencies) = postProject
8 | {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0} = {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}
9 | EndProjectSection
10 | EndProject
11 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cccc-windows", "cccc-windows\cccc-windows.vcxproj", "{4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F}"
12 | ProjectSection(ProjectDependencies) = postProject
13 | {271C3640-1916-4736-B07F-DABF0F3914DF} = {271C3640-1916-4736-B07F-DABF0F3914DF}
14 | {A5C5A5AE-E816-4189-9518-23190FDAC9AE} = {A5C5A5AE-E816-4189-9518-23190FDAC9AE}
15 | EndProjectSection
16 | EndProject
17 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cccc-cuda", "cccc-cuda\cccc-cuda.vcxproj", "{41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}"
18 | EndProject
19 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cccc-mnist", "cccc-mnist\cccc-mnist.vcxproj", "{A5C5A5AE-E816-4189-9518-23190FDAC9AE}"
20 | ProjectSection(ProjectDependencies) = postProject
21 | {271C3640-1916-4736-B07F-DABF0F3914DF} = {271C3640-1916-4736-B07F-DABF0F3914DF}
22 | EndProjectSection
23 | EndProject
24 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cccc-hip", "cccc-hip\cccc-hip.vcxproj", "{0C83E964-B8EF-4EB9-BEA5-8446F6A7A965}"
25 | EndProject
26 | Global
27 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
28 | Debug|x64 = Debug|x64
29 | Release|x64 = Release|x64
30 | EndGlobalSection
31 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
32 | {271C3640-1916-4736-B07F-DABF0F3914DF}.Debug|x64.ActiveCfg = Debug|x64
33 | {271C3640-1916-4736-B07F-DABF0F3914DF}.Debug|x64.Build.0 = Debug|x64
34 | {271C3640-1916-4736-B07F-DABF0F3914DF}.Release|x64.ActiveCfg = Release|x64
35 | {271C3640-1916-4736-B07F-DABF0F3914DF}.Release|x64.Build.0 = Release|x64
36 | {4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F}.Debug|x64.ActiveCfg = Debug|x64
37 | {4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F}.Debug|x64.Build.0 = Debug|x64
38 | {4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F}.Release|x64.ActiveCfg = Release|x64
39 | {4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F}.Release|x64.Build.0 = Release|x64
40 | {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}.Debug|x64.ActiveCfg = Debug|x64
41 | {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}.Debug|x64.Build.0 = Debug|x64
42 | {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}.Release|x64.ActiveCfg = Release|x64
43 | {41A4761D-BA0D-47FF-83CA-F4445F7AEDE0}.Release|x64.Build.0 = Release|x64
44 | {A5C5A5AE-E816-4189-9518-23190FDAC9AE}.Debug|x64.ActiveCfg = Debug|x64
45 | {A5C5A5AE-E816-4189-9518-23190FDAC9AE}.Debug|x64.Build.0 = Debug|x64
46 | {A5C5A5AE-E816-4189-9518-23190FDAC9AE}.Release|x64.ActiveCfg = Release|x64
47 | {A5C5A5AE-E816-4189-9518-23190FDAC9AE}.Release|x64.Build.0 = Release|x64
48 | {0C83E964-B8EF-4EB9-BEA5-8446F6A7A965}.Debug|x64.ActiveCfg = Debug|x64
49 | {0C83E964-B8EF-4EB9-BEA5-8446F6A7A965}.Debug|x64.Build.0 = Debug|x64
50 | {0C83E964-B8EF-4EB9-BEA5-8446F6A7A965}.Release|x64.ActiveCfg = Release|x64
51 | {0C83E964-B8EF-4EB9-BEA5-8446F6A7A965}.Release|x64.Build.0 = Release|x64
52 | EndGlobalSection
53 | GlobalSection(SolutionProperties) = preSolution
54 | HideSolutionNode = FALSE
55 | EndGlobalSection
56 | GlobalSection(ExtensibilityGlobals) = postSolution
57 | SolutionGuid = {9BF9770D-70D3-4C2A-B800-4BA6FD0EF3B6}
58 | EndGlobalSection
59 | EndGlobal
60 |
--------------------------------------------------------------------------------
/cccc-mnist/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | project(cccc-mnist)
2 |
3 | set(CMAKE_VERBOSE_MAKEFILE on)
4 | set(CMAKE_CXX_COMPILER $ENV{CXX})
5 | set(CMAKE_CXX_FLAGS "-O3 -std=c++17 -pthread -fopenmp")
6 |
7 | aux_source_directory(./ SRC_LIST)
8 |
9 | include_directories(../include /usr/include /usr/local/include /usr/local/cuda/include ../src ../../mlcc ../cccc-cuda ../cccc)
10 | link_directories(.)
11 |
12 | add_library(cccc-mnist SHARED ${SRC_LIST})
13 | target_link_libraries(cccc-mnist cccc)
14 |
15 |
--------------------------------------------------------------------------------
/cccc-mnist/Cifar10Reader.cpp:
--------------------------------------------------------------------------------
1 | #include "Cifar10Reader.h"
2 | #include "Log.h"
3 | #include "filefunc.h"
4 |
5 | namespace cccc
6 | {
7 |
8 | Cifar10Reader::Cifar10Reader()
9 | {
10 | }
11 |
12 | static void readData(Matrix& X, Matrix& Y, const std::string& filename, int begin)
13 | {
14 | std::vector<uint8_t> arr;
15 | filefunc::readFileToVector(filename, arr);
16 | int count = begin - 1;
17 | int k = 0;
18 | for (int i = 0; i < arr.size(); i++)
19 | {
20 | if (i % 3073 == 0)
21 | {
22 | count++;
23 | Y.setData(arr[i], count, 1);
24 | k = 0;
25 | }
26 | else
27 | {
28 | X.setData(k++, count, arr[i] / 255.0);
29 | }
30 | }
31 | //for (int in = begin; in < begin+10000; in++)
32 | //{
33 | // for (int ic = 0; ic < 3; ic++)
34 | // {
35 | // real min1 = 1, max1 = 0;
36 | // for (int iw = 0; iw < 32; iw++)
37 | // {
38 | // for (int ih = 0; ih < 32; ih++)
39 | // {
40 | // auto v = X.getData(iw, ih, ic, in);
41 | // min1 = std::min(v,min1);
42 | // max1 = std::max(v, max1);
43 | // }
44 | // }
45 | // for (int iw = 0; iw < 32; iw++)
46 | // {
47 | // for (int ih = 0; ih < 32; ih++)
48 | // {
49 | // auto& v = X.getData(iw, ih, ic, in);
50 | // v /= max1 - min1;
51 | // }
52 | // }
53 | // }
54 | //}
55 | }
56 |
57 | void Cifar10Reader::load(Matrix& X, Matrix& Y, std::string path /*= "cifa10"*/, int data_type /*= 1*/)
58 | {
59 | if (path.back() != '/' && path.back() != '\\')
60 | {
61 | path += '/';
62 | }
63 | if (data_type == 1)
64 | {
65 | X.resize(32, 32, 3, 50000);
66 | Y.resize(10, 50000);
67 | Y.fillData(0);
68 | LOG("Loading Cifar10 train data... ");
69 | readData(X, Y, path + "data_batch_1.bin", 0);
70 | readData(X, Y, path + "data_batch_2.bin", 10000);
71 | readData(X, Y, path + "data_batch_3.bin", 20000);
72 | readData(X, Y, path + "data_batch_4.bin", 30000);
73 | readData(X, Y, path + "data_batch_5.bin", 40000);
74 | LOG("done\n");
75 | }
76 | else if (data_type == 2)
77 | {
78 | X.resize(32, 32, 3, 10000);
79 | Y.resize(10, 10000);
80 | Y.fillData(0);
81 | LOG("Loading Cifar10 test data... ");
82 | readData(X, Y, path + "test_batch.bin", 0);
83 | LOG("done\n");
84 | }
85 | else
86 | {
87 | LOG("Please check Cifar10 type: 1 - train set (50000), 2 - test set (10000)\n");
88 | return;
89 | }
90 | //X.message();
91 | //Y.message();
92 | }
93 |
94 | } // namespace cccc
95 |
--------------------------------------------------------------------------------
/cccc-mnist/Cifar10Reader.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scarsty/cccc-lite/8a82b51444927af362edc2060af9fa5c045e14e8/cccc-mnist/Cifar10Reader.h
--------------------------------------------------------------------------------
/cccc-mnist/DataPreparerMnist.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scarsty/cccc-lite/8a82b51444927af362edc2060af9fa5c045e14e8/cccc-mnist/DataPreparerMnist.cpp
--------------------------------------------------------------------------------
/cccc-mnist/DataPreparerMnist.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "DataPreparerImage.h"
3 | #include "Random.h"
4 |
5 | namespace cccc
6 | {
7 |
8 | class DataPreparerMnist : public DataPreparerImage
9 | {
10 | private:
11 | int type_ = 1;
12 | int random_diff_ = 0;
13 | int remove59_ = 0;
14 | int reverse_color_ = 0;
15 | Random rand_;
16 |
17 | public:
18 | DataPreparerMnist();
19 | virtual ~DataPreparerMnist();
20 |
21 | void initData() override;
22 | void init2() override;
23 | void fillData0() override;
24 | void transOne(Matrix& X1, Matrix& Y1) override;
25 | };
26 |
27 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc-mnist/MnistReader.cpp:
--------------------------------------------------------------------------------
1 | #include "MnistReader.h"
2 | #include "Log.h"
3 | #include "filefunc.h"
4 |
5 | namespace cccc
6 | {
7 |
8 | MnistReader::MnistReader()
9 | {
10 | }
11 |
12 | void MnistReader::getDataSize(const std::string& file_image, int* w, int* h, int* n)
13 | {
14 | auto content = filefunc::readFile(file_image, 16);
15 | reverse(content.data() + 4, 4);
16 | reverse(content.data() + 8, 4);
17 | reverse(content.data() + 12, 4);
18 | if (n)
19 | {
20 | *n = *(int*)(content.data() + 4);
21 | }
22 | if (w)
23 | {
24 | *w = *(int*)(content.data() + 8);
25 | }
26 | if (h)
27 | {
28 | *h = *(int*)(content.data() + 12);
29 | }
30 | }
31 |
32 | void MnistReader::readLabelFile(const std::string& filename, void* y_data, DataType data_type)
33 | {
34 | int s = label_;
35 | auto content = filefunc::readFile(filename);
36 | reverse(content.data() + 4, 4);
37 | int n = *(int*)(content.data() + 4);
38 | memset(y_data, 0, MatrixData::getDataTypeSize(data_type) * n * s);
39 | for (int i = 0; i < n; i++)
40 | {
41 | int pos = *(content.data() + 8 + i);
42 | MatrixData::setData((char*)y_data + MatrixData::getDataTypeSize(data_type) * (i * s + pos % s), data_type, 1);
43 | //y_data[i * s + pos % s] = 1;
44 | }
45 | }
46 |
47 | void MnistReader::readImageFile(const std::string& filename, void* x_data, DataType data_type)
48 | {
49 | auto content = filefunc::readFile(filename);
50 | reverse(content.data() + 4, 4);
51 | reverse(content.data() + 8, 4);
52 | reverse(content.data() + 12, 4);
53 | int n = *(int*)(content.data() + 4);
54 | int w = *(int*)(content.data() + 8);
55 | int h = *(int*)(content.data() + 12);
56 | int size = n * w * h;
57 | memset(x_data, 0, MatrixData::getDataTypeSize(data_type) * size);
58 | for (int i = 0; i < size; i++)
59 | {
60 | auto v = *(uint8_t*)(content.data() + 16 + i);
61 | auto p = (char*)x_data + MatrixData::getDataTypeSize(data_type) * i;
62 | MatrixData::setData(p, data_type, v / 255.0);
63 | }
64 | }
65 |
66 | void MnistReader::readData(const std::string& file_label, const std::string& file_image, Matrix& X, Matrix& Y)
67 | {
68 | int w, h, n;
69 | getDataSize(file_image, &w, &h, &n);
70 | X.resize(w, h, 1, n);
71 | Y.resize(label_, n);
72 | //train.createA();
73 | readImageFile(file_image, X.getDataPtr(), X.getDataType());
74 | readLabelFile(file_label, Y.getDataPtr(), Y.getDataType());
75 | }
76 |
77 | void MnistReader::reverse(char* c, int n)    //IDX headers store integers big-endian; swap the bytes for little-endian hosts
78 | {
79 | for (int i = 0; i < n / 2; i++)
80 | {
81 | auto& a = *(c + i);
82 | auto& b = *(c + n - 1 - i);
83 | auto t = b;
84 | b = a;
85 | a = t;
86 | }
87 | }
88 |
89 | void MnistReader::load(Matrix& X, Matrix& Y, std::string path /*= "mnist"*/, int data_type /*= 1*/)
90 | {
91 | if (path.back() != '/' && path.back() != '\\')
92 | {
93 | path += '/';
94 | }
95 | std::string label;
96 | std::string image;
97 | if (data_type == 1)
98 | {
99 | label = path + "train-labels.idx1-ubyte";
100 | image = path + "train-images.idx3-ubyte";
101 | LOG("Loading MNIST train data... ");
102 | }
103 | else if (data_type == 2)
104 | {
105 | label = path + "t10k-labels.idx1-ubyte";
106 | image = path + "t10k-images.idx3-ubyte";
107 | LOG("Loading MNIST test data... ");
108 | }
109 | else
110 | {
111 | LOG("Please check MNIST type: 1 - train set (60000), 2 - test set (10000)\n");
112 | return;
113 | }
114 | readData(label, image, X, Y);
115 | LOG("done\n");
116 | }
117 |
118 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc-mnist/MnistReader.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scarsty/cccc-lite/8a82b51444927af362edc2060af9fa5c045e14e8/cccc-mnist/MnistReader.h
--------------------------------------------------------------------------------
/cccc-mnist/build.sh:
--------------------------------------------------------------------------------
1 | cd ..
2 | mkdir build
3 | cd build
4 | cmake ..
5 | make -j
6 | cd ../extension
7 | cp -p ../build/lib/*.so .
8 | cmake .
9 | make
10 | cp -p *.so ../work/
11 | cp -p ../build/bin/* ../work/
12 |
13 |
--------------------------------------------------------------------------------
/cccc-mnist/cccc-mnist.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | x64
7 |
8 |
9 | Release
10 | x64
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 | {A5C5A5AE-E816-4189-9518-23190FDAC9AE}
32 | Win32Proj
33 | neural
34 | cccc-mnist
35 | 10.0
36 |
37 |
38 |
39 | DynamicLibrary
40 | true
41 | v143
42 | Unicode
43 |
44 |
45 | DynamicLibrary
46 | false
47 | v143
48 | true
49 | Unicode
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 | true
65 |
66 |
67 | false
68 |
69 |
70 |
71 |
72 |
73 | Level3
74 | Disabled
75 | _DEBUG;_WINDOWS;%(PreprocessorDefinitions)
76 | ../include;../include/cuda;../../mlcc;../cccc;$(VCPKG_LIB_PATH)\x64-windows\include
77 | false
78 | true
79 | true
80 |
81 |
82 | Console
83 | true
84 | ../lib/x64;$(OutDir)
85 | cccc.lib;%(AdditionalDependencies)
86 | %(ForceSymbolReferences)
87 | true
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 | Level3
97 |
98 |
99 | MaxSpeed
100 | true
101 | true
102 | NNO_CUDA;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)
103 | ../include;../include/cuda;../../mlcc;../cccc;$(VCPKG_LIB_PATH)\x64-windows\include
104 | true
105 | true
106 | false
107 | Precise
108 | false
109 | true
110 | stdcpplatest
111 |
112 |
113 | Console
114 | true
115 | true
116 | true
117 | lib/x64;$(OutDir)
118 | cccc.lib;%(AdditionalDependencies)
119 | %(ForceSymbolReferences)
120 |
121 |
122 | true
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
--------------------------------------------------------------------------------
/cccc-mnist/cccc-mnist.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
6 | h;hh;hpp;hxx;hm;inl;inc;xsd
7 |
8 |
9 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
10 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
11 |
12 |
13 |
14 |
15 | headers
16 |
17 |
18 | headers
19 |
20 |
21 | headers
22 |
23 |
24 | headers
25 |
26 |
27 |
28 |
29 | sources
30 |
31 |
32 | sources
33 |
34 |
35 | sources
36 |
37 |
38 | sources
39 |
40 |
41 | sources
42 |
43 |
44 | sources
45 |
46 |
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/cccc-mnist/cccc-mnist.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/cccc-mnist/cccc_extension.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "dll_export.h"
3 |
4 | #ifdef __cplusplus
5 | extern "C"
6 | {
7 | #endif
8 |
9 | DLL_EXPORT void* dp_ext();
10 |
11 | #ifdef __cplusplus
12 | }
13 | #endif
14 |
--------------------------------------------------------------------------------
/cccc-mnist/extension.cpp:
--------------------------------------------------------------------------------
1 | #include "DataPreparerMnist.h"
2 | #include "cccc_extension.h"
3 |
4 | DLL_EXPORT void* dp_ext()
5 | {
6 | return new cccc::DataPreparerMnist();
7 | }
8 |
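//A sketch of how this factory is presumably consumed: the trainer loads this
//shared library at run time (see DataPreparerFactory in cccc) and calls dp_ext()
//to obtain a new DataPreparer; ownership of the raw pointer passes to the caller.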
--------------------------------------------------------------------------------
/cccc-windows/Application.cpp:
--------------------------------------------------------------------------------
1 | #include "Application.h"
2 | #include "MainProcess.h"
3 | #include "filefunc.h"
4 | #include "strfunc.h"
5 |
6 | namespace cccc
7 | {
8 |
9 | void Application::run()
10 | {
11 | MainProcess mp;
12 |
13 | std::map<std::string, std::string> replace_pairs;
14 | auto replace_strings = strfunc::splitString(replace_string_, ";");
15 | for (auto& r : replace_strings)
16 | {
17 | auto rs = strfunc::splitString(r, ":");
18 | if (rs.size() >= 1)
19 | {
20 | rs.resize(2);
21 | replace_pairs[rs[0]] = rs[1];
22 | }
23 | }
24 |
25 | auto filenames = strfunc::splitString(ini_file_);
26 | for (auto filename : filenames)
27 | {
28 | if (!filefunc::fileExist(filename))
29 | {
30 | LOG_ERR("{} doesn't exist!\n", filename.c_str());
31 | }
32 | auto ini_str = filefunc::readFileToString(filename);
33 | //replace placeholder strings (supplied on the command line as -r "old1:new1;old2:new2")
34 | for (auto rp : replace_pairs)
35 | {
36 | strfunc::replaceAllSubStringRef(ini_str, rp.first, rp.second);
37 | }
38 | mp.getOption()->loadString(ini_str);
39 | }
40 | auto load_filenames = strfunc::splitString(mp.getOption()->getString("train", "load_ini"), ",");
41 | for (auto filename : load_filenames)
42 | {
43 | if (filename != "")
44 | {
45 | mp.getOption()->loadFile(filename);
46 | }
47 | }
48 |
49 | //format the string into ini style by inserting '\n'
50 | strfunc::replaceAllSubStringRef(add_option_string_, "[", "\n[");
51 | strfunc::replaceAllSubStringRef(add_option_string_, "]", "]\n");
52 | strfunc::replaceAllSubStringRef(add_option_string_, ";", "\n");
53 | mp.getOption()->loadString(add_option_string_);
54 |
55 | if (mp.init() != 0)
56 | {
57 | return;
58 | }
59 | loop_ = true;
60 | while (loop_)
61 | {
62 | mp.run();
63 | loop_ = false;
64 | }
65 | }
66 |
67 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc-windows/Application.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <string>
3 |
4 | namespace cccc
5 | {
6 |
7 | class Application
8 | {
9 | private:
10 | bool loop_ = true;
11 |
12 | public:
13 | std::string ini_file_;
14 | std::string add_option_string_;
15 | std::string replace_string_;
16 |
17 | public:
18 | void run();
19 | };
20 |
21 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc-windows/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.6)
2 | project(cccc-windows)
3 |
4 | aux_source_directory(./ SRC_LIST)
5 |
6 | set(CMAKE_VERBOSE_MAKEFILE on)
7 | set(CMAKE_CXX_COMPILER $ENV{CXX})
8 | set(CMAKE_CXX_FLAGS "-O3 -std=c++17 -pthread -fopenmp -DNDEBUG")
9 |
10 | include_directories(/usr/include /usr/local/include /usr/local/cuda/include /usr/local/cuda/include/crt ../../mlcc ../cccc-cuda ../include ../cccc /opt/rocm/include)
11 | link_directories(/usr/local/cuda/lib64)
12 |
13 | add_executable(cccc-windows ${SRC_LIST})
14 | target_link_libraries(cccc-windows cccc)
15 |
16 | install(TARGETS cccc-windows RUNTIME DESTINATION bin)
17 |
--------------------------------------------------------------------------------
/cccc-windows/cccc-windows.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | x64
7 |
8 |
9 | Release
10 | x64
11 |
12 |
13 |
14 | {4D42F1DC-AFF8-4F03-AF3A-8B2528D38D0F}
15 | Win32Proj
16 | neural
17 | cccc-windows
18 | 10.0
19 |
20 |
21 |
22 | Application
23 | true
24 | v143
25 | Unicode
26 |
27 |
28 | Application
29 | false
30 | v143
31 | true
32 | Unicode
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 | true
48 |
49 |
50 | false
51 |
52 |
53 |
54 |
55 |
56 | Level3
57 | Disabled
58 | _DEBUG;_WINDOWS;%(PreprocessorDefinitions)
59 | ../include;include/cuda;../../mlcc;../cccc;$(VCPKG_LIB_PATH)\x64-windows\include
60 | false
61 | false
62 | true
63 | stdcpplatest
64 |
65 |
66 | Console
67 | true
68 | ../lib/x64;$(SolutionDir)$(Platform)\$(Configuration)\
69 | cccc.lib;%(AdditionalDependencies)
70 | %(ForceSymbolReferences)
71 | true
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 | Level3
81 |
82 |
83 | MaxSpeed
84 | true
85 | true
86 | NDEBUG;_WINDOWS;%(PreprocessorDefinitions)
87 | ../include;../include/cuda;../../mlcc;../cccc;$(VCPKG_LIB_PATH)\x64-windows\include
88 | true
89 | true
90 | false
91 | Precise
92 | false
93 | true
94 | stdcpplatest
95 |
96 |
97 | Console
98 | true
99 | true
100 | true
101 | ../lib/x64;$(SolutionDir)$(Platform)\$(Configuration)\
102 | cccc.lib;%(AdditionalDependencies)
103 | %(ForceSymbolReferences)
104 |
105 |
106 | true
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
--------------------------------------------------------------------------------
/cccc-windows/cccc-windows.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
6 | h;hh;hpp;hxx;hm;inl;inc;xsd
7 |
8 |
9 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
10 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
11 |
12 |
13 |
14 |
15 | sources
16 |
17 |
18 | sources
19 |
20 |
21 | sources
22 |
23 |
24 | sources
25 |
26 |
27 |
28 |
29 | headers
30 |
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/cccc-windows/cccc-windows.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | -c mnist-lenet.ini
5 | $(SolutionDir)work
6 | WindowsLocalDebugger
7 | $(TargetPath)
8 |
9 |
10 | -c mnist-lenet.ini
11 | $(SolutionDir)work
12 | WindowsLocalDebugger
13 |
14 |
--------------------------------------------------------------------------------
/cccc-windows/windows.cpp:
--------------------------------------------------------------------------------
1 | #include "Application.h"
2 | #include "Log.h"
3 | #include "cmdline.h"
4 | #include <string>
5 |
6 | #ifdef _MSC_VER
7 | #include <windows.h>
8 | #endif
9 |
10 | int main(int argc, char* argv[])
11 | {
12 | cmdline::parser cmd;
13 |
14 | cmd.add("config", 'c', "config file (ini format) of the net", false, "will.ini");
15 | cmd.add("add-config", 'a', "additional config string ([sec1]key1=v1;key2=v2[sec2]key1=v1...)", false, "");
16 | cmd.add("replace", 'r', "replace strings before loading ini (old1:new1;old2:new2...)", false, "");
17 | cmd.add("version", 'v', "version information");
18 |
19 | #ifdef _MSC_VER
20 | cmd.parse_check(GetCommandLineA());
21 | #else
22 | cmd.parse_check(argc, argv);
23 | #endif
24 |
25 | if (cmd.exist("config"))
26 | {
27 | cccc::Application app;
28 | app.ini_file_ = cmd.get<std::string>("config");
29 | app.add_option_string_ = cmd.get<std::string>("add-config");
30 | app.replace_string_ = cmd.get<std::string>("replace");
31 | app.run();
32 | }
33 | else if (cmd.exist("version"))
34 | {
35 | cccc::LOG("CCCC (A Deep Neural Net library) command line interface\n");
36 | cccc::LOG("Built with ");
37 | #if defined(_MSC_VER)
38 | cccc::LOG("Microsoft Visual Studio {}\n", _MSC_VER);
39 | #elif defined(__clang__)
40 | cccc::LOG("Clang {}\n", __clang_version__);
41 | #elif defined(__GNUC__)
42 | cccc::LOG("GNU C {}\n", __VERSION__);
43 | #else
44 | cccc::LOG("Unknown complier\n");
45 | #endif
46 | cccc::LOG("Commemorating my great teacher and friend Dr. Yu Wang\n");
47 | }
48 | else
49 | {
50 | if (argc >= 1)
51 | {
52 | cmd.parse_check({ argv[0], "--help" });
53 | }
54 | }
55 |
56 | return 0;
57 | }
58 |
--------------------------------------------------------------------------------
/cccc/Activer.cpp:
--------------------------------------------------------------------------------
1 | #include "Activer.h"
2 |
3 | namespace cccc
4 | {
5 |
6 | Activer::Activer()
7 | {
8 | }
9 |
10 | Activer::~Activer()
11 | {
12 | }
13 |
14 | void Activer::init(Option* op, std::string section, LayerConnectionType ct)
15 | {
16 | active_function_ = op->getEnum(section, "active", ACTIVE_FUNCTION_NONE);
17 | cost_function_ = op->getEnum(section, "cost", COST_FUNCTION_CROSS_ENTROPY);    //cost seems like it should belong to the net; to be verified
18 | cost_rate_ = op->getVector(section, "cost_rate");
19 |
20 | real learn_rate_base = op->getReal(section, "learn_rate_base", 1e-2);
21 | switch (active_function_)
22 | {
23 | case ACTIVE_FUNCTION_CLIPPED_RELU:
24 | real_vector_ = { op->get(section, "clipped_relu", 0.5) };
25 | break;
26 | case ACTIVE_FUNCTION_DROPOUT:
27 | {
28 | //Dropout resets two matrices
29 | //2 int: work_phase, seed
30 | int_vector_ =
31 | {
32 | int(ACTIVE_PHASE_TRAIN),
33 | int(op->getReal(section, "dropout_seed", INT_MAX * random_generator_.rand()))
34 | };
35 | random_generator_.set_seed();
36 | random_generator_.set_random_type(RANDOM_UNIFORM);
37 | random_generator_.set_parameter(0, 1);
38 | //1 real: dropout_rate
39 | real_vector_ =
40 | {
41 | op->get(section, "dropout_rate", 0.5)
42 | };
43 | break;
44 | }
45 | case ACTIVE_FUNCTION_LOCAL_RESPONSE_NORMALIZATION:
46 | //1 int: lrn_n
47 | int_vector_ =
48 | {
49 | op->getInt(section, "lrn_n", 5)
50 | };
51 | //3 real: lrn_alpha, lrn_beta, lrn_k
52 | real_vector_ =
53 | {
54 | op->get(section, "lrn_alpha", 1e-4),
55 | op->get(section, "lrn_beta", 0.75),
56 | op->get(section, "lrn_k", 2.0)
57 | };
58 | break;
59 | case ACTIVE_FUNCTION_LOCAL_CONSTRAST_NORMALIZATION:
60 | //1 int: lcn_n
61 | int_vector_ =
62 | {
63 | op->getInt(section, "lcn_n", 5)
64 | };
65 | //3 real: lcn_alpha, lcn_beta, lcn_k (not sure these are useful)
66 | real_vector_ =
67 | {
68 | op->get(section, "lcn_alpha", 1),
69 | op->get(section, "lcn_beta", 0.5),
70 | op->get(section, "lcn_k", 1e-5)
71 | };
72 | break;
73 | case ACTIVE_FUNCTION_DIVISIVE_NORMALIZATION:
74 | int_vector_ =
75 | {
76 | int(op->get(section, "dn_n", 5))
77 | };
78 | //same to lcn, real[3] is learn rate of means
79 | real_vector_ =
80 | {
81 | op->get(section, "dn_alpha", 1),
82 | op->get(section, "dn_beta", 0.5),
83 | op->get(section, "dn_k", 1e-5),
84 | op->get(section, "dn_rate", learn_rate_base)
85 | };
86 | break;
87 | case ACTIVE_FUNCTION_BATCH_NORMALIZATION:
88 | {
89 | auto bt = op->getEnum(section, "bn_type", BATCH_NORMALIZATION_AUTO);
90 | if (bt == BATCH_NORMALIZATION_AUTO)
91 | {
92 | if (ct == LAYER_CONNECTION_CONVOLUTION)
93 | {
94 | bt = BATCH_NORMALIZATION_SPATIAL;
95 | }
96 | else
97 | {
98 | bt = BATCH_NORMALIZATION_PER_ACTIVATION;
99 | }
100 | }
101 | //2 int: work_phase, batch_normalization
102 | int_vector_ =
103 | {
104 | int(ACTIVE_PHASE_TRAIN),
105 | int(bt)
106 | };
107 | //3 real: train_rate, exp_aver_factor, epsilon
108 | real_vector_ =
109 | {
110 | op->get(section, "bn_rate", learn_rate_base),
111 | op->get(section, "bn_exp_aver_factor", 1),
112 | op->get(section, "bn_epsilon", 1e-5)
113 | };
114 | break;
115 | }
116 | case ACTIVE_FUNCTION_SPATIAL_TRANSFORMER:
117 | //1 real: train_rate
118 | real_vector_ =
119 | {
120 | op->get(section, "st_train_rate", 0)
121 | };
122 | break;
123 | case ACTIVE_FUNCTION_RECURRENT:
124 | int_vector_ =
125 | {
126 | op->getInt(section, "rnn_num_layers", 2),
127 | op->getEnum(section, "rnn_type", RECURRENT_RELU),
128 | op->getEnum(section, "rnn_direction", RECURRENT_DIRECTION_UNI),
129 | op->getEnum(section, "rnn_input", RECURRENT_INPUT_LINEAR)
130 | };
131 | break;
132 | case ACTIVE_FUNCTION_ZERO_CHANNEL:
133 | int_vector_ = op->getVector(section, "zero_channels");
134 | break;
135 | default:
136 | break;
137 | }
138 | }
139 |
140 | //Preparation before activation, mainly for activation functions that use different settings in training and testing
141 | void Activer::activePrepare(ActivePhaseType ap)
142 | {
143 | switch (active_function_)
144 | {
145 | case ACTIVE_FUNCTION_DROPOUT:
146 | int_vector_[0] = int(ap);
147 | if (ap == ACTIVE_PHASE_TRAIN)
148 | {
149 | random_generator_.set_random_type(RANDOM_UNIFORM);
150 | int_vector_[1] = int(INT_MAX * random_generator_.rand());
151 | }
152 | break;
153 | case ACTIVE_FUNCTION_BATCH_NORMALIZATION:
154 | int_vector_[0] = int(ap);
155 | break;
156 | default:
157 | break;
158 | }
159 | }
160 |
161 | void Activer::initBuffer(Matrix& X)
162 | {
163 | MatrixEx::activeBufferInit(X, active_function_, int_vector_, real_vector_, matrix_vector_);
164 | if (!cost_rate_.empty())
165 | {
166 | matrix_cost_rate_ = Matrix(X.getDim(), UnitType::CPU);
167 | matrix_cost_rate_.fillData(1);
168 | for (int ir = 0; ir < X.getRow(); ir++)
169 | {
170 | for (int in = 0; in < X.getNumber(); in++)
171 | {
172 | if (ir < cost_rate_.size())
173 | {
174 | matrix_cost_rate_.getData(ir, in) = cost_rate_[ir];
175 | }
176 | }
177 | }
178 | matrix_cost_rate_.toGPU();
179 | }
180 | }
181 |
182 | void Activer::backwardCost(Matrix& A, Matrix& X, Matrix& Y, Matrix& dA, Matrix& dX)
183 | {
184 | if (cost_function_ == COST_FUNCTION_RMSE)
185 | {
186 | Matrix::add(A, Y, dA, 1, -1);
187 | backward(A, X);
188 | }
189 | else
190 | {
191 | //The cross-entropy derivative reduces to a direct subtraction (A - Y); note that dA is not updated here
192 | Matrix::add(A, Y, dX, 1, -1);
193 | }
194 | if (matrix_cost_rate_.getDataSize() > 0)
195 | {
196 | Matrix::elementMul(dX, matrix_cost_rate_, dX);
197 | }
198 | }
199 |
200 | //dY will be changed
201 | //Backpropagation does not use this directly; it costs extra computation, so call it sparingly
202 | real Activer::calCostValue(Matrix& A, Matrix& dA, Matrix& Y, Matrix& dY)
203 | {
204 | //todo: this seems not right
205 | if (dY.getDataSize() == 0)
206 | {
207 | return 0;
208 | }
209 | if (cost_function_ == COST_FUNCTION_RMSE)
210 | {
211 | Matrix::add(A, Y, dY, 1, -1);
212 | return dY.dotSelf();
213 | }
214 | else
215 | {
216 | switch (active_function_)
217 | {
218 | case ACTIVE_FUNCTION_SIGMOID:
219 | Matrix::crossEntropy2(A, Y, dY, 1e-5);
220 | break;
221 | case ACTIVE_FUNCTION_SOFTMAX:
222 | case ACTIVE_FUNCTION_SOFTMAX_FAST:
223 | Matrix::crossEntropy(A, Y, dY, 1e-5);
224 | break;
225 | default:
226 | Matrix::add(A, Y, dY, 1, -1);
227 | break;
228 | }
229 | return dY.sumAbs();
230 | }
231 | }
232 |
233 | bool Activer::needSave(int i)
234 | {
235 | return active_function_ == ACTIVE_FUNCTION_BATCH_NORMALIZATION
236 | || active_function_ == ACTIVE_FUNCTION_SPATIAL_TRANSFORMER;
237 | }
238 |
239 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/Activer.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "Matrix.h"
3 | #include "MatrixEx.h"
4 | #include "Option.h"
5 | #include "Random.h"
6 |
7 | namespace cccc
8 | {
9 |
10 | class Activer
11 | {
12 | public:
13 | Activer();
14 | virtual ~Activer();
15 | Activer(const Activer&) = delete;
16 | Activer& operator=(const Activer&) = delete;
17 |
18 | protected:
19 | //activation function settings
20 | ActiveFunctionType active_function_ = ACTIVE_FUNCTION_NONE;
21 | //bool need_active_ex_ = false;
22 | std::vector<real> real_vector_;
23 | std::vector<int> int_vector_;
24 | std::vector<Matrix> matrix_vector_;
25 | //cost function
26 | CostFunctionType cost_function_ = COST_FUNCTION_CROSS_ENTROPY;
27 | std::vector<real> cost_rate_;    //scale the loss of certain classes by channel; think of it as emphasizing correctness on those classes
28 | Matrix matrix_cost_rate_;
29 |
30 | Random random_generator_;
31 | ActivePhaseType active_phase_ = ACTIVE_PHASE_TRAIN;
32 |
33 | public:
34 | void forward(Matrix& X, Matrix& A)
35 | {
36 | activePrepare(active_phase_);
37 | MatrixEx::activeForward(X, A, active_function_, int_vector_, real_vector_, matrix_vector_);
38 | }
39 |
40 | void backward(Matrix& A, Matrix& X)
41 | {
42 | MatrixEx::activeBackward(X, A, active_function_, int_vector_, real_vector_, matrix_vector_);
43 | }
44 |
45 | bool isNone() { return active_function_ == ACTIVE_FUNCTION_NONE; }
46 | void init(Option* op, std::string section, LayerConnectionType ct);
47 | void activePrepare(ActivePhaseType ap);
48 | void initBuffer(Matrix& X);
49 | void setCostFunction(CostFunctionType cf) { cost_function_ = cf; }
50 |
51 | void backwardCost(Matrix& A, Matrix& X, Matrix& Y, Matrix& dA, Matrix& dX);
52 | real calCostValue(Matrix& A, Matrix& dA, Matrix& Y, Matrix& dY);
53 |
54 | ActiveFunctionType getActiveFunction() { return active_function_; }
55 | void setActivePhase(ActivePhaseType ap) { active_phase_ = ap; }
56 |
57 | CostFunctionType getCostFunction_() { return cost_function_; }
58 |
59 | private:
60 | bool needSave(int i);
61 | };
62 |
63 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/AdditionalCost.cpp:
--------------------------------------------------------------------------------
1 | #include "AdditionalCost.h"
2 | #include "MatrixEx.h"
3 |
4 | namespace cccc
5 | {
6 |
7 | AdditionalCost::AdditionalCost()
8 | {
9 | }
10 |
11 | AdditionalCost::~AdditionalCost()
12 | {
13 | destory();
14 | }
15 |
16 | void AdditionalCost::init(Option* op, std::string section, Matrix& A)
17 | {
18 | batch_ = A.getNumber();
19 |
20 | sparse_beta_ = op->getReal(section, "sparse_beta", 0);
21 | if (sparse_beta_ != 0)
22 | {
23 | sparse_rou_ = op->getReal(section, "sparse_rou", 0.1);
24 | sparse_rou_hat_.resize(A);
25 | sparse_rou_hat_vector_.resize(A.getRow(), 1);
26 | as_sparse_.resize(batch_, 1);
27 | as_sparse_.fillData(1);
28 | }
29 |
30 | diverse_beta_ = op->getReal(section, "diverse_beta", 0);
31 | if (diverse_beta_ != 0)
32 | {
33 | diverse_epsilon_ = op->getReal(section, "diverse_epsilon", 1e-8);
34 | diverse_aver_.resize(A);
35 | diverse_aver2_.resize(A);
36 | diverse_aver3_.resize(A);
37 | as_diverse_aver_.resize(1, 1, A.getChannel() * A.getNumber(), 1);
38 | as_diverse_aver_.fillData(1);
39 | diverse_A_ = A.createShared();
40 | diverse_A_.resize(1, 1, A.getChannel() * A.getNumber(), 1);
41 | diverse_workspace_.resize(1, int(1e6));
42 | }
43 | }
44 |
45 | void AdditionalCost::destory()
46 | {
47 | }
48 |
49 | //the extra-cost function often modifies dA with A
50 | void AdditionalCost::modifyDA(Matrix& A, Matrix& dA)
51 | {
52 | //sparse
53 | if (sparse_beta_ != 0)
54 | {
55 | Matrix::mulVector(A, as_sparse_, sparse_rou_hat_vector_, 1.0 / batch_);
56 | MatrixEx::sparse(sparse_rou_hat_, sparse_rou_hat_, sparse_rou_, sparse_beta_);
57 | Matrix::copyData(sparse_rou_hat_vector_, sparse_rou_hat_);
58 | sparse_rou_hat_.repeat();
59 | Matrix::add(dA, sparse_rou_hat_, dA);
60 | }
61 | //diverse, dropped
62 | if (false && diverse_beta_ != 0)
63 | {
64 | //use convolution to calculate average cross channel and number
65 | //int cn = A->getChannel() * A->getNumber();
66 | //diverse_aver_->resize(A->getWidth(), A->getHeight(), 1, 1);
67 | //A->resize(A->getWidth(), A->getHeight(), cn, 1);
68 | //MatrixExtend::convolutionForward(A, diverse_aver_, as_diverse_aver_, diverse_workspace_, diverse_workspace2_, 1, 1, 1.0 / cn);
69 | //A->resize(dA);
70 | //diverse_aver_->resize(A->getWidth(), A->getHeight(), 1, cn);
71 | //diverse_aver_->repeat();
72 |
73 | //Matrix::add(A, diverse_aver_, diverse_aver_, 1, -1);
74 |
75 | //Matrix::add(dA, diverse_aver_, dA, 1, -diverse_beta_);
76 |
77 | /*diverse_aver_->resize(A->getWidth(), A->getHeight(), 1, 1);
78 | A->resize(A->getWidth(), A->getHeight(), cn, 1);
79 | MatrixExtend::convolutionForward(A, diverse_aver_, as_diverse_aver_, diverse_workspace_, diverse_workspace2_, 1, 1, 1.0 / cn);
80 | A->resize(dA);
81 | diverse_aver_->resize(A->getWidth(), A->getHeight(), 1, cn);
82 | diverse_aver_->repeat();
83 | Matrix::elementPow(diverse_aver_, diverse_aver2_, 2);
84 | Matrix::elementPow(diverse_aver_, diverse_aver3_, 3);
85 | Matrix::elementPow(A, diverse_aver_, 2);
86 | Matrix::add(diverse_aver_, diverse_aver2_, diverse_aver_, 1, -1);
87 | Matrix::elementDiv(diverse_aver_, diverse_aver3_, diverse_aver_, 0, diverse_epsilon_);
88 | Matrix::add(dA, diverse_aver_, dA);*/
89 | }
90 | }
91 |
92 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/AdditionalCost.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "Matrix.h"
3 | #include "Option.h"
4 |
5 | namespace cccc
6 | {
7 |
8 | class AdditionalCost
9 | {
10 | public:
11 | AdditionalCost();
12 | virtual ~AdditionalCost();
13 | AdditionalCost(const AdditionalCost&) = delete;
14 | AdditionalCost& operator=(const AdditionalCost&) = delete;
15 |
16 | private:
17 | real sparse_beta_ = 0;
18 | real sparse_rou_ = 0.1;
19 | Matrix sparse_rou_hat_;
20 | Matrix sparse_rou_hat_vector_;
21 | Matrix as_sparse_;
22 | int batch_ = 0;
23 |
24 | real diverse_beta_ = 0;
25 | real diverse_epsilon_;
26 | Matrix diverse_aver_;
27 | Matrix diverse_aver2_;
28 | Matrix diverse_aver3_;
29 | Matrix as_diverse_aver_;
30 | Matrix diverse_workspace_;
31 | Matrix diverse_A_;
32 | std::vector<int> diverse_workspace2_;
33 |
34 | public:
35 | void init(Option* op, std::string section, Matrix& A);
36 |
37 | private:
38 | void destory();
39 |
40 | public:
41 | void modifyDA(Matrix& A, Matrix& dA);
42 | };
43 |
44 | } // namespace cccc
--------------------------------------------------------------------------------
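
Note: the sparse branch in modifyDA above is the classic sparse-autoencoder KL penalty: sparse_rou_hat_vector_ accumulates each unit's mean activation over the batch, MatrixEx::sparse turns it into a penalty gradient, and the result is added onto dA. A hedged, self-contained scalar sketch of the per-element gradient that MatrixEx::sparse is expected to produce (the actual kernel is not shown in this dump, so the formula is an assumption based on the usual derivative of beta * KL(rou || rou_hat)):

    //hedged scalar sketch: d/d(rou_hat) of beta * KL(rou || rou_hat),
    //i.e. the term that modifyDA adds onto dA for one unit
    double sparse_grad(double rou, double rou_hat, double beta)
    {
        return beta * (-rou / rou_hat + (1.0 - rou) / (1.0 - rou_hat));
    }
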
/cccc/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | project(cccc)
2 | cmake_minimum_required(VERSION 2.6)
3 |
4 | file(GLOB SRC_LIST
5 | ./*.cpp
6 | ./layer/*.cpp
7 | ./*.c
8 | ../../mlcc/Cifa.cpp
9 | ../../mlcc/filefunc.cpp
10 | ../../mlcc/strfunc.cpp
11 | )
12 |
13 | set(CMAKE_VERBOSE_MAKEFILE on)
14 | set(CMAKE_CXX_COMPILER $ENV{CXX})
15 | set(CMAKE_CXX_FLAGS "-O3 -std=c++17 -pthread -fopenmp -DNDEBUG")
16 |
17 | add_definitions(-DENABLE_CUDA=1 -DENABLE_HIP=0)
18 |
19 | include_directories(/usr/include /usr/local/include /usr/local/cuda/include /usr/local/cuda/include/crt ../../mlcc ../include . ./layer ../cccc-cuda ../cccc-hip)
20 | link_directories(/usr/local/cuda/lib64)
21 |
22 | set(LINK_LIBRARIES cudnn cudart cublas openblas)
23 | if(NOT CCCC_CPU)
24 | include_directories(../cccc-cuda)
25 | list(APPEND LINK_LIBRARIES cccc-cuda)
26 | endif()
27 |
28 | add_library(cccc SHARED ${SRC_LIST})
29 | target_link_libraries(cccc LINK_PRIVATE ${LINK_LIBRARIES})
30 |
31 | install(TARGETS cccc LIBRARY DESTINATION lib)
32 |
--------------------------------------------------------------------------------
/cccc/DataPreparer.cpp:
--------------------------------------------------------------------------------
1 | #include "DataPreparer.h"
2 | #include "filefunc.h"
3 | #include <algorithm>
4 | #include <thread>
5 |
6 | namespace cccc
7 | {
8 |
9 | DataPreparer::DataPreparer()
10 | : X(DataType::CURRENT, UnitType::CPU),
11 | Y(DataType::CURRENT, UnitType::CPU),
12 | X0(DataType::CURRENT, UnitType::CPU),
13 | Y0(DataType::CURRENT, UnitType::CPU),
14 | LW(DataType::CURRENT, UnitType::CPU),
15 | LW0(DataType::CURRENT, UnitType::CPU)
16 | {
17 | //rand_.set_seed();
18 | rand_.set_parameter(0, 1);
19 | }
20 |
21 | DataPreparer::~DataPreparer()
22 | {
23 | }
24 |
25 | void DataPreparer::init()
26 | {
27 | OPTION_GET_INT(shuffle_);
28 | OPTION_GET_INT(trans_);
29 | if (create_by_dll_ != "")
30 | {
31 | fill_ = 1;
32 | }
33 | OPTION_GET_INT(fill_);
34 | OPTION_GET_INT(fill_group_);
35 | init2();
36 | LOG("Initialize dataset\n");
37 | resetDataDim();
38 | initData();
39 | }
40 |
41 | //initialize the training-set preparer
42 | int DataPreparer::getFillGroup()
43 | {
44 | if (fill_)
45 | {
46 | return fill_group_;
47 | }
48 | return -1;
49 | }
50 |
51 | void DataPreparer::initData()
52 | {
53 | X.resizeNumber(fill_group_);
54 | Y.resizeNumber(fill_group_);
55 | X0.resizeNumber(fill_group_);
56 | Y0.resizeNumber(fill_group_);
57 |
58 | queue_origin_.resize(X.getNumber());
59 | for (int i = 0; i < queue_origin_.size(); i++)
60 | {
61 | queue_origin_[i] = i;
62 | }
63 | }
64 |
65 | void DataPreparer::shuffleQueue(std::vector<int>& train_queue)
66 | {
67 | std::random_device rd;
68 | std::mt19937 g(rd());
69 | std::shuffle(train_queue.begin(), train_queue.end(), g);
70 | }
71 |
72 | //the data preparer shuffles the raw data, transforms it, and then uploads it to the data matrices
73 | //for validation the preamble here is somewhat redundant
74 | void DataPreparer::prepareData(int epoch, const std::string& info)
75 | {
76 | //re-collect the untransformed data
77 | if (fill_)
78 | {
79 | LOG("Fill data for epoch {}\n", epoch + 1);
80 | fillData0();
81 | }
82 |
83 | int concurrency = std::thread::hardware_concurrency() / 2;
84 |
85 | //may include transformations of the training data
86 | if (X.getDataPtr() != X0.getDataPtr() && Y.getDataPtr() != Y0.getDataPtr())
87 | {
88 | if (shuffle_ == 0)
89 | {
90 | Matrix::copyData(X0, X);
91 | Matrix::copyData(Y0, Y);
92 | Matrix::copyData(LW0, LW);
93 | }
94 | else
95 | {
96 | LOG("Shuffle data for epoch {}\n", epoch + 1);
97 | shuffleQueue(queue_origin_);
98 |
99 | //#pragma omp parallel
100 | // {
101 | // auto x = X0.cloneSharedCol(0), y = Y0.cloneSharedCol(0);
102 | // auto dim = x.getDim();
103 | // //std::swap(dim[dim.size() - 1], dim[dim.size() - 2]);
104 | // //x.resize(dim);
105 | // //dim = y.getDim();
106 | // //std::swap(dim[dim.size() - 1], dim[dim.size() - 2]);
107 | // //y.resize(dim);
108 | //#pragma omp for
109 | // for (int i = 0; i < queue_origin_.size(); i++)
110 | // {
111 | // Matrix::copyRows(X0, queue_origin_[i], X, i, 1);
112 | // Matrix::copyRows(Y0, queue_origin_[i], Y, i, 1);
113 | // Matrix::copyRows(LW0, queue_origin_[i], LW, i, 1);
114 | // x.shareData(X, 0, i);
115 | // y.shareData(Y, 0, i);
116 | // transOne(x, y);
117 | // }
118 | // }
119 | std::vector<std::thread> threads(concurrency);
120 | int count_th = (queue_origin_.size() + concurrency - 1) / concurrency;
121 | int i_th = 0;
122 | for (auto& t : threads)
123 | {
124 | t = std::thread{ [i_th, count_th, this]()
125 | {
126 | auto x = X0.createSharedCol(0), y = Y0.createSharedCol(0);
127 | auto dim = x.getDim();
128 | for (int i = i_th * count_th; i < std::min(i_th * count_th + count_th, int(queue_origin_.size())); i++)
129 | {
130 | Matrix::copyRows(X0, queue_origin_[i], X, i, 1);
131 | Matrix::copyRows(Y0, queue_origin_[i], Y, i, 1);
132 | Matrix::copyRows(LW0, queue_origin_[i], LW, i, 1);
133 | if (trans_)
134 | {
135 | x.shareData(X, 0, i);
136 | y.shareData(Y, 0, i);
137 | transOne(x, y);
138 | }
139 | }
140 | } };
141 | i_th++;
142 | }
143 | for (auto& t : threads)
144 | {
145 | t.join();
146 | }
147 | }
148 | }
149 | LOG("Data prepared for epoch {}\n", epoch + 1);
150 | }
151 |
152 | //create one sized from the net's input and output, to avoid possible failures
153 | void DataPreparer::resetDataDim()
154 | {
155 | X.resizeKeepNumber(dim0_);
156 | Y.resizeKeepNumber(dim1_);
157 | X0.resizeKeepNumber(dim0_);
158 | Y0.resizeKeepNumber(dim1_);
159 | }
160 |
161 | std::string DataPreparer::getMessage(int i)
162 | {
163 | if (i >= 0 && i < message_.size())
164 | {
165 | return message_[i];
166 | }
167 | return "Error.";
168 | }
169 |
170 | } // namespace cccc
--------------------------------------------------------------------------------
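
The threaded section of prepareData splits queue_origin_ into blocks of ceil(n / concurrency) items, one block per thread. A minimal self-contained sketch of that chunking pattern (process stands in for the copyRows/transOne body; all names here are illustrative):

    #include <algorithm>
    #include <thread>
    #include <vector>

    //split n items into concurrency blocks of ceil(n / concurrency) items;
    //the last thread may get a short block, exactly as in prepareData
    void parallel_chunks(int n, int concurrency, void (*process)(int))
    {
        std::vector<std::thread> threads(concurrency);
        int count_th = (n + concurrency - 1) / concurrency;
        int i_th = 0;
        for (auto& t : threads)
        {
            t = std::thread{ [i_th, count_th, n, process]()
                {
                    for (int i = i_th * count_th; i < std::min(i_th * count_th + count_th, n); i++)
                    {
                        process(i);
                    }
                } };
            i_th++;
        }
        for (auto& t : threads)
        {
            t.join();
        }
    }
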
/cccc/DataPreparer.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "Matrix.h"
3 | #include "Option.h"
4 | #include "Random.h"
5 |
6 | namespace cccc
7 | {
8 |
9 | class DLL_EXPORT DataPreparer
10 | {
11 | public:
12 | Matrix X; //data after transformation, e.g. shuffled, brightness/contrast adjusted, noise added
13 | Matrix Y;
14 | Matrix LW; //weights controlling the loss function
15 |
16 | Matrix X0; //raw state of the data, usually as read directly from the file
17 | Matrix Y0;
18 | Matrix LW0;
19 |
20 | public:
21 | friend class DataPreparerFactory;
22 |
23 | public:
24 | DataPreparer();
25 | virtual ~DataPreparer();
26 | DataPreparer(const DataPreparer&) = delete;
27 | DataPreparer& operator=(const DataPreparer&) = delete;
28 |
29 | protected:
30 | void init();
31 | virtual void init2() {}
32 | int getFillGroup();
33 | //virtual void destroy() {}
34 |
35 | public:
36 | virtual void initData();
37 | virtual void fillData0() {}
38 | virtual void transOne(Matrix& X1, Matrix& Y1) {}
39 | virtual void showOneData(int number) {}
40 |
41 | protected:
42 | std::string getSection() { return section_; }
43 |
44 | private:
45 | std::string create_by_dll_;
46 |
47 | public:
48 | void shuffleQueue(std::vector<int>& train_queue);
49 | void prepareData(int epoch, const std::string& info);
50 |
51 | void resetDataDim();
52 |
53 | protected:
54 | int shuffle_ = 1; //whether to shuffle the training set
55 | int trans_ = 0; //whether to transform the data
56 | int fill_ = 0; //whether to fill the data
57 | int fill_group_ = 0; //number of groups to fill
58 |
59 | std::string section_ = "data_preparer";
60 | Option* option_;
61 |
62 | //for macro in Option.h
63 | std::string& section = section_;
64 | Option*& option = option_;
65 |
66 | //image dimensions
67 | std::vector<int> dim0_, dim1_;
68 |
69 | std::vector<int> queue_origin_; //the fill order
70 |
71 | private:
72 | std::vector<std::string> message_;
73 | Random rand_;
74 |
75 | public:
76 | std::string getMessage(int i);
77 | int isFill() { return fill_; }
78 |
79 | };
80 |
81 | //template <typename F>
82 | //void parallel_for_each(std::function<void()> fn, int concurrency)
83 | //{
84 | // std::vector<std::thread> threads(concurrency);
85 | // int count_th = (queue_origin_.size() + concurrency - 1) / concurrency;
86 | // int i_th = 0;
87 | // for (auto& t : threads)
88 | // {
89 | // t = std::thread{[](){fn();}};
90 | // i_th++;
91 | // }
92 | // for (auto& t : threads)
93 | // {
94 | // t.join();
95 | // }
96 | //}
97 |
98 |
99 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/DataPreparerFactory.cpp:
--------------------------------------------------------------------------------
1 | #include "DataPreparerFactory.h"
2 | #include "DataPreparerImage.h"
3 | #include "DataPreparerTxt.h"
4 | #include "DynamicLibrary.h"
5 | #include "filefunc.h"
6 |
7 | namespace cccc
8 | {
9 |
10 | DataPreparer* DataPreparerFactory::create(Option* op, const std::string& section, const std::vector<int>& dim0, const std::vector<int>& dim1)
11 | {
12 | DataPreparer* dp = nullptr;
13 | std::string library_name = op->getString(section, "library");
14 | #ifdef _WIN32
15 | #ifdef _DEBUG
16 | library_name = op->getString(section, "library_dlld", library_name);
17 | #else
18 | library_name = op->getString(section, "library_dll", library_name);
19 | #endif
20 | #else
21 | library_name = op->getString(section, "library_so", library_name);
22 | if (strfunc::toLowerCase(filefunc::getFileExt(library_name)) == "dll")
23 | {
24 | library_name = filefunc::getFilenameWithoutPath(filefunc::getFileMainname(library_name)) + ".so";
25 | if (library_name.find("lib") != 0)
26 | {
27 | library_name = "lib" + library_name;
28 | }
29 | }
30 | #endif
31 | auto function_name = op->getString(section, "function", "dp_ext");
32 |
33 | LOG("Try to create data preparer with section \"{}\"\n", section);
34 | if (!library_name.empty() && !function_name.empty())
35 | {
36 | //note: on 64-bit systems the dll function names look the same as __cdecl; deep learning is memory-hungry, so 32-bit systems are no longer handled here
37 | using MYFUNC = void* (*)();
38 | MYFUNC func = nullptr;
39 | func = (MYFUNC)DynamicLibrary::getFunction(library_name, function_name);
40 | if (func)
41 | {
42 | dp = (DataPreparer*)func();
43 | LOG("Create from {} in {}\n", function_name, library_name);
44 | dp->create_by_dll_ = library_name;
45 | }
46 | else
47 | {
48 | LOG("Failed to load {} in {}\n", function_name, library_name);
49 | }
50 | }
51 | if (dp == nullptr)
52 | {
53 | auto mode = op->getString(section, "mode", "image");
54 | if (op->hasSection(section))
55 | {
56 | mode = "";
57 | }
58 | if (mode == "image")
59 | {
60 | dp = new DataPreparerImage();
61 | LOG("Create default image data preparer\n");
62 | }
63 | else if (mode == "txt")
64 | {
65 | dp = new DataPreparerTxt();
66 | LOG("Create default txt data preparer\n");
67 | }
68 | else
69 | {
70 | dp = new DataPreparer();
71 | LOG("Create default data preparer\n");
72 | }
73 | }
74 |
75 | dp->option_ = op;
76 | dp->section_ = section;
77 | dp->dim0_ = dim0;
78 | dp->dim1_ = dim1;
79 | dp->init();
80 |
81 | return dp;
82 | }
83 |
84 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/DataPreparerFactory.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "DataPreparer.h"
3 |
4 | namespace cccc
5 | {
6 |
7 | struct DataPreparerFactory : public DataPreparer
8 | {
9 | private:
10 | static DataPreparer* create(Option* op, const std::string& section, const std::vector<int>& dim0, const std::vector<int>& dim1);
11 |
12 | public:
13 | using UniquePtr = std::unique_ptr<DataPreparer>;
14 | inline static UniquePtr makeUniquePtr(Option* op, const std::string& section, const std::vector<int>& dim0, const std::vector<int>& dim1)
15 | {
16 | UniquePtr p(create(op, section, dim0, dim1));
17 | return p;
18 | }
19 | };
20 |
21 | } // namespace cccc
--------------------------------------------------------------------------------
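
A hedged usage sketch of the factory: the section name matches the default section_ in DataPreparer.h, the dimensions are illustrative MNIST-like values, and the Option is assumed to be filled from an ini elsewhere.

    #include "DataPreparerFactory.h"
    using namespace cccc;

    int main()
    {
        Option option; //assumed to be loaded from an ini file elsewhere
        auto dp = DataPreparerFactory::makeUniquePtr(&option, "data_preparer",
                                                     { 28, 28, 1, 1 }, { 10, 1 }); //illustrative dims
        dp->prepareData(0, "train"); //fill (if configured), shuffle, transform
        return 0;
    }
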
/cccc/DataPreparerImage.cpp:
--------------------------------------------------------------------------------
1 | #include "DataPreparerImage.h"
2 | #include "Timer.h"
3 | #include "filefunc.h"
4 | #include "strfunc.h"
5 | #include
6 | #include
7 | #include
8 |
9 | #define _USE_MATH_DEFINES
10 | #include <cmath>
11 |
12 | namespace cccc
13 | {
14 |
15 | DataPreparerImage::DataPreparerImage()
16 | {
17 | }
18 |
19 | DataPreparerImage::~DataPreparerImage()
20 | {
21 | }
22 |
23 | void DataPreparerImage::init2()
24 | {
25 | OPTION_GET_INT(flip_);
26 | OPTION_GET_INT(transpose_);
27 |
28 | OPTION_GET_INT(d_channel_);
29 | OPTION_GET_REAL(d_noise_);
30 |
31 | OPTION_GET_NUMVECTOR(d_contrast_, 2, 0);
32 | OPTION_GET_NUMVECTOR(d_brightness_, 2, 0);
33 |
34 | //LOG("Options for image processing {} end\n\n", section_);
35 | }
36 |
37 | //transform one image
38 | //perturb the raw data here so the net adapts to varied conditions
39 | void DataPreparerImage::transOne(Matrix& X1, Matrix& Y1)
40 | {
41 | bool need_limit = false; //image values need to be limited to 0~1
42 |
43 | if (flip_ != 0)
44 | {
45 | //flip: -1, 0, or 1; any other value means do nothing
46 | int f = floor(rand_.rand() * 4) - 1;
47 | X1.flip(f);
48 | }
49 | if (transpose_ != 0)
50 | {
51 | //transpose 0, 1
52 | int t = floor(rand_.rand() * 2);
53 | if (t)
54 | {
55 | X1.transpose();
56 | }
57 | }
58 |
59 | //random noise
60 | if (d_noise_ != 0 && X1.getDeviceType() == UnitType::CPU)
61 | {
62 | need_limit = true;
63 | for (int i = 0; i < X1.getDataSize(); i++)
64 | {
65 | X1.setData(i, 0, X1.getData(i, 0) + rand_.rand() * d_noise_ * 2 - d_noise_);
66 | }
67 | }
68 |
69 | //brightness and contrast are transformed together with y
70 | //brightness
71 | if (d_brightness_[1] >= d_brightness_[0] && d_brightness_[0] != 0)
72 | {
73 | need_limit = true;
74 | if (d_channel_ == 0)
75 | {
76 | auto b = d_brightness_[0] + (d_brightness_[1] - d_brightness_[0]) * rand_.rand();
77 | X1.addNumber(b);
78 | }
79 | else
80 | {
81 | for (int i = 0; i < X1.getNumber(); i++)
82 | {
83 | auto b = d_brightness_[0] + (d_brightness_[1] - d_brightness_[0]) * rand_.rand();
84 | X1.addNumberCol(b, 1, i);
85 | }
86 | }
87 | }
88 | //contrast
89 | if (d_contrast_[1] >= d_contrast_[0] && d_contrast_[0] != 0)
90 | {
91 | need_limit = true;
92 | if (d_channel_ == 0)
93 | {
94 | auto c = 1 + d_contrast_[0] + (d_contrast_[1] - d_contrast_[0]) * rand_.rand();
95 | X1.scale(c);
96 | }
97 | else
98 | {
99 | for (int i = 0; i < X1.getNumber(); i++)
100 | {
101 | auto c = 1 + d_contrast_[0] + (d_contrast_[1] - d_contrast_[0]) * rand_.rand();
102 | X1.scaleCol(c, i);
103 | }
104 | }
105 | }
106 | if (need_limit)
107 | {
108 | X1.sectionLimit(0, 1);
109 | }
110 | }
111 |
112 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/DataPreparerImage.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "DataPreparer.h"
3 |
4 | namespace cccc
5 | {
6 |
7 | class DLL_EXPORT DataPreparerImage : public DataPreparer
8 | {
9 | protected:
10 | int flip_ = 0; //flip
11 | int transpose_ = 0; //transpose
12 | std::vector<real> d_contrast_; //contrast perturbation
13 | std::vector<real> d_brightness_; //brightness perturbation
14 | int d_channel_ = 0; //whether to transform each channel separately
15 | double d_noise_ = 0; //add random noise
16 |
17 | public:
18 | DataPreparerImage();
19 | virtual ~DataPreparerImage();
20 |
21 | void init2() override;
22 | void transOne(Matrix& X1, Matrix& Y1) override;
23 |
24 | private:
25 | Random rand_;
26 | };
27 |
28 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/DataPreparerTxt.cpp:
--------------------------------------------------------------------------------
1 | #include "DataPreparerTxt.h"
2 | #include "VectorMath.h"
3 | #include "filefunc.h"
4 |
5 | namespace cccc
6 | {
7 |
8 | DataPreparerTxt::DataPreparerTxt()
9 | {
10 | }
11 |
12 | DataPreparerTxt::~DataPreparerTxt()
13 | {
14 | }
15 |
16 | void DataPreparerTxt::init2()
17 | {
18 | input_ = VectorMath::multiply(dim0_, dim0_.size() - 1);
19 | output_ = VectorMath::multiply(dim1_, dim1_.size() - 1);
20 | std::string filename = option_->getString(section_, "file", "file.txt");
21 | content_ = filefunc::readFileToString(filename);
22 | }
23 |
24 | void DataPreparerTxt::fillData0()
25 | {
26 | rand_.set_seed();
27 |
28 | for (int index = 0; index < X.getNumber(); index++)
29 | {
30 | int r = rand_.rand() * (content_.size() / 2 - input_ - output_);
31 | for (int i = 0; i < input_; i++)
32 | {
33 | X.setData(i, index, getContent(r + i));
34 | }
35 | for (int i = 0; i < output_; i++)
36 | {
37 | Y.setData(i, index, getContent(r + i + input_));
38 | }
39 | }
40 | }
41 |
42 | float DataPreparerTxt::getContent(int i)
43 | {
44 | auto p = (uint16_t*)(&content_[i * 2]);
45 | return (*p) / 65536.0;
46 | }
47 |
48 | } // namespace cccc
--------------------------------------------------------------------------------
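
A worked example of the two-bytes-to-float mapping in getContent: each pair of bytes is reinterpreted as a host-endian uint16_t and scaled into [0, 1) by dividing by 65536. A self-contained check (the printed value assumes a little-endian host):

    #include <cstdint>
    #include <cstdio>
    #include <string>

    int main()
    {
        //the bytes 0x34, 0x12 read back as 0x1234 = 4660 on a little-endian host
        std::string content = { char(0x34), char(0x12) };
        auto p = (const uint16_t*)(&content[0]);
        printf("%f\n", *p / 65536.0); //prints 0.071106 (= 4660 / 65536)
        return 0;
    }
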
/cccc/DataPreparerTxt.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "DataPreparer.h"
3 | #include <string>
4 |
5 | namespace cccc
6 | {
7 |
8 | class DataPreparerTxt : public DataPreparer
9 | {
10 | private:
11 | int input_;
12 | int output_;
13 | std::string content_;
14 |
15 | public:
16 | DataPreparerTxt();
17 | virtual ~DataPreparerTxt();
18 |
19 | void init2() override;
20 | void fillData0() override;
21 |
22 | private:
23 | float getContent(int i);
24 |
25 | private:
26 | Random rand_;
27 | };
28 |
29 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/GpuControl.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "TensorDesc.h"
3 | #include "types.h"
4 | #include <atomic>
5 | #include <cstddef>
6 | #include <string>
7 | #include <vector>
8 |
9 | struct cudnnContext;
10 | struct miopenHandle;
11 |
12 | namespace cccc
13 | {
14 | class Rocblas;
15 | class Cublas;
16 |
17 | enum ApiType
18 | {
19 | API_UNKNOWN = 0,
20 | API_CUDA, //NVIDIA
21 | API_HIP, //AMD
22 | };
23 |
24 | //GPU architecture
25 | enum CudaArch
26 | {
27 | ARCH_UNKNOWN = 0,
28 | ARCH_FERMI = 2,
29 | ARCH_KEPLER = 3,
30 | ARCH_MAXWELL = 5,
31 | ARCH_PASCAL = 6,
32 | //Volta also reports major version 7 (sm_70), the same as Turing (sm_75)
33 | ARCH_TURING = 7,
34 | ARCH_AMPERE = 8,
35 | };
36 |
37 | enum memcpyKind
38 | {
39 | HostToHost = 0, /**< Host -> Host */
40 | HostToDevice = 1, /**< Host -> Device */
41 | DeviceToHost = 2, /**< Device -> Host */
42 | DeviceToDevice = 3, /**< Device -> Device */
43 | Default = 4 /**< Direction of the transfer is inferred from the pointer values. Requires unified virtual addressing */
44 | };
45 |
46 | //this class holds some basic cuda parameters, e.g. the cublas and cudnn handles
47 | //objects of this type must not be created arbitrarily; only select one from the known objects
48 | class DLL_EXPORT GpuControl
49 | {
50 | public:
51 | GpuControl();
52 | ~GpuControl();
53 |
54 | GpuControl(GpuControl&) = delete;
55 | GpuControl& operator=(GpuControl&) = delete;
56 | GpuControl(GpuControl&&) = default;
57 | GpuControl& operator=(GpuControl&&) = default;
58 |
59 | private:
60 | //the following static variables record global device info; the related steps should run in the main process
61 | static int device_count_;
62 | static int device_count_c_;
63 | static int device_count_h_;
64 | static std::vector<int> gpu_devices_turn_;
65 | static std::atomic<int> auto_choose_turn_;
66 |
67 | public:
68 | static void checkDevices();
69 | static int getDeviceCount();
70 | static GpuControl* getCurrentCuda();
71 | //static void setCurrentCuda(GpuControl* gpu);
72 | static void setUseCPU();
73 |
74 | static UnitType getGlobalCudaType();
75 | static void evaluateDevices();
76 | static std::vector<int> gpuDevicesTurn();
77 |
78 | void setAsCurrent();
79 | int getDeviceID() const { return gpu_id_; }
80 | void getFreeMemory(size_t& free, size_t& total) const;
81 | int init(int dev_id = -1);
82 | ApiType getApiType() { return api_type_; }
83 | void destroy();
84 |
85 | int malloc(void** p, size_t size);
86 | int free(void* p);
87 | int memcpy(void* dst, const void* src, size_t count, memcpyKind kind);
88 |
89 | private:
90 | static int autoChooseId();
91 |
92 | private:
93 | bool inited_ = false;
94 | int gpu_id_ = -1; //starts at 0; index across all devices
95 | ApiType api_type_{ API_UNKNOWN };
96 |
97 | int api_id_ = -1; //cuda or hip device id, starting at 0
98 |
99 | CudaArch micro_arch_{ ARCH_UNKNOWN };
100 |
101 | public:
102 | Cublas* cublas_ = nullptr;
103 | cudnnContext* cudnn_handle_ = nullptr;
104 |
105 | Rocblas* rocblas_ = nullptr;
106 | miopenHandle* miopen_handle_ = nullptr;
107 |
108 | size_t memory_used_ = 0;
109 |
110 | public:
111 | OtherDesc other_desc_;
112 | template <typename T>
113 | T getDesc()
114 | {
115 | return other_desc_.getDesc();
116 | }
117 | //set the activation function; used to simplify code
118 | static void setActivationDesc(void* activation, int mode, double v);
119 |
120 | public:
121 | ActivePhaseType active_phase_ = ACTIVE_PHASE_TRAIN;
122 | void setActivePhase(ActivePhaseType ap) { active_phase_ = ap; }
123 | static std::string lastCudnnErrorString();
124 | };
125 |
126 | } // namespace cccc
--------------------------------------------------------------------------------
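
A hedged sketch of the expected call order for GpuControl based on the header above: check devices once globally, init one controller (dev_id -1 lets autoChooseId pick), then route allocations through it so memory_used_ stays consistent. Error handling is elided and the snippet assumes at least one working device:

    #include "GpuControl.h"
    using namespace cccc;

    int main()
    {
        GpuControl::checkDevices(); //gather global device info once
        GpuControl gpu;
        gpu.init(-1); //-1: let autoChooseId pick a device
        void* p = nullptr;
        if (gpu.malloc(&p, 1024) == 0)
        {
            gpu.free(p);
        }
        gpu.destroy();
        return 0;
    }
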
/cccc/Layer.cpp:
--------------------------------------------------------------------------------
1 | #include "Layer.h"
2 | #include "MatrixEx.h"
3 | #include "Random.h"
4 | #include "VectorMath.h"
5 |
6 | namespace cccc
7 | {
8 |
9 | Layer::Layer()
10 | {
11 | }
12 |
13 | Layer::~Layer()
14 | {
15 | }
16 |
17 | //set the name
18 | void Layer::setName(const std::string& name)
19 | {
20 | layer_name_ = name;
21 | }
22 |
23 | //this also checks the previous layer
24 | LayerConnectionType Layer::getConnection2()
25 | {
26 | auto ct = connetion_type_;
27 | if (connetion_type_ == LAYER_CONNECTION_DIRECT)
28 | {
29 | ct = prev_layer_->connetion_type_;
30 | }
31 | return ct;
32 | }
33 |
34 | //option and layer_name must be set first
35 | void Layer::message()
36 | {
37 | //activation-function related
38 | auto string_vector_int = [](const std::vector<int>& v)
39 | {
40 | std::string str;
41 | for (auto i : v)
42 | {
43 | str += std::to_string(i) + ", ";
44 | }
45 | if (str.size() >= 2)
46 | {
47 | str.resize(str.size() - 2);
48 | }
49 | return str;
50 | };
51 |
52 | //print this layer's info
53 | LOG(" name: {}\n", layer_name_);
54 | LOG(" type: {}\n", option_->getStringFromEnum(connetion_type_));
55 | LOG(" active: {}\n", option_->getStringFromEnum(option_->getEnum(layer_name_, "active", ACTIVE_FUNCTION_NONE)));
56 | //LOG(" solver: {}\n", option_->getStringFromEnum(solver_->getSolverType()));
57 | auto dim = A_->getDim();
58 | dim.pop_back();
59 | LOG(" out nodes: {} {}\n", A_->getRow(), dim);
60 | if (W_->getDataSize() > 0)
61 | {
62 | LOG(" weight size: {} {}\n", W_->getDataSize(), W_->getDim());
63 | }
64 | if (b_->getDataSize() > 0)
65 | {
66 | LOG(" bias size: {}\n", b_->getDataSize());
67 | }
68 | LOG(" x data size: {} [{}, batch = {}]\n", A_->getDataSize(), A_->getRow(), A_->getNumber());
69 | //LOG(" have bias: %d\n", need_bias_);
70 |
71 | auto string_layer_names = [](std::vector<Layer*>& ls)
72 | {
73 | std::string str;
74 | for (auto l : ls)
75 | {
76 | str += l->layer_name_ + ", ";
77 | }
78 | if (str.size() >= 2)
79 | {
80 | str.resize(str.size() - 2);
81 | }
82 | return str;
83 | };
84 |
85 | if (!prev_layers_.empty())
86 | {
87 | LOG(" prev layer(s): {}\n", string_layer_names(prev_layers_));
88 | if (connetion_type_ != LAYER_CONNECTION_COMBINE && prev_layers_.size() > 1)
89 | {
90 | LOG("Warning: only {} is effective!", prev_layer_->getName().c_str());
91 | }
92 | }
93 | if (!next_layers_.empty())
94 | {
95 | LOG(" next layer(s): {}\n", string_layer_names(next_layers_));
96 | }
97 | }
98 |
99 | void Layer::makeMatrixOp(std::vector<MatrixOp>& op_queue)
100 | {
101 | int batch = option_->getInt("train", "batch", 1);
102 | switch (connetion_type_)
103 | {
104 | case LAYER_CONNECTION_FULLCONNECT:
105 | {
106 | MatrixOp op;
107 | auto dim = option_->getVector(layer_name_, "node");
108 | int out_channel = option_->getInt(layer_name_, "channel", 0);
109 | if (out_channel > 0)
110 | {
111 | dim.push_back(out_channel);
112 | }
113 | W_->resize(VectorMath::multiply(dim), prev_layer_->A_->getRow());
114 | dim.push_back(prev_layer_->A_->getNumber());
115 | op.as_mul(W_, prev_layer_->A_, A_, 1, dim);
116 | op_queue.push_back(op);
117 | if (option_->getInt(layer_name_, "need_bias", 1))
118 | {
119 | MatrixOp op;
120 | b_->resize(A_->getChannel(), 1);
121 | op.as_addBias(A_, b_, A_);
122 | op_queue.push_back(op);
123 | }
124 | break;
125 | }
126 | case LAYER_CONNECTION_CONVOLUTION:
127 | case LAYER_CONNECTION_CORRELATION:
128 | {
129 | MatrixOp op;
130 | int node = option_->getInt(layer_name_, "node", 1);
131 | int out_channel = option_->getInt(layer_name_, "channel", node);
132 | auto prev_dim = prev_layer_->A_->getDim();
133 | auto prev_dim_window = prev_dim;
134 | int window_dim_size = prev_dim.size() - 2;
135 | auto window = option_->getVector(layer_name_, "window");
136 | auto stride = option_->getVector(layer_name_, "stride");
137 | auto padding = option_->getVector(layer_name_, "padding");
138 | VectorMath::force_resize(window, window_dim_size, 1);
139 | VectorMath::force_resize(stride, window_dim_size, 1);
140 | VectorMath::force_resize(padding, window_dim_size, 0);
141 | auto weight_dim = window;
142 | weight_dim.push_back(prev_layer_->A_->getChannel());
143 | weight_dim.push_back(out_channel);
144 | W_->resize(weight_dim);
145 | int t = 0;
146 | if (connetion_type_ == LAYER_CONNECTION_CONVOLUTION)
147 | {
148 | t = 0;
149 | }
150 | else if (connetion_type_ == LAYER_CONNECTION_CORRELATION)
151 | {
152 | t = 1;
153 | }
154 | op.as_conv(prev_layer_->A_, W_, A_, stride, padding, t);
155 | op_queue.push_back(op);
156 | if (option_->getInt(layer_name_, "need_bias", 1))
157 | {
158 | MatrixOp op;
159 | b_->resize(A_->getChannel(), 1);
160 | op.as_addBias(A_, b_, A_);
161 | op_queue.push_back(op);
162 | }
163 | break;
164 | }
165 | case LAYER_CONNECTION_POOLING:
166 | {
167 | MatrixOp op;
168 | auto prev_dim = prev_layer_->A_->getDim();
169 | auto prev_dim_window = prev_dim;
170 | int window_dim_size = prev_dim.size() - 2;
171 | auto window = option_->getVector(layer_name_, "window");
172 | auto stride = option_->getVector(layer_name_, "stride");
173 | auto padding = option_->getVector(layer_name_, "padding");
174 | VectorMath::force_resize(window, window_dim_size, 1);
175 | VectorMath::force_resize(stride, window_dim_size, -1);
176 | for (int i = 0; i < window_dim_size; i++)
177 | {
178 | if (stride[i] < 0)
179 | {
180 | stride[i] = window[i];
181 | }
182 | }
183 | VectorMath::force_resize(padding, window_dim_size, 0);
184 | auto reverse_type = POOLING_NOT_REVERSE;
185 | if (option_->getInt(layer_name_, "reverse", 0))
186 | {
187 | reverse_type = POOLING_REVERSE;
188 | }
189 | auto pool_type = option_->getEnum(layer_name_, "pool_type", POOLING_MAX);
190 | op.as_pool(prev_layer_->A_, A_, pool_type, reverse_type, window, stride, padding);
191 | op_queue.push_back(op);
192 | break;
193 | }
194 | case LAYER_CONNECTION_DIRECT:
195 | {
196 | //layer = new LayerDirect();
197 | break;
198 | }
199 | case LAYER_CONNECTION_COMBINE:
200 | {
201 | MatrixOp op;
202 | std::vector<MatrixSP> in;
203 | for (auto l : prev_layers_)
204 | {
205 | in.push_back(l->A_);
206 | }
207 | if (option_->getEnum(layer_name_, "combine_type") == COMBINE_CONCAT)
208 | {
209 | op.as_concat(in, A_);
210 | }
211 | else
212 | {
213 | op.as_add(in, A_);
214 | }
215 | op_queue.push_back(op);
216 | break;
217 | }
218 | case LAYER_CONNECTION_NONE:
219 | {
220 | auto dim = option_->getVector(layer_name_, "data");
221 | int out_channel = option_->getInt(layer_name_, "channel", 0);
222 | if (out_channel > 0)
223 | {
224 | dim.push_back(out_channel);
225 | }
226 | dim.push_back(batch);
227 | A_->resize(dim);
228 | break;
229 | }
230 | }
231 | auto active = option_->getEnum(layer_name_, "active", ACTIVE_FUNCTION_NONE);
232 | if (active != ACTIVE_FUNCTION_NONE)
233 | {
234 | MatrixOp op;
235 | auto Ap = A_;
236 | A_ = makeMatrixSP();
237 | float coef = option_->getReal(layer_name_, "coef", 0.2);
238 | op.as_active(Ap, A_, active, {}, { coef }, {});
239 | op_queue.push_back(op);
240 | }
241 | }
242 |
243 | } // namespace cccc
--------------------------------------------------------------------------------
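
The LAYER_CONNECTION_FULLCONNECT branch above boils down to three ops: A = W * X, A += b, then an optional activation. A hand-built sketch of the first two using the MatrixOp API (dimensions are illustrative: batch 4, 8 inputs, 16 outputs; that Matrix::resize(int, int) is the right overload here is an assumption based on the calls in this file):

    #include "MatrixOp.h"
    using namespace cccc;

    int main()
    {
        auto X = makeMatrixSP(), A = makeMatrixSP(), W = makeMatrixSP(), b = makeMatrixSP();
        X->resize(8, 4); //8 inputs, batch 4
        W->resize(16, 8);
        b->resize(16, 1);
        std::vector<MatrixOp> op_queue;
        MatrixOp mul, bias;
        mul.as_mul(W, X, A); //A = W * X
        bias.as_addBias(A, b, A); //A += b, per channel
        op_queue.push_back(mul);
        op_queue.push_back(bias);
        MatrixOp::forward(op_queue);
        return 0;
    }
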
/cccc/Layer.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "MatrixOp.h"
3 | #include "Option.h"
4 | #include <string>
5 | #include <vector>
6 |
7 | namespace cccc
8 | {
9 |
10 | //neural layer
11 | //wherever a function below has two versions, the unsuffixed one holds the common part and the suffixed one the subclass-specific behavior
12 | class Layer
13 | {
14 | public:
15 | Layer();
16 | virtual ~Layer();
17 | Layer(const Layer&) = delete;
18 | Layer& operator=(const Layer&) = delete;
19 |
20 | protected:
21 | int id_;
22 | std::string layer_name_; //name of this layer
23 |
24 | Layer* prev_layer_ = nullptr; //only the combine layer may have several previous layers; other layers warn if there is more than one
25 | std::vector<Layer*> prev_layers_; //except for the combine layer, this vector is only used to build the net structure and is unused during computation
26 | std::vector<Layer*> next_layers_; //there may be several next layers; they all receive the same data
27 |
28 | LayerVisibleType visible_type_ = LAYER_VISIBLE_HIDDEN;
29 | LayerConnectionType connetion_type_ = LAYER_CONNECTION_NONE; //this field is only a marker; the actual connection is realized by virtual inheritance
30 |
31 | Option* option_;
32 |
33 | public:
34 | //setters for the preceding fields
35 | void setID(int id) { id_ = id; }
36 | int getBatchSize();
37 | void setVisible(LayerVisibleType vt) { visible_type_ = vt; }
38 | LayerConnectionType getConnection() { return connetion_type_; }
39 | LayerConnectionType getConnection2();
40 | void setConnection(LayerConnectionType ct) { connetion_type_ = ct; }
41 | //void getOutputSize(int& width, int& height, int& channel) { width = out_width_; height = out_height_; channel = out_channel_; }
42 | const std::string& getName() { return layer_name_; }
43 | void setName(const std::string& name);
44 |
45 | void setOption(Option* op) { option_ = op; }
46 | //static void setEpochCount(int ec) { epoch_count_ = ec; }
47 |
48 | public:
49 | void addPrevLayers(Layer* layer)
50 | {
51 | prev_layer_ = layer;
52 | prev_layers_.push_back(layer);
53 | }
54 | void addNextLayers(Layer* layer) { next_layers_.push_back(layer); }
55 |
56 | Layer* getPrevLayer() { return prev_layer_; }
57 | std::vector<Layer*> getPrevLayers() { return prev_layers_; }
58 | std::vector<Layer*> getNextLayers() { return next_layers_; }
59 | void clearConnect()
60 | {
61 | prev_layer_ = nullptr;
62 | prev_layers_.clear();
63 | next_layers_.clear();
64 | }
65 | int getNextLayersCount() { return next_layers_.size(); }
66 | Layer* getNextLayer(int i) { return next_layers_[i]; }
67 |
68 | public:
69 | //these matrices share the same shape; computation order: X, A, ..., dA, dX
70 | //when this layer has no activation function, A and X point to the same object, as do dA and dX
71 | //MatrixSP X_; //X collects the previous layer's output
72 | MatrixSP A_ = makeMatrixSP(); //A is this layer's output after the activation function; the input layer sets A directly
73 | MatrixSP W_ = makeMatrixSP();
74 | MatrixSP b_ = makeMatrixSP();
75 |
76 | public:
77 | void message();
78 | void makeMatrixOp(std::vector<MatrixOp>& op_queue);
79 | };
80 |
81 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/Log.cpp:
--------------------------------------------------------------------------------
1 | #include "Log.h"
2 |
3 | #ifdef _WIN32
4 | #define NOMINMAX
5 | #include
6 | #include
7 | #endif
8 |
9 | namespace cccc
10 | {
11 | void showMessageBox(const std::string& str)
12 | {
13 | #ifdef _WIN32
14 | MessageBoxA(NULL, str.c_str(), "cccc", MB_ICONERROR);
15 | #endif
16 | }
17 |
18 | void fatalError(const std::string& str)
19 | {
20 | LOG_ERR("{}", str);
21 | showMessageBox(str);
22 | exit(0);
23 | }
24 | } //namespace cccc
25 |
--------------------------------------------------------------------------------
/cccc/Log.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "fmt1.h"
3 | #include <iostream>
4 |
5 | namespace cccc
6 | {
7 | static inline int errorCount;
8 |
9 | template <typename... Args>
10 | void LOG(Args&&... args)
11 | {
12 | std::cout << fmt1::format(std::forward(args)...);
13 | fflush(stdout);
14 | }
15 |
16 | template <typename... Args>
17 | void LOG_ERR(Args&&... args)
18 | {
19 | if (errorCount++ >= 2000)
20 | {
21 | std::cerr << "Too many errors, exit program.\n";
22 | fflush(stderr);
23 | exit(0);
24 | }
25 | std::cerr << fmt1::format(std::forward(args)...);
26 | fflush(stderr);
27 | }
28 |
29 | void showMessageBox(const std::string& str);
30 | void fatalError(const std::string& str);
31 | } // namespace cccc
--------------------------------------------------------------------------------
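
LOG and LOG_ERR simply forward their arguments to fmt1::format, so the call style is fmt-like, matching the {} and {:g} placeholders used throughout this repo:

    #include "Log.h"

    int main()
    {
        cccc::LOG("epoch {} done, loss {:g}\n", 3, 0.125);
        cccc::LOG_ERR("{} error(s) so far\n", cccc::errorCount);
        return 0;
    }
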
/cccc/MainProcess.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "DataPreparerFactory.h"
3 | #include "Net.h"
4 | #include "Timer.h"
5 | #include <atomic>
6 | #include <functional>
7 |
8 | namespace cccc
9 | {
10 |
11 | //master scheduler
12 | class DLL_EXPORT MainProcess
13 | {
14 | public:
15 | MainProcess();
16 | virtual ~MainProcess();
17 | MainProcess(const MainProcess&) = delete;
18 | MainProcess& operator=(const MainProcess&) = delete;
19 |
20 | public:
21 | int brain_id_ = 0;
22 |
23 | protected:
24 | int batch_ = 0;
25 | std::function<void(MainProcess*)> running_callback_ = nullptr; //callback function
26 | Option option_;
27 | Timer timer_total_;
28 | int MP_count_ = 1;
29 | std::vector<std::unique_ptr<Net>> nets_;
30 | std::vector<GpuControl> gpus_;
31 | WorkModeType work_mode_ = WORK_MODE_NORMAL;
32 |
33 | public:
34 | void setCallback(std::function<void(MainProcess*)> f) { running_callback_ = f; }
35 |
36 | Option* getOption() { return &option_; }
37 |
38 | protected:
39 | //epoch and iter counters
40 | int epoch_count_ = 0;
41 | int iter_count_ = 0;
42 |
43 | public:
44 | int getIterCount() { return iter_count_; }
45 |
46 | void setIterCount(int ic) { iter_count_ = ic; }
47 |
48 | public:
49 | DataPreparerFactory::UniquePtr data_preparer_; //training-set preparer
50 |
51 | DataPreparerFactory::UniquePtr& getDataPreparer() { return data_preparer_; }
52 |
53 | DataPreparerFactory::UniquePtr data_preparer2_; //test-set preparer
54 |
55 | public:
56 | //mind the order here
57 | //after the nets are initialized, they can be initialized again as long as the in/out data structures stay unchanged, so the net structure, solver, etc. can be swapped midway
58 | int init(const std::string& ini = "");
59 | int loadIni(const std::string& ini = "");
60 | int testGPUDevice();
61 | int initNets();
62 | int initData();
63 |
64 | virtual void initDataPreparer();
65 | void initTrainData();
66 | void initTestData();
67 | std::string makeSaveName(const std::string& save_format, int epoch_count);
68 | std::string makeSaveSign();
69 |
70 | public:
71 | //the following form one scheduling example
72 | void train(std::vector<std::unique_ptr<Net>>& nets, DataPreparer* data_preparer, int total_epochs);
73 | //void train(std::vector<std::unique_ptr<Net>>& net, DataPreparer* data_preparer, int epochs) { train({ net }, data_preparer, epochs); }
74 |
75 | private:
76 | //the main thread prepares data; worker threads train and test
77 | struct TrainInfo
78 | {
79 | std::atomic<int> data_prepared; //0: not ready, 1: cpu ready, 2: gpu ready
80 | std::atomic<int> data_distributed; //number of threads that have copied the data
81 | std::atomic<int> stop; //stop signal
82 | std::atomic<int> trained; //number of nets that have finished training
83 | std::atomic<int> dweight_uncollected; //gradient sync finished
84 |
85 | //std::atomic<int> need_reset; //signal that the solver needs a reset
86 |
87 | TrainInfo() { reset(); }
88 |
89 | void reset()
90 | {
91 | data_prepared = 0;
92 | data_distributed = 0;
93 | stop = 0;
94 | trained = 0;
95 | dweight_uncollected = 0;
96 | }
97 | };
98 |
99 | void trainOneNet(std::vector<std::unique_ptr<Net>>& nets, int net_id, TrainInfo& ti, int total_epochs);
100 |
101 | bool checkTestEffective(std::vector<float>& resultv_max, std::vector<float>& resultv);
102 | bool checkTrainHealth(const std::vector<float>& resultvp, const std::vector<float>& resultv, float l1p, float l2p, float l1, float l2, double increase_limited, int effective_epoch_count);
103 |
104 | public:
105 | //when fetching a net, mind which gpu is current
106 | Net* getNet(int i = 0) const
107 | {
108 | if (nets_.size() > i)
109 | {
110 | return nets_[i].get();
111 | }
112 | return nullptr;
113 | }
114 |
115 | const std::vector<std::unique_ptr<Net>>& getNets() { return nets_; }
116 |
117 | public:
118 | void run(int train_epochs = -1);
119 | void testData(Net* net, int force_output = 0, int test_type = 0);
120 | void extraTest(Net* net, const std::string& section, int force_output = 0, int test_type = 0);
121 |
122 | public:
123 | int testExternalData(void* x, void* y, void* a, int n, int attack_times = 0, double* error = nullptr);
124 | };
125 |
126 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/MatrixData.cpp:
--------------------------------------------------------------------------------
1 | #include "MatrixData.h"
2 | #include "Log.h"
3 | #include "Matrix.h"
4 | #include "gpu_lib.h"
5 |
6 | namespace cccc
7 | {
8 | //there is an issue here: for now resize only works within the same device
9 |
10 | void* MatrixData::resize(int64_t size, DataType data_type, bool reserve_data, bool force)
11 | {
12 | data_type_ = data_type;
13 | if (size <= occupy_data_size_ && !force)
14 | {
15 | return data_;
16 | }
17 | float* new_data = nullptr;
18 | auto device_type = UnitType::GPU;
19 | if (!reserve_data) //if the data need not be kept, release it first to save gpu memory; otherwise the peak usage is the sum of the old and new sizes
20 | {
21 | release();
22 | }
23 | if (gpu_)
24 | {
25 | gpu_->setAsCurrent();
26 | if (gpu_->malloc((void**)&new_data, size * getDataTypeSize(data_type_)) == 0)
27 | {
28 | gpu_->memory_used_ += size * getDataTypeSize(data_type_);
29 | //LOG("Device {} MALLOC {:g}, memory used {:g}!\n", cuda_->getDeviceID(), 1.0 * size * sizeof(real), 1.0 * cuda_->memory_used_);
30 | //if (size * sizeof(real) > 3e8)
31 | //{
32 | // LOG(2, "Very big!\n");
33 | //}
34 | }
35 | else
36 | {
37 | LOG("Device {} FAIL TO MALLOC {:g}!\n", gpu_->getDeviceID(), 1.0 * size * getDataTypeSize(data_type_));
38 | }
39 | }
40 | else
41 | {
42 | device_type = UnitType::CPU;
43 | new_data = (float*)new char[size * getDataTypeSize(data_type_)]; //allocate by bytes so double/half sizes are correct
44 | }
45 | if (reserve_data)
46 | {
47 | copy(getApiType(), data_, getApiType(), new_data, std::min(size, occupy_data_size_), data_type_);
48 | release();
49 | }
50 | occupy_data_size_ = size;
51 | return data_ = new_data;
52 | }
53 |
54 | void MatrixData::release()
55 | {
56 | if (data_ == nullptr)
57 | {
58 | occupy_data_size_ = 0;
59 | return;
60 | }
61 | if (gpu_)
62 | {
63 | auto current_gpu = GpuControl::getCurrentCuda();
64 | if (current_gpu != gpu_)
65 | {
66 | gpu_->setAsCurrent();
67 | }
68 | auto status = gpu_->free(data_);
69 | if (status == 0)
70 | {
71 | gpu_->memory_used_ -= occupy_data_size_ * getDataTypeSize(data_type_);
72 | //LOG("Device {} FREE {:g}, memory used {:g}!\n", gpu_->getDeviceID(), 1.0 * occupy_data_size_ * getDataTypeSize(data_type_), 1.0 * gpu_->memory_used_);
73 | }
74 | else
75 | {
76 | //LOG("Device {} FAIL TO FREE {:g}!\n", gpu_->getDeviceID(), 1.0 * occupy_data_size_ * getDataTypeSize(data_type_));
77 | }
78 | if (current_gpu != gpu_)
79 | {
80 | current_gpu->setAsCurrent();
81 | }
82 | }
83 | else
84 | {
85 | delete[] (char*)data_;
86 | }
87 | occupy_data_size_ = 0;
88 | data_ = nullptr;
89 | }
90 |
91 | int64_t MatrixData::copy(ApiType dt_src, const void* src, ApiType dt_dst, void* dst, int64_t size, DataType dt)
92 | {
93 | return copyByByte(dt_src, src, dt_dst, dst, size * getDataTypeSize(dt));
94 | }
95 |
96 | int64_t MatrixData::copyByByte(ApiType dt_src, const void* src, ApiType dt_dst, void* dst, int64_t size_in_byte)
97 | {
98 | if (src == nullptr || dst == nullptr || src == dst)
99 | {
100 | return 0;
101 | }
102 | int state = 0;
103 | //cuda
104 | if (dt_dst == API_CUDA && dt_src == API_CUDA)
105 | {
106 | state = cudaMemcpy(dst, src, size_in_byte, cudaMemcpyDeviceToDevice);
107 | }
108 | else if (dt_dst == API_CUDA && dt_src == API_UNKNOWN)
109 | {
110 | state = cudaMemcpy(dst, src, size_in_byte, cudaMemcpyHostToDevice);
111 | }
112 | else if (dt_dst == API_UNKNOWN && dt_src == API_CUDA)
113 | {
114 | state = cudaMemcpy(dst, src, size_in_byte, cudaMemcpyDeviceToHost);
115 | }
116 | //hip
117 | else if (dt_dst == API_HIP && dt_src == API_HIP)
118 | {
119 | state = hipMemcpy(dst, src, size_in_byte, hipMemcpyDeviceToDevice);
120 | }
121 | else if (dt_dst == API_HIP && dt_src == API_UNKNOWN)
122 | {
123 | state = hipMemcpy(dst, src, size_in_byte, hipMemcpyHostToDevice);
124 | }
125 | else if (dt_dst == API_UNKNOWN && dt_src == API_HIP)
126 | {
127 | state = hipMemcpy(dst, src, size_in_byte, hipMemcpyDeviceToHost);
128 | }
129 | //cuda&hip
130 | else if (dt_dst == API_CUDA && dt_src == API_HIP)
131 | {
132 | auto temp = new char[size_in_byte];
133 | state += hipMemcpy(temp, src, size_in_byte, hipMemcpyDeviceToHost);
134 | state += cudaMemcpy(dst, temp, size_in_byte, cudaMemcpyHostToDevice);
135 | delete[] temp;
136 | }
137 | else if (dt_dst == API_HIP && dt_src == API_CUDA)
138 | {
139 | auto temp = new char[size_in_byte];
140 | state += cudaMemcpy(temp, src, size_in_byte, cudaMemcpyDeviceToHost);
141 | state += hipMemcpy(dst, temp, size_in_byte, hipMemcpyHostToDevice);
142 | delete[] temp;
143 | }
144 | //other
145 | else
146 | {
147 | memcpy(dst, src, size_in_byte);
148 | }
149 | if (state != 0)
150 | {
151 | LOG_ERR("Memcpy error: {}, size in byte {:g}\n", state, 1.0 * size_in_byte);
152 | }
153 | return size_in_byte;
154 | }
155 | } // namespace cccc
--------------------------------------------------------------------------------
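
A hedged sketch of a host-side copy through MatrixData: API_UNKNOWN stands for host memory, the destination ApiType comes from the MatrixData that owns the buffer, and resize should be preceded by setCuda/setCudaAsCurrent as the header notes. The snippet assumes whatever gpu context (if any) has already been set up:

    #include "MatrixData.h"
    using namespace cccc;

    int main()
    {
        MatrixData md;
        md.setCudaAsCurrent(); //a null current gpu means the buffer stays on the cpu
        auto dst = md.resize(256, DataType::FLOAT, false);
        float src[256] = {};
        MatrixData::copy(API_UNKNOWN, src, md.getApiType(), dst, 256, DataType::FLOAT);
        return 0;
    }
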
/cccc/MatrixData.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "GpuControl.h"
3 | #include <memory>
4 |
5 | namespace cccc
6 | {
7 |
8 | //since multiple devices are possible, the data must also keep its device pointer; use this class through a shared pointer
9 | struct MatrixData
10 | {
11 | GpuControl* gpu_ = nullptr; //null means the data lives on the cpu, which is the default
12 | void* data_ = nullptr;
13 | int64_t occupy_data_size_ = 0; //actually occupied data length; resizing to no more than this does not reallocate
14 | DataType data_type_ = DataType::FLOAT;
15 |
16 | public:
17 | MatrixData() = default;
18 | ~MatrixData() { release(); }
19 | MatrixData(const MatrixData&) = delete;
20 | MatrixData& operator=(const MatrixData&) = delete;
21 | void setCuda(GpuControl* gpu) { gpu_ = gpu; }
22 | void setCudaAsCurrent() { gpu_ = GpuControl::getCurrentCuda(); }
23 | void setDataType(DataType dt) { data_type_ = dt; }
24 | DataType getDataType() const { return data_type_; }
25 | size_t getDataTypeSize() const { return getDataTypeSize(data_type_); }
26 | size_t size() const { return occupy_data_size_; }
27 | size_t sizeInByte() const { return occupy_data_size_ * getDataTypeSize(); }
28 | void* resize(int64_t size, DataType data_type, bool reserve_data = true, bool force = false); //call setCuda before resize
29 | std::shared_ptr<void*> make_shared_data() { return std::make_shared<void*>(data_); }
30 | void release();
31 | ApiType getApiType() const
32 | {
33 | if (gpu_) { return gpu_->getApiType(); }
34 | return API_UNKNOWN;
35 | }
36 | void* getDataPtr(int i) const { return (char*)data_ + i * getDataTypeSize(); }
37 | float getData(int i) const
38 | {
39 | auto p = getDataPtr(i);
40 | switch (getDataType())
41 | {
42 | case DataType::FLOAT:
43 | return *(float*)p;
44 | case DataType::DOUBLE:
45 | return *(double*)p;
46 | case DataType::HALF:
47 | return *(half*)p;
48 | default:
49 | return 0;
50 | }
51 | }
52 | template <typename T>
53 | static void setData(void* p, DataType data_type, T v)
54 | {
55 | switch (data_type)
56 | {
57 | case DataType::FLOAT:
58 | *(float*)p = v;
59 | break;
60 | case DataType::DOUBLE:
61 | *(double*)p = v;
62 | break;
63 | case DataType::HALF:
64 | *(half*)p = v;
65 | break;
66 | }
67 | }
68 | template <typename T>
69 | void setData(int i, T v) { setData(getDataPtr(i), getDataType(), v); }
70 | void setData(int i, void* p, DataType data_type) //data_type is the type of the data at p
71 | {
72 | switch (data_type)
73 | {
74 | case DataType::FLOAT:
75 | setData(i, *(float*)p);
76 | break;
77 | case DataType::DOUBLE:
78 | setData(i, *(double*)p);
79 | break;
80 | case DataType::HALF:
81 | setData(i, *(half*)p);
82 | break;
83 | }
84 | }
85 |
86 | public:
87 | static int64_t copy(ApiType dt_src, const void* src, ApiType dt_dst, void* dst, int64_t size, DataType dt);
88 | static int64_t copyByByte(ApiType dt_src, const void* src, ApiType dt_dst, void* dst, int64_t size_in_byte);
89 | static size_t getDataTypeSize(DataType dt)
90 | {
91 | switch (dt)
92 | {
93 | case DataType::FLOAT:
94 | return sizeof(float);
95 | case DataType::DOUBLE:
96 | return sizeof(double);
97 | case DataType::HALF:
98 | return sizeof(half);
99 | default:
100 | return 0;
101 | }
102 | }
103 | };
104 |
105 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/MatrixEx.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "Matrix.h"
3 |
4 | namespace cccc
5 | {
6 |
7 | //none of these are basic matrix computations; all are static functions
8 | class DLL_EXPORT MatrixEx : public Matrix
9 | {
10 | private:
11 | MatrixEx() = delete;
12 |
13 | public:
14 | //the functions below are not basic matrix operations
15 |
16 | //add bias per channel
17 | static void addBias(const Matrix& X, const Matrix& bias, Matrix& Y, float a = 1, float b = 1);
18 | static void addBiasBackward(const Matrix& X, Matrix& bias, const Matrix& Y, float a = 1, float b = 1);
19 |
20 | //these functions concatenate data by channel and split it back
21 | static void concatByChannel(const std::vector<Matrix>& X_vector, Matrix& Y);
22 | static void concatByChannelBackward(std::vector<Matrix>& X_vector, const Matrix& Y);
23 | static void splitByChannel(const Matrix& X, std::vector<Matrix>& Y_vector);
24 |
25 | static void activeBufferInit(const Matrix& X, ActiveFunctionType af, std::vector<int>& int_vector, std::vector<float>& real_vector, std::vector<Matrix>& matrix_vector);
26 |
27 | //the actual computation of activation
28 | //in activation and its backward pass, the input and output matrices have the same dimensions
29 | //note that in the backward case the constants a and r do not correspond to their forward meanings
30 | static void activeForward(const Matrix& X, Matrix& Y, ActiveFunctionType af,
31 | std::vector<int>& int_vector, std::vector<float>& real_vector, std::vector<Matrix>& matrix_vector, float a = 1, float r = 0);
32 | static void activeBackward(Matrix& X, const Matrix& Y, ActiveFunctionType af,
33 | std::vector<int>& int_vector, std::vector<float>& real_vector, std::vector<Matrix>& matrix_vector, float a = 1, float r = 0);
34 |
35 | static void activeForwardSimple(const Matrix& X, Matrix& Y, ActiveFunctionType af, float a = 1, float r = 0);
36 | static void activeBackwardSimple(Matrix& X, const Matrix& Y, ActiveFunctionType af, float a = 1, float r = 0);
37 |
38 | static void poolingForward(const Matrix& X, Matrix& Y, PoolingType pooling_type, PoolingReverseType reverse_type,
39 | const std::vector<int>& window, const std::vector<int>& stride, const std::vector<int>& padding,
40 | float a = 1, float r = 0, Matrix* workspace = nullptr);
41 | static void poolingBackward(Matrix& X, const Matrix& Y, PoolingType pooling_type, PoolingReverseType reverse_type,
42 | const std::vector<int>& window, const std::vector<int>& stride, const std::vector<int>& padding,
43 | float a = 1, float r = 0, Matrix* workspace = nullptr);
44 |
45 | struct ConvMethod
46 | {
47 | int algo = -1, math_type = -1, group_number = 0;
48 | };
49 |
50 | static const int conv_method_count = 8;
51 | static void convolutionForward(const Matrix& X, const Matrix& W, Matrix& Y,
52 | const std::vector<int>& stride, const std::vector<int>& padding,
53 | float a = 1, float r = 0, ConvMethod* method = nullptr, Matrix* workspace = nullptr);
54 | static void convolutionBackward(Matrix& X, Matrix& W, const Matrix& Y,
55 | const std::vector<int>& stride, const std::vector<int>& padding,
56 | float a = 1, float rx = 0, float aw = 1, float rw = 0,
57 | ConvMethod* method_dx = nullptr, ConvMethod* method_dw = nullptr, Matrix* workspace_dx = nullptr, Matrix* workspace_dw = nullptr);
58 | static void convolutionBackwardDX(Matrix& X, const Matrix& W, const Matrix& Y,
59 | const std::vector<int>& stride, const std::vector<int>& padding,
60 | float a = 1, float r = 0, ConvMethod* method_dx = nullptr, Matrix* workspace_dx = nullptr);
61 | static void convolutionBackwardDW(const Matrix& X, Matrix& W, const Matrix& Y,
62 | const std::vector<int>& stride, const std::vector<int>& padding,
63 | float a = 1, float r = 0, ConvMethod* method_dw = nullptr, Matrix* workspace_dw = nullptr);
64 |
65 | static void dropoutForward(const Matrix& X, Matrix& Y, ActivePhaseType work_phase, float v, int seed, Matrix& rg_stat, Matrix& reverse_space);
66 | static void dropoutBackward(Matrix& X, const Matrix& Y, float v, int seed, Matrix& rg_stat, Matrix& reverse_space);
67 |
68 | //GPU only ----------------------------------------------------------------------------------------------------
69 |
70 | //the following carry trainable, adjustable parameters
71 | static void batchNormalizationForward(const Matrix& X, Matrix& Y, ActivePhaseType work_phase, BatchNormalizationType bn_type,
72 | float& exp_aver_factor, float epsilon, Matrix& scale, Matrix& bias, Matrix& result_running_mean, Matrix& result_running_variance,
73 | Matrix& result_save_mean, Matrix& result_save_inv_variance);
74 | static void batchNormalizationBackward(Matrix& X, const Matrix& Y, ActivePhaseType work_phase, BatchNormalizationType bn_type,
75 | float epsilon, float rate, Matrix& scale, Matrix& bias, Matrix& saved_mean, Matrix& saved_inv_variance, Matrix& result_dscale, Matrix& result_dbias);
76 |
77 | //GPU only ----------------------------------------------------------------------------------------------------
78 |
79 | //the ada_d computed here is the actual update gradient; afterwards it should be added onto the parameters
80 | static void adaDeltaUpdate(Matrix& mean_d2, Matrix& mean_ada_d2, Matrix& d, Matrix& ada_d, float rou, float epsilon);
81 | static void adamUpdate(Matrix& mean_d, Matrix& mean_d2, Matrix& d, Matrix& ada_d, float beta1, float beta2, float epsilon, float t);
82 | static void adaRMSPropUpdate(Matrix& mean_d2, Matrix& d, Matrix& ada_d, float rou, float epsilon);
83 | static void sparse(Matrix& rou_hat, Matrix& R, float rou, float beta);
84 |
85 | static void fill(Matrix& m, RandomFillType random_type, int in, int out);
86 |
87 | static void sin(const Matrix& X, Matrix& Y, float a = 1);
88 | static void cos(const Matrix& X, Matrix& Y, float a = 1);
89 | static void zigzag(const Matrix& X, Matrix& Y);
90 | static void zigzagb(Matrix& X, const Matrix& Y);
91 |
92 | static void step(const Matrix& X, Matrix& Y);
93 |
94 | static void leaky_relu(const Matrix& X, Matrix& Y, float l, float a = 1, float b = 0);
95 | static void leaky_relub(Matrix& X, const Matrix& Y, float l, float a = 1, float b = 0);
96 |
97 | static void correlationForward(const Matrix& X, const Matrix& W, Matrix& Y, std::vector<ConvMethod>& methods, std::vector<Matrix>& workspaces,
98 | const std::vector<int>& stride, const std::vector<int>& padding, float a = 1, float r = 0);
99 | static void correlationBackward(Matrix& X, Matrix& W, const Matrix& Y, std::vector<ConvMethod>& methods, std::vector<Matrix>& workspaces,
100 | const std::vector<int>& stride, const std::vector<int>& padding, float a = 1, float rx = 0, float rw = 0);
101 | static void matrix_max(const Matrix& X1, const Matrix& X2, Matrix& Y);
102 | static void matrix_maxb(Matrix& X1, Matrix& X2, const Matrix& Y, float a1, float a2, float r);
103 | };
104 |
105 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/MatrixOp.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <any>
3 |
4 | #include "Log.h"
5 | #include "Matrix.h"
6 |
7 | namespace cccc
8 | {
9 |
10 | //matrix operations; the network forward pass may only use the computations in this file
11 |
12 | enum class MatrixOpType
13 | {
14 | NONE,
15 | ADD,
16 | MUL,
17 | ELE_MUL,
18 | ADD_BIAS,
19 | CONCAT,
20 | ACTIVE,
21 | POOL,
22 | CONV,
23 | CORR,
24 | RESHAPE,
25 | MAX,
26 | LOSS,
27 | L2,
28 | };
29 |
30 | struct Any;
31 | template <typename T>
32 | concept notParaAny = !std::is_same_v<std::decay_t<T>, Any>;
33 |
34 | struct Any : std::any
35 | {
36 | template <typename T>
37 | const T& to() const { return std::any_cast<const T&>(*this); }
38 |
39 | template <typename T>
40 | T& to() { return std::any_cast<T&>(*this); }
41 |
42 | template <notParaAny T>
43 | Any(T&& t) :
44 | std::any(std::forward<T>(t))
45 | {
46 | }
47 | };
48 |
49 | using VectorAny = std::vector<Any>;
50 |
51 | class DLL_EXPORT MatrixOp
52 | {
53 | private:
54 | MatrixOpType type_ = MatrixOpType::NONE;
55 | std::vector<MatrixSP> in_; //input data
56 | std::vector<MatrixSP> wb_; //parameters, usually weights and biases
57 | std::vector<MatrixSP> out_; //output data
58 | //the following parameters are generally invisible from outside
59 | //commonly used types
60 | std::vector<float> a_, b_;
61 | std::vector<int> window_, stride_, padding_;
62 | std::vector<MatrixSP> workspace_;
63 | //unknown or variable types
64 | VectorAny anys_;
65 |
66 | //float dw_scale_ = 1;
67 |
68 | public:
69 | MatrixOp() = default;
70 |
71 | void set(MatrixOpType t, const std::vector<MatrixSP>& m_in, const std::vector<MatrixSP>& m_wb, const std::vector<MatrixSP>& m_out, std::vector<float>&& a = {}, std::vector<float>&& b = {}, VectorAny&& pv = {}, std::vector<MatrixSP>&& workspace = {},
72 | std::vector<int>&& window = {}, std::vector<int>&& stride = {}, std::vector<int>&& padding = {})
73 | {
74 | type_ = t;
75 | in_ = m_in;
76 | wb_ = m_wb;
77 | out_ = m_out;
78 |
79 | a_ = a;
80 | b_ = b;
81 | a_.resize(in_.size(), 1);
82 | b_.resize(out_.size(), 0);
83 |
84 | anys_ = pv;
85 |
86 | window_ = window;
87 | stride_ = stride;
88 | padding_ = padding;
89 | workspace_ = workspace;
90 |
91 | if (out_.size() > 0 && out_[0]->getDataSize() == 0)
92 | {
93 | LOG_ERR("Error: output is empty!\n");
94 | }
95 | }
96 |
97 | static void forward(std::vector<MatrixOp>& op_queue);
98 | static void backward(std::vector<MatrixOp>& op_queue, std::vector<MatrixOp>& loss, bool clear_d);
99 | void forwardData();
100 | void backwardDataWeight();
101 | void backwardLoss();
102 |
103 | static std::string ir(const std::vector<MatrixOp>& op_queue);
104 | std::string print() const;
105 |
106 | MatrixOpType getType() { return type_; }
107 |
108 | std::vector<MatrixSP>& getMatrixIn() { return in_; }
109 |
110 | std::vector<MatrixSP>& getMatrixWb() { return wb_; }
111 |
112 | std::vector<MatrixSP>& getMatrixOut() { return out_; }
113 |
114 | ActiveFunctionType getActiveType() const;
115 | int setActiveType(ActiveFunctionType af);
116 |
117 | static void simpleQueue(std::vector<MatrixOp>& op_queue, Matrix& X, Matrix& A); //keep only the parts of the graph related to X and A
118 |
119 | public:
120 | static void getDefaultStridePadding(MatrixOpType type, const std::vector<int>& dim, std::vector<int>& stride, std::vector<int>& padding);
121 |
122 | void setNeedReverse(bool r)
123 | {
124 | for (auto& m : in_)
125 | {
126 | m->setNeedBack(r);
127 | }
128 | for (auto& m : wb_)
129 | {
130 | m->setNeedBack(r);
131 | }
132 | }
133 |
134 | //void setDWScale(float s) { dw_scale_ = s; }
135 |
136 | public:
137 | //these functions set this op's parameters and automatically compute and return Y's size
138 | void as_scale(const MatrixSP& X, const MatrixSP& Y, float r);
139 | void as_mul(const MatrixSP& X1, const MatrixSP& X2, const MatrixSP& Y, float a = 1, std::vector<int> dim = {});
140 | void as_elementMul(const MatrixSP& X1, const MatrixSP& X2, const MatrixSP& Y, float a = 1);
141 | void as_add(const MatrixSP& X1, const MatrixSP& X2, const MatrixSP& Y, float a = 1, float b = 1);
142 | void as_add(const std::vector<MatrixSP>& X_vector, const MatrixSP& Y);
143 | void as_addBias(const MatrixSP& X, const MatrixSP& bias, const MatrixSP& Y, float a = 1, float b = 1);
144 | void as_concat(const std::vector<MatrixSP>& X_vector, const MatrixSP& Y);
145 | void as_active(const MatrixSP& X, const MatrixSP& Y, ActiveFunctionType af);
146 | void as_active(const MatrixSP& X, const MatrixSP& Y, ActiveFunctionType af, std::vector<int>&& int_vector, std::vector<float>&& real_vector, std::vector<Matrix>&& matrix_vector);
147 | void as_pool(const MatrixSP& X, const MatrixSP& Y, PoolingType pooling_type, PoolingReverseType reverse_type, std::vector<int> window, std::vector<int> stride, std::vector<int> padding, float a = 1);
148 | void as_conv(const MatrixSP& X, const MatrixSP& W, const MatrixSP& Y, std::vector<int> stride, std::vector<int> padding, int conv_algo, float a = 1);
149 | void as_reshape(const MatrixSP& X, const MatrixSP& Y, std::vector<int>& dim);
150 | void as_max(const MatrixSP& X1, const MatrixSP& X2, const MatrixSP& Y);
151 |
152 | //the following deal specifically with loss functions
153 | private:
154 | double value_ = 0;
155 | double scale_ = 1;
156 |
157 | friend std::vector<MatrixOp> operator+(const std::vector<MatrixOp>& A, const std::vector<MatrixOp>& B);
158 | friend std::vector<MatrixOp> operator*(const std::vector<MatrixOp>& A, double v);
159 | friend std::vector<MatrixOp> operator*(double v, const std::vector<MatrixOp>& A);
160 | friend std::vector<MatrixOp> crossEntropy(const MatrixSP& A, const MatrixSP& Y);
161 | friend std::vector<MatrixOp> L2(const MatrixSP& A);
162 |
163 | public:
164 | double calc(const MatrixOp& op) { return op.value_ * op.scale_; }
165 |
166 | double calc(const std::vector<MatrixOp>& ops)
167 | {
168 | double sum = 0;
169 | for (auto& op : ops)
170 | {
171 | sum += calc(op);
172 | }
173 | return sum;
174 | }
175 | };
176 |
177 | //end of basic operations
178 |
179 | //loss-function handling below
180 | std::vector<MatrixOp> operator+(const std::vector<MatrixOp>& A, const std::vector<MatrixOp>& B);
181 | std::vector<MatrixOp>& operator+=(std::vector<MatrixOp>& A, const std::vector<MatrixOp>& B);
182 | std::vector<MatrixOp> operator*(const std::vector<MatrixOp>& A, double v);
183 | std::vector<MatrixOp> operator*(double v, const std::vector<MatrixOp>& A);
184 |
185 | std::vector<MatrixOp> crossEntropy(const MatrixSP& A, const MatrixSP& Y);
186 | std::vector<MatrixOp> L2(const MatrixSP& A);
187 | std::vector<MatrixOp> L2(const std::vector<MatrixSP>& v);
188 |
189 | } // namespace cccc
--------------------------------------------------------------------------------
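
The operator overloads and loss builders declared at the end of MatrixOp.h compose into op queues, so a weight-decayed loss can be written almost like the math. A hedged sketch (dimensions illustrative; the resize overload is assumed as in the earlier sketches):

    #include "MatrixOp.h"
    using namespace cccc;

    int main()
    {
        auto A = makeMatrixSP(), Y = makeMatrixSP(), W = makeMatrixSP();
        A->resize(10, 4);
        Y->resize(10, 4);
        W->resize(10, 8);
        //cross-entropy data term plus an L2 weight penalty scaled by 1e-4
        std::vector<MatrixOp> loss = crossEntropy(A, Y) + 1e-4 * L2(W);
        return 0;
    }
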
/cccc/Net.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "MatrixOp.h"
3 | #include "Solver.h"
4 |
5 | namespace cccc
6 | {
7 |
8 | class DLL_EXPORT Net
9 | {
10 | public:
11 | Net();
12 | virtual ~Net();
13 | Net(const Net&) = delete;
14 | Net& operator=(const Net&) = delete;
15 |
16 | protected:
17 | Option* option_;
18 |
19 | Matrix all_weights_;
20 |
21 | Solver solver_;
22 |
23 | std::vector<MatrixOp> op_queue_;
24 | std::vector<MatrixOp> loss_;
25 |
26 | std::vector<Matrix*> weights_;
27 | std::vector<Solver> solvers_for_weight_;
28 |
29 | MatrixSP X_, A_;
30 | MatrixSP Y_ = makeMatrixSP();
31 | MatrixSP loss_weight_ = makeMatrixSP(); //this variable emphasizes or ignores certain samples
32 |
33 | GpuControl* gpu_;
34 |
35 | int seperate_update_weight_ = 0;
36 |
37 | public:
38 | int init();
39 | virtual int init2() = 0;
40 |
41 | void setGpu(GpuControl* gpu) { gpu_ = gpu; }
42 |
43 | GpuControl* getGpu() { return gpu_; }
44 |
45 | void setDeviceSelf() { gpu_->setAsCurrent(); }
46 |
47 | Matrix& getAllWeights() { return all_weights_; }
48 |
49 | void setOption(Option* op) { option_ = op; }
50 |
51 | int getBatch() { return getX().getNumber(); }
52 |
53 | public:
54 | Matrix& getX() { return *X_; }
55 |
56 | Matrix& getY() { return *Y_; }
57 |
58 | Matrix& getA() { return *A_; }
59 |
60 | Matrix& getLossWeight() { return *loss_weight_; }
61 |
62 | void initLossWeight() { loss_weight_->resize(Y_->getDim()); }
63 |
64 | public:
65 | void active(Matrix* X, Matrix* Y, Matrix* A, bool back, float* error);
66 |
67 | void updateWeight();
68 |
69 | virtual int saveWeight(const std::string& filename, const std::string& sign = "");
70 | int loadWeight(const std::string& str, int load_mode = 0);
71 |
72 | int weightDataSize() const;
73 | float weightSumAbs() const;
74 | float weightNorm2() const;
75 | void calNorm(int& n, float& l1, float& l2) const;
76 | void outputNorm() const;
77 |
78 | private:
79 | int resetBatchSize(int n);
80 | std::vector<int> getTestGroup();
81 |
82 | public:
83 | int test(Matrix* X, Matrix* Y, Matrix* A, const std::string& info = "",
84 | int output_group = 0, int test_type = 0, int attack_times = 0,
85 | double* result_ptr = nullptr, std::vector<std::vector<int>>* resultv_ptr = nullptr);
86 |
87 | protected:
88 | void combineWeights(std::vector<Matrix*>& weights, Matrix& result);
89 | void initWeights();
90 | std::string ir();
91 |
92 | public:
93 | void attack(Matrix* X, Matrix* Y);
94 | void groupAttack(Matrix* X, Matrix* Y, int attack_times);
95 |
96 | public:
97 | Solver& getSolver() { return solver_; }
98 |
99 | public:
100 | virtual void doSomeThing() {}
101 |
102 | //Net* clone(int clone_data = 0);
103 | };
104 |
105 | } // namespace cccc
--------------------------------------------------------------------------------
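A rough sketch of the typical driving sequence implied by the interface above (assumed, not taken from the repository; MyNet, opt, gpu, X, Y and A are hypothetical):

    MyNet net;    //a subclass implementing the pure virtual init2()
    net.setOption(&opt);    //configuration must be in place before init()
    net.setGpu(&gpu);
    net.init();    //init() calls init2() to build the op queue and weights
    float error = 0;
    net.active(&X, &Y, &A, true, &error);    //forward pass; with back == true, also backward
    net.updateWeight();    //apply the solver step to all weights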
/cccc/NetCifa.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "Cifa.h"
3 | #include "MatrixOp.h"
4 | #include "Net.h"
5 |
6 | namespace cccc
7 | {
8 |
9 | class DLL_EXPORT NetCifa : public Net
10 | {
11 | public:
12 | NetCifa();
13 | ~NetCifa();
14 |
15 | private:
16 | cifa::Cifa cifa_;
17 |
18 | using Loss = std::vector<MatrixOp>;
19 | using MatrixGroup = std::vector<MatrixSP>;
20 | static std::vector<cifa::Object> getVector(cifa::ObjectVector& v, int index);
21 | static std::vector<int> getIntVector(cifa::ObjectVector& v, int index);
22 | static std::vector<real> getRealVector(cifa::ObjectVector& v, int index);
23 |
24 | std::string message_;
25 |
26 | public:
27 | virtual int init2() override;
28 |
29 | int runScript(const std::string& script);
30 | int registerFunctions();
31 |
32 | private:
33 | void setXA(const MatrixSP& X, const MatrixSP& A)
34 | {
35 | X_ = X;
36 | A_ = A;
37 | }
38 | };
39 |
40 | } // namespace cccc
--------------------------------------------------------------------------------
/cccc/NetLayer.cpp:
--------------------------------------------------------------------------------
1 | #include "NetLayer.h"
2 | #include "VectorMath.h"
3 | #include "strfunc.h"
4 |
5 | namespace cccc
6 | {
7 |
8 | NetLayer::NetLayer()
9 | {
10 | }
11 |
12 | NetLayer::~NetLayer()
13 | {
14 | }
15 |
16 | //note: the names of the input and output layers can be customized, so one ini file can define multiple networks
17 | int NetLayer::init2()
18 | {
19 | //the batch size is used at the beginning to set the initial size of the network
20 | int state = createAndConnectLayers();
21 | if (state)
22 | {
23 | LOG("Net structure is wrong!\n");
24 | return 1;
25 | }
26 | Y_->resize(A_->getDim());
27 | return 0;
28 | }
29 |
30 | int NetLayer::createAndConnectLayers()
31 | {
32 | Layer *layer_in = nullptr, *layer_out = nullptr;
33 | std::string layer_in_name = "layer_in";
34 | std::string layer_out_name = "layer_out";
35 | std::vector<Layer*> all_layer_vector; //all layers found, including some redundant ones
36 | std::vector<Layer*> layer_vector; //the order of this vector is the computation order
37 |
38 | //lambda: work out each layer's previous and next layers
39 | auto connect_layers = [this]()
40 | {
41 | for (auto& name_layer : all_layer_map_)
42 | {
43 | auto& l = name_layer.second;
44 | auto nexts = strfunc::splitString(option_->getString(l->getName(), "next"), ",");
45 | for (auto& next : nexts)
46 | {
47 | strfunc::replaceAllSubStringRef(next, " ", "");
48 | if (next != "" && all_layer_map_.count(next) > 0)
49 | {
50 | l->addNextLayers(all_layer_map_[next].get());
51 | all_layer_map_[next]->addPrevLayers(l.get());
52 | }
53 | }
54 | }
55 | };
56 | //lambda: whether a layer is already in the vector
57 | auto contains = [&](std::vector<Layer*>& v, Layer* l) -> bool
58 | {
59 | return std::find(v.begin(), v.end(), l) != v.end();
60 | };
61 | //lambda: recursively push layers onto the vector
62 | //when the last parameter is false, only determine whether a connection exists; when true, strictly compute the propagation order
63 | std::function<void(Layer*, int, std::vector<Layer*>&, bool)> push_cal_stack = [&](Layer* layer, int direct, std::vector<Layer*>& stack, bool turn)
64 | {
65 | //layer connections must not form a cycle
66 | if (layer == nullptr || contains(stack, layer))
67 | {
68 | return;
69 | }
70 | std::vector<Layer*> connect0, connect1;
71 | connect1 = layer->getNextLayers();
72 | connect0 = layer->getPrevLayers();
73 |
74 | if (direct < 0)
75 | {
76 | std::swap(connect0, connect1);
77 | }
78 | //a layer is pushed only after all of its preceding layers have been pushed
79 | bool contain_all0 = true;
80 | for (auto& l : connect0)
81 | {
82 | if (!contains(stack, l))
83 | {
84 | contain_all0 = false;
85 | break;
86 | }
87 | }
88 | if (!turn || (!contains(stack, layer) && contain_all0))
89 | {
90 | stack.push_back(layer);
91 | }
92 | else
93 | {
94 | return;
95 | }
96 | for (auto& l : connect1)
97 | {
98 | push_cal_stack(l, direct, stack, turn);
99 | }
100 | };
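//Summary of the two passes (added explanation, inferred from the logic above): with
//turn == false the recursion flood-fills every layer reachable from the start, which
//only answers whether a connection exists; with turn == true a layer is pushed only
//after all of its predecessors are already in the stack, so the resulting vector is
//a valid topological (computation) order for an acyclic layer graph. direct < 0
//swaps prev/next so the same routine can traverse the graph backwards.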
101 |
102 | //find all layers that have a definition
103 | auto sections = option_->getAllSections();
104 |
105 | //create all the layers first
106 | for (auto& section : sections)
107 | {
108 | if (strfunc::toLowerCase(section).find("layer") == 0)
109 | {
110 | //LOG("Found layer {}\n", section);
111 | auto ct = option_->getEnum(section, "type", LAYER_CONNECTION_NONE);
112 | auto l = std::make_shared<Layer>();
113 | l->setConnection(ct);
114 | l->setOption(option_);
115 | l->setName(section);
116 |
117 | //check here whether some layers actually have no output; this is not strict, and the user remains responsible for verifying the network's correctness
118 | auto dim = option_->getVector(section, "node");
119 | dim.push_back(option_->getInt(section, "channel", 1));
120 | if (VectorMath::multiply(dim) <= 0)
121 | {
122 | option_->setKey(section, "next", "");
123 | }
124 | all_layer_vector.push_back(l.get());
125 | if (l->getName() == layer_in_name)
126 | {
127 | layer_in = l.get();
128 | l->setVisible(LAYER_VISIBLE_IN);
129 | }
130 | if (l->getName() == layer_out_name)
131 | {
132 | layer_out = l.get();
133 | l->setVisible(LAYER_VISIBLE_OUT);
134 | }
135 | all_layer_map_[l->getName()] = l;
136 | }
137 | }
138 | //connect, compute the bidirectional connections, remove useless entries, then reconnect
139 | connect_layers();
140 | std::vector