├── .clang-format
├── .editorconfig
├── .gitignore
├── CMakeLists.txt
├── LICENSE.txt
├── README.md
├── build.sh
└── src
    ├── benchmark.h
    ├── benchmark_classification.cpp
    ├── classification
        ├── alexnet.h
        ├── darknet.h
        ├── densenet.h
        ├── googlenet.h
        ├── repvgg.h
        ├── resnet.h
        ├── squeezenet.h
        ├── vggnet.h
        └── vovnet.h
    ├── detection
        ├── yolov5.h
        ├── yolov5p6.h
        └── yolov7.h
    └── lm
        └── slm_dels.h


/.clang-format:
--------------------------------------------------------------------------------
 1 | ﻿---
 2 | AccessModifierOffset: '0'
 3 | AlignAfterOpenBracket: AlwaysBreak
 4 | AlignConsecutiveMacros: 'false'
 5 | AlignConsecutiveAssignments: 'false'
 6 | AlignConsecutiveDeclarations: 'false'
 7 | AlignEscapedNewlines: Left
 8 | AlignOperands: 'true'
 9 | AlignTrailingComments: 'false'
10 | AllowAllArgumentsOnNextLine: 'false'
11 | AllowAllConstructorInitializersOnNextLine: 'false'
12 | AllowAllParametersOfDeclarationOnNextLine: 'false'
13 | AllowShortBlocksOnASingleLine: 'false'
14 | AllowShortCaseLabelsOnASingleLine: 'false'
15 | AllowShortFunctionsOnASingleLine: InlineOnly
16 | AllowShortIfStatementsOnASingleLine: Never
17 | AllowShortLambdasOnASingleLine: Inline
18 | AllowShortLoopsOnASingleLine: 'false'
19 | AlwaysBreakAfterReturnType: None
20 | AlwaysBreakBeforeMultilineStrings: 'false'
21 | AlwaysBreakTemplateDeclarations: 'No'
22 | BinPackArguments: 'false'
23 | BinPackParameters: 'false'
24 | BreakBeforeBinaryOperators: None
25 | BreakBeforeBraces: Allman
26 | ColumnLimit: '99'
27 | CompactNamespaces: 'false'
28 | ConstructorInitializerAllOnOneLineOrOnePerLine: 'true'
29 | DerivePointerAlignment: 'true'
30 | DisableFormat: 'false'
31 | FixNamespaceComments: 'true'
32 | IncludeBlocks: Regroup
33 | IndentCaseLabels: 'false'
34 | IndentPPDirectives: None
35 | IndentWidth: '4'
36 | IndentWrappedFunctionNames: 'true'
37 | Language: Cpp
38 | MaxEmptyLinesToKeep: '1'
39 | NamespaceIndentation: All
40 | PointerAlignment: Left
41 | ReflowComments: 'true'
42 | SortIncludes: 'true'
43 | SortUsingDeclarations: 'true'
44 | SpaceAfterCStyleCast: 'false'
45 | SpaceAfterLogicalNot: 'false'
46 | SpaceAfterTemplateKeyword: 'true'
47 | SpaceBeforeAssignmentOperators: 'true'
48 | SpaceBeforeCpp11BracedList: 'false'
49 | SpaceBeforeCtorInitializerColon: 'true'
50 | SpaceBeforeInheritanceColon: 'true'
51 | SpaceBeforeParens: ControlStatements
52 | SpaceBeforeRangeBasedForLoopColon: 'true'
53 | SpaceInEmptyParentheses: 'false'
54 | SpacesBeforeTrailingComments: '2'
55 | SpacesInAngles: 'false'
56 | SpacesInCStyleCastParentheses: 'false'
57 | SpacesInParentheses: 'false'
58 | SpacesInSquareBrackets: 'false'
59 | Standard: Cpp11
60 | TabWidth: '4'
61 | UseTab: Never
62 | 
63 | ...
64 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # Top-most EditorConfig file: the search for further .editorconfig files stops here.
 2 | root = true
 3 | 
 4 | # C++ sources and headers. The brace list must not contain spaces: the alternatives
 5 | # are matched literally, so "{cpp, h}" would look for the extension " h" and miss *.h.
 6 | [*.{cpp,h}]
 7 | indent_style = space
 8 | indent_size = 4
 9 | tab_width = 4
10 | end_of_line = lf
11 | charset = utf-8
12 | trim_trailing_whitespace = true
13 | insert_final_newline = false
14 | max_line_length = off
15 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Ignore everything at the repository root, then re-include the tracked entries.
 2 | /*
 3 | !/build.sh
 4 | !/.clang-format
 5 | !/CMakeLists.txt
 6 | !/.editorconfig
 7 | !/.gitignore
 8 | # The license file is named LICENSE.txt; a pattern without the extension does not match it.
 9 | !/LICENSE.txt
10 | !/README.md
11 | !/src/
12 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.14)
 2 | project("dlib template project" LANGUAGES CXX)
 3 | 
 4 | set(CPACK_PACKAGE_NAME "dlib-template-project")
 5 | set(CPACK_PACKAGE_VERSION_MAJOR "0")
 6 | set(CPACK_PACKAGE_VERSION_MINOR "0")
 7 | set(CPACK_PACKAGE_VERSION_PATCH "0")
 8 | set(VERSION ${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH})
 9 | 
10 | # Use C++ 17
11 | set(CMAKE_CXX_STANDARD 17)
12 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
13 | set(CMAKE_CXX_EXTENSIONS ON)
14 | 
15 | # Colored warnings. add_compile_options() is directory-scoped, so these flags also apply to
16 | # the dlib targets that FetchContent adds further down.
17 | option(FORCE_COLORED_OUTPUT "Always produce ANSI-colored output (GNU/Clang only)." ON)
18 | if(FORCE_COLORED_OUTPUT)
19 |     if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
20 |         add_compile_options(-fdiagnostics-color=always)
21 |     elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "^(Apple)?Clang$")
22 |         add_compile_options(-fcolor-diagnostics)
23 |     endif()
24 | endif()
25 | 
26 | # DenseNets have a LOT of layers...
27 | add_compile_options(-ftemplate-depth=2000)
28 | 
29 | # Enable ccache if it exists
30 | find_program(CCACHE_FOUND ccache)
31 | if(CCACHE_FOUND)
32 |     set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
33 |     set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
34 | endif()
35 | 
36 | # Optimization flags: Release builds are tuned for the CPU of the build machine. The resulting
37 | # binaries may not run on other CPUs; configure with -DENABLE_MARCH_NATIVE=OFF to avoid that.
38 | option(ENABLE_MARCH_NATIVE "Compile Release builds with -march=native." ON)
39 | include(CheckCXXCompilerFlag)
40 | check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
41 | if(ENABLE_MARCH_NATIVE AND COMPILER_SUPPORTS_MARCH_NATIVE)
42 |     # Generator expressions are evaluated per configuration at generate time, so this also
43 |     # works with multi-config generators and when CMAKE_BUILD_TYPE is only set later on.
44 |     add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:Release>>:-march=native>")
45 | endif()
46 | 
47 | # Dependency management
48 | include(FetchContent)
49 | 
50 | # fetch_content(<name> <tag> <repository>)
51 | # Declares the Git dependency <name> at <tag>, checks it out into external/<name> inside the
52 | # source tree and makes it available. Kept as a macro (no new variable scope) so that whatever
53 | # FetchContent_MakeAvailable() defines for its caller stays visible at the call site.
54 | macro(fetch_content name tag repository)
55 |     FetchContent_Declare(
56 |         ${name}
57 |         GIT_REPOSITORY ${repository}
58 |         GIT_TAG        ${tag}
59 |         GIT_PROGRESS   TRUE
60 |         USES_TERMINAL_DOWNLOAD TRUE
61 |         SOURCE_DIR     ${PROJECT_SOURCE_DIR}/external/${name}
62 |     )
63 |     message(STATUS "Fetching ${name} ${tag}")
64 |     FetchContent_MakeAvailable(${name})
65 | endmacro()
66 | 
67 | # add_dlib_executable(<name>)
68 | # Builds src/<name>.cpp into the executable <name>, links it against dlib::dlib, adds src/ to
69 | # its include path, enables warnings and installs it into bin/.
70 | function(add_dlib_executable name)
71 |     add_executable(${name} src/${name}.cpp)
72 |     target_link_libraries(${name} PRIVATE dlib::dlib)
73 |     target_include_directories(${name} PRIVATE src)
74 |     target_compile_options(${name} PRIVATE -Wall -Wextra -pedantic -Wno-deprecated-copy)
75 |     install(TARGETS ${name} DESTINATION bin)
76 | endfunction()
77 | 
78 | # NOTE: "master" is a moving branch; pin a release tag or commit for reproducible builds.
79 | fetch_content(dlib master https://github.com/davisking/dlib.git)
80 | 
81 | add_dlib_executable(benchmark_classification)
82 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Boost Software License - Version 1.0 - August 17th, 2003
 2 | 
 3 | Permission is hereby granted, free of charge, to any person or organization
 4 | obtaining a copy of the software and accompanying documentation covered by
 5 | this license (the "Software") to use, reproduce, display, distribute,
 6 | execute, and transmit the Software, and to prepare derivative works of the
 7 | Software, and to permit third-parties to whom the Software is furnished to
 8 | do so, all subject to the following:
 9 | 
10 | The copyright notices in the Software and this entire statement, including
11 | the above license grant, this restriction and the following disclaimer,
12 | must be included in all copies of the Software, in whole or in part, and
13 | all derivative works of the Software, unless such copies or derivative
14 | works are solely in the form of machine-executable object code generated by
15 | a source language processor.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # dnn
 2 | Deep Neural Network Architectures
 3 | 
 4 | This repository contains the definitions for the following architectures, organized by task.
 5 | 
 6 | ## Contents
 7 | - [Classification](#classification)
 8 |   - [AlexNet](#alexnet)
 9 |   - [SqueezeNet](#squeezenet)
10 |   - [VGGNet](#vggnet)
11 |   - [GoogLeNet](#googlenet)
12 |   - [ResNet](#resnet)
13 |   - [DenseNet](#densenet)
14 |   - [DarkNet](#darknet)
15 |   - [VoVNet](#vovnet)
16 |   - [RepVGG](#repvgg)
17 | - [Detection](#detection)
18 |   - [YOLOv5](#yolov5)
19 | 
20 | ## [Classification](./src/classification)
21 | 
22 | ### [AlexNet](./src/classification/alexnet.h)
23 | 
24 | It contains the definition for the model that started it all.
25 | 
26 | Papers:
27 | - [ImageNet Classification with Deep Convolutional Neural Networks](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks)
28 | 
29 | ### [SqueezeNet](./src/classification/squeezenet.h)
30 | 
31 | In particular, it contains SqueezeNet-{v1.0,v1.1}.
32 | 
33 | Papers:
34 | - [SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size](https://arxiv.org/abs/1602.07360)
35 | 
36 | ### [VGGNet](./src/classification/vggnet.h)
37 | 
38 | In particular, it contains VGGNet-{11,13,16,19} variants with batch normalization.
39 | 
40 | Papers:
41 | - [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
42 | 
43 | ### [GoogLeNet](./src/classification/googlenet.h)
44 | 
45 | It contains the definition of the GoogLeNet, also known as InceptionV1.
46 | 
47 | Papers:
48 | - [Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842)
49 | 
50 | ### [ResNet](./src/classification/resnet.h)
51 | 
52 | In particular, it contains ResNet-{18,34,50,101,152}-B definitions, in contrast to dlib, which contains the A variants.
53 | 
54 | Papers:
55 | - [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
56 | 
57 | ### [DenseNet](./src/classification/densenet.h)
58 | 
59 | In particular, it contains DenseNet-{121,169,201,264,161} definitions.
60 | 
61 | Papers:
62 | - [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993)
63 | 
64 | ### [DarkNet](./src/classification/darknet.h)
65 | 
66 | In particular, it contains the backbones for DarkNet-19 (introduced in YOLOv1), DarkNet-53 (YOLOv3) and CSPDarknet-53 (YOLOv4).
67 | 
68 | Papers:
69 | - [You Only Look Once: Unified, Real-Time Object Detection](https://arxiv.org/abs/1506.02640)
70 | - [YOLOv3: An Incremental Improvement](https://arxiv.org/abs/1804.02767)
71 | - [CSPNet: A New Backbone that can Enhance Learning Capability of CNN](https://arxiv.org/abs/1911.11929)
72 | - [YOLOv4: Optimal Speed and Accuracy of Object Detection](https://arxiv.org/abs/2004.10934)
73 | 
74 | ### [VoVNet](./src/classification/vovnet.h)
75 | 
76 | In particular, it contains implementations for VoVNetv2-{19slim,19,27slim,27,39,57,99}, which are very similar to VoVNetv1 (V2 adds identity mapping and effective Squeeze-and-Excitation on top of V1).
77 | 
78 | Papers:
79 | - [An Energy and GPU-Computation Efficient Backbone Network for Real-Time Object Detection](https://arxiv.org/abs/1904.09730)
80 | - [CenterMask: Real-Time Anchor-Free Instance Segmentation](https://arxiv.org/abs/1911.06667)
81 | 
82 | ### [RepVGG](./src/classification/repvgg.h)
83 | 
84 | In particular, it contains implementations for RepVGG-{A0,A1,A2,B0,B1,B2,B3}.
85 | 
86 | Note that, at the moment, there is no way to convert from a trained RepVGG model into its inference counterpart.
87 | I will investigate how to do that soon.
88 | 
89 | Papers:
90 | - [RepVGG: Making VGG-style ConvNets Great Again](https://arxiv.org/abs/2101.03697)
91 | 
92 | ## [Detection](./src/detection)
93 | 
94 | ### [YOLOv5](./src/detection/yolov5.h)
95 | 
96 | In particular, it contains implementations for YOLOv5{n,s,m,l,x}, which match the ones in [ultralytics/yolov5](https://github.com/ultralytics/yolov5).
97 | 


--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #
 3 | # Configure and build the project with CMake and Ninja.
 4 | #
 5 | # Usage: ./build.sh [release|debug] [extra cmake configure arguments...]
 6 | #
 7 | # The optional first argument selects the build type (case-insensitive, a trailing
 8 | # slash is accepted; default: Release). All remaining arguments are forwarded to the
 9 | # CMake configure step. The build tree is build/<BuildType>.
10 | 
11 | # Stop at the first failing command, so a failed configure is not followed by a build.
12 | set -e
13 | 
14 | BUILD_PREFIX=build
15 | BUILD_TYPE=Release
16 | 
17 | # Lower-case the first argument so that "Debug", "DEBUG", ... are all recognised.
18 | ARG1=$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')
19 | 
20 | case "${ARG1}" in
21 |     release|release/)
22 |         BUILD_TYPE=Release
23 |         shift 1
24 |         ;;
25 |     debug|debug/)
26 |         BUILD_TYPE=Debug
27 |         shift 1
28 |         ;;
29 |     *)
30 |         # Not a build type: keep the default and forward the argument to CMake.
31 |         ;;
32 | esac
33 | 
34 | # Ninja is a single-config generator, so the build type must be chosen at configure time.
35 | # It comes before "$@" so that an explicit -DCMAKE_BUILD_TYPE=... from the caller wins.
36 | cmake -B "${BUILD_PREFIX}/${BUILD_TYPE}" -GNinja \
37 |     -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
38 |     -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
39 |     -DUSE_AVX_INSTRUCTIONS=ON \
40 |     -DUSE_SSE2_INSTRUCTIONS=ON \
41 |     -DUSE_SSE4_INSTRUCTIONS=ON \
42 |     "$@"
43 | 
44 | cmake --build "${BUILD_PREFIX}/${BUILD_TYPE}" --config "${BUILD_TYPE}"
45 | 


--------------------------------------------------------------------------------
/src/benchmark.h:
--------------------------------------------------------------------------------
 1 | #include <dlib/dnn.h>
 2 | 
 3 | // Visitor for dlib::visit_layers: calls disable_bias() on every con_ layer it is handed.
 4 | class visitor_con_disable_bias
 5 | {
 6 |     public:
 7 |     // Matches every instantiation of dlib::con_, whatever its template arguments.
 8 |     template <long NF, long NR, long NC, int SY, int SX, int PY, int PX, typename SUB>
 9 |     void operator()(size_t, dlib::add_layer<dlib::con_<NF, NR, NC, SY, SX, PY, PX>, SUB>& conv)
10 |     {
11 |         conv.layer_details().disable_bias();
12 |     }
13 |     template <typename LAYER> void operator()(size_t, LAYER&) {}  // any other layer: no-op
14 | };
15 | 
16 | // Visitor for dlib::visit_layers: adds one to a caller-owned counter per con_ layer visited.
17 | class visitor_count_convolutions
18 | {
19 |     size_t& counter;  // refers to the caller's variable, which has to outlive the visitor
20 | 
21 |     public:
22 |     visitor_count_convolutions(size_t& num_convolutions) : counter(num_convolutions) {}
23 |     // Matches every instantiation of dlib::con_, whatever its template arguments.
24 |     template <long NF, long NR, long NC, int SY, int SX, int PY, int PX, typename SUB>
25 |     void operator()(size_t, dlib::add_layer<dlib::con_<NF, NR, NC, SY, SX, PY, PX>, SUB>&)
26 |     {
27 |         counter += 1;
28 |     }
29 |     template <typename LAYER> void operator()(size_t, LAYER&) {}  // any other layer: ignored
30 | };
31 | 
32 | // Times net.forward() on a batch of black images and prints a one-line report to std::cout;
33 | // afterwards it waits on std::cin.get() before returning.
34 | template <typename net_type> auto benchmark(
35 |     const std::string& name,
36 |     net_type& net,
37 |     const size_t batch_size = 1,
38 |     const size_t image_size = 224,
39 |     const int iterations = 100)
40 | {
41 |     using steady = std::chrono::steady_clock;
42 |     using millis = std::chrono::duration<float, std::milli>;
43 |     // input: batch_size copies of an all-black image_size x image_size image
44 |     dlib::matrix<dlib::rgb_pixel> black(image_size, image_size);
45 |     assign_all_pixels(black, dlib::rgb_pixel(0, 0, 0));
46 |     std::vector<dlib::matrix<dlib::rgb_pixel>> samples(batch_size, black);
47 |     dlib::resizable_tensor input;
48 |     net.to_tensor(samples.begin(), samples.end(), input);
49 |     for (int warmup = 10; warmup > 0; --warmup)  // ten untimed passes
50 |         net.forward(input);
51 |     dlib::running_stats<double> timings;  // one sample per timed pass, in milliseconds
52 |     for (int pass = 0; pass < iterations; ++pass)
53 |     {
54 |         const auto start = steady::now();
55 |         net.forward(input);
56 |         const auto& output = net.subnet().get_output();
57 |         output.host();  // still inside the timed span
58 |         const auto stop = steady::now();
59 |         timings.add(std::chrono::duration_cast<millis>(stop - start).count());
60 |     }
61 |     std::cout << name << " inference: " << timings.mean() << " ms"
62 |               << " (" << 1.0 / timings.mean() * 1000.0 * batch_size << " fps)"
63 |               << " #params: " << count_parameters(net);
64 |     std::ostringstream serialized;
65 |     serialize(net, serialized);
66 |     std::cout << " (memory usage: " << serialized.str().size() / 1024.0 / 1024.0 << " MiB)";
67 |     size_t num_convolutions = 0;
68 |     dlib::visit_layers(net, visitor_count_convolutions(num_convolutions));
69 |     std::cout << " #num convolutions: " << num_convolutions << ' '
70 |               << " #num layers: " << net_type::num_computational_layers << '\n';
71 |     std::cin.get();
72 | }
73 | 


--------------------------------------------------------------------------------
/src/benchmark_classification.cpp:
--------------------------------------------------------------------------------
  1 | #include "benchmark.h"
  2 | #include "classification/alexnet.h"
  3 | #include "classification/darknet.h"
  4 | #include "classification/densenet.h"
  5 | #include "classification/googlenet.h"
  6 | #include "classification/resnet.h"
  7 | #include "classification/squeezenet.h"
  8 | #include "classification/vggnet.h"
  9 | #include "classification/vovnet.h"
 10 | #include "classification/repvgg.h"
 11 | 
 12 | #include <dlib/cmd_line_parser.h>
 13 | 
 14 | #define DNN_BENCH_ALEXNET 1
 15 | #define DNN_BENCH_VGGNET 1
 16 | #define DNN_BENCH_GOOGLENET 1
 17 | #define DNN_BENCH_RESNET 1
 18 | #define DNN_BENCH_DARKNET 1
 19 | #define DNN_BENCH_DENSENET 1
 20 | #define DNN_BENCH_VOVNET 1
 21 | #define DNN_BENCH_SQUEEZENET 1
 22 | #define DNN_BENCH_REPVGG 1
 23 | 
 24 | int main(const int argc, const char** argv)
 25 | try
 26 | {
 27 |     // Benchmarks every classification network enabled through the DNN_BENCH_* switches.
 28 |     dlib::command_line_parser parser;
 29 |     parser.add_option("batch-size", "set the batch size (default: 1)", 1);
 30 |     parser.add_option("image-size", "set the image size (default: 224)", 1);
 31 |     parser.add_option("num-outputs", "set the number of fc outputs (default: 1000)", 1);
 32 |     parser.add_option("num-iters", "set the number of iterations (default: 100)", 1);
 33 |     parser.add_option("cuda-blocking", "make CUDA kernel launches synchronous (sets CUDA_LAUNCH_BLOCKING=1)");
 34 |     parser.set_group_name("Help Options");
 35 |     parser.add_option("h", "alias for --help");
 36 |     parser.add_option("help", "display this message and exit");
 37 |     parser.parse(argc, argv);
 38 | 
 39 |     if (parser.option("h") or parser.option("help"))
 40 |     {
 41 |         parser.print_options();
 42 |         return EXIT_SUCCESS;
 43 |     }
 44 | 
 45 |     const std::string cuda_blocking = parser.option("cuda-blocking") ? "1" : "0";
 46 |     const size_t batch_size = dlib::get_option(parser, "batch-size", 1);
 47 |     const size_t image_size = dlib::get_option(parser, "image-size", 224);
 48 |     const size_t num_outputs = dlib::get_option(parser, "num-outputs", 1000);
 49 |     const int num_iters = dlib::get_option(parser, "num-iters", 100);
 50 |     setenv("CUDA_LAUNCH_BLOCKING", cuda_blocking.c_str(), 1);  // last argument 1: overwrite
 51 |     std::cout << std::fixed << std::setprecision(3);
 52 | 
 53 | #if DNN_BENCH_ALEXNET
 54 |     {
 55 |         alexnet::train tnet;
 56 |         dlib::disable_duplicative_biases(tnet);
 57 |         alexnet::infer net(tnet);
 58 |         net.subnet().layer_details().set_num_outputs(num_outputs);
 59 |         benchmark("alexnet  ", net, batch_size, image_size, num_iters);
 60 |     }
 61 | #endif
 62 | 
 63 | #if DNN_BENCH_SQUEEZENET
 64 |     {
 65 |         squeezenet::train_v1_0 tnet;
 66 |         dlib::disable_duplicative_biases(tnet);
 67 |         squeezenet::infer_v1_0 net(tnet);
 68 |         net.subnet().subnet().subnet().layer_details().set_num_filters(num_outputs);
 69 |         benchmark("sqznet1.0", net, batch_size, image_size, num_iters);
 70 |     }
 71 |     {
 72 |         squeezenet::train_v1_1 tnet;
 73 |         dlib::disable_duplicative_biases(tnet);
 74 |         squeezenet::infer_v1_1 net(tnet);
 75 |         net.subnet().subnet().subnet().layer_details().set_num_filters(num_outputs);
 76 |         benchmark("sqznet1.1", net, batch_size, image_size, num_iters);
 77 |     }
 78 | #endif
 79 | 
 80 | #if DNN_BENCH_VGGNET
 81 |     {
 82 |         vggnet::train_11 tnet;
 83 |         dlib::disable_duplicative_biases(tnet);
 84 |         vggnet::infer_11 net(tnet);
 85 |         net.subnet().layer_details().set_num_outputs(num_outputs);
 86 |         benchmark("vggnet11 ", net, batch_size, image_size, num_iters);
 87 |     }
 88 |     {
 89 |         vggnet::train_13 tnet;
 90 |         dlib::disable_duplicative_biases(tnet);
 91 |         vggnet::infer_13 net(tnet);
 92 |         net.subnet().layer_details().set_num_outputs(num_outputs);
 93 |         benchmark("vggnet13 ", net, batch_size, image_size, num_iters);
 94 |     }
 95 |     {
 96 |         vggnet::train_16 tnet;
 97 |         dlib::disable_duplicative_biases(tnet);
 98 |         vggnet::infer_16 net(tnet);
 99 |         net.subnet().layer_details().set_num_outputs(num_outputs);
100 |         benchmark("vggnet16 ", net, batch_size, image_size, num_iters);
101 |     }
102 |     {
103 |         vggnet::train_19 tnet;
104 |         dlib::disable_duplicative_biases(tnet);
105 |         vggnet::infer_19 net(tnet);
106 |         net.subnet().layer_details().set_num_outputs(num_outputs);
107 |         benchmark("vggnet19 ", net, batch_size, image_size, num_iters);
108 |     }
109 | #endif
110 | 
111 | #if DNN_BENCH_GOOGLENET
112 |     {
113 |         googlenet::train tnet;
114 |         dlib::disable_duplicative_biases(tnet);
115 |         googlenet::infer net(tnet);
116 |         net.subnet().layer_details().set_num_outputs(num_outputs);
117 |         benchmark("googlenet", net, batch_size, image_size, num_iters);
118 |     }
119 | #endif
120 | 
121 | #if DNN_BENCH_RESNET
122 |     {
123 |         resnet::train_18 tnet;
124 |         dlib::disable_duplicative_biases(tnet);
125 |         resnet::infer_18 net(tnet);
126 |         net.subnet().layer_details().set_num_outputs(num_outputs);
127 |         benchmark("resnet18 ", net, batch_size, image_size, num_iters);
128 |     }
129 |     {
130 |         resnet::train_34 tnet;
131 |         dlib::disable_duplicative_biases(tnet);
132 |         resnet::infer_34 net(tnet);
133 |         net.subnet().layer_details().set_num_outputs(num_outputs);
134 |         benchmark("resnet34 ", net, batch_size, image_size, num_iters);
135 |     }
136 |     {
137 |         resnet::train_50 tnet;
138 |         dlib::disable_duplicative_biases(tnet);
139 |         resnet::infer_50 net(tnet);
140 |         net.subnet().layer_details().set_num_outputs(num_outputs);
141 |         benchmark("resnet50 ", net, batch_size, image_size, num_iters);
142 |     }
143 |     {
144 |         resnet::train_101 tnet;
145 |         dlib::disable_duplicative_biases(tnet);
146 |         resnet::infer_101 net(tnet);
147 |         net.subnet().layer_details().set_num_outputs(num_outputs);
148 |         benchmark("resnet101", net, batch_size, image_size, num_iters);
149 |     }
150 |     {
151 |         resnet::train_152 tnet;
152 |         dlib::disable_duplicative_biases(tnet);
153 |         resnet::infer_152 net(tnet);
154 |         net.subnet().layer_details().set_num_outputs(num_outputs);
155 |         benchmark("resnet152", net, batch_size, image_size, num_iters);
156 |     }
157 | #endif
158 | 
159 | #if DNN_BENCH_DARKNET
160 |     {
161 |         darknet::train_19 tnet;
162 |         dlib::disable_duplicative_biases(tnet);
163 |         darknet::infer_19 net(tnet);
164 |         net.subnet().layer_details().set_num_outputs(num_outputs);
165 |         benchmark("darknet19", net, batch_size, image_size, num_iters);
166 |     }
167 |     {
168 |         darknet::train_53 tnet;
169 |         dlib::disable_duplicative_biases(tnet);
170 |         darknet::infer_53 net(tnet);
171 |         net.subnet().layer_details().set_num_outputs(num_outputs);
172 |         benchmark("darknet53", net, batch_size, image_size, num_iters);
173 |     }
174 |     {
175 |         darknet::train_53csp tnet;
176 |         dlib::disable_duplicative_biases(tnet);
177 |         darknet::infer_53csp net(tnet);
178 |         net.subnet().layer_details().set_num_outputs(num_outputs);
179 |         benchmark("darknet53csp", net, batch_size, image_size, num_iters);
180 |     }
181 | #endif
182 | 
183 | #if DNN_BENCH_DENSENET
184 |     {
185 |         densenet::train_121 tnet;
186 |         dlib::visit_layers(tnet, visitor_con_disable_bias());
187 |         densenet::infer_121 net(tnet);
188 |         net.subnet().layer_details().set_num_outputs(num_outputs);
189 |         benchmark("densenet121", net, batch_size, image_size, num_iters);
190 |     }
191 |     {
192 |         densenet::train_169 tnet;
193 |         dlib::visit_layers(tnet, visitor_con_disable_bias());
194 |         densenet::infer_169 net(tnet);
195 |         net.subnet().layer_details().set_num_outputs(num_outputs);
196 |         benchmark("densenet169", net, batch_size, image_size, num_iters);
197 |     }
198 |     {
199 |         densenet::train_201 tnet;
200 |         dlib::visit_layers(tnet, visitor_con_disable_bias());
201 |         densenet::infer_201 net(tnet);
202 |         net.subnet().layer_details().set_num_outputs(num_outputs);
203 |         benchmark("densenet201", net, batch_size, image_size, num_iters);
204 |     }
205 |     {
206 |         densenet::train_265 tnet;
207 |         dlib::visit_layers(tnet, visitor_con_disable_bias());
208 |         densenet::infer_265 net(tnet);
209 |         net.subnet().layer_details().set_num_outputs(num_outputs);
210 |         benchmark("densenet265", net, batch_size, image_size, num_iters);
211 |     }
212 |     {
213 |         densenet::train_161 tnet;
214 |         dlib::visit_layers(tnet, visitor_con_disable_bias());
215 |         densenet::infer_161 net(tnet);
216 |         net.subnet().layer_details().set_num_outputs(num_outputs);
217 |         benchmark("densenet161", net, batch_size, image_size, num_iters);
218 |     }
219 | #endif
220 | 
221 | #if DNN_BENCH_VOVNET
222 |     {
223 |         vovnet::train_19_slim tnet;
224 |         dlib::visit_layers(tnet, visitor_con_disable_bias());
225 |         vovnet::infer_19_slim net(tnet);
226 |         net.subnet().layer_details().set_num_outputs(num_outputs);
227 |         benchmark("vovnet19s", net, batch_size, image_size, num_iters);
228 |     }
229 |     {
230 |         vovnet::train_19 tnet;
231 |         dlib::visit_layers(tnet, visitor_con_disable_bias());
232 |         vovnet::infer_19 net(tnet);
233 |         net.subnet().layer_details().set_num_outputs(num_outputs);
234 |         benchmark("vovnet19 ", net, batch_size, image_size, num_iters);
235 |     }
236 |     {
237 |         vovnet::train_27_slim tnet;
238 |         dlib::visit_layers(tnet, visitor_con_disable_bias());
239 |         vovnet::infer_27_slim net(tnet);
240 |         net.subnet().layer_details().set_num_outputs(num_outputs);
241 |         benchmark("vovnet27s", net, batch_size, image_size, num_iters);
242 |     }
243 |     {
244 |         vovnet::train_27 tnet;
245 |         dlib::visit_layers(tnet, visitor_con_disable_bias());
246 |         vovnet::infer_27 net(tnet);
247 |         net.subnet().layer_details().set_num_outputs(num_outputs);
248 |         benchmark("vovnet27 ", net, batch_size, image_size, num_iters);
249 |     }
250 |     {
251 |         vovnet::train_39 tnet;
252 |         dlib::visit_layers(tnet, visitor_con_disable_bias());
253 |         vovnet::infer_39 net(tnet);
254 |         net.subnet().layer_details().set_num_outputs(num_outputs);
255 |         benchmark("vovnet39 ", net, batch_size, image_size, num_iters);
256 |     }
257 |     {
258 |         vovnet::train_57 tnet;
259 |         dlib::visit_layers(tnet, visitor_con_disable_bias());
260 |         vovnet::infer_57 net(tnet);
261 |         net.subnet().layer_details().set_num_outputs(num_outputs);
262 |         benchmark("vovnet57 ", net, batch_size, image_size, num_iters);
263 |     }
264 |     {
265 |         vovnet::train_99 tnet;
266 |         dlib::visit_layers(tnet, visitor_con_disable_bias());
267 |         vovnet::infer_99 net(tnet);
268 |         net.subnet().layer_details().set_num_outputs(num_outputs);
269 |         benchmark("vovnet99 ", net, batch_size, image_size, num_iters);
270 |     }
271 | #endif
272 | 
273 | #if DNN_BENCH_REPVGG
274 |     {
275 |         repvgg::infer_a0 net;
276 |         net.subnet().layer_details().set_num_outputs(num_outputs);
277 |         benchmark("repvgg_a0 ", net, batch_size, image_size, num_iters);
278 |     }
279 |     {
280 |         repvgg::infer_a1 net;
281 |         net.subnet().layer_details().set_num_outputs(num_outputs);
282 |         benchmark("repvgg_a1 ", net, batch_size, image_size, num_iters);
283 |     }
284 |     {
285 |         repvgg::infer_a2 net;
286 |         net.subnet().layer_details().set_num_outputs(num_outputs);
287 |         benchmark("repvgg_a2 ", net, batch_size, image_size, num_iters);
288 |     }
289 |     {
290 |         repvgg::infer_b0 net;
291 |         net.subnet().layer_details().set_num_outputs(num_outputs);
292 |         benchmark("repvgg_b0 ", net, batch_size, image_size, num_iters);
293 |     }
294 |     {
295 |         repvgg::infer_b1 net;
296 |         net.subnet().layer_details().set_num_outputs(num_outputs);
297 |         benchmark("repvgg_b1 ", net, batch_size, image_size, num_iters);
298 |     }
299 |     {
300 |         repvgg::infer_b2 net;
301 |         net.subnet().layer_details().set_num_outputs(num_outputs);
302 |         benchmark("repvgg_b2 ", net, batch_size, image_size, num_iters);
303 |     }
304 |     {
305 |         repvgg::infer_b3 net;
306 |         net.subnet().layer_details().set_num_outputs(num_outputs);
307 |         benchmark("repvgg_b3 ", net, batch_size, image_size, num_iters);
308 |     }
309 | #endif
310 | 
311 |     return EXIT_SUCCESS;
312 | }
313 | catch (const std::exception& e)
314 | {
315 |     std::cerr << e.what() << '\n';  // diagnostics belong on stderr, not in the report stream
316 |     return EXIT_FAILURE;
317 | }
318 | 


--------------------------------------------------------------------------------
/src/classification/alexnet.h:
--------------------------------------------------------------------------------
 1 | #ifndef AlexNet_H
 2 | #define AlexNet_H
 3 | 
 4 | #include <dlib/dnn.h>
 5 | 
 6 | namespace alexnet
 7 | {
 8 |     // clang-format off
 9 |     using namespace dlib;
10 |     // Layer stack parameterised on the activation (ACT) and the layer feeding each fc<4096> (DO).
11 |     template <template <typename> class ACT, template <typename> class DO>
12 |     struct def
13 |     {
14 |         // con_ layer that uses one kernel size, one stride and one padding for both dimensions
15 |         template <long filters, long kernel, int stride, int pad, typename SUBNET>
16 |         using conp = add_layer<con_<filters, kernel, kernel, stride, stride, pad, pad>, SUBNET>;
17 |         template <typename INPUT>  // written from the output layer down to INPUT
18 |         using backbone =
19 |             fc<1000, ACT<fc<4096, DO<ACT<fc<4096, DO<max_pool<3, 3, 2, 2,
20 |             ACT<conp<256, 3, 1, 1, ACT<conp<384, 3, 1, 1, ACT<conp<384, 3, 1, 1,
21 |             max_pool<3, 3, 2, 2, ACT<conp<256, 5, 1, 2, max_pool<3, 3, 2, 2,
22 |             ACT<conp<96, 11, 4, 2, INPUT>>>>>>>>>>>>>>>>>>>>;
23 |     };
24 |     // train plugs dropout into DO where infer plugs multiply; the rest of the stack is shared
25 |     using train = loss_multiclass_log<def<relu, dropout>::backbone<input_rgb_image>>;
26 |     using infer = loss_multiclass_log<def<relu, multiply>::backbone<input_rgb_image>>;
27 |     // clang-format on
28 | }  // namespace alexnet
29 | 
30 | #endif  // AlexNet_H
31 | 


--------------------------------------------------------------------------------
/src/classification/darknet.h:
--------------------------------------------------------------------------------
  1 | #ifndef DarkNet_H
  2 | #define DarkNet_H
  3 | 
  4 | #include <dlib/dnn.h>
  5 | 
  6 | namespace darknet
  7 | {
  8 |     // clang-format off
  9 |     using namespace dlib;
 10 |     // ACT: activation layer; BN: bn_con (train_*) or affine (infer_*), as instantiated at the end of this namespace
 11 |     template <template <typename> class ACT, template <typename> class BN>
 12 |     struct def
 13 |     {
 14 |         template <long nf, long ks, int s, typename SUBNET>  // nf filters, ks x ks kernel, stride s, padding ks / 2
 15 |         using conblock = ACT<BN<add_layer<con_<nf, ks, ks, s, s, ks/2, ks/2>, SUBNET>>>;
 16 |         // residual: 1x1 conblock (nf2 filters), then 3x3 conblock (nf1 filters), added to the block input (tag1)
 17 |         template <long nf1, long nf2, typename SUBNET>
 18 |         using residual = add_prev1<
 19 |                          conblock<nf1, 3, 1,
 20 |                          conblock<nf2, 1, 1,
 21 |                          tag1<SUBNET>>>>;
 22 |         // resv3 halves the number of filters in the 1x1 conblock, resv4 keeps it unchanged
 23 |         template <long nf, typename SUBNET> using resv3 = residual<nf, nf / 2, SUBNET>;
 24 |         template <long nf, typename SUBNET> using resv4 = residual<nf, nf, SUBNET>;
 25 |         // block3 / block5: 3 or 5 conblocks alternating 3x3 (num_filters) and 1x1 (num_filters / 2) kernels
 26 |         template <long num_filters, typename SUBNET>
 27 |         using block3 = conblock<num_filters, 3, 1,
 28 |                        conblock<num_filters / 2, 1, 1,
 29 |                        conblock<num_filters, 3, 1,
 30 |                        SUBNET>>>;
 31 | 
 32 |         template <long num_filters, typename SUBNET>
 33 |         using block5 = conblock<num_filters, 3, 1,
 34 |                        conblock<num_filters / 2, 1, 1,
 35 |                        conblock<num_filters, 3, 1,
 36 |                        conblock<num_filters / 2, 1, 1,
 37 |                        conblock<num_filters, 3, 1,
 38 |                        SUBNET>>>>>;
 39 |         // cspblock (read bottom-up): strided 3x3 conblock (tag1) -> 1x1 branch (tag2); skip1 goes back to tag1 -> 1x1, N x RES, 1x1 (tag1); both branches are concatenated and fused by a 1x1 conblock
 40 |         template <long nf, long factor, size_t N, template <typename> class RES, typename SUBNET>
 41 |         using cspblock = conblock<nf * factor, 1, 1,
 42 |                          concat2<tag1, tag2,
 43 |                          tag1<conblock<nf, 1, 1,
 44 |                          repeat<N, RES,
 45 |                          conblock<nf, 1, 1,
 46 |                          skip1<
 47 |                          tag2<conblock<nf, 1, 1,
 48 |                          tag1<conblock<nf * factor, 3, 2,
 49 |                          SUBNET>>>>>>>>>>>;
 50 |         // some definitions to allow the use of the repeat layer
 51 |         template <typename SUBNET> using resv3_64= resv3<64, SUBNET>;
 52 |         template <typename SUBNET> using resv3_128 = resv3<128, SUBNET>;
 53 |         template <typename SUBNET> using resv3_256 = resv3<256, SUBNET>;
 54 |         template <typename SUBNET> using resv3_512 = resv3<512, SUBNET>;
 55 |         template <typename SUBNET> using resv3_1024 = resv3<1024, SUBNET>;
 56 |         template <typename SUBNET> using resv4_64= resv4<64, SUBNET>;
 57 |         template <typename SUBNET> using resv4_128 = resv4<128, SUBNET>;
 58 |         template <typename SUBNET> using resv4_256 = resv4<256, SUBNET>;
 59 |         template <typename SUBNET> using resv4_512 = resv4<512, SUBNET>;
 60 |         // backbone19: conblock / block3 / block5 stages (32 to 1024 filters) separated by five 2x2 max-poolings with stride 2
 61 |         template <typename INPUT>
 62 |         using backbone19 = block5<1024,
 63 |                            max_pool<2, 2, 2, 2, block5<512,
 64 |                            max_pool<2, 2, 2, 2, block3<256,
 65 |                            max_pool<2, 2, 2, 2, block3<128,
 66 |                            max_pool<2, 2, 2, 2, conblock<64, 3, 1,
 67 |                            max_pool<2, 2, 2, 2, conblock<32, 3, 1,
 68 |                            INPUT>>>>>>>>>>>;
 69 |         // backbone53: strided 3x3 conblocks (64 to 1024 filters), each followed by 1, 2, 8, 8 and 4 resv3 blocks, respectively
 70 |         template <typename INPUT>
 71 |         using backbone53 = repeat<4, resv3_1024, conblock<1024, 3, 2,
 72 |                            repeat<8, resv3_512, conblock<512, 3, 2,
 73 |                            repeat<8, resv3_256, conblock<256, 3, 2,
 74 |                            repeat<2, resv3_128, conblock<128, 3, 2,
 75 |                            resv3<64, conblock<64, 3, 2, conblock<32, 3, 1,
 76 |                            INPUT>>>>>>>>>>>;
 77 |         // backbone53csp: a 3x3 conblock followed by five cspblocks holding 1, 2, 8, 8 and 4 residual blocks, respectively
 78 |         template <typename INPUT>
 79 |         using backbone53csp = cspblock<512, 2, 4, resv4_512,
 80 |                               cspblock<256, 2, 8, resv4_256,
 81 |                               cspblock<128, 2, 8, resv4_128,
 82 |                               cspblock<64, 2, 2, resv4_64,
 83 |                               cspblock<64, 1, 1, resv3_64,
 84 |                               conblock<32, 3, 1,
 85 |                               INPUT>>>>>>;
 86 |     };
 87 |     // classification head: global average pooling, 1000-output fc layer and multiclass log loss
 88 |     template <typename SUBNET>
 89 |     using classification_head = loss_multiclass_log<fc<1000, avg_pool_everything<SUBNET>>>;
 90 |     // train_* networks use bn_con and infer_* networks use affine; the csp variant uses mish instead of leaky_relu
 91 |     using train_19 = classification_head<def<leaky_relu, bn_con>::backbone19<input_rgb_image>>;
 92 |     using infer_19 = classification_head<def<leaky_relu, affine>::backbone19<input_rgb_image>>;
 93 |     using train_53 = classification_head<def<leaky_relu, bn_con>::backbone53<input_rgb_image>>;
 94 |     using infer_53 = classification_head<def<leaky_relu, affine>::backbone53<input_rgb_image>>;
 95 |     using train_53csp = classification_head<def<mish, bn_con>::backbone53csp<input_rgb_image>>;
 96 |     using infer_53csp = classification_head<def<mish, affine>::backbone53csp<input_rgb_image>>;
 97 |     // clang-format on
 98 | 
 99 | }  // namespace darknet
100 | 
101 | #endif  // DarkNet_H
102 | 


--------------------------------------------------------------------------------
/src/classification/densenet.h:
--------------------------------------------------------------------------------
 1 | #ifndef DenseNet_H
 2 | #define DenseNet_H
 3 | 
 4 | #include <dlib/dnn.h>
 5 | 
 6 | namespace densenet
 7 | {
 8 |     // clang-format off
 9 |     using namespace dlib;
10 |     // ACT: any activation layer; BN: bn_con (training) or affine (inference); k: the growth rate
11 |     template <template <typename> class ACT, template <typename> class BN, long k>
12 |     struct def
13 |     {
14 |         template <long nf, long ksize, int stride, typename SUBNET>
15 |         using conp = add_layer<con_<nf, ksize, ksize, stride, stride, ksize/2, ksize/2>, SUBNET>;
16 |         // stem: 7x7 convolution with 2 * k filters and stride 2, BN, ACT and a padded 3x3 max-pooling with stride 2
17 |         template <typename INPUT>
18 |         using stem = add_layer<max_pool_<3, 3, 2, 2, 1, 1>, ACT<BN<conp<2 * k, 7, 2, INPUT>>>>;
19 |         // transition: BN, ACT, 1x1 convolution with nf filters and a 2x2 average pooling with stride 2
20 |         template <long nf, typename SUBNET>
21 |         using transition = avg_pool<2, 2, 2, 2, con<nf, 1, 1, 1, 1, ACT<BN<SUBNET>>>>;
22 |         // dense_layer: BN-ACT-1x1 (4 * k filters) and BN-ACT-3x3 (k filters), concatenated with the layer input (tag1)
23 |         template <typename SUBNET>
24 |         using dense_layer = concat2<tag1, tag2, tag2<
25 |                             conp<k, 3, 1, ACT<BN<
26 |                             conp<4 * k, 1, 1, ACT<BN<
27 |                             tag1<SUBNET>>>>>>>>>;
28 |         // backbone: four dense stages (nb1 is the first one); every transition halves the channels accumulated so far
29 |         template <size_t nb4, size_t nb3, size_t nb2, size_t nb1, typename INPUT>
30 |         using backbone = ACT<BN<
31 |                          repeat<nb4, dense_layer, transition<k * (2 + nb1 + 2 * nb2 + 4 * nb3) / 8,
32 |                          repeat<nb3, dense_layer, transition<k * (2 + nb1 + 2 * nb2) / 4,
33 |                          repeat<nb2, dense_layer, transition<k * (2 + nb1) / 2,
34 |                          repeat<nb1, dense_layer, stem<INPUT>>>>>>>>>>;
35 |     };
36 |     // classification head: global average pooling, 1000-output fc layer and multiclass log loss
37 |     template <typename BACKBONE>
38 |     using classification_head = loss_multiclass_log<fc<1000, avg_pool_everything<BACKBONE>>>;
39 |     // the dense layers per stage are listed from the last stage to the first one
40 |     using train_121 = classification_head<def<relu, bn_con, 32>::backbone<16, 24, 12, 6, input_rgb_image>>;
41 |     using infer_121 = classification_head<def<relu, affine, 32>::backbone<16, 24, 12, 6, input_rgb_image>>;
42 |     using train_169 = classification_head<def<relu, bn_con, 32>::backbone<32, 32, 12, 6, input_rgb_image>>;
43 |     using infer_169 = classification_head<def<relu, affine, 32>::backbone<32, 32, 12, 6, input_rgb_image>>;
44 |     using train_201 = classification_head<def<relu, bn_con, 32>::backbone<32, 48, 12, 6, input_rgb_image>>;
45 |     using infer_201 = classification_head<def<relu, affine, 32>::backbone<32, 48, 12, 6, input_rgb_image>>;
46 |     using train_265 = classification_head<def<relu, bn_con, 32>::backbone<48, 64, 12, 6, input_rgb_image>>;
47 |     using infer_265 = classification_head<def<relu, affine, 32>::backbone<48, 64, 12, 6, input_rgb_image>>;
48 |     using train_161 = classification_head<def<relu, bn_con, 48>::backbone<24, 36, 12, 6, input_rgb_image>>;
49 |     using infer_161 = classification_head<def<relu, affine, 48>::backbone<24, 36, 12, 6, input_rgb_image>>;
50 | 
51 |     // clang-format on
52 | }  // namespace densenet
53 | 
54 | #endif  // DenseNet_H
55 | 


--------------------------------------------------------------------------------
/src/classification/googlenet.h:
--------------------------------------------------------------------------------
 1 | #ifndef GoogLeNet_H
 2 | #define GoogLeNet_H
 3 | 
 4 | #include <dlib/dnn.h>
 5 | 
 6 | namespace googlenet
 7 | {
 8 |     // clang-format off
 9 |     using namespace dlib;
10 |     // ACT: activation layer; BN: bn_con (train) or affine (infer); DO: dropout (train) or multiply (infer)
11 |     template <template <typename> class ACT, template <typename> class BN, template <typename> class DO>
12 |     struct def
13 |     {
14 |         template <long num_filters, long ks, int s, int p, typename SUBNET>  // convolution -> BN -> ACT
15 |         using con_block = ACT<BN<add_layer<con_<num_filters, ks, ks, s, s, p, p>, SUBNET>>>;
16 |         // max-pooling with a ks x ks window, stride s and padding p
17 |         template <long ks, int s, int p, typename SUBNET>
18 |         using maxpool = add_layer<max_pool_<ks, ks, s, s, p, p>, SUBNET>;
19 |         // stem (read bottom-up): 7x7/2 con_block, max-pooling, 1x1 con_block, 3x3 con_block and max-pooling
20 |         template <typename INPUT>
21 |         using stem = maxpool<3, 2, 1, con_block<192, 3, 1, 1,
22 |                      con_block<64, 1, 1, 0,
23 |                      maxpool<3, 2, 1, con_block<64, 7, 2, 3,
24 |                      INPUT>>>>>;
25 |         // inception_block: four branches start from the block input (itag0); their outputs (itag1 to itag4) are concatenated
26 |         template <long nf_1, long nf_3o, long nf_3i, long nf_5o, long nf_5i, long nf_pool, typename SUBNET>  // NOTE(review): the nf_5o and nf_pool con_blocks use 3x3 kernels - confirm this is intended
27 |         using inception_block = concat4<itag1, itag2, itag3, itag4,
28 |                                 itag1<con_block<nf_1, 1, 1, 0, iskip<
29 |                                 itag2<con_block<nf_3o, 3, 1, 1, con_block<nf_3i, 1, 1, 0, iskip<
30 |                                 itag3<con_block<nf_5o, 3, 1, 1, con_block<nf_5i, 1, 1, 0, iskip<
31 |                                 itag4<con_block<nf_pool, 3, 1, 1, maxpool<3, 1, 1, itag0<
32 |                                 SUBNET>>>>>>>>>>>>>>>>;
33 |         // backbone (read bottom-up): stem, then 2, 5 and 2 inception blocks separated by strided max-poolings
34 |         template <typename INPUT>
35 |         using backbone = inception_block<384, 384, 192, 128, 48, 128,
36 |                          inception_block<256, 320, 160, 128, 32, 128,
37 |                          maxpool<3, 2, 1,
38 |                          inception_block<256, 320, 160, 128, 32, 128,
39 |                          inception_block<112, 288, 144, 64, 32, 64,
40 |                          inception_block<128, 256, 128, 64, 24, 64,
41 |                          inception_block<160, 224, 112, 64, 24, 64,
42 |                          inception_block<192, 208, 96, 48, 16, 64,
43 |                          maxpool<3, 2, 1,
44 |                          inception_block<128, 192, 128, 96, 32, 64,
45 |                          inception_block<64, 128, 96, 32, 16, 32,
46 |                          stem<INPUT>>>>>>>>>>>>;
47 |         // complete network: backbone, global average pooling, DO, 1000-output fc layer and multiclass log loss
48 |          using net_type = loss_multiclass_log<fc<1000, DO<avg_pool_everything<backbone<input_rgb_image>>>>>;
49 |     };
50 |     using train = def<relu, bn_con, dropout>::net_type;
51 |     using infer = def<relu, affine, multiply>::net_type;
52 |     // clang-format on
53 | }  // namespace googlenet
54 | 
55 | #endif  // GoogLeNet_H
56 | 


--------------------------------------------------------------------------------
/src/classification/repvgg.h:
--------------------------------------------------------------------------------
 1 | #ifndef RepVGG_H
 2 | #define RepVGG_H
 3 | 
 4 | #include <dlib/dnn.h>
 5 | 
 6 | namespace repvgg
 7 | {
 8 |     // clang-format off
 9 |     using namespace dlib;
10 |     // ACT can be any activation layer.
11 |     // a_n, a_d: a multiplier numerator and denominator, respectively.
12 |     // b_n, b_d: b multiplier numerator and denominator, respectively.
13 |     template <template <typename> class ACT, long a_n, long a_d, long b_n, long b_d>
14 |     struct def
15 |     {
16 |         static const long filters_0 = std::min<long>(64, 64 * a_n / a_d);  // the first stage never exceeds 64 filters
17 |         static const long filters_1 = 64 * a_n / a_d;
18 |         static const long filters_2 = 128 * a_n / a_d;
19 |         static const long filters_3 = 256 * a_n / a_d;
20 |         static const long filters_4 = 512 * b_n / b_d;  // only the last stage is scaled by b
21 | 
22 |         // padded convolution
23 |         template <long num_filters, long ks, int s, typename SUBNET>
24 |         using pcon = add_layer<con_<num_filters, ks, ks, s, s, ks/2, ks/2>, SUBNET>;
25 | 
26 |         // batch norm + padded convolution
27 |         template <long num_filters, long ks, int s, typename SUBNET>
28 |         using bcon = bn_con<pcon<num_filters, ks, s, SUBNET>>;
29 | 
30 |         // RepVGG block: 3x3 & 1x1 convolutions
31 |         template <long num_filters, int s, typename SUBNET>
32 |         using repvggblock = add_prev2<bcon<num_filters, 1, s, skip1<tag2<bcon<num_filters, 3, s, tag1<SUBNET>>>>>>;
33 | 
34 |         // RepVGG block + identity (with batch norm): tag1 is the input of the RepVGG block defined above
35 |         template <long num_filters, typename SUBNET>
36 |         using repvggblock_id = add_prev3<bn_con<skip1<tag3<repvggblock<num_filters, 1, SUBNET>>>>>;
37 |         // some definitions to allow the use of the repeat layer (training blocks)
38 |         template <typename SUBNET> using repvggblock_id_1 = ACT<repvggblock_id<filters_1, SUBNET>>;
39 |         template <typename SUBNET> using repvggblock_id_2 = ACT<repvggblock_id<filters_2, SUBNET>>;
40 |         template <typename SUBNET> using repvggblock_id_3 = ACT<repvggblock_id<filters_3, SUBNET>>;
41 |         // inference blocks: a single padded 3x3 convolution followed by ACT (no batch norm, no branches)
42 |         template <typename SUBNET> using iblock_1 = ACT<pcon<filters_1, 3, 1, SUBNET>>;
43 |         template <typename SUBNET> using iblock_2 = ACT<pcon<filters_2, 3, 1, SUBNET>>;
44 |         template <typename SUBNET> using iblock_3 = ACT<pcon<filters_3, 3, 1, SUBNET>>;
45 |         // training backbone: five strided RepVGG blocks, with nb_1, nb_2 and nb_3 identity blocks after stages 1, 2 and 3
46 |         template <size_t nb_3, size_t nb_2, size_t nb_1, typename INPUT>
47 |         using tbackbone = ACT<repvggblock<filters_4, 2,
48 |                           repeat<nb_3, repvggblock_id_3, ACT<repvggblock<filters_3, 2,
49 |                           repeat<nb_2, repvggblock_id_2, ACT<repvggblock<filters_2, 2,
50 |                           repeat<nb_1, repvggblock_id_1, ACT<repvggblock<filters_1, 2,
51 |                           ACT<repvggblock<filters_0, 2, INPUT>>>>>>>>>>>>>;
52 |         // inference backbone: same layout as tbackbone, with every block replaced by a plain 3x3 convolution
53 |         template <size_t nb_3, size_t nb_2, size_t nb_1, typename INPUT>
54 |         using ibackbone = ACT<pcon<filters_4, 3, 2,
55 |                           repeat<nb_3, iblock_3, ACT<pcon<filters_3, 3, 2,
56 |                           repeat<nb_2, iblock_2, ACT<pcon<filters_2, 3, 2,
57 |                           repeat<nb_1, iblock_1, ACT<pcon<filters_1, 3, 2,
58 |                           ACT<pcon<filters_0, 3, 2, INPUT>>>>>>>>>>>>>;
59 | 
60 |     };
61 |     // classification head: global average pooling, fc layer with num_filters outputs and multiclass log loss
62 |     template <long num_filters, typename SUBNET>
63 |     using classification_head = loss_multiclass_log<fc<num_filters, avg_pool_everything<SUBNET>>>;
64 |     // each train_x / infer_x pair must share the same multipliers: train_* use tbackbone, infer_* use ibackbone
65 |     using train_a0 = classification_head<1000, def<relu, 3, 4, 5, 2>::tbackbone<13, 3, 1, input_rgb_image>>;
66 |     using infer_a0 = classification_head<1000, def<relu, 3, 4, 5, 2>::ibackbone<13, 3, 1, input_rgb_image>>;
67 |     using train_a1 = classification_head<1000, def<relu, 1, 1, 5, 2>::tbackbone<13, 3, 1, input_rgb_image>>;
68 |     using infer_a1 = classification_head<1000, def<relu, 1, 1, 5, 2>::ibackbone<13, 3, 1, input_rgb_image>>;
69 |     using train_a2 = classification_head<1000, def<relu, 3, 2, 11, 4>::tbackbone<13, 3, 1, input_rgb_image>>;
70 |     using infer_a2 = classification_head<1000, def<relu, 3, 2, 11, 4>::ibackbone<13, 3, 1, input_rgb_image>>;
71 |     using train_b0 = classification_head<1000, def<relu, 1, 1, 5, 2>::tbackbone<15, 5, 3, input_rgb_image>>;
72 |     using infer_b0 = classification_head<1000, def<relu, 1, 1, 5, 2>::ibackbone<15, 5, 3, input_rgb_image>>;
73 |     using train_b1 = classification_head<1000, def<relu, 2, 1, 4, 1>::tbackbone<15, 5, 3, input_rgb_image>>;
74 |     using infer_b1 = classification_head<1000, def<relu, 2, 1, 4, 1>::ibackbone<15, 5, 3, input_rgb_image>>;
75 |     using train_b2 = classification_head<1000, def<relu, 5, 2, 5, 1>::tbackbone<15, 5, 3, input_rgb_image>>;
76 |     using infer_b2 = classification_head<1000, def<relu, 5, 2, 5, 1>::ibackbone<15, 5, 3, input_rgb_image>>;
77 |     using train_b3 = classification_head<1000, def<relu, 3, 1, 5, 1>::tbackbone<15, 5, 3, input_rgb_image>>;  // a = 3, b = 5, matching infer_b3
78 |     using infer_b3 = classification_head<1000, def<relu, 3, 1, 5, 1>::ibackbone<15, 5, 3, input_rgb_image>>;
79 |     // clang-format on
80 | }  // namespace repvgg
81 | 
82 | #endif  // RepVGG_H
83 | 


--------------------------------------------------------------------------------
/src/classification/resnet.h:
--------------------------------------------------------------------------------
 1 | #ifndef ResNet_H
 2 | #define ResNet_H
 3 | 
 4 | #include <dlib/dnn.h>
 5 | 
 6 | namespace resnet
 7 | {
 8 |     // clang-format off
 9 |     using namespace dlib;
10 |     template <template <typename> class BN = bn_con, template <typename> class ACT = relu, long k = 64>
11 |     struct def  // BN: bn_con (train) or affine (infer); ACT: activation layer; k: number of filters of the first stage
12 |     {
13 |         template <long N, int K, int S, typename SUBNET>  // N filters, K x K kernel, stride S, padding K / 2
14 |         using conv = add_layer<con_<N, K, K, S, S, K / 2, K / 2>, SUBNET>;
15 |         // stem: 7x7 convolution with k filters and stride 2, BN, ACT and a padded 3x3 max-pooling with stride 2
16 |         template <typename INPUT>
17 |         using stem = add_layer<max_pool_<3, 3, 2, 2, 1, 1>, ACT<BN<conv<k, 7, 2, INPUT>>>>;
18 |         // basicblock: two 3x3 convolutions with N filters; only the first one applies the stride S
19 |         template <long N, int S, typename SUBNET>
20 |         using basicblock = BN<conv<N, 3, 1, ACT<BN<conv<N, 3, S, SUBNET>>>>>;
21 |         // bottleneck: 1x1 (N filters), 3x3 (N filters, stride S) and 1x1 (4 * N filters) convolutions
22 |         template<long N, int S, typename SUBNET>
23 |         using bottleneck = BN<conv<4 * N, 1, 1, ACT<BN<conv<N, 3, S, ACT<BN<conv<N, 1, 1, SUBNET>>>>>>>>;
24 |         // residual: BLOCK with stride 1, whose output is added to the block input (tag1) before ACT
25 |         template <template <long, int, typename> class BLOCK, long N, typename SUBNET>
26 |         using residual = ACT<add_prev1<BLOCK<N, 1, tag1<SUBNET>>>>;
27 |         // transition: BLOCK with stride S (tag2) added to a 1x1 convolution (N * F filters, stride S) of the block input (tag1)
28 |         template <template <long, int, typename> class BLOCK, long N, long F, long S, typename SUBNET>
29 |         using transition = ACT<add_prev2<BN<conv<N * F, 1, S, skip1<tag2<BLOCK<N, S, tag1<SUBNET>>>>>>>>;
30 |         // some definitions to allow the use of the repeat layer
31 |         template <typename SUBNET> using resbasicblock_8k = residual<basicblock, 8 * k, SUBNET>;
32 |         template <typename SUBNET> using resbasicblock_4k = residual<basicblock, 4 * k, SUBNET>;
33 |         template <typename SUBNET> using resbasicblock_2k = residual<basicblock, 2 * k, SUBNET>;
34 |         template <typename SUBNET> using resbasicblock_1k = residual<basicblock, 1 * k, SUBNET>;
35 |         template <typename SUBNET> using resbottleneck_8k = residual<bottleneck, 8 * k, SUBNET>;
36 |         template <typename SUBNET> using resbottleneck_4k = residual<bottleneck, 4 * k, SUBNET>;
37 |         template <typename SUBNET> using resbottleneck_2k = residual<bottleneck, 2 * k, SUBNET>;
38 |         template <typename SUBNET> using resbottleneck_1k = residual<bottleneck, 1 * k, SUBNET>;
39 |         // every stage is one transition followed by N* residual blocks, so a stage holds N* + 1 blocks in total
40 |         template <long N8k, long N4k, long N2k, long N1k, typename INPUT>
41 |         using backbone_basicblock = repeat<N8k, resbasicblock_8k, transition<basicblock, 8 * k, 1, 2,
42 |                                     repeat<N4k, resbasicblock_4k, transition<basicblock, 4 * k, 1, 2,
43 |                                     repeat<N2k, resbasicblock_2k, transition<basicblock, 2 * k, 1, 2,
44 |                                     repeat<N1k, resbasicblock_1k, transition<basicblock, 1 * k, 1, 1,
45 |                                     stem<INPUT>>>>>>>>>;
46 |         // same layout with bottleneck blocks: F = 4 because a bottleneck outputs 4 * N filters
47 |         template <long N8k, long N4k, long N2k, long N1k, typename INPUT>
48 |         using backbone_bottleneck = repeat<N8k, resbottleneck_8k, transition<bottleneck, 8 * k, 4, 2,
49 |                                     repeat<N4k, resbottleneck_4k, transition<bottleneck, 4 * k, 4, 2,
50 |                                     repeat<N2k, resbottleneck_2k, transition<bottleneck, 2 * k, 4, 2,
51 |                                     repeat<N1k, resbottleneck_1k, transition<bottleneck, 1 * k, 4, 1,
52 |                                     stem<INPUT>>>>>>>>>;
53 | 
54 |         // the backbones for the classic architectures
55 |         template <typename INPUT> using backbone_18  = backbone_basicblock<1, 1, 1, 1, INPUT>;
56 |         template <typename INPUT> using backbone_34  = backbone_basicblock<2, 5, 3, 2, INPUT>;
57 |         template <typename INPUT> using backbone_50  = backbone_bottleneck<2, 5, 3, 2, INPUT>;
58 |         template <typename INPUT> using backbone_101 = backbone_bottleneck<2, 22, 3, 2, INPUT>;
59 |         template <typename INPUT> using backbone_152 = backbone_bottleneck<2, 35, 7, 2, INPUT>;
60 |     };
61 |     // clang-format on
62 |     // classification head: global average pooling, 1000-output fc layer and multiclass log loss
63 |     template <typename SUBNET>
64 |     using classification_head = loss_multiclass_log<fc<1000, avg_pool_everything<SUBNET>>>;
65 |     // train_* networks use bn_con and infer_* networks use affine
66 |     using train_18  = classification_head<def<bn_con, relu>::backbone_18<input_rgb_image>>;
67 |     using infer_18  = classification_head<def<affine, relu>::backbone_18<input_rgb_image>>;
68 |     using train_34  = classification_head<def<bn_con, relu>::backbone_34<input_rgb_image>>;
69 |     using infer_34  = classification_head<def<affine, relu>::backbone_34<input_rgb_image>>;
70 |     using train_50  = classification_head<def<bn_con, relu>::backbone_50<input_rgb_image>>;
71 |     using infer_50  = classification_head<def<affine, relu>::backbone_50<input_rgb_image>>;
72 |     using train_101 = classification_head<def<bn_con, relu>::backbone_101<input_rgb_image>>;
73 |     using infer_101 = classification_head<def<affine, relu>::backbone_101<input_rgb_image>>;
74 |     using train_152 = classification_head<def<bn_con, relu>::backbone_152<input_rgb_image>>;
75 |     using infer_152 = classification_head<def<affine, relu>::backbone_152<input_rgb_image>>;
76 | }  // namespace resnet
77 | 
78 | #endif  // ResNet_H
79 | 


--------------------------------------------------------------------------------
/src/classification/squeezenet.h:
--------------------------------------------------------------------------------
 1 | #ifndef SqueezeNet_H
 2 | #define SqueezeNet_H
 3 | 
 4 | #include <dlib/dnn.h>
 5 | 
 6 | namespace squeezenet
 7 | {
 8 |     // clang-format off
 9 |     using namespace dlib;
10 |     // ACT can be any activation
11 |     template <template <typename> class ACT>
12 |     struct def
13 |     {
14 |         template <long num_filters, long ks, int s, typename SUBNET>  // ks x ks kernel, stride s, padding ks / 2
15 |         using conp = add_layer<con_<num_filters, ks, ks, s, s, ks/2, ks/2>, SUBNET>;
16 |         // 3x3 max-pooling with stride 2 and padding 1
17 |         template <typename SUBNET>
18 |         using max_pool3 = add_layer<max_pool_<3, 3, 2, 2, 1, 1>, SUBNET>;
19 |         // stem: 7x7 convolution with 64 filters and stride 2, ACT and max-pooling
20 |         template <typename INPUT>
21 |         using stem = max_pool3<ACT<conp<64, 7, 2, INPUT>>>;
22 |         // fire_module: a 1x1 convolution with nf filters (tag1) feeds a 1x1 (tag2) and, through skip1, a 3x3 (tag3) convolution; tag2 and tag3 are concatenated
23 |         template <long nf3x3, long nf1x1, long nf, typename SUBNET>
24 |         using fire_module = concat2<tag2, tag3,
25 |                             tag3<ACT<conp<nf3x3, 3, 1,
26 |                             skip1<
27 |                             tag2<ACT<conp<nf1x1, 1, 1,
28 |                             tag1<ACT<conp<nf, 1, 1,
29 |                             SUBNET>>>>>>>>>>>;
30 |         // backbone_1_0 (read bottom-up): stem and 8 fire modules, with max-poolings after the 3rd and the 7th one
31 |          template <typename INPUT>
32 |          using backbone_1_0 = fire_module<256, 256, 64,
33 |                               max_pool3<
34 |                               fire_module<256, 256, 64,
35 |                               fire_module<192, 192, 48,
36 |                               fire_module<192, 192, 48,
37 |                               fire_module<128, 128, 32,
38 |                               max_pool3<
39 |                               fire_module<128, 128, 32,
40 |                               fire_module<64, 64, 16,
41 |                               fire_module<64, 64, 16,
42 |                               stem<INPUT>>>>>>>>>>>;
43 |         // backbone_1_1 (read bottom-up): stem and 8 fire modules, with max-poolings after the 2nd and the 4th one
44 |           template <typename INPUT>
45 |           using backbone_1_1 = fire_module<256, 256, 64,
46 |                                fire_module<256, 256, 64,
47 |                                fire_module<192, 192, 48,
48 |                                fire_module<192, 192, 48,
49 |                                max_pool3<
50 |                                fire_module<128, 128, 32,
51 |                                fire_module<128, 128, 32,
52 |                                max_pool3<
53 |                                fire_module<64, 64, 16,
54 |                                fire_module<64, 64, 16,
55 |                                stem<INPUT>>>>>>>>>>>;
56 |     };
57 |     // classification head: DO, 1x1 convolution with 1000 filters, ACT, global average pooling and multiclass log loss
58 |     // DO must be dropout for train mode and multiply for infer
59 |     template <template <typename> class ACT, template <typename> class DO, typename SUBNET>
60 |     using classification_head = loss_multiclass_log<avg_pool_everything<ACT<con<1000, 1, 1, 1, 1, DO<SUBNET>>>>>;
61 | 
62 |     using train_v1_0 = classification_head<relu, dropout, def<relu>::backbone_1_0<input_rgb_image>>;
63 |     using infer_v1_0 = classification_head<relu, multiply, def<relu>::backbone_1_0<input_rgb_image>>;
64 |     using train_v1_1 = classification_head<relu, dropout, def<relu>::backbone_1_1<input_rgb_image>>;
65 |     using infer_v1_1 = classification_head<relu, multiply, def<relu>::backbone_1_1<input_rgb_image>>;
66 | 
67 |     // clang-format on
68 | }  // namespace squeezenet
69 | 
70 | #endif  // SqueezeNet_H
71 | 


--------------------------------------------------------------------------------
/src/classification/vggnet.h:
--------------------------------------------------------------------------------
 1 | #ifndef VGGNet_H
 2 | #define VGGNet_H
 3 | 
 4 | #include <dlib/dnn.h>
 5 | 
 6 | namespace vggnet
 7 | {
 8 |     // clang-format off
 9 |     using namespace dlib;
10 |     // ACT: activation layer; BN: bn_con (train) or affine (infer); DO: dropout (train) or multiply (infer)
11 |     template <template <typename> class ACT, template <typename> class BN, template <typename> class DO>
12 |     struct def
13 |     {
14 |         template <long nf, long k, int stride, int pad, typename SUBNET>
15 |         using conp = add_layer<con_<nf, k, k, stride, stride, pad, pad>, SUBNET>;
16 | 
17 |         // the vgg building block: 1-padded 3x3 convolution with stride 1, followed by BN and ACT
18 |         template <long nf, typename SUBNET>
19 |         using block = ACT<BN<conp<nf, 3, 1, 1, SUBNET>>>;
20 | 
21 |         // single-parameter aliases of the building block, as required by the repeat layer
22 |         template <typename SUBNET> using block_64 = block<64, SUBNET>;
23 |         template <typename SUBNET> using block_128 = block<128, SUBNET>;
24 |         template <typename SUBNET> using block_256 = block<256, SUBNET>;
25 |         template <typename SUBNET> using block_512 = block<512, SUBNET>;
26 | 
27 |         // the vgg backbone: five stages of repeated blocks, each one followed by a 2x2 max-pooling with stride 2
28 |         template <long n512, long n256, long n128, long n64, typename INPUT>
29 |         using backbone = max_pool<2, 2, 2, 2,
30 |                          repeat<n512, block_512, max_pool<2, 2, 2, 2,
31 |                          repeat<n512, block_512, max_pool<2, 2, 2, 2,
32 |                          repeat<n256, block_256, max_pool<2, 2, 2, 2,
33 |                          repeat<n128, block_128, max_pool<2, 2, 2, 2, repeat<n64, block_64, tag1<INPUT>>>>>>>>>>>>;
34 | 
35 |         // the classifier: two fc layers with 4096 outputs (each followed by ACT and DO) and a 1000-output fc layer
36 |         template <typename SUBNET>
37 |         using final_fc = fc<1000, DO<ACT<fc<4096, DO<ACT<fc<4096, SUBNET>>>>>>>;
38 |         // number of blocks per stage for each classic configuration (both 512-filter stages share the first value)
39 |         template <typename INPUT> using backbone_11 = final_fc<backbone<2, 2, 1, 1, INPUT>>;
40 |         template <typename INPUT> using backbone_13 = final_fc<backbone<2, 2, 2, 2, INPUT>>;
41 |         template <typename INPUT> using backbone_16 = final_fc<backbone<3, 3, 2, 2, INPUT>>;
42 |         template <typename INPUT> using backbone_19 = final_fc<backbone<4, 4, 2, 2, INPUT>>;
43 |     };
44 |     // train_* networks use bn_con and dropout, infer_* networks use affine and multiply
45 |     using train_11 = loss_multiclass_log<def<relu, bn_con, dropout>::backbone_11<input_rgb_image>>;
46 |     using infer_11 = loss_multiclass_log<def<relu, affine, multiply>::backbone_11<input_rgb_image>>;
47 |     using train_13 = loss_multiclass_log<def<relu, bn_con, dropout>::backbone_13<input_rgb_image>>;
48 |     using infer_13 = loss_multiclass_log<def<relu, affine, multiply>::backbone_13<input_rgb_image>>;
49 |     using train_16 = loss_multiclass_log<def<relu, bn_con, dropout>::backbone_16<input_rgb_image>>;
50 |     using infer_16 = loss_multiclass_log<def<relu, affine, multiply>::backbone_16<input_rgb_image>>;
51 |     using train_19 = loss_multiclass_log<def<relu, bn_con, dropout>::backbone_19<input_rgb_image>>;
52 |     using infer_19 = loss_multiclass_log<def<relu, affine, multiply>::backbone_19<input_rgb_image>>;
53 | 
54 |     // clang-format on
55 | }  // namespace vggnet
56 | 
57 | #endif  // VGGNet_H
58 | 


--------------------------------------------------------------------------------
/src/classification/vovnet.h:
--------------------------------------------------------------------------------
  1 | #ifndef VoVNet_H
  2 | #define VoVNet_H
  3 | 
  4 | #include <dlib/dnn.h>
  5 | 
  6 | namespace vovnet
  7 | {
  8 |     // clang-format off
  9 |     using namespace dlib;
 10 | 
 11 |     // tag layers private to VoVNet (ids 5050 to 5055); vov_tag0 marks the input added back by id_mapping
 12 |     template <typename NET> using vov_tag0 = add_tag_layer<5050 + 0, NET>;
 13 |     template <typename NET> using vov_tag1 = add_tag_layer<5050 + 1, NET>;
 14 |     template <typename NET> using vov_tag2 = add_tag_layer<5050 + 2, NET>;
 15 |     template <typename NET> using vov_tag3 = add_tag_layer<5050 + 3, NET>;
 16 |     template <typename NET> using vov_tag4 = add_tag_layer<5050 + 4, NET>;
 17 |     template <typename NET> using vov_tag5 = add_tag_layer<5050 + 5, NET>;
 18 |     template <typename NET> using id_mapping = add_prev<vov_tag0, NET>;
 19 | 
 20 |     template <template <typename> class ACT, template <typename> class BN>
 21 |     struct def
 22 |     {
 23 |         // The concatenate layer with custom number of outputs for OSA Module with 3 layers
 24 |         template <long num_filters, typename SUBNET>
 25 |         using concatenate3 = ACT<BN<con<num_filters, 1, 1, 1, 1,
 26 |         add_layer<concat_<vov_tag0, vov_tag1, vov_tag2, vov_tag3>, SUBNET>>>>;
 27 | 
 28 |         // The concatenate layer with custom number of outputs for OSA Module with 5 layers
 29 |         template <long num_filters, typename SUBNET>
 30 |         using concatenate5 = ACT<BN<con<num_filters, 1, 1, 1, 1,
 31 |         add_layer<concat_<vov_tag0, vov_tag1, vov_tag2, vov_tag3, vov_tag4, vov_tag5>, SUBNET>>>>;
 32 | 
 33 |         // 1-padded 3x3 convolution with custom number of filters, kernel size and stride
 34 |         template <long num_filters, int s, typename SUBNET>
 35 |         using con3 = ACT<BN<add_layer<con_<num_filters, 3, 3, s, s, 1, 1>, SUBNET>>>;
 36 | 
 37 |         // Max-pooling with 3x3 kernel size 2-stride and 1-padding
 38 |         template <typename SUBNET> using maxpool = add_layer<max_pool_<3, 3, 2, 2, 1, 1>, SUBNET>;
 39 | 
 40 |         // Stem block
 41 |         template <typename INPUT>
 42 |         using stem = con3<128, 2, con3<64, 1, con3<64, 2, INPUT>>>;
 43 | 
 44 |         // The VoVNet effective Squeeze and Excitation Module
 45 |         template <long num_filters, typename SUBNET>
 46 |         using ese_module = scale_prev2<skip1<
 47 |                            tag2<sig<con<num_filters, 1, 1, 1, 1,
 48 |                            avg_pool_everything<
 49 |                            tag1<SUBNET>>>>>>>;
 50 | 
 51 |         // The VoVNet One-Shot Aggregation Module with 3 inner layers
 52 |         template <long num_filters_out, long num_filters_in, typename SUBNET>
 53 |         using osa_module3 = ese_module<num_filters_out,
 54 |                             concatenate3<num_filters_out,
 55 |                             vov_tag3<con3<num_filters_in, 1,
 56 |                             vov_tag2<con3<num_filters_in, 1,
 57 |                             vov_tag1<con3<num_filters_in, 1,
 58 |                             vov_tag0<SUBNET>>>>>>>>>;
 59 | 
 60 |         // The VoVNet One-Shot Aggregation Module with 5 inner layers: five chained stride-1 con3 layers of num_filters_in filters, tagged vov_tag1..5 (input tagged vov_tag0), fed to concatenate5 and ese_module with num_filters_out
 61 |         template <long num_filters_out, long num_filters_in, typename SUBNET>
 62 |         using osa_module5 = ese_module<num_filters_out,
 63 |                             concatenate5<num_filters_out,
 64 |                             vov_tag5<con3<num_filters_in, 1,
 65 |                             vov_tag4<con3<num_filters_in, 1,
 66 |                             vov_tag3<con3<num_filters_in, 1,
 67 |                             vov_tag2<con3<num_filters_in, 1,
 68 |                             vov_tag1<con3<num_filters_in, 1,
 69 |                             vov_tag0<SUBNET>>>>>>>>>>>>>;
 70 | 
 71 |         // Single-parameter aliases (fixed filter counts, wrapped in id_mapping) so that these modules can be passed by name to the repeat layer
 72 |         template <typename SUBNET> using osa_module5_id_512 = id_mapping<osa_module5<512, 160, SUBNET>>;
 73 |         template <typename SUBNET> using osa_module5_id_768 = id_mapping<osa_module5<768, 192, SUBNET>>;
 74 |         template <typename SUBNET> using osa_module5_id_1024 = id_mapping<osa_module5<1024, 224, SUBNET>>;
 75 | 
 76 |         template <typename INPUT>  // stem, then four osa_module3 stages (112, 256, 384 and 512 output filters) with a maxpool between consecutive stages
 77 |         using backbone_19_slim = osa_module3<512, 112,
 78 |                                  maxpool<osa_module3<384, 96,
 79 |                                  maxpool<osa_module3<256, 80,
 80 |                                  maxpool<osa_module3<112, 64,
 81 |                                  stem<INPUT>>>>>>>>;
 82 | 
 83 |         template <typename INPUT>  // stem, then four osa_module3 stages (256, 512, 768 and 1024 output filters) with a maxpool between consecutive stages
 84 |         using backbone_19 = osa_module3<1024, 224,
 85 |                             maxpool<osa_module3<768, 192,
 86 |                             maxpool<osa_module3<512, 160,
 87 |                             maxpool<osa_module3<256, 128,
 88 |                             stem<INPUT>>>>>>>>;
 89 | 
 90 |         template <typename INPUT>  // stem, then osa_module5 stages (112, 256, 384 output filters) and a final osa_module3 stage (512), with a maxpool between stages
 91 |         using backbone_27_slim = osa_module3<512, 112,  // NOTE(review): the last stage uses osa_module3 while the other stages use osa_module5 - confirm this is intended
 92 |                                  maxpool<osa_module5<384, 96,
 93 |                                  maxpool<osa_module5<256, 80,
 94 |                                  maxpool<osa_module5<112, 64,
 95 |                                  stem<INPUT>>>>>>>>;
 96 | 
 97 |         template <typename INPUT>  // stem, then osa_module5 stages (256, 512, 768 output filters) and a final osa_module3 stage (1024), with a maxpool between stages
 98 |         using backbone_27 = osa_module3<1024, 224,  // NOTE(review): the last stage uses osa_module3 while the other stages use osa_module5 - confirm this is intended
 99 |                             maxpool<osa_module5<768, 192,
100 |                             maxpool<osa_module5<512, 160,
101 |                             maxpool<osa_module5<256, 128,
102 |                             stem<INPUT>>>>>>>>;
103 | 
104 |         template <typename INPUT>  // stem, then osa_module5 stages (256, 512, 768, 1024 output filters) separated by maxpool; the 768 and 1024 stages are each followed by one id-mapped osa_module5 of the same width
105 |         using backbone_39 = osa_module5_id_1024<osa_module5<1024, 224,
106 |                             maxpool<osa_module5_id_768<osa_module5<768, 192,
107 |                             maxpool<osa_module5<512, 160,
108 |                             maxpool<osa_module5<256, 128,
109 |                             stem<INPUT>>>>>>>>>>;
110 | 
111 |         template <typename INPUT>  // stem, then osa_module5 stages (256, 512, 768, 1024 output filters) separated by maxpool; the 768 stage is followed by 3 and the 1024 stage by 2 repeated id-mapped osa_module5 of the same width
112 |         using backbone_57 = repeat<2, osa_module5_id_1024, osa_module5<1024, 224,
113 |                             maxpool<repeat<3, osa_module5_id_768, osa_module5<768, 192,
114 |                             maxpool<osa_module5<512, 160,
115 |                             maxpool<osa_module5<256, 128,
116 |                             stem<INPUT>>>>>>>>>>;
117 | 
118 |         template <typename INPUT>  // stem, then osa_module5 stages (256, 512, 768, 1024 output filters) separated by maxpool; the 512, 768 and 1024 stages are followed by 2, 8 and 2 repeated id-mapped osa_module5 of the same width
119 |         using backbone_99 = repeat<2, osa_module5_id_1024, osa_module5<1024, 224,  // width must be 1024 to match the osa_module5_id_1024 modules stacked on top
120 |                             maxpool<repeat<8, osa_module5_id_768, osa_module5<768, 192,
121 |                             maxpool<repeat<2, osa_module5_id_512, osa_module5<512, 160,
122 |                             maxpool<osa_module5<256, 128,
123 |                             stem<INPUT>>>>>>>>>>>;
124 |     };
125 | 
126 |     template <long num_filters, typename SUBNET>  // avg_pool_everything, then fc with num_filters outputs, wrapped in loss_multiclass_log
127 |     using classification_head = loss_multiclass_log<fc<num_filters, avg_pool_everything<SUBNET>>>;
128 | 
129 |     using train_19_slim = classification_head<1000, def<relu, bn_con>::backbone_19_slim<input_rgb_image>>;
130 |     using infer_19_slim = classification_head<1000, def<relu, affine>::backbone_19_slim<input_rgb_image>>;
131 |     using train_19 = classification_head<1000, def<relu, bn_con>::backbone_19<input_rgb_image>>;
132 |     using infer_19 = classification_head<1000, def<relu, affine>::backbone_19<input_rgb_image>>;
133 |     using train_27_slim = classification_head<1000, def<relu, bn_con>::backbone_27_slim<input_rgb_image>>;
134 |     using infer_27_slim = classification_head<1000, def<relu, affine>::backbone_27_slim<input_rgb_image>>;
135 |     using train_27 = classification_head<1000, def<relu, bn_con>::backbone_27<input_rgb_image>>;
136 |     using infer_27 = classification_head<1000, def<relu, affine>::backbone_27<input_rgb_image>>;
137 |     using train_39 = classification_head<1000, def<relu, bn_con>::backbone_39<input_rgb_image>>;
138 |     using infer_39 = classification_head<1000, def<relu, affine>::backbone_39<input_rgb_image>>;
139 |     using train_57 = classification_head<1000, def<relu, bn_con>::backbone_57<input_rgb_image>>;
140 |     using infer_57 = classification_head<1000, def<relu, affine>::backbone_57<input_rgb_image>>;
141 |     using train_99 = classification_head<1000, def<relu, bn_con>::backbone_99<input_rgb_image>>;
142 |     using infer_99 = classification_head<1000, def<relu, affine>::backbone_99<input_rgb_image>>;
143 |     // clang-format on
144 | }  // namespace vovnet
145 | #endif  // VoVNet_H
146 | 


--------------------------------------------------------------------------------
/src/detection/yolov5.h:
--------------------------------------------------------------------------------
  1 | #ifndef yolov5_h_INCLUDED
  2 | #define yolov5_h_INCLUDED
  3 | 
  4 | #include <dlib/dnn.h>
  5 | 
  6 | namespace yolov5
  7 | {
  8 |     using namespace dlib;
  9 |     template <typename SUBNET> using ytag3 = add_tag_layer<4003, SUBNET>;
 10 |     template <typename SUBNET> using ytag4 = add_tag_layer<4004, SUBNET>;
 11 |     template <typename SUBNET> using ytag5 = add_tag_layer<4005, SUBNET>;
 12 |     template <typename SUBNET> using ptag3 = add_tag_layer<7003, SUBNET>;
 13 |     template <typename SUBNET> using ptag4 = add_tag_layer<7004, SUBNET>;
 14 |     template <typename SUBNET> using ptag5 = add_tag_layer<7005, SUBNET>;
 15 | 
 16 |     template <
 17 |         template <typename> class ACT,
 18 |         template <typename> class BN,
 19 |         long depth_num = 1,
 20 |         long depth_den = 1,
 21 |         long width_num = 1,
 22 |         long width_den = 1
 23 |     >
 24 |     struct def
 25 |     {
 26 |         static constexpr long nf = 64 * width_num / width_den;
 27 | 
 28 |         template <long NF, int KS, int S, typename SUBNET>  // KSxKS convolution with NF filters, stride S and padding (KS-1)/2, followed by BN and then ACT
 29 |         using conv = ACT<BN<add_layer<con_<NF, KS, KS, S, S, (KS-1)/2, (KS-1)/2>, SUBNET>>>;
 30 | 
 31 |         template <long NF, typename SUBNET>
 32 |         using bottleneck = conv<NF, 3, 1, conv<NF, 1, 1, SUBNET>>;
 33 | 
 34 |         template <long NF, typename SUBNET>
 35 |         using resbottleneck = add_prev10<bottleneck<NF, tag10<SUBNET>>>;
 36 | 
 37 |         template <long NF, typename SUBNET>
 38 |         using sppf = conv<NF, 1, 1,
 39 |                      concat4<tag1, tag2, tag3, tag4,
 40 |                 tag4<max_pool<5, 5, 1, 1,
 41 |                 tag3<max_pool<5, 5, 1, 1,
 42 |                 tag2<max_pool<5, 5, 1, 1,
 43 |                 tag1<conv<NF/2, 1, 1, SUBNET>>>>>>>>>>;
 44 | 
 45 |         template <typename SUBNET> using bottleneck_x2 = bottleneck<2 * nf, SUBNET>;
 46 |         template <typename SUBNET> using bottleneck_x4 = bottleneck<4 * nf, SUBNET>;
 47 |         template <typename SUBNET> using bottleneck_x8 = bottleneck<8 * nf, SUBNET>;
 48 |         template <typename SUBNET> using resbottleneck_x1 = resbottleneck<nf, SUBNET>;
 49 |         template <typename SUBNET> using resbottleneck_x2 = resbottleneck<2 * nf, SUBNET>;
 50 |         template <typename SUBNET> using resbottleneck_x4 = resbottleneck<4 * nf, SUBNET>;
 51 |         template <typename SUBNET> using resbottleneck_x8 = resbottleneck<8 * nf, SUBNET>;
 52 | 
 53 |         // CSP Bottleneck with 3 convolutions; BLOCK is repeated N * depth_num / depth_den times (integer division)
 54 |         template <long NF, size_t N, template <typename> class BLOCK, typename SUBNET>
 55 |         using c3 = conv<NF, 1, 1,
 56 |                    concat2<tag8, tag9,
 57 |               tag9<conv<NF/2, 1, 1, skip7<
 58 |               tag8<repeat<N * depth_num / depth_den, BLOCK, conv<NF/2, 1, 1,
 59 |               tag7<SUBNET>>>>>>>>>;
 60 | 
 61 |         template <typename INPUT>
 62 |         using backbone = sppf<16 * nf,
 63 |                    ptag5<c3<16 * nf, 3, resbottleneck_x8,
 64 |                          conv<16 * nf, 3, 2,
 65 |                    ptag4<c3<8 * nf, 9, resbottleneck_x4,
 66 |                          conv<8 * nf, 3, 2,
 67 |                    ptag3<c3<4 * nf, 6, resbottleneck_x2,
 68 |                          conv<4 * nf, 3, 2,
 69 |                          c3<2 * nf, 3, resbottleneck_x1,
 70 |                          conv<2 * nf, 3, 2,
 71 |                          conv<nf, 6, 2,
 72 |                          INPUT>>>>>>>>>>>>>;
 73 | 
 74 |         template <template <typename> class YTAG, typename SUBNET>
 75 |         using yolo = YTAG<sig<con<1, 1, 1, 1, 1, SUBNET>>>;
 76 | 
 77 |         template <typename SUBNET>
 78 |         using head = yolo<ytag5,
 79 |                      c3<16 * nf, 3, bottleneck_x8,
 80 |                      concat2<tag1, tag5,
 81 |                 tag1<conv<8 * nf, 3, 2, skip2<
 82 |                      yolo<ytag4,
 83 |                 tag2<c3<8 * nf, 3, bottleneck_x4,
 84 |                      concat2<tag1, tag4,
 85 |                 tag1<conv<4 * nf, 3, 2, skip2<
 86 |                      yolo<ytag3,
 87 |                 tag2<c3<4 * nf, 3, bottleneck_x2,
 88 |                      concat2<tag1, ptag3,
 89 |                 tag1<upsample<2,
 90 |                 tag4<conv<4 * nf, 1, 1,
 91 |                      c3<8 * nf, 3, bottleneck_x4,
 92 |                      concat2<tag1, ptag4,
 93 |                 tag1<upsample<2,
 94 |                 tag5<conv<8 * nf, 1, 1,
 95 |                      SUBNET>>>>>>>>>>>>>>>>>>>>>>>>>>>;
 96 | 
 97 |         using net_type = loss_yolo<ytag3, ytag4, ytag5, head<backbone<input_rgb_image>>>;
 98 |     };
 99 | 
100 |     using train_type_n = def<leaky_relu, bn_con, 1, 3, 1, 4>::net_type;
101 |     using infer_type_n = def<leaky_relu, affine, 1, 3, 1, 4>::net_type;
102 |     using train_type_s = def<leaky_relu, bn_con, 1, 3, 1, 2>::net_type;
103 |     using infer_type_s = def<leaky_relu, affine, 1, 3, 1, 2>::net_type;
104 |     using train_type_m = def<leaky_relu, bn_con, 2, 3, 3, 4>::net_type;
105 |     using infer_type_m = def<leaky_relu, affine, 2, 3, 3, 4>::net_type;
106 |     using train_type_l = def<leaky_relu, bn_con, 1, 1, 1, 1>::net_type;
107 |     using infer_type_l = def<leaky_relu, affine, 1, 1, 1, 1>::net_type;
108 |     using train_type_x = def<leaky_relu, bn_con, 4, 3, 5, 4>::net_type;
109 |     using infer_type_x = def<leaky_relu, affine, 4, 3, 5, 4>::net_type;
110 | }
111 | 
112 | #endif // yolov5_h_INCLUDED
113 | 


--------------------------------------------------------------------------------
/src/detection/yolov5p6.h:
--------------------------------------------------------------------------------
  1 | #ifndef yolov5p6_h_INCLUDED  // guard name is unique to this header so that yolov5.h and yolov5p6.h can be included together
  2 | #define yolov5p6_h_INCLUDED
  3 | 
  4 | #include <dlib/dnn.h>
  5 | 
  6 | namespace yolov5p6
  7 | {
  8 |     using namespace dlib;
  9 |     template <typename SUBNET> using ytag3 = add_tag_layer<4003, SUBNET>;
 10 |     template <typename SUBNET> using ytag4 = add_tag_layer<4004, SUBNET>;
 11 |     template <typename SUBNET> using ytag5 = add_tag_layer<4005, SUBNET>;
 12 |     template <typename SUBNET> using ytag6 = add_tag_layer<4006, SUBNET>;
 13 |     template <typename SUBNET> using ptag3 = add_tag_layer<7003, SUBNET>;
 14 |     template <typename SUBNET> using ptag4 = add_tag_layer<7004, SUBNET>;
 15 |     template <typename SUBNET> using ptag5 = add_tag_layer<7005, SUBNET>;
 16 |     template <typename SUBNET> using ptag6 = add_tag_layer<7006, SUBNET>;
 17 | 
 18 |     template <
 19 |         template <typename> class ACT,
 20 |         template <typename> class BN,
 21 |         long depth_num = 1,
 22 |         long depth_den = 1,
 23 |         long width_num = 1,
 24 |         long width_den = 1
 25 |     >
 26 |     struct def
 27 |     {
 28 |         static constexpr long nf = 64 * width_num / width_den;  // base filter count: 64 scaled by width_num / width_den (integer arithmetic)
 29 | 
 30 |         template <long NF, int KS, int S, typename SUBNET>  // KSxKS convolution with NF filters, stride S and padding (KS-1)/2, followed by BN and then ACT
 31 |         using conv = ACT<BN<add_layer<con_<NF, KS, KS, S, S, (KS-1)/2, (KS-1)/2>, SUBNET>>>;
 32 | 
 33 |         template <long NF, typename SUBNET>  // 1x1 conv followed by 3x3 conv, both with NF filters
 34 |         using bottleneck = conv<NF, 3, 1, conv<NF, 1, 1, SUBNET>>;
 35 | 
 36 |         template <long NF, typename SUBNET>  // bottleneck whose input (tag10) is added back with add_prev10
 37 |         using resbottleneck = add_prev10<bottleneck<NF, tag10<SUBNET>>>;
 38 | 
 39 |         template <long NF, typename SUBNET>  // 1x1 conv to NF/2, three chained 5x5 stride-1 max pools; the four tagged outputs are concatenated and fed to a 1x1 conv with NF filters
 40 |         using sppf = conv<NF, 1, 1,
 41 |                      concat4<tag1, tag2, tag3, tag4,
 42 |                 tag4<max_pool<5, 5, 1, 1,
 43 |                 tag3<max_pool<5, 5, 1, 1,
 44 |                 tag2<max_pool<5, 5, 1, 1,
 45 |                 tag1<conv<NF/2, 1, 1, SUBNET>>>>>>>>>>;
 46 | 
 47 |         template <typename SUBNET> using bottleneck_x2 = bottleneck<2 * nf, SUBNET>;
 48 |         template <typename SUBNET> using bottleneck_x4 = bottleneck<4 * nf, SUBNET>;
 49 |         template <typename SUBNET> using bottleneck_x6 = bottleneck<6 * nf, SUBNET>;
 50 |         template <typename SUBNET> using bottleneck_x8 = bottleneck<8 * nf, SUBNET>;
 51 |         template <typename SUBNET> using resbottleneck_x1 = resbottleneck<nf, SUBNET>;
 52 |         template <typename SUBNET> using resbottleneck_x2 = resbottleneck<2 * nf, SUBNET>;
 53 |         template <typename SUBNET> using resbottleneck_x4 = resbottleneck<4 * nf, SUBNET>;
 54 |         template <typename SUBNET> using resbottleneck_x6 = resbottleneck<6 * nf, SUBNET>;
 55 |         template <typename SUBNET> using resbottleneck_x8 = resbottleneck<8 * nf, SUBNET>;
 56 | 
 57 |         // CSP Bottleneck with 3 convolutions; BLOCK is repeated N * depth_num / depth_den times (integer division)
 58 |         template <long NF, size_t N, template <typename> class BLOCK, typename SUBNET>
 59 |         using c3 = conv<NF, 1, 1,
 60 |                    concat2<tag8, tag9,
 61 |               tag9<conv<NF/2, 1, 1, skip7<
 62 |               tag8<repeat<N * depth_num / depth_den, BLOCK, conv<NF/2, 1, 1,
 63 |               tag7<SUBNET>>>>>>>>>;
 64 | 
 65 |         template <typename INPUT>  // feature extractor; ptag3..ptag6 mark the outputs that head concatenates with or builds on
 66 |         using backbone = sppf<16 * nf,
 67 |                    ptag6<c3<16 * nf, 3, resbottleneck_x8,
 68 |                          conv<16 * nf, 3, 2,
 69 |                    ptag5<c3<12 * nf, 3, resbottleneck_x6,
 70 |                          conv<12 * nf, 3, 2,
 71 |                    ptag4<c3<8 * nf, 9, resbottleneck_x4,
 72 |                          conv<8 * nf, 3, 2,
 73 |                    ptag3<c3<4 * nf, 6, resbottleneck_x2,
 74 |                          conv<4 * nf, 3, 2,
 75 |                          c3<2 * nf, 3, resbottleneck_x1,
 76 |                          conv<2 * nf, 3, 2,
 77 |                          conv<nf, 3, 2,
 78 |                          INPUT>>>>>>>>>>>>>>>>;
 79 | 
 80 |         template <template <typename> class YTAG, typename SUBNET>  // 1x1 con with a single filter followed by sig, wrapped in the given YTAG
 81 |         using yolo = YTAG<sig<con<1, 1, 1, 1, 1, SUBNET>>>;
 82 | 
 83 |         template <typename SUBNET>  // upsampling path over ptag5, ptag4, ptag3 followed by a strided downsampling path; emits yolo outputs tagged ytag3..ytag6
 84 |         using head = yolo<ytag6,
 85 |                      c3<16 * nf, 3, bottleneck_x8,
 86 |                      concat2<tag1, tag6,
 87 |                 tag1<conv<12 * nf, 3, 2, skip2<
 88 |                      yolo<ytag5,
 89 |                 tag2<c3<12 * nf, 3, bottleneck_x6,
 90 |                      concat2<tag1, tag5,
 91 |                 tag1<conv<8 * nf, 3, 2, skip2<
 92 |                      yolo<ytag4,
 93 |                 tag2<c3<8 * nf, 3, bottleneck_x4,
 94 |                      concat2<tag1, tag4,
 95 |                 tag1<conv<4 * nf, 3, 2, skip2<
 96 |                      yolo<ytag3,
 97 |                 tag2<c3<4 * nf, 3, bottleneck_x2,
 98 |                      concat2<tag1, ptag3,
 99 |                 tag1<upsample<2,
100 |                 tag4<conv<4 * nf, 1, 1,
101 |                      c3<8 * nf, 3, bottleneck_x4,
102 |                      concat2<tag1, ptag4,
103 |                 tag1<upsample<2,
104 |                 tag5<conv<8 * nf, 1, 1,
105 |                      c3<12 * nf, 3, bottleneck_x6,
106 |                      concat2<tag1, ptag5,
107 |                 tag1<upsample<2,
108 |                 tag6<conv<12 * nf, 1, 1,
109 |                      SUBNET>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>;
110 | 
111 |         using net_type = add_loss_layer<loss_yolo_<ytag3, ytag4, ytag5, ytag6>, head<backbone<input_rgb_image>>>;
112 |     };
113 | 
114 |     using train_type_n = def<silu, bn_con, 1, 3, 1, 4>::net_type;
115 |     using infer_type_n = def<silu, affine, 1, 3, 1, 4>::net_type;
116 |     using train_type_s = def<silu, bn_con, 1, 3, 1, 2>::net_type;
117 |     using infer_type_s = def<silu, affine, 1, 3, 1, 2>::net_type;
118 |     using train_type_m = def<silu, bn_con, 2, 3, 3, 4>::net_type;
119 |     using infer_type_m = def<silu, affine, 2, 3, 3, 4>::net_type;
120 |     using train_type_l = def<silu, bn_con, 1, 1, 1, 1>::net_type;
121 |     using infer_type_l = def<silu, affine, 1, 1, 1, 1>::net_type;
122 |     using train_type_x = def<silu, bn_con, 4, 3, 5, 4>::net_type;
123 |     using infer_type_x = def<silu, affine, 4, 3, 5, 4>::net_type;
124 | }
125 | 
126 | #endif // yolov5p6_h_INCLUDED
127 | 


--------------------------------------------------------------------------------
/src/detection/yolov7.h:
--------------------------------------------------------------------------------
  1 | #ifndef yolov7_h_INCLUDED
  2 | #define yolov7_h_INCLUDED
  3 | 
  4 | #include <dlib/dnn.h>
  5 | 
  6 | namespace yolov7
  7 | {
  8 |     using namespace dlib;
  9 |     template <typename SUBNET> using ytag3 = add_tag_layer<4003, SUBNET>;
 10 |     template <typename SUBNET> using ytag4 = add_tag_layer<4004, SUBNET>;
 11 |     template <typename SUBNET> using ytag5 = add_tag_layer<4005, SUBNET>;
 12 |     template <typename SUBNET> using ptag3 = add_tag_layer<7003, SUBNET>;
 13 |     template <typename SUBNET> using ptag4 = add_tag_layer<7004, SUBNET>;
 14 |     template <typename SUBNET> using ptag5 = add_tag_layer<7005, SUBNET>;
 15 |     template <typename SUBNET> using ntag4 = add_tag_layer<5004, SUBNET>;
 16 | 
 17 |     template <template <typename> class ACT, template <typename> class BN>
 18 |     struct def
 19 |     {
 20 | 
 21 |         template <long NF, int KS, int S, typename SUBNET>  // KSxKS convolution with NF filters, stride S and padding (KS-1)/2, followed by BN and then ACT
 22 |         using conv = ACT<BN<add_layer<con_<NF, KS, KS, S, S, (KS-1)/2, (KS-1)/2>, SUBNET>>>;
 23 | 
 24 |         template <long NF, typename SUBNET>
 25 |         using transition = concat2<itag2, itag1,
 26 |                      itag2<conv<NF, 3, 2,
 27 |                            conv<NF, 1, 1, iskip<
 28 |                      itag1<conv<NF, 1, 1,
 29 |                            max_pool<2, 2, 2, 2,
 30 |                      itag0<SUBNET>>>>>>>>>;
 31 | 
 32 |         template <long NF, typename SUBNET>
 33 |         using e_elan = conv<NF * 4, 1, 1,
 34 |                        concat4<itag4, itag3, itag2, itag1,
 35 |                  itag4<conv<NF, 3, 1,
 36 |                        conv<NF, 3, 1,
 37 |                  itag3<conv<NF, 3, 1,
 38 |                        conv<NF, 3, 1,
 39 |                  itag2<conv<NF, 1, 1, iskip<
 40 |                  itag1<conv<NF, 1, 1,
 41 |                  itag0<SUBNET>>>>>>>>>>>>>>;
 42 | 
 43 |         template <long NF, template<typename> class TAG, typename SUBNET>
 44 |         using transition2 = concat3<itag2, itag1, TAG,
 45 |                       itag2<conv<NF, 3, 2,
 46 |                             conv<NF, 1, 1, iskip<
 47 |                       itag1<conv<NF, 1, 1,
 48 |                             max_pool<2, 2, 2, 2,
 49 |                       itag0<SUBNET>>>>>>>>>;
 50 | 
 51 |         template <long NF, typename SUBNET>
 52 |         using e_elan2 = conv<NF, 1, 1,
 53 |                    add_layer<concat_<tag1, tag2, tag3, tag4, tag5, tag6>,
 54 |                    tag1<conv<NF / 2, 3, 1,
 55 |                    tag2<conv<NF / 2, 3, 1,
 56 |                    tag3<conv<NF / 2, 3, 1,
 57 |                    tag4<conv<NF / 2, 3, 1,
 58 |                    tag5<conv<NF, 1, 1, iskip<
 59 |                    tag6<conv<NF, 1, 1,
 60 |                   itag0<SUBNET>>>>>>>>>>>>>>>>;
 61 | 
 62 |         template <typename INPUT>
 63 |         using backbone = ptag5<e_elan<256, transition<512,
 64 |                          ptag4<e_elan<256, transition<256,
 65 |                          ptag3<e_elan<128, transition<128,
 66 |                                e_elan<64, conv<128, 3, 2,
 67 |                                conv<64, 3, 1, conv<64, 3, 2,
 68 |                                conv<32, 3, 1, INPUT>>>>>>>>>>>>>>;
 69 | 
 70 |         template <long NF, typename SUBNET>
 71 |         using sppcspc = conv<NF, 1, 1,
 72 |                         concat2<tag1, tag2,
 73 |                    tag2<conv<NF, 1, 1, iskip<
 74 |                    tag1<conv<NF, 3, 1, conv<NF, 1, 1,
 75 |                         concat4<itag1, itag2, itag3, itag4,
 76 |                   itag4<max_pool<5, 5, 1, 1,
 77 |                   itag3<max_pool<5, 5, 1, 1,
 78 |                   itag2<max_pool<5, 5, 1, 1,
 79 |                   itag1<conv<NF, 1, 1, conv<NF, 3, 1, conv<NF, 1, 1,
 80 |                   itag0<SUBNET>>>>>>>>>>>>>>>>>>>>;
 81 | 
 82 |         template <template <typename> class YTAG, typename SUBNET>
 83 |         using yolo = YTAG<sig<con<255, 1, 1, 1, 1, SUBNET>>>;
 84 | 
 85 |         template <typename SUBNET>
 86 |         using head = yolo<ytag5,
 87 |                      e_elan2<512,
 88 |                      transition2<256, tag8, skip9<
 89 |                      yolo<ytag4,
 90 |                 tag9<e_elan2<256,
 91 |                      transition2<128, tag7, skip9<
 92 |                      yolo<ytag3,
 93 |                 tag9<e_elan2<128,
 94 |                      concat2<tag2, tag1,
 95 |                 tag2<conv<128, 1, 1, add_skip_layer<ptag3,
 96 |                 tag1<upsample<2,
 97 |                      conv<128, 1, 1,
 98 |                 tag7<e_elan2<256,
 99 |                      concat2<tag2, tag1,
100 |                 tag2<conv<256, 1, 1, add_skip_layer<ptag4,
101 |                 tag1<upsample<2,
102 |                      conv<256, 1, 1,
103 |                tag8<sppcspc<512,
104 |                SUBNET>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>;
105 | 
106 |         using net_type = loss_yolo<ytag3, ytag4, ytag5, head<backbone<input_rgb_image>>>;
107 |     };
108 | 
109 |     using train_type = def<silu, bn_con>::net_type;
110 |     using infer_type = def<silu, affine>::net_type;
111 | }
112 | 
113 | #endif // yolov7_h_INCLUDED
114 | 


--------------------------------------------------------------------------------
/src/lm/slm_dels.h:
--------------------------------------------------------------------------------
  1 | #ifndef SlmNet_H
  2 | #define SlmNet_H
  3 | 
  4 | /**
  5 |  * @file slm_dels.h
  6 |  * @brief Optimized Transformer neural architecture for language processing
  7 |  *
  8 |  * Implements a Transformer architecture with multi-head attention and RMS
  9 |  * normalization, designed for efficient learning and inference. The architecture
 10 |  * leverages cognitive principles of parallel information processing and
 11 |  * selective attention.
 12 |  *
 13 |  * Key features:
 14 |  * - RMS normalization for enhanced stability
 15 |  * - Optimized residual connections
 16 |  * - Causal masking for autoregressive attention
 17 |  */
 18 | 
 19 | #include <dlib/dnn.h>
 20 | 
 21 | namespace transformer
 22 | {
 23 |     using namespace dlib;
 24 | 
 25 |     // Scale Weights Layer: a multiply_ layer constructed with the fixed factor 1 / sqrt(d_k_)
 26 |     template <long d_k_>
 27 |     class scale_weights_ : public multiply_ {
 28 |     public:
 29 |         explicit scale_weights_() : multiply_(1.0f / std::sqrt(static_cast<float>(d_k_))) {}
 30 |     };
 31 | 
 32 |     template <long d_k, typename SUBNET>
 33 |     using scale_weights = add_layer<scale_weights_<d_k>, SUBNET>;
 34 | 
 35 |     namespace def {
 36 |         template <long num_heads, long d_model, typename SUBNET>  // query: extract starting at offset 0
 37 |         using query = extract<0, num_heads, d_model / num_heads, 1, SUBNET>;
 38 | 
 39 |         template <long num_heads, long d_model, typename SUBNET>  // key: extract starting at offset d_model, with the last two extract dimensions swapped relative to query
 40 |         using key = extract<d_model, num_heads, 1, d_model / num_heads, SUBNET>;
 41 | 
 42 |         template <long num_heads, long d_model, typename SUBNET>  // value: extract starting at offset 2 * d_model, same dimensions as query
 43 |         using value = extract<(d_model * 2), num_heads, d_model / num_heads, 1, SUBNET>;
 44 | 
 45 |         /**
 46 |          * Multi-Head Attention Layer
 47 |          *
 48 |          * Structure:
 49 |          * 1. Input processing
 50 |          *    - RMS normalization
 51 |          *    - Single linear projection (d_model -> 3*d_model) for Q,K,V
 52 |          * 2. Parallel head processing (num_heads)
 53 |          *    - Split into Q, K, V tensors
 54 |          *    - Key transposition for attention computation
 55 |          * 3. Attention mechanism
 56 |          *    - Scaled dot-product (Q*K^T / sqrt(d_k))
 57 |          *    - Causal masking (tril_mask)
 58 |          *    - Softmax normalization
 59 |          *    - Value weighting
 60 |          * 4. Output
 61 |          *    - Head concatenation
 62 |          *    - Residual connection
 63 |          *
 64 |          * Template parameters:
 65 |          * @param ACT: Activation function type (accepted but not referenced by this alias)
 66 |          * @param DO: Dropout layer type
 67 |          * @param d_model: Model dimension
 68 |          * @param num_heads: Number of attention heads
 69 |          * @param SUBNET: Input subnet type
 70 |          */
 71 |         template <template <typename> class ACT, template <typename> class DO,
 72 |             long d_model, long num_heads, typename SUBNET>
 73 |         using multihead_attention = add_prev1<DO<extract<0, 1, 1, d_model, multm_prev3<
 74 |             DO<softmaxm<tril_mask<
 75 |             scale_weights<d_model / num_heads,
 76 |             multm_prev4<query<num_heads, d_model, skip2<
 77 |             tag4<key<num_heads, d_model, skip2<
 78 |             tag3<value<num_heads, d_model,
 79 |             tag2<fc_no_bias<d_model * 3, rms_norm<
 80 |             tag1<SUBNET>>>>>>>>>>>>>>>>>>>>;
 81 | 
 82 |         /**
 83 |          * Feed-Forward Network Layer
 84 |          *
 85 |          * Structure:
 86 |          * 1. Input processing
 87 |          *    - RMS normalization
 88 |          *    - Input tagged for residual connection
 89 |          * 2. Transformation
 90 |          *    - Expansion layer (d_model -> 4*d_model)
 91 |          *    - Activation function
 92 |          *    - Projection layer (4*d_model -> d_model)
 93 |          * 3. Output
 94 |          *    - Dropout
 95 |          *    - Residual connection
 96 |          *
 97 |          * Template parameters:
 98 |          * @param ACT: Activation function type
 99 |          * @param DO: Dropout layer type
100 |          * @param d_model: Model dimension
101 |          * @param SUBNET: Input subnet type
102 |          */
103 |         template <template <typename> class ACT, template <typename> class DO, long d_model, typename SUBNET>
104 |         using feed_forward =
105 |             add_prev5<
106 |             DO<extract<0, 1, 1, d_model,
107 |             fc<d_model, ACT<fc<d_model * 4, rms_norm<
108 |             tag5<SUBNET>>>>>>>>;
109 | 
110 |         /**
111 |          * Transformer Block
112 |          *
113 |          * Combines sequentially:
114 |          * 1. Multi-head attention layer
115 |          * 2. Feed-forward network
116 |          *
117 |          * Template parameters:
118 |          * @param ACT: Activation function type
119 |          * @param DO: Dropout layer type
120 |          * @param d_model: Model dimension
121 |          * @param num_heads: Number of attention heads
122 |          * @param SUBNET: Input subnet type
123 |          */
124 |         template <template <typename> class ACT, template <typename> class DO, long seq_len, long d_model, long num_heads, typename SUBNET>
125 |         using transformer_block =
126 |             feed_forward<ACT, DO, d_model,
127 |             multihead_attention<ACT, DO, d_model, num_heads, SUBNET>>;
128 |     }
129 | 
130 |     // Positional Embeddings
131 |     template <long num_embeddings, long embedding_length, typename SUBNET>
132 |     using positional_embeddings = positional_encodings<embeddings<num_embeddings, embedding_length, SUBNET>>;
133 | 
134 |     // Squeezing block used by the classification head: fc to embedding_length / 8, then ACT, then fc to embedding_length / 4
135 |     template <template <typename> class ACT, long embedding_length, typename SUBNET>
136 |     using squeezing = fc<embedding_length / 4, ACT<fc<embedding_length / 8, SUBNET>>>;
137 | 
138 |     template <bool USE_SQUEEZING, template <typename> class ACT, long num_logits, long embedding_length, typename SUBNET>
139 |     struct classification_head_impl;
140 |     template <template <typename> class ACT, long num_logits, long embedding_length, typename SUBNET>
141 |     struct classification_head_impl<true, ACT, num_logits, embedding_length, SUBNET>
142 |     {
143 |         using type = loss_multiclass_log<fc<num_logits, squeezing<ACT, embedding_length, rms_norm<SUBNET>>>>;
144 |     };
145 |     template <template <typename> class ACT, long num_logits, long embedding_length, typename SUBNET>
146 |     struct classification_head_impl<false, ACT, num_logits, embedding_length, SUBNET>
147 |     {
148 |         using type = loss_multiclass_log<fc<num_logits, rms_norm<SUBNET>>>;
149 |     };
150 |     template <bool USE_SQUEEZING, template <typename> class ACT, long num_logits, long embedding_length, typename SUBNET>
151 |     using classification_head = typename classification_head_impl<USE_SQUEEZING, ACT, num_logits, embedding_length, SUBNET>::type;
152 | 
153 |     /**
154 |      * @brief Transformer Model Configuration Template
155 |      *
156 |      * Provides a flexible and type-safe configuration mechanism for Transformer models
157 |      * with compile-time parameter validation and network generation.
158 |      *
159 |      * Template parameters:
160 |      * @param vocab_size Vocabulary size for token embedding
161 |      * @param num_layers Number of Transformer layers
162 |      * @param num_heads Number of attention heads
163 |      * @param embedding_dim Dimension of token embeddings
164 |      * @param max_seq_len Maximum sequence length
165 |      * @param use_squeezing Use squeezing layer
166 |      * @param activation_func Activation function type
167 |      * @param dropout_policy Dropout regularization policy
168 |      */
169 |     template <
170 |         long vocab_size = 5000,                                 // Default vocabulary size
171 |         long num_layers = 6,                                    // Default number of layers
172 |         long num_heads = 8,                                     // Default number of attention heads
173 |         long embedding_dim = 128,                               // Default embedding dimension
174 |         long max_seq_len = 100,                                 // Default maximum sequence length
175 |         bool use_squeezing = false,                             // Default use squeezing layer
176 |         template <typename> class activation_func = gelu,       // Default activation function
177 |         template <typename> class dropout_policy = dropout_10   // Default dropout policy
178 |     >
179 |     struct transformer_config {
180 |         // Core model parameters
181 |         static constexpr long VOCAB_SIZE = vocab_size;
182 |         static constexpr long NUM_LAYERS = num_layers;
183 |         static constexpr long NUM_HEADS = num_heads;
184 |         static constexpr long EMBEDDING_DIM = embedding_dim;
185 |         static constexpr long MAX_SEQ_LEN = max_seq_len;
186 |         static constexpr bool USE_SQUEEZING = use_squeezing;
187 | 
188 |         // Compile-time validation. These assertions live at class scope on purpose:
189 |         // a member struct of a class template is only instantiated when it is used,
190 |         // so assertions placed inside `validation` would never fire for a bad config.
191 |         static_assert(VOCAB_SIZE > 0, "Vocabulary size must be positive");
192 |         static_assert(NUM_LAYERS > 0, "Number of layers must be positive");
193 |         static_assert(NUM_HEADS > 0, "Number of attention heads must be positive");
194 |         static_assert(EMBEDDING_DIM > 0, "Embedding dimension must be positive");
195 |         static_assert(MAX_SEQ_LEN > 0, "Maximum sequence length must be positive");
196 |         static_assert(EMBEDDING_DIM % NUM_HEADS == 0, "Embedding dimension must be divisible by number of heads");
197 | 
198 |         struct validation {}; // Kept for source compatibility; the checks above always run
199 | 
200 |         /**
201 |          * @brief Transformer block aliases bound to this configuration
202 |          *
203 |          * t_transformer_block passes dropout_policy to the block (training network);
204 |          * i_transformer_block passes multiply in that slot (inference network).
205 |          */
206 |         template <typename SUBNET>
207 |         using t_transformer_block = def::transformer_block<activation_func, dropout_policy, MAX_SEQ_LEN, EMBEDDING_DIM, NUM_HEADS, SUBNET>;
208 |         template <typename SUBNET>
209 |         using i_transformer_block = def::transformer_block<activation_func, multiply, MAX_SEQ_LEN, EMBEDDING_DIM, NUM_HEADS, SUBNET>;
210 | 
211 |         /**
212 |          * @brief Network type generation based on training/inference mode
213 |          *
214 |          * @tparam is_training true selects the training network, false the inference one
215 |          */
216 |         template<bool is_training>
217 |         using network_type = std::conditional_t<is_training,
218 |             classification_head<USE_SQUEEZING, activation_func, VOCAB_SIZE, EMBEDDING_DIM,
219 |             repeat<NUM_LAYERS, t_transformer_block,
220 |             positional_embeddings<VOCAB_SIZE, EMBEDDING_DIM, input<matrix<int, 0, 1>>>>>,
221 |             classification_head<USE_SQUEEZING, activation_func, VOCAB_SIZE, EMBEDDING_DIM,
222 |             repeat<NUM_LAYERS, i_transformer_block,
223 |             positional_embeddings<VOCAB_SIZE, EMBEDDING_DIM, input<matrix<int, 0, 1>>>>>
224 |             >;
225 | 
226 |         /**
227 |          * @brief Human-readable reporting of the model configuration
228 |          */
229 |         struct model_info {
230 |             /**
231 |              * @brief Generate a detailed description of the model configuration
232 |              *
233 |              * @return Multi-line string listing vocabulary size, layers, heads, embedding dimension and max sequence length
234 |              */
235 |             static std::string describe() {
236 |                 std::stringstream ss;
237 |                 ss << "Transformer model configuration:\n"
238 |                     << "- vocabulary size: " << VOCAB_SIZE << "\n"
239 |                     << "- layers: " << NUM_LAYERS << "\n"
240 |                     << "- attention heads: " << NUM_HEADS << "\n"
241 |                     << "- embedding dimension: " << EMBEDDING_DIM << "\n"
242 |                     << "- max sequence length: " << MAX_SEQ_LEN;
243 |                 return ss.str();
244 |             }
245 |         };
246 |     };
247 | 
248 |     using vslm = transformer_config<>; // Very Small Language Model: transformer_config with all default parameters
249 | 
250 |     /**
251 |      * @example Configuration and Usage Examples
252 |      *
253 |      * // Creating different transformer configurations
254 |      * using default_transformer = transformer_config<>;
255 |      * using large_transformer_with_squeezing = transformer_config<
256 |      *     50000,  // Larger vocabulary
257 |      *     8,      // More layers
258 |      *     8,      // Same number of heads as the default
259 |      *     512,    // Larger embedding dimension
260 |      *     128,    // Longer sequences
261 |      *     true    // Use squeezing
262 |      * >;
263 |      *
264 |      * // Network type instantiations for different modes
265 |      * using train_network = default_transformer::network_type<true>;
266 |      * using inference_network = default_transformer::network_type<false>;
267 |      *
268 |      * // Utility function to print model configuration
269 |      * void print_model_info() {
270 |      *     std::cout << default_transformer::model_info::describe() << std::endl;
271 |      * }
272 |      *
273 |      * @note
274 |      * - Supports compile-time configuration
275 |      * - Provides static validation of model parameters
276 |      * - Selects the training or inference network type at compile time
277 |      * - Offers advanced hyperparameter tuning utilities
278 |      *
279 |      * @author Cydral
280 |      * @site https://github.com/Cydral/ERNIE
281 |      * @version 1.0
282 |      * @date 11/2024
283 |      */
284 | }
285 | 
286 | #endif // SlmNet_H
287 | 


--------------------------------------------------------------------------------