├── .gitignore ├── LICENSE ├── README.md ├── caffe2onnx ├── __init__.py ├── convert.py ├── proto │ ├── __init__.py │ ├── caffe_upsample.proto │ └── caffe_upsample_pb2.py └── src │ ├── OPs │ ├── Add.py │ ├── Axpy.py │ ├── BatchNorm.py │ ├── Clip.py │ ├── Concat.py │ ├── Conv.py │ ├── ConvTranspose.py │ ├── Crop.py │ ├── DetectionOutput.py │ ├── Dropout.py │ ├── Eltwise.py │ ├── Flatten.py │ ├── Gemm.py │ ├── InstanceNorm.py │ ├── Interp.py │ ├── LRN.py │ ├── Log.py │ ├── LpNormalization.py │ ├── Min.py │ ├── Mul.py │ ├── PRelu.py │ ├── Pooling.py │ ├── Power.py │ ├── PriroBox.py │ ├── ReLU.py │ ├── Reshape.py │ ├── Resize.py │ ├── Shuffle.py │ ├── Sigmoid.py │ ├── Slice.py │ ├── Softmax.py │ ├── Tanh.py │ ├── Transpose.py │ ├── UnPooling.py │ ├── Upsample.py │ └── __init__.py │ ├── __init__.py │ ├── args_parser.py │ ├── c2oObject.py │ ├── caffe2onnx.py │ ├── load_save_model.py │ ├── op_layer_info.py │ └── utils.py ├── docs └── caffe-operators.md └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, Valery Asiryan 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # caffe2onnx 2 | This tool converts Caffe models to ONNX via command line (without Caffe environment). 3 | 4 | ## Installation 5 | Install from pypi 6 | ``` 7 | pip install caffe2onnx 8 | ``` 9 | 10 | Install latest from github 11 | ``` 12 | pip install git+https://github.com/asiryan/caffe2onnx 13 | ``` 14 | 15 | Build and install latest from source 16 | ``` 17 | git clone https://github.com/asiryan/caffe2onnx 18 | python setup.py install 19 | ``` 20 | 21 | ## Usage 22 | To get started with **caffe2onnx**, run the *caffe2onnx.convert* command, providing: 23 | * the path to your caffe prototxt, 24 | * the path to your caffe model (*not required*), 25 | * the output path of the onnx model (*not required*), 26 | * frozen graph or not (*not required*). 27 | 28 | ``` 29 | python -m caffe2onnx.convert 30 | --prototxt caffe prototxt file path 31 | [--caffemodel caffe caffemodel file path] 32 | [--onnx output onnx file path] 33 | [--frozen frozen graph or not] 34 | ``` 35 | 36 | ## Operators 37 | See the documentation of [caffe supported operators](https://github.com/FaceONNX/caffe2onnx/blob/main/docs/caffe-operators.md). 
def main(args):
    """Convert a Caffe model to ONNX and save the result.

    Reads ``args.prototxt``, ``args.caffemodel``, ``args.onnx`` and
    ``args.frozen``. When ``args.caffemodel`` or ``args.onnx`` is ``None``,
    the corresponding path is built from the prototxt path with its
    extension replaced by ``.caffemodel`` / ``.onnx``.
    """
    prototxt_path = args.prototxt

    # Any path left unspecified falls back to "<prototxt without ext>.<ext>".
    caffemodel_path = (
        args.caffemodel
        if args.caffemodel is not None
        else f'{os.path.splitext(prototxt_path)[0]}.caffemodel'
    )
    onnxmodel_path = (
        args.onnx
        if args.onnx is not None
        else f'{os.path.splitext(prototxt_path)[0]}.onnx'
    )

    net, weights = loadcaffemodel(prototxt_path, caffemodel_path)
    converter = Caffe2Onnx(net, weights, onnxmodel_path)
    model = converter.createOnnxModel()

    # Identity check kept on purpose: only the literal True triggers freeze().
    if args.frozen is True:
        freeze(model)

    saveonnxmodel(model, onnxmodel_path)
https://raw.githubusercontent.com/asiryan/caffe2onnx/1d50d0215f30f8c479cbf73d9cee6a4260db3da1/caffe2onnx/proto/__init__.py -------------------------------------------------------------------------------- /caffe2onnx/proto/caffe_upsample.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package caffe; 4 | 5 | // Specifies the shape (dimensions) of a Blob. 6 | message BlobShape { 7 | repeated int64 dim = 1 [packed = true]; 8 | } 9 | 10 | message BlobProto { 11 | optional BlobShape shape = 7; 12 | repeated float data = 5 [packed = true]; 13 | repeated float diff = 6 [packed = true]; 14 | repeated double double_data = 8 [packed = true]; 15 | repeated double double_diff = 9 [packed = true]; 16 | 17 | // 4D dimensions -- deprecated. Use "shape" instead. 18 | optional int32 num = 1 [default = 0]; 19 | optional int32 channels = 2 [default = 0]; 20 | optional int32 height = 3 [default = 0]; 21 | optional int32 width = 4 [default = 0]; 22 | } 23 | 24 | // The BlobProtoVector is simply a way to pass multiple blobproto instances 25 | // around. 26 | message BlobProtoVector { 27 | repeated BlobProto blobs = 1; 28 | } 29 | 30 | message Datum { 31 | optional int32 channels = 1; 32 | optional int32 height = 2; 33 | optional int32 width = 3; 34 | // the actual image data, in bytes 35 | optional bytes data = 4; 36 | optional int32 label = 5; 37 | // Optionally, the datum could also hold float data. 38 | repeated float float_data = 6; 39 | // If true data contains an encoded image that need to be decoded 40 | optional bool encoded = 7 [default = false]; 41 | } 42 | 43 | message FillerParameter { 44 | // The filler type. 
45 | optional string type = 1 [default = 'constant']; 46 | optional float value = 2 [default = 0]; // the value in constant filler 47 | optional float min = 3 [default = 0]; // the min value in uniform filler 48 | optional float max = 4 [default = 1]; // the max value in uniform filler 49 | optional float mean = 5 [default = 0]; // the mean value in Gaussian filler 50 | optional float std = 6 [default = 1]; // the std value in Gaussian filler 51 | // The expected number of non-zero output weights for a given input in 52 | // Gaussian filler -- the default -1 means don't perform sparsification. 53 | optional int32 sparse = 7 [default = -1]; 54 | // Normalize the filler variance by fan_in, fan_out, or their average. 55 | // Applies to 'xavier' and 'msra' fillers. 56 | enum VarianceNorm { 57 | FAN_IN = 0; 58 | FAN_OUT = 1; 59 | AVERAGE = 2; 60 | } 61 | optional VarianceNorm variance_norm = 8 [default = FAN_IN]; 62 | } 63 | 64 | message NetParameter { 65 | optional string name = 1; // consider giving the network a name 66 | // DEPRECATED. See InputParameter. The input blobs to the network. 67 | repeated string input = 3; 68 | // DEPRECATED. See InputParameter. The shape of the input blobs. 69 | repeated BlobShape input_shape = 8; 70 | 71 | // 4D input dimensions -- deprecated. Use "input_shape" instead. 72 | // If specified, for each input blob there should be four 73 | // values specifying the num, channels, height and width of the input blob. 74 | // Thus, there should be a total of (4 * #input) numbers. 75 | repeated int32 input_dim = 4; 76 | 77 | // Whether the network will force every layer to carry out backward operation. 78 | // If set False, then whether to carry out backward is determined 79 | // automatically according to the net structure and learning rates. 80 | optional bool force_backward = 5 [default = false]; 81 | // The current "state" of the network, including the phase, level, and stage. 
82 | // Some layers may be included/excluded depending on this state and the states 83 | // specified in the layers' include and exclude fields. 84 | optional NetState state = 6; 85 | 86 | // Print debugging information about results while running Net::Forward, 87 | // Net::Backward, and Net::Update. 88 | optional bool debug_info = 7 [default = false]; 89 | 90 | // The layers that make up the net. Each of their configurations, including 91 | // connectivity and behavior, is specified as a LayerParameter. 92 | repeated LayerParameter layer = 100; // ID 100 so layers are printed last. 93 | 94 | // DEPRECATED: use 'layer' instead. 95 | repeated V1LayerParameter layers = 2; 96 | } 97 | 98 | // NOTE 99 | // Update the next available ID when you add a new SolverParameter field. 100 | // 101 | // SolverParameter next available ID: 43 (last added: weights) 102 | message SolverParameter { 103 | ////////////////////////////////////////////////////////////////////////////// 104 | // Specifying the train and test networks 105 | // 106 | // Exactly one train net must be specified using one of the following fields: 107 | // train_net_param, train_net, net_param, net 108 | // One or more test nets may be specified using any of the following fields: 109 | // test_net_param, test_net, net_param, net 110 | // If more than one test net field is specified (e.g., both net and 111 | // test_net are specified), they will be evaluated in the field order given 112 | // above: (1) test_net_param, (2) test_net, (3) net_param/net. 113 | // A test_iter must be specified for each test_net. 114 | // A test_level and/or a test_stage may also be specified for each test_net. 115 | ////////////////////////////////////////////////////////////////////////////// 116 | 117 | // Proto filename for the train net, possibly combined with one or more 118 | // test nets. 119 | optional string net = 24; 120 | // Inline train net param, possibly combined with one or more test nets. 
121 | optional NetParameter net_param = 25; 122 | 123 | optional string train_net = 1; // Proto filename for the train net. 124 | repeated string test_net = 2; // Proto filenames for the test nets. 125 | optional NetParameter train_net_param = 21; // Inline train net params. 126 | repeated NetParameter test_net_param = 22; // Inline test net params. 127 | 128 | // The states for the train/test nets. Must be unspecified or 129 | // specified once per net. 130 | // 131 | // By default, train_state will have phase = TRAIN, 132 | // and all test_state's will have phase = TEST. 133 | // Other defaults are set according to the NetState defaults. 134 | optional NetState train_state = 26; 135 | repeated NetState test_state = 27; 136 | 137 | // The number of iterations for each test net. 138 | repeated int32 test_iter = 3; 139 | 140 | // The number of iterations between two testing phases. 141 | optional int32 test_interval = 4 [default = 0]; 142 | optional bool test_compute_loss = 19 [default = false]; 143 | // If true, run an initial test pass before the first iteration, 144 | // ensuring memory availability and printing the starting value of the loss. 145 | optional bool test_initialization = 32 [default = true]; 146 | optional float base_lr = 5; // The base learning rate 147 | // the number of iterations between displaying info. If display = 0, no info 148 | // will be displayed. 149 | optional int32 display = 6; 150 | // Display the loss averaged over the last average_loss iterations 151 | optional int32 average_loss = 33 [default = 1]; 152 | optional int32 max_iter = 7; // the maximum number of iterations 153 | // accumulate gradients over `iter_size` x `batch_size` instances 154 | optional int32 iter_size = 36 [default = 1]; 155 | 156 | // The learning rate decay policy. The currently implemented learning rate 157 | // policies are as follows: 158 | // - fixed: always return base_lr. 
159 | // - step: return base_lr * gamma ^ (floor(iter / step)) 160 | // - exp: return base_lr * gamma ^ iter 161 | // - inv: return base_lr * (1 + gamma * iter) ^ (- power) 162 | // - multistep: similar to step but it allows non uniform steps defined by 163 | // stepvalue 164 | // - poly: the effective learning rate follows a polynomial decay, to be 165 | // zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power) 166 | // - sigmoid: the effective learning rate follows a sigmoid decay 167 | // return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize)))) 168 | // 169 | // where base_lr, max_iter, gamma, step, stepvalue and power are defined 170 | // in the solver parameter protocol buffer, and iter is the current iteration. 171 | optional string lr_policy = 8; 172 | optional float gamma = 9; // The parameter to compute the learning rate. 173 | optional float power = 10; // The parameter to compute the learning rate. 174 | optional float momentum = 11; // The momentum value. 175 | optional float weight_decay = 12; // The weight decay. 176 | // regularization types supported: L1 and L2 177 | // controlled by weight_decay 178 | optional string regularization_type = 29 [default = "L2"]; 179 | // the stepsize for learning rate policy "step" 180 | optional int32 stepsize = 13; 181 | // the stepsize for learning rate policy "multistep" 182 | repeated int32 stepvalue = 34; 183 | 184 | // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm, 185 | // whenever their actual L2 norm is larger. 186 | optional float clip_gradients = 35 [default = -1]; 187 | 188 | optional int32 snapshot = 14 [default = 0]; // The snapshot interval 189 | // The prefix for the snapshot. 190 | // If not set then is replaced by prototxt file path without extension. 191 | // If is set to directory then is augmented by prototxt file name 192 | // without extension. 193 | optional string snapshot_prefix = 15; 194 | // whether to snapshot diff in the results or not.
Snapshotting diff will help 195 | // debugging but the final protocol buffer size will be much larger. 196 | optional bool snapshot_diff = 16 [default = false]; 197 | enum SnapshotFormat { 198 | HDF5 = 0; 199 | BINARYPROTO = 1; 200 | } 201 | optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO]; 202 | // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU by default. 203 | enum SolverMode { 204 | CPU = 0; 205 | GPU = 1; 206 | } 207 | optional SolverMode solver_mode = 17 [default = GPU]; 208 | // the device_id that will be used in GPU mode. Use device_id = 0 by default. 209 | optional int32 device_id = 18 [default = 0]; 210 | // If non-negative, the seed with which the Solver will initialize the Caffe 211 | // random number generator -- useful for reproducible results. Otherwise, 212 | // (and by default) initialize using a seed derived from the system clock. 213 | optional int64 random_seed = 20 [default = -1]; 214 | 215 | // type of the solver 216 | optional string type = 40 [default = "SGD"]; 217 | 218 | // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam 219 | optional float delta = 31 [default = 1e-8]; 220 | // parameters for the Adam solver 221 | optional float momentum2 = 39 [default = 0.999]; 222 | 223 | // RMSProp decay value 224 | // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) 225 | optional float rms_decay = 38 [default = 0.99]; 226 | 227 | // If true, print information about the state of the net that may help with 228 | // debugging learning problems. 229 | optional bool debug_info = 23 [default = false]; 230 | 231 | // If false, don't save a snapshot after training finishes.
232 | optional bool snapshot_after_train = 28 [default = true]; 233 | 234 | // DEPRECATED: old solver enum types, use string instead 235 | enum SolverType { 236 | SGD = 0; 237 | NESTEROV = 1; 238 | ADAGRAD = 2; 239 | RMSPROP = 3; 240 | ADADELTA = 4; 241 | ADAM = 5; 242 | } 243 | // DEPRECATED: use type instead of solver_type 244 | optional SolverType solver_type = 30 [default = SGD]; 245 | 246 | // Overlap compute and communication for data parallel training 247 | optional bool layer_wise_reduce = 41 [default = true]; 248 | 249 | // Path to caffemodel file(s) with pretrained weights to initialize finetuning. 250 | // The same as command line --weights parameter for caffe train command. 251 | // If command line --weights parameter is specified, it has higher priority 252 | // and overwrites this one(s). 253 | // If --snapshot command line parameter is specified, this one(s) are ignored. 254 | // If several model files are expected, they can be listed in one 255 | // weights parameter separated by ',' (like in a command string) or 256 | // in repeated weights parameters separately. 257 | repeated string weights = 42; 258 | } 259 | 260 | // A message that stores the solver snapshots 261 | message SolverState { 262 | optional int32 iter = 1; // The current iteration 263 | optional string learned_net = 2; // The file that stores the learned net. 264 | repeated BlobProto history = 3; // The history for sgd solvers 265 | optional int32 current_step = 4 [default = 0]; // The current step for learning rate 266 | } 267 | 268 | enum Phase { 269 | TRAIN = 0; 270 | TEST = 1; 271 | } 272 | 273 | message NetState { 274 | optional Phase phase = 1 [default = TEST]; 275 | optional int32 level = 2 [default = 0]; 276 | repeated string stage = 3; 277 | } 278 | 279 | message NetStateRule { 280 | // Set phase to require the NetState have a particular phase (TRAIN or TEST) 281 | // to meet this rule.
282 | optional Phase phase = 1; 283 | 284 | // Set the minimum and/or maximum levels in which the layer should be used. 285 | // Leave undefined to meet the rule regardless of level. 286 | optional int32 min_level = 2; 287 | optional int32 max_level = 3; 288 | 289 | // Customizable sets of stages to include or exclude. 290 | // The net must have ALL of the specified stages and NONE of the specified 291 | // "not_stage"s to meet the rule. 292 | // (Use multiple NetStateRules to specify conjunctions of stages.) 293 | repeated string stage = 4; 294 | repeated string not_stage = 5; 295 | } 296 | 297 | // Specifies training parameters (multipliers on global learning constants, 298 | // and the name and other settings used for weight sharing). 299 | message ParamSpec { 300 | // The names of the parameter blobs -- useful for sharing parameters among 301 | // layers, but never required otherwise. To share a parameter between two 302 | // layers, give it a (non-empty) name. 303 | optional string name = 1; 304 | 305 | // Whether to require shared weights to have the same shape, or just the same 306 | // count -- defaults to STRICT if unspecified. 307 | optional DimCheckMode share_mode = 2; 308 | enum DimCheckMode { 309 | // STRICT (default) requires that num, channels, height, width each match. 310 | STRICT = 0; 311 | // PERMISSIVE requires only the count (num*channels*height*width) to match. 312 | PERMISSIVE = 1; 313 | } 314 | 315 | // The multiplier on the global learning rate for this parameter. 316 | optional float lr_mult = 3 [default = 1.0]; 317 | 318 | // The multiplier on the global weight decay for this parameter. 319 | optional float decay_mult = 4 [default = 1.0]; 320 | } 321 | 322 | // NOTE 323 | // Update the next available ID when you add a new LayerParameter field. 
324 | // 325 | // LayerParameter next available layer-specific ID: 149 (last added: clip_param) 326 | message LayerParameter { 327 | optional string name = 1; // the layer name 328 | optional string type = 2; // the layer type 329 | repeated string bottom = 3; // the name of each bottom blob 330 | repeated string top = 4; // the name of each top blob 331 | 332 | // The train / test phase for computation. 333 | optional Phase phase = 10; 334 | 335 | // The amount of weight to assign each top blob in the objective. 336 | // Each layer assigns a default value, usually of either 0 or 1, 337 | // to each top blob. 338 | repeated float loss_weight = 5; 339 | 340 | // Specifies training parameters (multipliers on global learning constants, 341 | // and the name and other settings used for weight sharing). 342 | repeated ParamSpec param = 6; 343 | 344 | // The blobs containing the numeric parameters of the layer. 345 | repeated BlobProto blobs = 7; 346 | 347 | // Specifies whether to backpropagate to each bottom. If unspecified, 348 | // Caffe will automatically infer whether each input needs backpropagation 349 | // to compute parameter gradients. If set to true for some inputs, 350 | // backpropagation to those inputs is forced; if set false for some inputs, 351 | // backpropagation to those inputs is skipped. 352 | // 353 | // The size must be either 0 or equal to the number of bottoms. 354 | repeated bool propagate_down = 11; 355 | 356 | // Rules controlling whether and when a layer is included in the network, 357 | // based on the current NetState. You may specify a non-zero number of rules 358 | // to include OR exclude, but not both. If no include or exclude rules are 359 | // specified, the layer is always included. If the current NetState meets 360 | // ANY (i.e., one or more) of the specified rules, the layer is 361 | // included/excluded. 
362 | repeated NetStateRule include = 8; 363 | repeated NetStateRule exclude = 9; 364 | 365 | // Parameters for data pre-processing. 366 | optional TransformationParameter transform_param = 100; 367 | 368 | // Parameters shared by loss layers. 369 | optional LossParameter loss_param = 101; 370 | 371 | // Layer type-specific parameters. 372 | // 373 | // Note: certain layers may have more than one computational engine 374 | // for their implementation. These layers include an Engine type and 375 | // engine parameter for selecting the implementation. 376 | // The default for the engine is set by the ENGINE switch at compile-time. 377 | optional AccuracyParameter accuracy_param = 102; 378 | optional ArgMaxParameter argmax_param = 103; 379 | optional BatchNormParameter batch_norm_param = 139; 380 | optional BiasParameter bias_param = 141; 381 | optional ClipParameter clip_param = 148; 382 | optional ConcatParameter concat_param = 104; 383 | optional ContrastiveLossParameter contrastive_loss_param = 105; 384 | optional ConvolutionParameter convolution_param = 106; 385 | optional UpsampleParameter upsample_param = 149; 386 | optional CropParameter crop_param = 144; 387 | optional DataParameter data_param = 107; 388 | optional DropoutParameter dropout_param = 108; 389 | optional DummyDataParameter dummy_data_param = 109; 390 | optional EltwiseParameter eltwise_param = 110; 391 | optional ELUParameter elu_param = 140; 392 | optional EmbedParameter embed_param = 137; 393 | optional ExpParameter exp_param = 111; 394 | optional FlattenParameter flatten_param = 135; 395 | optional HDF5DataParameter hdf5_data_param = 112; 396 | optional HDF5OutputParameter hdf5_output_param = 113; 397 | optional HingeLossParameter hinge_loss_param = 114; 398 | optional ImageDataParameter image_data_param = 115; 399 | optional InfogainLossParameter infogain_loss_param = 116; 400 | optional InnerProductParameter inner_product_param = 117; 401 | optional InputParameter input_param = 143; 402 | 
optional LogParameter log_param = 134; 403 | optional LRNParameter lrn_param = 118; 404 | optional MemoryDataParameter memory_data_param = 119; 405 | optional MVNParameter mvn_param = 120; 406 | optional ParameterParameter parameter_param = 145; 407 | optional PoolingParameter pooling_param = 121; 408 | optional PowerParameter power_param = 122; 409 | optional PReLUParameter prelu_param = 131; 410 | optional PythonParameter python_param = 130; 411 | optional RecurrentParameter recurrent_param = 146; 412 | optional ReductionParameter reduction_param = 136; 413 | optional ReLUParameter relu_param = 123; 414 | optional ReshapeParameter reshape_param = 133; 415 | optional ScaleParameter scale_param = 142; 416 | optional SigmoidParameter sigmoid_param = 124; 417 | optional SoftmaxParameter softmax_param = 125; 418 | optional SPPParameter spp_param = 132; 419 | optional SliceParameter slice_param = 126; 420 | optional SwishParameter swish_param = 147; 421 | optional TanHParameter tanh_param = 127; 422 | optional ThresholdParameter threshold_param = 128; 423 | optional TileParameter tile_param = 138; 424 | optional WindowDataParameter window_data_param = 129; 425 | optional InterpParameter interp_param = 166; 426 | optional ShuffleChannelParameter shuffle_channel_param = 164; 427 | optional PermuteParameter permute_param = 202; 428 | optional PriorBoxParameter prior_box_param = 203; 429 | optional DetectionOutputParameter detection_output_param = 204; 430 | optional DetectionEvaluateParameter detection_evaluate_param = 205; 431 | optional NormalizeParameter norm_param = 206; 432 | optional AxpyParameter axpy_param = 151; 433 | optional ReLU6Parameter relu6_param = 100000; 434 | } 435 | 436 | 437 | message ShuffleChannelParameter { 438 | optional uint32 group = 1[default = 1]; // The number of group 439 | } 440 | 441 | // Message that stores parameters used to apply transformation 442 | // to the data layer's data 443 | message TransformationParameter { 444 | // For data 
pre-processing, we can do simple scaling and subtracting the 445 | // data mean, if provided. Note that the mean subtraction is always carried 446 | // out before scaling. 447 | optional float scale = 1 [default = 1]; 448 | // Specify if we want to randomly mirror data. 449 | optional bool mirror = 2 [default = false]; 450 | // Specify if we would like to randomly crop an image. 451 | optional uint32 crop_size = 3 [default = 0]; 452 | // mean_file and mean_value cannot be specified at the same time 453 | optional string mean_file = 4; 454 | // if specified can be repeated once (would subtract it from all the channels) 455 | // or can be repeated the same number of times as channels 456 | // (would subtract them from the corresponding channel) 457 | repeated float mean_value = 5; 458 | // Force the decoded image to have 3 color channels. 459 | optional bool force_color = 6 [default = false]; 460 | // Force the decoded image to have 1 color channels. 461 | optional bool force_gray = 7 [default = false]; 462 | } 463 | 464 | // Message that stores parameters shared by loss layers 465 | message LossParameter { 466 | // If specified, ignore instances with the given label. 467 | optional int32 ignore_label = 1; 468 | // How to normalize the loss for loss layers that aggregate across batches, 469 | // spatial dimensions, or other dimensions. Currently only implemented in 470 | // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers. 471 | enum NormalizationMode { 472 | // Divide by the number of examples in the batch times spatial dimensions. 473 | // Outputs that receive the ignore label will NOT be ignored in computing 474 | // the normalization factor. 475 | FULL = 0; 476 | // Divide by the total number of output locations that do not take the 477 | // ignore_label. If ignore_label is not set, this behaves like FULL. 478 | VALID = 1; 479 | // Divide by the batch size. 480 | BATCH_SIZE = 2; 481 | // Do not normalize the loss. 
482 | NONE = 3; 483 | } 484 | // For historical reasons, the default normalization for 485 | // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID. 486 | optional NormalizationMode normalization = 3 [default = VALID]; 487 | // Deprecated. Ignored if normalization is specified. If normalization 488 | // is not specified, then setting this to false will be equivalent to 489 | // normalization = BATCH_SIZE to be consistent with previous behavior. 490 | optional bool normalize = 2; 491 | } 492 | 493 | // Messages that store parameters used by individual layer types follow, in 494 | // alphabetical order. 495 | 496 | message AccuracyParameter { 497 | // When computing accuracy, count as correct by comparing the true label to 498 | // the top k scoring classes. By default, only compare to the top scoring 499 | // class (i.e. argmax). 500 | optional uint32 top_k = 1 [default = 1]; 501 | 502 | // The "label" axis of the prediction blob, whose argmax corresponds to the 503 | // predicted label -- may be negative to index from the end (e.g., -1 for the 504 | // last axis). For example, if axis == 1 and the predictions are 505 | // (N x C x H x W), the label blob is expected to contain N*H*W ground truth 506 | // labels with integer values in {0, 1, ..., C-1}. 507 | optional int32 axis = 2 [default = 1]; 508 | 509 | // If specified, ignore instances with the given label. 510 | optional int32 ignore_label = 3; 511 | } 512 | 513 | message ArgMaxParameter { 514 | // If true produce pairs (argmax, maxval) 515 | optional bool out_max_val = 1 [default = false]; 516 | optional uint32 top_k = 2 [default = 1]; 517 | // The axis along which to maximise -- may be negative to index from the 518 | // end (e.g., -1 for the last axis). 519 | // By default ArgMaxLayer maximizes over the flattened trailing dimensions 520 | // for each index of the first / num dimension. 
521 | optional int32 axis = 3; 522 | } 523 | 524 | // Message that stores parameters used by ClipLayer 525 | message ClipParameter { 526 | required float min = 1; 527 | required float max = 2; 528 | } 529 | 530 | message ConcatParameter { 531 | // The axis along which to concatenate -- may be negative to index from the 532 | // end (e.g., -1 for the last axis). Other axes must have the 533 | // same dimension for all the bottom blobs. 534 | // By default, ConcatLayer concatenates blobs along the "channels" axis (1). 535 | optional int32 axis = 2 [default = 1]; 536 | 537 | // DEPRECATED: alias for "axis" -- does not support negative indexing. 538 | optional uint32 concat_dim = 1 [default = 1]; 539 | } 540 | 541 | message BatchNormParameter { 542 | // If false, normalization is performed over the current mini-batch 543 | // and global statistics are accumulated (but not yet used) by a moving 544 | // average. 545 | // If true, those accumulated mean and variance values are used for the 546 | // normalization. 547 | // By default, it is set to false when the network is in the training 548 | // phase and true when the network is in the testing phase. 549 | optional bool use_global_stats = 1; 550 | // What fraction of the moving average remains each iteration? 551 | // Smaller values make the moving average decay faster, giving more 552 | // weight to the recent values. 553 | // Each iteration updates the moving average @f$S_{t-1}@f$ with the 554 | // current mean @f$ Y_t @f$ by 555 | // @f$ S_t = (1-\beta)Y_t + \beta \cdot S_{t-1} @f$, where @f$ \beta @f$ 556 | // is the moving_average_fraction parameter. 557 | optional float moving_average_fraction = 2 [default = .999]; 558 | // Small value to add to the variance estimate so that we don't divide by 559 | // zero. 
560 | optional float eps = 3 [default = 1e-5]; 561 | } 562 | 563 | message BiasParameter { 564 | // The first axis of bottom[0] (the first input Blob) along which to apply 565 | // bottom[1] (the second input Blob). May be negative to index from the end 566 | // (e.g., -1 for the last axis). 567 | // 568 | // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 569 | // top[0] will have the same shape, and bottom[1] may have any of the 570 | // following shapes (for the given value of axis): 571 | // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 572 | // (axis == 1 == -3) 3; 3x40; 3x40x60 573 | // (axis == 2 == -2) 40; 40x60 574 | // (axis == 3 == -1) 60 575 | // Furthermore, bottom[1] may have the empty shape (regardless of the value of 576 | // "axis") -- a scalar bias. 577 | optional int32 axis = 1 [default = 1]; 578 | 579 | // (num_axes is ignored unless just one bottom is given and the bias is 580 | // a learned parameter of the layer. Otherwise, num_axes is determined by the 581 | // number of axes by the second bottom.) 582 | // The number of axes of the input (bottom[0]) covered by the bias 583 | // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 584 | // Set num_axes := 0, to add a zero-axis Blob: a scalar. 585 | optional int32 num_axes = 2 [default = 1]; 586 | 587 | // (filler is ignored unless just one bottom is given and the bias is 588 | // a learned parameter of the layer.) 589 | // The initialization for the learned bias parameter. 590 | // Default is the zero (0) initialization, resulting in the BiasLayer 591 | // initially performing the identity operation. 592 | optional FillerParameter filler = 3; 593 | } 594 | 595 | message ContrastiveLossParameter { 596 | // margin for dissimilar pair 597 | optional float margin = 1 [default = 1.0]; 598 | // The first implementation of this cost did not exactly match the cost of 599 | // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. 
600 | // legacy_version = false (the default) uses (margin - d)^2 as proposed in the 601 | // Hadsell paper. New models should probably use this version. 602 | // legacy_version = true uses (margin - d^2). This is kept to support / 603 | // reproduce existing models and results 604 | optional bool legacy_version = 2 [default = false]; 605 | } 606 | 607 | message UpsampleParameter{ 608 | 609 | optional float scale = 1 [default = 0]; 610 | 611 | } 612 | 613 | message ConvolutionParameter { 614 | optional uint32 num_output = 1; // The number of outputs for the layer 615 | optional bool bias_term = 2 [default = true]; // whether to have bias terms 616 | 617 | // Pad, kernel size, and stride are all given as a single value for equal 618 | // dimensions in all spatial dimensions, or once per spatial dimension. 619 | repeated uint32 pad = 3; // The padding size; defaults to 0 620 | repeated uint32 kernel_size = 4; // The kernel size 621 | repeated uint32 stride = 6; // The stride; defaults to 1 622 | // Factor used to dilate the kernel, (implicitly) zero-filling the resulting 623 | // holes. (Kernel dilation is sometimes referred to by its use in the 624 | // algorithme à trous from Holschneider et al. 1987.) 625 | repeated uint32 dilation = 18; // The dilation; defaults to 1 626 | 627 | // For 2D convolution only, the *_h and *_w versions may also be used to 628 | // specify both spatial dimensions. 
629 | optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) 630 | optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) 631 | optional uint32 kernel_h = 11; // The kernel height (2D only) 632 | optional uint32 kernel_w = 12; // The kernel width (2D only) 633 | optional uint32 stride_h = 13; // The stride height (2D only) 634 | optional uint32 stride_w = 14; // The stride width (2D only) 635 | 636 | optional uint32 group = 5 [default = 1]; // The group size for group conv 637 | 638 | optional FillerParameter weight_filler = 7; // The filler for the weight 639 | optional FillerParameter bias_filler = 8; // The filler for the bias 640 | enum Engine { 641 | DEFAULT = 0; 642 | CAFFE = 1; 643 | CUDNN = 2; 644 | } 645 | optional Engine engine = 15 [default = DEFAULT]; 646 | 647 | // The axis to interpret as "channels" when performing convolution. 648 | // Preceding dimensions are treated as independent inputs; 649 | // succeeding dimensions are treated as "spatial". 650 | // With (N, C, H, W) inputs, and axis == 1 (the default), we perform 651 | // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for 652 | // groups g>1) filters across the spatial axes (H, W) of the input. 653 | // With (N, C, D, H, W) inputs, and axis == 1, we perform 654 | // N independent 3D convolutions, sliding (C/g)-channels 655 | // filters across the spatial axes (D, H, W) of the input. 656 | optional int32 axis = 16 [default = 1]; 657 | 658 | // Whether to force use of the general ND convolution, even if a specific 659 | // implementation for blobs of the appropriate number of spatial dimensions 660 | // is available. (Currently, there is only a 2D-specific convolution 661 | // implementation; for input blobs with num_axes != 2, this option is 662 | // ignored and the ND implementation will be used.) 
663 | optional bool force_nd_im2col = 17 [default = false]; 664 | } 665 | 666 | message CropParameter { 667 | // To crop, elements of the first bottom are selected to fit the dimensions 668 | // of the second, reference bottom. The crop is configured by 669 | // - the crop `axis` to pick the dimensions for cropping 670 | // - the crop `offset` to set the shift for all/each dimension 671 | // to align the cropped bottom with the reference bottom. 672 | // All dimensions up to but excluding `axis` are preserved, while 673 | // the dimensions including and trailing `axis` are cropped. 674 | // If only one `offset` is set, then all dimensions are offset by this amount. 675 | // Otherwise, the number of offsets must equal the number of cropped axes to 676 | // shift the crop in each dimension accordingly. 677 | // Note: standard dimensions are N,C,H,W so the default is a spatial crop, 678 | // and `axis` may be negative to index from the end (e.g., -1 for the last 679 | // axis). 680 | optional int32 axis = 1 [default = 2]; 681 | repeated uint32 offset = 2; 682 | } 683 | 684 | message DataParameter { 685 | enum DB { 686 | LEVELDB = 0; 687 | LMDB = 1; 688 | } 689 | // Specify the data source. 690 | optional string source = 1; 691 | // Specify the batch size. 692 | optional uint32 batch_size = 4; 693 | // The rand_skip variable is for the data layer to skip a few data points 694 | // to avoid all asynchronous sgd clients to start at the same point. The skip 695 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 696 | // be larger than the number of keys in the database. 697 | // DEPRECATED. Each solver accesses a different subset of the database. 698 | optional uint32 rand_skip = 7 [default = 0]; 699 | optional DB backend = 8 [default = LEVELDB]; 700 | // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 701 | // simple scaling and subtracting the data mean, if provided. 
Note that the 702 | // mean subtraction is always carried out before scaling. 703 | optional float scale = 2 [default = 1]; 704 | optional string mean_file = 3; 705 | // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 706 | // crop an image. 707 | optional uint32 crop_size = 5 [default = 0]; 708 | // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 709 | // data. 710 | optional bool mirror = 6 [default = false]; 711 | // Force the encoded image to have 3 color channels 712 | optional bool force_encoded_color = 9 [default = false]; 713 | // Prefetch queue (Increase if data feeding bandwidth varies, within the 714 | // limit of device memory for GPU training) 715 | optional uint32 prefetch = 10 [default = 4]; 716 | } 717 | 718 | message DropoutParameter { 719 | optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio 720 | } 721 | 722 | // DummyDataLayer fills any number of arbitrarily shaped blobs with random 723 | // (or constant) data generated by "Fillers" (see "message FillerParameter"). 724 | message DummyDataParameter { 725 | // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N 726 | // shape fields, and 0, 1 or N data_fillers. 727 | // 728 | // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. 729 | // If 1 data_filler is specified, it is applied to all top blobs. If N are 730 | // specified, the ith is applied to the ith top blob. 731 | repeated FillerParameter data_filler = 1; 732 | repeated BlobShape shape = 6; 733 | 734 | // 4D dimensions -- deprecated. Use "shape" instead. 
735 | repeated uint32 num = 2; 736 | repeated uint32 channels = 3; 737 | repeated uint32 height = 4; 738 | repeated uint32 width = 5; 739 | } 740 | 741 | message EltwiseParameter { 742 | enum EltwiseOp { 743 | PROD = 0; 744 | SUM = 1; 745 | MAX = 2; 746 | } 747 | optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation 748 | repeated float coeff = 2; // blob-wise coefficient for SUM operation 749 | 750 | // Whether to use an asymptotically slower (for >2 inputs) but stabler method 751 | // of computing the gradient for the PROD operation. (No effect for SUM op.) 752 | optional bool stable_prod_grad = 3 [default = true]; 753 | } 754 | 755 | // Message that stores parameters used by ELULayer 756 | message ELUParameter { 757 | // Described in: 758 | // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate 759 | // Deep Network Learning by Exponential Linear Units (ELUs). arXiv 760 | optional float alpha = 1 [default = 1]; 761 | } 762 | 763 | // Message that stores parameters used by EmbedLayer 764 | message EmbedParameter { 765 | optional uint32 num_output = 1; // The number of outputs for the layer 766 | // The input is given as integers to be interpreted as one-hot 767 | // vector indices with dimension input_dim. Hence input_dim should be 768 | // 1 greater than the maximum possible input value. 769 | optional uint32 input_dim = 2; 770 | 771 | optional bool bias_term = 3 [default = true]; // Whether to use a bias term 772 | optional FillerParameter weight_filler = 4; // The filler for the weight 773 | optional FillerParameter bias_filler = 5; // The filler for the bias 774 | 775 | } 776 | 777 | // Message that stores parameters used by ExpLayer 778 | message ExpParameter { 779 | // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. 780 | // Or if base is set to the default (-1), base is set to e, 781 | // so y = exp(shift + scale * x).
782 | optional float base = 1 [default = -1.0]; 783 | optional float scale = 2 [default = 1.0]; 784 | optional float shift = 3 [default = 0.0]; 785 | } 786 | 787 | /// Message that stores parameters used by FlattenLayer 788 | message FlattenParameter { 789 | // The first axis to flatten: all preceding axes are retained in the output. 790 | // May be negative to index from the end (e.g., -1 for the last axis). 791 | optional int32 axis = 1 [default = 1]; 792 | 793 | // The last axis to flatten: all following axes are retained in the output. 794 | // May be negative to index from the end (e.g., the default -1 for the last 795 | // axis). 796 | optional int32 end_axis = 2 [default = -1]; 797 | } 798 | 799 | // Message that stores parameters used by HDF5DataLayer 800 | message HDF5DataParameter { 801 | // Specify the data source. 802 | optional string source = 1; 803 | // Specify the batch size. 804 | optional uint32 batch_size = 2; 805 | 806 | // Specify whether to shuffle the data. 807 | // If shuffle == true, the ordering of the HDF5 files is shuffled, 808 | // and the ordering of data within any given HDF5 file is shuffled, 809 | // but data between different files are not interleaved; all of a file's 810 | // data are output (in a random order) before moving onto another file. 811 | optional bool shuffle = 3 [default = false]; 812 | } 813 | 814 | message HDF5OutputParameter { 815 | optional string file_name = 1; 816 | } 817 | 818 | message HingeLossParameter { 819 | enum Norm { 820 | L1 = 1; 821 | L2 = 2; 822 | } 823 | // Specify the Norm to use L1 or L2 824 | optional Norm norm = 1 [default = L1]; 825 | } 826 | 827 | message ImageDataParameter { 828 | // Specify the data source. 829 | optional string source = 1; 830 | // Specify the batch size. 831 | optional uint32 batch_size = 4 [default = 1]; 832 | // The rand_skip variable is for the data layer to skip a few data points 833 | // to avoid all asynchronous sgd clients to start at the same point. 
The skip 834 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 835 | // be larger than the number of keys in the database. 836 | optional uint32 rand_skip = 7 [default = 0]; 837 | // Whether or not ImageLayer should shuffle the list of files at every epoch. 838 | optional bool shuffle = 8 [default = false]; 839 | // It will also resize images if new_height or new_width are not zero. 840 | optional uint32 new_height = 9 [default = 0]; 841 | optional uint32 new_width = 10 [default = 0]; 842 | // Specify if the images are color or gray 843 | optional bool is_color = 11 [default = true]; 844 | // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 845 | // simple scaling and subtracting the data mean, if provided. Note that the 846 | // mean subtraction is always carried out before scaling. 847 | optional float scale = 2 [default = 1]; 848 | optional string mean_file = 3; 849 | // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 850 | // crop an image. 851 | optional uint32 crop_size = 5 [default = 0]; 852 | // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 853 | // data. 854 | optional bool mirror = 6 [default = false]; 855 | optional string root_folder = 12 [default = ""]; 856 | } 857 | 858 | message InfogainLossParameter { 859 | // Specify the infogain matrix source. 
860 | optional string source = 1; 861 | optional int32 axis = 2 [default = 1]; // axis of prob 862 | } 863 | 864 | message InnerProductParameter { 865 | optional uint32 num_output = 1; // The number of outputs for the layer 866 | optional bool bias_term = 2 [default = true]; // whether to have bias terms 867 | optional FillerParameter weight_filler = 3; // The filler for the weight 868 | optional FillerParameter bias_filler = 4; // The filler for the bias 869 | 870 | // The first axis to be lumped into a single inner product computation; 871 | // all preceding axes are retained in the output. 872 | // May be negative to index from the end (e.g., -1 for the last axis). 873 | optional int32 axis = 5 [default = 1]; 874 | // Specify whether to transpose the weight matrix or not. 875 | // If transpose == true, any operations will be performed on the transpose 876 | // of the weight matrix. The weight matrix itself is not going to be transposed 877 | // but rather the transfer flag of operations will be toggled accordingly. 878 | optional bool transpose = 6 [default = false]; 879 | } 880 | 881 | message InputParameter { 882 | // This layer produces N >= 1 top blob(s) to be assigned manually. 883 | // Define N shapes to set a shape for each top. 884 | // Define 1 shape to set the same shape for every top. 885 | // Define no shape to defer to reshaping manually. 886 | repeated BlobShape shape = 1; 887 | } 888 | 889 | // Message that stores parameters used by LogLayer 890 | message LogParameter { 891 | // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. 
892 | // Or if base is set to the default (-1), base is set to e, 893 | // so y = ln(shift + scale * x) = log_e(shift + scale * x) 894 | optional float base = 1 [default = -1.0]; 895 | optional float scale = 2 [default = 1.0]; 896 | optional float shift = 3 [default = 0.0]; 897 | } 898 | 899 | // Message that stores parameters used by LRNLayer 900 | message LRNParameter { 901 | optional uint32 local_size = 1 [default = 5]; 902 | optional float alpha = 2 [default = 1.]; 903 | optional float beta = 3 [default = 0.75]; 904 | enum NormRegion { 905 | ACROSS_CHANNELS = 0; 906 | WITHIN_CHANNEL = 1; 907 | } 908 | optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; 909 | optional float k = 5 [default = 1.]; 910 | enum Engine { 911 | DEFAULT = 0; 912 | CAFFE = 1; 913 | CUDNN = 2; 914 | } 915 | optional Engine engine = 6 [default = DEFAULT]; 916 | } 917 | 918 | message MemoryDataParameter { 919 | optional uint32 batch_size = 1; 920 | optional uint32 channels = 2; 921 | optional uint32 height = 3; 922 | optional uint32 width = 4; 923 | } 924 | 925 | message MVNParameter { 926 | // This parameter can be set to false to normalize mean only 927 | optional bool normalize_variance = 1 [default = true]; 928 | 929 | // This parameter can be set to true to perform DNN-like MVN 930 | optional bool across_channels = 2 [default = false]; 931 | 932 | // Epsilon for not dividing by zero while normalizing variance 933 | optional float eps = 3 [default = 1e-9]; 934 | } 935 | 936 | message ParameterParameter { 937 | optional BlobShape shape = 1; 938 | } 939 | 940 | message PoolingParameter { 941 | enum PoolMethod { 942 | MAX = 0; 943 | AVE = 1; 944 | STOCHASTIC = 2; 945 | } 946 | optional PoolMethod pool = 1 [default = MAX]; // The pooling method 947 | // Pad, kernel size, and stride are all given as a single value for equal 948 | // dimensions in height and width or as Y, X pairs. 
949 | optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) 950 | optional uint32 pad_h = 9 [default = 0]; // The padding height 951 | optional uint32 pad_w = 10 [default = 0]; // The padding width 952 | optional uint32 kernel_size = 2; // The kernel size (square) 953 | optional uint32 kernel_h = 5; // The kernel height 954 | optional uint32 kernel_w = 6; // The kernel width 955 | optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) 956 | optional uint32 stride_h = 7; // The stride height 957 | optional uint32 stride_w = 8; // The stride width 958 | enum Engine { 959 | DEFAULT = 0; 960 | CAFFE = 1; 961 | CUDNN = 2; 962 | } 963 | optional Engine engine = 11 [default = DEFAULT]; 964 | // If global_pooling then it will pool over the size of the bottom by doing 965 | // kernel_h = bottom->height and kernel_w = bottom->width 966 | optional bool global_pooling = 12 [default = false]; 967 | // How to calculate the output size - using ceil (default) or floor rounding. 968 | enum RoundMode { 969 | CEIL = 0; 970 | FLOOR = 1; 971 | } 972 | optional RoundMode round_mode = 13 [default = CEIL]; 973 | } 974 | 975 | message PowerParameter { 976 | // PowerLayer computes outputs y = (shift + scale * x) ^ power. 977 | optional float power = 1 [default = 1.0]; 978 | optional float scale = 2 [default = 1.0]; 979 | optional float shift = 3 [default = 0.0]; 980 | } 981 | 982 | message PythonParameter { 983 | optional string module = 1; 984 | optional string layer = 2; 985 | // This value is set to the attribute `param_str` of the `PythonLayer` object 986 | // in Python before calling the `setup()` method. This could be a number, 987 | // string, dictionary in Python dict format, JSON, etc. You may parse this 988 | // string in `setup` method and use it in `forward` and `backward`. 
989 | optional string param_str = 3 [default = '']; 990 | // DEPRECATED 991 | optional bool share_in_parallel = 4 [default = false]; 992 | } 993 | 994 | // Message that stores parameters used by RecurrentLayer 995 | message RecurrentParameter { 996 | // The dimension of the output (and usually hidden state) representation -- 997 | // must be explicitly set to non-zero. 998 | optional uint32 num_output = 1 [default = 0]; 999 | 1000 | optional FillerParameter weight_filler = 2; // The filler for the weight 1001 | optional FillerParameter bias_filler = 3; // The filler for the bias 1002 | 1003 | // Whether to enable displaying debug_info in the unrolled recurrent net. 1004 | optional bool debug_info = 4 [default = false]; 1005 | 1006 | // Whether to add as additional inputs (bottoms) the initial hidden state 1007 | // blobs, and add as additional outputs (tops) the final timestep hidden state 1008 | // blobs. The number of additional bottom/top blobs required depends on the 1009 | // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs. 1010 | optional bool expose_hidden = 5 [default = false]; 1011 | } 1012 | 1013 | // Message that stores parameters used by ReductionLayer 1014 | message ReductionParameter { 1015 | enum ReductionOp { 1016 | SUM = 1; 1017 | ASUM = 2; 1018 | SUMSQ = 3; 1019 | MEAN = 4; 1020 | } 1021 | 1022 | optional ReductionOp operation = 1 [default = SUM]; // reduction operation 1023 | 1024 | // The first axis to reduce to a scalar -- may be negative to index from the 1025 | // end (e.g., -1 for the last axis). 1026 | // (Currently, only reduction along ALL "tail" axes is supported; reduction 1027 | // of axis M through N, where N < num_axes - 1, is unsupported.) 1028 | // Suppose we have an n-axis bottom Blob with shape: 1029 | // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). 1030 | // If axis == m, the output Blob will have shape 1031 | // (d0, d1, d2, ..., d(m-1)), 1032 | // and the ReductionOp operation is performed (d0 * d1 * d2 * ... 
* d(m-1)) 1033 | // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. 1034 | // If axis == 0 (the default), the output Blob always has the empty shape 1035 | // (count 1), performing reduction across the entire input -- 1036 | // often useful for creating new loss functions. 1037 | optional int32 axis = 2 [default = 0]; 1038 | 1039 | optional float coeff = 3 [default = 1.0]; // coefficient for output 1040 | } 1041 | 1042 | // Message that stores parameters used by ReLULayer 1043 | message ReLUParameter { 1044 | // Allow non-zero slope for negative inputs to speed up optimization 1045 | // Described in: 1046 | // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities 1047 | // improve neural network acoustic models. In ICML Workshop on Deep Learning 1048 | // for Audio, Speech, and Language Processing. 1049 | optional float negative_slope = 1 [default = 0]; 1050 | enum Engine { 1051 | DEFAULT = 0; 1052 | CAFFE = 1; 1053 | CUDNN = 2; 1054 | } 1055 | optional Engine engine = 2 [default = DEFAULT]; 1056 | } 1057 | 1058 | message ReshapeParameter { 1059 | // Specify the output dimensions. If some of the dimensions are set to 0, 1060 | // the corresponding dimension from the bottom layer is used (unchanged). 1061 | // Exactly one dimension may be set to -1, in which case its value is 1062 | // inferred from the count of the bottom blob and the remaining dimensions. 1063 | // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: 1064 | // 1065 | // layer { 1066 | // type: "Reshape" bottom: "input" top: "output" 1067 | // reshape_param { ... 
} 1068 | // } 1069 | // 1070 | // If "input" is 2D with shape 2 x 8, then the following reshape_param 1071 | // specifications are all equivalent, producing a 3D blob "output" with shape 1072 | // 2 x 2 x 4: 1073 | // 1074 | // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1075 | // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } 1076 | // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } 1077 | // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } 1078 | // 1079 | optional BlobShape shape = 1; 1080 | 1081 | // axis and num_axes control the portion of the bottom blob's shape that are 1082 | // replaced by (included in) the reshape. By default (axis == 0 and 1083 | // num_axes == -1), the entire bottom blob shape is included in the reshape, 1084 | // and hence the shape field must specify the entire output shape. 1085 | // 1086 | // axis may be non-zero to retain some portion of the beginning of the input 1087 | // shape (and may be negative to index from the end; e.g., -1 to begin the 1088 | // reshape after the last axis, including nothing in the reshape, 1089 | // -2 to include only the last axis, etc.). 1090 | // 1091 | // For example, suppose "input" is a 2D blob with shape 2 x 8. 1092 | // Then the following ReshapeLayer specifications are all equivalent, 1093 | // producing a blob "output" with shape 2 x 2 x 4: 1094 | // 1095 | // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1096 | // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } 1097 | // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } 1098 | // 1099 | // num_axes specifies the extent of the reshape. 1100 | // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on 1101 | // input axes in the half-open range [axis, axis+num_axes). 1102 | // num_axes may also be -1, the default, to include all remaining axes 1103 | // (starting from axis). 1104 | // 1105 | // For example, suppose "input" is a 2D blob with shape 2 x 8.
1106 | // Then the following ReshapeLayer specifications are equivalent, 1107 | // producing a blob "output" with shape 1 x 2 x 8. 1108 | // 1109 | // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } 1110 | // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } 1111 | // reshape_param { shape { dim: 1 } num_axes: 0 } 1112 | // 1113 | // On the other hand, these would produce output blob shape 2 x 1 x 8: 1114 | // 1115 | // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } 1116 | // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } 1117 | // 1118 | optional int32 axis = 2 [default = 0]; 1119 | optional int32 num_axes = 3 [default = -1]; 1120 | } 1121 | 1122 | message ScaleParameter { 1123 | // The first axis of bottom[0] (the first input Blob) along which to apply 1124 | // bottom[1] (the second input Blob). May be negative to index from the end 1125 | // (e.g., -1 for the last axis). 1126 | // 1127 | // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 1128 | // top[0] will have the same shape, and bottom[1] may have any of the 1129 | // following shapes (for the given value of axis): 1130 | // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 1131 | // (axis == 1 == -3) 3; 3x40; 3x40x60 1132 | // (axis == 2 == -2) 40; 40x60 1133 | // (axis == 3 == -1) 60 1134 | // Furthermore, bottom[1] may have the empty shape (regardless of the value of 1135 | // "axis") -- a scalar multiplier. 1136 | optional int32 axis = 1 [default = 1]; 1137 | 1138 | // (num_axes is ignored unless just one bottom is given and the scale is 1139 | // a learned parameter of the layer. Otherwise, num_axes is determined by the 1140 | // number of axes by the second bottom.) 1141 | // The number of axes of the input (bottom[0]) covered by the scale 1142 | // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 1143 | // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. 
1144 | optional int32 num_axes = 2 [default = 1]; 1145 | 1146 | // (filler is ignored unless just one bottom is given and the scale is 1147 | // a learned parameter of the layer.) 1148 | // The initialization for the learned scale parameter. 1149 | // Default is the unit (1) initialization, resulting in the ScaleLayer 1150 | // initially performing the identity operation. 1151 | optional FillerParameter filler = 3; 1152 | 1153 | // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but 1154 | // may be more efficient). Initialized with bias_filler (defaults to 0). 1155 | optional bool bias_term = 4 [default = false]; 1156 | optional FillerParameter bias_filler = 5; 1157 | } 1158 | 1159 | message SigmoidParameter { 1160 | enum Engine { 1161 | DEFAULT = 0; 1162 | CAFFE = 1; 1163 | CUDNN = 2; 1164 | } 1165 | optional Engine engine = 1 [default = DEFAULT]; 1166 | } 1167 | 1168 | message SliceParameter { 1169 | // The axis along which to slice -- may be negative to index from the end 1170 | // (e.g., -1 for the last axis). 1171 | // By default, SliceLayer slices blobs along the "channels" axis (1). 1172 | optional int32 axis = 3 [default = 1]; 1173 | repeated uint32 slice_point = 2; 1174 | 1175 | // DEPRECATED: alias for "axis" -- does not support negative indexing. 1176 | optional uint32 slice_dim = 1 [default = 1]; 1177 | } 1178 | 1179 | // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer 1180 | message SoftmaxParameter { 1181 | enum Engine { 1182 | DEFAULT = 0; 1183 | CAFFE = 1; 1184 | CUDNN = 2; 1185 | } 1186 | optional Engine engine = 1 [default = DEFAULT]; 1187 | 1188 | // The axis along which to perform the softmax -- may be negative to index 1189 | // from the end (e.g., -1 for the last axis). 1190 | // Any other axes will be evaluated as independent softmaxes.
1191 | optional int32 axis = 2 [default = 1]; 1192 | } 1193 | 1194 | // Message that stores parameters used by SwishLayer 1195 | message SwishParameter { 1196 | // Beta parameter for the Swish activation function 1197 | // Described in: 1198 | // Prajit Ramachandran, Barret Zoph, Quoc V. Le. (2017). Searching for 1199 | // Activation Functions. https://arxiv.org/abs/1710.05941v2 1200 | optional float beta = 1 [default = 1]; 1201 | } 1202 | 1203 | message TanHParameter { 1204 | enum Engine { 1205 | DEFAULT = 0; 1206 | CAFFE = 1; 1207 | CUDNN = 2; 1208 | } 1209 | optional Engine engine = 1 [default = DEFAULT]; 1210 | } 1211 | 1212 | // Message that stores parameters used by TileLayer 1213 | message TileParameter { 1214 | // The index of the axis to tile. 1215 | optional int32 axis = 1 [default = 1]; 1216 | 1217 | // The number of copies (tiles) of the blob to output. 1218 | optional int32 tiles = 2; 1219 | } 1220 | 1221 | // Message that stores parameters used by ThresholdLayer 1222 | message ThresholdParameter { 1223 | optional float threshold = 1 [default = 0]; // Strictly positive values 1224 | } 1225 | 1226 | message WindowDataParameter { 1227 | // Specify the data source. 1228 | optional string source = 1; 1229 | // For data pre-processing, we can do simple scaling and subtracting the 1230 | // data mean, if provided. Note that the mean subtraction is always carried 1231 | // out before scaling. 1232 | optional float scale = 2 [default = 1]; 1233 | optional string mean_file = 3; 1234 | // Specify the batch size. 1235 | optional uint32 batch_size = 4; 1236 | // Specify if we would like to randomly crop an image. 1237 | optional uint32 crop_size = 5 [default = 0]; 1238 | // Specify if we want to randomly mirror data. 
// Parameters for spatial pyramid pooling.
// NOTE(review): presumably consumed by Caffe's SPPLayer -- confirm.
message SPPParameter {
  // Pooling operations selectable through `pool`.
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  // NOTE(review): presumably the number of pyramid levels -- confirm against
  // the layer implementation. No explicit default is declared.
  optional uint32 pyramid_height = 1;
  optional PoolMethod pool = 2 [default = MAX]; // The pooling method
  // Implementation backend selector. The same three values are declared by
  // the Engine enums of SigmoidParameter, SoftmaxParameter and TanHParameter.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  // Uses field number 6; numbers 3-5 are not used by this message.
  optional Engine engine = 6 [default = DEFAULT];
}
1276 | message V1LayerParameter { 1277 | repeated string bottom = 2; 1278 | repeated string top = 3; 1279 | optional string name = 4; 1280 | repeated NetStateRule include = 32; 1281 | repeated NetStateRule exclude = 33; 1282 | enum LayerType { 1283 | NONE = 0; 1284 | ABSVAL = 35; 1285 | ACCURACY = 1; 1286 | ARGMAX = 30; 1287 | BNLL = 2; 1288 | CONCAT = 3; 1289 | CONTRASTIVE_LOSS = 37; 1290 | CONVOLUTION = 4; 1291 | UPSAMPLE = 40; 1292 | DATA = 5; 1293 | DECONVOLUTION = 39; 1294 | DROPOUT = 6; 1295 | DUMMY_DATA = 32; 1296 | EUCLIDEAN_LOSS = 7; 1297 | ELTWISE = 25; 1298 | EXP = 38; 1299 | FLATTEN = 8; 1300 | HDF5_DATA = 9; 1301 | HDF5_OUTPUT = 10; 1302 | HINGE_LOSS = 28; 1303 | IM2COL = 11; 1304 | IMAGE_DATA = 12; 1305 | INFOGAIN_LOSS = 13; 1306 | INNER_PRODUCT = 14; 1307 | LRN = 15; 1308 | MEMORY_DATA = 29; 1309 | MULTINOMIAL_LOGISTIC_LOSS = 16; 1310 | MVN = 34; 1311 | POOLING = 17; 1312 | POWER = 26; 1313 | RELU = 18; 1314 | SIGMOID = 19; 1315 | SIGMOID_CROSS_ENTROPY_LOSS = 27; 1316 | SILENCE = 36; 1317 | SOFTMAX = 20; 1318 | SOFTMAX_LOSS = 21; 1319 | SPLIT = 22; 1320 | SLICE = 33; 1321 | TANH = 23; 1322 | WINDOW_DATA = 24; 1323 | THRESHOLD = 31; 1324 | } 1325 | optional LayerType type = 5; 1326 | repeated BlobProto blobs = 6; 1327 | repeated string param = 1001; 1328 | repeated DimCheckMode blob_share_mode = 1002; 1329 | enum DimCheckMode { 1330 | STRICT = 0; 1331 | PERMISSIVE = 1; 1332 | } 1333 | repeated float blobs_lr = 7; 1334 | repeated float weight_decay = 8; 1335 | repeated float loss_weight = 35; 1336 | optional AccuracyParameter accuracy_param = 27; 1337 | optional ArgMaxParameter argmax_param = 23; 1338 | optional ConcatParameter concat_param = 9; 1339 | optional ContrastiveLossParameter contrastive_loss_param = 40; 1340 | optional ConvolutionParameter convolution_param = 10; 1341 | optional UpsampleParameter upsample_param = 43; 1342 | optional DataParameter data_param = 11; 1343 | optional DropoutParameter dropout_param = 12; 1344 | optional 
// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
// in Caffe. We keep this message type around for legacy support.
// Unlike the newer per-layer *Parameter messages, every layer kind shares this
// one flat message, so most fields only apply to some layer types.
message V0LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the string to specify the layer type

  // Parameters to specify layers with inner products.
  optional uint32 num_output = 3; // The number of outputs for the layer
  optional bool biasterm = 4 [default = true]; // whether to have bias terms
  optional FillerParameter weight_filler = 5; // The filler for the weight
  optional FillerParameter bias_filler = 6; // The filler for the bias

  optional uint32 pad = 7 [default = 0]; // The padding size
  optional uint32 kernelsize = 8; // The kernel size
  optional uint32 group = 9 [default = 1]; // The group size for group conv
  optional uint32 stride = 10 [default = 1]; // The stride
  // Pooling operations selectable through `pool`.
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional PoolMethod pool = 11 [default = MAX]; // The pooling method
  optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio

  optional uint32 local_size = 13 [default = 5]; // for local response norm
  optional float alpha = 14 [default = 1.]; // for local response norm
  optional float beta = 15 [default = 0.75]; // for local response norm
  // NOTE(review): grouped with the local response norm fields above, so
  // presumably the LRN bias term -- confirm. Note the out-of-sequence field
  // number (22).
  optional float k = 22 [default = 1.];

  // For data layers, specify the data source
  optional string source = 16;
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 17 [default = 1];
  // NOTE(review): presumably the file holding the data mean referred to
  // above -- confirm.
  optional string meanfile = 18;
  // For data layers, specify the batch size.
  optional uint32 batchsize = 19;
  // For data layers, specify if we would like to randomly crop an image.
  optional uint32 cropsize = 20 [default = 0];
  // For data layers, specify if we want to randomly mirror data.
  optional bool mirror = 21 [default = false];

  // The blobs containing the numeric parameters of the layer
  repeated BlobProto blobs = 50;
  // The ratio that is multiplied on the global learning rate. If you want to
  // set the learning ratio for one blob, you need to set it for all blobs.
  repeated float blobs_lr = 51;
  // The weight decay that is multiplied on the global weight decay.
  repeated float weight_decay = 52;

  // The rand_skip variable lets the data layer skip a few data points so
  // that asynchronous SGD clients do not all start at the same point. The
  // skip point is set as rand_skip * rand(0,1). Note that rand_skip should
  // not be larger than the number of keys in the database.
  optional uint32 rand_skip = 53 [default = 0];

  // Fields related to detection (det_*)
  // foreground (object) overlap threshold
  optional float det_fg_threshold = 54 [default = 0.5];
  // background (non-object) overlap threshold
  optional float det_bg_threshold = 55 [default = 0.5];
  // Fraction of batch that should be foreground objects
  optional float det_fg_fraction = 56 [default = 0.25];

  // Field number 57 belonged to the obsolete field below; it is kept as a
  // comment so the number is not reused by accident.
  // optional bool OBSOLETE_can_clobber = 57 [default = true];

  // Amount of contextual padding to add around a window
  // (used only by the window_data_layer)
  optional uint32 det_context_pad = 58 [default = 0];

  // Mode for cropping out a detection window
  // warp: cropped window is warped to a fixed size and aspect ratio
  // square: the tightest square around the window is cropped
  optional string det_crop_mode = 59 [default = "warp"];

  // For ReshapeLayer, one needs to specify the new dimensions.
  optional int32 new_num = 60 [default = 0];
  optional int32 new_channels = 61 [default = 0];
  optional int32 new_height = 62 [default = 0];
  optional int32 new_width = 63 [default = 0];

  // Whether or not ImageLayer should shuffle the list of files at every epoch.
  // It will also resize images if new_height or new_width are not zero.
  optional bool shuffle_images = 64 [default = false];

  // For ConcatLayer, one needs to specify the dimension for concatenation, and
  // the other dimensions must be the same for all the bottom blobs.
  // By default it will concatenate blobs along the channels dimension.
  optional uint32 concat_dim = 65 [default = 1];

  // Settings for HDF5 output, described by HDF5OutputParameter.
  optional HDF5OutputParameter hdf5_output_param = 1001;
}
// Parameters describing how prior (default) boxes are generated.
// NOTE(review): presumably consumed by the SSD PriorBox layer -- confirm.
message PriorBoxParameter {
  // Encode/decode type.
  enum CodeType {
    CORNER = 1;
    CENTER_SIZE = 2;
    CORNER_SIZE = 3;
  }
  // Minimum box size (in pixels). Required!
  // Corresponds to s_k in equation (4), section 2.2 of the paper, multiplied
  // by the size of the network input image (the input of the data layer).
  // NOTE(review): "the paper" is not named here; presumably the SSD paper.
  repeated float min_size = 1;
  // Maximum box size (in pixels). Required!
  repeated float max_size = 2;
  // Various aspect ratios. Duplicate ratios will be ignored.
  // If none is provided, we use default ratio 1.
  repeated float aspect_ratio = 3; // aspect ratio
  // If true, will flip each aspect ratio.
  // For example, if there is aspect ratio "r",
  // we will generate aspect ratio "1.0/r" as well.
  optional bool flip = 4 [default = true]; // whether to flip the aspect ratio
  // If true, will clip the prior so that it is within [0, 1]
  // (i.e. whether each default box is kept entirely inside the network
  // input image).
  optional bool clip = 5 [default = false];
  // Variance for adjusting the prior bboxes.
  repeated float variance = 6;
  // By default, we calculate img_height, img_width, step_x, step_y based on
  // bottom[0] (feat) and bottom[1] (img), unless these values are explicitly
  // provided.
  // Explicitly provide the img_size.
  optional uint32 img_size = 7;
  // Either img_size or img_h/img_w should be specified; not both.
  // Height of the network input image (or a user-specified height).
  optional uint32 img_h = 8;
  // Width of the network input image (or a user-specified width).
  optional uint32 img_w = 9;

  // Explicitly provide the step size.
  optional float step = 10;
  // Either step or step_h/step_w should be specified; not both.
  optional float step_h = 11;
  optional float step_w = 12;

  // Offset to the top left corner of each cell.
  optional float offset = 13 [default = 0.5];
}
// Describes one resize policy: how an image is resized, padded and
// interpolated.
message ResizeParameter {
  // Probability of using this resize policy
  optional float prob = 1 [default = 1];

  // Available resize strategies, selected through `resize_mode`.
  enum Resize_mode {
    WARP = 1;
    FIT_SMALL_SIZE = 2;
    FIT_LARGE_SIZE_AND_PAD = 3;
  }
  optional Resize_mode resize_mode = 2 [default = WARP];
  // NOTE(review): presumably the target output size in pixels -- confirm.
  optional uint32 height = 3 [default = 0];
  optional uint32 width = 4 [default = 0];
  // Parameters used to update bbox in FIT_SMALL_SIZE mode.
  optional uint32 height_scale = 8 [default = 0];
  optional uint32 width_scale = 9 [default = 0];

  // Available padding strategies, selected through `pad_mode`.
  enum Pad_mode {
    CONSTANT = 1;
    MIRRORED = 2;
    REPEAT_NEAREST = 3;
  }
  // Padding mode for the pad-based resize mode and object centering.
  // NOTE(review): the earlier wording named "BE_SMALL_SIZE_AND_PAD", which is
  // not a Resize_mode value; FIT_LARGE_SIZE_AND_PAD is presumably meant.
  optional Pad_mode pad_mode = 5 [default = CONSTANT];
  // If specified, can be given once (the value fills all the channels)
  // or once per channel (each value is applied to the corresponding channel).
  repeated float pad_value = 6;

  enum Interp_mode { // Same as in OpenCV
    LINEAR = 1;
    AREA = 2;
    NEAREST = 3;
    CUBIC = 4;
    LANCZOS4 = 5;
  }
  // Interpolation mode(s) for resizing
  repeated Interp_mode interp_mode = 7;
}
def need_add_reshape(input_shape: List[List]) -> bool:
    """Report whether the first two input shapes have different ranks.

    Args:
        input_shape: list of shapes; only entries 0 and 1 are inspected.

    Returns:
        True when ``len(input_shape[0])`` differs from ``len(input_shape[1])``,
        False when both shapes have the same number of dimensions.
    """
    first_rank = len(input_shape[0])
    second_rank = len(input_shape[1])
    return not first_rank == second_rank
def create_axpy_add_node(layer, node_name, input_name, output_name, input_shape):
    """Create the ONNX ``Add`` node that finishes an Axpy layer.

    The node adds the intermediate product tensor to the third bottom blob.
    The product tensor is referenced as ``output_name[0] + "_mul"``, which is
    the name ``create_axpy_mul_node`` gives to its output. The previous
    ``node_name + "_mul"`` only matched when the layer name happened to equal
    its top blob name.

    Args:
        layer: the Caffe layer being converted.
        node_name: name for the ONNX node.
        input_name: names of the three bottom blobs; only index 2 is used here.
        output_name: names of the layer outputs; index 0 also seeds the name
            of the intermediate product tensor.
        input_shape: shapes of the three bottom blobs.

    Returns:
        The ``c2oNode`` describing the ``Add`` operation.
    """
    output_shape = get_add_output_shape(input_shape)

    # The two operands of Add: the Mul result (shaped like bottom 1) and
    # bottom 2. Names and shapes are kept aligned one-to-one.
    add_input_name = [output_name[0] + "_mul", input_name[2]]
    add_input_shape = [input_shape[1], input_shape[2]]

    return Node.c2oNode(layer, node_name, "Add", add_input_name, output_name,
                        add_input_shape, output_shape)
def get_attribute(layer):
    """Return the clip bounds for ``layer`` as a ``{'max': ..., 'min': ...}`` dict.

    For a layer whose ``type`` is ``'ReLU6'`` the bounds are ``max=6.0`` and
    ``min=0``. For any other layer type both bounds are left at 0.

    The earlier version built this dict but returned a different, empty one,
    so callers never saw the bounds.
    """
    max_attribute = 0
    min_attribute = 0
    if layer.type == 'ReLU6':
        max_attribute = 6.0
        min_attribute = 0

    return {
        'max': max_attribute,
        'min': min_attribute,
    }
def get_concat_outshape(layer, input_shape: List) -> List:
    """Compute the output shape of a Concat layer.

    The result is a copy of the first input shape in which the entry at
    ``layer.concat_param.axis`` is replaced by the sum of that entry over all
    inputs. The input shapes themselves are not modified.

    Args:
        layer: Caffe layer; only ``concat_param.axis`` is read.
        input_shape: list of input shapes, one per bottom blob.

    Returns:
        A one-element list holding the concatenated shape.
    """
    first_shape = input_shape[0]
    concat_axis = layer.concat_param.axis

    merged_shape = copy.deepcopy(first_shape)

    assert (concat_axis < len(first_shape))

    # Accumulate the remaining inputs onto the first one along the axis.
    merged_shape[concat_axis] = sum(
        (other[concat_axis] for other in input_shape[1:]),
        merged_shape[concat_axis],
    )
    return [merged_shape]
def getConvOutShape(input_shape, layer, dict):
    """Compute the output shape of a 2-D convolution.

    Follows the output-size formula referenced from the Caffe source: for each
    spatial axis, ``floor((in + 2 * pad - extent) / stride) + 1`` with
    ``extent = dilation * (kernel - 1) + 1``. Only ``pads[0]`` and ``pads[1]``
    are read, each counted twice.

    Args:
        input_shape: list of input shapes; ``input_shape[0]`` is indexed as
            (batch, channels, height, width).
        layer: Caffe layer; ``convolution_param.num_output`` gives the output
            channel count.
        dict: attribute mapping with ``"dilations"``, ``"kernel_shape"``,
            ``"pads"`` and ``"strides"`` entries.

    Returns:
        A one-element list ``[[batch, num_output, out_h, out_w]]``.
    """
    bottom = input_shape[0]

    spatial = []
    # idx 0 is the height axis (bottom[2]), idx 1 the width axis (bottom[3]).
    for idx in (0, 1):
        extent = dict["dilations"][idx] * (dict["kernel_shape"][idx] - 1) + 1
        padded = bottom[idx + 2] + 2 * dict["pads"][idx]
        spatial.append(math.floor((padded - extent) / dict["strides"][idx]) + 1)

    return [[bottom[0], layer.convolution_param.num_output, spatial[0], spatial[1]]]
def getConvTransposeAttri(layer):
    """Build the ONNX ``ConvTranspose`` attributes from a Caffe layer.

    Reads ``layer.convolution_param`` and returns a dict with the keys
    ``dilations``, ``group``, ``kernel_shape``, ``pads`` and ``strides``.

    The repeated fields ``dilation``, ``pad``, ``stride`` and ``kernel_size``
    may hold one value (applied to both height and width) or two values
    (height, width). The earlier expansion duplicated the whole list, which
    produced 8 pads / 4 strides / 4 kernel dims for the two-value form and
    ignored the second dilation.

    Fallbacks when a repeated field is empty:
        * dilations: ``[1, 1]``
        * pads: ``pad_h`` / ``pad_w`` if either is non-zero, else zeros
        * strides: ``stride_h`` / ``stride_w`` if both are non-zero, else ``[1, 1]``
        * kernel_shape: ``[kernel_h, kernel_w]``

    Raises:
        ValueError: if a repeated field holds more than two values.
    """
    param = layer.convolution_param

    def _per_axis(values, field):
        # Expand a repeated spatial field into a [height, width] pair.
        if len(values) == 1:
            return [values[0], values[0]]
        if len(values) == 2:
            return [values[0], values[1]]
        raise ValueError(
            "convolution_param.{} must hold 1 or 2 values, got {}".format(field, len(values)))

    dilation = list(param.dilation)
    dilations = _per_axis(dilation, "dilation") if dilation else [1, 1]

    pad = list(param.pad)
    if pad:
        pad_h, pad_w = _per_axis(pad, "pad")
        # ONNX layout: [h_begin, w_begin, h_end, w_end]; Caffe pads symmetrically.
        pads = [pad_h, pad_w, pad_h, pad_w]
    elif param.pad_h != 0 or param.pad_w != 0:
        pads = [param.pad_h, param.pad_w, param.pad_h, param.pad_w]
    else:
        pads = [0, 0, 0, 0]

    stride = list(param.stride)
    if stride:
        strides = _per_axis(stride, "stride")
    elif param.stride_h != 0 and param.stride_w != 0:
        strides = [param.stride_h, param.stride_w]
    else:
        strides = [1, 1]

    kernel_size = list(param.kernel_size)
    if kernel_size:
        kernel_shape = _per_axis(kernel_size, "kernel_size")
    else:
        kernel_shape = [param.kernel_h, param.kernel_w]

    return {
        "dilations": dilations,
        "group": param.group,
        "kernel_shape": kernel_shape,
        "pads": pads,
        "strides": strides,
    }
def get_crop_param(layer, input_shape):
    """Compute the slice parameters that implement a Caffe Crop layer.

    ``input_shape[0]`` is the shape being cropped and ``input_shape[1]`` the
    reference shape. Only dimensions whose sizes differ between the two are
    reported.

    Offsets come from ``layer.crop_param.offset``: dimensions before
    ``crop_param.axis`` start at 0; from ``axis`` onwards, dimension ``i``
    uses ``offset[i - axis]``, falling back to ``offset[0]`` (or 0 when no
    offset is given) once the list is exhausted.

    A negative ``axis`` is counted from the last dimension, as Caffe permits;
    previously it raised IndexError.

    Args:
        layer: Caffe layer; ``crop_param.axis`` and ``crop_param.offset`` are read.
        input_shape: ``[shape_to_crop, reference_shape]``.

    Returns:
        A ``(starts, ends, axes)`` tuple of equally long lists.

    Raises:
        ValueError: if a negative ``axis`` reaches before the first dimension.
    """
    source_shape, reference_shape = input_shape[0], input_shape[1]
    rank = len(source_shape)

    axis = layer.crop_param.axis
    if axis < 0:
        if axis < -rank:
            raise ValueError(
                "crop axis {} is out of range for a rank-{} input".format(axis, rank))
        axis += rank

    crop_offset = list(layer.crop_param.offset)
    default_offset = crop_offset[0] if crop_offset else 0

    starts = []
    ends = []
    axes = []
    for i in range(rank):
        if i < axis:
            start = 0
        else:
            k = i - axis
            start = crop_offset[k] if k < len(crop_offset) else default_offset
        end = start + reference_shape[i]

        # Dimensions that already match the reference need no slicing.
        if source_shape[i] != reference_shape[i]:
            axes.append(i)
            starts.append(start)
            ends.append(end)

    return starts, ends, axes
def create_crop_node(layer, node_name, input_name, output_name, input_shape):
    """Create the ``Slice`` node that stands in for a Caffe Crop layer.

    The output shape is obtained from ``get_crop_output_shape``; no attribute
    dict is passed to the node.

    Returns:
        The ``c2oNode`` describing the ``Slice`` operation.
    """
    cropped_shape = get_crop_output_shape(layer, input_shape)
    return Node.c2oNode(
        layer,
        node_name,
        "Slice",
        input_name,
        output_name,
        input_shape,
        cropped_shape,
    )
35 | # name_size_file: str = save_output_param.name_size_file 36 | # num_test_image: int = save_output_param.num_test_image 37 | 38 | 39 | 40 | attributes = { 41 | 'num_classes' : num_classes, 42 | 'share_location' : share_location, 43 | 'background_label_id' : background_label_id, 44 | 'nms_threshold' : nms_threshold, 45 | 'top_k' : top_k, 46 | 'eta' : eta, 47 | 'code_type' : code_type, 48 | 'variance_encoded_in_target' : variance_encoded_in_target, 49 | 'keep_top_k' : keep_top_k, 50 | 'confidence_threshold' : confidence_threshold, 51 | 'visualize' : visualize, 52 | 'visualize_threshold' : visualize_threshold, 53 | 'save_file' : save_file 54 | } 55 | return attributes 56 | 57 | 58 | def create_detection_output(layer, 59 | node_name: str, 60 | inputs_name: List[str], 61 | outputs_name: List[str], 62 | inputs_shape: List, ) -> onnx.NodeProto: 63 | 64 | attributes = create_attribuates(layer) 65 | 66 | outputs_shape = [[1, 1, 1, 7]] 67 | 68 | node = Node.c2oNode(layer, node_name, "DetectionOutput", 69 | inputs_name, outputs_name, 70 | inputs_shape, outputs_shape, 71 | attributes) 72 | return node 73 | -------------------------------------------------------------------------------- /caffe2onnx/src/OPs/Dropout.py: -------------------------------------------------------------------------------- 1 | import caffe2onnx.src.c2oObject as Node 2 | 3 | 4 | def getDropoutAttri(layer): 5 | 6 | ratio = layer.dropout_param.dropout_ratio 7 | ratio = 0.0 8 | 9 | dict = {"ratio":ratio} 10 | return dict 11 | 12 | 13 | def getDropoutOutShape(input_shape): 14 | output_shape = input_shape 15 | return output_shape 16 | 17 | 18 | def createDropout(layer, nodename, inname, outname, input_shape): 19 | dict = getDropoutAttri(layer) 20 | output_shape = getDropoutOutShape(input_shape) 21 | node = Node.c2oNode(layer, nodename, "Dropout", inname, outname, input_shape, output_shape, dict=dict) 22 | return node 23 | -------------------------------------------------------------------------------- 
# /caffe2onnx/src/OPs/Eltwise.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node


def createEltwise(layer, nodename, inname, outname, input_shape):
    """Build the ONNX node for an Eltwise layer.

    ``layer.eltwise_param.operation`` selects the operator:
    0 -> "Mul", 1 -> "Add", 2 -> "Max".

    Raises:
        ValueError: if the operation is none of the three handled values
            (previously this surfaced as an UnboundLocalError).
    """
    operation = layer.eltwise_param.operation
    if operation == 0:
        return __createMul(layer, nodename, inname, outname, input_shape)
    if operation == 1:
        return __createAdd(layer, nodename, inname, outname, input_shape)
    if operation == 2:
        return __createMax(layer, nodename, inname, outname, input_shape)
    raise ValueError("unsupported eltwise operation: {}".format(operation))


def __createMul(layer, nodename, inname, outname, input_shape):
    """Create a "Mul" node whose single output has the shape of the first input."""
    # Wrapped in a list so that it matches __createAdd: one shape per output.
    # NOTE(review): assumes c2oNode expects a list of shapes - confirm.
    output_shape = [input_shape[0]]
    node = Node.c2oNode(layer, nodename, "Mul", inname, outname, input_shape, output_shape)
    return node


def __createAdd(layer, nodename, inname, outname, input_shape):
    """Create an "Add" node whose single output has the shape of the first input."""
    output_shape = [input_shape[0]]
    node = Node.c2oNode(layer, nodename, "Add", inname, outname, input_shape, output_shape)
    return node


def __createMax(layer, nodename, inname, outname, input_shape):
    """Create a "Max" node whose single output has the shape of the first input."""
    # Previously the whole input_shape list was passed, i.e. one shape per input.
    output_shape = [input_shape[0]]
    node = Node.c2oNode(layer, nodename, "Max", inname, outname, input_shape, output_shape)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Flatten.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node
from typing import List, Dict
import onnx


def get_attributes(layer) -> Dict:
    """Return the Flatten attributes ({"axis": ...}) read from ``layer.flatten_param``.

    Only ``end_axis == -1`` is supported; any other value prints a message
    and terminates the process with exit code -1.
    """
    axis = layer.flatten_param.axis
    end_axis = layer.flatten_param.end_axis
    if end_axis != -1:
        print("not support end_axis param!")
        exit(-1)
    attributes = {
        "axis": axis
    }
    return attributes


def get_flatten_output_shape(input_shape: List,
                             attributes: Dict) -> List:
    """Return ``[shape[:axis] + [prod(shape[axis:])]]`` for the first input shape."""
    shape = input_shape[0]
    axis = attributes.get("axis")
    input_prod = 1
    for dim in shape[axis:]:
        input_prod = input_prod * dim

    output_shape = [shape[0:axis] + [input_prod]]
    return output_shape


def create_flatten_node(layer, node_name: str,
                        input_names: List,
                        output_name: List,
                        input_shape: List) -> onnx.NodeProto:
    """Create the "Flatten" node for ``layer`` with its axis attribute."""
    attributes = get_attributes(layer)

    output_shape = get_flatten_output_shape(input_shape, attributes)

    node = Node.c2oNode(layer, node_name, "Flatten", input_names,
                        output_name, input_shape, output_shape, attributes)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Gemm.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node


def getGemmAttri(layer):
    """Return the fixed Gemm attributes (alpha=1, beta=1, transA=0, transB=1)."""
    attributes = {"alpha": 1.0,
                  "beta": 1.0,
                  "transA": 0,
                  "transB": 1}
    return attributes


def getGemmOutShape(input_shape, num_output):
    """Return ``[[first dim of the first input, num_output]]``."""
    output_shape = [[input_shape[0][0], num_output]]
    return output_shape


def createGemm(layer, nodename, inname, outname, input_shape, num_output):
    """Create the "Gemm" node with ``num_output`` output features."""
    attributes = getGemmAttri(layer)
    output_shape = getGemmOutShape(input_shape, num_output)
    node = Node.c2oNode(layer, nodename, "Gemm", inname, outname, input_shape, output_shape, attributes)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/InstanceNorm.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node
import numpy as np


def get_InstanceNorm_param(layer, input_shape):
    """Return identity scale (all 1) and bias (all 0) lists.

    Both lists have ``input_shape[0][1]`` entries.
    """
    channels = input_shape[0][1]
    scale = [1] * channels
    bias = [0] * channels
    return scale, bias


def create_InstanceNorm_attributes(layer):
    """Return ``{"epsilon": eps}``, falling back to 1e-05 when ``mvn_param.eps`` is falsy."""
    epsilon: float = layer.mvn_param.eps
    if not epsilon:
        epsilon = 1e-05

    attributes = {"epsilon": epsilon}
    return attributes


def get_InstanceNorm_output_shape(input_shape):
    """Return ``input_shape`` unchanged."""
    output_shape = input_shape
    return output_shape


def create_InstanceNorm_op(layer, node_name, input_name, output_name, input_shape):
    """Create the "InstanceNormalization" node for ``layer``."""
    output_shape = get_InstanceNorm_output_shape(input_shape)
    attributes = create_InstanceNorm_attributes(layer)
    node = Node.c2oNode(layer, node_name, "InstanceNormalization",
                        input_name, output_name,
                        input_shape, output_shape, attributes)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Interp.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node
import numpy as np


def get_interp_attri(layer, input_shape):
    """Translate ``layer.interp_param`` into Upsample attributes.

    Two configurations are supported:

    * explicit ``height``/``width`` that are both larger than the input and
      scale both axes by the same ratio;
    * ``height == width == 0`` with ``zoom_factor > 1`` and
      ``shrink_factor == 1``.

    Every other configuration prints a message and terminates the process
    with exit code -1.

    Returns:
        dict with "mode" ("linear") and "scales" (four floats; the first two
        are always 1.0).
    """
    param = layer.interp_param
    height = param.height
    width = param.width
    zoom_factor = param.zoom_factor
    shrink_factor = param.shrink_factor
    pad_beg = param.pad_beg
    pad_end = param.pad_end
    H, W = input_shape[0][2], input_shape[0][3]

    scales = None
    if height > H and width > W:
        if height / H == width / W:
            scale = float(height / H)
            scales = [1.0, 1.0, scale, scale]
    elif height == 0 and width == 0:
        if zoom_factor > 1 and shrink_factor == 1:
            # The effective input size is based on the actual input dimensions;
            # the height/width parameters are zero in this branch, so using
            # them made the size collapse to the pads alone.
            height_in_eff = H + pad_beg + pad_end
            width_in_eff = W + pad_beg + pad_end
            if height_in_eff > 0 and width_in_eff > 0:
                height_out = height_in_eff + (height_in_eff - 1) * (zoom_factor - 1)
                width_out = width_in_eff + (width_in_eff - 1) * (zoom_factor - 1)
                scale_height = float(height_out / height_in_eff)
                scale_width = float(width_out / width_in_eff)
                scales = [1.0, 1.0, scale_height, scale_width]

    if scales is None:
        print("do not support interp type")
        exit(-1)

    attributes = {"mode": "linear",
                  'scales': scales}
    return attributes


def get_interp_output_shape(layer, input_shape, attributes):
    """Return ``[floor(dim * scale) per axis]`` for the first input shape."""
    scales = attributes.get("scales")
    # Multiply in floating point and floor afterwards: np.int no longer exists
    # in current NumPy, and casting the scales to int first dropped any
    # fractional scale factor.
    scaled = np.floor(np.array(input_shape[0]) * np.array(scales, dtype=np.float64))
    output_shape = [scaled.astype(np.int64).tolist()]
    return output_shape


def create_interp_node(layer, node_name, input_name, output_name, input_shape):
    """Create the "Upsample" node that implements an Interp layer."""
    attributes = get_interp_attri(layer, input_shape)
    output_shape = get_interp_output_shape(layer, input_shape, attributes)
    node = Node.c2oNode(layer, node_name, "Upsample", input_name, output_name, input_shape, output_shape, attributes)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/LRN.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node


def getLRNAttri(layer):
    """Return the LRN attributes read from ``layer.lrn_param`` (bias is fixed to 1.0)."""
    size = layer.lrn_param.local_size
    alpha = layer.lrn_param.alpha
    beta = layer.lrn_param.beta

    attributes = {"alpha": alpha,
                  "beta": beta,
                  "bias": 1.0,
                  "size": size}
    return attributes


def getLRNOutShape(input_shape):
    """Return ``input_shape`` unchanged."""
    output_shape = input_shape
    return output_shape


def createLRN(layer, nodename, inname, outname, input_shape):
    """Create the "LRN" node for ``layer``."""
    attributes = getLRNAttri(layer)
    output_shape = getLRNOutShape(input_shape)
    node = Node.c2oNode(layer, nodename, "LRN", inname, outname, input_shape, output_shape, attributes)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Log.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node


def get_log_output_shape(input_shape):
    """Return ``input_shape`` unchanged."""
    output_shape = input_shape
    return output_shape


def create_log_node(layer, node_name, input_name, output_name, input_shape):
    """Create the "Log" node for ``layer``."""
    # The shape helper takes the input shape; the layer object used to be
    # passed here by mistake and ended up as the node's output shape.
    output_shape = get_log_output_shape(input_shape)

    node = Node.c2oNode(layer, node_name, 'Log', input_name, output_name, input_shape, output_shape)

    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/LpNormalization.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node
from typing import Dict


def create_attribute(layer):
    """Return the fixed LpNormalization attributes (axis=1, p=2)."""
    attribute: Dict = {
        'axis': 1,
        'p': 2
    }
    return attribute


def get_node_output(input_shape):
    """Return ``input_shape`` unchanged."""
    output_shape = input_shape
    return output_shape


def create_Lp_Normalization(layer, node_name, input_name, output_name, input_shape):
    """Create the "LpNormalization" node for ``layer``."""
    attribute = create_attribute(layer)
    output_shape = get_node_output(input_shape)

    node = Node.c2oNode(layer, node_name, "LpNormalization", input_name, output_name, input_shape, output_shape,
                        attribute)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Min.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node


def get_min_output_shape(input_shape):
    """Return ``input_shape`` unchanged."""
    output_shape = input_shape
    return output_shape


def create_min_op(layer, node_name, input_name, output_name, input_shape):
    """Create the "Min" node for ``layer``."""
    output_shape = get_min_output_shape(input_shape)
    node = Node.c2oNode(layer, node_name, "Min", input_name, output_name, input_shape, output_shape)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Mul.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node
import numpy as np
from typing import *
from onnx import TensorProto
import copy


def need_add_reshape(input_shape: List[List]) -> bool:
    """Return True when the two input shapes have a different number of dimensions."""
    return len(input_shape[0]) != len(input_shape[1])


def get_param_shape(input_shape: List[List]) -> List:
    """Return a copy of the second shape padded with trailing 1s to the rank of the first.

    The caller's shapes are not modified.
    """
    data_shape = input_shape[0]
    scale = copy.deepcopy(input_shape[1])
    # A non-positive difference yields an empty list, so nothing is appended.
    scale.extend([1] * (len(data_shape) - len(scale)))
    return scale


def broadcast_scale(input_shape: List[List]) -> List[List]:
    """Pad the second shape with trailing 1s to the rank of the first.

    NOTE: the padding is appended to ``input_shape[1]`` in place, so the
    caller's list is modified (unlike ``get_param_shape``).

    Prints a message and terminates the process with exit code -1 when the
    second shape has more dimensions than the first.
    """
    data_shape = input_shape[0]
    scale = input_shape[1]
    if len(data_shape) < len(scale):
        print("the scale should be less than input")
        exit(-1)
    scale.extend([1] * (len(data_shape) - len(scale)))
    broadcast_shape = [data_shape, scale]
    return broadcast_shape


def get_mul_output_shape(input_shape: List[List]) -> List[List]:
    """Return the first input shape wrapped in a list."""
    output_shape = input_shape[0]
    return [output_shape]


def create_mul_node(layer, node_name, input_name, output_name, input_shape):
    """Create the "Mul" node for ``layer``."""
    output_shape = get_mul_output_shape(input_shape)

    node = Node.c2oNode(layer, node_name, 'Mul', input_name, output_name, input_shape, output_shape)

    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/PRelu.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node


def getPReluOutShape(input_shape):
    """Return ``input_shape`` unchanged."""
    output_shape = input_shape
    return output_shape


def createPRelu(layer, nodename, inname, outname, input_shape):
    """Create the "PRelu" node for ``layer``."""
    output_shape = getPReluOutShape(input_shape)
    node = Node.c2oNode(layer, nodename, "PRelu", inname, outname, input_shape, output_shape)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Pooling.py:
# --------------------------------------------------------------------------------
import numpy as np
import caffe2onnx.src.c2oObject as Node
import math
import copy


def get_pool_pads(layer):
    """Return the 8-element pads list ``[0, 0, pad_h, pad_w, 0, 0, pad_h, pad_w]``.

    ``pooling_param.pad`` is used for both axes when it is non-zero;
    otherwise ``pad_h`` and ``pad_w`` are used individually.
    """
    pad = layer.pooling_param.pad
    if pad != 0:
        pad_h = pad_w = pad
    else:
        # Read each axis on its own: requiring both to be non-zero silently
        # discarded padding that was set on one axis only.
        pad_h = layer.pooling_param.pad_h
        pad_w = layer.pooling_param.pad_w
    pads = [0, 0, pad_h, pad_w, 0, 0, pad_h, pad_w]

    return pads


def calculate_pad_output_shape(input_shape, pads):
    """Return the first input shape with axes 2 and 3 grown by twice pads[2] / pads[3].

    The input shape is deep-copied and left unmodified.
    """
    pad_h = pads[2]
    pad_w = pads[3]
    output_shape = copy.deepcopy(input_shape[0])

    output_shape[2] = output_shape[2] + 2 * pad_h
    output_shape[3] = output_shape[3] + 2 * pad_w
    return [output_shape]


def create_pad_node(layer, node_name, input_name, output_name, input_shape):
    """Create the constant-mode "Pad" node that applies the pooling layer's padding."""
    pads = get_pool_pads(layer)
    attributes = {"mode": "constant"}
    pad_input_name = input_name
    pad_output_name = output_name
    pad_output_shape = calculate_pad_output_shape(input_shape, pads)

    node = Node.c2oNode(layer, node_name, 'Pad', pad_input_name, pad_output_name, input_shape, pad_output_shape,
                        attributes)

    return node


def get_pool_attributes(layer, pool_type, input_shape):
    """Return kernel_shape, strides, pads and ceil_mode for a pooling node.

    * For the global pool types the kernel covers axes 2 and 3 of the input.
      Otherwise ``kernel_size`` is used, then ``kernel_h``/``kernel_w``
      (both must be non-zero), then 1x1.
    * ``pads`` is always ``[0, 0, 0, 0]``.
    * ``stride`` is used when it differs from 1, then ``stride_h``/``stride_w``
      (both must be non-zero), then 1x1.
    * ``round_mode`` 0 maps to ``ceil_mode`` 1 and 1 maps to ``ceil_mode`` 0;
      any other value terminates the process with exit code -1.
    """
    param = layer.pooling_param
    height = input_shape[0][2]
    width = input_shape[0][3]

    # kernel_shape
    if pool_type == 'GlobalMaxPool' or pool_type == 'GlobalAveragePool':
        kernel_h, kernel_w = height, width
    elif param.kernel_size != 0:
        kernel_h = kernel_w = param.kernel_size
    elif param.kernel_h != 0 and param.kernel_w != 0:
        kernel_h = param.kernel_h
        kernel_w = param.kernel_w
    else:
        kernel_h = kernel_w = 1
    kernel_shape = [kernel_h, kernel_w]

    # The pooling node itself never pads. The values that used to be computed
    # here were immediately overwritten with zeros.
    # NOTE(review): presumably padding is emitted through create_pad_node
    # instead - confirm against the caller.
    pads = [0, 0, 0, 0]

    # strides
    stride = param.stride
    if stride != 1:
        stride_h = stride_w = stride
    elif param.stride_h != 0 and param.stride_w != 0:
        stride_h = param.stride_h
        stride_w = param.stride_w
    else:
        stride_h = stride_w = 1
    strides = [stride_h, stride_w]

    # round_mode: the layer uses CEIL = 0 / FLOOR = 1, whereas the ceil_mode
    # attribute uses floor = 0 / ceil = 1, so the value is inverted.
    if param.round_mode == 0:
        ceil_mode = 1
    elif param.round_mode == 1:
        ceil_mode = 0
    else:
        # wrong condition
        exit(-1)

    attributes = {"kernel_shape": kernel_shape,
                  "strides": strides,
                  "pads": pads,
                  "ceil_mode": ceil_mode
                  }
    return attributes


def get_pooling_output_shape(input_shape, layer, attributes, with_indices=False):
    """Compute the pooled output shape(s) from the pooling attributes.

    Returns one ``[n, c, pooled_h, pooled_w]`` shape, or the same shape twice
    when ``with_indices`` is true.
    """
    number = input_shape[0][0]
    channel = input_shape[0][1]
    height = input_shape[0][2]
    width = input_shape[0][3]
    kernel_shape = attributes["kernel_shape"]
    kernel_h = kernel_shape[0]
    kernel_w = kernel_shape[1]
    strides = attributes["strides"]
    stride_h = strides[0]
    stride_w = strides[1]
    pads = attributes["pads"]
    pad_h = pads[2]
    pad_w = pads[3]

    rounding = math.ceil if attributes["ceil_mode"] == 1 else math.floor
    pooled_height = int(rounding((height + 2 * pad_h - kernel_h) / stride_h)) + 1
    # The width must use the horizontal padding (pad_h was used here before).
    pooled_width = int(rounding((width + 2 * pad_w - kernel_w) / stride_w)) + 1

    if pad_h != 0 or pad_w != 0:
        # Drop the last window when it would start entirely inside the padding.
        if ((pooled_height - 1) * stride_h) >= (height + pad_h):
            pooled_height = pooled_height - 1
        if ((pooled_width - 1) * stride_w) >= (width + pad_w):
            pooled_width = pooled_width - 1

    pooled_shape = [number, channel, pooled_height, pooled_width]
    if with_indices:
        output_shape = [pooled_shape, list(pooled_shape)]
    else:
        output_shape = [pooled_shape]
    return output_shape


def pooling_type(layer):
    """Map ``pooling_param.pool`` / ``global_pooling`` to an operator name.

    pool 0 -> (Global)MaxPool, pool 1 -> (Global)AveragePool. Any other
    combination prints a message and terminates the process with exit code -1.
    """
    pool_value = layer.pooling_param.pool
    global_value = layer.pooling_param.global_pooling
    if pool_value == 0 and global_value is True:
        return 'GlobalMaxPool'
    elif pool_value == 1 and global_value is True:
        return 'GlobalAveragePool'
    elif pool_value == 0 and global_value is False:
        return 'MaxPool'
    elif pool_value == 1 and global_value is False:
        return 'AveragePool'
    else:
        print("unsupport pooling!")
        exit(-1)


def create_pooling_node(layer, nodename, inname, outname, input_shape):
    """Create the pooling node for ``layer``.

    The global variants are created without attributes; MaxPool and
    AveragePool carry the attributes from ``get_pool_attributes``. Two output
    shapes are produced when ``outname`` has two entries.
    """
    pool_type = pooling_type(layer)
    node = None
    attributes = get_pool_attributes(layer, pool_type, input_shape)
    with_indices = len(outname) == 2
    output_shape = get_pooling_output_shape(input_shape, layer, attributes, with_indices=with_indices)

    if pool_type == 'GlobalMaxPool':
        node = Node.c2oNode(layer, nodename, "GlobalMaxPool", inname, outname, input_shape, output_shape, dict={})
    elif pool_type == 'MaxPool':
        node = Node.c2oNode(layer, nodename, "MaxPool", inname, outname, input_shape, output_shape, dict=attributes)
    elif pool_type == 'GlobalAveragePool':
        node = Node.c2oNode(layer, nodename, "GlobalAveragePool", inname, outname, input_shape, output_shape,
                            dict={})
    elif pool_type == 'AveragePool':
        node = Node.c2oNode(layer, nodename, "AveragePool", inname, outname, input_shape, output_shape,
                            dict=attributes)

    assert (node is not None)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Power.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node
import numpy as np


def get_power_param(layer):
    """Return power, scale and shift from ``layer.power_param`` as one-element arrays."""
    power = layer.power_param.power
    scale = layer.power_param.scale
    shift = layer.power_param.shift
    return np.array([power]), np.array([scale]), np.array([shift])


def get_power_output_shape(input_shape):
    """Return the first input shape wrapped in a list."""
    return [input_shape[0]]


def create_power_node(layer, node_name, input_name, output_name, input_shape):
    """Create the "Pow" node for ``layer``."""
    output_shape = get_power_output_shape(input_shape)
    node = Node.c2oNode(layer, node_name, "Pow", input_name, output_name, input_shape, output_shape)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/PriroBox.py:
# --------------------------------------------------------------------------------
import onnx
from typing import *
from onnx import helper
from typing import *
import ctypes
import caffe2onnx.src.c2oObject as Node
import math


def create_custom_node(type_name: Text,
                       inputs: Sequence[Text],
                       outputs: Sequence[Text],
                       attributes: Dict) -> onnx.NodeProto:
    """Create a node with ``helper.make_node``, print it and return it."""
    node = helper.make_node(type_name, inputs, outputs, **attributes)
    print(format(node))
    return node


def create_priorbox_attributes(layer) -> Dict:
    """Collect the PriorBox attributes from ``layer.prior_box_param``.

    * ``flip`` / ``clip`` are converted to 0/1.
    * ``aspect_ratios`` starts with 1.0; each configured ratio that is not
      already present (within 1e-6) is appended, followed by its reciprocal
      when ``flip`` is set.
    * ``variances`` defaults to ``[0.1]`` when none is configured; more than
      one configured value must be exactly four.
    * ``img_sizes`` and ``steps`` are ``[w, h]`` pairs that stay at zero when
      neither the combined nor both per-axis values are set.
    """
    param = layer.prior_box_param
    min_sizes = param.min_size
    max_sizes = param.max_size

    # onnx attributes does not support bool type
    flip = 1 if param.flip else 0
    clip = 1 if param.clip else 0

    # get aspect ratio
    aspect_ratios = [1.0]
    for ratio in param.aspect_ratio:
        if any(math.fabs(ratio - known) < 1e-6 for known in aspect_ratios):
            continue
        aspect_ratios.append(ratio)
        if flip == 1:
            aspect_ratios.append(1. / ratio)

    # get variances
    if len(param.variance) > 1:
        assert len(param.variance) == 4
        variances = param.variance
    elif len(param.variance) == 1:
        variances = param.variance
    else:
        # set default to 0.1
        variances = [0.1]

    # get image size
    img_sizes = [0, 0]
    if param.img_size != 0:
        img_sizes = [param.img_size, param.img_size]
    elif (param.img_h != 0) and (param.img_w != 0):
        # be careful the order: [img_w, img_h]
        img_sizes = [param.img_w, param.img_h]

    # get step
    steps = [0.0, 0.0]
    if param.step != 0:
        steps = [param.step, param.step]
    elif (param.step_h != 0) and (param.step_w != 0):
        # be careful the order: [step_w, step_h]
        steps = [param.step_w, param.step_h]

    offset = param.offset

    attributes = {
        'min_sizes': min_sizes,
        'max_sizes': max_sizes,
        'clip': clip,
        'flip': flip,
        'variances': variances,
        'aspect_ratios': aspect_ratios,
        'img_sizes': img_sizes,
        'steps': steps,
        'offset': offset
    }
    return attributes


def caculate_output_shape(layer, input_shape: List, attributes: Dict) -> List:
    """Return ``[[1, 2, h * w * num_priors * 4]]`` for the first input shape.

    ``num_priors`` is ``len(aspect_ratios) * len(min_sizes)`` plus one for
    every positive entry of ``max_sizes``.
    """
    height = input_shape[0][2]
    width = input_shape[0][3]
    aspect_ratios = attributes.get('aspect_ratios')
    min_sizes = attributes.get('min_sizes')
    max_sizes = attributes.get('max_sizes')
    num_priors = len(aspect_ratios) * len(min_sizes)
    num_priors += sum(1 for max_size in max_sizes if max_size > 0)

    return [[1, 2, width * height * num_priors * 4]]


def create_priorbox_node(layer,
                         node_name: str,
                         inputs_name: List[str],
                         outputs_name: List[str],
                         inputs_shape: List, ) -> onnx.NodeProto:
    """Create the "PriorBox" node for ``layer``."""
    attributes = create_priorbox_attributes(layer)

    outputs_shape = caculate_output_shape(layer, inputs_shape, attributes)
    node = Node.c2oNode(layer, node_name, "PriorBox",
                        inputs_name, outputs_name,
                        inputs_shape, outputs_shape,
                        attributes)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/ReLU.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node


def getReluAttri(layer):
    """Return ``{"alpha": negative_slope}``, or ``{}`` when the slope is zero."""
    attributes = {}
    if layer.relu_param.negative_slope != 0:
        attributes = {"alpha": layer.relu_param.negative_slope}
    return attributes


def getReluOutShape(input_shape):
    """Return ``input_shape`` unchanged."""
    output_shape = input_shape
    return output_shape


def createRelu(layer, nodename, inname, outname, input_shape):
    """Create a "Relu" node, or a "LeakyRelu" node when a negative slope is configured."""
    attributes = getReluAttri(layer)
    output_shape = getReluOutShape(input_shape)

    if attributes == {}:
        node = Node.c2oNode(layer, nodename, "Relu", inname, outname, input_shape, output_shape)
    else:
        node = Node.c2oNode(layer, nodename, "LeakyRelu", inname, outname, input_shape, output_shape, dict=attributes)

    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Reshape.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node
import numpy as np
from typing import *
from operator import mul
from functools import reduce


def getReshapeOutShape(layer, input_shape: List) -> List:
    """Compute the Reshape output shape; the rule depends on ``layer.type``.

    * 'InnerProduct': ``[dims[0], prod(dims[1:])]``.
    * 'ShuffleChannel': ``[n, group, c // group, h, w]``.
    * 'DeReshape': merges axes 1 and 2 of a 5-D shape.
    * 'Flatten': only axis 1 is accepted; ``shape[:1] + [prod(shape[1:])]``.
    * 'Scale': ``input_shape`` unchanged.
    * 'Reshape': ``reshape_param.shape.dim`` where 0 copies the input
      dimension at the same index and a -1 is inferred from the remaining
      element count.

    Other layer types fall through and return None.
    """
    if layer.type == 'InnerProduct':
        dims = input_shape[0]
        in_prod = 1
        for dim in dims[1:]:
            in_prod = in_prod * dim
        output_shape = [dims[0], in_prod]
        return [output_shape]

    elif layer.type == 'ShuffleChannel':
        # change [N, C, H, W] -> [N, G, C', H, W] tensor
        group = layer.shuffle_channel_param.group
        n, c, h, w = input_shape[0][0], input_shape[0][1], input_shape[0][2], input_shape[0][3]
        # Integer division keeps the dimension exact (no float round trip).
        out_shape = [[n, group, c // group, h, w]]
        return out_shape

    elif layer.type == 'DeReshape':
        n, c, h, w = input_shape[0][0], input_shape[0][1] * input_shape[0][2], input_shape[0][3], input_shape[0][4]
        out_shape = [[n, c, h, w]]
        return out_shape

    elif layer.type == 'Flatten':

        axis = layer.flatten_param.axis
        assert axis == 1, "Flatten: not support axis not equal 1"
        shape = input_shape[0]
        input_prod = 1
        for dim in shape[axis:]:
            input_prod = input_prod * dim
        output_shape = [shape[0:axis] + [input_prod]]
        return output_shape

    elif layer.type == 'Scale':
        return input_shape

    elif layer.type == 'Reshape':
        shape = input_shape[0]
        re_shape = layer.reshape_param.shape.dim
        # 0 means "keep the input dimension at this index".
        new_shape_list = [shape[j] if dim == 0 else dim for j, dim in enumerate(re_shape)]
        if -1 in new_shape_list:
            index = new_shape_list.index(-1)
            known = new_shape_list[:index] + new_shape_list[index + 1:]
            prod = reduce(mul, known, 1)
            # Integer division keeps the inferred dimension exact.
            new_shape_list[index] = reduce(mul, shape, 1) // prod
        output_shape = [new_shape_list]
        return output_shape


def get_reshape_param(layer, input_shape: List[int]) -> List[int]:
    """Return ``layer.reshape_param.shape.dim`` as is (``input_shape`` is unused)."""
    re_shape = layer.reshape_param.shape.dim
    return re_shape


def createReshape(layer, node_name, input_name, output_name, input_shape, output_shape={}):
    """Create the "Reshape" node for ``layer``.

    A caller-supplied ``output_shape`` is honoured only for 'Scale' layers;
    in every other case the shape comes from ``getReshapeOutShape``. The
    ``{}`` default is only compared against, never mutated.
    """
    if not (layer.type == 'Scale' and output_shape != {}):
        output_shape = getReshapeOutShape(layer, input_shape)

    node = Node.c2oNode(layer, node_name, "Reshape", input_name, output_name, input_shape, output_shape)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Resize.py:
# --------------------------------------------------------------------------------
import onnx
from typing import *
from onnx import helper
from typing import *
import ctypes
import caffe2onnx.src.c2oObject as Node
import numpy as np


def create_attributes(layer) -> Dict:
    """Return the fixed Resize attributes (nearest mode, half_pixel coordinates)."""
    coordinate_transformation_mode = 'half_pixel'
    cubic_coeff_a = -0.75
    exclude_outside = 0
    extrapolation_value = 0.0
    mode = 'nearest'
    nearest_mode = 'round_prefer_floor'
    attributes = {
        "coordinate_transformation_mode": coordinate_transformation_mode,
        "cubic_coeff_a": cubic_coeff_a,
        "exclude_outside": exclude_outside,
        "extrapolation_value": extrapolation_value,
        "mode": mode,
        "nearest_mode": nearest_mode
    }
    return attributes


def caculate_output_shape(layer, input_shape) -> List:
    """Return the first input shape with axes 2 and 3 multiplied by ``upsample_param.scale``."""
    scale = layer.upsample_param.scale
    scales = [1.0, 1.0, scale, scale]
    # np.int no longer exists in current NumPy; multiply in floating point and
    # floor afterwards so a fractional scale is not truncated beforehand.
    scaled = np.floor(np.array(input_shape[0]) * np.array(scales, dtype=np.float64))
    output_shape = [scaled.astype(np.int64).tolist()]
    return output_shape


def create_resize_node(layer,
                       node_name: str,
                       inputs_name: List[str],
                       outputs_name: List[str],
                       inputs_shape: List, ) -> onnx.NodeProto:
    """Create the "Resize" node for ``layer``."""
    attributes = create_attributes(layer)

    outputs_shape = caculate_output_shape(layer, inputs_shape)

    node = Node.c2oNode(layer, node_name, "Resize",
                        inputs_name, outputs_name,
                        inputs_shape, outputs_shape,
                        attributes)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Shuffle.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node


def getReshapeOutShape(layer, input_shape):
    """Compute the output shape from ``layer.reshape_param.shape.dim``.

    Without reshape dims the result is ``[[1, prod(input dims)]]``. Otherwise
    0 copies the input dimension at the same index and -1 is inferred from
    the element count.

    NOTE(review): ``output_shape`` aliases the layer's own dim container, so
    the resolved values are written back into ``layer`` - confirm whether
    callers rely on that before changing it.
    """
    try:
        re_shape = layer.reshape_param.shape.dim
    except Exception:
        re_shape = []

    in_prod = 1
    for dim in input_shape[0]:
        in_prod = in_prod * dim
    if re_shape == []:
        output_shape = [[1, in_prod]]
    else:
        output_shape = re_shape
        for i in range(len(re_shape)):
            if re_shape[i] == 0:
                output_shape[i] = input_shape[0][i]

        for j in range(len(output_shape)):
            if output_shape[j] == -1:
                # Dividing by every entry includes the -1 itself, hence the
                # sign flip when the result is stored.
                for d in output_shape:
                    in_prod = in_prod / d
                output_shape[j] = int(in_prod * -1)
        output_shape = [output_shape]
    return output_shape


def createShuffle(layer, nodename, inname, outname, input_shape):
    """Create the "Reshape" node used for a shuffle layer."""
    output_shape = getReshapeOutShape(layer, input_shape)
    node = Node.c2oNode(layer, nodename, "Reshape", inname, outname, input_shape, output_shape)
    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Sigmoid.py:
# --------------------------------------------------------------------------------
import caffe2onnx.src.c2oObject as Node


def getOutShape(input_shape):
    """Return ``input_shape`` unchanged."""
    output_shape = input_shape
    return output_shape


def createSigmoid(layer, nodename, inname, outname, input_shape):
    """Create the "Sigmoid" node for ``layer``."""
    output_shape = getOutShape(input_shape)

    node = Node.c2oNode(layer, nodename, "Sigmoid", inname, outname, input_shape, output_shape)

    return node
# --------------------------------------------------------------------------------
# /caffe2onnx/src/OPs/Slice.py:
def getSliceOutShape(input_shape, start, end, axis=1):
    """Return the output shape of one Slice node.

    The result is the first input shape with the sliced dimension replaced
    by ``end - start``.

    Args:
        input_shape: list of input shapes; only ``input_shape[0]`` is used.
        start: first index (inclusive) of the slice along *axis*.
        end: last index (exclusive) of the slice along *axis*.
        axis: dimension being sliced. Defaults to ``1``, which is the only
            axis the previous implementation handled.

    Returns:
        A one-element list holding the sliced shape.

    Raises:
        ValueError: if *axis* does not exist in the input shape (the former
            code printed a message and terminated the interpreter instead).
    """
    shape = list(input_shape[0])
    rank = len(shape)
    if not -rank <= axis < rank:
        raise ValueError(
            "Unsupport slice shape: axis %d is out of range for shape %r" % (axis, shape))
    shape[axis] = end - start
    return [shape]
def getTransposeAttri(layer) -> typing.Dict:
    """Build the attribute dict (``perm``) for a Transpose node.

    A layer of type ``"ShuffleChannel"`` always gets the fixed permutation
    ``[0, 2, 1, 3, 4]``; any other layer uses ``layer.permute_param.order``
    as-is (the same object, not a copy).
    """
    is_shuffle_channel = layer.type == "ShuffleChannel"
    perm = [0, 2, 1, 3, 4] if is_shuffle_channel else layer.permute_param.order
    return {"perm": perm}
def createTranspose(layer, node_name, input_name, output_name, input_shape) -> "Node.c2oNode":
    """Create the ``Transpose`` node wrapper for *layer*.

    The permutation comes from ``getTransposeAttri`` and the output shape
    from ``getTransposeOutShape``; both are forwarded to ``Node.c2oNode``
    together with the given names and input shape.

    Returns:
        The ``Node.c2oNode`` instance. (The annotation used to name the
        ``Node`` module alias itself, which is not a type.)
    """
    attributes = getTransposeAttri(layer)
    output_shape = getTransposeOutShape(layer, input_shape, attributes)
    return Node.c2oNode(layer, node_name, "Transpose", input_name, output_name,
                        input_shape, output_shape, attributes)
def get_upsample_outputshape(input_shape, layer):
    """Return the output shape list of the Upsample node built from *layer*.

    Args:
        input_shape: list of input shapes; only ``input_shape[0]`` is used
            and it is multiplied element-wise with ``[1, 1, scale, scale]``.
        layer: Caffe layer whose ``upsample_param.scale`` is the factor
            applied to the last two of the four dimensions.

    Returns:
        A one-element list holding the scaled shape as plain Python ints.
    """
    scale = layer.upsample_param.scale
    scales = [1.0, 1.0, scale, scale]
    # ``np.int`` was removed from NumPy (1.24+); it was an alias of the
    # builtin ``int``, so this keeps the original integer truncation.
    int_scales = np.array(scales, dtype=int)
    output_shape = [np.multiply(int_scales, np.array(input_shape[0])).tolist()]
    return output_shape
attributes = get_upsample_attri(layer) 28 | output_shape = get_upsample_outputshape(input_shape, layer) 29 | 30 | # print(output_shape) 31 | node = Node.c2oNode(layer, node_name, "Upsample", input_name, output_name, input_shape, output_shape, attributes) 32 | return node 33 | -------------------------------------------------------------------------------- /caffe2onnx/src/OPs/__init__.py: -------------------------------------------------------------------------------- 1 | from caffe2onnx.src.OPs.BatchNorm import * 2 | from caffe2onnx.src.OPs.Concat import * 3 | from caffe2onnx.src.OPs.Conv import * 4 | from caffe2onnx.src.OPs.Dropout import * 5 | from caffe2onnx.src.OPs.Eltwise import * 6 | from caffe2onnx.src.OPs.Gemm import * 7 | from caffe2onnx.src.OPs.LRN import * 8 | from caffe2onnx.src.OPs.Pooling import * 9 | from caffe2onnx.src.OPs.PRelu import * 10 | from caffe2onnx.src.OPs.ReLU import * 11 | from caffe2onnx.src.OPs.Reshape import * 12 | from caffe2onnx.src.OPs.Softmax import * 13 | from caffe2onnx.src.OPs.Upsample import * 14 | from caffe2onnx.src.OPs.UnPooling import * 15 | from caffe2onnx.src.OPs.ConvTranspose import * 16 | from caffe2onnx.src.OPs.Slice import * 17 | from caffe2onnx.src.OPs.Transpose import * 18 | from caffe2onnx.src.OPs.Sigmoid import * 19 | from caffe2onnx.src.OPs.Min import * 20 | from caffe2onnx.src.OPs.Clip import * 21 | from caffe2onnx.src.OPs.Log import * 22 | from caffe2onnx.src.OPs.Mul import * 23 | from caffe2onnx.src.OPs.Interp import * 24 | from caffe2onnx.src.OPs.Crop import * 25 | from caffe2onnx.src.OPs.InstanceNorm import * 26 | from caffe2onnx.src.OPs.PriroBox import create_priorbox_node 27 | from caffe2onnx.src.OPs.DetectionOutput import create_detection_output 28 | from caffe2onnx.src.OPs.Flatten import create_flatten_node 29 | from caffe2onnx.src.OPs.Resize import create_resize_node 30 | from caffe2onnx.src.OPs.Axpy import create_axpy_add_node, create_axpy_mul_node 31 | from caffe2onnx.src.OPs.LpNormalization import 
def parse_args():
    """Parse the command-line arguments of the Caffe -> ONNX converter.

    Options:
        --prototxt    (required) input .prototxt
        --caffemodel  input .caffemodel
        --onnx        output .onnx
        --frozen      boolean flag value ("true"/"false", "1"/"0", ...)

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``. Options that
        were not supplied are ``None``.
    """

    def _str2bool(value):
        # ``type=bool`` treats every non-empty string as True, so
        # ``--frozen False`` used to enable the flag. Parse the text instead.
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1", "on"):
            return True
        if text in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))

    parser = argparse.ArgumentParser(description='Convert Caffe model to ONNX.')

    parser.add_argument("--prototxt",
                        type=str, required=True,
                        help="input .prototxt")

    parser.add_argument("--caffemodel",
                        type=str,
                        required=False,
                        help="input .caffemodel")

    parser.add_argument("--onnx",
                        type=str,
                        required=False,
                        help="output .onnx")

    parser.add_argument("--frozen",
                        type=_str2bool, required=False,
                        help="frozen graph or not")

    args = parser.parse_args()
    return args
class c2oGraph:
    """Accumulator for the pieces of the ONNX graph being built.

    Each ``add*`` method appends one item to the matching list; nothing is
    validated or de-duplicated here.
    """

    def __init__(self, onnxname):
        """Create an empty graph container named *onnxname*."""
        self.name = onnxname
        # Input tensor value infos.
        self.in_tvi = []
        # Output tensor value infos.
        self.out_tvi = []
        # Initializer tensors (values of the input parameters).
        self.init_t = []
        # Value infos of intermediate outputs.
        self.hidden_out_tvi = []

    def addInputsTVI(self, in_tvi):
        """Append one input value info."""
        self.in_tvi.append(in_tvi)

    def addOutputsTVI(self, out_tvi):
        """Append one output value info."""
        self.out_tvi.append(out_tvi)

    def addInitTensor(self, init_t):
        """Append one initializer tensor."""
        self.init_t.append(init_t)

    def addValueInfoTVI(self, vi_tvi):
        """Append one intermediate-output value info."""
        self.hidden_out_tvi.append(vi_tvi)
def GetNetModelCaffe(self, model):
    """Return the populated layer list of the loaded caffemodel.

    Exactly one of ``model.layer`` / ``model.layers`` must be non-empty and
    that one is returned. When both are empty or both are populated, the
    message ``caffemodel layer error`` is printed and ``-1`` is returned.
    """
    layer_filled = len(model.layer) != 0
    layers_filled = len(model.layers) != 0

    if layers_filled and not layer_filled:
        return model.layers
    if layer_filled and not layers_filled:
        return model.layer

    print("caffemodel layer error")
    return -1
def get_param_shape(self, params):
    """Return one shape per blob in *params*.

    A blob whose ``shape.dim`` is non-empty contributes that ``dim`` object
    itself; otherwise the shape is assembled from the blob's ``num``,
    ``channels``, ``height`` and ``width`` fields.
    """
    return [
        blob.shape.dim
        if blob.shape.dim != []
        else [blob.num, blob.channels, blob.height, blob.width]
        for blob in params
    ]
sliding coefficient s 141 | ParamShape = ParamShape[:-1] 142 | ParamData = [ 143 | [q / (Params[-1].data[0]) 144 | for q in p.data] if i == 0 else 145 | [q / (Params[-1].data[0] + 1e-5) for q in p.data] 146 | for i, p in enumerate(Params[:-1]) 147 | ] # with s 148 | elif len(ParamShape) == 2 and len(ParamShape[0]) == 4: 149 | ParamShape = [[ParamShape[0][1]], [ParamShape[1][1]]] 150 | ParamData = [[q / 1. for q in p.data] if i == 0 else 151 | [q / (1. + 1e-5) for q in p.data] 152 | for i, p in enumerate(Params)] 153 | if layer.type == "Reshape": 154 | ParamShape = [[len(model_layer.reshape_param.shape.dim)]] 155 | ParamData = [model_layer.reshape_param.shape.dim] 156 | if layer.type == "Convolution" or layer.type == "ConvolutionDepthwise": 157 | if len(ParamShape) == 2: 158 | ParamShape[1] = [ParamShape[0][0]] 159 | if layer.type == "InnerProduct": 160 | if len(ParamShape[0]) > 2: 161 | ParamShape[0] = [ParamShape[0][2], ParamShape[0][3]] 162 | if len(ParamShape) == 2: 163 | if len(ParamShape[1]) > 2: 164 | ParamShape[1] = [ParamShape[1][2], ParamShape[1][3]] 165 | if layer.type == "Normalize": 166 | if len(ParamShape) == 1: 167 | ParamShape[0] = [1, ParamShape[0][0], 1, 1] 168 | 169 | # comment it for tvm because tvm use broadcast at prelu layer 170 | # if layer.type == 'PReLU': 171 | # ParamShape = [[ParamShape[0][0], 1, 1]] 172 | 173 | break 174 | 175 | # Judge whether there is Param 176 | if ParamShape != []: 177 | ParamName = ParamName[0:len(ParamShape)] 178 | ParamType = ParamType[0:len(ParamShape)] 179 | for i in range(len(ParamShape)): 180 | ParamName[i] = layer.name + ParamName[i] 181 | p_tvi = helper.make_tensor_value_info(ParamName[i], 182 | ParamType[i], 183 | ParamShape[i]) 184 | p_t = helper.make_tensor(ParamName[i], ParamType[i], 185 | ParamShape[i], ParamData[i]) 186 | self.onnxmodel.addInputsTVI(p_tvi) 187 | self.onnxmodel.addInitTensor(p_t) 188 | #print("add parameters " + Param_Name[i] + " input information and tensor data") 189 | if layer.type 
def AddInputsTVIMannul(self, layer, param_names, param_types, param_shapes,
                       param_data):
    """Register hand-made parameters as graph inputs and initializers.

    For every index of *param_shapes* the parameter name is prefixed with
    ``layer.name``; a tensor value info and a tensor are created with that
    name and added to ``self.onnxmodel`` as input info and initializer.

    Returns:
        A deep copy of *param_names* in which the processed entries carry
        the ``layer.name`` prefix. *param_names* itself is left untouched.
    """
    node_names = copy.deepcopy(param_names)
    for idx, shape in enumerate(param_shapes):
        full_name = layer.name + param_names[idx]
        node_names[idx] = full_name
        value_info = helper.make_tensor_value_info(full_name, param_types[idx], shape)
        tensor = helper.make_tensor(full_name, param_types[idx], shape, param_data[idx])
        self.onnxmodel.addInputsTVI(value_info)
        self.onnxmodel.addInitTensor(tensor)
    return node_names
def GetCurrentLayerOutName(self, layer):
    """Return the list of output names for *layer*.

    When the layer has a single top that equals its bottom list, the
    layer's own name is used as the only output name; otherwise the names
    in ``layer.top`` are returned as a new list (multiple outputs are kept).
    """
    same_top_and_bottom = layer.top == layer.bottom and len(layer.top) == 1
    return [layer.name] if same_top_and_bottom else list(layer.top)
319 | # 1.Get node input name, input dimension, output name, node name 320 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) # Get a list of input names and input shapes 321 | output_name = self.GetCurrentLayerOutName(Layers[i]) # Get a list of output names 322 | node_name = Layers[i].name 323 | 324 | # 2.Generate node parameter tensor value info, get the node parameter name, and add the parameter name to the node input name list 325 | if i < len(Layers) - 1 and Layers[i + 1].type == "Scale": 326 | scale_pname, scale_pshape = self.AddInputsTVIFromParams(Layers[i + 1], op_pname["Scale"], 327 | op_ptype["Scale"]) 328 | bn_pname, bn_pshape = self.AddInputsTVIFromParams(Layers[i], op_pname["BatchNorm"], 329 | op_ptype["BatchNorm"]) 330 | assert bn_pshape == scale_pshape, "BatchNorm and Scale params should share the same shape" 331 | input_name.extend(scale_pname) 332 | input_name.extend(bn_pname) 333 | else: 334 | bn_pshape, _ = self.GetParamsShapeAndData(Layers[i]) 335 | custom_params = [np.ones(shape=bn_pshape[0], dtype=np.float), 336 | 0.001 + np.zeros(shape=bn_pshape[1], dtype=np.float)] 337 | scale_pname = self.AddInputsTVIMannul(Layers[i], op_pname["Scale"], op_ptype["Scale"], bn_pshape, 338 | custom_params) 339 | bn_pname, bn_pshape = self.AddInputsTVIFromParams(Layers[i], op_pname["BatchNorm"], 340 | op_ptype["BatchNorm"]) 341 | input_name.extend(scale_pname) 342 | input_name.extend(bn_pname) 343 | 344 | # 3.Build bn_node 345 | bn_node = op.createBN(Layers[i], node_name, input_name, output_name, input_shape) 346 | 347 | # 4.Add node to node list 348 | self.onnxNodeList.append(bn_node) 349 | 350 | elif Layers[i].type == "Scale": 351 | if i > 0 and (Layers[i - 1].type == "BatchNorm" or Layers[i - 1].type == "BN"): 352 | # bn + scale 353 | continue 354 | # signal scale 355 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) # Get a list of input names and input shapes 356 | output_name = 
self.GetCurrentLayerOutName(Layers[i]) # Get a list of output names 357 | # node_name = Layers[i].name + random.choice('1234567890abcdefghijklmnopqrst') 358 | node_name = Layers[i].name 359 | has_two_input: bool = False 360 | if len(input_name) > 1: 361 | has_two_input = True 362 | 363 | if has_two_input and op.need_add_reshape(input_shape): 364 | reshape_layer = copy.deepcopy(Layers[i]) 365 | # add reshape layer 366 | reshape_node_name = input_name[1] + '_reshap_' + random.choice('1234567890abcdefghijklmnopqrst') 367 | 368 | reshape_input_name = input_name[1] 369 | reshape_input_shape = input_shape[1] 370 | 371 | reshape_shape_data = op.get_param_shape(input_shape) 372 | reshape_shape_shape = np.shape(reshape_shape_data) 373 | 374 | reshape_params = self.AddInputsTVIMannul(Layers[i], [reshape_node_name + 'shape'], [TensorProto.INT64], 375 | [reshape_shape_shape], [reshape_shape_data]) 376 | 377 | reshape_output_name = [reshape_input_name + '_output_name'] 378 | 379 | 380 | reshape_node = op.createReshape(reshape_layer, reshape_node_name, [reshape_input_name, reshape_params[0]], 381 | reshape_output_name, reshape_input_shape, output_shape=[reshape_shape_data]) 382 | 383 | self.onnxNodeList.append(reshape_node) 384 | 385 | # add mul node 386 | input_name[1] = reshape_output_name[0] 387 | input_shape[1] = reshape_shape_data 388 | mul_node = op.create_mul_node(Layers[i], node_name, input_name, output_name, input_shape) 389 | 390 | self.onnxNodeList.append(mul_node) 391 | else: 392 | param_shape, param_data = self.GetParamsShapeAndData(Layers[i]) 393 | # Scale = Mul + Add 394 | if len(param_shape) == 2: 395 | # create mul 396 | param_scale_shape = [1, param_shape[0][0], 1, 1] 397 | param_scale_data = param_data[0] 398 | param_scale_name = self.AddInputsTVIMannul(Layers[i], ["_scale"], [TensorProto.FLOAT], [param_scale_shape], [param_scale_data]) 399 | 400 | mul_node_name = node_name + "_mul" 401 | mul_input_name = [input_name[0], param_scale_name[0]] 402 | 
mul_output_name = [output_name[0] + "_mul"] 403 | mul_input_shape = [input_shape[0], param_scale_shape] 404 | 405 | mul_node = op.create_mul_node(Layers[i], mul_node_name, mul_input_name, mul_output_name, mul_input_shape) 406 | self.onnxNodeList.append(mul_node) 407 | 408 | param_bias_shape = [1, param_shape[1][0], 1, 1] 409 | param_bias_data = param_data[1] 410 | param_bias_name = self.AddInputsTVIMannul(Layers[i], ["_bias"], [TensorProto.FLOAT], [param_bias_shape], [param_bias_data]) 411 | 412 | add_node_name = node_name + "_add" 413 | add_input_name = [mul_output_name[0], param_bias_name[0]] 414 | add_output_name = output_name 415 | add_input_shape = [input_shape[0], param_bias_shape] 416 | add_node = op.create_add_node(Layers[i], add_node_name, add_input_name, add_output_name, add_input_shape) 417 | self.onnxNodeList.append(add_node) 418 | # Scale = Mul 419 | if len(param_shape) == 1: 420 | # create mul 421 | param_scale_shape = [1, param_shape[0][0], 1, 1] 422 | param_scale_data = param_data[0] 423 | param_scale_name = self.AddInputsTVIMannul( 424 | Layers[i], ["_scale"], [TensorProto.FLOAT], 425 | [param_scale_shape], [param_scale_data]) 426 | 427 | mul_input_name = [input_name[0], param_scale_name[0]] 428 | mul_input_shape = [input_shape[0], param_scale_shape] 429 | 430 | mul_node = op.create_mul_node(Layers[i], node_name, 431 | mul_input_name, 432 | output_name, 433 | mul_input_shape) 434 | self.onnxNodeList.append(mul_node) 435 | 436 | # Pooling 437 | elif Layers[i].type == "Pooling" or Layers[i].type == Layer_POOLING: 438 | # TODO: 439 | # Pooling <= Pad + Pool 440 | # NOTE: Because Caffe and ONNX handle the AveragePool differently, you need to add the Pad node before the pool node 441 | # 1.Get node input name, input dimension, output name, node name 442 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) # Get a list of input names and input shapes 443 | output_name = self.GetCurrentLayerOutName(Layers[i]) # Get a list of output 
names 444 | node_name = Layers[i].name 445 | # create pad node 446 | pads = op.get_pool_pads(Layers[i]) 447 | pads_shape = [np.shape(pads)] 448 | pads_name = node_name + "_output" 449 | pads_output_name = [node_name + "_output"] 450 | pad_output_shape = op.calculate_pad_output_shape(input_shape, pads) 451 | pads_param = self.AddInputsTVIMannul(Layers[i], ["_pad"], [TensorProto.INT64], pads_shape, [pads]) 452 | input_name.extend(pads_param) 453 | 454 | pool_type = op.pooling_type(Layers[i]) 455 | if pool_type == "GlobalMaxPool" or pool_type == "MaxPool": 456 | constant_value = [-sys.float_info.max] 457 | constant_shape = [np.shape(constant_value)] 458 | 459 | constant_value_param = self.AddInputsTVIMannul(Layers[i], ["_constant_value"], [TensorProto.FLOAT], 460 | constant_shape, [constant_value]) 461 | input_name.extend(constant_value_param) 462 | 463 | pad_node = op.create_pad_node(Layers[i], pads_name, input_name, pads_output_name, input_shape) 464 | self.onnxNodeList.append(pad_node) 465 | 466 | # 2.Build pool_node 467 | pool_node = op.create_pooling_node(Layers[i], node_name, pads_output_name, output_name, 468 | pad_output_shape) 469 | 470 | # 3.Add node to node list 471 | self.onnxNodeList.append(pool_node) 472 | 473 | 474 | # MaxUnPool 475 | elif Layers[i].type == "MaxUnpool": 476 | # 1.Get node input name, input dimension, output name, node name 477 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) # Get a list of input names and input shapes 478 | output_name = self.GetCurrentLayerOutName(Layers[i]) # Get a list of output names 479 | node_name = Layers[i].name 480 | 481 | # 2.Build unpool_node 482 | unpool_node = op.createUnPooling(Layers[i], node_name, input_name, output_name, input_shape) 483 | 484 | # 3.Add node to node list 485 | self.onnxNodeList.append(unpool_node) 486 | 487 | 488 | # Eltwise 489 | elif Layers[i].type == "Eltwise" or Layers[i].type == Layer_ELTWISE: 490 | # 1.Get node input name, input dimension, output name, node 
name 491 | output_name = self.GetCurrentLayerOutName(Layers[i]) # Get a list of output names 492 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) # Get a list of input names and input shapes 493 | 494 | node_name = Layers[i].name 495 | 496 | # 2.Build eltwise_node 497 | eltwise_node = op.createEltwise(Layers[i], node_name, input_name, output_name, input_shape) 498 | 499 | # 3.Add node to node list 500 | self.onnxNodeList.append(eltwise_node) 501 | 502 | 503 | # Softmax 504 | elif Layers[i].type == "Softmax" or Layers[i].type == Layer_SOFTMAX: 505 | # 1.Get node input name, input dimension, output name, node name 506 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) # Get a list of input names and input shapes 507 | output_name = self.GetCurrentLayerOutName(Layers[i]) # Get a list of output names 508 | node_name = Layers[i].name 509 | 510 | # 2.Build softmax_node 511 | softmax_node = op.createSoftmax(Layers[i], node_name, input_name, output_name, input_shape) 512 | 513 | # 3.Add node to node list 514 | self.onnxNodeList.append(softmax_node) 515 | 516 | 517 | # Relu 518 | elif Layers[i].type == "ReLU" or Layers[i].type == Layer_RELU: 519 | # 1.Get node input name, input dimension, output name, node name 520 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) # Get a list of input names and input shapes 521 | output_name = self.GetCurrentLayerOutName(Layers[i]) # Get a list of output names 522 | node_name = Layers[i].name 523 | # letters = '1234567890abcdefghijklmnopqrst' 524 | # length = random.randrange(5, 16) 525 | # randstr = ''.join(random.choice(letters) for _ in range(length)) 526 | # node_name = node_name 527 | # for i in range(len(output_name)): 528 | # output_name[i] = output_name[i] + random.choice('1234567890abcdef') 529 | #print(output_name) 530 | 531 | 532 | # 2.Build relu_node 533 | relu_node = op.createRelu(Layers[i], node_name, input_name, output_name, input_shape) 534 | 535 | # 3.Add 
node to node list 536 | self.onnxNodeList.append(relu_node) 537 | # PRelu 538 | elif Layers[i].type == "PReLU": 539 | # 1.Get node input name, input dimension, output name, node name 540 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 541 | output_name = self.GetCurrentLayerOutName(Layers[i]) 542 | node_name = Layers[i].name 543 | 544 | # 2.Generate node parameter tensor value info, get the node parameter name, and add the parameter name to the node input name list 545 | pname = self.AddInputsTVIFromParams(Layers[i], op_pname["PRelu"], op_ptype["PRelu"]) 546 | input_name.extend(pname) 547 | 548 | # 3.Build PRelu_node 549 | PRelu_node = op.createPRelu(Layers[i], node_name, input_name, output_name, input_shape) 550 | 551 | # 4.Add node to node list 552 | self.onnxNodeList.append(PRelu_node) 553 | # relu6 554 | elif Layers[i].type == 'ReLU6': 555 | # relu6 = clip(0, 6) 556 | # add relu node 557 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 558 | output_name = self.GetCurrentLayerOutName(Layers[i]) 559 | node_name = Layers[i].name 560 | 561 | min_value = np.float(0) 562 | max_value = np.float(6) 563 | shape = np.shape([min_value]) 564 | min_param = self.AddInputsTVIMannul(Layers[i], ["_min"], 565 | [TensorProto.FLOAT], [shape], 566 | [[min_value]]) 567 | input_name.extend(min_param) 568 | max_param = self.AddInputsTVIMannul(Layers[i], ['_max'], 569 | [TensorProto.FLOAT], [shape], 570 | [[max_value]]) 571 | input_name.extend(max_param) 572 | relu6_node = op.create_clip_node(Layers[i], node_name, input_name, output_name, input_shape) 573 | 574 | self.onnxNodeList.append(relu6_node) 575 | 576 | elif Layers[i].type == "Sigmoid": 577 | # 1.Get node input name, input dimension, output name, node name 578 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) # Get a list of input names and input shapes 579 | output_name = self.GetCurrentLayerOutName(Layers[i]) # Get a list of output names 580 | node_name = 
Layers[i].name 581 | 582 | # 2.Build relu_node 583 | sigmoid_node = op.createSigmoid(Layers[i], node_name, input_name, output_name, input_shape) 584 | 585 | # 3.Add node to node list 586 | self.onnxNodeList.append(sigmoid_node) 587 | elif Layers[i].type == 'Log': 588 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) # Get a list of input names and input shapes 589 | output_name = self.GetCurrentLayerOutName(Layers[i]) # Get a list of output names 590 | node_name = Layers[i].name 591 | 592 | log_node = op.create_log_node(Layers[i], node_name, input_name, output_name, input_shape) 593 | 594 | self.onnxNodeList.append(log_node) 595 | # LRN 596 | elif Layers[i].type == "LRN" or Layers[i].type == Layer_LRN: 597 | # 1.Get node input name, input dimension, output name, node name 598 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 599 | output_name = self.GetCurrentLayerOutName(Layers[i]) 600 | node_name = Layers[i].name 601 | 602 | # 2.Build LRN_node 603 | LRN_node = op.createLRN(Layers[i], node_name, input_name, output_name, input_shape) 604 | 605 | # 3.Add node to node list 606 | self.onnxNodeList.append(LRN_node) 607 | 608 | 609 | # Dropout 610 | elif Layers[i].type == "Dropout" or Layers[i].type == Layer_DROPOUT: 611 | # 1.Get node input name, input dimension, output name, node name 612 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 613 | output_name = self.GetCurrentLayerOutName(Layers[i]) 614 | node_name = Layers[i].name 615 | 616 | # 2.Build Dropout_node 617 | Dropout_node = op.createDropout(Layers[i], node_name, input_name, output_name, input_shape) 618 | 619 | # 3.Add node to node list 620 | self.onnxNodeList.append(Dropout_node) 621 | 622 | 623 | # Upsample 624 | elif Layers[i].type == "Upsample" or Layers[i].type == Layer_UPSAMPLE: 625 | # 1.Get node input name, input dimension, output name, node name 626 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 627 | 
output_name = self.GetCurrentLayerOutName(Layers[i]) 628 | node_name = Layers[i].name 629 | 630 | # 2.Generate node parameter tensor value info, get the node parameter name, and add the parameter name to the node input name list 631 | # add roi input 632 | 633 | # add scales input 634 | paramshape = [[8, 1], 635 | [4, 1]] 636 | paramdata = [[1, 1, 1, 1, 2, 2, 2, 2], 637 | [1.0, 1.0, Layers[i].upsample_param.scale, Layers[i].upsample_param.scale]] 638 | 639 | pname = self.AddInputsTVIMannul(Layers[i], op_pname["Upsample"], op_ptype["Upsample"], paramshape, 640 | paramdata) 641 | 642 | input_name.extend(pname) 643 | 644 | # 3.Build Upsample_node 645 | Upsample_node = op.create_resize_node(Layers[i], node_name, input_name, output_name, input_shape) 646 | 647 | # 4.Add node to node list 648 | self.onnxNodeList.append(Upsample_node) 649 | 650 | elif Layers[i].type == 'Interp': 651 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 652 | output_name = self.GetCurrentLayerOutName(Layers[i]) 653 | node_name = Layers[i].name 654 | 655 | interp_node = op.create_interp_node(Layers[i], node_name, input_name, output_name, input_shape) 656 | 657 | self.onnxNodeList.append(interp_node) 658 | 659 | # Concat 660 | elif Layers[i].type == "Concat" or Layers[i].type == Layer_CONCAT: 661 | # 1.Get node input name, input dimension, output name, node name 662 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 663 | output_name = self.GetCurrentLayerOutName(Layers[i]) 664 | node_name = Layers[i].name 665 | 666 | # 2.Build Concat_node 667 | Concat_node = op.createConcat(Layers[i], node_name, input_name, output_name, input_shape) 668 | 669 | # 3.Add node to node list 670 | self.onnxNodeList.append(Concat_node) 671 | 672 | elif Layers[i].type == 'Slice': 673 | # 1. 
Get node book input name, input dimension, output name, node name 674 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 675 | output_name_list = self.GetCurrentLayerOutName(Layers[i]) 676 | node_name = Layers[i].name 677 | 678 | starts, ends, axes = op.analyzeLayer(Layers[i], input_shape) 679 | 680 | SliceLayer = copy.deepcopy(Layers[i]) 681 | 682 | for i in range(len(output_name_list)): 683 | # The reason for putting it here is 684 | slice_name = copy.deepcopy(input_name) 685 | # The shape of starts ends axes is the same 686 | shape = [np.shape([1])] 687 | 688 | starts_param = self.AddInputsTVIMannul(SliceLayer, ['_starts' + str(i)], 689 | [TensorProto.INT64], shape, 690 | [[starts[i]]]) 691 | ends_param = self.AddInputsTVIMannul(SliceLayer, ['_ends' + str(i)], 692 | [TensorProto.INT64], shape, 693 | [[ends[i]]]) 694 | axes_param = self.AddInputsTVIMannul(SliceLayer, ['_axes' + str(i)], 695 | [TensorProto.INT64], shape, 696 | [[axes[i]]]) 697 | slice_name.extend(starts_param) 698 | slice_name.extend(ends_param) 699 | slice_name.extend(axes_param) 700 | 701 | Slice_node = op.createSlice(SliceLayer, output_name_list[i], slice_name, [output_name_list[i]], 702 | input_shape, starts[i], ends[i]) 703 | # 3. 
Add node to node list 704 | self.onnxNodeList.append(Slice_node) 705 | # Reshape 706 | elif Layers[i].type == "Reshape": 707 | # 1.Get node input name, input dimension, output name, node name 708 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 709 | output_name = self.GetCurrentLayerOutName(Layers[i]) 710 | node_name = Layers[i].name 711 | 712 | # 2.Generate node parameter tensor value info, get the node parameter name, and add the parameter name to the node input name list 713 | reshape_param = op.get_reshape_param(Layers[i], input_shape) 714 | reshape_param_shape = [np.shape(reshape_param)] 715 | pname = self.AddInputsTVIMannul(Layers[i], op_pname["Reshape"], op_ptype["Reshape"], reshape_param_shape, 716 | [reshape_param]) 717 | input_name.extend(pname) 718 | 719 | # 3.Build reshape节点 720 | reshape_node = op.createReshape(Layers[i], node_name, input_name, output_name, input_shape) 721 | 722 | # 4.添加点到节点列表 723 | self.onnxNodeList.append(reshape_node) 724 | 725 | # InnerProduct 726 | # Since there is no fully connected layer in onnx, it needs to be split. 
There are two ways to split (Reshape+Gemm, Reshape+MatMul+Add) 727 | elif Layers[i].type == "InnerProduct" or Layers[i].type == Layer_INNER_PRODUCT: 728 | node_layer = copy.deepcopy(Layers[i]) # Deep copy 729 | node_input_name, node_input_shape = self.GetLastLayerOutNameAndShape(node_layer) # Get a list of input names and input shapes 730 | 731 | reshape_outname = "" 732 | reshape_output_shape = op.getReshapeOutShape(Layers[i], node_input_shape) 733 | need_reshape = 0 if reshape_output_shape[0] == node_input_shape[0] else 1 734 | 735 | if need_reshape: 736 | #### reshape 737 | # 1.Get node input name, input dimension, output name, node name 738 | reshape_outname = [node_layer.name + "_Reshape"] 739 | reshape_nodename = node_layer.name + "_Reshape" 740 | 741 | # 2.Generate node parameter tensor value info, get the node parameter name, and add the parameter name to the node input name list 742 | paramshape = [[2]] 743 | reshape_pname = self.AddInputsTVIMannul(node_layer, op_pname["Reshape"], op_ptype["Reshape"], 744 | paramshape, reshape_output_shape) 745 | node_input_name.extend(reshape_pname) 746 | # 3.Build reshape_node 747 | reshape_node = op.createReshape(node_layer, reshape_nodename, node_input_name, reshape_outname, 748 | node_input_shape) 749 | 750 | # 4.Add node to node list 751 | self.onnxNodeList.append(reshape_node) 752 | 753 | # import ipdb; ipdb.set_trace() 754 | 755 | #### Second, Gemm's last node output keeps the original name 756 | gemm_layer = copy.deepcopy(Layers[i]) # Deep copy 757 | # 1.Get node input name, input dimension, output name, node name 758 | gemm_inname = reshape_outname if need_reshape == 1 else node_input_name 759 | gemm_input_shape = reshape_output_shape if need_reshape == 1 else node_input_shape 760 | gemm_outname = [gemm_layer.name] 761 | gemm_nodename = gemm_layer.name 762 | 763 | # 2.Generate node parameter tensor value info, get the node parameter name, and add the parameter name to the node input name list 764 | gemm_pname = 
self.AddInputsTVIFromParams(gemm_layer, op_pname["InnerProduct"], op_ptype[ 765 | "InnerProduct"]) # Get input parameters. For add, the bias stored in blobs[1] is not needed, so get blobs[0] directly 766 | gemm_inname.extend(gemm_pname) 767 | 768 | # 3.Build gemm_node 769 | matmul_node = op.createGemm(gemm_layer, gemm_nodename, gemm_inname, gemm_outname, gemm_input_shape, 770 | gemm_layer.inner_product_param.num_output) 771 | 772 | # 4.Add node to node list 773 | self.onnxNodeList.append(matmul_node) 774 | 775 | elif Layers[i].type == 'ShuffleChannel': 776 | # TODO support ShuffleChannel 777 | # reshape [N, C, H, W] tensor to [N, G, C', H, W] 778 | node_layer = copy.deepcopy(Layers[i]) # Deep copy 779 | node_input_name, node_input_shape = self.GetLastLayerOutNameAndShape(node_layer) # Get a list of input names and input shapes 780 | 781 | reshape_outname = "" 782 | reshape_output_shape = op.getReshapeOutShape(Layers[i], node_input_shape) 783 | need_reshape = 0 if reshape_output_shape[0] == node_input_shape[0] else 1 784 | 785 | if need_reshape: 786 | # 一. 
reshape [N, C, H, W] tensor to [N, G, C', H, W] 787 | # 1.Get node input name, input dimension, output name, node name 788 | reshape_outname = [node_layer.name + "_Reshape"] 789 | reshape_nodename = node_layer.name + "_Reshape" 790 | 791 | # 2.Generate node parameter tensor value info, get the node parameter name, and add the parameter name to the node input name list 792 | param_data = op.getReshapeOutShape(node_layer, node_input_shape) 793 | param_shape = np.array([1, 2, 3, 4, 5], np.int).shape 794 | reshape_pname = self.AddInputsTVIMannul(node_layer, op_pname["Reshape"], op_ptype["Reshape"], 795 | [param_shape], param_data) 796 | 797 | node_input_name.extend(reshape_pname) 798 | # There is no need to expand the input here, because the input has not increased 799 | # node_input_name.extend(reshape_pname) 800 | # 3.Build reshape_node 801 | reshape_node = op.createReshape(node_layer, 802 | reshape_nodename, 803 | node_input_name, 804 | reshape_outname, 805 | node_input_shape) 806 | 807 | # 4.Add node to node list 808 | self.onnxNodeList.append(reshape_node) 809 | 810 | # 2. transpose [N, C', G, H, W] 811 | transpose_layer = copy.deepcopy(Layers[i]) # Deep copy 812 | # 1.Get node input name, input dimension, output name, node name 813 | transpose_input_name = reshape_outname if need_reshape == 1 else node_input_name 814 | transpose_input_shape = reshape_output_shape if need_reshape == 1 else node_input_shape 815 | transpose_output_name = [node_layer.name + "_Transpose"] 816 | transpose_node_name = node_layer.name + "_Transpose" 817 | 818 | # 2.Generate node parameter tensor value info, get the node parameter name, and add the parameter name to the node input name list 819 | # Get input parameters. For add, the bias stored in blobs[1] is not needed, so get blobs[0] directly 820 | 821 | # TODO Why does this place choose to use AddInputsTVIMannul? Depends on what? 822 | # ANSWER: Depends on the type of onnx to be converted 823 | # TODO param_date What is it? 
Why set this variable 824 | param_data = [[2]] 825 | # transpose_pname = self.AddInputsTVIMannul(transpose_layer, 826 | # op_pname["Transpose"], 827 | # op_ptype['Transpose'], 828 | # param_data, 829 | # transpose_input_shape) 830 | # transpose_input_name.extend(transpose_pname) 831 | # 3. 832 | transpose_node = op.createTranspose(transpose_layer, 833 | transpose_node_name, 834 | transpose_input_name, 835 | transpose_output_name, 836 | transpose_input_shape) 837 | # 4.Add node to node list 838 | self.onnxNodeList.append(transpose_node) 839 | 840 | # 三、 Reshape [N, C', G, H, W] tensor to [N, C, H, W] 841 | # 842 | end_layer = copy.deepcopy(Layers[i]) 843 | end_layer.type = "DeReshape" 844 | # The last output node should keep the original name, this is to generate the node and keep the link open 845 | end_output_name = [end_layer.name] 846 | end_node_name = end_layer.name 847 | 848 | # The output of the previous layer is the input of this layer 849 | end_input_name = transpose_node.outputs_name 850 | end_input_shape = transpose_node.outputs_shape 851 | # Finally, keep the shape of the output and input consistent 852 | end_output_shape = [[node_input_shape[0][0], -1, node_input_shape[0][2], node_input_shape[0][3]]] 853 | param_shape = [np.array([1, 2, 3, 4], dtype=np.int).shape] 854 | end_pname = self.AddInputsTVIMannul(node_layer, op_pname["DouReshape"], op_ptype["DouReshape"], 855 | param_shape, end_output_shape) 856 | 857 | end_input_name.extend(end_pname) 858 | # Build 859 | end_node = op.createReshape(end_layer, 860 | end_node_name, 861 | end_input_name, 862 | end_output_name, 863 | end_input_shape) 864 | 865 | self.onnxNodeList.append(end_node) 866 | 867 | # Deconvolution 868 | elif Layers[i].type == "Deconvolution": 869 | # 1.Get node input name, input dimension, output name, node name 870 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 871 | output_name = self.GetCurrentLayerOutName(Layers[i]) 872 | node_name = Layers[i].name 873 | 874 
| # 2.Generate node parameter tensor value info, get the node parameter name, and add the parameter name to the node input name list 875 | 876 | conv_pname = self.AddInputsTVIFromParams(Layers[i], op_pname["ConvTranspose"], 877 | op_ptype["ConvTranspose"]) 878 | input_name.extend(conv_pname) 879 | 880 | # 3.Build conv_node 881 | conv_node = op.createConvTranspose(Layers[i], node_name, input_name, output_name, input_shape) 882 | # if True: 883 | # self.__print_debug_info(node_name, input_name, output_name, input_shape, conv_node.outputs_shape) 884 | 885 | # 4.Add node to node list 886 | self.onnxNodeList.append(conv_node) 887 | 888 | # Flatten 889 | elif Layers[i].type == "Flatten": 890 | # 1.Get node input name, input dimension, output name, node name 891 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 892 | output_name = self.GetCurrentLayerOutName(Layers[i]) 893 | node_name = Layers[i].name 894 | 895 | # Since there are problems with the optimization of Flatten later, we will first Flatten -> reshape 896 | # flatten_node = op.create_flatten_node(layers[i], node_name, input_name, 897 | # output_name, input_shape) 898 | # self.onnxnodelist.append(flatten_nodelatten_node) 899 | # continue 900 | 901 | # Flatten -> Reshape 902 | # import ipdb; ipdb.set_trace() 903 | # # 2.Generate node parameter tensor value info, get the node parameter name, and add the parameter name to the node input name list 904 | paramshape = [[2]] 905 | paramdata = op.getReshapeOutShape(Layers[i], input_shape) 906 | reshape_pname = self.AddInputsTVIMannul(Layers[i], op_pname["Reshape"], op_ptype["Reshape"], paramshape, 907 | paramdata) 908 | input_name.extend(reshape_pname) 909 | 910 | # 3.Build reshape_node 911 | reshape_node = op.createReshape(Layers[i], node_name, input_name, output_name, input_shape) 912 | # 4.Add node to node list 913 | self.onnxNodeList.append(reshape_node) 914 | 915 | elif Layers[i].type == "Permute": 916 | # Permute -> Transpose 917 | input_name, 
input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 918 | output_name = self.GetCurrentLayerOutName(Layers[i]) 919 | node_name = Layers[i].name 920 | 921 | transpose_node = op.createTranspose(Layers[i], node_name, input_name, output_name, input_shape) 922 | self.onnxNodeList.append(transpose_node) 923 | elif Layers[i].type == "PriorBox": 924 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 925 | output_name = self.GetCurrentLayerOutName(Layers[i]) 926 | node_name = Layers[i].name 927 | 928 | priorbox_node = op.create_priorbox_node(Layers[i], node_name, input_name, output_name, input_shape) 929 | 930 | self.onnxNodeList.append(priorbox_node) 931 | 932 | elif Layers[i].type == "DetectionOutput": 933 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 934 | output_name = self.GetCurrentLayerOutName(Layers[i]) 935 | node_name = Layers[i].name 936 | 937 | detection_output_node = op.create_detection_output(Layers[i], node_name, input_name, output_name, input_shape) 938 | self.onnxNodeList.append(detection_output_node) 939 | elif Layers[i].type == "Axpy": 940 | # axpy = mul + add 941 | # top = bottom[0] * bottom[1] + bottom[2] 942 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 943 | output_name = self.GetCurrentLayerOutName(Layers[i]) 944 | node_name = Layers[i].name 945 | # create mul node 946 | mul_node = op.create_axpy_mul_node(Layers[i], node_name, input_name, output_name, input_shape) 947 | self.onnxNodeList.append(mul_node) 948 | 949 | # create add node 950 | add_node = op.create_axpy_add_node(Layers[i], node_name, input_name, output_name, input_shape) 951 | self.onnxNodeList.append(add_node) 952 | elif Layers[i].type == "Normalize": 953 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 954 | output_name = self.GetCurrentLayerOutName(Layers[i]) 955 | node_name = Layers[i].name 956 | lp_normalization_output_name = [output_name[0] + "_lp"] 957 | lp_normalization_node = 
op.create_Lp_Normalization(Layers[i], node_name, input_name, 958 | lp_normalization_output_name, input_shape) 959 | self.onnxNodeList.append(lp_normalization_node) 960 | # get Normalize 961 | scale_shape, scale_data = self.GetParamsShapeAndData(Layers[i]) 962 | scale_shape = [1, scale_shape[0][0], 1, 1] 963 | scale_input = self.AddInputsTVIFromParams(Layers[i], ["_scale"], [TensorProto.FLOAT]) 964 | mul_input_name = [lp_normalization_output_name[0], node_name + "_scale"] 965 | mul_input_shape = [input_shape[0], scale_shape] 966 | mul_node = op.create_mul_node(Layers[i], node_name + "_mul", mul_input_name, output_name, 967 | mul_input_shape) 968 | self.onnxNodeList.append(mul_node) 969 | elif Layers[i].type == "Power": 970 | # Power: Mul + Add + Pow 971 | # create mul node 972 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 973 | output_name = self.GetCurrentLayerOutName(Layers[i]) 974 | node_name = Layers[i].name 975 | power, scale, shift = op.get_power_param(Layers[i]) 976 | scale_node_name = self.AddInputsTVIMannul(Layers[i], ["_scale"], [TensorProto.FLOAT], [np.shape(scale)], [scale]) 977 | mul_input_name = [input_name[0], scale_node_name[0]] 978 | mul_node = op.create_mul_node(Layers[i], node_name + "_mul", mul_input_name, [output_name[0] + "_mul"], 979 | [input_shape[0], np.shape(power)]) 980 | self.onnxNodeList.append(mul_node) 981 | # create Add node 982 | shift_param_name = self.AddInputsTVIMannul(Layers[i], ["_shift"], [TensorProto.FLOAT], [np.shape(scale)], 983 | [shift]) 984 | add_input_name = [output_name[0] + "_mul", shift_param_name[0]] 985 | add_node = op.create_add_node(Layers[i], node_name + "_add", add_input_name, [output_name[0] + "_add"], [input_shape[0], np.shape(shift)]) 986 | self.onnxNodeList.append(add_node) 987 | 988 | # create Pow 989 | power_param_name = self.AddInputsTVIMannul(Layers[i], ["_param_power"], [TensorProto.FLOAT], [np.shape(power)],[power]) 990 | power_input_name = [output_name[0] + "_add", 
power_param_name[0]] 991 | power_node = op.create_power_node(Layers[i], node_name + "_power", power_input_name, output_name, 992 | [input_shape[0], np.shape(power)]) 993 | self.onnxNodeList.append(power_node) 994 | 995 | elif Layers[i].type == "TanH": 996 | # 1.Get node input name, input dimension, output name, node name 997 | input_name, input_shape = self.GetLastLayerOutNameAndShape( 998 | Layers[i]) # Get a list of input names and input shapes 999 | output_name = self.GetCurrentLayerOutName(Layers[i]) # Get a list of output names 1000 | node_name = Layers[i].name 1001 | 1002 | # 2.Build tanh_node 1003 | tanh_node = op.createTanh( 1004 | Layers[i], node_name, input_name, output_name, input_shape) 1005 | 1006 | # 3.Add node to node list 1007 | self.onnxNodeList.append(tanh_node) 1008 | 1009 | elif Layers[i].type == "Crop": 1010 | # Crop: Slice 1011 | # create Slice node 1012 | input_name, input_shape = self.GetLastLayerOutNameAndShape(Layers[i]) 1013 | output_name = self.GetCurrentLayerOutName(Layers[i]) 1014 | node_name = Layers[i].name 1015 | 1016 | starts, ends, axes = op.get_crop_param(Layers[i],input_shape) 1017 | 1018 | Crop_name=[] 1019 | Crop_name.append(input_name[0]) 1020 | 1021 | starts_param = self.AddInputsTVIMannul(Layers[i], 1022 | ['_starts' + str(i)], 1023 | [TensorProto.INT64], 1024 | [np.shape(starts)], 1025 | [starts]) 1026 | ends_param = self.AddInputsTVIMannul(Layers[i], 1027 | ['_ends' + str(i)], 1028 | [TensorProto.INT64], 1029 | [np.shape(ends)], [ends]) 1030 | axes_param = self.AddInputsTVIMannul(Layers[i], 1031 | ['_axes' + str(i)], 1032 | [TensorProto.INT64], 1033 | [np.shape(axes)], [axes]) 1034 | 1035 | Crop_name.extend(starts_param) 1036 | Crop_name.extend(ends_param) 1037 | Crop_name.extend(axes_param) 1038 | crop_node = op.create_crop_node(Layers[i], node_name, Crop_name, output_name, 1039 | input_shape) 1040 | self.onnxNodeList.append(crop_node) 1041 | 1042 | # MVN 1043 | elif Layers[i].type == "MVN": 1044 | # MVN: 
# Determine whether the current node is an output node
def JudgeOutput(self, current_node, nodelist):
    """Tell whether ``current_node`` is a terminal (graph output) node.

    A node counts as an output node when none of the names in its
    ``outputs_name`` list appears in the ``inputs_name`` list of any node in
    ``nodelist``.

    Args:
        current_node: Node object exposing an ``outputs_name`` iterable.
        nodelist: Iterable of node objects exposing ``inputs_name``.

    Returns:
        ``False`` as soon as one produced name is consumed by some node,
        ``True`` otherwise (including when the node produces nothing or
        ``nodelist`` is empty).
    """
    # Same traversal order as a nested loop: outputs outermost, nodes innermost.
    is_consumed = any(
        produced_name in candidate.inputs_name
        for produced_name in current_node.outputs_name
        for candidate in nodelist
    )
    return not is_consumed
def loadcaffemodel(net_path, model_path):
    """Load a Caffe network description and its trained weights.

    Args:
        net_path: Path to the text-format prototxt file.
        model_path: Path to the binary caffemodel file.

    Returns:
        A ``(net, model)`` tuple of ``caffe_upsample_pb2.NetParameter``
        messages: ``net`` is merged from the prototxt text and ``model`` is
        parsed from the caffemodel bytes.
    """
    # read prototxt
    net = caffe_upsample_pb2.NetParameter()
    # Context managers guarantee both handles are closed even when the
    # protobuf merge/parse step raises.
    with open(net_path) as net_file:
        text_format.Merge(net_file.read(), net)

    # read caffemodel
    model = caffe_upsample_pb2.NetParameter()
    with open(model_path, 'rb') as model_file:
        model.ParseFromString(model_file.read())

    print("caffe model has been successfully loaded")
    return net, model


def loadonnxmodel(onnx_path):
    """Load and return the ONNX model stored at ``onnx_path``."""
    onnxmodel = onnx.load(onnx_path)
    return onnxmodel


def saveonnxmodel(onnx_model, onnx_save_path):
    """Write ``onnx_model`` to ``onnx_save_path``, reporting the outcome on stdout.

    Saving is best-effort: any exception raised while saving is caught and
    printed rather than propagated, so callers never see a failure.
    """
    try:
        # Model validation is intentionally left disabled:
        # onnx.checker.check_model(onnx_model)
        onnx.save_model(onnx_model, onnx_save_path)
        print("onnx model has been successfully saved to " + onnx_save_path)
    except Exception as e:
        print("onnx model was not saved successfully:\n", e)
"Slice": [TensorProto.INT64, TensorProto.INT64, TensorProto.INT64, TensorProto.INT64] 39 | } 40 | -------------------------------------------------------------------------------- /caffe2onnx/src/utils.py: -------------------------------------------------------------------------------- 1 | import onnx 2 | 3 | def freeze(onnxmodel): 4 | print("removing not constant initializers from model") 5 | inputs = onnxmodel.graph.input 6 | name_to_input = {} 7 | for input in inputs: 8 | name_to_input[input.name] = input 9 | 10 | for initializer in onnxmodel.graph.initializer: 11 | if initializer.name in name_to_input: 12 | inputs.remove(name_to_input[initializer.name]) 13 | 14 | print("frozen graph has been created") 15 | return 16 | -------------------------------------------------------------------------------- /docs/caffe-operators.md: -------------------------------------------------------------------------------- 1 | # Supported caffe operators 2 | 3 | | operator | caffe2onnx v1.* (onnx 1.4.0) | caffe2onnx v2.* (onnx 1.6.0) | 4 | |-----------------------|:----------------------------------:|:------------------------------------------------:| 5 | | Add | - | ✓ | 6 | | Axpy | - | ✓ | 7 | | BatchNorm | ✓ | ✓ | 8 | | Clip | - | ✓ | 9 | | Concat | ✓ | ✓ | 10 | | Conv | ✓ | ✓ | 11 | | ConvTranspose | ✓ | ✓ | 12 | | Crop | - | ✓ | 13 | | DetectionOutput | - | ✓ | 14 | | Dropout | ✓ | ✓ | 15 | | Eltwise | ✓ | ✓ | 16 | | Flatten | - | ✓ | 17 | | Gemm | ✓ | ✓ | 18 | | InstanceNorm | - | ✓ | 19 | | Interp | - | ✓ | 20 | | Log | - | ✓ | 21 | | LpNormalization | - | ✓ | 22 | | LRN | ✓ | ✓ | 23 | | Min | - | ✓ | 24 | | Mul | - | ✓ | 25 | | Pooling | ✓ | ✓ | 26 | | Power | - | ✓ | 27 | | PRelu | ✓ | ✓ | 28 | | PriorBox | - | ✓ | 29 | | ReLU | ✓ | ✓ | 30 | | Reshape | ✓ | ✓ | 31 | | Resize | - | ✓ | 32 | | Shuffle | - | ✓ | 33 | | Sigmoid | - | ✓ | 34 | | Slice | - | ✓ | 35 | | Softmax | ✓ | ✓ | 36 | | Tanh | - | ✓ | 37 | | Transpose | - | ✓ | 38 | | UnPooling | ✓ | ✓ | 39 | | Upsample | 
"""Packaging script for the caffe2onnx distribution."""
import os

import setuptools

# package metadata
NAME = 'caffe2onnx'
VERSION = '2.0.1'
DESCRIPTION = 'Convert Caffe models to ONNX.'
LICENSE = 'BSD-3'
GIT = 'https://github.com/asiryan/caffe-onnx'
PYTHON = '>=3.5'

# directory containing this script
this = os.path.dirname(__file__)

# the README next to this script is used as the long description
readme_path = os.path.join(this, 'README.md')
with open(readme_path, encoding='utf-8') as readme:
    LONG_DESCRIPTION = readme.read()

# packages installed together with caffe2onnx
REQUIREMENTS = [
    'protobuf',
    'onnx==1.6.0',
]

# trove classifiers passed to setup()
CLASSIFIERS = [
    'Topic :: Software Development :: Libraries',
    'Intended Audience :: Science/Research',
    'Intended Audience :: Developers',
    'License :: OSI Approved :: BSD License',
    'Operating System :: OS Independent',
    'Programming Language :: Python :: 3.5',
    'Programming Language :: Python :: 3.6',
    'Programming Language :: Python :: 3.7',
    'Programming Language :: Python :: 3.8',
    'Programming Language :: Python :: 3.9',
    'Programming Language :: Python :: 3.10',
    'Topic :: Scientific/Engineering :: Artificial Intelligence',
]

# setup tools
setuptools.setup(
    name=NAME,
    version=VERSION,
    description=DESCRIPTION,
    long_description=LONG_DESCRIPTION,
    long_description_content_type='text/markdown',
    license=LICENSE,
    url=GIT,
    author='Valery Asiryan',
    author_email='dmc5mod@yandex.ru',
    python_requires=PYTHON,
    packages=setuptools.find_packages(),
    install_requires=REQUIREMENTS,
    classifiers=CLASSIFIERS,
)