├── .gitignore ├── .travis.yml ├── AUTHORS ├── DeepRosetta.py ├── LICENSE ├── README.md ├── core ├── BaseExporter.py ├── BaseImporter.py ├── RosettaParser.py ├── __init__.py └── config.yaml ├── docs ├── README.md ├── img │ ├── Head-logo_V1.png │ └── RosettaStone.png └── tools │ └── update-authors.sh ├── examples └── main.py ├── modules ├── __init__.py ├── config │ ├── Matconv │ │ └── equivalences.yaml │ ├── Tensorflow │ │ └── equivalences.yaml │ ├── __init__.py │ └── caffe │ │ ├── README.md │ │ ├── __init__.py │ │ ├── caffe.proto │ │ ├── caffe_pb2.py │ │ └── layers.py ├── exporters │ ├── CaffeExporter.py │ ├── DummyCaffeExporter.py │ ├── LasagneExporter.py │ └── __init__.py └── importers │ ├── CaffeImporter.py │ ├── DummyCaffeImporter.py │ ├── LasagneImporter.py │ ├── MatconvFileImporter.py │ ├── TensorflowImporter.py │ └── __init__.py └── test ├── __init__.py ├── download_models.sh ├── local_tests.py └── tests.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.idea 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | 5 | script: python -m test/tests 6 | 7 | notifications: 8 | email: false 9 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # Authors ordered by first contribution. 2 | 3 | Edgar Riba 4 | edgarriba 5 | prlz77 6 | gcucurull 7 | gitabcworld 8 | Onur Ferhat 9 | HanaJarraya 10 | abc 11 | The Gitter Badger 12 | anguelos 13 | Pau Rodriguez 14 | Anguelos Nicolaou 15 | 16 | # Generated by tools/update-authors.sh 17 | -------------------------------------------------------------------------------- /DeepRosetta.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class RosettaStone(object): 4 | """ Class imodeling our network representation 5 | 6 | """ 7 | def __init__(self, *args, **kwargs): 8 | self.importers = self.list_importers() 9 | self.exporters = self.list_exporters() 10 | 11 | def load_list(self, module): 12 | """ Loads a list of files ended with *.py format in a given module 13 | 14 | :param module: string with the relative module name 15 | """ 16 | list_obj = (d for d in os.listdir(module) if d.endswith('.py')) 17 | return list(list_obj) 18 | 19 | def list_importers(self): 20 | """ Loads the list of files inside 'modules/importers' 21 | 22 | """ 23 | return self.load_list('modules/importers') 24 | 25 | def list_exporters(self): 26 | """ Loads the list of files inside 'modules/exporters' 27 | 28 | """ 29 | return self.load_list('modules/exporters') 30 | 31 | def check_parser_type(self, list_types, parser_type): 32 | """ Check if a substring is in a strings list. 33 | 34 | :param list_types: a strings list 35 | :param parser_type: the substring to find 36 | 37 | :return: Raises an Exception in case substring is not found 38 | """ 39 | if not any(parser_type in s for s in list_types): 40 | raise Exception('The parser type is not available: %s' % parser_type) 41 | 42 | def import_module(self, module, klass): 43 | """ Imports a class given a modules and the class name 44 | 45 | Is assumed that the package and class names are the same. 46 | 47 | :param module: string with the module name 48 | :param klass: string with the class name 49 | 50 | :return: the class instance 51 | """ 52 | mod = __import__(module + '.' + klass, fromlist=[klass]) 53 | return getattr(mod, klass)() 54 | 55 | def convert(self, input_file, output_file, input_format, output_format): 56 | """ Function that converts from one framework to a another 57 | 58 | :param input_file: path to the model file to be exported 59 | :param output_file: path to the exported model file 60 | :param input_format: type of the model to be exported 61 | :param output_format: type of the exported model 62 | 63 | :return: None 64 | """ 65 | # check if parser exists 66 | self.check_parser_type(self.importers, input_format) 67 | self.check_parser_type(self.exporters, output_format) 68 | 69 | # instantiate importer and exporter 70 | importer = self.import_module('modules.importers', input_format) 71 | exporter = self.import_module('modules.exporters', output_format) 72 | 73 | # load and save 74 | # exporter.save(importer.load(input_file), output_file) 75 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | COPYRIGHT 2 | 3 | Copyright (c) 2016, the respective contributors. 4 | All rights reserved. 5 | 6 | LICENSE 7 | 8 | The BSD 3-Clause License 9 | 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are met: 13 | 14 | * Redistributions of source code must retain the above copyright notice, this 15 | list of conditions and the following disclaimer. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, 18 | this list of conditions and the following disclaimer in the documentation 19 | and/or other materials provided with the distribution. 20 | 21 | * Neither the name of DeepRosetta nor the names of its 22 | contributors may be used to endorse or promote products derived from 23 | this software without specific prior written permission. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 26 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 29 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Alt text](docs/img/Head-logo_V1.png?raw=true "Deep Rosetta logo") 2 | 3 | [![Join the chat at https://gitter.im/edgarriba/DeepRosetta](https://badges.gitter.im/edgarriba/DeepRosetta.svg)](https://gitter.im/edgarriba/DeepRosetta?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 4 | [![Build Status](https://travis-ci.org/edgarriba/DeepRosetta.svg?branch=master)](https://travis-ci.org/edgarriba/DeepRosetta) 5 | [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) 6 | 7 | An universal deep learning models conversor :shipit: 8 | 9 | The general idea of this project is to convert any deep learning framework model to another. 10 | Each deep learning framework has its own structure representation defined by its layers, parameters and syntax. This structure representation makes difficult to swap from one framework to another. The project tries to solve this problem in a straightforward manner, making it simple for the non-experienced users. The project fills this gap using an inner representation which is the bridge between all deep learning frameworks. 11 | 12 | General Usage 13 | ------------- 14 | You can convert from one framework to another 15 | 16 | import DeepRosetta as dr 17 | 18 | if __name__ == '__main__': 19 | rosetta = dr.RosettaStone() 20 | rosetta.convert('my.caffemodel', 'your.caffemodel', 'DummyCaffeImporter', 'DummyCaffeExporter') 21 | 22 | print 'All went OK!' 23 | 24 | Supported formats 25 | ----------------- 26 | - Importers 27 | - [x] CaffeImporter 28 | - [x] MatConvnetImporter 29 | - [x] LasagneImporter 30 | - [ ] TorchImporter 31 | - [x] TensorflowImporter 32 | - [ ] ChainerImporter 33 | - [ ] TinyCNNImporter 34 | 35 | - Exporters 36 | - [ ] CaffeExporter 37 | - [ ] MatConvnetExporter 38 | - [x] LasagneExporter 39 | - [ ] TorcExporter 40 | - [ ] TensorflowExporter 41 | - [ ] ChainerExporter 42 | - [ ] TinyCNNExporter 43 | 44 | Contributing 45 | ------------ 46 | Developers are needed! Check our Contribution Documents. 47 | 48 | Licence 49 | ------- 50 | the BSD 3-Clause Licence 51 | -------------------------------------------------------------------------------- /core/BaseExporter.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod, abstractproperty 2 | import os 3 | 4 | class BaseExporter: 5 | """ Abstract class modeling a base importer 6 | 7 | """ 8 | __metaclass__ = ABCMeta 9 | 10 | def __init__(self): 11 | pass 12 | 13 | @abstractmethod 14 | def save(self, file_path): 15 | """ Loads a given model file. 16 | It will call the subroutine to load an specific format 17 | 18 | :param file_path: path to the model file 19 | """ 20 | raise NotImplementedError 21 | 22 | @abstractmethod 23 | def toObject(self, rosetaDict): 24 | """Generates a python object with a trained network from a 25 | dictionary in the rosetaFormat. 26 | 27 | :param rosetaDict: the roseta dictionary 28 | """ 29 | raise NotImplementedError 30 | -------------------------------------------------------------------------------- /core/BaseImporter.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod, abstractproperty 2 | 3 | class BaseImporter: 4 | """ Abstract class modeling a base importer 5 | 6 | """ 7 | __metaclass__ = ABCMeta 8 | 9 | def __init__(self): 10 | pass 11 | 12 | @abstractmethod 13 | def load(self, file_path): 14 | """ Loads a given model file. 15 | It will call the subroutine to load an specific format 16 | 17 | :param file_path: path to the model file 18 | """ 19 | return NotImplementedError 20 | 21 | @abstractmethod 22 | def loadFromObject(self, frameworkObj): 23 | """ Converts a network object of a framework to a dictionary 24 | in the roseta representation. This method should only be 25 | implemeted for frameworks that operate in python 26 | 27 | :param frameworkObj: a python object containing a trained network 28 | """ 29 | return NotImplementedError -------------------------------------------------------------------------------- /core/RosettaParser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | class BaseParser: 5 | '''parse the line command of the system''' 6 | def parse(self): 7 | parser = argparse.ArgumentParser(description='Convert deep learning model.') 8 | 9 | 10 | #parser.add_argument('-input model name', help='Store a model name') 11 | parser.add_argument('-i', metavar='in-file', type=argparse.FileType('rt'), 12 | help='name of the file of the model that you want to convert') 13 | 14 | #parser.add_argument('-output model name', help='name of the model that you want to convert') 15 | 16 | parser.add_argument('-o', metavar='out-file', type=argparse.FileType('wt'), 17 | help='file of the model that you want to convert') 18 | 19 | try: 20 | parser.parse_args() 21 | 22 | except IOError, msg: 23 | parser.error(str(msg)) 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgarriba/DeepRosetta/4908092010c4e2fc4f9949b1f273c08996266b73/core/__init__.py -------------------------------------------------------------------------------- /core/config.yaml: -------------------------------------------------------------------------------- 1 | # Rosetta structure 2 | layers: 3 | ConvolutionLayer: # Key is the name of the layer 4 | weights_name: 5 | biases_name: 6 | dim: 7 | - neurons 8 | - channels 9 | - height 10 | - width 11 | type: ConvolutionLayer 12 | padding: 13 | - top 14 | - bottom 15 | - left 16 | - right 17 | stride: 18 | - vertical 19 | - horizontal 20 | bottom: 21 | top: 22 | 23 | LinearLayer: 24 | weights_name: 25 | biases_name: 26 | dim: 27 | - neurons 28 | - channels 29 | type: LinearLayer 30 | bottom: 31 | top: 32 | 33 | ReLULayer: 34 | type: ReLULayer 35 | bottom: 36 | top: 37 | negative_slope: 38 | 39 | TanHLayer: 40 | type: TanHLayer 41 | bottom: 42 | top: 43 | 44 | SigmoidLayer: 45 | type: SigmoidLayer 46 | bottom: 47 | top: 48 | 49 | InputLayer: 50 | type: InputLayer 51 | bottom: 52 | top: 53 | dim: 54 | - neurons 55 | - channels 56 | - height 57 | - width 58 | 59 | DummyLayer: 60 | type: DummyLayer 61 | bottom: 62 | top: 63 | 64 | PoolingLayer: 65 | type: PoolingLayer 66 | bottom: 67 | top: 68 | padding: 69 | - top 70 | - bottom 71 | - left 72 | - right 73 | stride: 74 | - vertical 75 | - horizontal 76 | kernel_size: 77 | pool : 78 | 79 | LRNLayer: 80 | type: LRNLayer 81 | bottom: 82 | top: 83 | local_size: 84 | alpha: 85 | beta: 86 | norm_region: 87 | k: 88 | 89 | DropoutLayer: 90 | type: DropoutLayer 91 | bottom: 92 | top: 93 | dropout_ratio: 94 | 95 | SoftmaxLayer: 96 | type: SoftmaxLayer 97 | bottom: 98 | top: 99 | axis: 100 | 101 | ConcatLayer: 102 | type: ConcatLayer 103 | bottom: 104 | top: 105 | axis: 106 | concat_dim: 107 | 108 | InputLayer: 109 | type: InputLayer 110 | dim: 111 | - channels 112 | - height 113 | - width 114 | top: 115 | 116 | parameters: 117 | 118 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | General Description 2 | ------------------- 3 | 4 | - Input: 5 | The system requires two inputs: (1)the model and (2)the framework language name of the result model. (1)The model can be specified in three ways: first the file model to be converted depending on the framework, second the name of one of the predifined models, thirdly a link which will be downloaded aumotatically. (2) The name could be such as 'Torch', 'Caffe', etc. 6 | 7 | - Output: 8 | The converted model in the desired framework type. The output can be for example a '.caffemodel' or '.t7' file. 9 | 10 | - The architecture of this project: 11 | 1. YAML layer: 12 | 1. YAML inner representation: this file contains all the common names and parameters. 13 | 2. YAML map file: This layer contains a dictionary to map from each one of the components and paramteres, of a specific framework, to the inner representation of this project (which is YAML inner representation). 14 | 2. Core layer: 15 | 1. Importer: we have one encoder associated to each framework model language. 16 | 2. Roseta Stone: It can be done in two different ways. First method is storing the common representation is an hdf5 file at disk. This file stores all the needed parameters to rebuild the deep neural network of any other framework. The second method is not storing at disk and do all the operations in memory. 17 | 3. Exporter: transforms the inner data representation (hdf5) to the desired framework language model. 18 | 19 | - Folder structure: 20 | 1. core: Roseta Stone, Abstract classes of importers and exporters. 21 | 2. io: 22 | * Config: YAML files 23 | * Exporters: encoder of each model 24 | * Importers: decoder of each model 25 | 3. test: test files 26 | 27 | - General Schema: 28 | ![Alt text](img/RosettaStone.png?raw=true "Deep Rosetta architecture") 29 | -------------------------------------------------------------------------------- /docs/img/Head-logo_V1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgarriba/DeepRosetta/4908092010c4e2fc4f9949b1f273c08996266b73/docs/img/Head-logo_V1.png -------------------------------------------------------------------------------- /docs/img/RosettaStone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgarriba/DeepRosetta/4908092010c4e2fc4f9949b1f273c08996266b73/docs/img/RosettaStone.png -------------------------------------------------------------------------------- /docs/tools/update-authors.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | git log --reverse --format='%aN <%aE>' | perl -we ' 4 | 5 | BEGIN { 6 | %seen = (), @authors = (); 7 | } 8 | 9 | while (<>) { 10 | next if $seen{$_}; 11 | next if /\@chromium.org/; 12 | next if //; 13 | $seen{$_} = push @authors, $_; 14 | } 15 | 16 | END { 17 | print "# Authors ordered by first contribution.\n"; 18 | print "\n", @authors, "\n"; 19 | print "# Generated by tools/update-authors.sh\n"; 20 | } 21 | 22 | ' > AUTHORS 23 | -------------------------------------------------------------------------------- /examples/main.py: -------------------------------------------------------------------------------- 1 | import DeepRosetta as dr 2 | 3 | if __name__ == '__main__': 4 | rosetta = dr.RosettaStone() 5 | rosetta.convert('my.caffemodel', 'your.caffemodel', 'DummyCaffeImporter', 'DummyCaffeExporter') 6 | 7 | print 'All went OK!' 8 | -------------------------------------------------------------------------------- /modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgarriba/DeepRosetta/4908092010c4e2fc4f9949b1f273c08996266b73/modules/__init__.py -------------------------------------------------------------------------------- /modules/config/Matconv/equivalences.yaml: -------------------------------------------------------------------------------- 1 | # Matconvnet equivalences to Rosetta 2 | layers: 3 | ConvolutionLayer: # Key is the name of the layer 4 | weights_name: weights 5 | biases_name: weights 6 | dim: 7 | - neurons 8 | - channels 9 | - height 10 | - width 11 | type: conv 12 | padding: pad 13 | stride: stride 14 | bottom: 15 | top: 16 | 17 | LinearLayer: 18 | weights_name: weights 19 | biases_name: weights 20 | dim: 21 | - neurons 22 | - channels 23 | type: fc # workaround 24 | bottom: 25 | top: 26 | 27 | ReLULayer: 28 | type: relu 29 | bottom: 30 | top: 31 | negative_slope: 0 32 | 33 | PoolingLayer: 34 | type: pool 35 | bottom: 36 | top: 37 | padding: pad 38 | stride: stride 39 | kernel_size: pool 40 | pool : method 41 | 42 | LRNLayer: 43 | type: lrn 44 | bottom: 45 | top: 46 | local_size: 47 | alpha: 48 | beta: 49 | norm_region: ACCROSS_CHANNELS 50 | k: 51 | 52 | DropoutLayer: 53 | type: dropout 54 | bottom: 55 | top: 56 | dropout_ratio: ratio 57 | 58 | SoftmaxLayer: 59 | type: softmax 60 | bottom: 61 | top: 62 | axis: 2 63 | 64 | ConcatLayer: 65 | type: ConcatLayer 66 | bottom: 67 | top: 68 | axis: 69 | concat_dim: 70 | 71 | parameters: 72 | 73 | -------------------------------------------------------------------------------- /modules/config/Tensorflow/equivalences.yaml: -------------------------------------------------------------------------------- 1 | # Tensorflow equivalences to Rosetta 2 | layers: 3 | InputLayer: # Key is the name of the layer 4 | type: Placeholder 5 | dim: 6 | - channels 7 | - height 8 | - width 9 | top: 10 | 11 | ConvolutionLayer: # Key is the name of the layer 12 | type: Conv2D 13 | weights_name: weights 14 | biases_name: biases 15 | dim: 16 | - neurons 17 | - channels 18 | - height 19 | - width 20 | padding: pad 21 | stride: stride 22 | bottom: 23 | top: 24 | 25 | LinearLayer: 26 | type: MatMul # TODO check if always 27 | weights_name: weights 28 | biases_name: biases 29 | dim: 30 | - neurons 31 | - channels 32 | bottom: 33 | top: 34 | 35 | ReLULayer: 36 | type: Relu 37 | bottom: 38 | top: 39 | negative_slope: 0 40 | 41 | PoolingLayer: 42 | type: MaxPool 43 | # type_alt1: AvgPool # TODO: to handle several tf layer types in one target layer type? 44 | bottom: 45 | top: 46 | padding: pad 47 | stride: stride 48 | kernel_size: pool 49 | pool : method 50 | 51 | # LRNLayer: 52 | # type: lrn 53 | # bottom: 54 | # top: 55 | # local_size: 56 | # alpha: 57 | # beta: 58 | # norm_region: ACCROSS_CHANNELS 59 | # k: 60 | 61 | # DropoutLayer: 62 | # type: dropout 63 | # bottom: 64 | # top: 65 | # dropout_ratio: ratio 66 | 67 | SoftmaxLayer: 68 | type: Softmax 69 | bottom: 70 | top: 71 | axis: 2 72 | 73 | # ConcatLayer: 74 | # type: ConcatLayer 75 | # bottom: 76 | # top: 77 | # axis: 78 | # concat_dim: 79 | 80 | parameters: 81 | 82 | -------------------------------------------------------------------------------- /modules/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgarriba/DeepRosetta/4908092010c4e2fc4f9949b1f273c08996266b73/modules/config/__init__.py -------------------------------------------------------------------------------- /modules/config/caffe/README.md: -------------------------------------------------------------------------------- 1 | The file caffe_pb2.py is generated by the command: 2 | `protoc -I=. --python_out=. ./caffe.proto` 3 | -------------------------------------------------------------------------------- /modules/config/caffe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgarriba/DeepRosetta/4908092010c4e2fc4f9949b1f273c08996266b73/modules/config/caffe/__init__.py -------------------------------------------------------------------------------- /modules/config/caffe/caffe.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package caffe; 4 | 5 | // Specifies the shape (dimensions) of a Blob. 6 | message BlobShape { 7 | repeated int64 dim = 1 [packed = true]; 8 | } 9 | 10 | message BlobProto { 11 | optional BlobShape shape = 7; 12 | repeated float data = 5 [packed = true]; 13 | repeated float diff = 6 [packed = true]; 14 | repeated double double_data = 8 [packed = true]; 15 | repeated double double_diff = 9 [packed = true]; 16 | 17 | // 4D dimensions -- deprecated. Use "shape" instead. 18 | optional int32 num = 1 [default = 0]; 19 | optional int32 channels = 2 [default = 0]; 20 | optional int32 height = 3 [default = 0]; 21 | optional int32 width = 4 [default = 0]; 22 | } 23 | 24 | // The BlobProtoVector is simply a way to pass multiple blobproto instances 25 | // around. 26 | message BlobProtoVector { 27 | repeated BlobProto blobs = 1; 28 | } 29 | 30 | message Datum { 31 | optional int32 channels = 1; 32 | optional int32 height = 2; 33 | optional int32 width = 3; 34 | // the actual image data, in bytes 35 | optional bytes data = 4; 36 | optional int32 label = 5; 37 | // Optionally, the datum could also hold float data. 38 | repeated float float_data = 6; 39 | // If true data contains an encoded image that need to be decoded 40 | optional bool encoded = 7 [default = false]; 41 | } 42 | 43 | message FillerParameter { 44 | // The filler type. 45 | optional string type = 1 [default = 'constant']; 46 | optional float value = 2 [default = 0]; // the value in constant filler 47 | optional float min = 3 [default = 0]; // the min value in uniform filler 48 | optional float max = 4 [default = 1]; // the max value in uniform filler 49 | optional float mean = 5 [default = 0]; // the mean value in Gaussian filler 50 | optional float std = 6 [default = 1]; // the std value in Gaussian filler 51 | // The expected number of non-zero output weights for a given input in 52 | // Gaussian filler -- the default -1 means don't perform sparsification. 53 | optional int32 sparse = 7 [default = -1]; 54 | // Normalize the filler variance by fan_in, fan_out, or their average. 55 | // Applies to 'xavier' and 'msra' fillers. 56 | enum VarianceNorm { 57 | FAN_IN = 0; 58 | FAN_OUT = 1; 59 | AVERAGE = 2; 60 | } 61 | optional VarianceNorm variance_norm = 8 [default = FAN_IN]; 62 | } 63 | 64 | message NetParameter { 65 | optional string name = 1; // consider giving the network a name 66 | // DEPRECATED. See InputParameter. The input blobs to the network. 67 | repeated string input = 3; 68 | // DEPRECATED. See InputParameter. The shape of the input blobs. 69 | repeated BlobShape input_shape = 8; 70 | 71 | // 4D input dimensions -- deprecated. Use "input_shape" instead. 72 | // If specified, for each input blob there should be four 73 | // values specifying the num, channels, height and width of the input blob. 74 | // Thus, there should be a total of (4 * #input) numbers. 75 | repeated int32 input_dim = 4; 76 | 77 | // Whether the network will force every layer to carry out backward operation. 78 | // If set False, then whether to carry out backward is determined 79 | // automatically according to the net structure and learning rates. 80 | optional bool force_backward = 5 [default = false]; 81 | // The current "state" of the network, including the phase, level, and stage. 82 | // Some layers may be included/excluded depending on this state and the states 83 | // specified in the layers' include and exclude fields. 84 | optional NetState state = 6; 85 | 86 | // Print debugging information about results while running Net::Forward, 87 | // Net::Backward, and Net::Update. 88 | optional bool debug_info = 7 [default = false]; 89 | 90 | // The layers that make up the net. Each of their configurations, including 91 | // connectivity and behavior, is specified as a LayerParameter. 92 | repeated LayerParameter layer = 100; // ID 100 so layers are printed last. 93 | 94 | // DEPRECATED: use 'layer' instead. 95 | repeated V1LayerParameter layers = 2; 96 | } 97 | 98 | // NOTE 99 | // Update the next available ID when you add a new SolverParameter field. 100 | // 101 | // SolverParameter next available ID: 41 (last added: type) 102 | message SolverParameter { 103 | ////////////////////////////////////////////////////////////////////////////// 104 | // Specifying the train and test networks 105 | // 106 | // Exactly one train net must be specified using one of the following fields: 107 | // train_net_param, train_net, net_param, net 108 | // One or more test nets may be specified using any of the following fields: 109 | // test_net_param, test_net, net_param, net 110 | // If more than one test net field is specified (e.g., both net and 111 | // test_net are specified), they will be evaluated in the field order given 112 | // above: (1) test_net_param, (2) test_net, (3) net_param/net. 113 | // A test_iter must be specified for each test_net. 114 | // A test_level and/or a test_stage may also be specified for each test_net. 115 | ////////////////////////////////////////////////////////////////////////////// 116 | 117 | // Proto filename for the train net, possibly combined with one or more 118 | // test nets. 119 | optional string net = 24; 120 | // Inline train net param, possibly combined with one or more test nets. 121 | optional NetParameter net_param = 25; 122 | 123 | optional string train_net = 1; // Proto filename for the train net. 124 | repeated string test_net = 2; // Proto filenames for the test nets. 125 | optional NetParameter train_net_param = 21; // Inline train net params. 126 | repeated NetParameter test_net_param = 22; // Inline test net params. 127 | 128 | // The states for the train/test nets. Must be unspecified or 129 | // specified once per net. 130 | // 131 | // By default, all states will have solver = true; 132 | // train_state will have phase = TRAIN, 133 | // and all test_state's will have phase = TEST. 134 | // Other defaults are set according to the NetState defaults. 135 | optional NetState train_state = 26; 136 | repeated NetState test_state = 27; 137 | 138 | // The number of iterations for each test net. 139 | repeated int32 test_iter = 3; 140 | 141 | // The number of iterations between two testing phases. 142 | optional int32 test_interval = 4 [default = 0]; 143 | optional bool test_compute_loss = 19 [default = false]; 144 | // If true, run an initial test pass before the first iteration, 145 | // ensuring memory availability and printing the starting value of the loss. 146 | optional bool test_initialization = 32 [default = true]; 147 | optional float base_lr = 5; // The base learning rate 148 | // the number of iterations between displaying info. If display = 0, no info 149 | // will be displayed. 150 | optional int32 display = 6; 151 | // Display the loss averaged over the last average_loss iterations 152 | optional int32 average_loss = 33 [default = 1]; 153 | optional int32 max_iter = 7; // the maximum number of iterations 154 | // accumulate gradients over `iter_size` x `batch_size` instances 155 | optional int32 iter_size = 36 [default = 1]; 156 | 157 | // The learning rate decay policy. The currently implemented learning rate 158 | // policies are as follows: 159 | // - fixed: always return base_lr. 160 | // - step: return base_lr * gamma ^ (floor(iter / step)) 161 | // - exp: return base_lr * gamma ^ iter 162 | // - inv: return base_lr * (1 + gamma * iter) ^ (- power) 163 | // - multistep: similar to step but it allows non uniform steps defined by 164 | // stepvalue 165 | // - poly: the effective learning rate follows a polynomial decay, to be 166 | // zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power) 167 | // - sigmoid: the effective learning rate follows a sigmod decay 168 | // return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize)))) 169 | // 170 | // where base_lr, max_iter, gamma, step, stepvalue and power are defined 171 | // in the solver parameter protocol buffer, and iter is the current iteration. 172 | optional string lr_policy = 8; 173 | optional float gamma = 9; // The parameter to compute the learning rate. 174 | optional float power = 10; // The parameter to compute the learning rate. 175 | optional float momentum = 11; // The momentum value. 176 | optional float weight_decay = 12; // The weight decay. 177 | // regularization types supported: L1 and L2 178 | // controlled by weight_decay 179 | optional string regularization_type = 29 [default = "L2"]; 180 | // the stepsize for learning rate policy "step" 181 | optional int32 stepsize = 13; 182 | // the stepsize for learning rate policy "multistep" 183 | repeated int32 stepvalue = 34; 184 | 185 | // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm, 186 | // whenever their actual L2 norm is larger. 187 | optional float clip_gradients = 35 [default = -1]; 188 | 189 | optional int32 snapshot = 14 [default = 0]; // The snapshot interval 190 | optional string snapshot_prefix = 15; // The prefix for the snapshot. 191 | // whether to snapshot diff in the results or not. Snapshotting diff will help 192 | // debugging but the final protocol buffer size will be much larger. 193 | optional bool snapshot_diff = 16 [default = false]; 194 | enum SnapshotFormat { 195 | HDF5 = 0; 196 | BINARYPROTO = 1; 197 | } 198 | optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO]; 199 | // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. 200 | enum SolverMode { 201 | CPU = 0; 202 | GPU = 1; 203 | } 204 | optional SolverMode solver_mode = 17 [default = GPU]; 205 | // the device_id will that be used in GPU mode. Use device_id = 0 in default. 206 | optional int32 device_id = 18 [default = 0]; 207 | // If non-negative, the seed with which the Solver will initialize the Caffe 208 | // random number generator -- useful for reproducible results. Otherwise, 209 | // (and by default) initialize using a seed derived from the system clock. 210 | optional int64 random_seed = 20 [default = -1]; 211 | 212 | // type of the solver 213 | optional string type = 40 [default = "SGD"]; 214 | 215 | // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam 216 | optional float delta = 31 [default = 1e-8]; 217 | // parameters for the Adam solver 218 | optional float momentum2 = 39 [default = 0.999]; 219 | 220 | // RMSProp decay value 221 | // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) 222 | optional float rms_decay = 38; 223 | 224 | // If true, print information about the state of the net that may help with 225 | // debugging learning problems. 226 | optional bool debug_info = 23 [default = false]; 227 | 228 | // If false, don't save a snapshot after training finishes. 229 | optional bool snapshot_after_train = 28 [default = true]; 230 | 231 | // DEPRECATED: old solver enum types, use string instead 232 | enum SolverType { 233 | SGD = 0; 234 | NESTEROV = 1; 235 | ADAGRAD = 2; 236 | RMSPROP = 3; 237 | ADADELTA = 4; 238 | ADAM = 5; 239 | } 240 | // DEPRECATED: use type instead of solver_type 241 | optional SolverType solver_type = 30 [default = SGD]; 242 | } 243 | 244 | // A message that stores the solver snapshots 245 | message SolverState { 246 | optional int32 iter = 1; // The current iteration 247 | optional string learned_net = 2; // The file that stores the learned net. 248 | repeated BlobProto history = 3; // The history for sgd solvers 249 | optional int32 current_step = 4 [default = 0]; // The current step for learning rate 250 | } 251 | 252 | enum Phase { 253 | TRAIN = 0; 254 | TEST = 1; 255 | } 256 | 257 | message NetState { 258 | optional Phase phase = 1 [default = TEST]; 259 | optional int32 level = 2 [default = 0]; 260 | repeated string stage = 3; 261 | } 262 | 263 | message NetStateRule { 264 | // Set phase to require the NetState have a particular phase (TRAIN or TEST) 265 | // to meet this rule. 266 | optional Phase phase = 1; 267 | 268 | // Set the minimum and/or maximum levels in which the layer should be used. 269 | // Leave undefined to meet the rule regardless of level. 270 | optional int32 min_level = 2; 271 | optional int32 max_level = 3; 272 | 273 | // Customizable sets of stages to include or exclude. 274 | // The net must have ALL of the specified stages and NONE of the specified 275 | // "not_stage"s to meet the rule. 276 | // (Use multiple NetStateRules to specify conjunctions of stages.) 277 | repeated string stage = 4; 278 | repeated string not_stage = 5; 279 | } 280 | 281 | // Specifies training parameters (multipliers on global learning constants, 282 | // and the name and other settings used for weight sharing). 283 | message ParamSpec { 284 | // The names of the parameter blobs -- useful for sharing parameters among 285 | // layers, but never required otherwise. To share a parameter between two 286 | // layers, give it a (non-empty) name. 287 | optional string name = 1; 288 | 289 | // Whether to require shared weights to have the same shape, or just the same 290 | // count -- defaults to STRICT if unspecified. 291 | optional DimCheckMode share_mode = 2; 292 | enum DimCheckMode { 293 | // STRICT (default) requires that num, channels, height, width each match. 294 | STRICT = 0; 295 | // PERMISSIVE requires only the count (num*channels*height*width) to match. 296 | PERMISSIVE = 1; 297 | } 298 | 299 | // The multiplier on the global learning rate for this parameter. 300 | optional float lr_mult = 3 [default = 1.0]; 301 | 302 | // The multiplier on the global weight decay for this parameter. 303 | optional float decay_mult = 4 [default = 1.0]; 304 | } 305 | 306 | // NOTE 307 | // Update the next available ID when you add a new LayerParameter field. 308 | // 309 | // LayerParameter next available layer-specific ID: 147 (last added: recurrent_param) 310 | message LayerParameter { 311 | optional string name = 1; // the layer name 312 | optional string type = 2; // the layer type 313 | repeated string bottom = 3; // the name of each bottom blob 314 | repeated string top = 4; // the name of each top blob 315 | 316 | // The train / test phase for computation. 317 | optional Phase phase = 10; 318 | 319 | // The amount of weight to assign each top blob in the objective. 320 | // Each layer assigns a default value, usually of either 0 or 1, 321 | // to each top blob. 322 | repeated float loss_weight = 5; 323 | 324 | // Specifies training parameters (multipliers on global learning constants, 325 | // and the name and other settings used for weight sharing). 326 | repeated ParamSpec param = 6; 327 | 328 | // The blobs containing the numeric parameters of the layer. 329 | repeated BlobProto blobs = 7; 330 | 331 | // Specifies whether to backpropagate to each bottom. If unspecified, 332 | // Caffe will automatically infer whether each input needs backpropagation 333 | // to compute parameter gradients. If set to true for some inputs, 334 | // backpropagation to those inputs is forced; if set false for some inputs, 335 | // backpropagation to those inputs is skipped. 336 | // 337 | // The size must be either 0 or equal to the number of bottoms. 338 | repeated bool propagate_down = 11; 339 | 340 | // Rules controlling whether and when a layer is included in the network, 341 | // based on the current NetState. You may specify a non-zero number of rules 342 | // to include OR exclude, but not both. If no include or exclude rules are 343 | // specified, the layer is always included. If the current NetState meets 344 | // ANY (i.e., one or more) of the specified rules, the layer is 345 | // included/excluded. 346 | repeated NetStateRule include = 8; 347 | repeated NetStateRule exclude = 9; 348 | 349 | // Parameters for data pre-processing. 350 | optional TransformationParameter transform_param = 100; 351 | 352 | // Parameters shared by loss layers. 353 | optional LossParameter loss_param = 101; 354 | 355 | // Layer type-specific parameters. 356 | // 357 | // Note: certain layers may have more than one computational engine 358 | // for their implementation. These layers include an Engine type and 359 | // engine parameter for selecting the implementation. 360 | // The default for the engine is set by the ENGINE switch at compile-time. 361 | optional AccuracyParameter accuracy_param = 102; 362 | optional ArgMaxParameter argmax_param = 103; 363 | optional BatchNormParameter batch_norm_param = 139; 364 | optional BiasParameter bias_param = 141; 365 | optional ConcatParameter concat_param = 104; 366 | optional ContrastiveLossParameter contrastive_loss_param = 105; 367 | optional ConvolutionParameter convolution_param = 106; 368 | optional CropParameter crop_param = 144; 369 | optional DataParameter data_param = 107; 370 | optional DropoutParameter dropout_param = 108; 371 | optional DummyDataParameter dummy_data_param = 109; 372 | optional EltwiseParameter eltwise_param = 110; 373 | optional ELUParameter elu_param = 140; 374 | optional EmbedParameter embed_param = 137; 375 | optional ExpParameter exp_param = 111; 376 | optional FlattenParameter flatten_param = 135; 377 | optional HDF5DataParameter hdf5_data_param = 112; 378 | optional HDF5OutputParameter hdf5_output_param = 113; 379 | optional HingeLossParameter hinge_loss_param = 114; 380 | optional ImageDataParameter image_data_param = 115; 381 | optional InfogainLossParameter infogain_loss_param = 116; 382 | optional InnerProductParameter inner_product_param = 117; 383 | optional InputParameter input_param = 143; 384 | optional LogParameter log_param = 134; 385 | optional LRNParameter lrn_param = 118; 386 | optional MemoryDataParameter memory_data_param = 119; 387 | optional MVNParameter mvn_param = 120; 388 | optional ParameterParameter parameter_param = 145; 389 | optional PoolingParameter pooling_param = 121; 390 | optional PowerParameter power_param = 122; 391 | optional PReLUParameter prelu_param = 131; 392 | optional PythonParameter python_param = 130; 393 | optional RecurrentParameter recurrent_param = 146; 394 | optional ReductionParameter reduction_param = 136; 395 | optional ReLUParameter relu_param = 123; 396 | optional ReshapeParameter reshape_param = 133; 397 | optional ScaleParameter scale_param = 142; 398 | optional SigmoidParameter sigmoid_param = 124; 399 | optional SoftmaxParameter softmax_param = 125; 400 | optional SPPParameter spp_param = 132; 401 | optional SliceParameter slice_param = 126; 402 | optional TanHParameter tanh_param = 127; 403 | optional ThresholdParameter threshold_param = 128; 404 | optional TileParameter tile_param = 138; 405 | optional WindowDataParameter window_data_param = 129; 406 | } 407 | 408 | // Message that stores parameters used to apply transformation 409 | // to the data layer's data 410 | message TransformationParameter { 411 | // For data pre-processing, we can do simple scaling and subtracting the 412 | // data mean, if provided. Note that the mean subtraction is always carried 413 | // out before scaling. 414 | optional float scale = 1 [default = 1]; 415 | // Specify if we want to randomly mirror data. 416 | optional bool mirror = 2 [default = false]; 417 | // Specify if we would like to randomly crop an image. 418 | optional uint32 crop_size = 3 [default = 0]; 419 | // mean_file and mean_value cannot be specified at the same time 420 | optional string mean_file = 4; 421 | // if specified can be repeated once (would substract it from all the channels) 422 | // or can be repeated the same number of times as channels 423 | // (would subtract them from the corresponding channel) 424 | repeated float mean_value = 5; 425 | // Force the decoded image to have 3 color channels. 426 | optional bool force_color = 6 [default = false]; 427 | // Force the decoded image to have 1 color channels. 428 | optional bool force_gray = 7 [default = false]; 429 | } 430 | 431 | // Message that stores parameters shared by loss layers 432 | message LossParameter { 433 | // If specified, ignore instances with the given label. 434 | optional int32 ignore_label = 1; 435 | // How to normalize the loss for loss layers that aggregate across batches, 436 | // spatial dimensions, or other dimensions. Currently only implemented in 437 | // SoftmaxWithLoss layer. 438 | enum NormalizationMode { 439 | // Divide by the number of examples in the batch times spatial dimensions. 440 | // Outputs that receive the ignore label will NOT be ignored in computing 441 | // the normalization factor. 442 | FULL = 0; 443 | // Divide by the total number of output locations that do not take the 444 | // ignore_label. If ignore_label is not set, this behaves like FULL. 445 | VALID = 1; 446 | // Divide by the batch size. 447 | BATCH_SIZE = 2; 448 | // Do not normalize the loss. 449 | NONE = 3; 450 | } 451 | optional NormalizationMode normalization = 3 [default = VALID]; 452 | // Deprecated. Ignored if normalization is specified. If normalization 453 | // is not specified, then setting this to false will be equivalent to 454 | // normalization = BATCH_SIZE to be consistent with previous behavior. 455 | optional bool normalize = 2; 456 | } 457 | 458 | // Messages that store parameters used by individual layer types follow, in 459 | // alphabetical order. 460 | 461 | message AccuracyParameter { 462 | // When computing accuracy, count as correct by comparing the true label to 463 | // the top k scoring classes. By default, only compare to the top scoring 464 | // class (i.e. argmax). 465 | optional uint32 top_k = 1 [default = 1]; 466 | 467 | // The "label" axis of the prediction blob, whose argmax corresponds to the 468 | // predicted label -- may be negative to index from the end (e.g., -1 for the 469 | // last axis). For example, if axis == 1 and the predictions are 470 | // (N x C x H x W), the label blob is expected to contain N*H*W ground truth 471 | // labels with integer values in {0, 1, ..., C-1}. 472 | optional int32 axis = 2 [default = 1]; 473 | 474 | // If specified, ignore instances with the given label. 475 | optional int32 ignore_label = 3; 476 | } 477 | 478 | message ArgMaxParameter { 479 | // If true produce pairs (argmax, maxval) 480 | optional bool out_max_val = 1 [default = false]; 481 | optional uint32 top_k = 2 [default = 1]; 482 | // The axis along which to maximise -- may be negative to index from the 483 | // end (e.g., -1 for the last axis). 484 | // By default ArgMaxLayer maximizes over the flattened trailing dimensions 485 | // for each index of the first / num dimension. 486 | optional int32 axis = 3; 487 | } 488 | 489 | message ConcatParameter { 490 | // The axis along which to concatenate -- may be negative to index from the 491 | // end (e.g., -1 for the last axis). Other axes must have the 492 | // same dimension for all the bottom blobs. 493 | // By default, ConcatLayer concatenates blobs along the "channels" axis (1). 494 | optional int32 axis = 2 [default = 1]; 495 | 496 | // DEPRECATED: alias for "axis" -- does not support negative indexing. 497 | optional uint32 concat_dim = 1 [default = 1]; 498 | } 499 | 500 | message BatchNormParameter { 501 | // If false, accumulate global mean/variance values via a moving average. If 502 | // true, use those accumulated values instead of computing mean/variance 503 | // across the batch. 504 | optional bool use_global_stats = 1; 505 | // How much does the moving average decay each iteration? 506 | optional float moving_average_fraction = 2 [default = .999]; 507 | // Small value to add to the variance estimate so that we don't divide by 508 | // zero. 509 | optional float eps = 3 [default = 1e-5]; 510 | } 511 | 512 | message BiasParameter { 513 | // The first axis of bottom[0] (the first input Blob) along which to apply 514 | // bottom[1] (the second input Blob). May be negative to index from the end 515 | // (e.g., -1 for the last axis). 516 | // 517 | // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 518 | // top[0] will have the same shape, and bottom[1] may have any of the 519 | // following shapes (for the given value of axis): 520 | // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 521 | // (axis == 1 == -3) 3; 3x40; 3x40x60 522 | // (axis == 2 == -2) 40; 40x60 523 | // (axis == 3 == -1) 60 524 | // Furthermore, bottom[1] may have the empty shape (regardless of the value of 525 | // "axis") -- a scalar bias. 526 | optional int32 axis = 1 [default = 1]; 527 | 528 | // (num_axes is ignored unless just one bottom is given and the bias is 529 | // a learned parameter of the layer. Otherwise, num_axes is determined by the 530 | // number of axes by the second bottom.) 531 | // The number of axes of the input (bottom[0]) covered by the bias 532 | // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 533 | // Set num_axes := 0, to add a zero-axis Blob: a scalar. 534 | optional int32 num_axes = 2 [default = 1]; 535 | 536 | // (filler is ignored unless just one bottom is given and the bias is 537 | // a learned parameter of the layer.) 538 | // The initialization for the learned bias parameter. 539 | // Default is the zero (0) initialization, resulting in the BiasLayer 540 | // initially performing the identity operation. 541 | optional FillerParameter filler = 3; 542 | } 543 | 544 | message ContrastiveLossParameter { 545 | // margin for dissimilar pair 546 | optional float margin = 1 [default = 1.0]; 547 | // The first implementation of this cost did not exactly match the cost of 548 | // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. 549 | // legacy_version = false (the default) uses (margin - d)^2 as proposed in the 550 | // Hadsell paper. New models should probably use this version. 551 | // legacy_version = true uses (margin - d^2). This is kept to support / 552 | // reproduce existing models and results 553 | optional bool legacy_version = 2 [default = false]; 554 | } 555 | 556 | message ConvolutionParameter { 557 | optional uint32 num_output = 1; // The number of outputs for the layer 558 | optional bool bias_term = 2 [default = true]; // whether to have bias terms 559 | 560 | // Pad, kernel size, and stride are all given as a single value for equal 561 | // dimensions in all spatial dimensions, or once per spatial dimension. 562 | repeated uint32 pad = 3; // The padding size; defaults to 0 563 | repeated uint32 kernel_size = 4; // The kernel size 564 | repeated uint32 stride = 6; // The stride; defaults to 1 565 | // Factor used to dilate the kernel, (implicitly) zero-filling the resulting 566 | // holes. (Kernel dilation is sometimes referred to by its use in the 567 | // algorithme à trous from Holschneider et al. 1987.) 568 | repeated uint32 dilation = 18; // The dilation; defaults to 1 569 | 570 | // For 2D convolution only, the *_h and *_w versions may also be used to 571 | // specify both spatial dimensions. 572 | optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) 573 | optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) 574 | optional uint32 kernel_h = 11; // The kernel height (2D only) 575 | optional uint32 kernel_w = 12; // The kernel width (2D only) 576 | optional uint32 stride_h = 13; // The stride height (2D only) 577 | optional uint32 stride_w = 14; // The stride width (2D only) 578 | 579 | optional uint32 group = 5 [default = 1]; // The group size for group conv 580 | 581 | optional FillerParameter weight_filler = 7; // The filler for the weight 582 | optional FillerParameter bias_filler = 8; // The filler for the bias 583 | enum Engine { 584 | DEFAULT = 0; 585 | CAFFE = 1; 586 | CUDNN = 2; 587 | } 588 | optional Engine engine = 15 [default = DEFAULT]; 589 | 590 | // The axis to interpret as "channels" when performing convolution. 591 | // Preceding dimensions are treated as independent inputs; 592 | // succeeding dimensions are treated as "spatial". 593 | // With (N, C, H, W) inputs, and axis == 1 (the default), we perform 594 | // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for 595 | // groups g>1) filters across the spatial axes (H, W) of the input. 596 | // With (N, C, D, H, W) inputs, and axis == 1, we perform 597 | // N independent 3D convolutions, sliding (C/g)-channels 598 | // filters across the spatial axes (D, H, W) of the input. 599 | optional int32 axis = 16 [default = 1]; 600 | 601 | // Whether to force use of the general ND convolution, even if a specific 602 | // implementation for blobs of the appropriate number of spatial dimensions 603 | // is available. (Currently, there is only a 2D-specific convolution 604 | // implementation; for input blobs with num_axes != 2, this option is 605 | // ignored and the ND implementation will be used.) 606 | optional bool force_nd_im2col = 17 [default = false]; 607 | } 608 | 609 | message CropParameter { 610 | // To crop, elements of the first bottom are selected to fit the dimensions 611 | // of the second, reference bottom. The crop is configured by 612 | // - the crop `axis` to pick the dimensions for cropping 613 | // - the crop `offset` to set the shift for all/each dimension 614 | // to align the cropped bottom with the reference bottom. 615 | // All dimensions up to but excluding `axis` are preserved, while 616 | // the dimensions including and trailing `axis` are cropped. 617 | // If only one `offset` is set, then all dimensions are offset by this amount. 618 | // Otherwise, the number of offsets must equal the number of cropped axes to 619 | // shift the crop in each dimension accordingly. 620 | // Note: standard dimensions are N,C,H,W so the default is a spatial crop, 621 | // and `axis` may be negative to index from the end (e.g., -1 for the last 622 | // axis). 623 | optional int32 axis = 1 [default = 2]; 624 | repeated uint32 offset = 2; 625 | } 626 | 627 | message DataParameter { 628 | enum DB { 629 | LEVELDB = 0; 630 | LMDB = 1; 631 | } 632 | // Specify the data source. 633 | optional string source = 1; 634 | // Specify the batch size. 635 | optional uint32 batch_size = 4; 636 | // The rand_skip variable is for the data layer to skip a few data points 637 | // to avoid all asynchronous sgd clients to start at the same point. The skip 638 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 639 | // be larger than the number of keys in the database. 640 | // DEPRECATED. Each solver accesses a different subset of the database. 641 | optional uint32 rand_skip = 7 [default = 0]; 642 | optional DB backend = 8 [default = LEVELDB]; 643 | // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 644 | // simple scaling and subtracting the data mean, if provided. Note that the 645 | // mean subtraction is always carried out before scaling. 646 | optional float scale = 2 [default = 1]; 647 | optional string mean_file = 3; 648 | // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 649 | // crop an image. 650 | optional uint32 crop_size = 5 [default = 0]; 651 | // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 652 | // data. 653 | optional bool mirror = 6 [default = false]; 654 | // Force the encoded image to have 3 color channels 655 | optional bool force_encoded_color = 9 [default = false]; 656 | // Prefetch queue (Number of batches to prefetch to host memory, increase if 657 | // data access bandwidth varies). 658 | optional uint32 prefetch = 10 [default = 4]; 659 | } 660 | 661 | message DropoutParameter { 662 | optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio 663 | } 664 | 665 | // DummyDataLayer fills any number of arbitrarily shaped blobs with random 666 | // (or constant) data generated by "Fillers" (see "message FillerParameter"). 667 | message DummyDataParameter { 668 | // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N 669 | // shape fields, and 0, 1 or N data_fillers. 670 | // 671 | // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. 672 | // If 1 data_filler is specified, it is applied to all top blobs. If N are 673 | // specified, the ith is applied to the ith top blob. 674 | repeated FillerParameter data_filler = 1; 675 | repeated BlobShape shape = 6; 676 | 677 | // 4D dimensions -- deprecated. Use "shape" instead. 678 | repeated uint32 num = 2; 679 | repeated uint32 channels = 3; 680 | repeated uint32 height = 4; 681 | repeated uint32 width = 5; 682 | } 683 | 684 | message EltwiseParameter { 685 | enum EltwiseOp { 686 | PROD = 0; 687 | SUM = 1; 688 | MAX = 2; 689 | } 690 | optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation 691 | repeated float coeff = 2; // blob-wise coefficient for SUM operation 692 | 693 | // Whether to use an asymptotically slower (for >2 inputs) but stabler method 694 | // of computing the gradient for the PROD operation. (No effect for SUM op.) 695 | optional bool stable_prod_grad = 3 [default = true]; 696 | } 697 | 698 | // Message that stores parameters used by ELULayer 699 | message ELUParameter { 700 | // Described in: 701 | // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate 702 | // Deep Network Learning by Exponential Linear Units (ELUs). arXiv 703 | optional float alpha = 1 [default = 1]; 704 | } 705 | 706 | // Message that stores parameters used by EmbedLayer 707 | message EmbedParameter { 708 | optional uint32 num_output = 1; // The number of outputs for the layer 709 | // The input is given as integers to be interpreted as one-hot 710 | // vector indices with dimension num_input. Hence num_input should be 711 | // 1 greater than the maximum possible input value. 712 | optional uint32 input_dim = 2; 713 | 714 | optional bool bias_term = 3 [default = true]; // Whether to use a bias term 715 | optional FillerParameter weight_filler = 4; // The filler for the weight 716 | optional FillerParameter bias_filler = 5; // The filler for the bias 717 | 718 | } 719 | 720 | // Message that stores parameters used by ExpLayer 721 | message ExpParameter { 722 | // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. 723 | // Or if base is set to the default (-1), base is set to e, 724 | // so y = exp(shift + scale * x). 725 | optional float base = 1 [default = -1.0]; 726 | optional float scale = 2 [default = 1.0]; 727 | optional float shift = 3 [default = 0.0]; 728 | } 729 | 730 | /// Message that stores parameters used by FlattenLayer 731 | message FlattenParameter { 732 | // The first axis to flatten: all preceding axes are retained in the output. 733 | // May be negative to index from the end (e.g., -1 for the last axis). 734 | optional int32 axis = 1 [default = 1]; 735 | 736 | // The last axis to flatten: all following axes are retained in the output. 737 | // May be negative to index from the end (e.g., the default -1 for the last 738 | // axis). 739 | optional int32 end_axis = 2 [default = -1]; 740 | } 741 | 742 | // Message that stores parameters used by HDF5DataLayer 743 | message HDF5DataParameter { 744 | // Specify the data source. 745 | optional string source = 1; 746 | // Specify the batch size. 747 | optional uint32 batch_size = 2; 748 | 749 | // Specify whether to shuffle the data. 750 | // If shuffle == true, the ordering of the HDF5 files is shuffled, 751 | // and the ordering of data within any given HDF5 file is shuffled, 752 | // but data between different files are not interleaved; all of a file's 753 | // data are output (in a random order) before moving onto another file. 754 | optional bool shuffle = 3 [default = false]; 755 | } 756 | 757 | message HDF5OutputParameter { 758 | optional string file_name = 1; 759 | } 760 | 761 | message HingeLossParameter { 762 | enum Norm { 763 | L1 = 1; 764 | L2 = 2; 765 | } 766 | // Specify the Norm to use L1 or L2 767 | optional Norm norm = 1 [default = L1]; 768 | } 769 | 770 | message ImageDataParameter { 771 | // Specify the data source. 772 | optional string source = 1; 773 | // Specify the batch size. 774 | optional uint32 batch_size = 4 [default = 1]; 775 | // The rand_skip variable is for the data layer to skip a few data points 776 | // to avoid all asynchronous sgd clients to start at the same point. The skip 777 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 778 | // be larger than the number of keys in the database. 779 | optional uint32 rand_skip = 7 [default = 0]; 780 | // Whether or not ImageLayer should shuffle the list of files at every epoch. 781 | optional bool shuffle = 8 [default = false]; 782 | // It will also resize images if new_height or new_width are not zero. 783 | optional uint32 new_height = 9 [default = 0]; 784 | optional uint32 new_width = 10 [default = 0]; 785 | // Specify if the images are color or gray 786 | optional bool is_color = 11 [default = true]; 787 | // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 788 | // simple scaling and subtracting the data mean, if provided. Note that the 789 | // mean subtraction is always carried out before scaling. 790 | optional float scale = 2 [default = 1]; 791 | optional string mean_file = 3; 792 | // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 793 | // crop an image. 794 | optional uint32 crop_size = 5 [default = 0]; 795 | // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 796 | // data. 797 | optional bool mirror = 6 [default = false]; 798 | optional string root_folder = 12 [default = ""]; 799 | } 800 | 801 | message InfogainLossParameter { 802 | // Specify the infogain matrix source. 803 | optional string source = 1; 804 | } 805 | 806 | message InnerProductParameter { 807 | optional uint32 num_output = 1; // The number of outputs for the layer 808 | optional bool bias_term = 2 [default = true]; // whether to have bias terms 809 | optional FillerParameter weight_filler = 3; // The filler for the weight 810 | optional FillerParameter bias_filler = 4; // The filler for the bias 811 | 812 | // The first axis to be lumped into a single inner product computation; 813 | // all preceding axes are retained in the output. 814 | // May be negative to index from the end (e.g., -1 for the last axis). 815 | optional int32 axis = 5 [default = 1]; 816 | // Specify whether to transpose the weight matrix or not. 817 | // If transpose == true, any operations will be performed on the transpose 818 | // of the weight matrix. The weight matrix itself is not going to be transposed 819 | // but rather the transfer flag of operations will be toggled accordingly. 820 | optional bool transpose = 6 [default = false]; 821 | } 822 | 823 | message InputParameter { 824 | // This layer produces N >= 1 top blob(s) to be assigned manually. 825 | // Define N shapes to set a shape for each top. 826 | // Define 1 shape to set the same shape for every top. 827 | // Define no shape to defer to reshaping manually. 828 | repeated BlobShape shape = 1; 829 | } 830 | 831 | // Message that stores parameters used by LogLayer 832 | message LogParameter { 833 | // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. 834 | // Or if base is set to the default (-1), base is set to e, 835 | // so y = ln(shift + scale * x) = log_e(shift + scale * x) 836 | optional float base = 1 [default = -1.0]; 837 | optional float scale = 2 [default = 1.0]; 838 | optional float shift = 3 [default = 0.0]; 839 | } 840 | 841 | // Message that stores parameters used by LRNLayer 842 | message LRNParameter { 843 | optional uint32 local_size = 1 [default = 5]; 844 | optional float alpha = 2 [default = 1.]; 845 | optional float beta = 3 [default = 0.75]; 846 | enum NormRegion { 847 | ACROSS_CHANNELS = 0; 848 | WITHIN_CHANNEL = 1; 849 | } 850 | optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; 851 | optional float k = 5 [default = 1.]; 852 | enum Engine { 853 | DEFAULT = 0; 854 | CAFFE = 1; 855 | CUDNN = 2; 856 | } 857 | optional Engine engine = 6 [default = DEFAULT]; 858 | } 859 | 860 | message MemoryDataParameter { 861 | optional uint32 batch_size = 1; 862 | optional uint32 channels = 2; 863 | optional uint32 height = 3; 864 | optional uint32 width = 4; 865 | } 866 | 867 | message MVNParameter { 868 | // This parameter can be set to false to normalize mean only 869 | optional bool normalize_variance = 1 [default = true]; 870 | 871 | // This parameter can be set to true to perform DNN-like MVN 872 | optional bool across_channels = 2 [default = false]; 873 | 874 | // Epsilon for not dividing by zero while normalizing variance 875 | optional float eps = 3 [default = 1e-9]; 876 | } 877 | 878 | message ParameterParameter { 879 | optional BlobShape shape = 1; 880 | } 881 | 882 | message PoolingParameter { 883 | enum PoolMethod { 884 | MAX = 0; 885 | AVE = 1; 886 | STOCHASTIC = 2; 887 | } 888 | optional PoolMethod pool = 1 [default = MAX]; // The pooling method 889 | // Pad, kernel size, and stride are all given as a single value for equal 890 | // dimensions in height and width or as Y, X pairs. 891 | optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) 892 | optional uint32 pad_h = 9 [default = 0]; // The padding height 893 | optional uint32 pad_w = 10 [default = 0]; // The padding width 894 | optional uint32 kernel_size = 2; // The kernel size (square) 895 | optional uint32 kernel_h = 5; // The kernel height 896 | optional uint32 kernel_w = 6; // The kernel width 897 | optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) 898 | optional uint32 stride_h = 7; // The stride height 899 | optional uint32 stride_w = 8; // The stride width 900 | enum Engine { 901 | DEFAULT = 0; 902 | CAFFE = 1; 903 | CUDNN = 2; 904 | } 905 | optional Engine engine = 11 [default = DEFAULT]; 906 | // If global_pooling then it will pool over the size of the bottom by doing 907 | // kernel_h = bottom->height and kernel_w = bottom->width 908 | optional bool global_pooling = 12 [default = false]; 909 | } 910 | 911 | message PowerParameter { 912 | // PowerLayer computes outputs y = (shift + scale * x) ^ power. 913 | optional float power = 1 [default = 1.0]; 914 | optional float scale = 2 [default = 1.0]; 915 | optional float shift = 3 [default = 0.0]; 916 | } 917 | 918 | message PythonParameter { 919 | optional string module = 1; 920 | optional string layer = 2; 921 | // This value is set to the attribute `param_str` of the `PythonLayer` object 922 | // in Python before calling the `setup()` method. This could be a number, 923 | // string, dictionary in Python dict format, JSON, etc. You may parse this 924 | // string in `setup` method and use it in `forward` and `backward`. 925 | optional string param_str = 3 [default = '']; 926 | // Whether this PythonLayer is shared among worker solvers during data parallelism. 927 | // If true, each worker solver sequentially run forward from this layer. 928 | // This value should be set true if you are using it as a data layer. 929 | optional bool share_in_parallel = 4 [default = false]; 930 | } 931 | 932 | // Message that stores parameters used by RecurrentLayer 933 | message RecurrentParameter { 934 | // The dimension of the output (and usually hidden state) representation -- 935 | // must be explicitly set to non-zero. 936 | optional uint32 num_output = 1 [default = 0]; 937 | 938 | optional FillerParameter weight_filler = 2; // The filler for the weight 939 | optional FillerParameter bias_filler = 3; // The filler for the bias 940 | 941 | // Whether to enable displaying debug_info in the unrolled recurrent net. 942 | optional bool debug_info = 4 [default = false]; 943 | 944 | // Whether to add as additional inputs (bottoms) the initial hidden state 945 | // blobs, and add as additional outputs (tops) the final timestep hidden state 946 | // blobs. The number of additional bottom/top blobs required depends on the 947 | // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs. 948 | optional bool expose_hidden = 5 [default = false]; 949 | } 950 | 951 | // Message that stores parameters used by ReductionLayer 952 | message ReductionParameter { 953 | enum ReductionOp { 954 | SUM = 1; 955 | ASUM = 2; 956 | SUMSQ = 3; 957 | MEAN = 4; 958 | } 959 | 960 | optional ReductionOp operation = 1 [default = SUM]; // reduction operation 961 | 962 | // The first axis to reduce to a scalar -- may be negative to index from the 963 | // end (e.g., -1 for the last axis). 964 | // (Currently, only reduction along ALL "tail" axes is supported; reduction 965 | // of axis M through N, where N < num_axes - 1, is unsupported.) 966 | // Suppose we have an n-axis bottom Blob with shape: 967 | // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). 968 | // If axis == m, the output Blob will have shape 969 | // (d0, d1, d2, ..., d(m-1)), 970 | // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) 971 | // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. 972 | // If axis == 0 (the default), the output Blob always has the empty shape 973 | // (count 1), performing reduction across the entire input -- 974 | // often useful for creating new loss functions. 975 | optional int32 axis = 2 [default = 0]; 976 | 977 | optional float coeff = 3 [default = 1.0]; // coefficient for output 978 | } 979 | 980 | // Message that stores parameters used by ReLULayer 981 | message ReLUParameter { 982 | // Allow non-zero slope for negative inputs to speed up optimization 983 | // Described in: 984 | // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities 985 | // improve neural network acoustic models. In ICML Workshop on Deep Learning 986 | // for Audio, Speech, and Language Processing. 987 | optional float negative_slope = 1 [default = 0]; 988 | enum Engine { 989 | DEFAULT = 0; 990 | CAFFE = 1; 991 | CUDNN = 2; 992 | } 993 | optional Engine engine = 2 [default = DEFAULT]; 994 | } 995 | 996 | message ReshapeParameter { 997 | // Specify the output dimensions. If some of the dimensions are set to 0, 998 | // the corresponding dimension from the bottom layer is used (unchanged). 999 | // Exactly one dimension may be set to -1, in which case its value is 1000 | // inferred from the count of the bottom blob and the remaining dimensions. 1001 | // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: 1002 | // 1003 | // layer { 1004 | // type: "Reshape" bottom: "input" top: "output" 1005 | // reshape_param { ... } 1006 | // } 1007 | // 1008 | // If "input" is 2D with shape 2 x 8, then the following reshape_param 1009 | // specifications are all equivalent, producing a 3D blob "output" with shape 1010 | // 2 x 2 x 4: 1011 | // 1012 | // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1013 | // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } 1014 | // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } 1015 | // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } 1016 | // 1017 | optional BlobShape shape = 1; 1018 | 1019 | // axis and num_axes control the portion of the bottom blob's shape that are 1020 | // replaced by (included in) the reshape. By default (axis == 0 and 1021 | // num_axes == -1), the entire bottom blob shape is included in the reshape, 1022 | // and hence the shape field must specify the entire output shape. 1023 | // 1024 | // axis may be non-zero to retain some portion of the beginning of the input 1025 | // shape (and may be negative to index from the end; e.g., -1 to begin the 1026 | // reshape after the last axis, including nothing in the reshape, 1027 | // -2 to include only the last axis, etc.). 1028 | // 1029 | // For example, suppose "input" is a 2D blob with shape 2 x 8. 1030 | // Then the following ReshapeLayer specifications are all equivalent, 1031 | // producing a blob "output" with shape 2 x 2 x 4: 1032 | // 1033 | // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1034 | // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } 1035 | // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } 1036 | // 1037 | // num_axes specifies the extent of the reshape. 1038 | // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on 1039 | // input axes in the range [axis, axis+num_axes]. 1040 | // num_axes may also be -1, the default, to include all remaining axes 1041 | // (starting from axis). 1042 | // 1043 | // For example, suppose "input" is a 2D blob with shape 2 x 8. 1044 | // Then the following ReshapeLayer specifications are equivalent, 1045 | // producing a blob "output" with shape 1 x 2 x 8. 1046 | // 1047 | // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } 1048 | // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } 1049 | // reshape_param { shape { dim: 1 } num_axes: 0 } 1050 | // 1051 | // On the other hand, these would produce output blob shape 2 x 1 x 8: 1052 | // 1053 | // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } 1054 | // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } 1055 | // 1056 | optional int32 axis = 2 [default = 0]; 1057 | optional int32 num_axes = 3 [default = -1]; 1058 | } 1059 | 1060 | message ScaleParameter { 1061 | // The first axis of bottom[0] (the first input Blob) along which to apply 1062 | // bottom[1] (the second input Blob). May be negative to index from the end 1063 | // (e.g., -1 for the last axis). 1064 | // 1065 | // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 1066 | // top[0] will have the same shape, and bottom[1] may have any of the 1067 | // following shapes (for the given value of axis): 1068 | // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 1069 | // (axis == 1 == -3) 3; 3x40; 3x40x60 1070 | // (axis == 2 == -2) 40; 40x60 1071 | // (axis == 3 == -1) 60 1072 | // Furthermore, bottom[1] may have the empty shape (regardless of the value of 1073 | // "axis") -- a scalar multiplier. 1074 | optional int32 axis = 1 [default = 1]; 1075 | 1076 | // (num_axes is ignored unless just one bottom is given and the scale is 1077 | // a learned parameter of the layer. Otherwise, num_axes is determined by the 1078 | // number of axes by the second bottom.) 1079 | // The number of axes of the input (bottom[0]) covered by the scale 1080 | // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 1081 | // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. 1082 | optional int32 num_axes = 2 [default = 1]; 1083 | 1084 | // (filler is ignored unless just one bottom is given and the scale is 1085 | // a learned parameter of the layer.) 1086 | // The initialization for the learned scale parameter. 1087 | // Default is the unit (1) initialization, resulting in the ScaleLayer 1088 | // initially performing the identity operation. 1089 | optional FillerParameter filler = 3; 1090 | 1091 | // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but 1092 | // may be more efficient). Initialized with bias_filler (defaults to 0). 1093 | optional bool bias_term = 4 [default = false]; 1094 | optional FillerParameter bias_filler = 5; 1095 | } 1096 | 1097 | message SigmoidParameter { 1098 | enum Engine { 1099 | DEFAULT = 0; 1100 | CAFFE = 1; 1101 | CUDNN = 2; 1102 | } 1103 | optional Engine engine = 1 [default = DEFAULT]; 1104 | } 1105 | 1106 | message SliceParameter { 1107 | // The axis along which to slice -- may be negative to index from the end 1108 | // (e.g., -1 for the last axis). 1109 | // By default, SliceLayer concatenates blobs along the "channels" axis (1). 1110 | optional int32 axis = 3 [default = 1]; 1111 | repeated uint32 slice_point = 2; 1112 | 1113 | // DEPRECATED: alias for "axis" -- does not support negative indexing. 1114 | optional uint32 slice_dim = 1 [default = 1]; 1115 | } 1116 | 1117 | // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer 1118 | message SoftmaxParameter { 1119 | enum Engine { 1120 | DEFAULT = 0; 1121 | CAFFE = 1; 1122 | CUDNN = 2; 1123 | } 1124 | optional Engine engine = 1 [default = DEFAULT]; 1125 | 1126 | // The axis along which to perform the softmax -- may be negative to index 1127 | // from the end (e.g., -1 for the last axis). 1128 | // Any other axes will be evaluated as independent softmaxes. 1129 | optional int32 axis = 2 [default = 1]; 1130 | } 1131 | 1132 | message TanHParameter { 1133 | enum Engine { 1134 | DEFAULT = 0; 1135 | CAFFE = 1; 1136 | CUDNN = 2; 1137 | } 1138 | optional Engine engine = 1 [default = DEFAULT]; 1139 | } 1140 | 1141 | // Message that stores parameters used by TileLayer 1142 | message TileParameter { 1143 | // The index of the axis to tile. 1144 | optional int32 axis = 1 [default = 1]; 1145 | 1146 | // The number of copies (tiles) of the blob to output. 1147 | optional int32 tiles = 2; 1148 | } 1149 | 1150 | // Message that stores parameters used by ThresholdLayer 1151 | message ThresholdParameter { 1152 | optional float threshold = 1 [default = 0]; // Strictly positive values 1153 | } 1154 | 1155 | message WindowDataParameter { 1156 | // Specify the data source. 1157 | optional string source = 1; 1158 | // For data pre-processing, we can do simple scaling and subtracting the 1159 | // data mean, if provided. Note that the mean subtraction is always carried 1160 | // out before scaling. 1161 | optional float scale = 2 [default = 1]; 1162 | optional string mean_file = 3; 1163 | // Specify the batch size. 1164 | optional uint32 batch_size = 4; 1165 | // Specify if we would like to randomly crop an image. 1166 | optional uint32 crop_size = 5 [default = 0]; 1167 | // Specify if we want to randomly mirror data. 1168 | optional bool mirror = 6 [default = false]; 1169 | // Foreground (object) overlap threshold 1170 | optional float fg_threshold = 7 [default = 0.5]; 1171 | // Background (non-object) overlap threshold 1172 | optional float bg_threshold = 8 [default = 0.5]; 1173 | // Fraction of batch that should be foreground objects 1174 | optional float fg_fraction = 9 [default = 0.25]; 1175 | // Amount of contextual padding to add around a window 1176 | // (used only by the window_data_layer) 1177 | optional uint32 context_pad = 10 [default = 0]; 1178 | // Mode for cropping out a detection window 1179 | // warp: cropped window is warped to a fixed size and aspect ratio 1180 | // square: the tightest square around the window is cropped 1181 | optional string crop_mode = 11 [default = "warp"]; 1182 | // cache_images: will load all images in memory for faster access 1183 | optional bool cache_images = 12 [default = false]; 1184 | // append root_folder to locate images 1185 | optional string root_folder = 13 [default = ""]; 1186 | } 1187 | 1188 | message SPPParameter { 1189 | enum PoolMethod { 1190 | MAX = 0; 1191 | AVE = 1; 1192 | STOCHASTIC = 2; 1193 | } 1194 | optional uint32 pyramid_height = 1; 1195 | optional PoolMethod pool = 2 [default = MAX]; // The pooling method 1196 | enum Engine { 1197 | DEFAULT = 0; 1198 | CAFFE = 1; 1199 | CUDNN = 2; 1200 | } 1201 | optional Engine engine = 6 [default = DEFAULT]; 1202 | } 1203 | 1204 | // DEPRECATED: use LayerParameter. 1205 | message V1LayerParameter { 1206 | repeated string bottom = 2; 1207 | repeated string top = 3; 1208 | optional string name = 4; 1209 | repeated NetStateRule include = 32; 1210 | repeated NetStateRule exclude = 33; 1211 | enum LayerType { 1212 | NONE = 0; 1213 | ABSVAL = 35; 1214 | ACCURACY = 1; 1215 | ARGMAX = 30; 1216 | BNLL = 2; 1217 | CONCAT = 3; 1218 | CONTRASTIVE_LOSS = 37; 1219 | CONVOLUTION = 4; 1220 | DATA = 5; 1221 | DECONVOLUTION = 39; 1222 | DROPOUT = 6; 1223 | DUMMY_DATA = 32; 1224 | EUCLIDEAN_LOSS = 7; 1225 | ELTWISE = 25; 1226 | EXP = 38; 1227 | FLATTEN = 8; 1228 | HDF5_DATA = 9; 1229 | HDF5_OUTPUT = 10; 1230 | HINGE_LOSS = 28; 1231 | IM2COL = 11; 1232 | IMAGE_DATA = 12; 1233 | INFOGAIN_LOSS = 13; 1234 | INNER_PRODUCT = 14; 1235 | LRN = 15; 1236 | MEMORY_DATA = 29; 1237 | MULTINOMIAL_LOGISTIC_LOSS = 16; 1238 | MVN = 34; 1239 | POOLING = 17; 1240 | POWER = 26; 1241 | RELU = 18; 1242 | SIGMOID = 19; 1243 | SIGMOID_CROSS_ENTROPY_LOSS = 27; 1244 | SILENCE = 36; 1245 | SOFTMAX = 20; 1246 | SOFTMAX_LOSS = 21; 1247 | SPLIT = 22; 1248 | SLICE = 33; 1249 | TANH = 23; 1250 | WINDOW_DATA = 24; 1251 | THRESHOLD = 31; 1252 | } 1253 | optional LayerType type = 5; 1254 | repeated BlobProto blobs = 6; 1255 | repeated string param = 1001; 1256 | repeated DimCheckMode blob_share_mode = 1002; 1257 | enum DimCheckMode { 1258 | STRICT = 0; 1259 | PERMISSIVE = 1; 1260 | } 1261 | repeated float blobs_lr = 7; 1262 | repeated float weight_decay = 8; 1263 | repeated float loss_weight = 35; 1264 | optional AccuracyParameter accuracy_param = 27; 1265 | optional ArgMaxParameter argmax_param = 23; 1266 | optional ConcatParameter concat_param = 9; 1267 | optional ContrastiveLossParameter contrastive_loss_param = 40; 1268 | optional ConvolutionParameter convolution_param = 10; 1269 | optional DataParameter data_param = 11; 1270 | optional DropoutParameter dropout_param = 12; 1271 | optional DummyDataParameter dummy_data_param = 26; 1272 | optional EltwiseParameter eltwise_param = 24; 1273 | optional ExpParameter exp_param = 41; 1274 | optional HDF5DataParameter hdf5_data_param = 13; 1275 | optional HDF5OutputParameter hdf5_output_param = 14; 1276 | optional HingeLossParameter hinge_loss_param = 29; 1277 | optional ImageDataParameter image_data_param = 15; 1278 | optional InfogainLossParameter infogain_loss_param = 16; 1279 | optional InnerProductParameter inner_product_param = 17; 1280 | optional LRNParameter lrn_param = 18; 1281 | optional MemoryDataParameter memory_data_param = 22; 1282 | optional MVNParameter mvn_param = 34; 1283 | optional PoolingParameter pooling_param = 19; 1284 | optional PowerParameter power_param = 21; 1285 | optional ReLUParameter relu_param = 30; 1286 | optional SigmoidParameter sigmoid_param = 38; 1287 | optional SoftmaxParameter softmax_param = 39; 1288 | optional SliceParameter slice_param = 31; 1289 | optional TanHParameter tanh_param = 37; 1290 | optional ThresholdParameter threshold_param = 25; 1291 | optional WindowDataParameter window_data_param = 20; 1292 | optional TransformationParameter transform_param = 36; 1293 | optional LossParameter loss_param = 42; 1294 | optional V0LayerParameter layer = 1; 1295 | } 1296 | 1297 | // DEPRECATED: V0LayerParameter is the old way of specifying layer parameters 1298 | // in Caffe. We keep this message type around for legacy support. 1299 | message V0LayerParameter { 1300 | optional string name = 1; // the layer name 1301 | optional string type = 2; // the string to specify the layer type 1302 | 1303 | // Parameters to specify layers with inner products. 1304 | optional uint32 num_output = 3; // The number of outputs for the layer 1305 | optional bool biasterm = 4 [default = true]; // whether to have bias terms 1306 | optional FillerParameter weight_filler = 5; // The filler for the weight 1307 | optional FillerParameter bias_filler = 6; // The filler for the bias 1308 | 1309 | optional uint32 pad = 7 [default = 0]; // The padding size 1310 | optional uint32 kernelsize = 8; // The kernel size 1311 | optional uint32 group = 9 [default = 1]; // The group size for group conv 1312 | optional uint32 stride = 10 [default = 1]; // The stride 1313 | enum PoolMethod { 1314 | MAX = 0; 1315 | AVE = 1; 1316 | STOCHASTIC = 2; 1317 | } 1318 | optional PoolMethod pool = 11 [default = MAX]; // The pooling method 1319 | optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio 1320 | 1321 | optional uint32 local_size = 13 [default = 5]; // for local response norm 1322 | optional float alpha = 14 [default = 1.]; // for local response norm 1323 | optional float beta = 15 [default = 0.75]; // for local response norm 1324 | optional float k = 22 [default = 1.]; 1325 | 1326 | // For data layers, specify the data source 1327 | optional string source = 16; 1328 | // For data pre-processing, we can do simple scaling and subtracting the 1329 | // data mean, if provided. Note that the mean subtraction is always carried 1330 | // out before scaling. 1331 | optional float scale = 17 [default = 1]; 1332 | optional string meanfile = 18; 1333 | // For data layers, specify the batch size. 1334 | optional uint32 batchsize = 19; 1335 | // For data layers, specify if we would like to randomly crop an image. 1336 | optional uint32 cropsize = 20 [default = 0]; 1337 | // For data layers, specify if we want to randomly mirror data. 1338 | optional bool mirror = 21 [default = false]; 1339 | 1340 | // The blobs containing the numeric parameters of the layer 1341 | repeated BlobProto blobs = 50; 1342 | // The ratio that is multiplied on the global learning rate. If you want to 1343 | // set the learning ratio for one blob, you need to set it for all blobs. 1344 | repeated float blobs_lr = 51; 1345 | // The weight decay that is multiplied on the global weight decay. 1346 | repeated float weight_decay = 52; 1347 | 1348 | // The rand_skip variable is for the data layer to skip a few data points 1349 | // to avoid all asynchronous sgd clients to start at the same point. The skip 1350 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 1351 | // be larger than the number of keys in the database. 1352 | optional uint32 rand_skip = 53 [default = 0]; 1353 | 1354 | // Fields related to detection (det_*) 1355 | // foreground (object) overlap threshold 1356 | optional float det_fg_threshold = 54 [default = 0.5]; 1357 | // background (non-object) overlap threshold 1358 | optional float det_bg_threshold = 55 [default = 0.5]; 1359 | // Fraction of batch that should be foreground objects 1360 | optional float det_fg_fraction = 56 [default = 0.25]; 1361 | 1362 | // optional bool OBSOLETE_can_clobber = 57 [default = true]; 1363 | 1364 | // Amount of contextual padding to add around a window 1365 | // (used only by the window_data_layer) 1366 | optional uint32 det_context_pad = 58 [default = 0]; 1367 | 1368 | // Mode for cropping out a detection window 1369 | // warp: cropped window is warped to a fixed size and aspect ratio 1370 | // square: the tightest square around the window is cropped 1371 | optional string det_crop_mode = 59 [default = "warp"]; 1372 | 1373 | // For ReshapeLayer, one needs to specify the new dimensions. 1374 | optional int32 new_num = 60 [default = 0]; 1375 | optional int32 new_channels = 61 [default = 0]; 1376 | optional int32 new_height = 62 [default = 0]; 1377 | optional int32 new_width = 63 [default = 0]; 1378 | 1379 | // Whether or not ImageLayer should shuffle the list of files at every epoch. 1380 | // It will also resize images if new_height or new_width are not zero. 1381 | optional bool shuffle_images = 64 [default = false]; 1382 | 1383 | // For ConcatLayer, one needs to specify the dimension for concatenation, and 1384 | // the other dimensions must be the same for all the bottom blobs. 1385 | // By default it will concatenate blobs along the channels dimension. 1386 | optional uint32 concat_dim = 65 [default = 1]; 1387 | 1388 | optional HDF5OutputParameter hdf5_output_param = 1001; 1389 | } 1390 | 1391 | message PReLUParameter { 1392 | // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers: 1393 | // Surpassing Human-Level Performance on ImageNet Classification, 2015. 1394 | 1395 | // Initial value of a_i. Default is a_i=0.25 for all i. 1396 | optional FillerParameter filler = 1; 1397 | // Whether or not slope paramters are shared across channels. 1398 | optional bool channel_shared = 2 [default = false]; 1399 | } 1400 | -------------------------------------------------------------------------------- /modules/config/caffe/layers.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/vlfeat/matconvnet/blob/master/utils/layers.py by Andrea Vedaldi 2 | 3 | # Recent Caffes just pass a string as a type; this is used for legacy support 4 | layers_type = {} 5 | layers_type[0] = 'none' 6 | layers_type[1] = 'accuracy' 7 | layers_type[2] = 'bnll' 8 | layers_type[3] = 'concat' 9 | layers_type[4] = 'conv' 10 | layers_type[5] = 'data' 11 | layers_type[6] = 'dropout' 12 | layers_type[7] = 'euclidean_loss' 13 | layers_type[8] = 'flatten' 14 | layers_type[9] = 'hdf5_data' 15 | layers_type[10] = 'hdf5_output' 16 | layers_type[28] = 'hinge_loss' 17 | layers_type[11] = 'im2col' 18 | layers_type[12] = 'image_data' 19 | layers_type[13] = 'infogain_loss' 20 | layers_type[14] = 'inner_product' 21 | layers_type[15] = 'lrn' 22 | layers_type[25] = 'eltwise' 23 | layers_type[29] = 'memory_data' 24 | layers_type[16] = 'multinomial_logistic_loss' 25 | layers_type[17] = 'pool' 26 | layers_type[26] = 'power' 27 | layers_type[18] = 'relu' 28 | layers_type[19] = 'sigmoid' 29 | layers_type[27] = 'sigmoid_cross_entropy_loss' 30 | layers_type[20] = 'softmax' 31 | layers_type[21] = 'softmax_loss' 32 | layers_type[22] = 'split' 33 | layers_type[23] = 'tanh' 34 | layers_type[24] = 'window_data' 35 | layers_type[39] = 'deconvolution' 36 | layers_type[40] = 'crop' 37 | 38 | # pooling types 39 | pool_methods = {} 40 | pool_methods[0] = 'max' 41 | pool_methods[1] = 'ave' 42 | pool_methods[2] = 'stochastic' -------------------------------------------------------------------------------- /modules/exporters/CaffeExporter.py: -------------------------------------------------------------------------------- 1 | from core.BaseExporter import BaseExporter 2 | 3 | class CaffeExporter(BaseExporter): 4 | """ Class modeling a Caffe exporter 5 | 6 | """ 7 | def __init__(self): 8 | pass 9 | 10 | def save(self, file_path): 11 | print 'Saving Caffe model: %s' % file_path 12 | return {} 13 | 14 | def toObject(self, rosetaDict): 15 | raise NotImplementedError 16 | -------------------------------------------------------------------------------- /modules/exporters/DummyCaffeExporter.py: -------------------------------------------------------------------------------- 1 | from core.BaseExporter import BaseExporter 2 | 3 | class DummyCaffeExporter(BaseExporter): 4 | """ Class modeling a Caffe exporter 5 | 6 | """ 7 | def __init__(self): 8 | pass 9 | 10 | def save(self, file_path): 11 | print 'Saving Caffe model: %s' % file_path 12 | return {} 13 | 14 | def toObject(self, rosetaDict): 15 | raise NotImplementedError 16 | -------------------------------------------------------------------------------- /modules/exporters/LasagneExporter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import lasagne 3 | from core.BaseExporter import BaseExporter 4 | 5 | class Roseta2Lasagne: 6 | def __init__(self,useCuDNN=False): 7 | self.convertDict={ 8 | 'InputLayer':lambda d:lasagne.layers.InputLayer([d['dim']['neurons'],d['dim']['channels'],d['dim']['height'],d['dim']['width']]), 9 | 'LinearLayer':lambda d,bot:lasagne.layers.DenseLayer(bot,d['dim']['neurons'],W=d['weights_obj'],b=d['biases_obj'],name=d['name']), 10 | 'ConvolutionLayer':lambda d,bot:lasagne.layers.Conv2DLayer(bot,d['dim']['neurons'],(d['dim']['width'],d['dim']['height']),stride=d['stride'],pad=d['padding'],W=d['weights_obj'],b=d['biases_obj'],name=d['name']), 11 | 'PoolingLayer':lambda d,bot:lasagne.layers.Pool2DLayer(bot,d['kernel_size'],stride=d['stride'],pad=d['padding'],name=d['name']), 12 | 'DropoutLayer':lambda d,bot:lasagne.layers.DropoutLayer(bot,d['dropout_ratio'],name=d['name']), 13 | 'DummyLayer':lambda d,bot:lasagne.layers.NonlinearityLayer(bot,nonlinearity=None,name=d['name']), 14 | 'SoftmaxLayer':lambda d,bot:lasagne.layers.NonlinearityLayer(bot,nonlinearity=lasagne.nonlinearities.softmax,name=d['name']), 15 | 'SigmoidLayer':lambda d,bot:lasagne.layers.NonlinearityLayer(bot,nonlinearity=lasagne.nonlinearities.sigmoid,name=d['name']), 16 | 'TanHLayer':lambda d,bot:lasagne.layers.NonlinearityLayer(bot,nonlinearity=lasagne.nonlinearities.tanh,name=d['name']), 17 | 'ReLULayer':lambda d,bot:lasagne.layers.NonlinearityLayer(bot,nonlinearity=lasagne.nonlinearities.LeakyRectify(d['negative_slope']),name=d['name']), 18 | } 19 | if useCuDNN: 20 | self.convertDict['ConvolutionLayer']=lambda d,bot:lasagne.layers.dnn.Conv2DDNNLayer(bot,d['dim']['neurons'],(d['dim']['width'],d['dim']['height']),stride=d['stride'],pad=d['padding'],W=d['weights_obj'],b=d['biases_obj'],name=d['name']), 21 | self.convertDict['PoolingLayer']=lambda d,bot:lasagne.layers.dnn.Pool2DDNNLayer(bot,d['kernel_size'],stride=d['stride'],pad=d['padding'],name=d['name']), 22 | 23 | def generateTheanoSharedFromNumpy(self,paramDict): 24 | theanoDict={} 25 | for k in paramDict.keys(): 26 | if len(paramDict[k].shape)==1:#if bias 27 | theanoDict[k]=lasagne.layers.DenseLayer(lasagne.layers.InputLayer([None,1]),paramDict[k].shape[0],b=paramDict[k]).b 28 | elif len(paramDict[k].shape)==2:#if weights of dense 29 | theanoDict[k]=lasagne.layers.DenseLayer(lasagne.layers.InputLayer([None,paramDict[k].shape[0]]),paramDict[k].shape[1],W=paramDict[k]).W 30 | elif len(paramDict[k].shape)==4:#if weights of dense 31 | theanoDict[k]=lasagne.layers.Conv2DLayer(lasagne.layers.InputLayer([None,paramDict[k].shape[1],10,10]),paramDict[k].shape[0],paramDict[k].shape[2:],W=paramDict[k]).W 32 | else: 33 | print '\n\n',k,'\n\n' 34 | raise Exception('Could not convert weigth mat '+k+' to theano shared object') 35 | return theanoDict 36 | 37 | def getInputLayers(self,layerDict): 38 | res=[] 39 | for k in layerDict.keys(): 40 | if layerDict[k]['bottom']==layerDict[k]['name']: 41 | res.append(layerDict[k]) 42 | return res 43 | 44 | def __call__(self,d): 45 | allParams=self.generateTheanoSharedFromNumpy(d['parameters']) 46 | allLayers=d['layers'] 47 | for k in allLayers.keys(): 48 | allLayers[k]['name']=k 49 | if 'weights_name' in allLayers[k].keys(): 50 | allLayers[k]['weights_obj']=allParams[allLayers[k]['weights_name']] 51 | if 'biases_name' in allLayers[k].keys(): 52 | allLayers[k]['biases_obj']=allParams[allLayers[k]['biases_name']] 53 | inputLayers=self.getInputLayers(allLayers) 54 | if len(inputLayers)>1: 55 | raise Exception('For the moment roseta lasagne experter supports only a sigle input layer') 56 | curLayerName=inputLayers[0]['name'] 57 | nextLayerName=inputLayers[0]['top'] 58 | print 'TYPE', inputLayers[0]['type'],'\n',inputLayers[0] 59 | curLayer=self.convertDict[inputLayers[0]['type']](inputLayers[0]) 60 | while nextLayerName!=curLayerName:#assuming a single output layer 61 | curLayerName=nextLayerName 62 | curLayer=self.convertDict[allLayers[curLayerName]['type']](allLayers[curLayerName],curLayer) 63 | nextLayerName=allLayers[curLayerName]['top'] 64 | return curLayer 65 | 66 | 67 | class LasagneExporter(BaseExporter): 68 | def toObject(self,rosetaDict): 69 | functor= Roseta2Lasagne(False) 70 | return functor(rosetaDict) 71 | 72 | 73 | if __name__=='__main__': 74 | pass 75 | -------------------------------------------------------------------------------- /modules/exporters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgarriba/DeepRosetta/4908092010c4e2fc4f9949b1f273c08996266b73/modules/exporters/__init__.py -------------------------------------------------------------------------------- /modules/importers/CaffeImporter.py: -------------------------------------------------------------------------------- 1 | from core.BaseImporter import BaseImporter 2 | from modules.config.caffe import caffe_pb2 as caffe_pb2 3 | from modules.config.caffe.layers import * 4 | from modules.config.caffe.equivalences import * 5 | import numpy as np 6 | 7 | class CaffeImporter(BaseImporter): 8 | """ Class modeling a Caffe importer 9 | 10 | """ 11 | def __init__(self): 12 | self.equivalences = equivalences 13 | self.layers_type = layers_type 14 | self.pool_methods = pool_methods 15 | self.regions = ['across_channels', 'within_channel'] 16 | 17 | def blobproto_to_array(self, blob): 18 | """Convert a Caffe Blob to a numpy array. 19 | 20 | It also reverses the order of all dimensions to [width, height, 21 | channels, instance]. 22 | """ 23 | dims = [] 24 | if hasattr(blob, 'shape'): 25 | dims = list(blob.shape.dim) 26 | if not dims: 27 | dims = [blob.num, blob.channels, blob.height, blob.width] 28 | return np.array(blob.data,dtype='float32').reshape(dims) 29 | 30 | def find_layer_by_type(self, dict_, type_): 31 | for key in dict_: 32 | if dict_[key]['type'] == type_: 33 | return key 34 | 35 | def get_data_from_caffemodel(self, file_path): 36 | print 'Loading Caffe model: %s' % file_path 37 | data=caffe_pb2.NetParameter() 38 | caffe_data = open(file_path, 'rb') 39 | data.ParseFromString(caffe_data.read()) 40 | return data 41 | 42 | 43 | def load(self, file_path): 44 | # open and load caffe model into data 45 | data = self.get_data_from_caffemodel() 46 | 47 | #read layers 48 | data_layers_list = data.layers 49 | 50 | output = {} 51 | output['layers'] = {} 52 | output['parameters'] = {} 53 | 54 | for layer in data_layers_list: 55 | layer_type = layer.type if type(layer.type) != int else Importer.layers_type[layer.type] 56 | name = layer.name 57 | print('Layer %s (%s)' % (layer_type, name)) 58 | 59 | if layer_type == 'data': 60 | continue 61 | 62 | converted_type = Importer.find_layer_by_type(Importer.equivalences['layers'], layer_type) 63 | eq_fields = Importer.equivalences['layers'][converted_type] 64 | 65 | output['layers'][name] = {} 66 | layer_obj = output['layers'][name] 67 | if layer_type in ['conv', 'Convolution']: 68 | for field in eq_fields: 69 | if field == 'weights_name': 70 | weights = Importer.blobproto_to_array(layer.blobs[0]).copy() 71 | output['parameters'][name + '_w'] = weights 72 | layer_obj['weights_name'] = name + '_w' 73 | layer_obj['dim'] = weights.shape 74 | elif field == 'biases_name': 75 | biases = Importer.blobproto_to_array(layer.blobs[1]).copy() 76 | output['parameters'][name + '_b'] = biases 77 | layer_obj['biases_name'] = name + '_b' 78 | elif field == 'dim': 79 | pass 80 | elif field == 'type': 81 | pass 82 | else: 83 | prop = eq_fields[field] if eq_fields[field] else field 84 | try: 85 | layer_obj[field] = getattr(layer.convolution_param, prop) 86 | except AttributeError: 87 | # for bottom and top 88 | layer_obj[field] = getattr(layer, prop) 89 | 90 | elif layer_type in ['relu', 'ReLU']: 91 | for field in eq_fields: 92 | if field == 'type': 93 | pass 94 | else: 95 | prop = eq_fields[field] if eq_fields[field] else field 96 | try: 97 | layer_obj[field] = getattr(layer.relu_param, prop) 98 | except AttributeError: 99 | # for bottom and top 100 | layer_obj[field] = getattr(layer, prop) 101 | 102 | elif layer_type in ['lrn', 'LRN']: 103 | for field in eq_fields: 104 | if field == 'type': 105 | pass 106 | elif field == 'norm_region': 107 | layer_obj[field] = Importer.regions[getattr(layer.lrn_param, field)] 108 | else: 109 | prop = eq_fields[field] if eq_fields[field] else field 110 | try: 111 | layer_obj[field] = getattr(layer.lrn_param, prop) 112 | except AttributeError: 113 | # for bottom and top 114 | layer_obj[field] = getattr(layer, prop) 115 | 116 | elif layer_type in ['pool', 'Pooling']: 117 | for field in eq_fields: 118 | if field == 'type': 119 | pass 120 | elif field == 'pool': 121 | layer_obj[field] = Importer.pool_methods[getattr(layer.pooling_param, field)] 122 | else: 123 | prop = eq_fields[field] if eq_fields[field] else field 124 | try: 125 | layer_obj[field] = getattr(layer.pooling_param, prop) 126 | except AttributeError: 127 | # for bottom and top 128 | layer_obj[field] = getattr(layer, prop) 129 | 130 | elif layer_type in ['inner_product', 'InnerProduct']: 131 | for field in eq_fields: 132 | if field == 'weights_name': 133 | weights = Importer.blobproto_to_array(layer.blobs[0]).copy() 134 | output['parameters'][name + '_w'] = weights 135 | layer_obj['weights_name'] = name + '_w' 136 | layer_obj['dim'] = weights.shape 137 | elif field == 'biases_name': 138 | biases = Importer.blobproto_to_array(layer.blobs[1]).copy() 139 | output['parameters'][name + '_b'] = biases 140 | layer_obj['biases_name'] = name + '_b' 141 | elif field == 'dim': 142 | pass 143 | elif field == 'type': 144 | pass 145 | else: 146 | prop = eq_fields[field] if eq_fields[field] else field 147 | try: 148 | layer_obj[field] = getattr(layer.inner_product_param, prop) 149 | except AttributeError: 150 | # for bottom and top 151 | layer_obj[field] = getattr(layer, prop) 152 | 153 | elif layer_type in ['dropout', 'Dropout']: 154 | for field in eq_fields: 155 | if field == 'type': 156 | pass 157 | else: 158 | prop = eq_fields[field] if eq_fields[field] else field 159 | try: 160 | layer_obj[field] = getattr(layer.dropout_param, prop) 161 | except AttributeError: 162 | # for bottom and top 163 | layer_obj[field] = getattr(layer, prop) 164 | 165 | elif layer_type in ['softmax_loss', 'SoftmaxLoss']: 166 | for field in eq_fields: 167 | if field == 'type': 168 | pass 169 | else: 170 | prop = eq_fields[field] if eq_fields[field] else field 171 | try: 172 | layer_obj[field] = getattr(layer.softmax_param, prop) 173 | except AttributeError: 174 | # for bottom and top 175 | layer_obj[field] = getattr(layer, prop) 176 | 177 | return {} 178 | 179 | def loadFromObject(self, frameworkObj): 180 | return NotImplementedError 181 | 182 | if __name__ == '__main__': 183 | importer = CaffeImporter() 184 | file_path = '/home/guillem/git/caffe/models/bvlc_alexnet/bvlc_alexnet.caffemodel' 185 | output = importer.load(file_path) -------------------------------------------------------------------------------- /modules/importers/DummyCaffeImporter.py: -------------------------------------------------------------------------------- 1 | from core.BaseImporter import BaseImporter 2 | 3 | 4 | class DummyCaffeImporter(BaseImporter): 5 | """ Class modeling a Caffe importer 6 | 7 | """ 8 | def __init__(self): 9 | pass 10 | 11 | def load(self, file_path): 12 | print 'Loading Caffe model: %s' % file_path 13 | 14 | return {} 15 | 16 | def loadFromObject(self, frameworkObj): 17 | print 'Loading Caffe model from object: %s' % frameworkObj 18 | 19 | return {} 20 | -------------------------------------------------------------------------------- /modules/importers/LasagneImporter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import cPickle 3 | from core.BaseImporter import BaseImporter 4 | sys.path.insert(0, '../../') 5 | 6 | 7 | 8 | try: 9 | import lasagne 10 | except: 11 | pass 12 | 13 | 14 | class Lasagne2Roseta: 15 | """ Class modeling a Lasagne importer 16 | """ 17 | lasagne2CaffeTypes=dict([('DenseLayer','LinearLayer'), 18 | ('MaxPool2DLayer','PoolLayer'), 19 | ('MaxPool2DDNNLayer','PoolLayer'), 20 | ('Pool2DLayer','PoolLayer'), 21 | ('Pool2DDNNLayer','PoolLayer'), 22 | ('Conv2DLayer','ConvolutionLayer'), 23 | ('Conv2DDNNLayer','ConvolutionLayer'), 24 | ('InputLayer','InputLayer'), 25 | ('NonlinearityLayer','DummyLayer'), 26 | ('DropoutLayer','DropoutLayer'), 27 | ('rectify','ReLULayer'), 28 | ('LeakyRectify','ReLULayer'), 29 | ('tanh','TanHLayer'), 30 | ('sigmoid','LogsigLayer'), 31 | ('softmax','SoftmaxLayer'), 32 | ('NonlinearityLayer','DummyLayer') 33 | ]) 34 | def paramName(self,x): 35 | return str(x)+'.'+str(id(x)) 36 | def addLayerNamesIfNotThere(self,outputLayer): 37 | """This function will make sure all layers have unique layers 38 | """ 39 | currentLayer=outputLayer 40 | names=[] 41 | counter=0 42 | while 'input_layer' in currentLayer.__dict__.keys(): 43 | if currentLayer.name==None or len(set(names+[currentLayer.name]))!=len((names+[currentLayer.name])): 44 | currentLayer.name='layer_'+str(counter); 45 | names.append(currentLayer.name) 46 | currentLayer=currentLayer.input_layer 47 | counter+=1 48 | if currentLayer.name==None or len(set(names+[currentLayer.name]))!=len((names+[currentLayer.name])): 49 | currentLayer.name='layer_'+str(counter); 50 | 51 | 52 | def genParamDict(self,outputLayer): 53 | self.weightDict=dict([(self.paramName(p),p.get_value()) for p in lasagne.layers.get_all_params(outputLayer)]) 54 | 55 | def findParamId(self,paramObj): 56 | for pId in self.paramDict.keys(): 57 | if self.paramDict[pId]==paramObj: 58 | return pId 59 | raise Exception('Object '+str(paramObj)+' not found in parameters') 60 | 61 | def createActivationLayer(self,bottomLayer,topLayer): 62 | lasagne2CaffeTypes=Lasagne2Roseta.lasagne2CaffeTypes 63 | #lasagneName=str(type(bottomLayer.nonlinearity))[1:-1].split('.')[-1] 64 | lasagneName=str(bottomLayer.nonlinearity)[10:].split(' ')[0] 65 | res= {'type':lasagne2CaffeTypes[lasagneName],'name':bottomLayer.name+'_'+lasagneName} 66 | if lasagne2CaffeTypes[lasagneName]=='ReLULayer': 67 | if lasagneName=='LeakyRectify': 68 | res['negative_slope']=bottomLayer.nonlinearity.leakiness 69 | else: 70 | res['negative_slope']=0 71 | res['bottom']=bottomLayer.name 72 | if bottomLayer is topLayer:#If this is the output layer 73 | res['top']=res['name'] 74 | else: 75 | res['top']=topLayer.name 76 | return res 77 | 78 | 79 | def createDummyLayers(self,curLayer,topLayer): 80 | #used for lasagne.layers.NonlinearityLayer 81 | layerDict={'top':topLayer.name,'bottom':curLayer.input_layer.name,'name':curLayer.name,'type':'DummyLayer'} 82 | nonlinearityLayerDict=self.createActivationLayer(curLayer,topLayer) 83 | layerDict['top']=nonlinearityLayerDict['name'] 84 | return [layerDict,nonlinearityLayerDict] 85 | 86 | 87 | def createConvLayers(self,curLayer,topLayer): 88 | dimSz=curLayer.W.get_value().shape 89 | dimDict={'neurons':dimSz[0],'channels':dimSz[1],'height':dimSz[2],'width':dimSz[3]} 90 | layerDict={'top':topLayer.name,'bottom':curLayer.input_layer.name,'type':'ConvolutionLayer','name':curLayer.name, 91 | 'dim':dimDict,'weights_name':self.findParamId(curLayer.W),'biases_name':self.findParamId(curLayer.b), 92 | 'stride':list(curLayer.stride),'padding':list(curLayer.pad) 93 | } 94 | nonlinearityLayerDict=self.createActivationLayer(curLayer,topLayer) 95 | layerDict['top']=nonlinearityLayerDict['name'] 96 | return [layerDict,nonlinearityLayerDict] 97 | 98 | def createLinearLayers(self,curLayer,topLayer): 99 | dimSz=curLayer.W.get_value().shape 100 | dimDict={'neurons':dimSz[1],'channels':dimSz[0]} 101 | layerDict={'top':topLayer.name,'bottom':curLayer.input_layer.name,'type':'LinearLayer','name':curLayer.name, 102 | 'dim':dimDict,'weights_name':self.findParamId(curLayer.W),'biases_name':self.findParamId(curLayer.b), 103 | } 104 | nonlinearityLayerDict=self.createActivationLayer(curLayer,topLayer) 105 | layerDict['top']=nonlinearityLayerDict['name'] 106 | return [layerDict,nonlinearityLayerDict] 107 | 108 | def createPoolingLayers(self,curLayer,topLayer): 109 | lasagneName=str(type(curLayer))[8:-2].split('.')[-1] 110 | layerDict={'top':topLayer.name,'bottom':curLayer.input_layer.name,'type':'PoolingLayer','name':curLayer.name, 111 | 'stride':list(curLayer.stride),'padding':list(curLayer.pad),'kernel_size':list(curLayer.pool_size)} 112 | if lasagneName in ['MaxPool2DLayer' ,'MaxPool2DDNNLayer']or (lasagneName in ['Pool2DLayer','Pool2DDNNLayer' ]and curLayer.mode=='max'): 113 | layerDict['pool']='max' 114 | else: 115 | raise NotImplementedError 116 | if curLayer is topLayer:#If this is the output layer 117 | layerDict['top']=layerDict['name'] 118 | else: 119 | layerDict['top']=topLayer.name 120 | return [layerDict] 121 | #Assuming there is never a Relu layer after pooling 122 | #nonlinearityLayerDict=self.createActivationLayer(curLayer,topLayer) 123 | #layerDict['top']=nonlinearityLayerDict['name'] 124 | #return [layerDict,nonlinearityLayerDict] 125 | 126 | def createDropoutLayers(self,curLayer,topLayer): 127 | layerDict={'top':topLayer.name,'bottom':curLayer.input_layer.name,'name':curLayer.name,'type':'DropoutLayer','dropout_ratio':curLayer.p} 128 | if curLayer is topLayer:#If this is the output layer 129 | layerDict['top']=layerDict['name'] 130 | else: 131 | layerDict['top']=topLayer.name 132 | return [layerDict] 133 | 134 | def createInputLayers(self,curLayer,topLayer): 135 | dimSz=curLayer.shape 136 | if len(dimSz)==2: 137 | dimDict={'neurons':dimSz[0],'channels':dimSz[1],'height':1,'width':1} 138 | elif len(dimSz)==4: 139 | dimDict={'neurons':dimSz[0],'channels':dimSz[1],'height':dimSz[2],'width':dimSz[3]} 140 | else: 141 | raise Exception('Input Layers should either have 2 or 4 dimensions') 142 | layerDict={'top':topLayer.name,'bottom':curLayer.name,'name':curLayer.name,'type':'InputLayer','dim':dimDict} 143 | if curLayer is topLayer:#If this is the output layer 144 | layerDict['top']=layerDict['name'] 145 | else: 146 | layerDict['top']=topLayer.name 147 | return [layerDict] 148 | 149 | def createLayers(self,curLayer,topLayer): 150 | """returns a list with dictionaries , one for each layer created""" 151 | lName=str(type(curLayer))[8:-2].split('.')[-1]; 152 | if lName=='DenseLayer': 153 | return self.createLinearLayers(curLayer,topLayer) 154 | elif lName in ['MaxPool2DLayer','Pool2DLayer','MaxPool2DDNNLayer','Pool2DDNNLayer'] : 155 | return self.createPoolingLayers(curLayer,topLayer) 156 | elif lName in ['Conv2DLayer','Conv2DDNNLayer']: 157 | return self.createConvLayers(curLayer,topLayer) 158 | elif lName=='InputLayer': 159 | return self.createInputLayers(curLayer,topLayer) 160 | elif lName=='DropoutLayer': 161 | return self.createDropoutLayers(curLayer,topLayer) 162 | elif lName=='NonlinearityLayer': 163 | return self.createDummyLayers(curLayer,topLayer) 164 | else: 165 | raise Exception('Inrecognised layer type '+lName) 166 | 167 | def __eval__(self, lasagneOutputLayer): 168 | self.addLayerNamesIfNotThere(lasagneOutputLayer) 169 | self.paramDict=dict([(self.paramName(p),p) for p in lasagne.layers.get_all_params(lasagneOutputLayer)]) 170 | curLayer=lasagneOutputLayer 171 | topLayer=lasagneOutputLayer 172 | allLayers=[] 173 | while str(type(curLayer))[8:-2].split('.')[-1]!='InputLayer': 174 | allLayers+=self.createLayers(curLayer,topLayer) 175 | topLayer=curLayer 176 | curLayer=curLayer.input_layer 177 | allLayers+=self.createLayers(curLayer,topLayer) 178 | layerDict = dict([(l['name'],l) for l in allLayers]) 179 | npParamDict={} 180 | for k in self.paramDict.keys(): 181 | npParamDict[k]=self.paramDict[k].get_value() 182 | res={'parameters':npParamDict,'layers':layerDict} 183 | del self.paramDict #no longer needed 184 | return res 185 | 186 | 187 | class LasagneImporter(BaseImporter): 188 | def loadFromObject(self,lasagneOutputLayer): 189 | functor=Lasagne2Roseta() 190 | return functor(lasagneOutputLayer) 191 | 192 | 193 | if __name__=='__main__': 194 | il=lasagne.layers.InputLayer((None,1,28,28),name='input') 195 | c1=lasagne.layers.Conv2DLayer(il,32,(5,5),pad=(2,2),name='c1') 196 | p1=lasagne.layers.Pool2DLayer(c1,(2,2),stride=(2,2),name='p1') 197 | c2=lasagne.layers.Conv2DLayer(p1,64,(5,5),pad=(2,2),name='c2') 198 | p2=lasagne.layers.Pool2DLayer(c2,(2,2),stride=(2,2),name='p2') 199 | fc1=lasagne.layers.DenseLayer(p2,256,name='fc1') 200 | do1=lasagne.layers.DropoutLayer(fc1,0.5,name='do1') 201 | fc2=lasagne.layers.DenseLayer(do1,256,name='fc2') 202 | do2=lasagne.layers.DropoutLayer(fc2,0.5,name='do2') 203 | fc3=lasagne.layers.DenseLayer(do2,10,nonlinearity=lasagne.nonlinearities.softmax,name='fc3') 204 | functor=Lasagne2Roseta() 205 | cPickle.dump(functor(fc3),open('/tmp/lasagne_untrained_network.cPickle','w')) 206 | -------------------------------------------------------------------------------- /modules/importers/MatconvFileImporter.py: -------------------------------------------------------------------------------- 1 | import os 2 | from core.BaseImporter import BaseImporter 3 | from scipy.io import loadmat 4 | import yaml 5 | 6 | class Importer(BaseImporter): 7 | def __init__(self): 8 | with open('./modules/config/Matconv/equivalences.yaml', 'r') as infile: 9 | self.equivalences = yaml.load(infile) 10 | self.bottom = None 11 | 12 | def find_layer_by_type(self, dict_, type_): 13 | for key in dict_: 14 | if dict_[key]['type'] == type_: 15 | return key 16 | 17 | def load(self, file_path): 18 | output = {} 19 | output['layers'] = {} 20 | output['parameters'] = {} 21 | model = loadmat(file_path)['layers'][0] 22 | for index, layer in enumerate(model): 23 | layer_type = layer['type'][0][0][0] 24 | name = layer['name'][0][0][0] 25 | if layer_type == 'conv' and 'fc' in name: 26 | layer_type = 'fc' 27 | converted_type = self.find_layer_by_type(self.equivalences['layers'], layer_type) 28 | eq_fields = self.equivalences['layers'][converted_type] 29 | 30 | output['layers'][name] = {} 31 | layer_obj = output['layers'][name] 32 | for field in eq_fields: 33 | if layer_type == 'conv' or layer_type == 'fc': 34 | if field == 'weights_name': 35 | weights = layer[eq_fields[field]].item()[0][0].copy() 36 | weights = weights.transpose(3,2,0,1) 37 | output['parameters'][name + '_w'] = weights 38 | layer_obj['weights_name'] = name + '_w' 39 | layer_obj['dim'] = weights.shape 40 | elif field == 'biases_name': 41 | try: 42 | biases = layer[eq_fields[field]].item()[0][1].copy() 43 | output['parameters'][name + '_b'] = biases 44 | layer_obj['biases_name'] = name + '_b' 45 | except: 46 | pass 47 | elif layer_type == 'lrn': 48 | if field == 'local_size': 49 | layer_obj['local_size'] = layer['param'][0][0][0][0] 50 | elif field == 'kappa': 51 | layer_obj['kappa'] = layer['param'][0][0][0][1] 52 | elif field == 'alpha': 53 | layer_obj['alpha'] = layer['param'][0][0][0][0]*layer['param'][0][0][0][1] 54 | elif field == 'beta': 55 | layer_obj['beta'] = layer['param'][0][0][0][3] 56 | if field == 'bottom': 57 | layer_obj['bottom'] = self.bottom 58 | self.bottom = name 59 | elif field == 'top': 60 | if index + 1 < len(model): 61 | layer_obj['top'] = model[index + 1]['name'][0][0][0] 62 | else: 63 | layer_obj['top'] = None 64 | else: 65 | if eq_fields[field] in layer.dtype.names: 66 | layer_obj[field] = layer[eq_fields[field]][0][0][0] 67 | else: 68 | layer_obj[field] = eq_fields[field] 69 | return output 70 | 71 | if __name__=='__main__': 72 | importer = Importer() 73 | output = importer.load('/Users/prlz77/Downloads/imagenet-vgg-f.mat') 74 | pass -------------------------------------------------------------------------------- /modules/importers/TensorflowImporter.py: -------------------------------------------------------------------------------- 1 | import os 2 | from core.BaseImporter import BaseImporter 3 | import yaml 4 | import numpy as np 5 | from math import ceil 6 | 7 | try: 8 | import tensorflow as tf 9 | from tensorflow.python.client import graph_util 10 | from tensorflow.python.framework import tensor_shape 11 | from tensorflow.python.platform import gfile 12 | except: 13 | pass 14 | 15 | 16 | class TensorflowImporter(BaseImporter): 17 | """ Class modeling a Tensorflow importer 18 | """ 19 | def __init__(self): 20 | with open('./modules/config/Tensorflow/equivalences.yaml', 'r') as infile: 21 | self.equivalences = yaml.load(infile) 22 | with open('./core/config.yaml', 'r') as infile: 23 | self.layer_definitions = yaml.load(infile) 24 | self.known_ops = [self.equivalences['layers'][key]['type'] for key in self.equivalences['layers']] 25 | self.known_ops.append('AvgPool') # TODO: how we handle several equivalencies in one layer type? 26 | self.util_ops = [] 27 | self.util_ops.append('Const') # Const ops contain the weights as Tensors 28 | self.util_ops.append('Identity') # Trivial utility op for reading Const tensors 29 | self.util_ops.append('BiasAdd') # BiasAdd op is usually part of a conv or fc layer 30 | self.util_ops.append('Add') # Add op is usually part of a conv or fc layer 31 | self.mapsizes = [] # Needed to calculate paddings 32 | 33 | def find_const_input(self, node, graph_def): 34 | """Helper function that finds a 'Const' input of a given node in the tf Graph definition. 35 | 'Const' nodes have the numerical data for the weights stored in its attributes. 36 | """ 37 | const_input_nodes = [n_ for n_ in graph_def.node if ((n_.op == 'Const' or n_.op == 'Identity') 38 | and any(n_.name == s for s in node.input))] 39 | assert(len(const_input_nodes)==1), 'Unexpected graph definition!' 40 | 41 | # If there is no 'Const' node then we have an 'Identity' node reading from a 'Const' 42 | if (const_input_nodes[0].op == 'Identity'): 43 | const_input_nodes = [n_ for n_ in graph_def.node if (n_.op == 'Const' 44 | and any(n_.name == s for s in const_input_nodes[0].input))] 45 | assert(len(const_input_nodes)==1), 'Unexpected graph definition!' 46 | 47 | return const_input_nodes[0] 48 | 49 | def find_bottom_layer_name(self, node, graph_def): 50 | """ Finds the bottom layer (ignoring unsuported layers and utility nodes) 51 | """ 52 | valid_inputs = [] 53 | while len(valid_inputs) == 0 and node != None: 54 | inputs = [n_ for n_ in graph_def.node if any(n_.name == s for s in node.input)] 55 | if len(inputs) == 0: 56 | return '' 57 | valid_inputs = [n_ for n_ in inputs if any(n_.op == s for s in self.known_ops)] 58 | assert(len(valid_inputs)<=1),'This Importer does not allow more than one bottom layer' 59 | if len(valid_inputs) == 1: 60 | return str(valid_inputs[0].name) 61 | else: 62 | node = inputs[0] # TODO it may be the case we have two unsuported layers 63 | if (node.op == 'Identity' or node.op == 'Const'): 64 | if len(inputs)>1: node = inputs[1] 65 | else: node = None 66 | return '' 67 | 68 | def find_top_layer_name(self, node, graph_def): 69 | """ Finds the top layer (ignoring unsuported layers and utility nodes) 70 | """ 71 | valid_outputs = [] 72 | while len(valid_outputs) == 0 and node != None: 73 | outputs = [n_ for n_ in graph_def.node if any(node.name == s for s in n_.input)] 74 | if len(outputs) == 0: 75 | return '' 76 | valid_outputs = [n_ for n_ in outputs if any(n_.op == s for s in self.known_ops)] 77 | assert(len(valid_outputs)<=1),'This Importer does not allow more than one top layer' 78 | if len(valid_outputs) == 1: 79 | return str(valid_outputs[0].name) 80 | else: 81 | node = outputs[0] # TODO it may be the case we have two unsuported layers 82 | return '' 83 | 84 | def find_layer_by_type(self, dict_, type_): 85 | """ Finds the equvalet Rosseta layer type for a given TF layer type_ 86 | """ 87 | for key in dict_: 88 | if dict_[key]['type'] == type_: 89 | return key 90 | 91 | def load(self, file_path): 92 | """ Loads a Tensorflow graph from protobuf file and converts to Rosseta dict format 93 | """ 94 | output = {} 95 | output['layers'] = {} 96 | output['parameters'] = {} 97 | 98 | print 'Loading Tensorflow model: %s' % file_path 99 | with gfile.FastGFile(file_path, 'rb') as f: 100 | 101 | graph_def = tf.GraphDef() 102 | 103 | filename, extension = os.path.splitext(file_path) 104 | if extension == '.pb': 105 | graph_def.ParseFromString(f.read()) 106 | else: 107 | text_format.Merge(f.read(), graph_def) 108 | 109 | 110 | for node in graph_def.node: # each node has .op .name .input .attr 111 | # attr is dict with key value pairs 112 | 113 | if any(node.op == s for s in self.known_ops): 114 | name = node.name 115 | layer_type = node.op 116 | converted_type = self.find_layer_by_type(self.equivalences['layers'], layer_type) 117 | if (layer_type == 'AvgPool'): converted_type = 'PoolingLayer' #TODO fix this! 118 | output['layers'][name] = {} 119 | layer_obj = output['layers'][name] 120 | layer_obj_fields = self.layer_definitions['layers'][converted_type] 121 | layer_obj['type'] = layer_obj_fields['type'] 122 | if (layer_obj['type'] != 'InputLayer'): 123 | layer_obj['bottom'] = self.find_bottom_layer_name(node, graph_def) 124 | layer_obj['top'] = self.find_top_layer_name(node, graph_def) 125 | #print('Layer op: ' +node.op+' name: '+node.name) 126 | #print([at for at in node.attr.keys()]) 127 | inputs = [n_ for n_ in graph_def.node if any(n_.name == s for s in node.input)] 128 | outputs = [n_ for n_ in graph_def.node if any(node.name == s for s in n_.input)] 129 | elif any(node.op == s for s in self.util_ops): 130 | continue 131 | else: 132 | # TODO Reshape nodes seem critical in tf since they specify the way the feature map cuboids are mapped into 2D matrices to feed them to the fully connected layers. Not trivial at all, but seems that the standard is to do it same way as other frameworks do it by default. 133 | print(' >> Warning: Tensorflow importer to Rossetta does not support "'+node.op+'" Nodes. This node will be ignored!') 134 | continue 135 | 136 | if node.op == 'Placeholder': 137 | shape = [int(node.attr['shape'].shape.dim.__getitem__(i).size) for i in range(4)] 138 | layer_obj['dim'] = [shape[3],shape[1],shape[2]] 139 | self.mapsizes.append((shape[1],shape[2])) 140 | 141 | if node.op == 'Conv2D' or node.op == 'MatMul': 142 | # we expect here (at least) two input nodes (one bottom layer and one tensor with the weights) 143 | # and one output node 144 | assert(len(inputs)>=2 and len(outputs)>=1), 'Unexpected graph definition!' 145 | 146 | # Find 'Const' input and read the tensor with weights 147 | conv_const_input_node = self.find_const_input(node,graph_def) 148 | t_weights = tf.contrib.util.make_ndarray(conv_const_input_node.attr['value'].tensor) 149 | # filter weights for the Conv2D operation are stored on the second input, and are expected to be in the order [filter_height, filter_width, input_depth, output_depth] (https://www.tensorflow.org/versions/r0.9/how_tos/tool_developers/index.html#weight-formats) 150 | if node.op == 'Conv2D': t_weights = t_weights.transpose(3,2,0,1) 151 | output['parameters'][conv_const_input_node.name] = t_weights 152 | layer_obj['weights_name'] = conv_const_input_node.name 153 | layer_obj['dim'] = t_weights.shape 154 | 155 | # Try to find a 'BiasAdd' in the output nodes and read the tensor with biases (if there are) 156 | conv_biasAdd_output_nodes = [n_ for n_ in outputs if (n_.op == 'BiasAdd' or n_.op == 'Add')] 157 | assert(len(conv_biasAdd_output_nodes)<=1), 'Unexpected graph definition!' 158 | biases_name = 'none' 159 | if len(conv_biasAdd_output_nodes)==1: 160 | biasAdd_const_input_node = self.find_const_input(conv_biasAdd_output_nodes[0],graph_def) 161 | t_bias = tf.contrib.util.make_ndarray(biasAdd_const_input_node.attr['value'].tensor) 162 | biases_name = biasAdd_const_input_node.name 163 | else: # No bias 164 | t_bias = np.zeros(shape=(t_weights.shape[0],)) 165 | output['parameters'][biases_name] = t_bias 166 | layer_obj['biases_name'] = biases_name 167 | # print(node.attr['data_format']) # TODO warning: the default is "NHWC" format with shape [batch, in_height, in_width, in_channels]. but others exist 168 | 169 | if node.op == 'MaxPool' or node.op == 'AvgPool': 170 | # we expect here (at least) one input node and one output node 171 | assert(len(inputs)>=1 and len(outputs)>=1), 'Unexpected graph definition!' 172 | 173 | ksize = [int(i) for i in node.attr['ksize'].list.i] 174 | layer_obj['kernel_size'] = [ksize[1],ksize[2]] 175 | layer_obj['pool'] = node.op 176 | 177 | if node.op == 'MaxPool' or node.op == 'AvgPool' or node.op == 'Conv2D': 178 | 179 | layer_obj['stride'] = [int(i) for i in node.attr['strides'].list.i] 180 | 181 | # In tensorflow there are two padding scheme chosen as `'SAME'` or `'VALID'` 182 | # (https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/nn.py) 183 | if node.attr[u'padding'].s == u'VALID': 184 | # For the `'VALID'` the padding values are always zero 185 | layer_obj['padding'] = [0,0] 186 | elif node.attr[u'padding'].s == u'SAME': 187 | # For the `'SAME'` padding, they are computed as a function of the input height and width 188 | assert(len(self.mapsizes)>0),'Unexpected graph definition! Node '+node.op+' use SAME as padding strategy but not possible to calculate padding without a Palceholder node' 189 | last_size = self.mapsizes[len(self.mapsizes)-1] 190 | in_height = last_size[0] 191 | in_width = last_size[1] 192 | filter_height = layer_obj['dim'][2] if node.op == 'Conv2D' else layer_obj['kernel_size'][0] 193 | filter_width = layer_obj['dim'][3] if node.op == 'Conv2D' else layer_obj['kernel_size'][1] 194 | out_height = ceil(float(in_height) / float(layer_obj['stride'][1])) 195 | out_width = ceil(float(in_width) / float(layer_obj['stride'][2])) 196 | pad_along_height = ((out_height - 1) * layer_obj['stride'][1] + filter_height - in_height) 197 | pad_along_width = ((out_width - 1) * layer_obj['stride'][2] + filter_width - in_width) 198 | layer_obj['padding'] = [pad_along_height,pad_along_width] 199 | self.mapsizes.append((out_height,out_width)) 200 | 201 | if node.op == 'Relu': 202 | # we expect here (at least) one input node and one output node 203 | assert(len(inputs)>=1 and len(outputs)>=1), 'Unexpected graph definition!' 204 | layer_obj['negative_slope'] = None 205 | 206 | if node.op == 'Softmax': 207 | # we expect here one input node and no output node 208 | assert(len(inputs)==1 and len(outputs)==0), 'Unexpected graph definition!' 209 | layer_obj['axis'] = None 210 | 211 | # Check if we have all required fields 212 | #print(' - layer_obj: '+str(layer_obj)) 213 | assert(sorted(layer_obj_fields.keys()) == sorted(layer_obj.keys())), 'Layer definition is not complete!' 214 | print('Done!') 215 | return output 216 | 217 | def loadFromObject(self, frameworkObj): 218 | return NotImplementedError 219 | 220 | if __name__=='__main__': 221 | importer = TensorflowImporter() 222 | out_model = importer.load('/home/lluis/tensorflow_in_out/mnist/mnist_conv.pb') 223 | pass 224 | -------------------------------------------------------------------------------- /modules/importers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgarriba/DeepRosetta/4908092010c4e2fc4f9949b1f273c08996266b73/modules/importers/__init__.py -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgarriba/DeepRosetta/4908092010c4e2fc4f9949b1f273c08996266b73/test/__init__.py -------------------------------------------------------------------------------- /test/download_models.sh: -------------------------------------------------------------------------------- 1 | # Downloads models of various formats for test purposes 2 | 3 | # Create the directory for downloaded models 4 | if [ ! -d ./test/models ]; then 5 | mkdir ./test/models 6 | fi 7 | 8 | # CAFFE: VGG Face model (http://www.robots.ox.ac.uk/~vgg/software/vgg_face/) 9 | if [ ! -f ./test/models/vgg_face_caffe/VGG_FACE.caffemodel ]; then 10 | echo "Downloading Caffe model..." 11 | wget http://www.robots.ox.ac.uk/~vgg/software/vgg_face/src/vgg_face_caffe.tar.gz -O ./test/models/vgg_face_caffe.tar.gz 12 | tar -xvf ./test/models/vgg_face_caffe.tar.gz -C ./test/models/ 13 | echo "OK" 14 | else 15 | echo "Caffe model already downloaded..." 16 | fi 17 | 18 | 19 | # TENSORFLOW: Inception model (http://arxiv.org/abs/1512.00567) 20 | if [ ! -f ./test/models/tensorflow_inception_graph.pb ]; then 21 | echo "Downloading TensorFlow model..." 22 | wget https://storage.googleapis.com/download.tensorflow.org/models/inception_dec_2015.zip -O ./test/models/inception_dec_2015.zip 23 | 24 | # Unzip the model (resulting file will be ) 25 | unzip ./test/models/inception_dec_2015.zip -d ./test/models/ 26 | echo "OK" 27 | else 28 | echo "TensorFlow model already downloaded..." 29 | fi 30 | 31 | # MATCONVNET: VGG Face model (http://www.robots.ox.ac.uk/~vgg/software/vgg_face/) 32 | if [ ! -f ./test/models/vgg_face_matconvnet/data/vgg_face.mat ]; then 33 | echo "Downloading MatConvNet model..." 34 | wget http://www.robots.ox.ac.uk/~vgg/software/vgg_face/src/vgg_face_matconvnet.tar.gz -O ./test/models/vgg_face_matconvnet.tar.gz 35 | tar -xvf ./test/models/vgg_face_matconvnet.tar.gz -C ./test/models/ 36 | echo "OK" 37 | else 38 | echo "MatConvNet model already downloaded..." 39 | fi 40 | 41 | 42 | # TORCH: VGG Face model (http://www.robots.ox.ac.uk/~vgg/software/vgg_face/) 43 | if [ ! -f ./test/models/vgg_face_torch/VGG_FACE.t7 ]; then 44 | echo "Downloading Torch model..." 45 | wget http://www.robots.ox.ac.uk/~vgg/software/vgg_face/src/vgg_face_torch.tar.gz -O ./test/models/vgg_face_torch.tar.gz 46 | tar -xvf ./test/models/vgg_face_torch.tar.gz -C ./test/models/ 47 | echo "OK" 48 | else 49 | echo "Torch model already downloaded..." 50 | fi -------------------------------------------------------------------------------- /test/local_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | sys.path.insert(0, '../') 4 | 5 | from core.RosettaStone import RosettaStone 6 | from modules.importers.CaffeFileImporter import Importer 7 | 8 | class MyTest(unittest.TestCase): 9 | def test_caffe_importer(self): 10 | importer = Importer() 11 | importer.load("./test/models/vgg_face_caffe/VGG_FACE.caffemodel") 12 | 13 | if __name__ == '__main__': 14 | unittest.main() 15 | -------------------------------------------------------------------------------- /test/tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import DeepRosetta as dr 4 | 5 | class MyTest(unittest.TestCase): 6 | def test_rosetta(self): 7 | rosetta = dr.RosettaStone() 8 | 9 | A_file = 'my.caffemodel' 10 | A_type = 'DummyCaffeImporter' 11 | 12 | B_file = 'your.caffemodel' 13 | B_type = 'DummyCaffeExporter' 14 | 15 | # good example 16 | rosetta.convert(A_file, B_file, A_type, B_type) 17 | 18 | # wrong example 19 | self.assertRaises(Exception, rosetta.convert, A_file, B_file, B_type, A_type) 20 | 21 | 22 | if __name__ == '__main__': 23 | unittest.main() 24 | --------------------------------------------------------------------------------