├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── README.md
├── cmake
    ├── FindEigen3.cmake
    └── FindNumPy.cmake
├── lib
    ├── CMakeLists.txt
    ├── constraintloss
    │   ├── CMakeLists.txt
    │   ├── constraintsoftmax.cpp
    │   └── constraintsoftmax.h
    ├── optimization
    │   ├── CMakeLists.txt
    │   ├── fista.cpp
    │   └── fista.h
    ├── python
    │   ├── CMakeLists.txt
    │   ├── boost.cpp
    │   ├── boost.h
    │   ├── ccnn.cpp
    │   ├── ccnn.h
    │   ├── constraintloss.cpp
    │   ├── constraintloss.h
    │   ├── util.cpp
    │   └── util.h
    └── util
    │   ├── CMakeLists.txt
    │   ├── eigen.cpp
    │   ├── eigen.h
    │   ├── win_util.cpp
    │   └── win_util.h
├── models
    ├── examples
    │   ├── gt1.png
    │   ├── gt2.png
    │   ├── im1.jpg
    │   ├── im2.jpg
    │   ├── im3.jpg
    │   ├── im4.jpg
    │   ├── out1.png
    │   └── out2.png
    ├── fcn_32s
    │   ├── deploy_32s.prototxt
    │   ├── solver_32s.prototxt
    │   └── train_32s.prototxt
    ├── fcn_8s
    │   ├── deploy_8s.prototxt
    │   ├── solver_8s.prototxt
    │   └── train_8s.prototxt
    └── scripts
    │   ├── download_ccnn_models.sh
    │   └── download_pretrained_models.sh
└── src
    ├── ccnn.py
    ├── config.py
    ├── dataset.py
    ├── demo.py
    ├── extras
        ├── VOClabelcolormap.m
        ├── evaluate_pascal_seg_test.py
        ├── gen_bb_ind_helper.py
        ├── generate_bb_indicator.m
        ├── generate_lmbd.py
        ├── gray2ind.py
        ├── mat2png.py
        ├── png2gray.py
        └── save_png_colormap_pascal.m
    ├── python_layers.py
    ├── test.py
    ├── test_argmax.py
    ├── train.py
    └── user_config.py


/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | **/__pycache__
3 | *.kdev4
4 | **.pyc
5 | **.caffemodel
6 | **.solverstate
7 | **/.*
8 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "caffe-ccnn"]
2 | 	path = caffe-ccnn
3 | 	url = https://github.com/pathak22/caffe-ccnn.git
4 | 	branch = master
5 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | project( ccnn )
2 | cmake_minimum_required(VERSION 2.8)
3 | add_definitions( -DLBFGS_FLOAT=32 )
4 | set( CMAKE_POSITION_INDEPENDENT_CODE True )
5 | 
6 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake )
7 | add_subdirectory( lib )


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | UC Berkeley's Standard Copyright and Disclaimer Notice:
 2 | 
 3 | Copyright (c) 2015, Deepak Pathak, Philipp Krähenbühl 
 4 | and The Regents of the University of California (Regents). 
 5 | All Rights Reserved. 
 6 | 
 7 | Permission to use, copy, modify, and distribute this software and its 
 8 | documentation for educational, research, and not-for-profit purposes, without 
 9 | fee and without a signed licensing agreement, is hereby granted, provided that 
10 | the above copyright notice, this paragraph and the following two paragraphs appear 
11 | in all copies, modifications, and distributions. Contact The Office of Technology 
12 | Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, 
13 | (510) 643-7201, for commercial licensing opportunities.
14 | 
15 | IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, 
16 | INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE 
17 | USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE 
18 | POSSIBILITY OF SUCH DAMAGE. REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 
19 | BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
20 | PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS 
21 | PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
22 | ENHANCEMENTS, OR MODIFICATIONS.
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## CCNN: Constrained Convolutional Neural Networks for Weakly Supervised Segmentation
  2 | 
  3 | [Deepak Pathak](http://cs.berkeley.edu/~pathak), [Philipp Kr&auml;henb&uuml;hl](http://www.philkr.net/), [Trevor Darrell](http://cs.berkeley.edu/~trevor)
  4 | 
  5 | **CCNN** is a framework for optimizing convolutional neural networks with linear constraints.
  6 |  - It has been shown to achieve state-of-the-art results on the task of weakly-supervised semantic segmentation.
  7 |  - It is written in Python and C++, and based on [Caffe](http://caffe.berkeleyvision.org/).
  8 |  - It has been published at **ICCV 2015**. It was initially described in the [arXiv report](http://arxiv.org/abs/1506.03648).
  9 | 
 10 | If you find CCNN useful in your research, please cite:
 11 | 
 12 |     @inproceedings{pathakICCV15ccnn,
 13 |         Author = {Pathak, Deepak and Kr\"ahenb\"uhl, Philipp and Darrell, Trevor},
 14 |         Title = {Constrained Convolutional Neural Networks for Weakly Supervised Segmentation},
 15 |         Booktitle = {International Conference on Computer Vision ({ICCV})},
 16 |         Year = {2015}
 17 |     }
 18 | 
 19 | ### License
 20 | 
 21 | CCNN is released under academic, non-commercial UC Berkeley license (see [LICENSE](https://github.com/pathak22/ccnn/blob/master/LICENSE) file for details). 
 22 | 
 23 | ### Contents
 24 | 1. [Requirements](#1-requirements)
 25 | 2. [Installation](#2-installation)
 26 | 3. [Usage](#3-usage)
 27 | 4. [Scripts Information](#4-scripts-information)
 28 | 5. [Extra Downloads](#5-extra-downloads)
 29 | 
 30 | ### 1) Requirements
 31 | 
 32 | 1. Requirements for `Caffe` and `pycaffe` (see: [Caffe installation instructions](http://caffe.berkeleyvision.org/installation.html))
 33 | 2. GCC version more than 4.7
 34 | 3. Boost version more than 1.53 (recommended). If system dependencies give issues, install anaconda dependencies:
 35 | 
 36 |   ```
 37 |   $ conda install boost
 38 |   $ conda install protobuf
 39 |   ```
 40 |   
 41 | 4. A good GPU (e.g., Titan, K20, K40, ...) with at least 3G of memory is sufficient.
 42 | 
 43 | ### 2) Installation
 44 | 
 45 | 1. Clone the CCNN repository
 46 |   ```Shell
 47 |   # Make sure to clone with --recursive
 48 |   git clone --recursive https://github.com/pathak22/ccnn.git
 49 |   ```
 50 |   
 51 | 2. Build Caffe and pycaffe
 52 | 
 53 |   - Now follow the Caffe installation instructions [here](http://caffe.berkeleyvision.org/installation.html)
 54 |   - Caffe *must* be built with support for Python layers!
 55 |   - In your Makefile.config, make sure to have this line uncommented
 56 |     `WITH_PYTHON_LAYER := 1`
 57 |   - You can download my [Makefile.config](http://www.cs.berkeley.edu/~pathak/ccnn/Makefile.config) for reference.
 58 |   
 59 |   ```Shell
 60 |   cd ccnn/caffe-ccnn
 61 |   # If you have all caffe requirements installed
 62 |   # and your Makefile.config in place, then simply do:
 63 |   make -j8 && make pycaffe
 64 |   ```
 65 |     
 66 | 3. Now build CCNN
 67 | 
 68 |     ```Shell
 69 |     cd ccnn
 70 |     mkdir build
 71 |     cd build
 72 |     cmake ..
 73 |     make -j8
 74 |     ```
 75 |     
 76 |   - **Note:** If anaconda is installed, then python paths may have been messed b/w anaconda and system python. 
 77 |   - I usually run this command : 
 78 | 
 79 |   ```Shell
 80 |   cmake .. -DBOOST_ROOT=/home/pathak/anaconda -DPYTHON_LIBRARY=/home/pathak/anaconda/lib/libpython2.7.so -DPYTHON_INCLUDE_DIR=/home/pathak/anaconda/include/python2.7/ -DCMAKE_C_COMPILER=gcc-4.8 -DCMAKE_CXX_COMPILER=g++-4.8
 81 |   ```
 82 |   
 83 |   - To verify this do : `ccmake ./` inside the build folder and manually check the following things : 
 84 |   `MAKE_CXX_COMPILER, CMAKE_C_COMPILER , PYTHON_EXECUTABLE , PYTHON_INCLUDE_DIR , PYTHON_LIBRARY`
 85 |   - Make sure that cmake doesn't mess the anaconda boost to system boost.
 86 | 
 87 | 4. Configure path (if needed) in `src/user_config.py`.
 88 | 
 89 | 5. (Optional -- I don't do it) If everything runs fine, set `CMAKE_BUILD_TYPE` using `ccmake .` to `Release`. This prevents eigen from checking all assertions etc. and works faster.
 90 | 
 91 | ### 3) Usage
 92 | 
 93 | **Demo** CCNN.
 94 | 
 95 | ```Shell
 96 | cd ccnn
 97 | bash ./models/scripts/download_ccnn_models.sh
 98 | # This will populate the `ccnn/models/` folder with trained models.
 99 | python ./src/demo.py
100 | ```
101 | 
102 | **Train** CCNN.
103 | 
104 | ```Shell
105 | cd ccnn
106 | bash ./models/scripts/download_pretrained_models.sh
107 | # This will populate the `ccnn/models/` folder with imagenet pre-trained models.
108 | python ./src/train.py 2> log.txt
109 | ```
110 | 
111 | **Test** CCNN.
112 | 
113 | ```Shell
114 | cd ccnn
115 | python ./src/test.py  # To test IOU with CRF post-processing
116 | python ./src/test_argmax.py  # To test IOU without CRF
117 | ```
118 | 
119 | ### 4) Scripts Information
120 | 
121 | Model Prototxts:
122 | - `models/fcn_8s/` : Atrous algorithm based 8-strided VGG, described [here](http://arxiv.org/abs/1412.7062).
123 | - `models/fcn_32s/` : 32-strided VGG
124 | 
125 | Configure:
126 | - `src/config.py` : Set glog-minlevel accordingly to get desired caffe output to terminal
127 | 
128 | Helper Scripts:
129 | - `src/extras/` : These scripts are not needed to run the code. They are simple helper scripts to create data, to prepare pascal test server file, to add pascal cmap to segmentation outputs etc.
130 | 
131 | ### 5) Extra Downloads
132 | 
133 | - Pascal VOC Image List: [train](http://www.cs.berkeley.edu/~pathak/ccnn/train.txt), [val](http://www.cs.berkeley.edu/~pathak/ccnn/val.txt), [trainval](http://www.cs.berkeley.edu/~pathak/ccnn/trainval.txt), [test](http://www.cs.berkeley.edu/~pathak/ccnn/test.txt)
134 | - [Training image-level label indicator files](http://www.cs.berkeley.edu/~pathak/ccnn/trainIndicatorFiles.tar.gz)
135 | - [Pascal VOC 2012 validation result images](http://www.cs.berkeley.edu/~pathak/ccnn/voc_2012_val_results.tar.gz)
136 | - [Pascal VOC 2012 test result images](http://www.cs.berkeley.edu/~pathak/ccnn/voc_2012_test_results.tar.gz)
137 | 


--------------------------------------------------------------------------------
/cmake/FindEigen3.cmake:
--------------------------------------------------------------------------------
 1 | # - Try to find Eigen3 lib
 2 | #
 3 | # This module supports requiring a minimum version, e.g. you can do
 4 | #   find_package(Eigen3 3.1.2)
 5 | # to require version 3.1.2 or newer of Eigen3.
 6 | #
 7 | # Once done this will define
 8 | #
 9 | #  EIGEN3_FOUND - system has eigen lib with correct version
10 | #  EIGEN3_INCLUDE_DIR - the eigen include directory
11 | #  EIGEN3_VERSION - eigen version
12 | 
13 | # Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org>
14 | # Copyright (c) 2008, 2009 Gael Guennebaud, <g.gael@free.fr>
15 | # Copyright (c) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
16 | # Redistribution and use is allowed according to the terms of the 2-clause BSD license.
17 | 
18 | if(NOT Eigen3_FIND_VERSION)
19 |   if(NOT Eigen3_FIND_VERSION_MAJOR)
20 |     set(Eigen3_FIND_VERSION_MAJOR 2)
21 |   endif(NOT Eigen3_FIND_VERSION_MAJOR)
22 |   if(NOT Eigen3_FIND_VERSION_MINOR)
23 |     set(Eigen3_FIND_VERSION_MINOR 91)
24 |   endif(NOT Eigen3_FIND_VERSION_MINOR)
25 |   if(NOT Eigen3_FIND_VERSION_PATCH)
26 |     set(Eigen3_FIND_VERSION_PATCH 0)
27 |   endif(NOT Eigen3_FIND_VERSION_PATCH)
28 | 
29 |   set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}")
30 | endif(NOT Eigen3_FIND_VERSION)
31 | 
32 | macro(_eigen3_check_version)
33 |   file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header)
34 | 
35 |   string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}")
36 |   set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}")
37 |   string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}")
38 |   set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}")
39 |   string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}")
40 |   set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}")
41 | 
42 |   set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION})
43 |   if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})
44 |     set(EIGEN3_VERSION_OK FALSE)
45 |   else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})
46 |     set(EIGEN3_VERSION_OK TRUE)
47 |   endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})
48 | 
49 |   if(NOT EIGEN3_VERSION_OK)
50 | 
51 |     message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, "
52 |                    "but at least version ${Eigen3_FIND_VERSION} is required")
53 |   endif(NOT EIGEN3_VERSION_OK)
54 | endmacro(_eigen3_check_version)
55 | 
56 | if (EIGEN3_INCLUDE_DIR)
57 | 
58 |   # in cache already
59 |   _eigen3_check_version()
60 |   set(EIGEN3_FOUND ${EIGEN3_VERSION_OK})
61 | 
62 | else (EIGEN3_INCLUDE_DIR)
63 | 
64 |   find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library
65 |       PATHS
66 |       ${CMAKE_INSTALL_PREFIX}/include
67 |       ${KDE4_INCLUDE_DIR}
68 |       PATH_SUFFIXES eigen3 eigen
69 |     )
70 | 
71 |   if(EIGEN3_INCLUDE_DIR)
72 |     _eigen3_check_version()
73 |   endif(EIGEN3_INCLUDE_DIR)
74 | 
75 |   include(FindPackageHandleStandardArgs)
76 |   find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK)
77 | 
78 |   mark_as_advanced(EIGEN3_INCLUDE_DIR)
79 | 
80 | endif(EIGEN3_INCLUDE_DIR)
81 | 
82 | 


--------------------------------------------------------------------------------
/cmake/FindNumPy.cmake:
--------------------------------------------------------------------------------
 1 | # - Find the NumPy libraries
 2 | # This module finds if NumPy is installed, and sets the following variables
 3 | # indicating where it is.
 4 | #
 5 | # TODO: Update to provide the libraries and paths for linking npymath lib.
 6 | #
 7 | #  NUMPY_FOUND               - was NumPy found
 8 | #  NUMPY_VERSION             - the version of NumPy found as a string
 9 | #  NUMPY_VERSION_MAJOR       - the major version number of NumPy
10 | #  NUMPY_VERSION_MINOR       - the minor version number of NumPy
11 | #  NUMPY_VERSION_PATCH       - the patch version number of NumPy
12 | #  NUMPY_VERSION_DECIMAL     - e.g. version 1.6.1 is 10601
13 | #  NUMPY_INCLUDE_DIRS        - path to the NumPy include files
14 | 
15 | #============================================================================
16 | # Copyright 2012 Continuum Analytics, Inc.
17 | #
18 | # MIT License
19 | #
20 | # Permission is hereby granted, free of charge, to any person obtaining
21 | # a copy of this software and associated documentation files
22 | # (the "Software"), to deal in the Software without restriction, including
23 | # without limitation the rights to use, copy, modify, merge, publish,
24 | # distribute, sublicense, and/or sell copies of the Software, and to permit
25 | # persons to whom the Software is furnished to do so, subject to
26 | # the following conditions:
27 | # 
28 | # The above copyright notice and this permission notice shall be included
29 | # in all copies or substantial portions of the Software.
30 | # 
31 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
32 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
34 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
35 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
36 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
37 | # OTHER DEALINGS IN THE SOFTWARE.
38 | # 
39 | #============================================================================
40 | 
41 | # Finding NumPy involves calling the Python interpreter
42 | if(NumPy_FIND_REQUIRED)
43 |     find_package(PythonInterp REQUIRED)
44 | else()
45 |     find_package(PythonInterp)
46 | endif()
47 | 
48 | if(NOT PYTHONINTERP_FOUND)
49 |     set(NUMPY_FOUND FALSE)
50 | endif()
51 | 
52 | execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
53 |     "import numpy as n; print(n.__version__); print(n.get_include());"
54 |     RESULT_VARIABLE _NUMPY_SEARCH_SUCCESS
55 |     OUTPUT_VARIABLE _NUMPY_VALUES
56 |     ERROR_VARIABLE _NUMPY_ERROR_VALUE
57 |     OUTPUT_STRIP_TRAILING_WHITESPACE)
58 | 
59 | if(NOT _NUMPY_SEARCH_SUCCESS MATCHES 0)
60 |     if(NumPy_FIND_REQUIRED)
61 |         message(FATAL_ERROR
62 |             "NumPy import failure:\n${_NUMPY_ERROR_VALUE}")
63 |     endif()
64 |     set(NUMPY_FOUND FALSE)
65 | endif()
66 | 
67 | # Convert the process output into a list
68 | string(REGEX REPLACE ";" "\\\\;" _NUMPY_VALUES ${_NUMPY_VALUES})
69 | string(REGEX REPLACE "\n" ";" _NUMPY_VALUES ${_NUMPY_VALUES})
70 | list(GET _NUMPY_VALUES 0 NUMPY_VERSION)
71 | list(GET _NUMPY_VALUES 1 NUMPY_INCLUDE_DIRS)
72 | 
73 | # Make sure all directory separators are '/'
74 | string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIRS ${NUMPY_INCLUDE_DIRS})
75 | 
76 | # Get the major and minor version numbers
77 | string(REGEX REPLACE "\\." ";" _NUMPY_VERSION_LIST ${NUMPY_VERSION})
78 | list(GET _NUMPY_VERSION_LIST 0 NUMPY_VERSION_MAJOR)
79 | list(GET _NUMPY_VERSION_LIST 1 NUMPY_VERSION_MINOR)
80 | list(GET _NUMPY_VERSION_LIST 2 NUMPY_VERSION_PATCH)
81 | string(REGEX MATCH "[0-9]*" NUMPY_VERSION_PATCH ${NUMPY_VERSION_PATCH})
82 | math(EXPR NUMPY_VERSION_DECIMAL
83 |     "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
84 | 
85 | find_package_message(NUMPY
86 |     "Found NumPy: version \"${NUMPY_VERSION}\" ${NUMPY_INCLUDE_DIRS}"
87 |     "${NUMPY_INCLUDE_DIRS}${NUMPY_VERSION}")
88 | 
89 | set(NUMPY_FOUND TRUE)
90 | 
91 | 


--------------------------------------------------------------------------------
/lib/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | find_package( Eigen3 3.2.0 REQUIRED )
 2 | set(python_version "2" CACHE STRING "Specify which python version to use")
 3 | MESSAGE(${python_version})
 4 | if(python_version VERSION_LESS 3.0.0)
 5 | 	find_package(PythonInterp 2.7 REQUIRED)
 6 | 	find_package(PythonLibs 2.7 REQUIRED)
 7 | 	find_package(NumPy REQUIRED)
 8 | 	find_package(Boost COMPONENTS python REQUIRED)
 9 | else()
10 | 	find_package(PythonInterp 3.3 REQUIRED)
11 | 	find_package(PythonLibs 3.3 REQUIRED)
12 | 	find_package(NumPy REQUIRED)
13 | 	find_package(Boost COMPONENTS python-py34)
14 | 	if(NOT Boost_FOUND)
15 | 		find_package(Boost COMPONENTS python-py33)
16 | 	endif()
17 | 	if(NOT Boost_FOUND)
18 | 		find_package(Boost COMPONENTS python3 REQUIRED)
19 | 	endif()
20 | endif()
21 | find_package(OpenMP)
22 | 
23 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -std=c++11 -Wall ${OpenMP_CXX_FLAGS}" ) # set global flags
24 | include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ${EIGEN3_INCLUDE_DIR} ${Boost_INCLUDE_DIRS})
25 | 
26 | add_subdirectory( constraintloss )
27 | add_subdirectory( optimization )
28 | add_subdirectory( python )
29 | add_subdirectory( util )
30 | 


--------------------------------------------------------------------------------
/lib/constraintloss/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library( constraintloss constraintsoftmax.cpp )
2 | target_link_libraries( constraintloss util optimization )


--------------------------------------------------------------------------------
/lib/constraintloss/constraintsoftmax.cpp:
--------------------------------------------------------------------------------
  1 | // --------------------------------------------------------
  2 | // CCNN 
  3 | // Copyright (c) 2015 [See LICENSE file for details]
  4 | // Written by Deepak Pathak, Philipp Krähenbühl
  5 | // --------------------------------------------------------
  6 | 
  7 | #include "constraintsoftmax.h"
  8 | #include "optimization/fista.h"
  9 | #include "util/eigen.h"
 10 | #include <iostream>
 11 | 
 12 | LinearConstraint::LinearConstraint( const VectorXf & a, float b, float slack ):a(a),b(b),slack(slack) {
 13 | }
 14 | float LinearConstraint::eval( const RMatrixXf & x ) const {
 15 | 	// Return   \sum_i a*x_i - b
 16 | 	return (x*a).array().sum()-b;
 17 | }
 18 | 
 19 | // Performed across columns i.e. across channels
 20 | static RMatrixXf expAndNormalize( const RMatrixXf & m ) {
 21 | 	VectorXf mx = m.rowwise().maxCoeff();
 22 | 	RMatrixXf r = (m.colwise()-mx).array().exp();
 23 | 	return r.array().colwise() / r.array().rowwise().sum();
 24 | }
 25 | static VectorXf logSumExp( const RMatrixXf & m ) {
 26 | 	VectorXf mx = m.rowwise().maxCoeff();
 27 | 	return mx.array() + (m.colwise()-mx).array().exp().rowwise().sum().log();
 28 | }
 29 | 
 30 | // scale_ : determines the hardness of optimization. In hard case, entropy term in KL divergence is zero.
 31 | // Alternate way to implement this is to scale the second cross entropy term in KL divergence by 1000 times !
 32 | ConstraintSoftmax::ConstraintSoftmax( float scale ):scale_(scale) {
 33 | }
 34 | 
 35 | void ConstraintSoftmax::addLinearConstraint( const VectorXf & a, float b, float slack ) {
 36 | 	linear_constraints_.push_back( LinearConstraint(a, b, slack) );
 37 | }
 38 | void ConstraintSoftmax::addZeroConstraint( const VectorXf & a ) {
 39 | 	eassert( (a.array() >= 0).all() );
 40 | 	if( zero_constraints_.size() )
 41 | 		zero_constraints_.array() = zero_constraints_.array() || (a.array() > 0);
 42 | 	else
 43 | 		zero_constraints_ = a.array() > 0;
 44 | }
 45 | RMatrixXf ConstraintSoftmax::compute( const RMatrixXf & f ) const {
 46 | 	return expAndNormalize( scale_*computeLog( f ) );
 47 | }
 48 | RMatrixXf ConstraintSoftmax::computeLog( const RMatrixXf & f ) const {
 49 | 	const int N = f.rows(), M = f.cols();
 50 | 	// Special handling for zero constraints, let's remove all dimensions
 51 | 	// that are constraint to 0
 52 | 	int pM = M;
 53 | 	RMatrixXf pf = f, P;
 54 | 	std::vector<LinearConstraint> pc = linear_constraints_;
 55 | 	
 56 | 	// Project onto the zero constraints
 57 | 	if( zero_constraints_.size() ) {
 58 | 		pM = (zero_constraints_.array()==0).cast<int>().sum();
 59 | 		if( pM <= 1 ) {
 60 | 			RMatrixXf r = 1*f;
 61 | 			for( int i=0; i<M; i++ )
 62 | 				if( zero_constraints_[i]>0 )
 63 | 					r.col(i).setConstant(-1e10);
 64 | 			return r;
 65 | 		}
 66 | 		// Build the projection matrix
 67 | 		P = RMatrixXf::Zero(M,pM);
 68 | 		for( int i=0,k=0; i<M; i++ )
 69 | 			if( !zero_constraints_[i] )
 70 | 				P(i,k++) = 1;
 71 | 		// Project onto the matrix : Means remove the variables with zero equality constraints
 72 | 		pf = pf * P;
 73 | 		for(auto & c: pc)
 74 | 			c.a = P.transpose() * c.a;
 75 | 	}
 76 | 	
 77 | 	// Let's formulate the constraints as Ap >= b 		(with slack : Ap >= b - slack)
 78 | 	// Then our objective is D(p||q) = \sum p log p - \sum p log q + l' (b - Ap)
 79 | 	//                               = - H_p - \sum p pf - l' A p + lb
 80 | 	//                  d/dp D(p||q) = log p + 1 + c - pf - A' l = 0
 81 | 	//                             p = 1/Z exp(fp + A'l)
 82 | 	//                             where l >= 0
 83 | 	// The objective then simplifies to
 84 | 	//                       D(p||q) = \sum p (fp + A'l) - log Z - \sum p pf + l' (b - Ap)
 85 | 	//                               = -log Z + l' b
 86 | 	
 87 | 	RMatrixXf A(pc.size(),pM);
 88 | 	VectorXf b(pc.size()), slack(pc.size());
 89 | 	for( int i=0; i<(int)pc.size(); i++ ) {
 90 | 		A.row(i) = pc[i].a.transpose() / N; 		// Normalize by spatial_dim (no change theoretically, for implementation stability)
 91 | 		b[i] = pc[i].b;
 92 | 		slack[i] = pc[i].slack * N;					// Scale regularizer of slack according to spatial_dim
 93 | 	}
 94 | 	
 95 | 	// Solve for the soft assignment to the laten variables
 96 | 	// This function returns 
 97 | 	//	g : Gradient vector for dual variables. Returned as function argument.
 98 | 	//  return : objective value for dual optimization (which is to be minimized)
 99 | 	auto fun = [&](const VectorXf & l, VectorXf * g) -> double {
100 | 		if( g ) {
101 | 			RMatrixXf p = expAndNormalize( scale_*(pf.rowwise() + l.transpose()*A) ).colwise().sum();
102 | 			*g =  A*(p.colwise().sum()).transpose() - b;
103 | 		}
104 | 		return 1.0/scale_*logSumExp( scale_*(pf.rowwise() + l.transpose()*A) ).sum() - l.dot(b);
105 | 	};
106 | 	auto proj = [&](const VectorXf & x)->VectorXf {
107 | 		// if (x.array().maxCoeff() > 0) {
108 | 		// 	std::cout<<"\nActive Dual before slack : "<<x.array().maxCoeff()<<"\n";
109 | 		// 	std::cout<<"Scaled slack value : "<<slack.array().minCoeff()<<"  Input scale value : "<<(float)slack.array().minCoeff()/(float)N<<"  scale N : "<<N<<"\n";
110 | 		// }
111 | 		return x.array().max(0.f).min(slack.array()); 					// 0 <= dual variable <= slack
112 | 	};
113 | 	
114 | 	// Solve for the lagrangian dual to the constraint optimization
115 | 	// VectorXf l = fista( VectorXf::Zero(pc.size()), fun, proj );		// accelerated pgd : pgd + momentum
116 | 	bool converged = false;
117 | 	VectorXf l = pgd( VectorXf::Zero(pc.size()), fun, proj, false, &converged );
118 | 	if(!converged)
119 | 		printf("Projected gradient descent didn't converge. The problem might not be satisfiable!\n");
120 | 
121 | 	// Compute the 'labels'
122 | 	RMatrixXf r = pf.rowwise() + l.transpose()*A;
123 | 	
124 | 	// Construct the result
125 | 	if( zero_constraints_.size() ) {
126 | 		r = r * P.transpose();
127 | 		for( int i=0; i<M; i++ )
128 | 			if( zero_constraints_[i]>0 )
129 | 				r.col(i).setConstant(-1e10);
130 | 	}
131 | 	return r;
132 | }
133 | 
134 | 


--------------------------------------------------------------------------------
/lib/constraintloss/constraintsoftmax.h:
--------------------------------------------------------------------------------
 1 | // --------------------------------------------------------
 2 | // CCNN 
 3 | // Copyright (c) 2015 [See LICENSE file for details]
 4 | // Written by Deepak Pathak, Philipp Krähenbühl
 5 | // --------------------------------------------------------
 6 | 
 7 | #pragma once
 8 | #include "util/eigen.h"
 9 | 
10 | struct LinearConstraint {
11 | 	LinearConstraint( const VectorXf & a, float b, float slack=1e10 );
12 | 	// A constraint \sum_i a*x_i >= b - slack
13 | 	VectorXf a;
14 | 	float b,slack;
15 | 	float eval( const RMatrixXf & x ) const;
16 | };
17 | 
18 | class ConstraintSoftmax {
19 | protected:
20 | 	float scale_;
21 | 	std::vector<LinearConstraint> linear_constraints_;
22 | 	VectorXb zero_constraints_;
23 | public:
24 | 	ConstraintSoftmax( float scale=1.0 );
25 | 	// A constraint \sum_i a*x_i >= b
26 | 	void addLinearConstraint( const VectorXf & a, float b, float slack=1e10 );
27 | 	// A constraint \sum_i a*x_i == 0  where a >= 0
28 | 	void addZeroConstraint( const VectorXf & a );
29 | 	RMatrixXf compute( const RMatrixXf & f ) const;
30 | 	RMatrixXf computeLog( const RMatrixXf & f ) const;
31 | };
32 | 


--------------------------------------------------------------------------------
/lib/optimization/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library( optimization fista.cpp )
2 | target_link_libraries( optimization util )
3 | 


--------------------------------------------------------------------------------
/lib/optimization/fista.cpp:
--------------------------------------------------------------------------------
 1 | // --------------------------------------------------------
 2 | // CCNN 
 3 | // Copyright (c) 2015 [See LICENSE file for details]
 4 | // Written by Deepak Pathak, Philipp Krähenbühl
 5 | // --------------------------------------------------------
 6 | 
 7 | #include "fista.h"
 8 | #include <iostream>
 9 | 
10 | VectorXf identity(const VectorXf & x ) { return x; }
11 | 
12 | VectorXf fista( VectorXf x0, function_t f, projection_t p, bool verbose ) {
13 | 	const int N_ITER = 3000;
14 | 	const float beta = 0.5;
15 | 	float alpha = 1e-1;
16 | 	
17 | 	VectorXf r = x0;
18 | 	float best_e = 1e10;
19 | 	VectorXf x1 = x0, g = 0*x0;
20 | 	for( int k=1; k<=N_ITER && alpha>1e-5; k++ ) {
21 | 		// Strictly speaking this is not "legal" FISTA, but it seems to work well in practice
22 | 		alpha *= 1.05;
23 | 		
24 | 		// Compute y
25 | 		VectorXf y = x1 + (k-2.) / (k+1.)*(x1 - x0);
26 | 		// Evaluate the gradient at y
27 | 		float fy = f(y,&g), fx = 1e10;
28 | 		// Update the old x
29 | 		x0 = x1;
30 | 		// Update x
31 | 		x1 = p( y - alpha*g );
32 | 		while( alpha >= 1e-5 && (fx=f(x1,NULL)) > fy + g.dot(x1-y)+1./(2.*alpha)*(x1-y).dot(x1-y) ) {
33 | 			alpha *= beta;
34 | 			x1 = p( y - alpha*g );
35 | 		}
36 | 		if ( fx < best_e ) {
37 | 			best_e = fx;
38 | 			r = x0;
39 | 		}
40 | 		if (verbose){
41 | 			printf("it = %d   df = %f   alpha = %f\n", k, (x0-x1).array().abs().maxCoeff(), alpha );
42 | 			std::cout<<y.transpose()<<std::endl;
43 | 			std::cout<<g.transpose()<<std::endl;
44 | 			VectorXf gg;
45 | 			f(x1,&gg);
46 | 			std::cout<<x1.transpose()<<std::endl;
47 | 			std::cout<<gg.transpose()<<std::endl;
48 | 			std::cout<<std::endl;
49 | 		}
50 | 		if( (x0-x1).array().abs().maxCoeff() < 1e-4 )
51 | 			break;
52 | 	}
53 | 	return r;
54 | }
55 | 
56 | VectorXf pgd( VectorXf x0, function_t f, projection_t p, bool verbose, bool * converged ) {
57 | 	const int N_ITER = 3000;
58 | 	const float beta = 0.5;
59 | 	float alpha = 1e5;
60 | 	
61 | 	VectorXf g = 0*x0;
62 | 	float prev_fx = f(x0,&g);
63 | 
64 | 	int k=1;
65 | 	for( k=1; k<=N_ITER && alpha>1e-8; k++ ) {
66 | 		VectorXf ng;
67 | 		float fx = f(p(x0-alpha*g),&ng);
68 | 		if( fx < prev_fx ) {
69 | 			x0 = p(x0-alpha*g);
70 | 			g = ng;
71 | 			prev_fx = fx;
72 | 			alpha *= 1.1;
73 | 		}
74 | 		else
75 | 			alpha *= beta;
76 | 	}
77 | 
78 | 	// Debugging
79 | 	// if (k>N_ITER){
80 | 	// 	std::cout<<"PGD didn't converge\n";
81 | 	// 	std::cout<<"K="<<k<<" alpha="<<alpha<<"\n";
82 | 	// 	std::cout<<"Dual before slack :\n"<<x0.array()<<"\n"; 		//x0.array().maxCoeff()
83 | 	// 	std::cout<<"Gradient at last step :\n"<<g.array()<<"\n";
84 | 	// }
85 | 	if( converged ) *converged = (k < N_ITER);
86 | 
87 | 	return x0;
88 | }
89 | 


--------------------------------------------------------------------------------
/lib/optimization/fista.h:
--------------------------------------------------------------------------------
 1 | // --------------------------------------------------------
 2 | // CCNN 
 3 | // Copyright (c) 2015 [See LICENSE file for details]
 4 | // Written by Deepak Pathak, Philipp Krähenbühl
 5 | // --------------------------------------------------------
 6 | 
 7 | #pragma once
 8 | #include "util/eigen.h"
 9 | #include <functional>
10 | 
11 | typedef std::function<float(const VectorXf & x, VectorXf * gradient)> function_t;
12 | typedef std::function<VectorXf(const VectorXf & x)> projection_t;
13 | 
14 | VectorXf identity(const VectorXf & x );
15 | VectorXf fista( VectorXf x0, function_t f, projection_t p = identity, bool verbose=false );
16 | VectorXf pgd( VectorXf x0, function_t f, projection_t p = identity, bool verbose=false, bool * converged=NULL );
17 | 


--------------------------------------------------------------------------------
/lib/python/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | include_directories( ${Boost_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS})
 2 | link_directories( ${Boost_LIBRARY_DIR} )
 3 | file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/__init__.py" "" )
 4 | 
 5 | add_library( ccnn SHARED boost.cpp ccnn.cpp util.cpp constraintloss.cpp )
 6 | target_link_libraries( ccnn ${Boost_LIBRARIES} ${PYTHON_LIBRARIES} constraintloss util)
 7 | 
 8 | set_target_properties( ccnn PROPERTIES PREFIX "")
 9 | if(APPLE)
10 | 	set_target_properties( ccnn PROPERTIES SUFFIX ".so" )
11 | endif()
12 | 


--------------------------------------------------------------------------------
/lib/python/boost.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     Copyright (c) 2014, Philipp Krähenbühl
 3 |     All rights reserved.
 4 | 	
 5 |     Redistribution and use in source and binary forms, with or without
 6 |     modification, are permitted provided that the following conditions are met:
 7 |         * Redistributions of source code must retain the above copyright
 8 |         notice, this list of conditions and the following disclaimer.
 9 |         * Redistributions in binary form must reproduce the above copyright
10 |         notice, this list of conditions and the following disclaimer in the
11 |         documentation and/or other materials provided with the distribution.
12 |         * Neither the name of the Stanford University nor the
13 |         names of its contributors may be used to endorse or promote products
14 |         derived from this software without specific prior written permission.
15 | 	
16 |     THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY
17 |     EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 |     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 |     DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY
20 |     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 |     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 | 	 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 | 	 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | 	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 |     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | */
27 | #include "boost.h"
28 | 


--------------------------------------------------------------------------------
/lib/python/boost.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     Copyright (c) 2014, Philipp Krähenbühl
 3 |     All rights reserved.
 4 | 	
 5 |     Redistribution and use in source and binary forms, with or without
 6 |     modification, are permitted provided that the following conditions are met:
 7 |         * Redistributions of source code must retain the above copyright
 8 |         notice, this list of conditions and the following disclaimer.
 9 |         * Redistributions in binary form must reproduce the above copyright
10 |         notice, this list of conditions and the following disclaimer in the
11 |         documentation and/or other materials provided with the distribution.
12 |         * Neither the name of the Stanford University nor the
13 |         names of its contributors may be used to endorse or promote products
14 |         derived from this software without specific prior written permission.
15 | 	
16 |     THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY
17 |     EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 |     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 |     DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY
20 |     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 |     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 | 	 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 | 	 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | 	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 |     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | */
27 | #pragma once
28 | #include <boost/python/suite/indexing/vector_indexing_suite.hpp>
29 | #include <boost/python.hpp>
30 | #include <memory>
31 | using namespace boost::python;
32 | 


--------------------------------------------------------------------------------
/lib/python/ccnn.cpp:
--------------------------------------------------------------------------------
 1 | // --------------------------------------------------------
 2 | // CCNN 
 3 | // Copyright (c) 2015 [See LICENSE file for details]
 4 | // Written by Deepak Pathak, Philipp Krähenbühl
 5 | // --------------------------------------------------------
 6 | 
 7 | #include "util.h"
 8 | #include "constraintloss.h"
 9 | #include "ccnn.h"
10 | 
11 | BOOST_PYTHON_MODULE(ccnn)
12 | {
13 | 	import_array1();
14 | 
15 | 	defineUtil();
16 | 	defineConstraintloss();
17 | }
18 | 


--------------------------------------------------------------------------------
/lib/python/ccnn.h:
--------------------------------------------------------------------------------
 1 | // --------------------------------------------------------
 2 | // CCNN 
 3 | // Copyright (c) 2015 [See LICENSE file for details]
 4 | // Written by Deepak Pathak, Philipp Krähenbühl
 5 | // --------------------------------------------------------
 6 | 
 7 | #pragma once
 8 | #include "boost.h"
 9 | #include <Python.h>
10 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
11 | #include <numpy/arrayobject.h>
12 | 
13 | #define ADD_MODULE( name ) object name ## Module(handle<>(borrowed(PyImport_AddModule(((std::string)"ccnn."+# name).c_str()))));\
14 | scope().attr(# name) = name ## Module;\
15 | scope name ## _scope = name ## Module;
16 | 


--------------------------------------------------------------------------------
/lib/python/constraintloss.cpp:
--------------------------------------------------------------------------------
 1 | // --------------------------------------------------------
 2 | // CCNN 
 3 | // Copyright (c) 2015 [See LICENSE file for details]
 4 | // Written by Deepak Pathak, Philipp Krähenbühl
 5 | // --------------------------------------------------------
 6 | 
 7 | #include "constraintloss.h"
 8 | #include "ccnn.h"
 9 | #include "constraintloss/constraintsoftmax.h"
10 | 
11 | BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(ConstraintSoftmax_addLinearConstraint_o, ConstraintSoftmax::addLinearConstraint, 2, 3 );
12 | 
13 | void defineConstraintloss() {
14 | 	ADD_MODULE( constraintloss );
15 | 	
16 | 	class_<ConstraintSoftmax>("ConstraintSoftmax",init<>())
17 | 	.def(init<float>())
18 | 	.def( "addLinearConstraint", &ConstraintSoftmax::addLinearConstraint, ConstraintSoftmax_addLinearConstraint_o() )
19 | 	.def( "addZeroConstraint", &ConstraintSoftmax::addZeroConstraint )
20 | 	.def( "compute", &ConstraintSoftmax::compute )
21 | 	.def( "computeLog", &ConstraintSoftmax::computeLog );
22 | }
23 | 


--------------------------------------------------------------------------------
/lib/python/constraintloss.h:
--------------------------------------------------------------------------------
 1 | // --------------------------------------------------------
 2 | // CCNN 
 3 | // Copyright (c) 2015 [See LICENSE file for details]
 4 | // Written by Deepak Pathak, Philipp Krähenbühl
 5 | // --------------------------------------------------------
 6 | 
 7 | #pragma once
 8 | 
 9 | void defineConstraintloss();
10 | 


--------------------------------------------------------------------------------
/lib/python/util.cpp:
--------------------------------------------------------------------------------
  1 | // --------------------------------------------------------
  2 | // CCNN
  3 | // 2015. Modified by Deepak Pathak, Philipp Krähenbühl
  4 | // --------------------------------------------------------
  5 | 
  6 | /*
  7 |     Copyright (c) 2014, Philipp Krähenbühl
  8 |     All rights reserved.
  9 | 	
 10 |     Redistribution and use in source and binary forms, with or without
 11 |     modification, are permitted provided that the following conditions are met:
 12 |         * Redistributions of source code must retain the above copyright
 13 |         notice, this list of conditions and the following disclaimer.
 14 |         * Redistributions in binary form must reproduce the above copyright
 15 |         notice, this list of conditions and the following disclaimer in the
 16 |         documentation and/or other materials provided with the distribution.
 17 |         * Neither the name of the Stanford University nor the
 18 |         names of its contributors may be used to endorse or promote products
 19 |         derived from this software without specific prior written permission.
 20 | 	
 21 |     THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY
 22 |     EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 23 |     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 24 |     DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY
 25 |     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 26 |     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 27 | 	 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 28 | 	 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29 | 	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 30 |     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31 | */
 32 | #include "util.h"
 33 | #include "ccnn.h"
 34 | #include "util/eigen.h"
 35 | 
 36 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 37 | #include <numpy/arrayobject.h>
 38 | #include <boost/python/suite/indexing/vector_indexing_suite.hpp>
 39 | 
 40 | // For numpy 1.6 define NPY_ARRAY_*
 41 | #if NPY_API_VERSION < 0x00000007
 42 | #define NPY_ARRAY_C_CONTIGUOUS NPY_C_CONTIGUOUS
 43 | #define NPY_ARRAY_ALIGNED      NPY_ALIGNED
 44 | #endif
 45 | 
 46 | template <typename SCALAR>
 47 | struct NumpyEquivalentType {};
 48 | 
 49 | template <> struct NumpyEquivalentType<double> {enum { type_code = NPY_DOUBLE };};
 50 | template <> struct NumpyEquivalentType<float> {enum { type_code = NPY_FLOAT };};
 51 | template <> struct NumpyEquivalentType<int64_t> {enum { type_code = NPY_INT64 };};
 52 | template <> struct NumpyEquivalentType<uint64_t> {enum { type_code = NPY_UINT64 };};
 53 | template <> struct NumpyEquivalentType<int32_t> {enum { type_code = NPY_INT32 };};
 54 | template <> struct NumpyEquivalentType<uint32_t> {enum { type_code = NPY_UINT32 };};
 55 | template <> struct NumpyEquivalentType<int16_t> {enum { type_code = NPY_INT16 };};
 56 | template <> struct NumpyEquivalentType<uint16_t> {enum { type_code = NPY_UINT16 };};
 57 | template <> struct NumpyEquivalentType<int8_t> {enum { type_code = NPY_INT8 };};
 58 | template <> struct NumpyEquivalentType<uint8_t> {enum { type_code = NPY_UINT8  };};
 59 | template <> struct NumpyEquivalentType<bool> {enum { type_code = NPY_BOOL  };};
 60 | 
 61 | template< typename T >
 62 | void copyMat( T * dst, const T* src, int cols, int rows, bool transpose ) {
 63 | 	if( !transpose )
 64 | 		memcpy( dst, src, cols*rows*sizeof(T) );
 65 | 	else {
 66 | 		for( int j=0; j<rows; j++ )
 67 | 			for( int i=0; i<cols; i++ )
 68 | 				dst[ j*cols+i ] = src[ i*rows+j ];
 69 | 	}
 70 | }
 71 | // Reduce the number of pointer-to-function warnings (since disabling them seems not possible)
 72 | static PyArrayObject * PyArrayObject_New( int n, npy_intp * shape, int type ) {
 73 | 	return (PyArrayObject*)PyArray_SimpleNew( n, shape, type );
 74 | }
 75 | static int PyArray_SIZE2( PyArrayObject * array ) {
 76 | 	return PyArray_SIZE(array);
 77 | }
 78 | template<class MatType>
 79 | struct EigenMatrixToPython {
 80 | 	static PyObject* convert(const MatType& mat) {
 81 | 		typedef typename MatType::Scalar T;
 82 | 		PyArrayObject* python_array;
 83 | 		if( MatType::ColsAtCompileTime==1 || MatType::RowsAtCompileTime==1 ) {
 84 | 			npy_intp shape[1] = { mat.rows()*mat.cols() };
 85 | 			python_array = PyArrayObject_New(1, shape, NumpyEquivalentType<T>::type_code);
 86 | 		}
 87 | 		else {
 88 | 			npy_intp shape[2] = { mat.rows(), mat.cols() };
 89 | 			python_array = PyArrayObject_New(2, shape, NumpyEquivalentType<T>::type_code);
 90 | 		}
 91 | 		copyMat( (T*)PyArray_DATA(python_array), mat.data(), mat.rows(), mat.cols(), !(MatType::Flags & RowMajor) );
 92 | 		return (PyObject*)python_array;
 93 | 	}
 94 | };
 95 | 
 96 | template<typename MatType>
 97 | struct EigenMatrixFromPython {
 98 | 	typedef typename MatType::Scalar T;
 99 | 	EigenMatrixFromPython() {
100 | 		converter::registry::push_back(&convertible, &construct, type_id<MatType>());
101 | 	}
102 | 	static void* convertible(PyObject* obj_ptr) {
103 | 		const int R = MatType::RowsAtCompileTime;
104 | 		const int C = MatType::ColsAtCompileTime;
105 | 		PyArrayObject *array = reinterpret_cast<PyArrayObject*>(obj_ptr);
106 | 		if (!PyArray_Check(obj_ptr) || PyArray_NDIM(array) > 2 ||  PyArray_NDIM(array) <= 0 || PyArray_TYPE(array) != NumpyEquivalentType<T>::type_code)
107 | 			return 0;
108 | 		if( R==1 || C==1 ) { // Eigen Vector
109 | 			if ( PyArray_NDIM(array)==2 && PyArray_DIMS(array)[0]>1 && PyArray_DIMS(array)[1]>1 )
110 | 				return 0;
111 | 			if ( PyArray_NDIM(array)==1 && R*C > 0 && R*C != PyArray_DIMS(array)[0] )
112 | 				return 0;
113 | 		}
114 | 		else if ( R > 1 && PyArray_DIMS(array)[0] != R )
115 | 			return 0;
116 | 		else if ( C > 1 && PyArray_NDIM(array)<2 && PyArray_DIMS(array)[1] != C )
117 | 			return 0;
118 | 		return obj_ptr;
119 | 	}
120 | 	static void construct(PyObject* obj_ptr, converter::rvalue_from_python_stage1_data* data) {
121 | 		const int R = MatType::RowsAtCompileTime;
122 | 		const int C = MatType::ColsAtCompileTime;
123 | 		
124 | 		PyArrayObject *array = reinterpret_cast<PyArrayObject*>(obj_ptr);
125 | 		int flags = PyArray_FLAGS(array);
126 | 		if (!(flags & NPY_ARRAY_C_CONTIGUOUS) || !(flags & NPY_ARRAY_ALIGNED))
127 | 			throw std::invalid_argument("Contiguous and aligned array required!");
128 | 		const int ndims = PyArray_NDIM(array);
129 | 		
130 | 		const int dtype_size = (PyArray_DESCR(array))->elsize;
131 | 		const int s1 = PyArray_STRIDE(array, 0), s2 = ndims > 1 ? PyArray_STRIDE(array, 1) : 0;
132 | 		
133 | 		int nrows=1, ncols=1;
134 | 		if( R==1 || C==1 ) { // Vector
135 | 			nrows = R==1 ? 1 : PyArray_SIZE2(array);
136 | 			ncols = C==1 ? 1 : PyArray_SIZE2(array);
137 | 		}
138 | 		else {
139 | 			nrows = (R == Dynamic) ? PyArray_DIMS(array)[0] : R;
140 | 			if ( ndims > 1 )
141 | 				ncols = (R == Dynamic) ? PyArray_DIMS(array)[1] : R;
142 | 		}
143 | 		T* raw_data = reinterpret_cast<T*>(PyArray_DATA(array));
144 | 		
145 | 		typedef Map< Matrix<T,Dynamic,Dynamic,RowMajor>,Aligned,Stride<Dynamic, Dynamic> > MapType;
146 | 		
147 | 		void* storage=((converter::rvalue_from_python_storage<MatType>*)(data))->storage.bytes;
148 | 		new (storage) MatType;
149 | 		MatType* emat = (MatType*)storage;
150 | 		*emat = MapType(raw_data, nrows, ncols,Stride<Dynamic, Dynamic>(s1/dtype_size, s2/dtype_size));
151 | 		data->convertible = storage;
152 | 	}
153 | };
154 | 
155 | #define EIGEN_MATRIX_CONVERTER(Type) EigenMatrixFromPython<Type>(); to_python_converter<Type, EigenMatrixToPython<Type> >();
156 | // #define EIGEN_MATRIX_CONVERTER(Type) EigenMatrixFromPython<Type>(); to_python_converter<Type, EigenMatrixToPython<Type> >(); to_python_converter<const Type &, EigenMatrixToPython<Type> >();
157 | 
158 | #define MAT_CONV( N )\
159 | EIGEN_MATRIX_CONVERTER( N ## d );\
160 | EIGEN_MATRIX_CONVERTER( N ## f );\
161 | EIGEN_MATRIX_CONVERTER( N ## i );\
162 | EIGEN_MATRIX_CONVERTER( N ## u );\
163 | EIGEN_MATRIX_CONVERTER( N ## s );\
164 | EIGEN_MATRIX_CONVERTER( N ## us );\
165 | EIGEN_MATRIX_CONVERTER( N ## i8 );\
166 | EIGEN_MATRIX_CONVERTER( N ## u8 );\
167 | EIGEN_MATRIX_CONVERTER( N ## b )
168 | 
169 | #define EIGEN_MATRIX_VECTOR(Type) class_< std::vector<Type> >((std::string("Vec")+std::string(#Type)).c_str()).def( vector_indexing_suite< std::vector<Type>, true >() ).def( VectorInitSuite< std::vector<Type> >() );
170 | #define MAT_VEC( N )\
171 | EIGEN_MATRIX_VECTOR( N ## d );\
172 | EIGEN_MATRIX_VECTOR( N ## f );\
173 | EIGEN_MATRIX_VECTOR( N ## i );\
174 | EIGEN_MATRIX_VECTOR( N ## u );\
175 | EIGEN_MATRIX_VECTOR( N ## s );\
176 | EIGEN_MATRIX_VECTOR( N ## us );\
177 | EIGEN_MATRIX_VECTOR( N ## i8 );\
178 | EIGEN_MATRIX_VECTOR( N ## u8 );\
179 | EIGEN_MATRIX_VECTOR( N ## b )
180 | 
181 | // Exceptions
182 | void translateAssertException(const AssertException& e) {
183 | 	PyErr_SetString(PyExc_AssertionError, e.what());
184 | }
185 | 
186 | #if PY_MAJOR_VERSION >= 3
187 | int init_numpy() { import_array(); return 0; }
188 | #else
189 | void init_numpy() { import_array(); return; }
190 | #endif
191 | 
192 | void defineUtil() {
193 | 	// NOTE: This file has a ton of macros and templates, so it's going to take a while to compile ...
194 | 	init_numpy();
195 | 	boost::python::numeric::array::set_module_and_type("numpy", "ndarray");
196 | 	
197 | 	register_exception_translator<AssertException>(&translateAssertException);
198 | 	
199 | 	ADD_MODULE(util);
200 | 	
201 | 	// NOTE: When overloading functions always make sure to put the array/matrix function before the vector one
202 | 	MAT_CONV( MatrixX );
203 | 	MAT_CONV( RMatrixX );
204 | 	MAT_CONV( VectorX );
205 | 	MAT_CONV( ArrayXX );
206 | 	MAT_CONV( RArrayXX );
207 | 	MAT_CONV( ArrayX );
208 | 	
209 | 	// Define some std::vectors
210 | 	MAT_VEC( RMatrixX );
211 | 	MAT_VEC( VectorX );
212 | 	
213 | 	// Datastructures
214 | 	class_< std::vector<int> >("VecInt").def( vector_indexing_suite< std::vector<int> >() ).def( VectorInitSuite< std::vector<int> >() );
215 | 	class_< std::vector<float> >("VecFloat").def( vector_indexing_suite< std::vector<float> >() ).def( VectorInitSuite< std::vector<float> >() );
216 | }
217 | 


--------------------------------------------------------------------------------
/lib/python/util.h:
--------------------------------------------------------------------------------
  1 | // --------------------------------------------------------
  2 | // CCNN
  3 | // 2015. Modified by Deepak Pathak, Philipp Krähenbühl
  4 | // --------------------------------------------------------
  5 | 
  6 | /*
  7 |     Copyright (c) 2014, Philipp Krähenbühl
  8 |     All rights reserved.
  9 | 	
 10 |     Redistribution and use in source and binary forms, with or without
 11 |     modification, are permitted provided that the following conditions are met:
 12 |         * Redistributions of source code must retain the above copyright
 13 |         notice, this list of conditions and the following disclaimer.
 14 |         * Redistributions in binary form must reproduce the above copyright
 15 |         notice, this list of conditions and the following disclaimer in the
 16 |         documentation and/or other materials provided with the distribution.
 17 |         * Neither the name of the Stanford University nor the
 18 |         names of its contributors may be used to endorse or promote products
 19 |         derived from this software without specific prior written permission.
 20 | 	
 21 |     THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY
 22 |     EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 23 |     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 24 |     DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY
 25 |     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 26 |     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 27 | 	 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 28 | 	 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29 | 	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 30 |     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31 | */
 32 | #pragma once
 33 | #include <boost/version.hpp>
 34 | #include <boost/python.hpp>
 35 | #include <stdexcept>
 36 | #include <memory>
 37 | #include <vector>
 38 | using namespace boost::python;
 39 | 
 40 | // Make older boost versions happy
 41 | #if BOOST_VERSION < 105300
 42 | template<typename T> T* get_pointer(const std::shared_ptr<T>& p) { return p.get(); }
 43 | #endif
 44 | 
 45 | template<typename OBJ>
 46 | struct SaveLoad_pickle_suite : pickle_suite {
 47 | 	static object getstate(const OBJ& obj) {
 48 | 		std::stringstream ss;
 49 | 		obj.save( ss );
 50 | 		std::string data = ss.str();
 51 | 		return object( handle<>( PyBytes_FromStringAndSize( data.data(), data.size() ) ) );
 52 | 	}
 53 | 	
 54 | 	static void setstate(OBJ& obj, const object & state) {
 55 | 		if(!PyBytes_Check(state.ptr()))
 56 | 			throw std::invalid_argument("Failed to unpickle, unexpected type!");
 57 | 		std::stringstream ss( std::string( PyBytes_AS_STRING(state.ptr()), PyBytes_Size(state.ptr()) ) );
 58 | 		obj.load( ss );
 59 | 	}
 60 | };
 61 | 
 62 | template<typename OBJ>
 63 | struct SaveLoad_pickle_suite_shared_ptr : pickle_suite {
 64 | 	static object getstate(const std::shared_ptr<OBJ>& obj) {
 65 | 		std::stringstream ss;
 66 | 		obj->save( ss );
 67 | 		std::string data = ss.str();
 68 | 		return object( handle<>( PyBytes_FromStringAndSize( data.data(), data.size() ) ) );
 69 | 	}
 70 | 	
 71 | 	static void setstate(std::shared_ptr<OBJ> obj, const object & state) {
 72 | 		if(!PyBytes_Check(state.ptr()))
 73 | 			throw std::invalid_argument("Failed to unpickle, unexpected type!");
 74 | 		std::stringstream ss( std::string( PyBytes_AS_STRING(state.ptr()), PyBytes_Size(state.ptr()) ) );
 75 | 		obj->load( ss );
 76 | 	}
 77 | };
 78 | 
 79 | template<typename OBJ>
 80 | struct VectorSaveLoad_pickle_suite_shared_ptr : pickle_suite {
 81 | 	static object getstate(const std::vector< std::shared_ptr<OBJ> > & obj) {
 82 | 		std::stringstream ss;
 83 | 		const int nobj = obj.size();
 84 | 		ss.write( (const char*)&nobj, sizeof(nobj) );
 85 | 		for( int i=0; i<nobj; i++ )
 86 | 			obj[i]->save( ss );
 87 | 		std::string data = ss.str();
 88 | 		return object( handle<>( PyBytes_FromStringAndSize( data.data(), data.size() ) ) );
 89 | 	}
 90 | 	
 91 | 	static void setstate(std::vector< std::shared_ptr<OBJ> > & obj, const object & state) {
 92 | 		if(!PyBytes_Check(state.ptr()))
 93 | 			throw std::invalid_argument("Failed to unpickle, unexpected type!");
 94 | 		std::stringstream ss( std::string( PyBytes_AS_STRING(state.ptr()), PyBytes_Size(state.ptr()) ) );
 95 | 		int nobj = 0;
 96 | 		ss.read( (char*)&nobj, sizeof(nobj) );
 97 | 		obj.resize( nobj );
 98 | 		for( int i=0; i<nobj; i++ ) {
 99 | 			obj[i] = std::make_shared<OBJ>();
100 | 			obj[i]->load( ss );
101 | 		}
102 | 	}
103 | };
104 | 
105 | template<typename T>
106 | struct VectorInitSuite: public def_visitor< VectorInitSuite<T> > {
107 | 	typedef typename T::value_type D;
108 | 	
109 | 	static T * init_list( const list & l ) {
110 | 		T * r = new T;
111 | 		const int N = len(l);
112 | 		for ( int i=0; i<N; i++ )
113 | 			(*r).push_back( extract<D>(l[i]) );
114 | 		return r;
115 | 	}
116 | //	template <class C> static C * init_list( const list & l ) {
117 | //		C * r = new C;
118 | //		const int N = len(l);
119 | //		for ( int i=0; i<N; i++ )
120 | //			(*r).push_back( extract<D>(l[i]) );
121 | //		return r;
122 | //	}
123 | 	template <class C>
124 | 	void visit(C& cl) const
125 | 	{
126 | 		cl
127 | 		.def("__init__", make_constructor(&VectorInitSuite<T>::init_list));
128 | //		.def("__init__", make_constructor(&init_generator));
129 | 	}
130 | };
131 | 
132 | template<typename T>
133 | std::vector<T> to_vector( const list & l ) {
134 | 	std::vector<T> r;
135 | 	for( int i=0; i<len(l); i++ )
136 | 		r.push_back( extract<T>(l[i]) );
137 | 	return r;
138 | }
139 | 
140 | void defineUtil();
141 | 
142 | class ScopedGILRelease
143 | {
144 | public:
145 | 	inline ScopedGILRelease() {
146 | 		m_thread_state = PyEval_SaveThread();
147 | 	}
148 | 	inline ~ScopedGILRelease() {
149 | 		PyEval_RestoreThread(m_thread_state);
150 | 		m_thread_state = NULL;
151 | 	}
152 | private:
153 | 	PyThreadState * m_thread_state;
154 | 	ScopedGILRelease( const ScopedGILRelease & o ) { }
155 | };
156 | 


--------------------------------------------------------------------------------
/lib/util/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library( util eigen.cpp)
2 | target_link_libraries( util )
3 | 


--------------------------------------------------------------------------------
/lib/util/eigen.cpp:
--------------------------------------------------------------------------------
 1 | // --------------------------------------------------------
 2 | // CCNN
 3 | // 2015. Modified by Deepak Pathak, Philipp Krähenbühl
 4 | // --------------------------------------------------------
 5 | 
 6 | /*
 7 |     Copyright (c) 2014, Philipp Krähenbühl
 8 |     All rights reserved.
 9 | 	
10 |     Redistribution and use in source and binary forms, with or without
11 |     modification, are permitted provided that the following conditions are met:
12 |         * Redistributions of source code must retain the above copyright
13 |         notice, this list of conditions and the following disclaimer.
14 |         * Redistributions in binary form must reproduce the above copyright
15 |         notice, this list of conditions and the following disclaimer in the
16 |         documentation and/or other materials provided with the distribution.
17 |         * Neither the name of the Stanford University nor the
18 |         names of its contributors may be used to endorse or promote products
19 |         derived from this software without specific prior written permission.
20 | 	
21 |     THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY
22 |     EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 |     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 |     DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY
25 |     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 |     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | 	 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 | 	 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | 	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 |     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | */
32 | #include "eigen.h"
33 | 
34 | VectorXi range( int end ) {
35 | 	return range( 0, end );
36 | }
37 | VectorXi range( int start, int end ) {
38 | 	VectorXi r(end-start);
39 | 	for( int i=0; i<end-start; i++ )
40 | 		r[i] = start + i;
41 | 	return r;
42 | }
43 | 


--------------------------------------------------------------------------------
/lib/util/eigen.h:
--------------------------------------------------------------------------------
  1 | // --------------------------------------------------------
  2 | // CCNN
  3 | // 2015. Modified by Deepak Pathak, Philipp Krähenbühl
  4 | // --------------------------------------------------------
  5 | 
  6 | /*
  7 |     Copyright (c) 2014, Philipp Krähenbühl
  8 |     All rights reserved.
  9 | 	
 10 |     Redistribution and use in source and binary forms, with or without
 11 |     modification, are permitted provided that the following conditions are met:
 12 |         * Redistributions of source code must retain the above copyright
 13 |         notice, this list of conditions and the following disclaimer.
 14 |         * Redistributions in binary form must reproduce the above copyright
 15 |         notice, this list of conditions and the following disclaimer in the
 16 |         documentation and/or other materials provided with the distribution.
 17 |         * Neither the name of the Stanford University nor the
 18 |         names of its contributors may be used to endorse or promote products
 19 |         derived from this software without specific prior written permission.
 20 | 	
 21 |     THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY
 22 |     EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 23 |     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 24 |     DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY
 25 |     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 26 |     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 27 | 	 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 28 | 	 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29 | 	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 30 |     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31 | */
 32 | #pragma once
 33 | #include "win_util.h"
 34 | #include <cstdint>
 35 | #define EIGEN_DONT_PARALLELIZE
 36 | #include <Eigen/Core>
 37 | #include <Eigen/SparseCore>
 38 | using namespace Eigen;
 39 | 
 40 | // Assert with exception handling
 41 | class AssertException: public std::logic_error {
 42 | public:
 43 | 	AssertException( const std::string & assertion, const std::string & location ): logic_error("Assertion \""+assertion+"\" failed in "+location) {}
 44 | };
 45 | #define eassert( x ) {if (!(x)) throw AssertException( _str(x), FILE_AND_LINE );}
 46 | #define _xstr(s) _str(s)
 47 | #define _str(s) #s
 48 | #define LINE_STRING _xstr(__LINE__)
 49 | #define FILE_AND_LINE ((std::string)__FILE__ + (std::string)":" + LINE_STRING)
 50 | 
 51 | #define DEFINE_MAT( N )\
 52 | typedef N<double>( N ## d );\
 53 | typedef N<float>( N ## f );\
 54 | typedef N<int32_t>( N ## i );\
 55 | typedef N<uint32_t>( N ## u );\
 56 | typedef N<int16_t>( N ## s );\
 57 | typedef N<uint16_t>( N ## us );\
 58 | typedef N<int8_t>( N ## i8 );\
 59 | typedef N<uint8_t>( N ## u8 );\
 60 | typedef N<bool>( N ## b )
 61 | 
 62 | #define DEFINE_MAT2( N )\
 63 | typedef N<uint32_t>( N ## u );\
 64 | typedef N<int16_t>( N ## s );\
 65 | typedef N<uint16_t>( N ## us );\
 66 | typedef N<int8_t>( N ## i8 );\
 67 | typedef N<uint8_t>( N ## u8 );\
 68 | typedef N<bool>( N ## b )
 69 | 
 70 | template<typename T> using RowVectorX = Matrix<T,1,Dynamic>;
 71 | template<typename T> using RMatrixX = Matrix<T,Dynamic,Dynamic,RowMajor>;
 72 | template<typename T> using SMatrixX = SparseMatrix<T>;
 73 | template<typename T> using SRMatrixX = SparseMatrix<T,RowMajor>;
 74 | template<typename T> using RArrayXX = Array<T,Dynamic,Dynamic,RowMajor>;
 75 | template<typename T> using MatrixX = Matrix<T,Dynamic,Dynamic>;
 76 | template<typename T> using ArrayXX = Array<T,Dynamic,Dynamic>;
 77 | template<typename T> using VectorX = Matrix<T,Dynamic,1>;
 78 | template<typename T> using ArrayX = Array<T,Dynamic,1>;
 79 | 
 80 | DEFINE_MAT( RMatrixX );
 81 | DEFINE_MAT( SMatrixX );
 82 | DEFINE_MAT( SRMatrixX );
 83 | DEFINE_MAT( RArrayXX );
 84 | DEFINE_MAT2( MatrixX );
 85 | DEFINE_MAT2( ArrayXX );
 86 | DEFINE_MAT2( VectorX );
 87 | DEFINE_MAT2( ArrayX );
 88 | 
 89 | namespace std{
 90 | 	template< typename T, int R, int C, int O, int RR, int CC > const T * begin( const Matrix<T,R,C,O,RR,CC> & m ){
 91 | 		return m.data();
 92 | 	}
 93 | 	template< typename T, int R, int C, int O, int RR, int CC > const T * end( const Matrix<T,R,C,O,RR,CC> & m ){
 94 | 		return m.data()+m.size();
 95 | 	}
 96 | 	template< typename T, int R, int C, int O, int RR, int CC > T * begin( Matrix<T,R,C,O,RR,CC> & m ){
 97 | 		return m.data();
 98 | 	}
 99 | 	template< typename T, int R, int C, int O, int RR, int CC > T * end( Matrix<T,R,C,O,RR,CC> & m ){
100 | 		return m.data()+m.size();
101 | 	}
102 | }
103 | VectorXi range( int end );
104 | VectorXi range( int start, int end );
105 | 
106 | template<typename T,int C, int R, int O>
107 | void saveMatrixX( std::ostream & s, const Matrix<T,C,R,O> & m ) {
108 | 	int rc[2] = {(int)m.rows(),(int)m.cols()};
109 | 	s.write( (char*)rc, sizeof(rc) );
110 | 	s.write( (char*)m.data(), m.size()*sizeof(T) );
111 | }
112 | template<typename T,int C, int R, int O>
113 | void loadMatrixX( std::istream & s, Matrix<T,C,R,O> & m ) {
114 | 	int rc[2];
115 | 	s.read( (char*)rc, sizeof(rc) );
116 | 	m = Matrix<T,C,R>(rc[0],rc[1]);
117 | 	s.read( (char*)m.data(), m.size()*sizeof(T) );
118 | }
119 | 


--------------------------------------------------------------------------------
/lib/util/win_util.cpp:
--------------------------------------------------------------------------------
 1 | // --------------------------------------------------------
 2 | // CCNN
 3 | // 2015. Modified by Deepak Pathak, Philipp Krähenbühl
 4 | // --------------------------------------------------------
 5 | 
 6 | /*
 7 |     Copyright (c) 2014, Philipp Krähenbühl
 8 |     All rights reserved.
 9 | 	
10 |     Redistribution and use in source and binary forms, with or without
11 |     modification, are permitted provided that the following conditions are met:
12 |         * Redistributions of source code must retain the above copyright
13 |         notice, this list of conditions and the following disclaimer.
14 |         * Redistributions in binary form must reproduce the above copyright
15 |         notice, this list of conditions and the following disclaimer in the
16 |         documentation and/or other materials provided with the distribution.
17 |         * Neither the name of the Stanford University nor the
18 |         names of its contributors may be used to endorse or promote products
19 |         derived from this software without specific prior written permission.
20 | 	
21 |     THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY
22 |     EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 |     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 |     DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY
25 |     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 |     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | 	 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 | 	 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | 	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 |     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | */
32 | #include "win_util.h"
33 | 


--------------------------------------------------------------------------------
/lib/util/win_util.h:
--------------------------------------------------------------------------------
 1 | // --------------------------------------------------------
 2 | // CCNN
 3 | // 2015. Modified by Deepak Pathak, Philipp Krähenbühl
 4 | // --------------------------------------------------------
 5 | 
 6 | /*
 7 |     Copyright (c) 2014, Philipp Krähenbühl
 8 |     All rights reserved.
 9 | 	
10 |     Redistribution and use in source and binary forms, with or without
11 |     modification, are permitted provided that the following conditions are met:
12 |         * Redistributions of source code must retain the above copyright
13 |         notice, this list of conditions and the following disclaimer.
14 |         * Redistributions in binary form must reproduce the above copyright
15 |         notice, this list of conditions and the following disclaimer in the
16 |         documentation and/or other materials provided with the distribution.
17 |         * Neither the name of the Stanford University nor the
18 |         names of its contributors may be used to endorse or promote products
19 |         derived from this software without specific prior written permission.
20 | 	
21 |     THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY
22 |     EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 |     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 |     DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY
25 |     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 |     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | 	 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 | 	 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | 	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 |     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | */
32 | #pragma once
33 | #ifdef _MSC_VER
34 | #define _USE_MATH_DEFINES
35 | 
36 | #endif
37 | #include <cmath>
38 | 


--------------------------------------------------------------------------------
/models/examples/gt1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/gt1.png


--------------------------------------------------------------------------------
/models/examples/gt2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/gt2.png


--------------------------------------------------------------------------------
/models/examples/im1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/im1.jpg


--------------------------------------------------------------------------------
/models/examples/im2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/im2.jpg


--------------------------------------------------------------------------------
/models/examples/im3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/im3.jpg


--------------------------------------------------------------------------------
/models/examples/im4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/im4.jpg


--------------------------------------------------------------------------------
/models/examples/out1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/out1.png


--------------------------------------------------------------------------------
/models/examples/out2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/out2.png


--------------------------------------------------------------------------------
/models/fcn_32s/deploy_32s.prototxt:
--------------------------------------------------------------------------------
  1 | name : "FCN-32s"
  2 | 
  3 | input: 'data'
  4 | input_dim: 1
  5 | input_dim: 3
  6 | input_dim: 500   # Dummy Size. Can pass any size image. See demo.py
  7 | input_dim: 500   # Dummy Size. Can pass any size image. See demo.py
  8 | 
  9 | input: 'data-orig'
 10 | input_dim: 1
 11 | input_dim: 3
 12 | input_dim: 500   # Dummy Size. Can pass any size image. See demo.py
 13 | input_dim: 500   # Dummy Size. Can pass any size image. See demo.py
 14 | 
 15 | # ========== Network architecture begin ================
 16 | 
 17 | layer : {
 18 |   name : "conv1_1"
 19 |   type : "Convolution"
 20 |   bottom : "data"
 21 |   top : "conv1_1"
 22 |   convolution_param : {
 23 |     engine: CAFFE
 24 |     num_output : 64
 25 |     pad : 100
 26 |     kernel_size : 3
 27 |   }
 28 |   param : {
 29 |     lr_mult : 1.0
 30 |     decay_mult : 1.0
 31 |   }
 32 |   param : {
 33 |     lr_mult : 2.0
 34 |     decay_mult : 0.0
 35 |   }
 36 | }
 37 | layer : {
 38 |   name : "relu1_1"
 39 |   type : "ReLU"
 40 |   bottom : "conv1_1"
 41 |   top : "conv1_1"
 42 | }
 43 | layer : {
 44 |   name : "conv1_2"
 45 |   type : "Convolution"
 46 |   bottom : "conv1_1"
 47 |   top : "conv1_2"
 48 |   convolution_param : {
 49 |     engine: CAFFE
 50 |     num_output : 64
 51 |     pad : 1
 52 |     kernel_size : 3
 53 |   }
 54 |   param : {
 55 |     lr_mult : 1.0
 56 |     decay_mult : 1.0
 57 |   }
 58 |   param : {
 59 |     lr_mult : 2.0
 60 |     decay_mult : 0.0
 61 |   }
 62 | }
 63 | layer : {
 64 |   name : "relu1_2"
 65 |   type : "ReLU"
 66 |   bottom : "conv1_2"
 67 |   top : "conv1_2"
 68 | }
 69 | layer : {
 70 |   name : "pool1"
 71 |   type : "Pooling"
 72 |   bottom : "conv1_2"
 73 |   top : "pool1"
 74 |   pooling_param : {
 75 |     pool : MAX
 76 |     kernel_size : 2
 77 |     stride : 2
 78 |   }
 79 | }
 80 | layer : {
 81 |   name : "conv2_1"
 82 |   type : "Convolution"
 83 |   bottom : "pool1"
 84 |   top : "conv2_1"
 85 |   convolution_param : {
 86 |     engine: CAFFE
 87 |     num_output : 128
 88 |     pad : 1
 89 |     kernel_size : 3
 90 |   }
 91 |   param : {
 92 |     lr_mult : 1.0
 93 |     decay_mult : 1.0
 94 |   }
 95 |   param : {
 96 |     lr_mult : 2.0
 97 |     decay_mult : 0.0
 98 |   }
 99 | }
100 | layer : {
101 |   name : "relu2_1"
102 |   type : "ReLU"
103 |   bottom : "conv2_1"
104 |   top : "conv2_1"
105 | }
106 | layer : {
107 |   name : "conv2_2"
108 |   type : "Convolution"
109 |   bottom : "conv2_1"
110 |   top : "conv2_2"
111 |   convolution_param : {
112 |     engine: CAFFE
113 |     num_output : 128
114 |     pad : 1
115 |     kernel_size : 3
116 |   }
117 |   param : {
118 |     lr_mult : 1.0
119 |     decay_mult : 1.0
120 |   }
121 |   param : {
122 |     lr_mult : 2.0
123 |     decay_mult : 0.0
124 |   }
125 | }
126 | layer : {
127 |   name : "relu2_2"
128 |   type : "ReLU"
129 |   bottom : "conv2_2"
130 |   top : "conv2_2"
131 | }
132 | layer : {
133 |   name : "pool2"
134 |   type : "Pooling"
135 |   bottom : "conv2_2"
136 |   top : "pool2"
137 |   pooling_param : {
138 |     pool : MAX
139 |     kernel_size : 2
140 |     stride : 2
141 |   }
142 | }
143 | layer : {
144 |   name : "conv3_1"
145 |   type : "Convolution"
146 |   bottom : "pool2"
147 |   top : "conv3_1"
148 |   convolution_param : {
149 |     engine: CAFFE
150 |     num_output : 256
151 |     pad : 1
152 |     kernel_size : 3
153 |   }
154 |   param : {
155 |     lr_mult : 1.0
156 |     decay_mult : 1.0
157 |   }
158 |   param : {
159 |     lr_mult : 2.0
160 |     decay_mult : 0.0
161 |   }
162 | }
163 | layer : {
164 |   name : "relu3_1"
165 |   type : "ReLU"
166 |   bottom : "conv3_1"
167 |   top : "conv3_1"
168 | }
169 | layer : {
170 |   name : "conv3_2"
171 |   type : "Convolution"
172 |   bottom : "conv3_1"
173 |   top : "conv3_2"
174 |   convolution_param : {
175 |     engine: CAFFE
176 |     num_output : 256
177 |     pad : 1
178 |     kernel_size : 3
179 |   }
180 |   param : {
181 |     lr_mult : 1.0
182 |     decay_mult : 1.0
183 |   }
184 |   param : {
185 |     lr_mult : 2.0
186 |     decay_mult : 0.0
187 |   }
188 | }
189 | layer : {
190 |   name : "relu3_2"
191 |   type : "ReLU"
192 |   bottom : "conv3_2"
193 |   top : "conv3_2"
194 | }
195 | layer : {
196 |   name : "conv3_3"
197 |   type : "Convolution"
198 |   bottom : "conv3_2"
199 |   top : "conv3_3"
200 |   convolution_param : {
201 |     engine: CAFFE
202 |     num_output : 256
203 |     pad : 1
204 |     kernel_size : 3
205 |   }
206 |   param : {
207 |     lr_mult : 1.0
208 |     decay_mult : 1.0
209 |   }
210 |   param : {
211 |     lr_mult : 2.0
212 |     decay_mult : 0.0
213 |   }
214 | }
215 | layer : {
216 |   name : "relu3_3"
217 |   type : "ReLU"
218 |   bottom : "conv3_3"
219 |   top : "conv3_3"
220 | }
221 | layer : {
222 |   name : "pool3"
223 |   type : "Pooling"
224 |   bottom : "conv3_3"
225 |   top : "pool3"
226 |   pooling_param : {
227 |     pool : MAX
228 |     kernel_size : 2
229 |     stride : 2
230 |   }
231 | }
232 | layer : {
233 |   name : "conv4_1"
234 |   type : "Convolution"
235 |   bottom : "pool3"
236 |   top : "conv4_1"
237 |   convolution_param : {
238 |     engine: CAFFE
239 |     num_output : 512
240 |     pad : 1
241 |     kernel_size : 3
242 |   }
243 |   param : {
244 |     lr_mult : 1.0
245 |     decay_mult : 1.0
246 |   }
247 |   param : {
248 |     lr_mult : 2.0
249 |     decay_mult : 0.0
250 |   }
251 | }
252 | layer : {
253 |   name : "relu4_1"
254 |   type : "ReLU"
255 |   bottom : "conv4_1"
256 |   top : "conv4_1"
257 | }
258 | layer : {
259 |   name : "conv4_2"
260 |   type : "Convolution"
261 |   bottom : "conv4_1"
262 |   top : "conv4_2"
263 |   convolution_param : {
264 |     engine: CAFFE
265 |     num_output : 512
266 |     pad : 1
267 |     kernel_size : 3
268 |   }
269 |   param : {
270 |     lr_mult : 1.0
271 |     decay_mult : 1.0
272 |   }
273 |   param : {
274 |     lr_mult : 2.0
275 |     decay_mult : 0.0
276 |   }
277 | }
278 | layer : {
279 |   name : "relu4_2"
280 |   type : "ReLU"
281 |   bottom : "conv4_2"
282 |   top : "conv4_2"
283 | }
284 | layer : {
285 |   name : "conv4_3"
286 |   type : "Convolution"
287 |   bottom : "conv4_2"
288 |   top : "conv4_3"
289 |   convolution_param : {
290 |     engine: CAFFE
291 |     num_output : 512
292 |     pad : 1
293 |     kernel_size : 3
294 |   }
295 |   param : {
296 |     lr_mult : 1.0
297 |     decay_mult : 1.0
298 |   }
299 |   param : {
300 |     lr_mult : 2.0
301 |     decay_mult : 0.0
302 |   }
303 | }
304 | layer : {
305 |   name : "relu4_3"
306 |   type : "ReLU"
307 |   bottom : "conv4_3"
308 |   top : "conv4_3"
309 | }
310 | layer : {
311 |   name : "pool4"
312 |   type : "Pooling"
313 |   bottom : "conv4_3"
314 |   top : "pool4"
315 |   pooling_param : {
316 |     pool : MAX
317 |     kernel_size : 2
318 |     stride : 2
319 |   }
320 | }
321 | layer : {
322 |   name : "conv5_1"
323 |   type : "Convolution"
324 |   bottom : "pool4"
325 |   top : "conv5_1"
326 |   convolution_param : {
327 |     engine: CAFFE
328 |     num_output : 512
329 |     pad : 1
330 |     kernel_size : 3
331 |   }
332 |   param : {
333 |     lr_mult : 1.0
334 |     decay_mult : 1.0
335 |   }
336 |   param : {
337 |     lr_mult : 2.0
338 |     decay_mult : 0.0
339 |   }
340 | }
341 | layer : {
342 |   name : "relu5_1"
343 |   type : "ReLU"
344 |   bottom : "conv5_1"
345 |   top : "conv5_1"
346 | }
347 | layer : {
348 |   name : "conv5_2"
349 |   type : "Convolution"
350 |   bottom : "conv5_1"
351 |   top : "conv5_2"
352 |   convolution_param : {
353 |     engine: CAFFE
354 |     num_output : 512
355 |     pad : 1
356 |     kernel_size : 3
357 |   }
358 |   param : {
359 |     lr_mult : 1.0
360 |     decay_mult : 1.0
361 |   }
362 |   param : {
363 |     lr_mult : 2.0
364 |     decay_mult : 0.0
365 |   }
366 | }
367 | layer : {
368 |   name : "relu5_2"
369 |   type : "ReLU"
370 |   bottom : "conv5_2"
371 |   top : "conv5_2"
372 | }
373 | layer : {
374 |   name : "conv5_3"
375 |   type : "Convolution"
376 |   bottom : "conv5_2"
377 |   top : "conv5_3"
378 |   convolution_param : {
379 |     engine: CAFFE
380 |     num_output : 512
381 |     pad : 1
382 |     kernel_size : 3
383 |   }
384 |   param : {
385 |     lr_mult : 1.0
386 |     decay_mult : 1.0
387 |   }
388 |   param : {
389 |     lr_mult : 2.0
390 |     decay_mult : 0.0
391 |   }
392 | }
393 | layer : {
394 |   name : "relu5_3"
395 |   type : "ReLU"
396 |   bottom : "conv5_3"
397 |   top : "conv5_3"
398 | }
399 | layer : {
400 |   name : "pool5"
401 |   type : "Pooling"
402 |   bottom : "conv5_3"
403 |   top : "pool5"
404 |   pooling_param : {
405 |     pool : MAX
406 |     kernel_size : 2
407 |     stride : 2
408 |   }
409 | }
410 | layer : {
411 |   name : "conv6"
412 |   bottom : "pool5"
413 |   top : "conv6"
414 |   type: "Convolution"
415 |   convolution_param {
416 |     engine: CAFFE
417 |     num_output : 4096
418 |     kernel_size: 7
419 |     weight_filler {
420 |       type: "gaussian"
421 |       std: 0.01
422 |     }
423 |     bias_filler {
424 |       type: "constant"
425 |       value: 0.1
426 |     }
427 |   }
428 |   param : {
429 |     lr_mult : 1.0
430 |     decay_mult : 1.0
431 |   }
432 |   param : {
433 |     lr_mult : 2.0
434 |     decay_mult : 0.0
435 |   }
436 | }
437 | layer : {
438 |   name : "relu6"
439 |   type : "ReLU"
440 |   bottom : "conv6"
441 |   top : "conv6"
442 | }
443 | layer {
444 |  name: "drop6"
445 |  type: "Dropout"
446 |  bottom: "conv6"
447 |  top: "conv6"
448 |  dropout_param {
449 |    dropout_ratio: 0.5
450 |  }
451 | }
452 | layer : {
453 |   name : "conv7"
454 |   bottom : "conv6"
455 |   top : "conv7"
456 |   type: "Convolution"
457 |   convolution_param {
458 |     engine: CAFFE
459 |     num_output : 4096
460 |     kernel_size: 1
461 |     weight_filler {
462 |       type: "gaussian"
463 |       std: 0.01
464 |     }
465 |     bias_filler {
466 |       type: "constant"
467 |       value: 0.1
468 |     }
469 |   }
470 |   param : {
471 |     lr_mult : 1.0
472 |     decay_mult : 1.0
473 |   }
474 |   param : {
475 |     lr_mult : 2.0
476 |     decay_mult : 0.0
477 |   }
478 | }
479 | layer : {
480 |   name : "relu7"
481 |   type : "ReLU"
482 |   bottom : "conv7"
483 |   top : "conv7"
484 | }
485 | layer {
486 |   name: "drop7"
487 |   type: "Dropout"
488 |   bottom: "conv7"
489 |   top: "conv7"
490 |   dropout_param {
491 |     dropout_ratio: 0.5
492 |   }
493 | }
494 | layer {
495 |   name: 'conv8'
496 |   bottom: 'conv7'
497 |   top: 'conv8'
498 |   type: "Convolution"
499 |   convolution_param {
500 |     engine: CAFFE
501 |     num_output: 21
502 |     kernel_size: 1
503 |     weight_filler {
504 |       type: "gaussian"
505 |       std: 0.01
506 |     }
507 |   }
508 |   param : {
509 |     lr_mult : 1.0
510 |     decay_mult : 1.0
511 |   }
512 |   param : {
513 |     lr_mult : 2.0
514 |     decay_mult : 0.0
515 |   }
516 | }
517 | 
518 | # ========== Network architecture end ==================
519 | 
520 | layer { type: 'Python' name: 'weak_loss' bottom: 'conv8' bottom: 'indicator' bottom: 'indicator_0.10' # bottom: 'flag_3' bottom: 'seg_gt' bottom: 'score_crop' 
521 |   top: 'loss'
522 |   python_param { module: 'python_layers' layer: 'WeakLoss' }
523 |   include { phase: TRAIN }
524 |   loss_weight: 1 }
525 | 
526 | layer {
527 |   name: 'upsample'
528 |   bottom: 'conv8'
529 |   top: 'score'
530 |   type: "Deconvolution"
531 |   convolution_param {
532 |     engine: CAFFE
533 |     group: 21
534 |     num_output: 21
535 |     kernel_size: 64
536 |     stride: 32
537 |     weight_filler: { type: "bilinear_upsampling" }
538 |   }
539 |   param : {
540 |     lr_mult : 0
541 |     decay_mult : 0
542 |   }
543 |   include { phase: TEST }
544 | }
545 | 
546 | layer { name: 'crop'  type: 'Crop' bottom: 'score' bottom: 'data' top: 'score_crop'
547 |   include { phase: TEST } 
548 |   }
549 | 
550 | # =============Dense CRF===========================
551 | # Uncomment the untransformed data layer code on top and set bottom of DENSECRF layer to bottom: 'data-orig', if you want to use that. But similar results with transformed data as well. 
552 | 
553 | layer { type: 'DenseCRF' name: 'densecrf' bottom: 'score_crop' bottom: 'data-orig' top: 'upscore-crf'
554 |   # Untuned Default Params
555 |   # densecrf_param { x_gauss: 6 y_gauss: 6 wt_gauss: 6
556 |   #   x_bilateral: 50 y_bilateral: 50 r_bilateral: 4 g_bilateral: 4 b_bilateral: 4 wt_bilateral: 5 }
557 |   # Tuned Deeplab Params
558 |   densecrf_param { x_gauss: 19 y_gauss: 19 wt_gauss: 15
559 |    x_bilateral: 61 y_bilateral: 61 r_bilateral: 10 g_bilateral: 10 b_bilateral: 10 wt_bilateral: 35 }
560 |   include { phase: TEST } }
561 | # ===================================================
562 | 


--------------------------------------------------------------------------------
/models/fcn_32s/solver_32s.prototxt:
--------------------------------------------------------------------------------
 1 | # Make sure that Loss is NOT-NORMALIZED by number of pixels i.e. self.normalization=FALSE in src/python_layers.py
 2 | base_lr: 1e-8
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 10000
 6 | display: 100
 7 | max_iter: 40000
 8 | momentum: 0.99
 9 | weight_decay: 0.0   #0.0000005
10 | 


--------------------------------------------------------------------------------
/models/fcn_32s/train_32s.prototxt:
--------------------------------------------------------------------------------
  1 | name : "FCN-32s"
  2 | 
  3 | # =========== Train Data Layers =========================
  4 | 
  5 | layer { type: 'Data' name: 'data' top: 'data'
  6 |   data_param {
  7 |     source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/images_train_lmdb'
  8 |     batch_size: 1 backend: LMDB }
  9 |   transform_param { mean_value: 104.00698793 mean_value: 116.66876762
 10 |     mean_value: 122.67891434 }
 11 |   include { phase: TRAIN } }
 12 | 
 13 | layer { type: 'HDF5Data' name: 'gt'
 14 |   top: 'indicator' top: 'indicator_0.01' top: 'indicator_0.05' top: 'indicator_0.10' 
 15 |   top: 'flag_1' top: 'flag_3' top: 'flag_5' top: 'flag_10' top: 'flag_50' top: 'flag_100' top: 'flag_200'
 16 |   hdf5_data_param {
 17 |     source: "/mnt/a/pathak/fcn_mil_cache/VOC2012/indicator_train.txt"
 18 |     batch_size: 1 }
 19 |   include { phase: TRAIN } }
 20 | 
 21 | layer { type: 'Silence' 
 22 |   bottom: 'indicator' bottom: 'indicator_0.01' bottom: 'indicator_0.05' bottom: 'indicator_0.10'
 23 |   bottom: 'flag_1' bottom: 'flag_3' bottom: 'flag_5' bottom: 'flag_10' bottom: 'flag_50' bottom: 'flag_100' bottom: 'flag_200'
 24 |   include { phase: TRAIN } }
 25 | 
 26 | # =========== Test Data Layers =========================
 27 | 
 28 | layer { type: 'Data' name: 'data' top: 'data'
 29 |   data_param {
 30 |     source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/images_val_lmdb'
 31 |     # source: '/mnt/a/pathak/fcn_mil_cache/lmdb_dataset_cache/datum_voc2012segtest'
 32 |     batch_size: 1 backend: LMDB }
 33 |   transform_param { mean_value: 104.00698793 mean_value: 116.66876762
 34 |     mean_value: 122.67891434 }
 35 |   include { phase: TEST } }
 36 | 
 37 | layer { type: 'Data' name: 'data-orig' top: 'data-orig'
 38 |   data_param {
 39 |     source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/images_val_lmdb'
 40 |     # source: '/mnt/a/pathak/fcn_mil_cache/lmdb_dataset_cache/datum_voc2012segtest'
 41 |     batch_size: 1 backend: LMDB }
 42 |   include { phase: TEST } }
 43 | 
 44 | layer { type: 'Data' name: 'gt' top: 'gt'
 45 |   data_param {
 46 |     source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/segmentation_class_val_lmdb'
 47 |     batch_size: 1 backend: LMDB }
 48 |   include { phase: TEST } }
 49 | 
 50 | # ========== Network architecture begin ================
 51 | 
 52 | layer : {
 53 |   name : "conv1_1"
 54 |   type : "Convolution"
 55 |   bottom : "data"
 56 |   top : "conv1_1"
 57 |   convolution_param : {
 58 |     engine: CAFFE
 59 |     num_output : 64
 60 |     pad : 100
 61 |     kernel_size : 3
 62 |   }
 63 |   param : {
 64 |     lr_mult : 1.0
 65 |     decay_mult : 1.0
 66 |   }
 67 |   param : {
 68 |     lr_mult : 2.0
 69 |     decay_mult : 0.0
 70 |   }
 71 | }
 72 | layer : {
 73 |   name : "relu1_1"
 74 |   type : "ReLU"
 75 |   bottom : "conv1_1"
 76 |   top : "conv1_1"
 77 | }
 78 | layer : {
 79 |   name : "conv1_2"
 80 |   type : "Convolution"
 81 |   bottom : "conv1_1"
 82 |   top : "conv1_2"
 83 |   convolution_param : {
 84 |     engine: CAFFE
 85 |     num_output : 64
 86 |     pad : 1
 87 |     kernel_size : 3
 88 |   }
 89 |   param : {
 90 |     lr_mult : 1.0
 91 |     decay_mult : 1.0
 92 |   }
 93 |   param : {
 94 |     lr_mult : 2.0
 95 |     decay_mult : 0.0
 96 |   }
 97 | }
 98 | layer : {
 99 |   name : "relu1_2"
100 |   type : "ReLU"
101 |   bottom : "conv1_2"
102 |   top : "conv1_2"
103 | }
104 | layer : {
105 |   name : "pool1"
106 |   type : "Pooling"
107 |   bottom : "conv1_2"
108 |   top : "pool1"
109 |   pooling_param : {
110 |     pool : MAX
111 |     kernel_size : 2
112 |     stride : 2
113 |   }
114 | }
115 | layer : {
116 |   name : "conv2_1"
117 |   type : "Convolution"
118 |   bottom : "pool1"
119 |   top : "conv2_1"
120 |   convolution_param : {
121 |     engine: CAFFE
122 |     num_output : 128
123 |     pad : 1
124 |     kernel_size : 3
125 |   }
126 |   param : {
127 |     lr_mult : 1.0
128 |     decay_mult : 1.0
129 |   }
130 |   param : {
131 |     lr_mult : 2.0
132 |     decay_mult : 0.0
133 |   }
134 | }
135 | layer : {
136 |   name : "relu2_1"
137 |   type : "ReLU"
138 |   bottom : "conv2_1"
139 |   top : "conv2_1"
140 | }
141 | layer : {
142 |   name : "conv2_2"
143 |   type : "Convolution"
144 |   bottom : "conv2_1"
145 |   top : "conv2_2"
146 |   convolution_param : {
147 |     engine: CAFFE
148 |     num_output : 128
149 |     pad : 1
150 |     kernel_size : 3
151 |   }
152 |   param : {
153 |     lr_mult : 1.0
154 |     decay_mult : 1.0
155 |   }
156 |   param : {
157 |     lr_mult : 2.0
158 |     decay_mult : 0.0
159 |   }
160 | }
161 | layer : {
162 |   name : "relu2_2"
163 |   type : "ReLU"
164 |   bottom : "conv2_2"
165 |   top : "conv2_2"
166 | }
167 | layer : {
168 |   name : "pool2"
169 |   type : "Pooling"
170 |   bottom : "conv2_2"
171 |   top : "pool2"
172 |   pooling_param : {
173 |     pool : MAX
174 |     kernel_size : 2
175 |     stride : 2
176 |   }
177 | }
178 | layer : {
179 |   name : "conv3_1"
180 |   type : "Convolution"
181 |   bottom : "pool2"
182 |   top : "conv3_1"
183 |   convolution_param : {
184 |     engine: CAFFE
185 |     num_output : 256
186 |     pad : 1
187 |     kernel_size : 3
188 |   }
189 |   param : {
190 |     lr_mult : 1.0
191 |     decay_mult : 1.0
192 |   }
193 |   param : {
194 |     lr_mult : 2.0
195 |     decay_mult : 0.0
196 |   }
197 | }
198 | layer : {
199 |   name : "relu3_1"
200 |   type : "ReLU"
201 |   bottom : "conv3_1"
202 |   top : "conv3_1"
203 | }
204 | layer : {
205 |   name : "conv3_2"
206 |   type : "Convolution"
207 |   bottom : "conv3_1"
208 |   top : "conv3_2"
209 |   convolution_param : {
210 |     engine: CAFFE
211 |     num_output : 256
212 |     pad : 1
213 |     kernel_size : 3
214 |   }
215 |   param : {
216 |     lr_mult : 1.0
217 |     decay_mult : 1.0
218 |   }
219 |   param : {
220 |     lr_mult : 2.0
221 |     decay_mult : 0.0
222 |   }
223 | }
224 | layer : {
225 |   name : "relu3_2"
226 |   type : "ReLU"
227 |   bottom : "conv3_2"
228 |   top : "conv3_2"
229 | }
230 | layer : {
231 |   name : "conv3_3"
232 |   type : "Convolution"
233 |   bottom : "conv3_2"
234 |   top : "conv3_3"
235 |   convolution_param : {
236 |     engine: CAFFE
237 |     num_output : 256
238 |     pad : 1
239 |     kernel_size : 3
240 |   }
241 |   param : {
242 |     lr_mult : 1.0
243 |     decay_mult : 1.0
244 |   }
245 |   param : {
246 |     lr_mult : 2.0
247 |     decay_mult : 0.0
248 |   }
249 | }
250 | layer : {
251 |   name : "relu3_3"
252 |   type : "ReLU"
253 |   bottom : "conv3_3"
254 |   top : "conv3_3"
255 | }
256 | layer : {
257 |   name : "pool3"
258 |   type : "Pooling"
259 |   bottom : "conv3_3"
260 |   top : "pool3"
261 |   pooling_param : {
262 |     pool : MAX
263 |     kernel_size : 2
264 |     stride : 2
265 |   }
266 | }
267 | layer : {
268 |   name : "conv4_1"
269 |   type : "Convolution"
270 |   bottom : "pool3"
271 |   top : "conv4_1"
272 |   convolution_param : {
273 |     engine: CAFFE
274 |     num_output : 512
275 |     pad : 1
276 |     kernel_size : 3
277 |   }
278 |   param : {
279 |     lr_mult : 1.0
280 |     decay_mult : 1.0
281 |   }
282 |   param : {
283 |     lr_mult : 2.0
284 |     decay_mult : 0.0
285 |   }
286 | }
287 | layer : {
288 |   name : "relu4_1"
289 |   type : "ReLU"
290 |   bottom : "conv4_1"
291 |   top : "conv4_1"
292 | }
293 | layer : {
294 |   name : "conv4_2"
295 |   type : "Convolution"
296 |   bottom : "conv4_1"
297 |   top : "conv4_2"
298 |   convolution_param : {
299 |     engine: CAFFE
300 |     num_output : 512
301 |     pad : 1
302 |     kernel_size : 3
303 |   }
304 |   param : {
305 |     lr_mult : 1.0
306 |     decay_mult : 1.0
307 |   }
308 |   param : {
309 |     lr_mult : 2.0
310 |     decay_mult : 0.0
311 |   }
312 | }
313 | layer : {
314 |   name : "relu4_2"
315 |   type : "ReLU"
316 |   bottom : "conv4_2"
317 |   top : "conv4_2"
318 | }
319 | layer : {
320 |   name : "conv4_3"
321 |   type : "Convolution"
322 |   bottom : "conv4_2"
323 |   top : "conv4_3"
324 |   convolution_param : {
325 |     engine: CAFFE
326 |     num_output : 512
327 |     pad : 1
328 |     kernel_size : 3
329 |   }
330 |   param : {
331 |     lr_mult : 1.0
332 |     decay_mult : 1.0
333 |   }
334 |   param : {
335 |     lr_mult : 2.0
336 |     decay_mult : 0.0
337 |   }
338 | }
339 | layer : {
340 |   name : "relu4_3"
341 |   type : "ReLU"
342 |   bottom : "conv4_3"
343 |   top : "conv4_3"
344 | }
345 | layer : {
346 |   name : "pool4"
347 |   type : "Pooling"
348 |   bottom : "conv4_3"
349 |   top : "pool4"
350 |   pooling_param : {
351 |     pool : MAX
352 |     kernel_size : 2
353 |     stride : 2
354 |   }
355 | }
356 | layer : {
357 |   name : "conv5_1"
358 |   type : "Convolution"
359 |   bottom : "pool4"
360 |   top : "conv5_1"
361 |   convolution_param : {
362 |     engine: CAFFE
363 |     num_output : 512
364 |     pad : 1
365 |     kernel_size : 3
366 |   }
367 |   param : {
368 |     lr_mult : 1.0
369 |     decay_mult : 1.0
370 |   }
371 |   param : {
372 |     lr_mult : 2.0
373 |     decay_mult : 0.0
374 |   }
375 | }
376 | layer : {
377 |   name : "relu5_1"
378 |   type : "ReLU"
379 |   bottom : "conv5_1"
380 |   top : "conv5_1"
381 | }
382 | layer : {
383 |   name : "conv5_2"
384 |   type : "Convolution"
385 |   bottom : "conv5_1"
386 |   top : "conv5_2"
387 |   convolution_param : {
388 |     engine: CAFFE
389 |     num_output : 512
390 |     pad : 1
391 |     kernel_size : 3
392 |   }
393 |   param : {
394 |     lr_mult : 1.0
395 |     decay_mult : 1.0
396 |   }
397 |   param : {
398 |     lr_mult : 2.0
399 |     decay_mult : 0.0
400 |   }
401 | }
402 | layer : {
403 |   name : "relu5_2"
404 |   type : "ReLU"
405 |   bottom : "conv5_2"
406 |   top : "conv5_2"
407 | }
408 | layer : {
409 |   name : "conv5_3"
410 |   type : "Convolution"
411 |   bottom : "conv5_2"
412 |   top : "conv5_3"
413 |   convolution_param : {
414 |     engine: CAFFE
415 |     num_output : 512
416 |     pad : 1
417 |     kernel_size : 3
418 |   }
419 |   param : {
420 |     lr_mult : 1.0
421 |     decay_mult : 1.0
422 |   }
423 |   param : {
424 |     lr_mult : 2.0
425 |     decay_mult : 0.0
426 |   }
427 | }
428 | layer : {
429 |   name : "relu5_3"
430 |   type : "ReLU"
431 |   bottom : "conv5_3"
432 |   top : "conv5_3"
433 | }
434 | layer : {
435 |   name : "pool5"
436 |   type : "Pooling"
437 |   bottom : "conv5_3"
438 |   top : "pool5"
439 |   pooling_param : {
440 |     pool : MAX
441 |     kernel_size : 2
442 |     stride : 2
443 |   }
444 | }
445 | layer : {
446 |   name : "conv6"
447 |   bottom : "pool5"
448 |   top : "conv6"
449 |   type: "Convolution"
450 |   convolution_param {
451 |     engine: CAFFE
452 |     num_output : 4096
453 |     kernel_size: 7
454 |     weight_filler {
455 |       type: "gaussian"
456 |       std: 0.01
457 |     }
458 |     bias_filler {
459 |       type: "constant"
460 |       value: 0.1
461 |     }
462 |   }
463 |   param : {
464 |     lr_mult : 1.0
465 |     decay_mult : 1.0
466 |   }
467 |   param : {
468 |     lr_mult : 2.0
469 |     decay_mult : 0.0
470 |   }
471 | }
472 | layer : {
473 |   name : "relu6"
474 |   type : "ReLU"
475 |   bottom : "conv6"
476 |   top : "conv6"
477 | }
478 | layer {
479 |  name: "drop6"
480 |  type: "Dropout"
481 |  bottom: "conv6"
482 |  top: "conv6"
483 |  dropout_param {
484 |    dropout_ratio: 0.5
485 |  }
486 | }
487 | layer : {
488 |   name : "conv7"
489 |   bottom : "conv6"
490 |   top : "conv7"
491 |   type: "Convolution"
492 |   convolution_param {
493 |     engine: CAFFE
494 |     num_output : 4096
495 |     kernel_size: 1
496 |     weight_filler {
497 |       type: "gaussian"
498 |       std: 0.01
499 |     }
500 |     bias_filler {
501 |       type: "constant"
502 |       value: 0.1
503 |     }
504 |   }
505 |   param : {
506 |     lr_mult : 1.0
507 |     decay_mult : 1.0
508 |   }
509 |   param : {
510 |     lr_mult : 2.0
511 |     decay_mult : 0.0
512 |   }
513 | }
514 | layer : {
515 |   name : "relu7"
516 |   type : "ReLU"
517 |   bottom : "conv7"
518 |   top : "conv7"
519 | }
520 | layer {
521 |   name: "drop7"
522 |   type: "Dropout"
523 |   bottom: "conv7"
524 |   top: "conv7"
525 |   dropout_param {
526 |     dropout_ratio: 0.5
527 |   }
528 | }
529 | layer {
530 |   name: 'conv8'
531 |   bottom: 'conv7'
532 |   top: 'conv8'
533 |   type: "Convolution"
534 |   convolution_param {
535 |     engine: CAFFE
536 |     num_output: 21
537 |     kernel_size: 1
538 |     weight_filler {
539 |       type: "gaussian"
540 |       std: 0.01
541 |     }
542 |   }
543 |   param : {
544 |     lr_mult : 1.0
545 |     decay_mult : 1.0
546 |   }
547 |   param : {
548 |     lr_mult : 2.0
549 |     decay_mult : 0.0
550 |   }
551 | }
552 | 
553 | # ========== Network architecture end ==================
554 | 
555 | layer { type: 'Python' name: 'weak_loss' bottom: 'conv8' bottom: 'indicator' bottom: 'indicator_0.10' # bottom: 'flag_3' bottom: 'seg_gt' bottom: 'score_crop' 
556 |   top: 'loss'
557 |   python_param { module: 'python_layers' layer: 'WeakLoss' }
558 |   include { phase: TRAIN }
559 |   loss_weight: 1 }
560 | 
561 | layer {
562 |   name: 'upsample'
563 |   bottom: 'conv8'
564 |   top: 'score'
565 |   type: "Deconvolution"
566 |   convolution_param {
567 |     engine: CAFFE
568 |     group: 21
569 |     num_output: 21
570 |     kernel_size: 64
571 |     stride: 32
572 |     weight_filler: { type: "bilinear_upsampling" }
573 |   }
574 |   param : {
575 |     lr_mult : 0
576 |     decay_mult : 0
577 |   }
578 |   include { phase: TEST }
579 | }
580 | 
581 | layer { name: 'crop'  type: 'Crop' bottom: 'score' bottom: 'data' top: 'score_crop'
582 |   include { phase: TEST } 
583 |   }
584 | 
585 | # =============Dense CRF===========================
586 | # Uncomment the untransformed data layer code on top and set bottom of DENSECRF layer to bottom: 'data-orig', if you want to use that. But similar results with transformed data as well. 
587 | 
588 | layer { type: 'DenseCRF' name: 'densecrf' bottom: 'score_crop' bottom: 'data-orig' top: 'upscore-crf'
589 |   # Untuned Default Params
590 |   # densecrf_param { x_gauss: 6 y_gauss: 6 wt_gauss: 6
591 |   #   x_bilateral: 50 y_bilateral: 50 r_bilateral: 4 g_bilateral: 4 b_bilateral: 4 wt_bilateral: 5 }
592 |   # Tuned Deeplab Params
593 |   densecrf_param { x_gauss: 19 y_gauss: 19 wt_gauss: 15
594 |    x_bilateral: 61 y_bilateral: 61 r_bilateral: 10 g_bilateral: 10 b_bilateral: 10 wt_bilateral: 35 }
595 |   include { phase: TEST } }
596 | # ===================================================
597 | 


--------------------------------------------------------------------------------
/models/fcn_8s/deploy_8s.prototxt:
--------------------------------------------------------------------------------
  1 | name : "FCN-8s"
  2 | 
  3 | input: 'data'
  4 | input_dim: 1
  5 | input_dim: 3
  6 | input_dim: 500   # Dummy Size. Can pass any size image. See demo.py
  7 | input_dim: 500   # Dummy Size. Can pass any size image. See demo.py
  8 | 
  9 | input: 'data-orig'
 10 | input_dim: 1
 11 | input_dim: 3
 12 | input_dim: 500   # Dummy Size. Can pass any size image. See demo.py
 13 | input_dim: 500   # Dummy Size. Can pass any size image. See demo.py
 14 | 
 15 | # ========== Network architecture begin ================
 16 | 
 17 | layer : {
 18 |   name : "conv1_1"
 19 |   type : "Convolution"
 20 |   bottom : "data"
 21 |   top : "conv1_1"
 22 |   convolution_param : {
 23 |     engine: CAFFE
 24 |     num_output : 64
 25 |     pad : 1
 26 |     kernel_size : 3
 27 |   }
 28 |   param : {
 29 |     lr_mult : 1.0
 30 |     decay_mult : 1.0
 31 |   }
 32 |   param : {
 33 |     lr_mult : 2.0
 34 |     decay_mult : 0.0
 35 |   }
 36 | }
 37 | layer : {
 38 |   name : "relu1_1"
 39 |   type : "ReLU"
 40 |   bottom : "conv1_1"
 41 |   top : "conv1_1"
 42 | }
 43 | layer : {
 44 |   name : "conv1_2"
 45 |   type : "Convolution"
 46 |   bottom : "conv1_1"
 47 |   top : "conv1_2"
 48 |   convolution_param : {
 49 |     engine: CAFFE
 50 |     num_output : 64
 51 |     pad : 1
 52 |     kernel_size : 3
 53 |   }
 54 |   param : {
 55 |     lr_mult : 1.0
 56 |     decay_mult : 1.0
 57 |   }
 58 |   param : {
 59 |     lr_mult : 2.0
 60 |     decay_mult : 0.0
 61 |   }
 62 | }
 63 | layer : {
 64 |   name : "relu1_2"
 65 |   type : "ReLU"
 66 |   bottom : "conv1_2"
 67 |   top : "conv1_2"
 68 | }
 69 | layer : {
 70 |   name : "pool1"
 71 |   type : "Pooling"
 72 |   bottom : "conv1_2"
 73 |   top : "pool1"
 74 |   pooling_param : {
 75 |     pool : MAX
 76 |     kernel_size : 3
 77 |     stride : 2
 78 |     pad : 1
 79 |   }
 80 | }
 81 | layer : {
 82 |   name : "conv2_1"
 83 |   type : "Convolution"
 84 |   bottom : "pool1"
 85 |   top : "conv2_1"
 86 |   convolution_param : {
 87 |     engine: CAFFE
 88 |     num_output : 128
 89 |     pad : 1
 90 |     kernel_size : 3
 91 |   }
 92 |   param : {
 93 |     lr_mult : 1.0
 94 |     decay_mult : 1.0
 95 |   }
 96 |   param : {
 97 |     lr_mult : 2.0
 98 |     decay_mult : 0.0
 99 |   }
100 | }
101 | layer : {
102 |   name : "relu2_1"
103 |   type : "ReLU"
104 |   bottom : "conv2_1"
105 |   top : "conv2_1"
106 | }
107 | layer : {
108 |   name : "conv2_2"
109 |   type : "Convolution"
110 |   bottom : "conv2_1"
111 |   top : "conv2_2"
112 |   convolution_param : {
113 |     engine: CAFFE
114 |     num_output : 128
115 |     pad : 1
116 |     kernel_size : 3
117 |   }
118 |   param : {
119 |     lr_mult : 1.0
120 |     decay_mult : 1.0
121 |   }
122 |   param : {
123 |     lr_mult : 2.0
124 |     decay_mult : 0.0
125 |   }
126 | }
127 | layer : {
128 |   name : "relu2_2"
129 |   type : "ReLU"
130 |   bottom : "conv2_2"
131 |   top : "conv2_2"
132 | }
133 | layer : {
134 |   name : "pool2"
135 |   type : "Pooling"
136 |   bottom : "conv2_2"
137 |   top : "pool2"
138 |   pooling_param : {
139 |     pool : MAX
140 |     kernel_size : 3
141 |     stride : 2
142 |     pad : 1
143 |   }
144 | }
145 | layer : {
146 |   name : "conv3_1"
147 |   type : "Convolution"
148 |   bottom : "pool2"
149 |   top : "conv3_1"
150 |   convolution_param : {
151 |     engine: CAFFE
152 |     num_output : 256
153 |     pad : 1
154 |     kernel_size : 3
155 |   }
156 |   param : {
157 |     lr_mult : 1.0
158 |     decay_mult : 1.0
159 |   }
160 |   param : {
161 |     lr_mult : 2.0
162 |     decay_mult : 0.0
163 |   }
164 | }
165 | layer : {
166 |   name : "relu3_1"
167 |   type : "ReLU"
168 |   bottom : "conv3_1"
169 |   top : "conv3_1"
170 | }
171 | layer : {
172 |   name : "conv3_2"
173 |   type : "Convolution"
174 |   bottom : "conv3_1"
175 |   top : "conv3_2"
176 |   convolution_param : {
177 |     engine: CAFFE
178 |     num_output : 256
179 |     pad : 1
180 |     kernel_size : 3
181 |   }
182 |   param : {
183 |     lr_mult : 1.0
184 |     decay_mult : 1.0
185 |   }
186 |   param : {
187 |     lr_mult : 2.0
188 |     decay_mult : 0.0
189 |   }
190 | }
191 | layer : {
192 |   name : "relu3_2"
193 |   type : "ReLU"
194 |   bottom : "conv3_2"
195 |   top : "conv3_2"
196 | }
197 | layer : {
198 |   name : "conv3_3"
199 |   type : "Convolution"
200 |   bottom : "conv3_2"
201 |   top : "conv3_3"
202 |   convolution_param : {
203 |     engine: CAFFE
204 |     num_output : 256
205 |     pad : 1
206 |     kernel_size : 3
207 |   }
208 |   param : {
209 |     lr_mult : 1.0
210 |     decay_mult : 1.0
211 |   }
212 |   param : {
213 |     lr_mult : 2.0
214 |     decay_mult : 0.0
215 |   }
216 | }
217 | layer : {
218 |   name : "relu3_3"
219 |   type : "ReLU"
220 |   bottom : "conv3_3"
221 |   top : "conv3_3"
222 | }
223 | layer : {
224 |   name : "pool3"
225 |   type : "Pooling"
226 |   bottom : "conv3_3"
227 |   top : "pool3"
228 |   pooling_param : {
229 |     pool : MAX
230 |     kernel_size : 3
231 |     stride : 2
232 |     pad : 1
233 |   }
234 | }
235 | layer : {
236 |   name : "conv4_1"
237 |   type : "Convolution"
238 |   bottom : "pool3"
239 |   top : "conv4_1"
240 |   convolution_param : {
241 |     engine: CAFFE
242 |     num_output : 512
243 |     pad : 1
244 |     kernel_size : 3
245 |   }
246 |   param : {
247 |     lr_mult : 1.0
248 |     decay_mult : 1.0
249 |   }
250 |   param : {
251 |     lr_mult : 2.0
252 |     decay_mult : 0.0
253 |   }
254 | }
255 | layer : {
256 |   name : "relu4_1"
257 |   type : "ReLU"
258 |   bottom : "conv4_1"
259 |   top : "conv4_1"
260 | }
261 | layer : {
262 |   name : "conv4_2"
263 |   type : "Convolution"
264 |   bottom : "conv4_1"
265 |   top : "conv4_2"
266 |   convolution_param : {
267 |     engine: CAFFE
268 |     num_output : 512
269 |     pad : 1
270 |     kernel_size : 3
271 |   }
272 |   param : {
273 |     lr_mult : 1.0
274 |     decay_mult : 1.0
275 |   }
276 |   param : {
277 |     lr_mult : 2.0
278 |     decay_mult : 0.0
279 |   }
280 | }
281 | layer : {
282 |   name : "relu4_2"
283 |   type : "ReLU"
284 |   bottom : "conv4_2"
285 |   top : "conv4_2"
286 | }
287 | layer : {
288 |   name : "conv4_3"
289 |   type : "Convolution"
290 |   bottom : "conv4_2"
291 |   top : "conv4_3"
292 |   convolution_param : {
293 |     engine: CAFFE
294 |     num_output : 512
295 |     pad : 1
296 |     kernel_size : 3
297 |   }
298 |   param : {
299 |     lr_mult : 1.0
300 |     decay_mult : 1.0
301 |   }
302 |   param : {
303 |     lr_mult : 2.0
304 |     decay_mult : 0.0
305 |   }
306 | }
307 | layer : {
308 |   name : "relu4_3"
309 |   type : "ReLU"
310 |   bottom : "conv4_3"
311 |   top : "conv4_3"
312 | }
313 | layer : {
314 |   name : "pool4"
315 |   type : "Pooling"
316 |   bottom : "conv4_3"
317 |   top : "pool4"
318 |   pooling_param : {
319 |     pool : MAX
320 |     kernel_size : 3
321 |     # stride : 2
322 |     stride : 1
323 |     pad : 1
324 |   }
325 | }
326 | layer : {
327 |   name : "conv5_1"
328 |   type : "Convolution"
329 |   bottom : "pool4"
330 |   top : "conv5_1"
331 |   convolution_param : {
332 |     engine: CAFFE
333 |     num_output : 512
334 |     #pad: 1
335 |     pad: 2
336 |     kernel_size : 3
337 |     hole: 2
338 |   }
339 |   param : {
340 |     lr_mult : 1.0
341 |     decay_mult : 1.0
342 |   }
343 |   param : {
344 |     lr_mult : 2.0
345 |     decay_mult : 0.0
346 |   }
347 | }
348 | layer : {
349 |   name : "relu5_1"
350 |   type : "ReLU"
351 |   bottom : "conv5_1"
352 |   top : "conv5_1"
353 | }
354 | layer : {
355 |   name : "conv5_2"
356 |   type : "Convolution"
357 |   bottom : "conv5_1"
358 |   top : "conv5_2"
359 |   convolution_param : {
360 |     engine: CAFFE
361 |     num_output : 512
362 |     #pad: 1
363 |     pad: 2
364 |     kernel_size : 3
365 |     hole: 2
366 |   }
367 |   param : {
368 |     lr_mult : 1.0
369 |     decay_mult : 1.0
370 |   }
371 |   param : {
372 |     lr_mult : 2.0
373 |     decay_mult : 0.0
374 |   }
375 | }
376 | layer : {
377 |   name : "relu5_2"
378 |   type : "ReLU"
379 |   bottom : "conv5_2"
380 |   top : "conv5_2"
381 | }
382 | layer : {
383 |   name : "conv5_3"
384 |   type : "Convolution"
385 |   bottom : "conv5_2"
386 |   top : "conv5_3"
387 |   convolution_param : {
388 |     engine: CAFFE
389 |     num_output : 512
390 |     #pad: 1
391 |     pad: 2
392 |     kernel_size : 3
393 |     hole: 2
394 |   }
395 |   param : {
396 |     lr_mult : 1.0
397 |     decay_mult : 1.0
398 |   }
399 |   param : {
400 |     lr_mult : 2.0
401 |     decay_mult : 0.0
402 |   }
403 | }
404 | layer : {
405 |   name : "relu5_3"
406 |   type : "ReLU"
407 |   bottom : "conv5_3"
408 |   top : "conv5_3"
409 | }
410 | layer : {
411 |   name : "pool5"
412 |   type : "Pooling"
413 |   bottom : "conv5_3"
414 |   top : "pool5"
415 |   pooling_param : {
416 |     pool : MAX
417 |     kernel_size : 3
418 |     # stride : 2
419 |     stride : 1
420 |     pad : 1
421 |   }
422 | }
423 | layer : {
424 |   name : "fc6"
425 |   bottom : "pool5"
426 |   top : "fc6"
427 |   type: "Convolution"
428 |   convolution_param {
429 |     num_output: 4096
430 |     engine: CAFFE
431 |     pad: 6
432 |     kernel_size: 4
433 |     hole: 4
434 |   }
435 |   param : {
436 |     lr_mult : 1.0
437 |     decay_mult : 1.0
438 |   }
439 |   param : {
440 |     lr_mult : 2.0
441 |     decay_mult : 0.0
442 |   }
443 | }
444 | layer : {
445 |   name : "relu6"
446 |   type : "ReLU"
447 |   bottom : "fc6"
448 |   top : "fc6"
449 | }
450 | layer {
451 |   name: "drop6"
452 |   type: "Dropout"
453 |   bottom: "fc6"
454 |   top: "fc6"
455 |   dropout_param {
456 |     dropout_ratio: 0.5
457 |   }
458 | }
459 | layer : {
460 |   name : "fc7"
461 |   bottom : "fc6"
462 |   top : "fc7"
463 |   type: "Convolution"
464 |   convolution_param {
465 |     engine: CAFFE
466 |     num_output : 4096
467 |     kernel_size: 1
468 |   }
469 |   param : {
470 |     lr_mult : 1.0
471 |     decay_mult : 1.0
472 |   }
473 |   param : {
474 |     lr_mult : 2.0
475 |     decay_mult : 0.0
476 |   }
477 | }
478 | layer : {
479 |   name : "relu7"
480 |   type : "ReLU"
481 |   bottom : "fc7"
482 |   top : "fc7"
483 | }
484 | layer {
485 |   name: "drop7"
486 |   type: "Dropout"
487 |   bottom: "fc7"
488 |   top: "fc7"
489 |   dropout_param {
490 |     dropout_ratio: 0.5
491 |   }
492 | }
493 | layer {
494 |   name: 'fc8_voc12'
495 |   bottom: 'fc7'
496 |   top: 'fc8'
497 |   type: "Convolution"
498 |   convolution_param {
499 |     engine: CAFFE
500 |     num_output: 21
501 |     kernel_size: 1
502 |     weight_filler {
503 |       type: "gaussian"
504 |       std: 0.01
505 |     }
506 |     bias_filler {
507 |       type: "constant"
508 |       value: 0
509 |     }
510 |   }
511 |   param : {
512 |     lr_mult : 1.0
513 |     decay_mult : 1.0
514 |   }
515 |   param : {
516 |     lr_mult : 2.0
517 |     decay_mult : 0.0
518 |   }
519 | }
520 | 
521 | # ========== Network architecture end ================
522 | 
523 | layer {
524 |   type: 'Python'
525 |   name: 'weak_loss'
526 |   bottom: 'fc8'
527 |   bottom: 'indicator'
528 |   bottom: 'indicator_0.10' # bottom: 'flag_3' bottom: 'seg_gt' bottom: 'score_crop' 
529 |   top: 'loss'
530 |   python_param {
531 |     module: 'python_layers'
532 |     layer: 'WeakLoss'
533 |   }
534 |   include { phase: TRAIN }
535 |   loss_weight: 1
536 | }
537 | 
538 | layer {
539 |   name: 'upsample'
540 |   bottom: 'fc8'
541 |   top: 'score'
542 |   type: "Deconvolution"
543 |   convolution_param {
544 |     engine: CAFFE
545 |     group: 21
546 |     num_output: 21
547 |     kernel_size: 15
548 |     stride: 8
549 |     weight_filler: { type: "bilinear_upsampling" }
550 |   }
551 |   param : {
552 |     lr_mult : 0
553 |     decay_mult : 0
554 |   }
555 |   include { phase: TEST }
556 | }
557 | layer {
558 |   name: 'crop' 
559 |   type: 'Crop'
560 |   bottom: 'score'
561 |   bottom: 'data'
562 |   top: 'score_crop'
563 |   include { phase: TEST }
564 | }
565 | 
566 | # =============Dense CRF===========================
567 | # Uncomment the untransformed data layer code on top and set bottom of DENSECRF layer to bottom: 'data-orig', if you want to use that. But similar results with transformed data as well. 
568 | 
569 | layer { type: 'DenseCRF' name: 'densecrf' bottom: 'score_crop' bottom: 'data-orig' top: 'upscore-crf'
570 |   # Untuned Default Params
571 |   # densecrf_param { x_gauss: 6 y_gauss: 6 wt_gauss: 6
572 |   #   x_bilateral: 50 y_bilateral: 50 r_bilateral: 4 g_bilateral: 4 b_bilateral: 4 wt_bilateral: 5 }
573 |   # Tuned Deeplab Params
574 |   densecrf_param { x_gauss: 19 y_gauss: 19 wt_gauss: 15
575 |    x_bilateral: 61 y_bilateral: 61 r_bilateral: 10 g_bilateral: 10 b_bilateral: 10 wt_bilateral: 35 }
576 |   include { phase: TEST } }
577 | # ===================================================
578 | 


--------------------------------------------------------------------------------
/models/fcn_8s/solver_8s.prototxt:
--------------------------------------------------------------------------------
 1 | # Make sure that Loss is NORMALIZED by number of pixels i.e. self.normalization=TRUE in src/python_layers.py
 2 | lr_policy: "step"
 3 | gamma: 0.1
 4 | stepsize: 40000
 5 | base_lr: 1e-6
 6 | display: 20
 7 | max_iter: 35000
 8 | momentum: 0.99
 9 | weight_decay: 0.0000005
10 | snapshot: 30000
11 | 


--------------------------------------------------------------------------------
/models/fcn_8s/train_8s.prototxt:
--------------------------------------------------------------------------------
  1 | name : "FCN-8s"
  2 | 
  3 | # =========== Train Data Layers =========================
  4 | 
  5 | layer { type: 'Data' name: 'data' top: 'data'
  6 |   data_param {
  7 |     source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/images_train_lmdb'
  8 |     batch_size: 1 backend: LMDB }
  9 |   transform_param { mean_value: 104.00698793 mean_value: 116.66876762
 10 |     mean_value: 122.67891434 }
 11 |   include { phase: TRAIN } }
 12 | 
 13 | layer { type: 'HDF5Data' name: 'gt'
 14 |   top: 'indicator' top: 'indicator_0.01' top: 'indicator_0.05' top: 'indicator_0.10' 
 15 |   top: 'flag_1' top: 'flag_3' top: 'flag_5' top: 'flag_10' top: 'flag_50' top: 'flag_100' top: 'flag_200'
 16 |   hdf5_data_param {
 17 |     source: "/mnt/a/pathak/fcn_mil_cache/VOC2012/indicator_train.txt"
 18 |     batch_size: 1 }
 19 |   include { phase: TRAIN } }
 20 | 
 21 | layer { type: 'Silence' 
 22 |   bottom: 'indicator' bottom: 'indicator_0.01' bottom: 'indicator_0.05' bottom: 'indicator_0.10'
 23 |   bottom: 'flag_1' bottom: 'flag_3' bottom: 'flag_5' bottom: 'flag_10' bottom: 'flag_50' bottom: 'flag_100' bottom: 'flag_200'
 24 |   include { phase: TRAIN } }
 25 | 
 26 | # =========== Test Data Layers =========================
 27 | 
 28 | layer { type: 'Data' name: 'data' top: 'data'
 29 |   data_param {
 30 |     source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/images_val_lmdb'
 31 |     # source: '/mnt/a/pathak/fcn_mil_cache/lmdb_dataset_cache/datum_voc2012segtest'
 32 |     batch_size: 1 backend: LMDB }
 33 |   transform_param { mean_value: 104.00698793 mean_value: 116.66876762
 34 |     mean_value: 122.67891434 }
 35 |   include { phase: TEST } }
 36 | 
 37 | layer { type: 'Data' name: 'data-orig' top: 'data-orig'
 38 |   data_param {
 39 |     source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/images_val_lmdb'
 40 |     # source: '/mnt/a/pathak/fcn_mil_cache/lmdb_dataset_cache/datum_voc2012segtest'
 41 |     batch_size: 1 backend: LMDB }
 42 |   include { phase: TEST } }
 43 | 
 44 | layer { type: 'Data' name: 'gt' top: 'gt'
 45 |   data_param {
 46 |     source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/segmentation_class_val_lmdb'
 47 |     batch_size: 1 backend: LMDB }
 48 |   include { phase: TEST } }
 49 | 
 50 | # ========== Network architecture begin ================
 51 | 
 52 | layer : {
 53 |   name : "conv1_1"
 54 |   type : "Convolution"
 55 |   bottom : "data"
 56 |   top : "conv1_1"
 57 |   convolution_param : {
 58 |     engine: CAFFE
 59 |     num_output : 64
 60 |     pad : 1
 61 |     kernel_size : 3
 62 |   }
 63 |   param : {
 64 |     lr_mult : 1.0
 65 |     decay_mult : 1.0
 66 |   }
 67 |   param : {
 68 |     lr_mult : 2.0
 69 |     decay_mult : 0.0
 70 |   }
 71 | }
 72 | layer : {
 73 |   name : "relu1_1"
 74 |   type : "ReLU"
 75 |   bottom : "conv1_1"
 76 |   top : "conv1_1"
 77 | }
 78 | layer : {
 79 |   name : "conv1_2"
 80 |   type : "Convolution"
 81 |   bottom : "conv1_1"
 82 |   top : "conv1_2"
 83 |   convolution_param : {
 84 |     engine: CAFFE
 85 |     num_output : 64
 86 |     pad : 1
 87 |     kernel_size : 3
 88 |   }
 89 |   param : {
 90 |     lr_mult : 1.0
 91 |     decay_mult : 1.0
 92 |   }
 93 |   param : {
 94 |     lr_mult : 2.0
 95 |     decay_mult : 0.0
 96 |   }
 97 | }
 98 | layer : {
 99 |   name : "relu1_2"
100 |   type : "ReLU"
101 |   bottom : "conv1_2"
102 |   top : "conv1_2"
103 | }
104 | layer : {
105 |   name : "pool1"
106 |   type : "Pooling"
107 |   bottom : "conv1_2"
108 |   top : "pool1"
109 |   pooling_param : {
110 |     pool : MAX
111 |     kernel_size : 3
112 |     stride : 2
113 |     pad : 1
114 |   }
115 | }
116 | layer : {
117 |   name : "conv2_1"
118 |   type : "Convolution"
119 |   bottom : "pool1"
120 |   top : "conv2_1"
121 |   convolution_param : {
122 |     engine: CAFFE
123 |     num_output : 128
124 |     pad : 1
125 |     kernel_size : 3
126 |   }
127 |   param : {
128 |     lr_mult : 1.0
129 |     decay_mult : 1.0
130 |   }
131 |   param : {
132 |     lr_mult : 2.0
133 |     decay_mult : 0.0
134 |   }
135 | }
136 | layer : {
137 |   name : "relu2_1"
138 |   type : "ReLU"
139 |   bottom : "conv2_1"
140 |   top : "conv2_1"
141 | }
142 | layer : {
143 |   name : "conv2_2"
144 |   type : "Convolution"
145 |   bottom : "conv2_1"
146 |   top : "conv2_2"
147 |   convolution_param : {
148 |     engine: CAFFE
149 |     num_output : 128
150 |     pad : 1
151 |     kernel_size : 3
152 |   }
153 |   param : {
154 |     lr_mult : 1.0
155 |     decay_mult : 1.0
156 |   }
157 |   param : {
158 |     lr_mult : 2.0
159 |     decay_mult : 0.0
160 |   }
161 | }
162 | layer : {
163 |   name : "relu2_2"
164 |   type : "ReLU"
165 |   bottom : "conv2_2"
166 |   top : "conv2_2"
167 | }
168 | layer : {
169 |   name : "pool2"
170 |   type : "Pooling"
171 |   bottom : "conv2_2"
172 |   top : "pool2"
173 |   pooling_param : {
174 |     pool : MAX
175 |     kernel_size : 3
176 |     stride : 2
177 |     pad : 1
178 |   }
179 | }
180 | layer : {
181 |   name : "conv3_1"
182 |   type : "Convolution"
183 |   bottom : "pool2"
184 |   top : "conv3_1"
185 |   convolution_param : {
186 |     engine: CAFFE
187 |     num_output : 256
188 |     pad : 1
189 |     kernel_size : 3
190 |   }
191 |   param : {
192 |     lr_mult : 1.0
193 |     decay_mult : 1.0
194 |   }
195 |   param : {
196 |     lr_mult : 2.0
197 |     decay_mult : 0.0
198 |   }
199 | }
200 | layer : {
201 |   name : "relu3_1"
202 |   type : "ReLU"
203 |   bottom : "conv3_1"
204 |   top : "conv3_1"
205 | }
206 | layer : {
207 |   name : "conv3_2"
208 |   type : "Convolution"
209 |   bottom : "conv3_1"
210 |   top : "conv3_2"
211 |   convolution_param : {
212 |     engine: CAFFE
213 |     num_output : 256
214 |     pad : 1
215 |     kernel_size : 3
216 |   }
217 |   param : {
218 |     lr_mult : 1.0
219 |     decay_mult : 1.0
220 |   }
221 |   param : {
222 |     lr_mult : 2.0
223 |     decay_mult : 0.0
224 |   }
225 | }
226 | layer : {
227 |   name : "relu3_2"
228 |   type : "ReLU"
229 |   bottom : "conv3_2"
230 |   top : "conv3_2"
231 | }
232 | layer : {
233 |   name : "conv3_3"
234 |   type : "Convolution"
235 |   bottom : "conv3_2"
236 |   top : "conv3_3"
237 |   convolution_param : {
238 |     engine: CAFFE
239 |     num_output : 256
240 |     pad : 1
241 |     kernel_size : 3
242 |   }
243 |   param : {
244 |     lr_mult : 1.0
245 |     decay_mult : 1.0
246 |   }
247 |   param : {
248 |     lr_mult : 2.0
249 |     decay_mult : 0.0
250 |   }
251 | }
252 | layer : {
253 |   name : "relu3_3"
254 |   type : "ReLU"
255 |   bottom : "conv3_3"
256 |   top : "conv3_3"
257 | }
258 | layer : {
259 |   name : "pool3"
260 |   type : "Pooling"
261 |   bottom : "conv3_3"
262 |   top : "pool3"
263 |   pooling_param : {
264 |     pool : MAX
265 |     kernel_size : 3
266 |     stride : 2
267 |     pad : 1
268 |   }
269 | }
270 | layer : {
271 |   name : "conv4_1"
272 |   type : "Convolution"
273 |   bottom : "pool3"
274 |   top : "conv4_1"
275 |   convolution_param : {
276 |     engine: CAFFE
277 |     num_output : 512
278 |     pad : 1
279 |     kernel_size : 3
280 |   }
281 |   param : {
282 |     lr_mult : 1.0
283 |     decay_mult : 1.0
284 |   }
285 |   param : {
286 |     lr_mult : 2.0
287 |     decay_mult : 0.0
288 |   }
289 | }
290 | layer : {
291 |   name : "relu4_1"
292 |   type : "ReLU"
293 |   bottom : "conv4_1"
294 |   top : "conv4_1"
295 | }
296 | layer : {
297 |   name : "conv4_2"
298 |   type : "Convolution"
299 |   bottom : "conv4_1"
300 |   top : "conv4_2"
301 |   convolution_param : {
302 |     engine: CAFFE
303 |     num_output : 512
304 |     pad : 1
305 |     kernel_size : 3
306 |   }
307 |   param : {
308 |     lr_mult : 1.0
309 |     decay_mult : 1.0
310 |   }
311 |   param : {
312 |     lr_mult : 2.0
313 |     decay_mult : 0.0
314 |   }
315 | }
316 | layer : {
317 |   name : "relu4_2"
318 |   type : "ReLU"
319 |   bottom : "conv4_2"
320 |   top : "conv4_2"
321 | }
322 | layer : {
323 |   name : "conv4_3"
324 |   type : "Convolution"
325 |   bottom : "conv4_2"
326 |   top : "conv4_3"
327 |   convolution_param : {
328 |     engine: CAFFE
329 |     num_output : 512
330 |     pad : 1
331 |     kernel_size : 3
332 |   }
333 |   param : {
334 |     lr_mult : 1.0
335 |     decay_mult : 1.0
336 |   }
337 |   param : {
338 |     lr_mult : 2.0
339 |     decay_mult : 0.0
340 |   }
341 | }
342 | layer : {
343 |   name : "relu4_3"
344 |   type : "ReLU"
345 |   bottom : "conv4_3"
346 |   top : "conv4_3"
347 | }
348 | layer : {
349 |   name : "pool4"
350 |   type : "Pooling"
351 |   bottom : "conv4_3"
352 |   top : "pool4"
353 |   pooling_param : {
354 |     pool : MAX
355 |     kernel_size : 3
356 |     # stride : 2
357 |     stride : 1
358 |     pad : 1
359 |   }
360 | }
361 | layer : {
362 |   name : "conv5_1"
363 |   type : "Convolution"
364 |   bottom : "pool4"
365 |   top : "conv5_1"
366 |   convolution_param : {
367 |     engine: CAFFE
368 |     num_output : 512
369 |     #pad: 1
370 |     pad: 2
371 |     kernel_size : 3
372 |     hole: 2
373 |   }
374 |   param : {
375 |     lr_mult : 1.0
376 |     decay_mult : 1.0
377 |   }
378 |   param : {
379 |     lr_mult : 2.0
380 |     decay_mult : 0.0
381 |   }
382 | }
383 | layer : {
384 |   name : "relu5_1"
385 |   type : "ReLU"
386 |   bottom : "conv5_1"
387 |   top : "conv5_1"
388 | }
389 | layer : {
390 |   name : "conv5_2"
391 |   type : "Convolution"
392 |   bottom : "conv5_1"
393 |   top : "conv5_2"
394 |   convolution_param : {
395 |     engine: CAFFE
396 |     num_output : 512
397 |     #pad: 1
398 |     pad: 2
399 |     kernel_size : 3
400 |     hole: 2
401 |   }
402 |   param : {
403 |     lr_mult : 1.0
404 |     decay_mult : 1.0
405 |   }
406 |   param : {
407 |     lr_mult : 2.0
408 |     decay_mult : 0.0
409 |   }
410 | }
411 | layer : {
412 |   name : "relu5_2"
413 |   type : "ReLU"
414 |   bottom : "conv5_2"
415 |   top : "conv5_2"
416 | }
417 | layer : {
418 |   name : "conv5_3"
419 |   type : "Convolution"
420 |   bottom : "conv5_2"
421 |   top : "conv5_3"
422 |   convolution_param : {
423 |     engine: CAFFE
424 |     num_output : 512
425 |     #pad: 1
426 |     pad: 2
427 |     kernel_size : 3
428 |     hole: 2
429 |   }
430 |   param : {
431 |     lr_mult : 1.0
432 |     decay_mult : 1.0
433 |   }
434 |   param : {
435 |     lr_mult : 2.0
436 |     decay_mult : 0.0
437 |   }
438 | }
439 | layer : {
440 |   name : "relu5_3"
441 |   type : "ReLU"
442 |   bottom : "conv5_3"
443 |   top : "conv5_3"
444 | }
445 | layer : {
446 |   name : "pool5"
447 |   type : "Pooling"
448 |   bottom : "conv5_3"
449 |   top : "pool5"
450 |   pooling_param : {
451 |     pool : MAX
452 |     kernel_size : 3
453 |     # stride : 2
454 |     stride : 1
455 |     pad : 1
456 |   }
457 | }
458 | layer : {
459 |   name : "fc6"
460 |   bottom : "pool5"
461 |   top : "fc6"
462 |   type: "Convolution"
463 |   convolution_param {
464 |     num_output: 4096
465 |     engine: CAFFE
466 |     pad: 6
467 |     kernel_size: 4
468 |     hole: 4
469 |   }
470 |   param : {
471 |     lr_mult : 1.0
472 |     decay_mult : 1.0
473 |   }
474 |   param : {
475 |     lr_mult : 2.0
476 |     decay_mult : 0.0
477 |   }
478 | }
479 | layer : {
480 |   name : "relu6"
481 |   type : "ReLU"
482 |   bottom : "fc6"
483 |   top : "fc6"
484 | }
485 | layer {
486 |   name: "drop6"
487 |   type: "Dropout"
488 |   bottom: "fc6"
489 |   top: "fc6"
490 |   dropout_param {
491 |     dropout_ratio: 0.5
492 |   }
493 | }
494 | layer : {
495 |   name : "fc7"
496 |   bottom : "fc6"
497 |   top : "fc7"
498 |   type: "Convolution"
499 |   convolution_param {
500 |     engine: CAFFE
501 |     num_output : 4096
502 |     kernel_size: 1
503 |   }
504 |   param : {
505 |     lr_mult : 1.0
506 |     decay_mult : 1.0
507 |   }
508 |   param : {
509 |     lr_mult : 2.0
510 |     decay_mult : 0.0
511 |   }
512 | }
513 | layer : {
514 |   name : "relu7"
515 |   type : "ReLU"
516 |   bottom : "fc7"
517 |   top : "fc7"
518 | }
519 | layer {
520 |   name: "drop7"
521 |   type: "Dropout"
522 |   bottom: "fc7"
523 |   top: "fc7"
524 |   dropout_param {
525 |     dropout_ratio: 0.5
526 |   }
527 | }
528 | layer {
529 |   name: 'fc8_voc12'
530 |   bottom: 'fc7'
531 |   top: 'fc8'
532 |   type: "Convolution"
533 |   convolution_param {
534 |     engine: CAFFE
535 |     num_output: 21
536 |     kernel_size: 1
537 |     weight_filler {
538 |       type: "gaussian"
539 |       std: 0.01
540 |     }
541 |     bias_filler {
542 |       type: "constant"
543 |       value: 0
544 |     }
545 |   }
546 |   param : {
547 |     lr_mult : 1.0
548 |     decay_mult : 1.0
549 |   }
550 |   param : {
551 |     lr_mult : 2.0
552 |     decay_mult : 0.0
553 |   }
554 | }
555 | 
556 | # ========== Network architecture end ================
557 | 
558 | layer {
559 |   type: 'Python'
560 |   name: 'weak_loss'
561 |   bottom: 'fc8'
562 |   bottom: 'indicator'
563 |   bottom: 'indicator_0.10' # bottom: 'flag_3' bottom: 'seg_gt' bottom: 'score_crop' 
564 |   top: 'loss'
565 |   python_param {
566 |     module: 'python_layers'
567 |     layer: 'WeakLoss'
568 |   }
569 |   include { phase: TRAIN }
570 |   loss_weight: 1
571 | }
572 | 
573 | layer {
574 |   name: 'upsample'
575 |   bottom: 'fc8'
576 |   top: 'score'
577 |   type: "Deconvolution"
578 |   convolution_param {
579 |     engine: CAFFE
580 |     group: 21
581 |     num_output: 21
582 |     kernel_size: 15
583 |     stride: 8
584 |     weight_filler: { type: "bilinear_upsampling" }
585 |   }
586 |   param : {
587 |     lr_mult : 0
588 |     decay_mult : 0
589 |   }
590 |   include { phase: TEST }
591 | }
592 | layer {
593 |   name: 'crop' 
594 |   type: 'Crop'
595 |   bottom: 'score'
596 |   bottom: 'data'
597 |   top: 'score_crop'
598 |   include { phase: TEST }
599 | }
600 | 
601 | # =============Dense CRF===========================
602 | # Uncomment the untransformed data layer code on top and set bottom of DENSECRF layer to bottom: 'data-orig', if you want to use that. But similar results with transformed data as well. 
603 | 
604 | layer { type: 'DenseCRF' name: 'densecrf' bottom: 'score_crop' bottom: 'data-orig' top: 'upscore-crf'
605 |   # Untuned Default Params
606 |   # densecrf_param { x_gauss: 6 y_gauss: 6 wt_gauss: 6
607 |   #   x_bilateral: 50 y_bilateral: 50 r_bilateral: 4 g_bilateral: 4 b_bilateral: 4 wt_bilateral: 5 }
608 |   # Tuned Deeplab Params
609 |   densecrf_param { x_gauss: 19 y_gauss: 19 wt_gauss: 15
610 |    x_bilateral: 61 y_bilateral: 61 r_bilateral: 10 g_bilateral: 10 b_bilateral: 10 wt_bilateral: 35 }
611 |   include { phase: TEST } }
612 | # ===================================================
613 | 


--------------------------------------------------------------------------------
/models/scripts/download_ccnn_models.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
 4 | cd $DIR
 5 | 
 6 | FILE=ccnn_models.tar.gz
 7 | URL=http://www.cs.berkeley.edu/~pathak/ccnn/$FILE
 8 | CHECKSUM=9936ae392acef2512f2f3cf71cf98bdb
 9 | 
10 | if [ ! -f $FILE ]; then
11 |   echo "Downloading all the CCNN models (1.8G)..."
12 |   wget $URL -O $FILE
13 |   echo "Unzipping..."
14 |   tar zxvf $FILE
15 |   echo "Downloading Done."
16 | else
17 |   echo "File already exists. Checking md5..."
18 | fi
19 | 
20 | os=`uname -s`
21 | if [ "$os" = "Linux" ]; then
22 |   checksum=`md5sum $FILE | awk '{ print $1 }'`
23 | elif [ "$os" = "Darwin" ]; then
24 |   checksum=`cat $FILE | md5`
25 | elif [ "$os" = "SunOS" ]; then
26 |   checksum=`digest -a md5 -v $FILE | awk '{ print $4 }'`
27 | fi
28 | if [ "$checksum" = "$CHECKSUM" ]; then
29 |   echo "Checksum is correct. File was correctly downloaded."
30 |   exit 0
31 | else
32 |   echo "Checksum is incorrect. DELETE and download again."
33 | fi


--------------------------------------------------------------------------------
/models/scripts/download_pretrained_models.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
 4 | cd $DIR
 5 | 
 6 | FILE=imagenet_pretrained_models.tar.gz
 7 | URL=http://www.cs.berkeley.edu/~pathak/ccnn/$FILE
 8 | CHECKSUM=1d13ce28cd92a5082974e35b6a3cc187
 9 | 
10 | if [ ! -f $FILE ]; then
11 |   echo "Downloading all the CCNN models (1.8G)..."
12 |   wget $URL -O $FILE
13 |   echo "Unzipping..."
14 |   tar zxvf $FILE
15 |   echo "Downloading Done."
16 | else
17 |   echo "File already exists. Checking md5..."
18 | fi
19 | 
20 | os=`uname -s`
21 | if [ "$os" = "Linux" ]; then
22 |   checksum=`md5sum $FILE | awk '{ print $1 }'`
23 | elif [ "$os" = "Darwin" ]; then
24 |   checksum=`cat $FILE | md5`
25 | elif [ "$os" = "SunOS" ]; then
26 |   checksum=`digest -a md5 -v $FILE | awk '{ print $4 }'`
27 | fi
28 | if [ "$checksum" = "$CHECKSUM" ]; then
29 |   echo "Checksum is correct. File was correctly downloaded."
30 |   exit 0
31 | else
32 |   echo "Checksum is incorrect. DELETE and download again."
33 | fi


--------------------------------------------------------------------------------
/src/ccnn.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # CCNN 
 3 | # Copyright (c) 2015 [See LICENSE file for details]
 4 | # Written by Deepak Pathak, Philipp Krahenbuhl
 5 | # --------------------------------------------------------
 6 | 
 7 | def __setup_path():
 8 | 	import os, sys, inspect, numpy as np
 9 | 	paths = ['.','..','../build/','../build/release','../build/debug']
10 | 	current_path = os.path.split(inspect.getfile( inspect.currentframe() ))[0]
11 | 	paths = [os.path.realpath(os.path.abspath(os.path.join(current_path,x))) for x in paths]
12 | 	paths = list( filter( lambda x: os.path.exists(x+'/lib/python/ccnn.so'), paths ) )
13 | 	ptime = [os.path.getmtime(x+'/lib/python/ccnn.so') for x in paths]
14 | 	if len( ptime ):
15 | 		path = paths[ np.argmax( ptime ) ]
16 | 		sys.path.insert(0, path+'/lib')
17 | __setup_path()
18 | del __setup_path
19 | from python.ccnn import *
20 | 


--------------------------------------------------------------------------------
/src/config.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # CCNN 
 3 | # Copyright (c) 2015 [See LICENSE file for details]
 4 | # Written by Deepak Pathak, Philipp Krahenbuhl
 5 | # --------------------------------------------------------
 6 | 
 7 | from os import environ
 8 | 
 9 | def tryLoad(name, default):
10 |     try:
11 |         import user_config
12 |     except:
13 |         return None
14 |     if hasattr(user_config, name):
15 |         return getattr(user_config, name)
16 |     return default
17 | 
18 | CAFFE_DIR = tryLoad('CAFFE_DIR', '.')
19 | 
20 | import sys
21 | import config
22 | PD = CAFFE_DIR + '/python'
23 | if PD not in sys.path:
24 |     sys.path.append(PD)
25 | 
26 | # if not 'GLOG_minloglevel' in environ:
27 | environ['GLOG_minloglevel'] = '1'
28 | # To supress the output level to command line you need to increase the loglevel to at least 2. Do it before importing caffe.
29 | # 0 - debug
30 | # 1 - info (still a LOT of outputs)
31 | # 2 - warnings
32 | # 3 - errors
33 | 
34 | import caffe
35 | 


--------------------------------------------------------------------------------
/src/dataset.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # CCNN 
 3 | # Copyright (c) 2015 [See LICENSE file for details]
 4 | # Written by Deepak Pathak, Philipp Krahenbuhl
 5 | # --------------------------------------------------------
 6 | 
 7 | import numpy as np
 8 | 
 9 | VOC_DIR = '/mnt/a/pathak/fcn_mil_cache/VOC2012'
10 | CHANNEL_MEAN = np.array([104.00698793,116.66876762,122.67891434])
11 | 
12 | def idsVOC(type='train'):
13 | 	if type == 'train':
14 | 		return [l.strip() for l in open(VOC_DIR+'/train.txt','r')]
15 | 	if type == 'trainval':
16 | 		return [l.strip() for l in open(VOC_DIR+'/trainval.txt','r')]
17 | 	if type == 'trainval':
18 | 		return [l.strip() for l in open(VOC_DIR+'/test.txt','r')]
19 | 	return [l.strip() for l in open(VOC_DIR+'/val.txt','r')]
20 | 
21 | t0,t1 = 0,0
22 | def fetchVOC( id ):
23 | 	from skimage import io
24 | 	from time import time
25 | 	global t0,t1
26 | 	t0 += time()
27 | 	im = io.imread(VOC_DIR+"/JPEGImages/%s.jpg"%id)
28 | 	tim = im[:,:,::-1].transpose((2,0,1))-CHANNEL_MEAN[:,None,None]
29 | 	lbl = io.imread(VOC_DIR+"/SegmentationClassGray/%s.png"%id)
30 | 	t1 += time()
31 | 	return tim[None],lbl[None,None]
32 | 
33 | 


--------------------------------------------------------------------------------
/src/demo.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # CCNN 
 3 | # Copyright (c) 2015 [See LICENSE file for details]
 4 | # Written by Deepak Pathak, Philipp Krahenbuhl
 5 | # --------------------------------------------------------
 6 | 
 7 | from config import *
 8 | import numpy as np
 9 | from PIL import Image
10 | 
11 | # Function to get VOC color map. Don't change the image being loaded here.
12 | def palette_gt(gt):
13 |     palette_im = Image.open('../models/examples/gt1.png')
14 |     gt.putpalette(palette_im.palette)
15 |     return gt
16 | 
17 | # Network definitions
18 | net_def = '../models/fcn_32s/deploy_32s.prototxt'
19 | weights = '../models/ccnn_models/ccnn_tag_size_train.caffemodel'
20 | 
21 | # Load Network
22 | caffe.set_device(0)
23 | caffe.set_mode_gpu()
24 | net = caffe.Net(net_def, weights, caffe.TEST)
25 | 
26 | # Load Image
27 | im = Image.open('../models/examples/im2.jpg')
28 | im = np.array(im, dtype=np.float32)
29 | im = im[:,:,::-1]               # Change to BGR
30 | mean = np.array((104.00698793,116.66876762,122.67891434))
31 | im -= mean     # Mean Subtraction
32 | im = im.transpose(2,0,1)        # Blob: C x H x W
33 | im = im[None,:,:,:]
34 | 
35 | # Assign Data
36 | net.blobs['data'].reshape(*im.shape)
37 | net.blobs['data'].data[...] = im
38 | net.blobs['data-orig'].reshape(*im.shape)
39 | net.blobs['data-orig'].data[...] = im+mean[None,:,None,None]
40 | 
41 | # Run forward
42 | net.forward()
43 | out = Image.fromarray(net.blobs['upscore-crf'].data[0,0].astype(np.uint8), mode='P')
44 | out = palette_gt(out)
45 | out.save('../models/examples/result.png')
46 | 
47 | # Classes Predicted
48 | print 'Classes Predicted:', np.unique(net.blobs['upscore-crf'].data[0,0].astype(np.uint8))
49 | print 'Result saved'
50 | 


--------------------------------------------------------------------------------
/src/extras/VOClabelcolormap.m:
--------------------------------------------------------------------------------
 1 | % VOCLABELCOLORMAP Creates a label color map such that adjacent indices have different
 2 | % colors.  Useful for reading and writing index images which contain large indices,
 3 | % by encoding them as RGB images.
 4 | %
 5 | % CMAP = VOCLABELCOLORMAP(N) creates a label color map with N entries.
 6 | function cmap = labelcolormap(N)
 7 | 
 8 | if nargin==0
 9 |     N=256
10 | end
11 | cmap = zeros(N,3);
12 | for i=1:N
13 |     id = i-1; r=0;g=0;b=0;
14 |     for j=0:7
15 |         r = bitor(r, bitshift(bitget(id,1),7 - j));
16 |         g = bitor(g, bitshift(bitget(id,2),7 - j));
17 |         b = bitor(b, bitshift(bitget(id,3),7 - j));
18 |         id = bitshift(id,-3);
19 |     end
20 |     cmap(i,1)=r; cmap(i,2)=g; cmap(i,3)=b;
21 | end
22 | cmap = cmap / 255;
23 | 


--------------------------------------------------------------------------------
/src/extras/evaluate_pascal_seg_test.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # CCNN 
 3 | # Copyright (c) 2015 [See LICENSE file for details]
 4 | # Written by Deepak Pathak, Philipp Krahenbuhl
 5 | # --------------------------------------------------------
 6 | 
 7 | from __future__ import division
 8 | from pylab import *
 9 | from config import *
10 | import os
11 | from PIL import Image
12 | 
13 | # For ccnn fcn32 code trained model : size
14 | net_def = '../../models/fcn_32s/train_32s.prototxt'
15 | weights = '../../models/ccnn_models/ccnn_tag_size_trainval.caffemodel'
16 | save_dir = '/mnt/a/pathak/fcn_mil_cache/visualized_output/seg12test_size_untuned/results/VOC2012/Segmentation/comp6_test_cls/'
17 | 
18 | caffe.set_device(2)
19 | caffe.set_mode_gpu()
20 | test_net = caffe.Net(net_def, weights, caffe.TEST)
21 | 
22 | fnames_test = np.loadtxt('/mnt/a/pathak/fcn_mil_cache/VOC2012/test.txt', str)
23 | 
24 | #os.makedirs(save_dir)
25 | count = 0
26 | for fname in fnames_test:
27 |     count = count + 1
28 |     if count % 10 == 1:
29 |         print count
30 |     # print fname
31 |     test_net.forward()
32 |     im = Image.fromarray(test_net.blobs['upscore-crf'].data[0,0].astype(np.uint8), mode='P')
33 |     im.save(os.path.join(save_dir, fname + '.png'))
34 | print 'Total Images : ',count
35 | print 'Weight File : ', weights


--------------------------------------------------------------------------------
/src/extras/gen_bb_ind_helper.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # CCNN 
 3 | # Copyright (c) 2015 [See LICENSE file for details]
 4 | # Written by Deepak Pathak, Philipp Krahenbuhl
 5 | # --------------------------------------------------------
 6 | 
 7 | 
 8 | '''
 9 | - Converts the matlab generated indicator file to hdf5 format. It is used after generate_bb_indicator.m
10 | '''
11 | 
12 | from sys import argv
13 | from scipy.io import loadmat
14 | import numpy as np
15 | import h5py
16 | import os
17 | 
18 | out_dir = 'trainList_cl12_seg12'
19 | voc_dir = '/mnt/a/pathak/fcn_mil_cache/VOC2012/'
20 | indicatorLabels = np.loadtxt(voc_dir+out_dir+'/train_labels.txt')
21 | 
22 | if not os.path.exists(voc_dir+out_dir+'/ClassIndicator'):
23 | 	print 'Creating Directory : '+voc_dir+out_dir+'/ClassIndicator';
24 | 	os.makedirs(voc_dir+out_dir+'/ClassIndicator')
25 | 
26 | 
27 | imNames = open(voc_dir+out_dir+'/train.txt','r')
28 | i = 0
29 | for line in imNames:
30 | 	label = indicatorLabels[i,:]
31 | 	label = label[None,:,None,None]
32 | 
33 | 	f = h5py.File(voc_dir+out_dir+'/ClassIndicator/'+line[:-1]+'.hf5', "w")
34 | 	f.create_dataset('indicator', label.shape, dtype='f')[...] = label
35 | 	f.close()
36 | 
37 | 	i = i+1
38 | 
39 | imNames.close()
40 | 


--------------------------------------------------------------------------------
/src/extras/generate_bb_indicator.m:
--------------------------------------------------------------------------------
 1 | % --------------------------------------------------------
 2 | % CCNN 
 3 | % Copyright (c) 2015 [See LICENSE file for details]
 4 | % Written by Deepak Pathak, Philipp Krahenbuhl
 5 | % --------------------------------------------------------
 6 | 
 7 | % Matlab script to generate the labels from classification annotations (same as image-level detection labels) set in VOC
 8 | % Run this script and then run the python script to generate hdf5 files : python gen_bb_ind_helper.py
 9 | 
10 | clear all; close all; clc;
11 | 
12 | out_dir = 'trainList_cl12_seg12';
13 | voc_dir = '/mnt/a/pathak/fcn_mil_cache/VOC2012';
14 | curr_folder = pwd;
15 | cd(voc_dir);
16 | 
17 | train_seg = textread('./train.txt','%s');
18 | trainval_cl = textread('./ImageSets/Main/trainval.txt','%s');
19 | [train_new, indSeg, indCl] = intersect(train_seg,trainval_cl);
20 | 
21 | fprintf('Saving output to directory : %s\n',fullfile(voc_dir,out_dir));
22 | 
23 | fid = fopen(['./' out_dir '/train.txt'],'w');
24 | for i=1:length(train_new)
25 | 	fprintf(fid,'%s\n',train_new{i});
26 | end
27 | fclose(fid);
28 | 
29 | fid = fopen(['./' out_dir '/indicator_train.txt'],'w');
30 | for i=1:length(train_new)
31 | 	fprintf(fid,'%s\n',['/mnt/a/pathak/fcn_mil_cache/VOC2012/' out_dir '/ClassIndicator/' train_new{i} '.hf5']);
32 | end
33 | fclose(fid);
34 | 
35 | 
36 | classes = { 'background',
37 |     'aeroplane', 
38 |     'bicycle',
39 |     'bird',
40 |     'boat',
41 |     'bottle',
42 |     'bus',
43 |     'car',
44 |     'cat',
45 |     'chair',
46 |     'cow',
47 |     'diningtable',
48 |     'dog',
49 |     'horse',
50 |     'motorbike',
51 |     'person',
52 |     'pottedplant',
53 |     'sheep',
54 |     'sofa',
55 |     'train',
56 |     'tvmonitor' };
57 | tempIndicator = zeros(length(trainval_cl),21);
58 | tempIndicator(:,1) = ones(length(trainval_cl),1);
59 | for i=2:length(classes)
60 |     [~,tempIndicator(:,i)] = textread(['./ImageSets/Main/' classes{i} '_trainval.txt'],'%s %d');
61 | end
62 | tempIndicator(tempIndicator==-1)=0;             % 1: present , -1 : absent , 0 : difficult
63 | 
64 | 
65 | indicator = tempIndicator(indCl,:);
66 | dlmwrite(['./' out_dir '/train_labels.txt'], indicator, 'delimiter',' ');
67 | 
68 | cd(curr_folder);
69 | 
70 | 
71 | % ===========================================================================
72 | % Shuffling Code :
73 | % ===========================================================================
74 | 
75 | clear all; close all; clc;
76 | voc_dir = '/mnt/a/pathak/fcn_mil_cache/VOC2012';
77 | curr_folder = pwd;
78 | cd(voc_dir);
79 | 
80 | datasetName = 'trainval';
81 | rng(2222);
82 | train_seg = textread(['./' datasetName '_notShuffled.txt'],'%s');
83 | randomSeq = randperm(length(train_seg));
84 | 
85 | fid = fopen(['./' datasetName '.txt'],'w');
86 | for i=1:length(train_seg)
87 |     fprintf(fid,'%s\n',train_seg{randomSeq(i)});
88 | end
89 | fclose(fid);
90 | 
91 | fid = fopen(['./indicator_' datasetName '.txt'],'w');
92 | for i=1:length(train_seg)
93 |     fprintf(fid,'%s\n',['/mnt/a/pathak/fcn_mil_cache/VOC2012/SegmentationClassIndicator/' train_seg{randomSeq(i)} '.hf5']);
94 | end
95 | fclose(fid);
96 | 
97 | % ===========================================================================
98 | % ===========================================================================
99 | 


--------------------------------------------------------------------------------
/src/extras/generate_lmbd.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # CCNN 
 3 | # Copyright (c) 2015 [See LICENSE file for details]
 4 | # Written by Deepak Pathak, Philipp Krahenbuhl
 5 | # --------------------------------------------------------
 6 | 
 7 | '''
 8 | To generate compact lmdb using convert_imageset tool (see /caffe-pathak/fcn_mil/src/create_imagenet.sh) :
 9 | -	You run the tool twice. Once to create the image lmdb and once to create the label lmdb. 
10 | 	The filename is the path to the image and label is always 0. Use --encode flag in both runs. 
11 | 	Use --grey flag for label.
12 | - 	Encode flag is not present in python
13 | -   See the shuffling code in generate_bb_indicator.m
14 | '''
15 | 
16 | from __future__ import division
17 | from config import *
18 | import lmdb
19 | import numpy as np
20 | import scipy.stats, scipy.io
21 | from PIL import Image
22 | 
23 | dataset = 'val'       # train or val or trainval
24 | dirAddress = '/mnt/a/pathak/fcn_mil_cache/VOC2012'
25 | 
26 | inputs = np.loadtxt('{}/{}.txt'.format(dirAddress,dataset), str)
27 | 
28 | # Generate Image LMDB =====================================
29 | image_db = lmdb.open('{}/images_{}_lmdb'.format(dirAddress,dataset), map_size=int(1e12))
30 | with image_db.begin(write=True) as in_txn:
31 | 	for in_idx, in_ in enumerate(inputs):
32 | 
33 | 		im = np.array(Image.open(dirAddress + '/JPEGImages/' + in_ + '.jpg'))  # numpy ndarray
34 | 		# Classes present : np.unique(im.astype(np.uint8))
35 |         
36 | 		# If rgb image : im = im[:,:,::-1] (RGB to BGR); im = im.transpose((2, 0, 1)) (in caffe channel-height-width)
37 | 		# If ground truth single channel image : im = im.astype(np.uint8) and im = im[np.newaxis, :, :]
38 | 		im = im[:,:,::-1]
39 | 		im = im.transpose((2, 0, 1))
40 | 		im_dat = caffe.io.array_to_datum(im)
41 |         
42 | 		# Note that the indices are zero padded to preserve their order: LMDB sorts the keys lexicographically so bare integers as strings will be disordered.
43 | 		in_txn.put('{:0>10d}'.format(in_idx), im_dat.SerializeToString())
44 | image_db.close()
45 | 
46 | # Generate GT LMDB =======================================
47 | image_db = lmdb.open('{}/segmentation_class_{}_lmdb'.format(dirAddress,dataset), map_size=int(1e12))
48 | with image_db.begin(write=True) as in_txn:
49 | 	for in_idx, in_ in enumerate(inputs):
50 | 		
51 | 		im = np.array(Image.open(dirAddress + '/SegmentationClassPNG/' + in_ + '.png'))  # numpy ndarray
52 | 		# Classes present : np.unique(im.astype(np.uint8))
53 | 
54 | 		# If rgb image : im = im[:,:,::-1] (RGB to BGR); im = im.transpose((2, 0, 1)) (in caffe channel-height-width)
55 | 		# If ground truth single channel image : im = im.astype(np.uint8) and im = im[np.newaxis, :, :]
56 | 		im = im.astype(np.uint8)
57 | 		im = im[np.newaxis, :, :]
58 | 		im_dat = caffe.io.array_to_datum(im)
59 | 
60 | 		# Note that the indices are zero padded to preserve their order: LMDB sorts the keys lexicographically so bare integers as strings will be disordered.
61 | 		in_txn.put('{:0>10d}'.format(in_idx), im_dat.SerializeToString())
62 | image_db.close()
63 | 


--------------------------------------------------------------------------------
/src/extras/gray2ind.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # CCNN 
  3 | # Copyright (c) 2015 [See LICENSE file for details]
  4 | # Written by Deepak Pathak, Philipp Krahenbuhl
  5 | # --------------------------------------------------------
  6 | 
  7 | '''
  8 | - It is to be run after png2gray.py
  9 | - Converts the gray scale segmentation ground truth image to hdf5 format. 
 10 | '''
 11 | 
 12 | from sys import argv
 13 | from scipy.io import loadmat
 14 | import numpy as np
 15 | from PIL import Image,ImagePalette
 16 | import h5py
 17 | import random
 18 | random.seed(222)
 19 | 
 20 | # Code to convert image one by one ============
 21 | 
 22 | # if len(argv) < 3:
 23 | # 	print("Usage: %s png hf5"%argv[0])
 24 | # 	exit(1)
 25 | 
 26 | # N = 21
 27 | # im = Image.open(argv[1])
 28 | # I = np.array(im) 	# shape : (h,w)
 29 | # l = I[I>=0]			# shape : (hw,1)
 30 | # l = l[l<N]
 31 | # # im.close() : works with python3. No need to close in python2.7
 32 | 
 33 | # f = h5py.File(argv[2], "w")
 34 | # cnt = np.bincount(l,minlength=N)[None,:,None,None]
 35 | # f.create_dataset('cnt', cnt.shape, dtype='f')[...] = cnt
 36 | # f.create_dataset('indicator', cnt.shape, dtype='f')[...] = (cnt > 0).astype(float)
 37 | # f.create_dataset('indicator_0.01', cnt.shape, dtype='f')[...] = (cnt > 0.01*l.size).astype(float)
 38 | # f.create_dataset('indicator_0.05', cnt.shape, dtype='f')[...] = (cnt > 0.05*l.size).astype(float)
 39 | # f.create_dataset('indicator_0.10', cnt.shape, dtype='f')[...] = (cnt > 0.10*l.size).astype(float)
 40 | # f.close()
 41 | 
 42 | # =============================================
 43 | # =============================================
 44 | 
 45 | 
 46 | # Code to convert images in batch =============
 47 | # =============================================
 48 | 
 49 | # N = 21
 50 | # out_dir = 'SegmentationClassIndicator/'
 51 | # voc_dir = '/x/pathak/fcn_mil_cache/VOC2012/'
 52 | # imNames = open(voc_dir+out_dir+'val.txt','r')
 53 | # for line in imNames:
 54 | # 	im = Image.open(voc_dir+'SegmentationClassGray/'+line[:-1]+'.png')
 55 | # 	I = np.array(im) 	# shape : (h,w)
 56 | # 	l = I[I>=0]			# shape : (hw,1)
 57 | # 	l = l[l<N]
 58 | # 	# im.close() : works with python3. No need to close in python2.7
 59 | 
 60 | # 	f = h5py.File(voc_dir+out_dir+'/ClassIndicator/'+line[:-1]+'.hf5', "w")
 61 | # 	cnt = np.bincount(l,minlength=N)[None,:,None,None]
 62 | 	
 63 | # 	#cnt = cnt[:,1:,:,:] # To ignore background class
 64 | 	
 65 | # 	f.create_dataset('cnt', cnt.shape, dtype='f')[...] = cnt
 66 | # 	f.create_dataset('indicator', cnt.shape, dtype='f')[...] = (cnt > 0).astype(float)
 67 | # 	f.create_dataset('indicator_0.01', cnt.shape, dtype='f')[...] = (cnt > 0.01*l.size).astype(float)
 68 | # 	f.create_dataset('indicator_0.05', cnt.shape, dtype='f')[...] = (cnt > 0.05*l.size).astype(float)
 69 | # 	f.create_dataset('indicator_0.10', cnt.shape, dtype='f')[...] = (cnt > 0.10*l.size).astype(float)
 70 | # 	f.close()
 71 | 
 72 | # =============================================
 73 | # =============================================
 74 | 
 75 | 
 76 | # Code to generate annotations with the semi-supervised flags ====
 77 | # ================================================================
 78 | 
 79 | N = 21
 80 | out_dir = 'SegmentationClassIndicator/'
 81 | voc_dir = '/mnt/a/pathak/fcn_mil_cache/VOC2012/'
 82 | data = 'val'
 83 | imNames = open(voc_dir+data+'.txt','r')
 84 | 
 85 | classFreq = np.zeros(N)
 86 | for line in imNames:
 87 | 	im = Image.open(voc_dir+'SegmentationClassGray/'+line[:-1]+'.png')
 88 | 	I = np.array(im) 	# shape : (h,w)
 89 | 	l = I[I>=0]			# shape : (hw,1)
 90 | 	l = l[l<N]
 91 | 	# im.close() : works with python3. No need to close in python2.7
 92 | 	cnt = np.bincount(l,minlength=N)[None,:,None,None]
 93 | 	classFreq += (cnt[0,:,0,0] > 0).astype(float)
 94 | #print classFreq.shape
 95 | print 'Class Frequency: ',classFreq
 96 | #classFreq = np.array([ 10578.,586. , 486. , 698. , 461. ,654.,385. ,1086.,1000.,1081. ,264.,528.,1177.,444.,482.,3898.,487.,299.,491.,500.,548.]) for train
 97 | 
 98 | classIm = []
 99 | for i in range(0,N):
100 | 	classIm.append([])
101 | imID = 0
102 | imNames = open(voc_dir+data+'.txt','r')
103 | for line in imNames:
104 | 	im = Image.open(voc_dir+'SegmentationClassGray/'+line[:-1]+'.png')
105 | 	I = np.array(im)
106 | 	l = I[I>=0]
107 | 	l = l[l<N]
108 | 	cnt = np.bincount(l,minlength=N)[None,:,None,None]
109 | 	cnt = (cnt[0,:,0,0] > 0).astype(float)
110 | 	classesPresent = np.flatnonzero(cnt)
111 | 	classChosen = classesPresent[classFreq[classesPresent].argmin()]
112 | 	classIm[classChosen].append(imID)
113 | 	imID += 1
114 | 
115 | samples = [1,3,5,10,50,100,200]			# number of randomly sampled images per class
116 | selectedImages = []
117 | for i in range(0,len(samples)):
118 | 	selectedImages.append([])
119 | 	for j in range(1,N):
120 | 		temp = classIm[j]
121 | 		random.shuffle(temp)
122 | 		selectedImages[i].extend(temp[0:min(samples[i],len(temp))])
123 | 	selectedImages[i].sort()
124 | print 'Images Selected'
125 | 
126 | imID = 0
127 | temp = len(samples)*[0]
128 | imNames = open(voc_dir+data+'.txt','r')
129 | for line in imNames:
130 | 	im = Image.open(voc_dir+'SegmentationClassGray/'+line[:-1]+'.png')
131 | 	I = np.array(im) 	# shape : (h,w)
132 | 	l = I[I>=0]			# shape : (hw,1)
133 | 	l = l[l<N]
134 | 	# im.close() : works with python3. No need to close in python2.7
135 | 
136 | 	f = h5py.File(voc_dir+out_dir+line[:-1]+'.hf5', "w")
137 | 	cnt = np.bincount(l,minlength=N)[None,:,None,None]
138 | 	
139 | 	f.create_dataset('cnt', cnt.shape, dtype='f')[...] = cnt
140 | 	f.create_dataset('indicator', cnt.shape, dtype='f')[...] = (cnt > 0).astype(float)
141 | 	f.create_dataset('indicator_0.01', cnt.shape, dtype='f')[...] = (cnt > 0.01*l.size).astype(float)
142 | 	f.create_dataset('indicator_0.05', cnt.shape, dtype='f')[...] = (cnt > 0.05*l.size).astype(float)
143 | 	f.create_dataset('indicator_0.10', cnt.shape, dtype='f')[...] = (cnt > 0.10*l.size).astype(float)
144 | 	
145 | 	for i in range(0,len(samples)):
146 | 		if temp[i]<len(selectedImages[i]) and imID == selectedImages[i][temp[i]]:
147 | 			f.create_dataset('flag_'+str(samples[i]), (1,), dtype='f')[...] = 1
148 | 			temp[i] += 1
149 | 		else:
150 | 			f.create_dataset('flag_'+str(samples[i]), (1,), dtype='f')[...] = 0
151 | 
152 | 	f.close()
153 | 	imID += 1
154 | print 'Samples: ',samples
155 | print 'Temp: ',temp
156 | 
157 | # ===============================================
158 | # ===============================================
159 | 


--------------------------------------------------------------------------------
/src/extras/mat2png.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # CCNN 
 3 | # Copyright (c) 2015 [See LICENSE file for details]
 4 | # Written by Deepak Pathak, Philipp Krahenbuhl
 5 | # --------------------------------------------------------
 6 | 
 7 | '''
 8 | - Converts the mat file provided by Hariharan et. al. SBD to png single channel images with color palette.
 9 | - Use python3 to run. After this, use png2gray.py and then gray2ind.py.
10 | '''
11 | 
12 | from sys import argv
13 | from scipy.io import loadmat
14 | import numpy as np
15 | from PIL import Image
16 | 
17 | if len(argv) < 3:
18 | 	print("Usage: %s mat png"%argv[0])
19 | 	exit(1)
20 | 
21 | M = loadmat( argv[1] )
22 | im = Image.fromarray(M['GTcls'][0,0]['Segmentation'].astype(np.uint8),'P')
23 | 
24 | if len(argv)>3:
25 | 	im.putpalette( Image.open( argv[3] ).palette )
26 | 
27 | im.save(argv[2])
28 | 


--------------------------------------------------------------------------------
/src/extras/png2gray.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # CCNN 
 3 | # Copyright (c) 2015 [See LICENSE file for details]
 4 | # Written by Deepak Pathak, Philipp Krahenbuhl
 5 | # --------------------------------------------------------
 6 | 
 7 | '''
 8 | - It is to be run after mat2png.py
 9 | - Converts png segmentation ground truth images to grayscale representing the labels as intensities.
10 | - Use python3 to run.
11 | '''
12 | 
13 | from sys import argv
14 | from scipy.io import loadmat
15 | import numpy as np
16 | from PIL import Image,ImagePalette
17 | 
18 | if len(argv) < 3:
19 | 	print("Usage: %s png png"%argv[0])
20 | 	exit(1)
21 | 
22 | im = Image.open(argv[1])
23 | Image.frombytes('L',im.size,im.tobytes()).save(argv[2])
24 | 


--------------------------------------------------------------------------------
/src/extras/save_png_colormap_pascal.m:
--------------------------------------------------------------------------------
 1 | % --------------------------------------------------------
 2 | % CCNN 
 3 | % Copyright (c) 2015 [See LICENSE file for details]
 4 | % Written by Deepak Pathak, Philipp Krahenbuhl
 5 | % --------------------------------------------------------
 6 | 
 7 | % Script to save png indexed images with the colormap defined by VOCdevkit
 8 | 
 9 | clear all; close all; clc;
10 | 
11 | dirAddress = '/mnt/a/pathak/fcn_mil_cache/visualized_output/seg12test_size_untuned/';
12 | inputImages = dir(fullfile(dirAddress,'results/VOC2012/Segmentation/comp6_test_cls/*.png'));
13 | fprintf('Dir: %s\n',dirAddress);
14 | cmap = VOClabelcolormap(256);
15 | for i=1:length(inputImages)
16 |     if mod(i,100)==0
17 |         fprintf('Image # %d\n',i);
18 |     end
19 |     im = imread(fullfile(dirAddress,'results/VOC2012/Segmentation/comp6_test_cls/',inputImages(i).name));
20 |     imwrite(im,cmap,fullfile(dirAddress,'results/VOC2012/Segmentation/comp6_test_cls/',inputImages(i).name));
21 | end
22 | 


--------------------------------------------------------------------------------
/src/python_layers.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # CCNN 
  3 | # Copyright (c) 2015 [See LICENSE file for details]
  4 | # Written by Deepak Pathak, Philipp Krahenbuhl
  5 | # --------------------------------------------------------
  6 | 
  7 | from config import *
  8 | import numpy as np
  9 | 
 10 | class WeakLoss(caffe.Layer):
 11 | 	def DS(self, I, stride=32, factor=32,N=21 ):
 12 | 		r = np.zeros(( (I.shape[0]-1)/stride+1, (I.shape[1]-1)/stride+1 ), dtype=I.dtype)
 13 | 		for j in range(0,I.shape[0],stride):
 14 | 			for i in range(0,I.shape[1],stride):
 15 | 				p = I[j:j+factor,i:i+factor]
 16 | 				b = np.bincount( p[p>=0],minlength=N )
 17 | 				r[j/stride,i/stride] = np.argmax(b)
 18 | 		return r
 19 | 
 20 | 	def setup(self,bottom,top):
 21 | 
 22 | 		self.bg_lower,self.bg_upper = 0.3,0.7
 23 | 		self.bg_slack = 1e10 		# no slack : 1e10
 24 | 		self.fg_lower_hard = 0.01
 25 | 		self.fg_lower = 0.05
 26 | 		self.fg_slack = 2 		# no slack : 1e10
 27 | 		self.hardness = 1 				# no hardness : 1 and hardness : 1000
 28 | 
 29 | 		self.semi_supervised = False
 30 | 		self.apply_size_constraint = False
 31 | 		self.normalization = True  # models/fcn_8s/solver_8s.prototxt needs the loss to be normalized and solver_32s doesn't
 32 | 
 33 | 		if self.apply_size_constraint:
 34 | 			self.bg_lower,self.bg_upper = 0.2,0.7
 35 | 			self.bg_slack = 1e10
 36 | 			self.fg_lower_hard = 0.1
 37 | 			self.fg_lower = 0.1
 38 | 			self.fg_slack = 1e10
 39 | 			self.hardness = 1000
 40 | 			self.fg_upper_small = 0.01 		# upper bound on small object. Don't make it zero as strictly less than 0 is not satisfiable. Make it epsilon small.
 41 | 		
 42 | 		# self.counter = 1
 43 | 
 44 | 	def reshape(self, bottom, top):
 45 | 		top[0].reshape(1,1,1,1)
 46 | 	
 47 | 	def forward(self, bottom, top):
 48 | 		# print "first : ",int(np.prod(bottom[0].data.shape[1:]))
 49 | 		from time import time
 50 | 		t0 = time()
 51 | 		D = bottom[0].channels
 52 | 		from ccnn import constraintloss
 53 | 		self.diff = []
 54 | 		loss,w = 0,0
 55 | 		for i in range(bottom[0].num):
 56 | 			# print '-------------------------------------'
 57 | 			# print 'Image Number : ',self.counter 
 58 | 
 59 | 			if self.semi_supervised:
 60 | 				assert (len(bottom)>4),"Semi Supervised Flag ON, but full supervised images not supplied as additional bottom !"
 61 | 
 62 | 			if (not self.semi_supervised) or (bottom[3].data[i]==0): 		# weakly-supervised downsampled training
 63 | 				# Setup bottoms
 64 | 				f = np.ascontiguousarray(bottom[0].data[i].reshape((D,-1)).T) 		# f : height*width x channels
 65 | 				q = np.exp(f-np.max(f,axis=1)[:,None]) 								# expAndNormalize across channels
 66 | 				q/= np.sum(q,axis=1)[:,None]
 67 | 
 68 | 				# Setup the constraint softmax
 69 | 				csm = constraintloss.ConstraintSoftmax(self.hardness)
 70 | 
 71 | 				# Add Negative Label constraints
 72 | 				if self.apply_size_constraint:
 73 | 					L = bottom[2].data[i].flatten() > 0.5
 74 | 				else:
 75 | 					L = bottom[1].data[i].flatten() > 0.5
 76 | 				csm.addZeroConstraint( (~L).astype(np.float32) )
 77 | 				
 78 | 				# Add Small Object Size constraints
 79 | 				# L_up = 0*L
 80 | 				# if self.apply_size_constraint:
 81 | 				# 	assert (len(bottom)>2),"Size constraint ON, but size information not supplied as additional bottom !"
 82 | 				# 	L_up = 1*L
 83 | 				# 	L = bottom[2].data[i].flatten() > 0.5
 84 | 
 85 | 				# for l in np.flatnonzero(L_up):
 86 | 				# 	if l>0 and not L[l]:
 87 | 				# 		v = np.zeros(D).astype(np.float32); v[l] = 1
 88 | 				# 		csm.addLinearConstraint( -v, -self.fg_upper_small, self.fg_slack )
 89 | 
 90 | 				# Apply Positive Label Constraints
 91 | 				for l in np.flatnonzero(L):
 92 | 					if l>0:
 93 | 						v = np.zeros(D).astype(np.float32); v[l] = 1
 94 | 						# if self.apply_size_constraint:
 95 | 						# 	csm.addLinearConstraint(  v, self.fg_lower_hard )
 96 | 						csm.addLinearConstraint(  v, self.fg_lower, self.fg_slack )
 97 | 
 98 | 				# Add Background Constraints
 99 | 				v = np.zeros(D).astype(np.float32); v[0] = 1
100 | 				csm.addLinearConstraint(  v, self.bg_lower, self.bg_slack ) # lower bound
101 | 				if (np.sum(L[1:]) > 0): # i.e. image is not all background
102 | 					csm.addLinearConstraint( -v, -self.bg_upper ) # upper bound
103 | 				
104 | 				# Run constrained optimization
105 | 				p = csm.compute(f)
106 | 
107 | 				if self.normalization:
108 | 					self.diff.append( ((q-p).T.reshape(bottom[0].data[i].shape))/np.float32(f.shape[0]) )      # normalize by (f.shape[0])
109 | 				else:
110 | 					self.diff.append( ((q-p).T.reshape(bottom[0].data[i].shape)) )      # unnormalize
111 | 
112 | 				# Debugging Code ---------
113 | 				# temp = 1
114 | 				# for l in np.flatnonzero(L_up):
115 | 				# 	if l>0 and not L[l]:
116 | 				# 		if p[:,l].sum() > self.fg_upper_small:
117 | 				# 			print 'Small Object Class Index=',temp,'  sumP=',p[:,l].sum(),'  sumQ=',q[:,l].sum()
118 | 				# 			print '\tP=',repr(p[:,l])
119 | 				# 			print '\tQ=',repr(q[:,l])
120 | 				# 	temp += 1
121 | 				# print ''
122 | 				# np.savez('./debug/debug_im'+str(self.counter)+'.npz', hardness=self.hardness, bg_lower = self.bg_lower, bg_upper=self.bg_upper, L=L, L_up=L_up, fg_lower = self.fg_lower, fg_slack=self.fg_slack, fg_upper_small=self.fg_upper_small, f=f,p=p,q=q )
123 | 				# self.counter += 1
124 | 				# -----------------------
125 | 
126 | 			else: 		# fully-supervised upsample training
127 | 				f = np.ascontiguousarray(bottom[5].data[i].reshape((D,-1)).T) 		# f : height*width x channels
128 | 				q = np.exp(f-np.max(f,axis=1)[:,None]) 								# expAndNormalize across channels
129 | 				q/= np.sum(q,axis=1)[:,None]
130 | 
131 | 				gt = bottom[4].data[i]
132 | 				# print '\t q : ',q.shape
133 | 				# print '\t cnn_output_Shape : ',bottom[0].data[i].shape
134 | 				# print '\t gt_Shape : ',gt.shape
135 | 				# print '\t gt_resized_Shape : ', (np.float32(self.DS(np.uint8(gt[0,...])))).shape
136 | 				gt = np.uint8(gt[0,...]) 		# For downsampling the gt use this : self.DS(np.uint8(gt[0,...]))
137 | 				gt = np.ascontiguousarray(gt.reshape((1,-1)).T) 		# gt : height*width x 1
138 | 				gt = gt.squeeze()
139 | 				p = np.zeros(q.shape).astype(np.float32) 					# q,p,f : height*width x channels
140 | 				ind = np.where(gt!=255)
141 | 				p[ind,gt[ind]] = 1
142 | 				ind = np.where(gt==255)
143 | 				p[ind,:] = q[ind,:] 								# so that q-p=0 at this position because it is ignore label
144 | 
145 | 				if self.normalization:	
146 | 					self.diff.append( ((q-p).T.reshape(bottom[5].data[i].shape))/np.float32(f.shape[0]) )      # normalize by (f.shape[0])
147 | 				else:
148 | 					self.diff.append((q-p).T.reshape(bottom[5].data[i].shape))
149 | 
150 | 			if self.normalization:			
151 | 				loss += (np.sum(p*np.log(np.maximum(p,1e-10))) - np.sum(p*np.log(np.maximum(q,1e-10))))/np.float32(f.shape[0])    # normalize by (f.shape[0])
152 | 			else:
153 | 				loss += (np.sum(p*np.log(np.maximum(p,1e-10))) - np.sum(p*np.log(np.maximum(q,1e-10))))    # unnormalize
154 | 
155 | # 			print( np.min(f), np.max(f) )
156 | # 			np.set_printoptions(linewidth=150)
157 | # 			print( L.astype(bool) )
158 | # 			print( np.bincount(np.argmax(f,axis=1),minlength=21) )
159 | # 			print( np.sum(p[:,~L]), 'P', np.sum(p,axis=0).astype(int)[L], 'H', np.bincount(np.argmax(p,axis=1),minlength=L.size)[L] )
160 | 		#print( "===== %f ====="%(time()-t0) )
161 | 		top[0].data[0,0,0,0] = loss
162 | 		self.diff = np.array(self.diff)
163 | 
164 | 	def backward(self, top, propagate_down, bottom):
165 | 		for i in range(bottom[0].num):
166 | 			if (not self.semi_supervised) or (bottom[3].data[i]==0):
167 | 				bottom[0].diff[i] = top[0].diff[0,0,0,0]*self.diff[i]
168 | 			else:
169 | 				bottom[5].diff[i] = top[0].diff[0,0,0,0]*self.diff[i]
170 | 


--------------------------------------------------------------------------------
/src/test.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # CCNN 
 3 | # Copyright (c) 2015 [See LICENSE file for details]
 4 | # Modified by Deepak Pathak
 5 | # Originally written by Jonathan Long
 6 | # --------------------------------------------------------
 7 | 
 8 | from __future__ import division
 9 | import numpy as np
10 | import os
11 | from PIL import Image
12 | from datetime import datetime
13 | from config import *
14 | 
15 | fnames_val = {'pascal': np.loadtxt('/mnt/a/pathak/fcn_mil_cache/VOC2012/val.txt', str)}
16 | 
17 | def prepare():
18 |     save_dir = '/mnt/a/pathak/fcn_mil_cache/visualized_output/2012val_best_tuned/'
19 |     return save_dir
20 | 
21 | def compute_hist(net, save_dir, dataset):
22 |     n_cl = net.blobs['score_crop'].channels
23 |     count = 1
24 |     hist = np.zeros((n_cl, n_cl))
25 |     for fname in fnames_val[dataset]:
26 |         net.forward()
27 |         h, _, _ = np.histogram2d(net.blobs['gt'].data[0, 0].flatten(),
28 |                 net.blobs['upscore-crf'].data[0,0].flatten(),
29 |                 bins=n_cl, range=[[0, n_cl], [0, n_cl]])
30 |         hist += h
31 |         iu = np.zeros(n_cl)
32 |         for i in range(n_cl):
33 |             iu[i] = hist[i, i] / (hist[i].sum() + hist[:, i].sum() - hist[i, i])
34 |         print 'Image : ',count,' ,  Name : ',fname, ' , mean IU (till here) : ', np.nanmean(iu)*100
35 |         #print '\tClasses Present : ',np.unique(net.blobs['gt'].data[0, 0].astype(np.uint8))
36 |         #print '\tClasses Predicted : ', np.unique(net.blobs['upscore-crf'].data[0,0].astype(np.uint8))
37 |         #print ''
38 |         # im = Image.fromarray(net.blobs['upscore-crf'].data[0,0].astype(np.uint8), mode='P')
39 |         # im.save(os.path.join(save_dir, fname + '.png'))
40 |         count += 1
41 |         import sys
42 |         sys.stdout.flush()
43 |     return hist
44 | 
45 | def seg_tests(test_net, save_format, dataset, weights):
46 |     print '>>>', datetime.now(), 'Begin seg tests'
47 |     n_cl = test_net.blobs['score_crop'].channels
48 |     hist = compute_hist(test_net, save_format, dataset)
49 |     # overall accuracy
50 |     acc = np.diag(hist).sum() / hist.sum()
51 |     print '>>>', datetime.now(), 'overall accuracy', acc
52 |     # per-class accuracy
53 |     acc = np.zeros(n_cl)
54 |     for i in range(n_cl):
55 |         acc[i] = hist[i, i] / hist[i].sum()
56 |     print '>>>', datetime.now(), 'mean accuracy', np.nanmean(acc)
57 |     # per-class IU
58 |     iu = np.zeros(n_cl)
59 |     for i in range(n_cl):
60 |         iu[i] = hist[i, i] / (hist[i].sum() + hist[:, i].sum() - hist[i, i])
61 |     print '>>>', datetime.now(), 'mean IU (out of 100)', np.nanmean(iu)*100
62 |     iu2 = [ round(100*elem, 1) for elem in iu ]
63 |     print '>>>', datetime.now(), 'per-class IU (out of 100)', iu2
64 |     freq = hist.sum(1) / hist.sum()
65 |     print '>>>', datetime.now(), 'fwavacc', \
66 |             (freq[freq > 0] * iu[freq > 0]).sum()
67 |     print 'Weight File', weights
68 | 
69 | 
70 | # Running the code 
71 | dataset = 'pascal'
72 | save_format = prepare()
73 | 
74 | net_def = '../models/fcn_32s/train_32s.prototxt'
75 | weights = '../models/ccnn_models/ccnn_tag_size_train.caffemodel'
76 | 
77 | caffe.set_device(0)
78 | caffe.set_mode_gpu()
79 | test_net = caffe.Net(net_def, weights, caffe.TEST)
80 | seg_tests(test_net, save_format, dataset, weights)
81 | 


--------------------------------------------------------------------------------
/src/test_argmax.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # CCNN 
 3 | # Copyright (c) 2015 [See LICENSE file for details]
 4 | # Modified by Deepak Pathak
 5 | # Originally written by Jonathan Long
 6 | # --------------------------------------------------------
 7 | 
 8 | from __future__ import division
 9 | import numpy as np
10 | import os
11 | from PIL import Image
12 | from datetime import datetime
13 | from config import *
14 | 
15 | fnames_val = {'pascal': np.loadtxt('/mnt/a/pathak/fcn_mil_cache/VOC2012/val.txt', str)}
16 | 
17 | def prepare():
18 |     save_dir = '/mnt/a/pathak/fcn_mil_cache/visualized_output/2012val_best_raw/'
19 |     return save_dir
20 | 
21 | def compute_hist(net, save_dir, dataset):
22 |     n_cl = net.blobs['score_crop'].channels
23 |     count = 1
24 |     hist = np.zeros((n_cl, n_cl))
25 |     for fname in fnames_val[dataset]:        
26 |         net.forward()
27 |         h, _, _ = np.histogram2d(net.blobs['gt'].data[0, 0].flatten(),
28 |                 net.blobs['score_crop'].data[0].argmax(0).flatten(),
29 |                 bins=n_cl, range=[[0, n_cl], [0, n_cl]])
30 |         hist += h
31 |         iu = np.zeros(n_cl)
32 |         for i in range(n_cl):
33 |             iu[i] = hist[i, i] / (hist[i].sum() + hist[:, i].sum() - hist[i, i])
34 |         print 'Image : ',count,' ,  Name : ',fname,' , mean IU (till here) : ', np.nanmean(iu)*100
35 |         #print '\tClasses Present : ',np.unique(net.blobs['gt'].data[0, 0].astype(np.uint8))
36 |         #print '\tClasses Predicted : ', np.unique(net.blobs['score_crop'].data[0].argmax(0).astype(np.uint8))
37 |         #print ''
38 |         # im = Image.fromarray(net.blobs['score_crop'].data[0].argmax(0).astype(np.uint8), mode='P')
39 |         # im.save(os.path.join(save_dir, fname + '.png'))
40 |         count += 1
41 |         import sys
42 |         sys.stdout.flush()
43 |     return hist
44 | 
45 | def seg_tests(test_net, save_format, dataset, weights, net_def):
46 |     print '>>>', datetime.now(), 'Begin seg tests'
47 |     n_cl = test_net.blobs['score_crop'].channels
48 |     hist = compute_hist(test_net, save_format, dataset)
49 |     # overall accuracy
50 |     acc = np.diag(hist).sum() / hist.sum()
51 |     print '>>>', datetime.now(), 'overall accuracy', acc
52 |     # per-class accuracy
53 |     acc = np.zeros(n_cl)
54 |     for i in range(n_cl):
55 |         acc[i] = hist[i, i] / hist[i].sum()
56 |     print '>>>', datetime.now(), 'mean accuracy', np.nanmean(acc)
57 |     # per-class IU
58 |     iu = np.zeros(n_cl)
59 |     for i in range(n_cl):
60 |         iu[i] = hist[i, i] / (hist[i].sum() + hist[:, i].sum() - hist[i, i])
61 |     print '>>>', datetime.now(), 'mean IU', np.nanmean(iu)*100
62 |     iu2 = [ round(100*elem, 1) for elem in iu ]
63 |     print '>>>', datetime.now(), 'per-class IU', iu2
64 |     freq = hist.sum(1) / hist.sum()
65 |     print '>>>', datetime.now(), 'fwavacc', \
66 |             (freq[freq > 0] * iu[freq > 0]).sum()
67 |     print 'Weight File', weights
68 |     print 'Proto File', net_def
69 | 
70 | 
71 | # Running the code 
72 | 
73 | dataset = 'pascal'
74 | save_format = prepare()
75 | 
76 | net_def = '../models/fcn_32s/train_32s.prototxt'
77 | weights = '../models/ccnn_models/ccnn_tag_size_train.caffemodel'
78 | 
79 | caffe.set_device(0)
80 | caffe.set_mode_gpu()
81 | test_net = caffe.Net(net_def, weights, caffe.TEST)
82 | seg_tests(test_net, save_format, dataset, weights, net_def)
83 | 


--------------------------------------------------------------------------------
/src/train.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # CCNN 
 3 | # Copyright (c) 2015 [See LICENSE file for details]
 4 | # Written by Deepak Pathak, Philipp Krahenbuhl
 5 | # --------------------------------------------------------
 6 | 
 7 | from config import *
 8 | import ccnn
 9 | import python_layers, dataset
10 | from glob import glob
11 | import numpy as np
12 | from time import time
13 | from sys import argv
14 | 
15 | caffe.set_mode_gpu()
16 | caffe.set_device(0)
17 | 
18 | MODEL_PROTOTXT = '../models/fcn_8s/train_8s.prototxt'
19 | MODEL_INIT = '../models/imagenet_pretrained_models/vgg_init_8s.caffemodel'
20 | 
21 | # MODEL_PROTOTXT = '../models/fcn_32s/train_32s.prototxt'
22 | # MODEL_INIT = '../models/imagenet_pretrained_models/vgg_init_32s.caffemodel'
23 | 
24 | MODEL_SAVE = '../models/ccnn_models/ccnn.caffemodel'
25 | 
26 | if len(argv)>1:
27 | 	MODEL_SAVE = argv[1]
28 | doTest = False
29 | 
30 | SOLVER_STR = """train_net: "{TRAIN_NET}"
31 | base_lr: 1e-6
32 | lr_policy: "step"
33 | gamma: 0.1
34 | stepsize: 40000
35 | display: 20
36 | max_iter: 35000
37 | momentum: 0.99
38 | weight_decay: 0.0000005
39 | #average_loss: 1
40 | """
41 | 
42 | SOLVER_STR = SOLVER_STR.replace( "{TRAIN_NET}", MODEL_PROTOTXT )
43 | 
44 | t0 = time()
45 | solver = caffe.get_solver_from_string(SOLVER_STR)
46 | solver.net.copy_from(MODEL_INIT) # Note that this does not copy the interpolation params!
47 | print "Load model %fs"%(time()-t0)
48 | 
49 | for it in range(35):
50 | 	t0 = time()
51 | 	solver.step(1000)
52 | 	t1 = time()
53 | 	print "%4d iterations t ="%((it+1)*1000), t1-t0
54 | 	solver.net.save(MODEL_SAVE)
55 | 	if (it+1)%5==0 and it>10:
56 | 		solver.net.save(MODEL_SAVE + '_'+str(it+1))
57 | 	if doTest and it>30:
58 | 		import subprocess
59 | 		try:
60 | 			sp.wait()
61 | 		except:
62 | 			pass
63 | 		sp = subprocess.Popen(['python', 'test_argmax.py'])
64 | 
65 | if doTest:
66 | 	try:
67 | 		sp.wait()
68 | 	except:
69 | 		pass
70 | 


--------------------------------------------------------------------------------
/src/user_config.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # CCNN 
3 | # Copyright (c) 2015 [See LICENSE file for details]
4 | # Written by Deepak Pathak, Philipp Krahenbuhl
5 | # --------------------------------------------------------
6 | 
7 | from os import path
8 | CAFFE_DIR = path.abspath('./../caffe-ccnn/')
9 | 


--------------------------------------------------------------------------------