├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake ├── FindEigen3.cmake └── FindNumPy.cmake ├── lib ├── CMakeLists.txt ├── constraintloss │ ├── CMakeLists.txt │ ├── constraintsoftmax.cpp │ └── constraintsoftmax.h ├── optimization │ ├── CMakeLists.txt │ ├── fista.cpp │ └── fista.h ├── python │ ├── CMakeLists.txt │ ├── boost.cpp │ ├── boost.h │ ├── ccnn.cpp │ ├── ccnn.h │ ├── constraintloss.cpp │ ├── constraintloss.h │ ├── util.cpp │ └── util.h └── util │ ├── CMakeLists.txt │ ├── eigen.cpp │ ├── eigen.h │ ├── win_util.cpp │ └── win_util.h ├── models ├── examples │ ├── gt1.png │ ├── gt2.png │ ├── im1.jpg │ ├── im2.jpg │ ├── im3.jpg │ ├── im4.jpg │ ├── out1.png │ └── out2.png ├── fcn_32s │ ├── deploy_32s.prototxt │ ├── solver_32s.prototxt │ └── train_32s.prototxt ├── fcn_8s │ ├── deploy_8s.prototxt │ ├── solver_8s.prototxt │ └── train_8s.prototxt └── scripts │ ├── download_ccnn_models.sh │ └── download_pretrained_models.sh └── src ├── ccnn.py ├── config.py ├── dataset.py ├── demo.py ├── extras ├── VOClabelcolormap.m ├── evaluate_pascal_seg_test.py ├── gen_bb_ind_helper.py ├── generate_bb_indicator.m ├── generate_lmbd.py ├── gray2ind.py ├── mat2png.py ├── png2gray.py └── save_png_colormap_pascal.m ├── python_layers.py ├── test.py ├── test_argmax.py ├── train.py └── user_config.py /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | **/__pycache__ 3 | *.kdev4 4 | **.pyc 5 | **.caffemodel 6 | **.solverstate 7 | **/.* 8 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "caffe-ccnn"] 2 | path = caffe-ccnn 3 | url = https://github.com/pathak22/caffe-ccnn.git 4 | branch = master 5 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project( ccnn ) 2 | cmake_minimum_required(VERSION 2.8) 3 | add_definitions( -DLBFGS_FLOAT=32 ) 4 | set( CMAKE_POSITION_INDEPENDENT_CODE True ) 5 | 6 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ) 7 | add_subdirectory( lib ) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | UC Berkeley's Standard Copyright and Disclaimer Notice: 2 | 3 | Copyright (c) 2015, Deepak Pathak, Philipp Krähenbühl 4 | and The Regents of the University of California (Regents). 5 | All Rights Reserved. 6 | 7 | Permission to use, copy, modify, and distribute this software and its 8 | documentation for educational, research, and not-for-profit purposes, without 9 | fee and without a signed licensing agreement, is hereby granted, provided that 10 | the above copyright notice, this paragraph and the following two paragraphs appear 11 | in all copies, modifications, and distributions. Contact The Office of Technology 12 | Licensing, UC Berkeley, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94720-1620, 13 | (510) 643-7201, for commercial licensing opportunities. 14 | 15 | IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, 16 | INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE 17 | USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN ADVISED OF THE 18 | POSSIBILITY OF SUCH DAMAGE. 
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 | BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 | PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS 21 | PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 22 | ENHANCEMENTS, OR MODIFICATIONS. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## CCNN: Constrained Convolutional Neural Networks for Weakly Supervised Segmentation 2 | 3 | [Deepak Pathak](http://cs.berkeley.edu/~pathak), [Philipp Krähenbühl](http://www.philkr.net/), [Trevor Darrell](http://cs.berkeley.edu/~trevor) 4 | 5 | **CCNN** is a framework for optimizing convolutional neural networks with linear constraints. 6 | - It has been shown to achieve state-of-the-art results on the task of weakly-supervised semantic segmentation. 7 | - It is written in Python and C++, and based on [Caffe](http://caffe.berkeleyvision.org/). 8 | - It was published at **ICCV 2015**, and was initially described in the [arXiv report](http://arxiv.org/abs/1506.03648). 9 | 10 | If you find CCNN useful in your research, please cite: 11 | 12 | @inproceedings{pathakICCV15ccnn, 13 | Author = {Pathak, Deepak and Kr\"ahenb\"uhl, Philipp and Darrell, Trevor}, 14 | Title = {Constrained Convolutional Neural Networks for Weakly Supervised Segmentation}, 15 | Booktitle = {International Conference on Computer Vision ({ICCV})}, 16 | Year = {2015} 17 | } 18 | 19 | ### License 20 | 21 | CCNN is released under an academic, non-commercial UC Berkeley license (see the [LICENSE](https://github.com/pathak22/ccnn/blob/master/LICENSE) file for details). 22 | 23 | ### Contents 24 | 1. [Requirements](#1-requirements) 25 | 2. [Installation](#2-installation) 26 | 3. [Usage](#3-usage) 27 | 4. [Scripts Information](#4-scripts-information) 28 | 5. [Extra Downloads](#5-extra-downloads) 29 | 30 | ### 1) Requirements 31 | 32 | 1. Requirements for `Caffe` and `pycaffe` (see: [Caffe installation instructions](http://caffe.berkeleyvision.org/installation.html)) 33 | 2. GCC version 4.7 or later 34 | 3. Boost version 1.53 or later (recommended). If the system packages cause issues, install the Anaconda dependencies: 35 | 36 | ``` 37 | $ conda install boost 38 | $ conda install protobuf 39 | ``` 40 | 41 | 4. A good GPU (e.g., Titan, K20, K40, ...); 3GB of GPU memory is sufficient. 42 | 43 | ### 2) Installation 44 | 45 | 1. Clone the CCNN repository 46 | ```Shell 47 | # Make sure to clone with --recursive 48 | git clone --recursive https://github.com/pathak22/ccnn.git 49 | ``` 50 | 51 | 2. Build Caffe and pycaffe 52 | 53 | - Now follow the Caffe installation instructions [here](http://caffe.berkeleyvision.org/installation.html) 54 | - Caffe *must* be built with support for Python layers! 55 | - In your Makefile.config, make sure this line is uncommented: 56 | `WITH_PYTHON_LAYER := 1` 57 | - You can download my [Makefile.config](http://www.cs.berkeley.edu/~pathak/ccnn/Makefile.config) for reference. 58 | 59 | ```Shell 60 | cd ccnn/caffe-ccnn 61 | # If you have all caffe requirements installed 62 | # and your Makefile.config in place, then simply do: 63 | make -j8 && make pycaffe 64 | ``` 65 | 66 | 3. Now build CCNN 67 | 68 | ```Shell 69 | cd ccnn 70 | mkdir build 71 | cd build 72 | cmake ..
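# Note: if CMake picks up the wrong Python or Boost here (e.g. Anaconda
# vs. system), pass the paths explicitly -- see the example command below.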
73 | make -j8 74 | ``` 75 | 76 | - **Note:** If Anaconda is installed, the Python paths may get mixed up between Anaconda and the system Python. 77 | - I usually run this command: 78 | 79 | ```Shell 80 | cmake .. -DBOOST_ROOT=/home/pathak/anaconda -DPYTHON_LIBRARY=/home/pathak/anaconda/lib/libpython2.7.so -DPYTHON_INCLUDE_DIR=/home/pathak/anaconda/include/python2.7/ -DCMAKE_C_COMPILER=gcc-4.8 -DCMAKE_CXX_COMPILER=g++-4.8 81 | ``` 82 | 83 | - To verify this, run `ccmake ./` inside the build folder and manually check the following: 84 | `CMAKE_CXX_COMPILER, CMAKE_C_COMPILER, PYTHON_EXECUTABLE, PYTHON_INCLUDE_DIR, PYTHON_LIBRARY` 85 | - Make sure that CMake doesn't mix the Anaconda Boost with the system Boost. 86 | 87 | 4. Configure paths (if needed) in `src/user_config.py`. 88 | 89 | 5. (Optional -- I don't do it) If everything runs fine, set `CMAKE_BUILD_TYPE` to `Release` using `ccmake .`. This prevents Eigen from checking assertions and runs faster. 90 | 91 | ### 3) Usage 92 | 93 | **Demo** CCNN. 94 | 95 | ```Shell 96 | cd ccnn 97 | bash ./models/scripts/download_ccnn_models.sh 98 | # This will populate the `ccnn/models/` folder with trained models. 99 | python ./src/demo.py 100 | ``` 101 | 102 | **Train** CCNN. 103 | 104 | ```Shell 105 | cd ccnn 106 | bash ./models/scripts/download_pretrained_models.sh 107 | # This will populate the `ccnn/models/` folder with ImageNet pre-trained models. 108 | python ./src/train.py 2> log.txt 109 | ``` 110 | 111 | **Test** CCNN. 112 | 113 | ```Shell 114 | cd ccnn 115 | python ./src/test.py # To test IoU with CRF post-processing 116 | python ./src/test_argmax.py # To test IoU without CRF 117 | ``` 118 | 119 | ### 4) Scripts Information 120 | 121 | Model Prototxts: 122 | - `models/fcn_8s/` : Atrous-algorithm-based 8-strided VGG, described [here](http://arxiv.org/abs/1412.7062). 123 | - `models/fcn_32s/` : 32-strided VGG 124 | 125 | Configure: 126 | - `src/config.py` : Set `glog-minlevel` to control how much Caffe output is printed to the terminal 127 | 128 | Helper Scripts: 129 | - `src/extras/` : These scripts are not needed to run the code. They are simple helper scripts to create data, prepare the PASCAL test server file, add the PASCAL colormap to segmentation outputs, etc. 130 | 131 | ### 5) Extra Downloads 132 | 133 | - Pascal VOC Image List: [train](http://www.cs.berkeley.edu/~pathak/ccnn/train.txt), [val](http://www.cs.berkeley.edu/~pathak/ccnn/val.txt), [trainval](http://www.cs.berkeley.edu/~pathak/ccnn/trainval.txt), [test](http://www.cs.berkeley.edu/~pathak/ccnn/test.txt) 134 | - [Training image-level label indicator files](http://www.cs.berkeley.edu/~pathak/ccnn/trainIndicatorFiles.tar.gz) 135 | - [Pascal VOC 2012 validation result images](http://www.cs.berkeley.edu/~pathak/ccnn/voc_2012_val_results.tar.gz) 136 | - [Pascal VOC 2012 test result images](http://www.cs.berkeley.edu/~pathak/ccnn/voc_2012_test_results.tar.gz) 137 | -------------------------------------------------------------------------------- /cmake/FindEigen3.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Eigen3 lib 2 | # 3 | # This module supports requiring a minimum version, e.g. you can do 4 | # find_package(Eigen3 3.1.2) 5 | # to require version 3.1.2 or newer of Eigen3.
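# A minimal usage sketch (this repo's lib/CMakeLists.txt does the same):
#   find_package(Eigen3 3.2.0 REQUIRED)
#   include_directories(${EIGEN3_INCLUDE_DIR})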
6 | # 7 | # Once done this will define 8 | # 9 | # EIGEN3_FOUND - system has eigen lib with correct version 10 | # EIGEN3_INCLUDE_DIR - the eigen include directory 11 | # EIGEN3_VERSION - eigen version 12 | 13 | # Copyright (c) 2006, 2007 Montel Laurent, 14 | # Copyright (c) 2008, 2009 Gael Guennebaud, 15 | # Copyright (c) 2009 Benoit Jacob 16 | # Redistribution and use is allowed according to the terms of the 2-clause BSD license. 17 | 18 | if(NOT Eigen3_FIND_VERSION) 19 | if(NOT Eigen3_FIND_VERSION_MAJOR) 20 | set(Eigen3_FIND_VERSION_MAJOR 2) 21 | endif(NOT Eigen3_FIND_VERSION_MAJOR) 22 | if(NOT Eigen3_FIND_VERSION_MINOR) 23 | set(Eigen3_FIND_VERSION_MINOR 91) 24 | endif(NOT Eigen3_FIND_VERSION_MINOR) 25 | if(NOT Eigen3_FIND_VERSION_PATCH) 26 | set(Eigen3_FIND_VERSION_PATCH 0) 27 | endif(NOT Eigen3_FIND_VERSION_PATCH) 28 | 29 | set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") 30 | endif(NOT Eigen3_FIND_VERSION) 31 | 32 | macro(_eigen3_check_version) 33 | file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) 34 | 35 | string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") 36 | set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") 37 | string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") 38 | set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") 39 | string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}") 40 | set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") 41 | 42 | set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) 43 | if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 44 | set(EIGEN3_VERSION_OK FALSE) 45 | else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 46 | set(EIGEN3_VERSION_OK TRUE) 47 | endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 48 | 49 | if(NOT EIGEN3_VERSION_OK) 50 | 51 | message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " 52 | "but at least version ${Eigen3_FIND_VERSION} is required") 53 | endif(NOT EIGEN3_VERSION_OK) 54 | endmacro(_eigen3_check_version) 55 | 56 | if (EIGEN3_INCLUDE_DIR) 57 | 58 | # in cache already 59 | _eigen3_check_version() 60 | set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) 61 | 62 | else (EIGEN3_INCLUDE_DIR) 63 | 64 | find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library 65 | PATHS 66 | ${CMAKE_INSTALL_PREFIX}/include 67 | ${KDE4_INCLUDE_DIR} 68 | PATH_SUFFIXES eigen3 eigen 69 | ) 70 | 71 | if(EIGEN3_INCLUDE_DIR) 72 | _eigen3_check_version() 73 | endif(EIGEN3_INCLUDE_DIR) 74 | 75 | include(FindPackageHandleStandardArgs) 76 | find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK) 77 | 78 | mark_as_advanced(EIGEN3_INCLUDE_DIR) 79 | 80 | endif(EIGEN3_INCLUDE_DIR) 81 | 82 | -------------------------------------------------------------------------------- /cmake/FindNumPy.cmake: -------------------------------------------------------------------------------- 1 | # - Find the NumPy libraries 2 | # This module finds if NumPy is installed, and sets the following variables 3 | # indicating where it is. 4 | # 5 | # TODO: Update to provide the libraries and paths for linking npymath lib. 
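# A minimal usage sketch (this repo's lib/CMakeLists.txt uses it this way):
#   find_package(NumPy REQUIRED)
#   include_directories(${NUMPY_INCLUDE_DIRS})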
6 | # 7 | # NUMPY_FOUND - was NumPy found 8 | # NUMPY_VERSION - the version of NumPy found as a string 9 | # NUMPY_VERSION_MAJOR - the major version number of NumPy 10 | # NUMPY_VERSION_MINOR - the minor version number of NumPy 11 | # NUMPY_VERSION_PATCH - the patch version number of NumPy 12 | # NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601 13 | # NUMPY_INCLUDE_DIRS - path to the NumPy include files 14 | 15 | #============================================================================ 16 | # Copyright 2012 Continuum Analytics, Inc. 17 | # 18 | # MIT License 19 | # 20 | # Permission is hereby granted, free of charge, to any person obtaining 21 | # a copy of this software and associated documentation files 22 | # (the "Software"), to deal in the Software without restriction, including 23 | # without limitation the rights to use, copy, modify, merge, publish, 24 | # distribute, sublicense, and/or sell copies of the Software, and to permit 25 | # persons to whom the Software is furnished to do so, subject to 26 | # the following conditions: 27 | # 28 | # The above copyright notice and this permission notice shall be included 29 | # in all copies or substantial portions of the Software. 30 | # 31 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 32 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 33 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 34 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 35 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 36 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 37 | # OTHER DEALINGS IN THE SOFTWARE. 38 | # 39 | #============================================================================ 40 | 41 | # Finding NumPy involves calling the Python interpreter 42 | if(NumPy_FIND_REQUIRED) 43 | find_package(PythonInterp REQUIRED) 44 | else() 45 | find_package(PythonInterp) 46 | endif() 47 | 48 | if(NOT PYTHONINTERP_FOUND) 49 | set(NUMPY_FOUND FALSE) 50 | endif() 51 | 52 | execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" 53 | "import numpy as n; print(n.__version__); print(n.get_include());" 54 | RESULT_VARIABLE _NUMPY_SEARCH_SUCCESS 55 | OUTPUT_VARIABLE _NUMPY_VALUES 56 | ERROR_VARIABLE _NUMPY_ERROR_VALUE 57 | OUTPUT_STRIP_TRAILING_WHITESPACE) 58 | 59 | if(NOT _NUMPY_SEARCH_SUCCESS MATCHES 0) 60 | if(NumPy_FIND_REQUIRED) 61 | message(FATAL_ERROR 62 | "NumPy import failure:\n${_NUMPY_ERROR_VALUE}") 63 | endif() 64 | set(NUMPY_FOUND FALSE) 65 | endif() 66 | 67 | # Convert the process output into a list 68 | string(REGEX REPLACE ";" "\\\\;" _NUMPY_VALUES ${_NUMPY_VALUES}) 69 | string(REGEX REPLACE "\n" ";" _NUMPY_VALUES ${_NUMPY_VALUES}) 70 | list(GET _NUMPY_VALUES 0 NUMPY_VERSION) 71 | list(GET _NUMPY_VALUES 1 NUMPY_INCLUDE_DIRS) 72 | 73 | # Make sure all directory separators are '/' 74 | string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIRS ${NUMPY_INCLUDE_DIRS}) 75 | 76 | # Get the major and minor version numbers 77 | string(REGEX REPLACE "\\." 
";" _NUMPY_VERSION_LIST ${NUMPY_VERSION}) 78 | list(GET _NUMPY_VERSION_LIST 0 NUMPY_VERSION_MAJOR) 79 | list(GET _NUMPY_VERSION_LIST 1 NUMPY_VERSION_MINOR) 80 | list(GET _NUMPY_VERSION_LIST 2 NUMPY_VERSION_PATCH) 81 | string(REGEX MATCH "[0-9]*" NUMPY_VERSION_PATCH ${NUMPY_VERSION_PATCH}) 82 | math(EXPR NUMPY_VERSION_DECIMAL 83 | "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}") 84 | 85 | find_package_message(NUMPY 86 | "Found NumPy: version \"${NUMPY_VERSION}\" ${NUMPY_INCLUDE_DIRS}" 87 | "${NUMPY_INCLUDE_DIRS}${NUMPY_VERSION}") 88 | 89 | set(NUMPY_FOUND TRUE) 90 | 91 | -------------------------------------------------------------------------------- /lib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package( Eigen3 3.2.0 REQUIRED ) 2 | set(python_version "2" CACHE STRING "Specify which python version to use") 3 | MESSAGE(${python_version}) 4 | if(python_version VERSION_LESS 3.0.0) 5 | find_package(PythonInterp 2.7 REQUIRED) 6 | find_package(PythonLibs 2.7 REQUIRED) 7 | find_package(NumPy REQUIRED) 8 | find_package(Boost COMPONENTS python REQUIRED) 9 | else() 10 | find_package(PythonInterp 3.3 REQUIRED) 11 | find_package(PythonLibs 3.3 REQUIRED) 12 | find_package(NumPy REQUIRED) 13 | find_package(Boost COMPONENTS python-py34) 14 | if(NOT Boost_FOUND) 15 | find_package(Boost COMPONENTS python-py33) 16 | endif() 17 | if(NOT Boost_FOUND) 18 | find_package(Boost COMPONENTS python3 REQUIRED) 19 | endif() 20 | endif() 21 | find_package(OpenMP) 22 | 23 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -std=c++11 -Wall ${OpenMP_CXX_FLAGS}" ) # set global flags 24 | include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ${EIGEN3_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) 25 | 26 | add_subdirectory( constraintloss ) 27 | add_subdirectory( optimization ) 28 | add_subdirectory( python ) 29 | add_subdirectory( util ) 30 | -------------------------------------------------------------------------------- /lib/constraintloss/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library( constraintloss constraintsoftmax.cpp ) 2 | target_link_libraries( constraintloss util optimization ) -------------------------------------------------------------------------------- /lib/constraintloss/constraintsoftmax.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // Copyright (c) 2015 [See LICENSE file for details] 4 | // Written by Deepak Pathak, Philipp Krähenbühl 5 | // -------------------------------------------------------- 6 | 7 | #include "constraintsoftmax.h" 8 | #include "optimization/fista.h" 9 | #include "util/eigen.h" 10 | #include 11 | 12 | LinearConstraint::LinearConstraint( const VectorXf & a, float b, float slack ):a(a),b(b),slack(slack) { 13 | } 14 | float LinearConstraint::eval( const RMatrixXf & x ) const { 15 | // Return \sum_i a*x_i - b 16 | return (x*a).array().sum()-b; 17 | } 18 | 19 | // Performed across columns i.e. 
across channels 20 | static RMatrixXf expAndNormalize( const RMatrixXf & m ) { 21 | VectorXf mx = m.rowwise().maxCoeff(); 22 | RMatrixXf r = (m.colwise()-mx).array().exp(); 23 | return r.array().colwise() / r.array().rowwise().sum(); 24 | } 25 | static VectorXf logSumExp( const RMatrixXf & m ) { 26 | VectorXf mx = m.rowwise().maxCoeff(); 27 | return mx.array() + (m.colwise()-mx).array().exp().rowwise().sum().log(); 28 | } 29 | 30 | // scale_ : determines the hardness of the optimization. In the hard case, the entropy term in the KL divergence is zero. 31 | // An alternate way to implement this is to scale the second cross-entropy term in the KL divergence by 1000x! 32 | ConstraintSoftmax::ConstraintSoftmax( float scale ):scale_(scale) { 33 | } 34 | 35 | void ConstraintSoftmax::addLinearConstraint( const VectorXf & a, float b, float slack ) { 36 | linear_constraints_.push_back( LinearConstraint(a, b, slack) ); 37 | } 38 | void ConstraintSoftmax::addZeroConstraint( const VectorXf & a ) { 39 | eassert( (a.array() >= 0).all() ); 40 | if( zero_constraints_.size() ) 41 | zero_constraints_.array() = zero_constraints_.array() || (a.array() > 0); 42 | else 43 | zero_constraints_ = a.array() > 0; 44 | } 45 | RMatrixXf ConstraintSoftmax::compute( const RMatrixXf & f ) const { 46 | return expAndNormalize( scale_*computeLog( f ) ); 47 | } 48 | RMatrixXf ConstraintSoftmax::computeLog( const RMatrixXf & f ) const { 49 | const int N = f.rows(), M = f.cols(); 50 | // Special handling for zero constraints, let's remove all dimensions 51 | // that are constrained to 0 52 | int pM = M; 53 | RMatrixXf pf = f, P; 54 | std::vector<LinearConstraint> pc = linear_constraints_; 55 | 56 | // Project onto the zero constraints 57 | if( zero_constraints_.size() ) { 58 | pM = (zero_constraints_.array()==0).cast<int>().sum(); 59 | if( pM <= 1 ) { 60 | RMatrixXf r = 1*f; 61 | for( int i=0; i<M; i++ ) 62 | if( zero_constraints_[i]>0 ) 63 | r.col(i).setConstant(-1e10); 64 | return r; 65 | } 66 | // Build the projection matrix 67 | P = RMatrixXf::Zero(M,pM); 68 | for( int i=0,k=0; i<M; i++ ) 69 | if( !zero_constraints_[i] ) 70 | P(i,k++) = 1; 71 | // Project the scores and the linear constraints onto the remaining dimensions 72 | pf = f*P; 73 | for( int i=0; i<(int)pc.size(); i++ ) 74 | pc[i].a = P.transpose()*pc[i].a; 75 | } 76 | 77 | // Our constraints are of the form Ap >= b (with slack : Ap >= b - slack) 78 | // Then our objective is D(p||q) = \sum p log p - \sum p log q + l' (b - Ap) 79 | // = - H_p - \sum p pf - l' A p + lb 80 | // d/dp D(p||q) = log p + 1 + c - pf - A' l = 0 81 | // p = 1/Z exp(pf + A'l) 82 | // where l >= 0 83 | // The objective then simplifies to 84 | // D(p||q) = \sum p (pf + A'l) - log Z - \sum p pf + l' (b - Ap) 85 | // = -log Z + l' b 86 | 87 | RMatrixXf A(pc.size(),pM); 88 | VectorXf b(pc.size()), slack(pc.size()); 89 | for( int i=0; i<(int)pc.size(); i++ ) { 90 | A.row(i) = pc[i].a.transpose() / N; // Normalize by spatial_dim (no change theoretically, for implementation stability) 91 | b[i] = pc[i].b; 92 | slack[i] = pc[i].slack * N; // Scale regularizer of slack according to spatial_dim 93 | } 94 | 95 | // Solve for the soft assignment to the latent variables 96 | // This function returns 97 | // g : Gradient vector for dual variables. Returned as function argument.
98 | // return : objective value for dual optimization (which is to be minimized) 99 | auto fun = [&](const VectorXf & l, VectorXf * g) -> double { 100 | if( g ) { 101 | RMatrixXf p = expAndNormalize( scale_*(pf.rowwise() + l.transpose()*A) ).colwise().sum(); 102 | *g = A*(p.colwise().sum()).transpose() - b; 103 | } 104 | return 1.0/scale_*logSumExp( scale_*(pf.rowwise() + l.transpose()*A) ).sum() - l.dot(b); 105 | }; 106 | auto proj = [&](const VectorXf & x)->VectorXf { 107 | // if (x.array().maxCoeff() > 0) { 108 | // std::cout<<"\nActive Dual before slack : "<<x.transpose()<<std::endl; 109 | // } 110 | // Clamp the dual variables to the feasible box [0, slack] 111 | return x.cwiseMax(0.f).cwiseMin(slack); 112 | }; 113 | 114 | // Optimize the dual with projected gradient descent 115 | VectorXf l = pgd( VectorXf::Zero(pc.size()), fun, proj ); 116 | 117 | // Recover the primal solution (unnormalized log probabilities) 118 | RMatrixXf pr = pf.rowwise() + l.transpose()*A; 119 | if( !zero_constraints_.size() ) 120 | return pr; 121 | 122 | // Undo the zero-constraint projection 123 | RMatrixXf r(N,M); 124 | for( int i=0,k=0; i<M; i++ ) { 125 | if( !zero_constraints_[i] ) 126 | r.col(i) = pr.col(k++); 127 | // Zero-constrained dimensions get log-probability -1e10 128 | if( zero_constraints_[i]>0 ) 129 | r.col(i).setConstant(-1e10); 130 | } 131 | return r; 132 | } 133 | 134 | -------------------------------------------------------------------------------- /lib/constraintloss/constraintsoftmax.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // Copyright (c) 2015 [See LICENSE file for details] 4 | // Written by Deepak Pathak, Philipp Krähenbühl 5 | // -------------------------------------------------------- 6 | 7 | #pragma once 8 | #include "util/eigen.h" 9 | 10 | struct LinearConstraint { 11 | LinearConstraint( const VectorXf & a, float b, float slack=1e10 ); 12 | // A constraint \sum_i a*x_i >= b - slack 13 | VectorXf a; 14 | float b,slack; 15 | float eval( const RMatrixXf & x ) const; 16 | }; 17 | 18 | class ConstraintSoftmax { 19 | protected: 20 | float scale_; 21 | std::vector<LinearConstraint> linear_constraints_; 22 | VectorXb zero_constraints_; 23 | public: 24 | ConstraintSoftmax( float scale=1.0 ); 25 | // A constraint \sum_i a*x_i >= b 26 | void addLinearConstraint( const VectorXf & a, float b, float slack=1e10 ); 27 | // A constraint \sum_i a*x_i == 0 where a >= 0 28 | void addZeroConstraint( const VectorXf & a ); 29 | RMatrixXf compute( const RMatrixXf & f ) const; 30 | RMatrixXf computeLog( const RMatrixXf & f ) const; 31 | }; 32 | -------------------------------------------------------------------------------- /lib/optimization/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library( optimization fista.cpp ) 2 | target_link_libraries( optimization util ) 3 | -------------------------------------------------------------------------------- /lib/optimization/fista.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // Copyright (c) 2015 [See LICENSE file for details] 4 | // Written by Deepak Pathak, Philipp Krähenbühl 5 | // -------------------------------------------------------- 6 | 7 | #include "fista.h" 8 | #include <iostream> 9 | 10 | VectorXf identity(const VectorXf & x ) { return x; } 11 | 12 | VectorXf fista( VectorXf x0, function_t f, projection_t p, bool verbose ) { 13 | const int N_ITER = 3000; 14 | const float beta = 0.5; 15 | float alpha = 1e-1; 16 | 17 | VectorXf r = x0; 18 | float best_e = 1e10; 19 | VectorXf x1 = x0, g = 0*x0; 20 | for( int k=1; k<=N_ITER && alpha>1e-5; k++ ) { 21 | // Strictly speaking this is not "legal" FISTA, but it seems to work well in practice 22 | alpha *= 1.05; 23 | 24 | // Compute y 25 | VectorXf y = x1 + (k-2.)
/ (k+1.)*(x1 - x0); 26 | // Evaluate the gradient at y 27 | float fy = f(y,&g), fx = 1e10; 28 | // Update the old x 29 | x0 = x1; 30 | // Update x 31 | x1 = p( y - alpha*g ); 32 | while( alpha >= 1e-5 && (fx=f(x1,NULL)) > fy + g.dot(x1-y)+1./(2.*alpha)*(x1-y).dot(x1-y) ) { 33 | alpha *= beta; 34 | x1 = p( y - alpha*g ); 35 | } 36 | if ( fx < best_e ) { 37 | best_e = fx; 38 | r = x0; 39 | } 40 | if (verbose){ 41 | printf("it = %d df = %f alpha = %f\n", k, (x0-x1).array().abs().maxCoeff(), alpha ); 42 | std::cout<<"Objective : "<<fx<<std::endl; 43 | } 44 | } 45 | return r; 46 | } 47 | 48 | // Projected gradient descent with an adaptive step size: grow the step on 49 | // accepted moves, shrink it when a move is rejected 50 | VectorXf pgd( VectorXf x0, function_t f, projection_t p, bool verbose, bool * converged ) { 51 | const int N_ITER = 3000; 52 | const float beta = 0.5; 53 | float alpha = 1e-1; 54 | 55 | VectorXf g; 56 | float prev_fx = f(x0,&g); 57 | if (verbose) 58 | printf("f0 = %f\n", prev_fx ); 59 | 60 | // Take a projected gradient step and accept it only if the objective 61 | // improves; otherwise reject it and reduce the step size 62 | 63 | int k; 64 | 65 | for( k=0; k<N_ITER && alpha>1e-8; k++ ) { 66 | VectorXf ng; 67 | float fx = f(p(x0-alpha*g),&ng); 68 | if( fx < prev_fx ) { 69 | x0 = p(x0-alpha*g); 70 | g = ng; 71 | prev_fx = fx; 72 | alpha *= 1.1; 73 | } 74 | else 75 | alpha *= beta; 76 | } 77 | 78 | // Debugging 79 | // if (k>N_ITER){ 80 | // std::cout<<"PGD didn't converge\n"; 81 | // std::cout<<"K="<<k<<" lambda = "<<x0.transpose()<<std::endl; 82 | // } 83 | 84 | if( converged ) 85 | *converged = k < N_ITER; 86 | return x0; 87 | } 88 | -------------------------------------------------------------------------------- /lib/optimization/fista.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // Copyright (c) 2015 [See LICENSE file for details] 4 | // Written by Deepak Pathak, Philipp Krähenbühl 5 | // -------------------------------------------------------- 6 | 7 | #pragma once 8 | #include "util/eigen.h" 9 | #include <functional> 10 | 11 | typedef std::function<double( const VectorXf &, VectorXf * )> function_t; 12 | typedef std::function<VectorXf( const VectorXf & )> projection_t; 13 | 14 | VectorXf identity(const VectorXf & x ); 15 | VectorXf fista( VectorXf x0, function_t f, projection_t p = identity, bool verbose=false ); 16 | VectorXf pgd( VectorXf x0, function_t f, projection_t p = identity, bool verbose=false, bool * converged=NULL ); 17 | -------------------------------------------------------------------------------- /lib/python/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories( ${Boost_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS}) 2 | link_directories( ${Boost_LIBRARY_DIR} ) 3 | file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/__init__.py" "" ) 4 | 5 | add_library( ccnn SHARED boost.cpp ccnn.cpp util.cpp constraintloss.cpp ) 6 | target_link_libraries( ccnn ${Boost_LIBRARIES} ${PYTHON_LIBRARIES} constraintloss util) 7 | 8 | set_target_properties( ccnn PROPERTIES PREFIX "") 9 | if(APPLE) 10 | set_target_properties( ccnn PROPERTIES SUFFIX ".so" ) 11 | endif() 12 | -------------------------------------------------------------------------------- /lib/python/boost.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014, Philipp Krähenbühl 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | * Neither the name of the Stanford University nor the 13 | names of its contributors may be used to endorse or promote products 14 | derived from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY 17 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED.
IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY 20 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | #include "boost.h" 28 | -------------------------------------------------------------------------------- /lib/python/boost.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014, Philipp Krähenbühl 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | * Neither the name of the Stanford University nor the 13 | names of its contributors may be used to endorse or promote products 14 | derived from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY 17 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY 20 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | */ 27 | #pragma once 28 | #include <memory> 29 | #include <boost/python.hpp> 30 | #include <boost/python/suite/indexing/vector_indexing_suite.hpp> 31 | using namespace boost::python; 32 | -------------------------------------------------------------------------------- /lib/python/ccnn.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // Copyright (c) 2015 [See LICENSE file for details] 4 | // Written by Deepak Pathak, Philipp Krähenbühl 5 | // -------------------------------------------------------- 6 | 7 | #include "util.h" 8 | #include "constraintloss.h" 9 | #include "ccnn.h" 10 | 11 | BOOST_PYTHON_MODULE(ccnn) 12 | { 13 | import_array1(); 14 | 15 | defineUtil(); 16 | defineConstraintloss(); 17 | } 18 | -------------------------------------------------------------------------------- /lib/python/ccnn.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // Copyright (c) 2015 [See LICENSE file for details] 4 | // Written by Deepak Pathak, Philipp Krähenbühl 5 | // -------------------------------------------------------- 6 | 7 | #pragma once 8 | #include "boost.h" 9 | #include <string> 10 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 11 | #include <numpy/arrayobject.h> 12 | 13 | #define ADD_MODULE( name ) object name ## Module(handle<>(borrowed(PyImport_AddModule(((std::string)"ccnn."+# name).c_str()))));\ 14 | scope().attr(# name) = name ## Module;\ 15 | scope name ## _scope = name ## Module; 16 | -------------------------------------------------------------------------------- /lib/python/constraintloss.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // Copyright (c) 2015 [See LICENSE file for details] 4 | // Written by Deepak Pathak, Philipp Krähenbühl 5 | // -------------------------------------------------------- 6 | 7 | #include "constraintloss.h" 8 | #include "ccnn.h" 9 | #include "constraintloss/constraintsoftmax.h" 10 | 11 | BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(ConstraintSoftmax_addLinearConstraint_o, ConstraintSoftmax::addLinearConstraint, 2, 3 ); 12 | 13 | void defineConstraintloss() { 14 | ADD_MODULE( constraintloss ); 15 | 16 | class_<ConstraintSoftmax>("ConstraintSoftmax",init<>()) 17 | .def(init<float>()) 18 | .def( "addLinearConstraint", &ConstraintSoftmax::addLinearConstraint, ConstraintSoftmax_addLinearConstraint_o() ) 19 | .def( "addZeroConstraint", &ConstraintSoftmax::addZeroConstraint ) 20 | .def( "compute", &ConstraintSoftmax::compute ) 21 | .def( "computeLog", &ConstraintSoftmax::computeLog ); 22 | } 23 | -------------------------------------------------------------------------------- /lib/python/constraintloss.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // Copyright (c) 2015 [See LICENSE file for details] 4 | // Written by Deepak Pathak, Philipp Krähenbühl 5 | // -------------------------------------------------------- 6 | 7 | #pragma once 8 | 9 | void defineConstraintloss(); 10 | -------------------------------------------------------------------------------- /lib/python/util.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // 2015.
Modified by Deepak Pathak, Philipp Krähenbühl 4 | // -------------------------------------------------------- 5 | 6 | /* 7 | Copyright (c) 2014, Philipp Krähenbühl 8 | All rights reserved. 9 | 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions are met: 12 | * Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | * Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | * Neither the name of the Stanford University nor the 18 | names of its contributors may be used to endorse or promote products 19 | derived from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY 22 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY 25 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 28 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include "util.h" 33 | #include "ccnn.h" 34 | #include "util/eigen.h" 35 | 36 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 37 | #include <numpy/arrayobject.h> 38 | #include <cstring> 39 | 40 | // For numpy 1.6 define NPY_ARRAY_* 41 | #if NPY_API_VERSION < 0x00000007 42 | #define NPY_ARRAY_C_CONTIGUOUS NPY_C_CONTIGUOUS 43 | #define NPY_ARRAY_ALIGNED NPY_ALIGNED 44 | #endif 45 | 46 | template <typename T> 47 | struct NumpyEquivalentType {}; 48 | 49 | template <> struct NumpyEquivalentType<double> {enum { type_code = NPY_DOUBLE };}; 50 | template <> struct NumpyEquivalentType<float> {enum { type_code = NPY_FLOAT };}; 51 | template <> struct NumpyEquivalentType<int64_t> {enum { type_code = NPY_INT64 };}; 52 | template <> struct NumpyEquivalentType<uint64_t> {enum { type_code = NPY_UINT64 };}; 53 | template <> struct NumpyEquivalentType<int32_t> {enum { type_code = NPY_INT32 };}; 54 | template <> struct NumpyEquivalentType<uint32_t> {enum { type_code = NPY_UINT32 };}; 55 | template <> struct NumpyEquivalentType<int16_t> {enum { type_code = NPY_INT16 };}; 56 | template <> struct NumpyEquivalentType<uint16_t> {enum { type_code = NPY_UINT16 };}; 57 | template <> struct NumpyEquivalentType<int8_t> {enum { type_code = NPY_INT8 };}; 58 | template <> struct NumpyEquivalentType<uint8_t> {enum { type_code = NPY_UINT8 };}; 59 | template <> struct NumpyEquivalentType<bool> {enum { type_code = NPY_BOOL };}; 60 | 61 | template< typename T > 62 | void copyMat( T * dst, const T* src, int cols, int rows, bool transpose ) { 63 | if( !transpose ) 64 | memcpy( dst, src, cols*rows*sizeof(T) ); 65 | else { 66 | for( int j=0; j<rows; j++ ) 67 | for( int i=0; i<cols; i++ ) 68 | dst[j*cols+i] = src[i*rows+j]; 69 | } 70 | } 71 | 72 | // Allocate a new numpy array and return it as a PyArrayObject* 73 | static PyArrayObject * PyArrayObject_New( int nd, npy_intp * shape, int type_code ) { 74 | return (PyArrayObject*)PyArray_SimpleNew( nd, shape, type_code ); 75 | } 76 | // Total number of elements of a 1- or 2-d numpy array 77 | #define PyArray_SIZE2( array ) (PyArray_NDIM(array)==2 ? PyArray_DIMS(array)[0]*PyArray_DIMS(array)[1] : PyArray_DIMS(array)[0]) 78 | template <typename MatType> 79 | struct EigenMatrixToPython { 80 | static PyObject* convert(const MatType& mat) { 81 | typedef typename MatType::Scalar T; 82 | PyArrayObject* python_array; 83 | if( MatType::ColsAtCompileTime==1 || MatType::RowsAtCompileTime==1 ) { 84 | npy_intp shape[1] = { mat.rows()*mat.cols() }; 85 | python_array = PyArrayObject_New(1, shape,
NumpyEquivalentType<T>::type_code); 86 | } 87 | else { 88 | npy_intp shape[2] = { mat.rows(), mat.cols() }; 89 | python_array = PyArrayObject_New(2, shape, NumpyEquivalentType<T>::type_code); 90 | } 91 | copyMat( (T*)PyArray_DATA(python_array), mat.data(), mat.rows(), mat.cols(), !(MatType::Flags & RowMajor) ); 92 | return (PyObject*)python_array; 93 | } 94 | }; 95 | 96 | template <typename MatType> 97 | struct EigenMatrixFromPython { 98 | typedef typename MatType::Scalar T; 99 | EigenMatrixFromPython() { 100 | converter::registry::push_back(&convertible, &construct, type_id<MatType>()); 101 | } 102 | static void* convertible(PyObject* obj_ptr) { 103 | const int R = MatType::RowsAtCompileTime; 104 | const int C = MatType::ColsAtCompileTime; 105 | PyArrayObject *array = reinterpret_cast<PyArrayObject*>(obj_ptr); 106 | if (!PyArray_Check(obj_ptr) || PyArray_NDIM(array) > 2 || PyArray_NDIM(array) <= 0 || PyArray_TYPE(array) != NumpyEquivalentType<T>::type_code) 107 | return 0; 108 | if( R==1 || C==1 ) { // Eigen Vector 109 | if ( PyArray_NDIM(array)==2 && PyArray_DIMS(array)[0]>1 && PyArray_DIMS(array)[1]>1 ) 110 | return 0; 111 | if ( PyArray_NDIM(array)==1 && R*C > 0 && R*C != PyArray_DIMS(array)[0] ) 112 | return 0; 113 | } 114 | else if ( R > 1 && PyArray_DIMS(array)[0] != R ) 115 | return 0; 116 | else if ( C > 1 && (PyArray_NDIM(array)<2 || PyArray_DIMS(array)[1] != C) ) 117 | return 0; 118 | return obj_ptr; 119 | } 120 | static void construct(PyObject* obj_ptr, converter::rvalue_from_python_stage1_data* data) { 121 | const int R = MatType::RowsAtCompileTime; 122 | const int C = MatType::ColsAtCompileTime; 123 | 124 | PyArrayObject *array = reinterpret_cast<PyArrayObject*>(obj_ptr); 125 | int flags = PyArray_FLAGS(array); 126 | if (!(flags & NPY_ARRAY_C_CONTIGUOUS) || !(flags & NPY_ARRAY_ALIGNED)) 127 | throw std::invalid_argument("Contiguous and aligned array required!"); 128 | const int ndims = PyArray_NDIM(array); 129 | 130 | const int dtype_size = (PyArray_DESCR(array))->elsize; 131 | const int s1 = PyArray_STRIDE(array, 0), s2 = ndims > 1 ? PyArray_STRIDE(array, 1) : 0; 132 | 133 | int nrows=1, ncols=1; 134 | if( R==1 || C==1 ) { // Vector 135 | nrows = R==1 ? 1 : PyArray_SIZE2(array); 136 | ncols = C==1 ? 1 : PyArray_SIZE2(array); 137 | } 138 | else { 139 | nrows = (R == Dynamic) ? PyArray_DIMS(array)[0] : R; 140 | if ( ndims > 1 ) 141 | ncols = (C == Dynamic) ?
PyArray_DIMS(array)[1] : C; 142 | } 143 | T* raw_data = reinterpret_cast<T*>(PyArray_DATA(array)); 144 | 145 | typedef Map< Matrix<T,Dynamic,Dynamic,RowMajor>, Aligned, Stride<Dynamic,Dynamic> > MapType; 146 | 147 | void* storage=((converter::rvalue_from_python_storage<MatType>*)(data))->storage.bytes; 148 | new (storage) MatType; 149 | MatType* emat = (MatType*)storage; 150 | *emat = MapType(raw_data, nrows, ncols,Stride<Dynamic,Dynamic>(s1/dtype_size, s2/dtype_size)); 151 | data->convertible = storage; 152 | } 153 | }; 154 | 155 | #define EIGEN_MATRIX_CONVERTER(Type) EigenMatrixFromPython<Type>(); to_python_converter<Type, EigenMatrixToPython<Type> >(); 156 | // #define EIGEN_MATRIX_CONVERTER(Type) EigenMatrixFromPython<Type>(); to_python_converter<Type, EigenMatrixToPython<Type> >(); to_python_converter< std::vector<Type>, EigenMatrixToPython<Type> >(); 157 | 158 | #define MAT_CONV( N )\ 159 | EIGEN_MATRIX_CONVERTER( N ## d );\ 160 | EIGEN_MATRIX_CONVERTER( N ## f );\ 161 | EIGEN_MATRIX_CONVERTER( N ## i );\ 162 | EIGEN_MATRIX_CONVERTER( N ## u );\ 163 | EIGEN_MATRIX_CONVERTER( N ## s );\ 164 | EIGEN_MATRIX_CONVERTER( N ## us );\ 165 | EIGEN_MATRIX_CONVERTER( N ## i8 );\ 166 | EIGEN_MATRIX_CONVERTER( N ## u8 );\ 167 | EIGEN_MATRIX_CONVERTER( N ## b ) 168 | 169 | #define EIGEN_MATRIX_VECTOR(Type) class_< std::vector<Type> >((std::string("Vec")+std::string(#Type)).c_str()).def( vector_indexing_suite< std::vector<Type>, true >() ).def( VectorInitSuite< std::vector<Type> >() ); 170 | #define MAT_VEC( N )\ 171 | EIGEN_MATRIX_VECTOR( N ## d );\ 172 | EIGEN_MATRIX_VECTOR( N ## f );\ 173 | EIGEN_MATRIX_VECTOR( N ## i );\ 174 | EIGEN_MATRIX_VECTOR( N ## u );\ 175 | EIGEN_MATRIX_VECTOR( N ## s );\ 176 | EIGEN_MATRIX_VECTOR( N ## us );\ 177 | EIGEN_MATRIX_VECTOR( N ## i8 );\ 178 | EIGEN_MATRIX_VECTOR( N ## u8 );\ 179 | EIGEN_MATRIX_VECTOR( N ## b ) 180 | 181 | // Exceptions 182 | void translateAssertException(const AssertException& e) { 183 | PyErr_SetString(PyExc_AssertionError, e.what()); 184 | } 185 | 186 | #if PY_MAJOR_VERSION >= 3 187 | int init_numpy() { import_array(); return 0; } 188 | #else 189 | void init_numpy() { import_array(); return; } 190 | #endif 191 | 192 | void defineUtil() { 193 | // NOTE: This file has a ton of macros and templates, so it's going to take a while to compile ... 194 | init_numpy(); 195 | boost::python::numeric::array::set_module_and_type("numpy", "ndarray"); 196 | 197 | register_exception_translator<AssertException>(&translateAssertException); 198 | 199 | ADD_MODULE(util); 200 | 201 | // NOTE: When overloading functions always make sure to put the array/matrix function before the vector one 202 | MAT_CONV( MatrixX ); 203 | MAT_CONV( RMatrixX ); 204 | MAT_CONV( VectorX ); 205 | MAT_CONV( ArrayXX ); 206 | MAT_CONV( RArrayXX ); 207 | MAT_CONV( ArrayX ); 208 | 209 | // Define some std::vectors 210 | MAT_VEC( RMatrixX ); 211 | MAT_VEC( VectorX ); 212 | 213 | // Datastructures 214 | class_< std::vector<int> >("VecInt").def( vector_indexing_suite< std::vector<int> >() ).def( VectorInitSuite< std::vector<int> >() ); 215 | class_< std::vector<float> >("VecFloat").def( vector_indexing_suite< std::vector<float> >() ).def( VectorInitSuite< std::vector<float> >() ); 216 | } 217 | -------------------------------------------------------------------------------- /lib/python/util.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // 2015. Modified by Deepak Pathak, Philipp Krähenbühl 4 | // -------------------------------------------------------- 5 | 6 | /* 7 | Copyright (c) 2014, Philipp Krähenbühl 8 | All rights reserved.
9 | 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions are met: 12 | * Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | * Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | * Neither the name of the Stanford University nor the 18 | names of its contributors may be used to endorse or promote products 19 | derived from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY 22 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY 25 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 28 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #pragma once 33 | #include <memory> 34 | #include <sstream> 35 | #include <vector> 36 | #include <boost/python.hpp> 37 | #include <boost/python/suite/indexing/vector_indexing_suite.hpp> 38 | using namespace boost::python; 39 | 40 | // Make older boost versions happy 41 | #if BOOST_VERSION < 105300 42 | template <typename T> T* get_pointer(const std::shared_ptr<T>& p) { return p.get(); } 43 | #endif 44 | 45 | template <typename OBJ> 46 | struct SaveLoad_pickle_suite : pickle_suite { 47 | static object getstate(const OBJ& obj) { 48 | std::stringstream ss; 49 | obj.save( ss ); 50 | std::string data = ss.str(); 51 | return object( handle<>( PyBytes_FromStringAndSize( data.data(), data.size() ) ) ); 52 | } 53 | 54 | static void setstate(OBJ& obj, const object & state) { 55 | if(!PyBytes_Check(state.ptr())) 56 | throw std::invalid_argument("Failed to unpickle, unexpected type!"); 57 | std::stringstream ss( std::string( PyBytes_AS_STRING(state.ptr()), PyBytes_Size(state.ptr()) ) ); 58 | obj.load( ss ); 59 | } 60 | }; 61 | 62 | template <typename OBJ> 63 | struct SaveLoad_pickle_suite_shared_ptr : pickle_suite { 64 | static object getstate(const std::shared_ptr<OBJ>& obj) { 65 | std::stringstream ss; 66 | obj->save( ss ); 67 | std::string data = ss.str(); 68 | return object( handle<>( PyBytes_FromStringAndSize( data.data(), data.size() ) ) ); 69 | } 70 | 71 | static void setstate(std::shared_ptr<OBJ> obj, const object & state) { 72 | if(!PyBytes_Check(state.ptr())) 73 | throw std::invalid_argument("Failed to unpickle, unexpected type!"); 74 | std::stringstream ss( std::string( PyBytes_AS_STRING(state.ptr()), PyBytes_Size(state.ptr()) ) ); 75 | obj->load( ss ); 76 | } 77 | }; 78 | 79 | template <typename OBJ> 80 | struct VectorSaveLoad_pickle_suite_shared_ptr : pickle_suite { 81 | static object getstate(const std::vector< std::shared_ptr<OBJ> > & obj) { 82 | std::stringstream ss; 83 | const int nobj = obj.size(); 84 | ss.write( (const char*)&nobj, sizeof(nobj) ); 85 | for( int i=0; i<nobj; i++ ) 86 | obj[i]->save( ss ); 87 | std::string data = ss.str(); 88 | return object( handle<>( PyBytes_FromStringAndSize( data.data(), data.size() ) ) ); 89 | } 90 | 91 | static void setstate(std::vector< std::shared_ptr<OBJ> > & obj,
const object & state) { 92 | if(!PyBytes_Check(state.ptr())) 93 | throw std::invalid_argument("Failed to unpickle, unexpected type!"); 94 | std::stringstream ss( std::string( PyBytes_AS_STRING(state.ptr()), PyBytes_Size(state.ptr()) ) ); 95 | int nobj = 0; 96 | ss.read( (char*)&nobj, sizeof(nobj) ); 97 | obj.resize( nobj ); 98 | for( int i=0; i<nobj; i++ ) { 99 | obj[i] = std::make_shared<OBJ>(); 100 | obj[i]->load( ss ); 101 | } 102 | } 103 | }; 104 | 105 | template <typename T> 106 | struct VectorInitSuite: public def_visitor< VectorInitSuite<T> > { 107 | typedef typename T::value_type D; 108 | 109 | static T * init_list( const list & l ) { 110 | T * r = new T; 111 | const int N = len(l); 112 | for ( int i=0; i<N; i++ ) 113 | r->push_back( extract<D>(l[i]) ); 114 | return r; 115 | } 116 | // template <typename C> static C * init_list( const list & l ) { 117 | // C * r = new C; 118 | // const int N = len(l); 119 | // for ( int i=0; i<N; i++ ) 120 | // r->push_back( extract<typename C::value_type>(l[i]) ); 121 | // return r; 122 | // } 123 | template <class C> 124 | void visit(C& cl) const 125 | { 126 | cl 127 | .def("__init__", make_constructor(&VectorInitSuite::init_list)); 128 | // .def("__init__", make_constructor(&init_generator)); 129 | } 130 | }; 131 | 132 | template <typename T> 133 | std::vector<T> to_vector( const list & l ) { 134 | std::vector<T> r; 135 | for( int i=0; i<len(l); i++ ) 136 | r.push_back( extract<T>(l[i]) ); 137 | return r; 138 | } 139 | 140 | void defineUtil(); 141 | 142 | class ScopedGILRelease 143 | { 144 | public: 145 | inline ScopedGILRelease() { 146 | m_thread_state = PyEval_SaveThread(); 147 | } 148 | inline ~ScopedGILRelease() { 149 | PyEval_RestoreThread(m_thread_state); 150 | m_thread_state = NULL; 151 | } 152 | private: 153 | PyThreadState * m_thread_state; 154 | ScopedGILRelease( const ScopedGILRelease & o ) { } 155 | }; 156 | -------------------------------------------------------------------------------- /lib/util/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library( util eigen.cpp) 2 | target_link_libraries( util ) 3 | -------------------------------------------------------------------------------- /lib/util/eigen.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // 2015. Modified by Deepak Pathak, Philipp Krähenbühl 4 | // -------------------------------------------------------- 5 | 6 | /* 7 | Copyright (c) 2014, Philipp Krähenbühl 8 | All rights reserved. 9 | 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions are met: 12 | * Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | * Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | * Neither the name of the Stanford University nor the 18 | names of its contributors may be used to endorse or promote products 19 | derived from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY 22 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED.
IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY 25 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 28 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include "eigen.h" 33 | 34 | VectorXi range( int end ) { 35 | return range( 0, end ); 36 | } 37 | VectorXi range( int start, int end ) { 38 | VectorXi r(end-start); 39 | for( int i=0; i<end-start; i++ ) 40 | r[i] = start+i; 41 | return r; 42 | } -------------------------------------------------------------------------------- /lib/util/eigen.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // 2015. Modified by Deepak Pathak, Philipp Krähenbühl 4 | // -------------------------------------------------------- 5 | 6 | /* 7 | Copyright (c) 2014, Philipp Krähenbühl 8 | All rights reserved. 9 | 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions are met: 12 | * Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | * Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | * Neither the name of the Stanford University nor the 18 | names of its contributors may be used to endorse or promote products 19 | derived from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY 22 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY 25 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 28 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #pragma once 33 | 34 | #include <stdexcept> 35 | #define EIGEN_DONT_PARALLELIZE 36 | #include <Eigen/Core> 37 | #include <Eigen/Sparse> 38 | using namespace Eigen; 39 | 40 | // Assert with exception handling 41 | class AssertException: public std::logic_error { 42 | public: 43 | AssertException( const std::string & assertion, const std::string & location ): logic_error("Assertion \""+assertion+"\" failed in "+location) {} 44 | }; 45 | #define eassert( x ) {if (!(x)) throw AssertException( _str(x), FILE_AND_LINE );} 46 | #define _xstr(s) _str(s) 47 | #define _str(s) #s 48 | #define LINE_STRING _xstr(__LINE__) 49 | #define FILE_AND_LINE ((std::string)__FILE__ + (std::string)":" + LINE_STRING) 50 | 51 | #define DEFINE_MAT( N )\ 52 | typedef N<double>( N ## d );\ 53 | typedef N<float>( N ## f );\ 54 | typedef N<int>( N ## i );\ 55 | typedef N<unsigned int>( N ## u );\ 56 | typedef N<short>( N ## s );\ 57 | typedef N<unsigned short>( N ## us );\ 58 | typedef N<int8_t>( N ## i8 );\ 59 | typedef N<uint8_t>( N ## u8 );\ 60 | typedef N<bool>( N ## b ) 61 | 62 | #define DEFINE_MAT2( N )\ 63 | typedef N<unsigned int>( N ## u );\ 64 | typedef N<short>( N ## s );\ 65 | typedef N<unsigned short>( N ## us );\ 66 | typedef N<int8_t>( N ## i8 );\ 67 | typedef N<uint8_t>( N ## u8 );\ 68 | typedef N<bool>( N ## b ) 69 | 70 | template <typename T> using RowVectorX = Matrix<T,1,Dynamic>; 71 | template <typename T> using RMatrixX = Matrix<T,Dynamic,Dynamic,RowMajor>; 72 | template <typename T> using SMatrixX = SparseMatrix<T>; 73 | template <typename T> using SRMatrixX = SparseMatrix<T,RowMajor>; 74 | template <typename T> using RArrayXX = Array<T,Dynamic,Dynamic,RowMajor>; 75 | template <typename T> using MatrixX = Matrix<T,Dynamic,Dynamic>; 76 | template <typename T> using ArrayXX = Array<T,Dynamic,Dynamic>; 77 | template <typename T> using VectorX = Matrix<T,Dynamic,1>; 78 | template <typename T> using ArrayX = Array<T,Dynamic,1>; 79 | 80 | DEFINE_MAT( RMatrixX ); 81 | DEFINE_MAT( SMatrixX ); 82 | DEFINE_MAT( SRMatrixX ); 83 | DEFINE_MAT( RArrayXX ); 84 | DEFINE_MAT2( MatrixX ); 85 | DEFINE_MAT2( ArrayXX ); 86 | DEFINE_MAT2( VectorX ); 87 | DEFINE_MAT2( ArrayX ); 88 | 89 | namespace std{ 90 | template< typename T, int R, int C, int O, int RR, int CC > const T * begin( const Matrix<T,R,C,O,RR,CC> & m ){ 91 | return m.data(); 92 | } 93 | template< typename T, int R, int C, int O, int RR, int CC > const T * end( const Matrix<T,R,C,O,RR,CC> & m ){ 94 | return m.data()+m.size(); 95 | } 96 | template< typename T, int R, int C, int O, int RR, int CC > T * begin( Matrix<T,R,C,O,RR,CC> & m ){ 97 | return m.data(); 98 | } 99 | template< typename T, int R, int C, int O, int RR, int CC > T * end( Matrix<T,R,C,O,RR,CC> & m ){ 100 | return m.data()+m.size(); 101 | } 102 | } 103 | VectorXi range( int end ); 104 | VectorXi range( int start, int end ); 105 | 106 | template <typename T, int R, int C, int O, int RR, int CC> 107 | void saveMatrixX( std::ostream & s, const Matrix<T,R,C,O,RR,CC> & m ) { 108 | int rc[2] = {(int)m.rows(),(int)m.cols()}; 109 | s.write( (char*)rc, sizeof(rc) ); 110 | s.write( (char*)m.data(), m.size()*sizeof(T) ); 111 | } 112 | template <typename T, int R, int C, int O, int RR, int CC> 113 | void loadMatrixX( std::istream & s, Matrix<T,R,C,O,RR,CC> & m ) { 114 | int rc[2]; 115 | s.read( (char*)rc, sizeof(rc) ); 116 | m = Matrix<T,R,C,O,RR,CC>(rc[0],rc[1]); 117 | s.read( (char*)m.data(), m.size()*sizeof(T) ); 118 | }
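// A quick usage sketch for the serialization helpers above (illustrative
// only; the file name and values are made up):
//   RMatrixXf m = RMatrixXf::Random(3, 4);
//   std::ofstream os("m.bin", std::ios::binary);
//   saveMatrixX(os, m);   // writes rows and cols, then the raw buffer
//   RMatrixXf m2;
//   std::ifstream is("m.bin", std::ios::binary);
//   loadMatrixX(is, m2);  // m2 now equals m
//   VectorXi idx = range(5);  // [0, 1, 2, 3, 4]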
-------------------------------------------------------------------------------- /lib/util/win_util.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // 2015. Modified by Deepak Pathak, Philipp Krähenbühl 4 | // -------------------------------------------------------- 5 | 6 | /* 7 | Copyright (c) 2014, Philipp Krähenbühl 8 | All rights reserved. 9 | 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions are met: 12 | * Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | * Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | * Neither the name of the Stanford University nor the 18 | names of its contributors may be used to endorse or promote products 19 | derived from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY 22 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY 25 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 28 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include "win_util.h" 33 | -------------------------------------------------------------------------------- /lib/util/win_util.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // CCNN 3 | // 2015. Modified by Deepak Pathak, Philipp Krähenbühl 4 | // -------------------------------------------------------- 5 | 6 | /* 7 | Copyright (c) 2014, Philipp Krähenbühl 8 | All rights reserved. 9 | 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions are met: 12 | * Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | * Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 17 | * Neither the name of the Stanford University nor the 18 | names of its contributors may be used to endorse or promote products 19 | derived from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY 22 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. 
IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY 25 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 28 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #pragma once 33 | #ifdef _MSC_VER 34 | #define _USE_MATH_DEFINES 35 | 36 | #endif 37 | #include <cmath> 38 | -------------------------------------------------------------------------------- /models/examples/gt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/gt1.png -------------------------------------------------------------------------------- /models/examples/gt2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/gt2.png -------------------------------------------------------------------------------- /models/examples/im1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/im1.jpg -------------------------------------------------------------------------------- /models/examples/im2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/im2.jpg -------------------------------------------------------------------------------- /models/examples/im3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/im3.jpg -------------------------------------------------------------------------------- /models/examples/im4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/im4.jpg -------------------------------------------------------------------------------- /models/examples/out1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/out1.png -------------------------------------------------------------------------------- /models/examples/out2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathak22/ccnn/aa3d3b2b0c194640fc2b887dbff04d9a5c032392/models/examples/out2.png -------------------------------------------------------------------------------- /models/fcn_32s/deploy_32s.prototxt: -------------------------------------------------------------------------------- 1 | name : "FCN-32s" 2 | 3 | input: 'data' 4 | input_dim: 1 5 | input_dim: 3 6 | input_dim: 500 # Dummy Size. Can pass any size image. See demo.py 7 | input_dim: 500 # Dummy Size. Can pass any size image. 
See demo.py 8 | 9 | input: 'data-orig' 10 | input_dim: 1 11 | input_dim: 3 12 | input_dim: 500 # Dummy Size. Can pass any size image. See demo.py 13 | input_dim: 500 # Dummy Size. Can pass any size image. See demo.py 14 | 15 | # ========== Network architecture begin ================ 16 | 17 | layer : { 18 | name : "conv1_1" 19 | type : "Convolution" 20 | bottom : "data" 21 | top : "conv1_1" 22 | convolution_param : { 23 | engine: CAFFE 24 | num_output : 64 25 | pad : 100 26 | kernel_size : 3 27 | } 28 | param : { 29 | lr_mult : 1.0 30 | decay_mult : 1.0 31 | } 32 | param : { 33 | lr_mult : 2.0 34 | decay_mult : 0.0 35 | } 36 | } 37 | layer : { 38 | name : "relu1_1" 39 | type : "ReLU" 40 | bottom : "conv1_1" 41 | top : "conv1_1" 42 | } 43 | layer : { 44 | name : "conv1_2" 45 | type : "Convolution" 46 | bottom : "conv1_1" 47 | top : "conv1_2" 48 | convolution_param : { 49 | engine: CAFFE 50 | num_output : 64 51 | pad : 1 52 | kernel_size : 3 53 | } 54 | param : { 55 | lr_mult : 1.0 56 | decay_mult : 1.0 57 | } 58 | param : { 59 | lr_mult : 2.0 60 | decay_mult : 0.0 61 | } 62 | } 63 | layer : { 64 | name : "relu1_2" 65 | type : "ReLU" 66 | bottom : "conv1_2" 67 | top : "conv1_2" 68 | } 69 | layer : { 70 | name : "pool1" 71 | type : "Pooling" 72 | bottom : "conv1_2" 73 | top : "pool1" 74 | pooling_param : { 75 | pool : MAX 76 | kernel_size : 2 77 | stride : 2 78 | } 79 | } 80 | layer : { 81 | name : "conv2_1" 82 | type : "Convolution" 83 | bottom : "pool1" 84 | top : "conv2_1" 85 | convolution_param : { 86 | engine: CAFFE 87 | num_output : 128 88 | pad : 1 89 | kernel_size : 3 90 | } 91 | param : { 92 | lr_mult : 1.0 93 | decay_mult : 1.0 94 | } 95 | param : { 96 | lr_mult : 2.0 97 | decay_mult : 0.0 98 | } 99 | } 100 | layer : { 101 | name : "relu2_1" 102 | type : "ReLU" 103 | bottom : "conv2_1" 104 | top : "conv2_1" 105 | } 106 | layer : { 107 | name : "conv2_2" 108 | type : "Convolution" 109 | bottom : "conv2_1" 110 | top : "conv2_2" 111 | convolution_param : { 112 | engine: CAFFE 113 | num_output : 128 114 | pad : 1 115 | kernel_size : 3 116 | } 117 | param : { 118 | lr_mult : 1.0 119 | decay_mult : 1.0 120 | } 121 | param : { 122 | lr_mult : 2.0 123 | decay_mult : 0.0 124 | } 125 | } 126 | layer : { 127 | name : "relu2_2" 128 | type : "ReLU" 129 | bottom : "conv2_2" 130 | top : "conv2_2" 131 | } 132 | layer : { 133 | name : "pool2" 134 | type : "Pooling" 135 | bottom : "conv2_2" 136 | top : "pool2" 137 | pooling_param : { 138 | pool : MAX 139 | kernel_size : 2 140 | stride : 2 141 | } 142 | } 143 | layer : { 144 | name : "conv3_1" 145 | type : "Convolution" 146 | bottom : "pool2" 147 | top : "conv3_1" 148 | convolution_param : { 149 | engine: CAFFE 150 | num_output : 256 151 | pad : 1 152 | kernel_size : 3 153 | } 154 | param : { 155 | lr_mult : 1.0 156 | decay_mult : 1.0 157 | } 158 | param : { 159 | lr_mult : 2.0 160 | decay_mult : 0.0 161 | } 162 | } 163 | layer : { 164 | name : "relu3_1" 165 | type : "ReLU" 166 | bottom : "conv3_1" 167 | top : "conv3_1" 168 | } 169 | layer : { 170 | name : "conv3_2" 171 | type : "Convolution" 172 | bottom : "conv3_1" 173 | top : "conv3_2" 174 | convolution_param : { 175 | engine: CAFFE 176 | num_output : 256 177 | pad : 1 178 | kernel_size : 3 179 | } 180 | param : { 181 | lr_mult : 1.0 182 | decay_mult : 1.0 183 | } 184 | param : { 185 | lr_mult : 2.0 186 | decay_mult : 0.0 187 | } 188 | } 189 | layer : { 190 | name : "relu3_2" 191 | type : "ReLU" 192 | bottom : "conv3_2" 193 | top : "conv3_2" 194 | } 195 | layer : { 196 | name : "conv3_3" 197 | type 
: "Convolution" 198 | bottom : "conv3_2" 199 | top : "conv3_3" 200 | convolution_param : { 201 | engine: CAFFE 202 | num_output : 256 203 | pad : 1 204 | kernel_size : 3 205 | } 206 | param : { 207 | lr_mult : 1.0 208 | decay_mult : 1.0 209 | } 210 | param : { 211 | lr_mult : 2.0 212 | decay_mult : 0.0 213 | } 214 | } 215 | layer : { 216 | name : "relu3_3" 217 | type : "ReLU" 218 | bottom : "conv3_3" 219 | top : "conv3_3" 220 | } 221 | layer : { 222 | name : "pool3" 223 | type : "Pooling" 224 | bottom : "conv3_3" 225 | top : "pool3" 226 | pooling_param : { 227 | pool : MAX 228 | kernel_size : 2 229 | stride : 2 230 | } 231 | } 232 | layer : { 233 | name : "conv4_1" 234 | type : "Convolution" 235 | bottom : "pool3" 236 | top : "conv4_1" 237 | convolution_param : { 238 | engine: CAFFE 239 | num_output : 512 240 | pad : 1 241 | kernel_size : 3 242 | } 243 | param : { 244 | lr_mult : 1.0 245 | decay_mult : 1.0 246 | } 247 | param : { 248 | lr_mult : 2.0 249 | decay_mult : 0.0 250 | } 251 | } 252 | layer : { 253 | name : "relu4_1" 254 | type : "ReLU" 255 | bottom : "conv4_1" 256 | top : "conv4_1" 257 | } 258 | layer : { 259 | name : "conv4_2" 260 | type : "Convolution" 261 | bottom : "conv4_1" 262 | top : "conv4_2" 263 | convolution_param : { 264 | engine: CAFFE 265 | num_output : 512 266 | pad : 1 267 | kernel_size : 3 268 | } 269 | param : { 270 | lr_mult : 1.0 271 | decay_mult : 1.0 272 | } 273 | param : { 274 | lr_mult : 2.0 275 | decay_mult : 0.0 276 | } 277 | } 278 | layer : { 279 | name : "relu4_2" 280 | type : "ReLU" 281 | bottom : "conv4_2" 282 | top : "conv4_2" 283 | } 284 | layer : { 285 | name : "conv4_3" 286 | type : "Convolution" 287 | bottom : "conv4_2" 288 | top : "conv4_3" 289 | convolution_param : { 290 | engine: CAFFE 291 | num_output : 512 292 | pad : 1 293 | kernel_size : 3 294 | } 295 | param : { 296 | lr_mult : 1.0 297 | decay_mult : 1.0 298 | } 299 | param : { 300 | lr_mult : 2.0 301 | decay_mult : 0.0 302 | } 303 | } 304 | layer : { 305 | name : "relu4_3" 306 | type : "ReLU" 307 | bottom : "conv4_3" 308 | top : "conv4_3" 309 | } 310 | layer : { 311 | name : "pool4" 312 | type : "Pooling" 313 | bottom : "conv4_3" 314 | top : "pool4" 315 | pooling_param : { 316 | pool : MAX 317 | kernel_size : 2 318 | stride : 2 319 | } 320 | } 321 | layer : { 322 | name : "conv5_1" 323 | type : "Convolution" 324 | bottom : "pool4" 325 | top : "conv5_1" 326 | convolution_param : { 327 | engine: CAFFE 328 | num_output : 512 329 | pad : 1 330 | kernel_size : 3 331 | } 332 | param : { 333 | lr_mult : 1.0 334 | decay_mult : 1.0 335 | } 336 | param : { 337 | lr_mult : 2.0 338 | decay_mult : 0.0 339 | } 340 | } 341 | layer : { 342 | name : "relu5_1" 343 | type : "ReLU" 344 | bottom : "conv5_1" 345 | top : "conv5_1" 346 | } 347 | layer : { 348 | name : "conv5_2" 349 | type : "Convolution" 350 | bottom : "conv5_1" 351 | top : "conv5_2" 352 | convolution_param : { 353 | engine: CAFFE 354 | num_output : 512 355 | pad : 1 356 | kernel_size : 3 357 | } 358 | param : { 359 | lr_mult : 1.0 360 | decay_mult : 1.0 361 | } 362 | param : { 363 | lr_mult : 2.0 364 | decay_mult : 0.0 365 | } 366 | } 367 | layer : { 368 | name : "relu5_2" 369 | type : "ReLU" 370 | bottom : "conv5_2" 371 | top : "conv5_2" 372 | } 373 | layer : { 374 | name : "conv5_3" 375 | type : "Convolution" 376 | bottom : "conv5_2" 377 | top : "conv5_3" 378 | convolution_param : { 379 | engine: CAFFE 380 | num_output : 512 381 | pad : 1 382 | kernel_size : 3 383 | } 384 | param : { 385 | lr_mult : 1.0 386 | decay_mult : 1.0 387 | } 388 | 
param : { 389 | lr_mult : 2.0 390 | decay_mult : 0.0 391 | } 392 | } 393 | layer : { 394 | name : "relu5_3" 395 | type : "ReLU" 396 | bottom : "conv5_3" 397 | top : "conv5_3" 398 | } 399 | layer : { 400 | name : "pool5" 401 | type : "Pooling" 402 | bottom : "conv5_3" 403 | top : "pool5" 404 | pooling_param : { 405 | pool : MAX 406 | kernel_size : 2 407 | stride : 2 408 | } 409 | } 410 | layer : { 411 | name : "conv6" 412 | bottom : "pool5" 413 | top : "conv6" 414 | type: "Convolution" 415 | convolution_param { 416 | engine: CAFFE 417 | num_output : 4096 418 | kernel_size: 7 419 | weight_filler { 420 | type: "gaussian" 421 | std: 0.01 422 | } 423 | bias_filler { 424 | type: "constant" 425 | value: 0.1 426 | } 427 | } 428 | param : { 429 | lr_mult : 1.0 430 | decay_mult : 1.0 431 | } 432 | param : { 433 | lr_mult : 2.0 434 | decay_mult : 0.0 435 | } 436 | } 437 | layer : { 438 | name : "relu6" 439 | type : "ReLU" 440 | bottom : "conv6" 441 | top : "conv6" 442 | } 443 | layer { 444 | name: "drop6" 445 | type: "Dropout" 446 | bottom: "conv6" 447 | top: "conv6" 448 | dropout_param { 449 | dropout_ratio: 0.5 450 | } 451 | } 452 | layer : { 453 | name : "conv7" 454 | bottom : "conv6" 455 | top : "conv7" 456 | type: "Convolution" 457 | convolution_param { 458 | engine: CAFFE 459 | num_output : 4096 460 | kernel_size: 1 461 | weight_filler { 462 | type: "gaussian" 463 | std: 0.01 464 | } 465 | bias_filler { 466 | type: "constant" 467 | value: 0.1 468 | } 469 | } 470 | param : { 471 | lr_mult : 1.0 472 | decay_mult : 1.0 473 | } 474 | param : { 475 | lr_mult : 2.0 476 | decay_mult : 0.0 477 | } 478 | } 479 | layer : { 480 | name : "relu7" 481 | type : "ReLU" 482 | bottom : "conv7" 483 | top : "conv7" 484 | } 485 | layer { 486 | name: "drop7" 487 | type: "Dropout" 488 | bottom: "conv7" 489 | top: "conv7" 490 | dropout_param { 491 | dropout_ratio: 0.5 492 | } 493 | } 494 | layer { 495 | name: 'conv8' 496 | bottom: 'conv7' 497 | top: 'conv8' 498 | type: "Convolution" 499 | convolution_param { 500 | engine: CAFFE 501 | num_output: 21 502 | kernel_size: 1 503 | weight_filler { 504 | type: "gaussian" 505 | std: 0.01 506 | } 507 | } 508 | param : { 509 | lr_mult : 1.0 510 | decay_mult : 1.0 511 | } 512 | param : { 513 | lr_mult : 2.0 514 | decay_mult : 0.0 515 | } 516 | } 517 | 518 | # ========== Network architecture end ================== 519 | 520 | layer { type: 'Python' name: 'weak_loss' bottom: 'conv8' bottom: 'indicator' bottom: 'indicator_0.10' # bottom: 'flag_3' bottom: 'seg_gt' bottom: 'score_crop' 521 | top: 'loss' 522 | python_param { module: 'python_layers' layer: 'WeakLoss' } 523 | include { phase: TRAIN } 524 | loss_weight: 1 } 525 | 526 | layer { 527 | name: 'upsample' 528 | bottom: 'conv8' 529 | top: 'score' 530 | type: "Deconvolution" 531 | convolution_param { 532 | engine: CAFFE 533 | group: 21 534 | num_output: 21 535 | kernel_size: 64 536 | stride: 32 537 | weight_filler: { type: "bilinear_upsampling" } 538 | } 539 | param : { 540 | lr_mult : 0 541 | decay_mult : 0 542 | } 543 | include { phase: TEST } 544 | } 545 | 546 | layer { name: 'crop' type: 'Crop' bottom: 'score' bottom: 'data' top: 'score_crop' 547 | include { phase: TEST } 548 | } 549 | 550 | # =============Dense CRF=========================== 551 | # Uncomment the untransformed data layer code on top and set bottom of DENSECRF layer to bottom: 'data-orig', if you want to use that. But similar results with transformed data as well. 
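The DenseCRF layer declared just below runs fully connected CRF inference inside the caffe-ccnn fork: x_gauss/y_gauss and wt_gauss are the spatial standard deviations and weight of the Gaussian smoothness kernel, while x/y_bilateral, r/g/b_bilateral, and wt_bilateral parameterize the appearance (bilateral) kernel over position and color. For readers without the fork, a rough stand-alone sketch of the same post-processing using the separate pydensecrf package — an approximation for illustration, not the code path used here:

```Python
import numpy as np
import pydensecrf.densecrf as dcrf
from pydensecrf.utils import unary_from_softmax

def crf_postprocess(probs, im):
    # probs: (21, H, W) softmax scores; im: (H, W, 3) uint8 RGB image
    h, w = im.shape[:2]
    d = dcrf.DenseCRF2D(w, h, 21)
    d.setUnaryEnergy(unary_from_softmax(probs))
    # Roughly the "Tuned Deeplab Params" of the layer below: kernel stddevs and weights
    d.addPairwiseGaussian(sxy=19, compat=15)
    d.addPairwiseBilateral(sxy=61, srgb=10, rgbim=np.ascontiguousarray(im), compat=35)
    q = d.inference(10)
    return np.argmax(q, axis=0).reshape(h, w)
```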
552 | 553 | layer { type: 'DenseCRF' name: 'densecrf' bottom: 'score_crop' bottom: 'data-orig' top: 'upscore-crf' 554 | # Untuned Default Params 555 | # densecrf_param { x_gauss: 6 y_gauss: 6 wt_gauss: 6 556 | # x_bilateral: 50 y_bilateral: 50 r_bilateral: 4 g_bilateral: 4 b_bilateral: 4 wt_bilateral: 5 } 557 | # Tuned Deeplab Params 558 | densecrf_param { x_gauss: 19 y_gauss: 19 wt_gauss: 15 559 | x_bilateral: 61 y_bilateral: 61 r_bilateral: 10 g_bilateral: 10 b_bilateral: 10 wt_bilateral: 35 } 560 | include { phase: TEST } } 561 | # =================================================== 562 | -------------------------------------------------------------------------------- /models/fcn_32s/solver_32s.prototxt: -------------------------------------------------------------------------------- 1 | # Make sure that Loss is NOT-NORMALIZED by number of pixels i.e. self.normalization=FALSE in src/python_layers.py 2 | base_lr: 1e-8 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 10000 6 | display: 100 7 | max_iter: 40000 8 | momentum: 0.99 9 | weight_decay: 0.0 #0.0000005 10 | -------------------------------------------------------------------------------- /models/fcn_32s/train_32s.prototxt: -------------------------------------------------------------------------------- 1 | name : "FCN-32s" 2 | 3 | # =========== Train Data Layers ========================= 4 | 5 | layer { type: 'Data' name: 'data' top: 'data' 6 | data_param { 7 | source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/images_train_lmdb' 8 | batch_size: 1 backend: LMDB } 9 | transform_param { mean_value: 104.00698793 mean_value: 116.66876762 10 | mean_value: 122.67891434 } 11 | include { phase: TRAIN } } 12 | 13 | layer { type: 'HDF5Data' name: 'gt' 14 | top: 'indicator' top: 'indicator_0.01' top: 'indicator_0.05' top: 'indicator_0.10' 15 | top: 'flag_1' top: 'flag_3' top: 'flag_5' top: 'flag_10' top: 'flag_50' top: 'flag_100' top: 'flag_200' 16 | hdf5_data_param { 17 | source: "/mnt/a/pathak/fcn_mil_cache/VOC2012/indicator_train.txt" 18 | batch_size: 1 } 19 | include { phase: TRAIN } } 20 | 21 | layer { type: 'Silence' 22 | bottom: 'indicator' bottom: 'indicator_0.01' bottom: 'indicator_0.05' bottom: 'indicator_0.10' 23 | bottom: 'flag_1' bottom: 'flag_3' bottom: 'flag_5' bottom: 'flag_10' bottom: 'flag_50' bottom: 'flag_100' bottom: 'flag_200' 24 | include { phase: TRAIN } } 25 | 26 | # =========== Test Data Layers ========================= 27 | 28 | layer { type: 'Data' name: 'data' top: 'data' 29 | data_param { 30 | source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/images_val_lmdb' 31 | # source: '/mnt/a/pathak/fcn_mil_cache/lmdb_dataset_cache/datum_voc2012segtest' 32 | batch_size: 1 backend: LMDB } 33 | transform_param { mean_value: 104.00698793 mean_value: 116.66876762 34 | mean_value: 122.67891434 } 35 | include { phase: TEST } } 36 | 37 | layer { type: 'Data' name: 'data-orig' top: 'data-orig' 38 | data_param { 39 | source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/images_val_lmdb' 40 | # source: '/mnt/a/pathak/fcn_mil_cache/lmdb_dataset_cache/datum_voc2012segtest' 41 | batch_size: 1 backend: LMDB } 42 | include { phase: TEST } } 43 | 44 | layer { type: 'Data' name: 'gt' top: 'gt' 45 | data_param { 46 | source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/segmentation_class_val_lmdb' 47 | batch_size: 1 backend: LMDB } 48 | include { phase: TEST } } 49 | 50 | # ========== Network architecture begin ================ 51 | 52 | layer : { 53 | name : "conv1_1" 54 | type : "Convolution" 55 | bottom : "data" 56 | top : "conv1_1" 57 | convolution_param : { 58 | 
engine: CAFFE 59 | num_output : 64 60 | pad : 100 61 | kernel_size : 3 62 | } 63 | param : { 64 | lr_mult : 1.0 65 | decay_mult : 1.0 66 | } 67 | param : { 68 | lr_mult : 2.0 69 | decay_mult : 0.0 70 | } 71 | } 72 | layer : { 73 | name : "relu1_1" 74 | type : "ReLU" 75 | bottom : "conv1_1" 76 | top : "conv1_1" 77 | } 78 | layer : { 79 | name : "conv1_2" 80 | type : "Convolution" 81 | bottom : "conv1_1" 82 | top : "conv1_2" 83 | convolution_param : { 84 | engine: CAFFE 85 | num_output : 64 86 | pad : 1 87 | kernel_size : 3 88 | } 89 | param : { 90 | lr_mult : 1.0 91 | decay_mult : 1.0 92 | } 93 | param : { 94 | lr_mult : 2.0 95 | decay_mult : 0.0 96 | } 97 | } 98 | layer : { 99 | name : "relu1_2" 100 | type : "ReLU" 101 | bottom : "conv1_2" 102 | top : "conv1_2" 103 | } 104 | layer : { 105 | name : "pool1" 106 | type : "Pooling" 107 | bottom : "conv1_2" 108 | top : "pool1" 109 | pooling_param : { 110 | pool : MAX 111 | kernel_size : 2 112 | stride : 2 113 | } 114 | } 115 | layer : { 116 | name : "conv2_1" 117 | type : "Convolution" 118 | bottom : "pool1" 119 | top : "conv2_1" 120 | convolution_param : { 121 | engine: CAFFE 122 | num_output : 128 123 | pad : 1 124 | kernel_size : 3 125 | } 126 | param : { 127 | lr_mult : 1.0 128 | decay_mult : 1.0 129 | } 130 | param : { 131 | lr_mult : 2.0 132 | decay_mult : 0.0 133 | } 134 | } 135 | layer : { 136 | name : "relu2_1" 137 | type : "ReLU" 138 | bottom : "conv2_1" 139 | top : "conv2_1" 140 | } 141 | layer : { 142 | name : "conv2_2" 143 | type : "Convolution" 144 | bottom : "conv2_1" 145 | top : "conv2_2" 146 | convolution_param : { 147 | engine: CAFFE 148 | num_output : 128 149 | pad : 1 150 | kernel_size : 3 151 | } 152 | param : { 153 | lr_mult : 1.0 154 | decay_mult : 1.0 155 | } 156 | param : { 157 | lr_mult : 2.0 158 | decay_mult : 0.0 159 | } 160 | } 161 | layer : { 162 | name : "relu2_2" 163 | type : "ReLU" 164 | bottom : "conv2_2" 165 | top : "conv2_2" 166 | } 167 | layer : { 168 | name : "pool2" 169 | type : "Pooling" 170 | bottom : "conv2_2" 171 | top : "pool2" 172 | pooling_param : { 173 | pool : MAX 174 | kernel_size : 2 175 | stride : 2 176 | } 177 | } 178 | layer : { 179 | name : "conv3_1" 180 | type : "Convolution" 181 | bottom : "pool2" 182 | top : "conv3_1" 183 | convolution_param : { 184 | engine: CAFFE 185 | num_output : 256 186 | pad : 1 187 | kernel_size : 3 188 | } 189 | param : { 190 | lr_mult : 1.0 191 | decay_mult : 1.0 192 | } 193 | param : { 194 | lr_mult : 2.0 195 | decay_mult : 0.0 196 | } 197 | } 198 | layer : { 199 | name : "relu3_1" 200 | type : "ReLU" 201 | bottom : "conv3_1" 202 | top : "conv3_1" 203 | } 204 | layer : { 205 | name : "conv3_2" 206 | type : "Convolution" 207 | bottom : "conv3_1" 208 | top : "conv3_2" 209 | convolution_param : { 210 | engine: CAFFE 211 | num_output : 256 212 | pad : 1 213 | kernel_size : 3 214 | } 215 | param : { 216 | lr_mult : 1.0 217 | decay_mult : 1.0 218 | } 219 | param : { 220 | lr_mult : 2.0 221 | decay_mult : 0.0 222 | } 223 | } 224 | layer : { 225 | name : "relu3_2" 226 | type : "ReLU" 227 | bottom : "conv3_2" 228 | top : "conv3_2" 229 | } 230 | layer : { 231 | name : "conv3_3" 232 | type : "Convolution" 233 | bottom : "conv3_2" 234 | top : "conv3_3" 235 | convolution_param : { 236 | engine: CAFFE 237 | num_output : 256 238 | pad : 1 239 | kernel_size : 3 240 | } 241 | param : { 242 | lr_mult : 1.0 243 | decay_mult : 1.0 244 | } 245 | param : { 246 | lr_mult : 2.0 247 | decay_mult : 0.0 248 | } 249 | } 250 | layer : { 251 | name : "relu3_3" 252 | type : "ReLU" 253 | 
bottom : "conv3_3" 254 | top : "conv3_3" 255 | } 256 | layer : { 257 | name : "pool3" 258 | type : "Pooling" 259 | bottom : "conv3_3" 260 | top : "pool3" 261 | pooling_param : { 262 | pool : MAX 263 | kernel_size : 2 264 | stride : 2 265 | } 266 | } 267 | layer : { 268 | name : "conv4_1" 269 | type : "Convolution" 270 | bottom : "pool3" 271 | top : "conv4_1" 272 | convolution_param : { 273 | engine: CAFFE 274 | num_output : 512 275 | pad : 1 276 | kernel_size : 3 277 | } 278 | param : { 279 | lr_mult : 1.0 280 | decay_mult : 1.0 281 | } 282 | param : { 283 | lr_mult : 2.0 284 | decay_mult : 0.0 285 | } 286 | } 287 | layer : { 288 | name : "relu4_1" 289 | type : "ReLU" 290 | bottom : "conv4_1" 291 | top : "conv4_1" 292 | } 293 | layer : { 294 | name : "conv4_2" 295 | type : "Convolution" 296 | bottom : "conv4_1" 297 | top : "conv4_2" 298 | convolution_param : { 299 | engine: CAFFE 300 | num_output : 512 301 | pad : 1 302 | kernel_size : 3 303 | } 304 | param : { 305 | lr_mult : 1.0 306 | decay_mult : 1.0 307 | } 308 | param : { 309 | lr_mult : 2.0 310 | decay_mult : 0.0 311 | } 312 | } 313 | layer : { 314 | name : "relu4_2" 315 | type : "ReLU" 316 | bottom : "conv4_2" 317 | top : "conv4_2" 318 | } 319 | layer : { 320 | name : "conv4_3" 321 | type : "Convolution" 322 | bottom : "conv4_2" 323 | top : "conv4_3" 324 | convolution_param : { 325 | engine: CAFFE 326 | num_output : 512 327 | pad : 1 328 | kernel_size : 3 329 | } 330 | param : { 331 | lr_mult : 1.0 332 | decay_mult : 1.0 333 | } 334 | param : { 335 | lr_mult : 2.0 336 | decay_mult : 0.0 337 | } 338 | } 339 | layer : { 340 | name : "relu4_3" 341 | type : "ReLU" 342 | bottom : "conv4_3" 343 | top : "conv4_3" 344 | } 345 | layer : { 346 | name : "pool4" 347 | type : "Pooling" 348 | bottom : "conv4_3" 349 | top : "pool4" 350 | pooling_param : { 351 | pool : MAX 352 | kernel_size : 2 353 | stride : 2 354 | } 355 | } 356 | layer : { 357 | name : "conv5_1" 358 | type : "Convolution" 359 | bottom : "pool4" 360 | top : "conv5_1" 361 | convolution_param : { 362 | engine: CAFFE 363 | num_output : 512 364 | pad : 1 365 | kernel_size : 3 366 | } 367 | param : { 368 | lr_mult : 1.0 369 | decay_mult : 1.0 370 | } 371 | param : { 372 | lr_mult : 2.0 373 | decay_mult : 0.0 374 | } 375 | } 376 | layer : { 377 | name : "relu5_1" 378 | type : "ReLU" 379 | bottom : "conv5_1" 380 | top : "conv5_1" 381 | } 382 | layer : { 383 | name : "conv5_2" 384 | type : "Convolution" 385 | bottom : "conv5_1" 386 | top : "conv5_2" 387 | convolution_param : { 388 | engine: CAFFE 389 | num_output : 512 390 | pad : 1 391 | kernel_size : 3 392 | } 393 | param : { 394 | lr_mult : 1.0 395 | decay_mult : 1.0 396 | } 397 | param : { 398 | lr_mult : 2.0 399 | decay_mult : 0.0 400 | } 401 | } 402 | layer : { 403 | name : "relu5_2" 404 | type : "ReLU" 405 | bottom : "conv5_2" 406 | top : "conv5_2" 407 | } 408 | layer : { 409 | name : "conv5_3" 410 | type : "Convolution" 411 | bottom : "conv5_2" 412 | top : "conv5_3" 413 | convolution_param : { 414 | engine: CAFFE 415 | num_output : 512 416 | pad : 1 417 | kernel_size : 3 418 | } 419 | param : { 420 | lr_mult : 1.0 421 | decay_mult : 1.0 422 | } 423 | param : { 424 | lr_mult : 2.0 425 | decay_mult : 0.0 426 | } 427 | } 428 | layer : { 429 | name : "relu5_3" 430 | type : "ReLU" 431 | bottom : "conv5_3" 432 | top : "conv5_3" 433 | } 434 | layer : { 435 | name : "pool5" 436 | type : "Pooling" 437 | bottom : "conv5_3" 438 | top : "pool5" 439 | pooling_param : { 440 | pool : MAX 441 | kernel_size : 2 442 | stride : 2 443 | } 444 | } 
445 | layer : { 446 | name : "conv6" 447 | bottom : "pool5" 448 | top : "conv6" 449 | type: "Convolution" 450 | convolution_param { 451 | engine: CAFFE 452 | num_output : 4096 453 | kernel_size: 7 454 | weight_filler { 455 | type: "gaussian" 456 | std: 0.01 457 | } 458 | bias_filler { 459 | type: "constant" 460 | value: 0.1 461 | } 462 | } 463 | param : { 464 | lr_mult : 1.0 465 | decay_mult : 1.0 466 | } 467 | param : { 468 | lr_mult : 2.0 469 | decay_mult : 0.0 470 | } 471 | } 472 | layer : { 473 | name : "relu6" 474 | type : "ReLU" 475 | bottom : "conv6" 476 | top : "conv6" 477 | } 478 | layer { 479 | name: "drop6" 480 | type: "Dropout" 481 | bottom: "conv6" 482 | top: "conv6" 483 | dropout_param { 484 | dropout_ratio: 0.5 485 | } 486 | } 487 | layer : { 488 | name : "conv7" 489 | bottom : "conv6" 490 | top : "conv7" 491 | type: "Convolution" 492 | convolution_param { 493 | engine: CAFFE 494 | num_output : 4096 495 | kernel_size: 1 496 | weight_filler { 497 | type: "gaussian" 498 | std: 0.01 499 | } 500 | bias_filler { 501 | type: "constant" 502 | value: 0.1 503 | } 504 | } 505 | param : { 506 | lr_mult : 1.0 507 | decay_mult : 1.0 508 | } 509 | param : { 510 | lr_mult : 2.0 511 | decay_mult : 0.0 512 | } 513 | } 514 | layer : { 515 | name : "relu7" 516 | type : "ReLU" 517 | bottom : "conv7" 518 | top : "conv7" 519 | } 520 | layer { 521 | name: "drop7" 522 | type: "Dropout" 523 | bottom: "conv7" 524 | top: "conv7" 525 | dropout_param { 526 | dropout_ratio: 0.5 527 | } 528 | } 529 | layer { 530 | name: 'conv8' 531 | bottom: 'conv7' 532 | top: 'conv8' 533 | type: "Convolution" 534 | convolution_param { 535 | engine: CAFFE 536 | num_output: 21 537 | kernel_size: 1 538 | weight_filler { 539 | type: "gaussian" 540 | std: 0.01 541 | } 542 | } 543 | param : { 544 | lr_mult : 1.0 545 | decay_mult : 1.0 546 | } 547 | param : { 548 | lr_mult : 2.0 549 | decay_mult : 0.0 550 | } 551 | } 552 | 553 | # ========== Network architecture end ================== 554 | 555 | layer { type: 'Python' name: 'weak_loss' bottom: 'conv8' bottom: 'indicator' bottom: 'indicator_0.10' # bottom: 'flag_3' bottom: 'seg_gt' bottom: 'score_crop' 556 | top: 'loss' 557 | python_param { module: 'python_layers' layer: 'WeakLoss' } 558 | include { phase: TRAIN } 559 | loss_weight: 1 } 560 | 561 | layer { 562 | name: 'upsample' 563 | bottom: 'conv8' 564 | top: 'score' 565 | type: "Deconvolution" 566 | convolution_param { 567 | engine: CAFFE 568 | group: 21 569 | num_output: 21 570 | kernel_size: 64 571 | stride: 32 572 | weight_filler: { type: "bilinear_upsampling" } 573 | } 574 | param : { 575 | lr_mult : 0 576 | decay_mult : 0 577 | } 578 | include { phase: TEST } 579 | } 580 | 581 | layer { name: 'crop' type: 'Crop' bottom: 'score' bottom: 'data' top: 'score_crop' 582 | include { phase: TEST } 583 | } 584 | 585 | # =============Dense CRF=========================== 586 | # Uncomment the untransformed data layer code on top and set bottom of DENSECRF layer to bottom: 'data-orig', if you want to use that. But similar results with transformed data as well. 
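One aside before this TRAIN net's DenseCRF layer: the upsample deconvolution above (kernel_size: 64, stride: 32, group: 21, lr_mult: 0) is initialized by the fork's bilinear_upsampling weight filler and then frozen, so it performs fixed bilinear interpolation per class channel. A sketch of the weights such a filler produces, in the style of the standard FCN net-surgery code (illustrative — the fork's C++ filler is the authoritative implementation):

```Python
import numpy as np

def bilinear_upsampling_weights(kernel_size, num_classes):
    # Separable bilinear kernel; with group: 21 each class upsamples its own channel,
    # so the deconvolution weight blob has shape (num_classes, 1, k, k).
    factor = (kernel_size + 1) // 2
    center = factor - 1 if kernel_size % 2 == 1 else factor - 0.5
    og = np.ogrid[:kernel_size, :kernel_size]
    filt = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
    weights = np.zeros((num_classes, 1, kernel_size, kernel_size), dtype=np.float32)
    weights[range(num_classes), 0] = filt
    return weights

w = bilinear_upsampling_weights(64, 21)   # matches kernel_size: 64, num_output: 21
```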
587 | 588 | layer { type: 'DenseCRF' name: 'densecrf' bottom: 'score_crop' bottom: 'data-orig' top: 'upscore-crf' 589 | # Untuned Default Params 590 | # densecrf_param { x_gauss: 6 y_gauss: 6 wt_gauss: 6 591 | # x_bilateral: 50 y_bilateral: 50 r_bilateral: 4 g_bilateral: 4 b_bilateral: 4 wt_bilateral: 5 } 592 | # Tuned Deeplab Params 593 | densecrf_param { x_gauss: 19 y_gauss: 19 wt_gauss: 15 594 | x_bilateral: 61 y_bilateral: 61 r_bilateral: 10 g_bilateral: 10 b_bilateral: 10 wt_bilateral: 35 } 595 | include { phase: TEST } } 596 | # =================================================== 597 | -------------------------------------------------------------------------------- /models/fcn_8s/deploy_8s.prototxt: -------------------------------------------------------------------------------- 1 | name : "FCN-8s" 2 | 3 | input: 'data' 4 | input_dim: 1 5 | input_dim: 3 6 | input_dim: 500 # Dummy Size. Can pass any size image. See demo.py 7 | input_dim: 500 # Dummy Size. Can pass any size image. See demo.py 8 | 9 | input: 'data-orig' 10 | input_dim: 1 11 | input_dim: 3 12 | input_dim: 500 # Dummy Size. Can pass any size image. See demo.py 13 | input_dim: 500 # Dummy Size. Can pass any size image. See demo.py 14 | 15 | # ========== Network architecture begin ================ 16 | 17 | layer : { 18 | name : "conv1_1" 19 | type : "Convolution" 20 | bottom : "data" 21 | top : "conv1_1" 22 | convolution_param : { 23 | engine: CAFFE 24 | num_output : 64 25 | pad : 1 26 | kernel_size : 3 27 | } 28 | param : { 29 | lr_mult : 1.0 30 | decay_mult : 1.0 31 | } 32 | param : { 33 | lr_mult : 2.0 34 | decay_mult : 0.0 35 | } 36 | } 37 | layer : { 38 | name : "relu1_1" 39 | type : "ReLU" 40 | bottom : "conv1_1" 41 | top : "conv1_1" 42 | } 43 | layer : { 44 | name : "conv1_2" 45 | type : "Convolution" 46 | bottom : "conv1_1" 47 | top : "conv1_2" 48 | convolution_param : { 49 | engine: CAFFE 50 | num_output : 64 51 | pad : 1 52 | kernel_size : 3 53 | } 54 | param : { 55 | lr_mult : 1.0 56 | decay_mult : 1.0 57 | } 58 | param : { 59 | lr_mult : 2.0 60 | decay_mult : 0.0 61 | } 62 | } 63 | layer : { 64 | name : "relu1_2" 65 | type : "ReLU" 66 | bottom : "conv1_2" 67 | top : "conv1_2" 68 | } 69 | layer : { 70 | name : "pool1" 71 | type : "Pooling" 72 | bottom : "conv1_2" 73 | top : "pool1" 74 | pooling_param : { 75 | pool : MAX 76 | kernel_size : 3 77 | stride : 2 78 | pad : 1 79 | } 80 | } 81 | layer : { 82 | name : "conv2_1" 83 | type : "Convolution" 84 | bottom : "pool1" 85 | top : "conv2_1" 86 | convolution_param : { 87 | engine: CAFFE 88 | num_output : 128 89 | pad : 1 90 | kernel_size : 3 91 | } 92 | param : { 93 | lr_mult : 1.0 94 | decay_mult : 1.0 95 | } 96 | param : { 97 | lr_mult : 2.0 98 | decay_mult : 0.0 99 | } 100 | } 101 | layer : { 102 | name : "relu2_1" 103 | type : "ReLU" 104 | bottom : "conv2_1" 105 | top : "conv2_1" 106 | } 107 | layer : { 108 | name : "conv2_2" 109 | type : "Convolution" 110 | bottom : "conv2_1" 111 | top : "conv2_2" 112 | convolution_param : { 113 | engine: CAFFE 114 | num_output : 128 115 | pad : 1 116 | kernel_size : 3 117 | } 118 | param : { 119 | lr_mult : 1.0 120 | decay_mult : 1.0 121 | } 122 | param : { 123 | lr_mult : 2.0 124 | decay_mult : 0.0 125 | } 126 | } 127 | layer : { 128 | name : "relu2_2" 129 | type : "ReLU" 130 | bottom : "conv2_2" 131 | top : "conv2_2" 132 | } 133 | layer : { 134 | name : "pool2" 135 | type : "Pooling" 136 | bottom : "conv2_2" 137 | top : "pool2" 138 | pooling_param : { 139 | pool : MAX 140 | kernel_size : 3 141 | stride : 2 142 | pad : 1 143 | 
} 144 | } 145 | layer : { 146 | name : "conv3_1" 147 | type : "Convolution" 148 | bottom : "pool2" 149 | top : "conv3_1" 150 | convolution_param : { 151 | engine: CAFFE 152 | num_output : 256 153 | pad : 1 154 | kernel_size : 3 155 | } 156 | param : { 157 | lr_mult : 1.0 158 | decay_mult : 1.0 159 | } 160 | param : { 161 | lr_mult : 2.0 162 | decay_mult : 0.0 163 | } 164 | } 165 | layer : { 166 | name : "relu3_1" 167 | type : "ReLU" 168 | bottom : "conv3_1" 169 | top : "conv3_1" 170 | } 171 | layer : { 172 | name : "conv3_2" 173 | type : "Convolution" 174 | bottom : "conv3_1" 175 | top : "conv3_2" 176 | convolution_param : { 177 | engine: CAFFE 178 | num_output : 256 179 | pad : 1 180 | kernel_size : 3 181 | } 182 | param : { 183 | lr_mult : 1.0 184 | decay_mult : 1.0 185 | } 186 | param : { 187 | lr_mult : 2.0 188 | decay_mult : 0.0 189 | } 190 | } 191 | layer : { 192 | name : "relu3_2" 193 | type : "ReLU" 194 | bottom : "conv3_2" 195 | top : "conv3_2" 196 | } 197 | layer : { 198 | name : "conv3_3" 199 | type : "Convolution" 200 | bottom : "conv3_2" 201 | top : "conv3_3" 202 | convolution_param : { 203 | engine: CAFFE 204 | num_output : 256 205 | pad : 1 206 | kernel_size : 3 207 | } 208 | param : { 209 | lr_mult : 1.0 210 | decay_mult : 1.0 211 | } 212 | param : { 213 | lr_mult : 2.0 214 | decay_mult : 0.0 215 | } 216 | } 217 | layer : { 218 | name : "relu3_3" 219 | type : "ReLU" 220 | bottom : "conv3_3" 221 | top : "conv3_3" 222 | } 223 | layer : { 224 | name : "pool3" 225 | type : "Pooling" 226 | bottom : "conv3_3" 227 | top : "pool3" 228 | pooling_param : { 229 | pool : MAX 230 | kernel_size : 3 231 | stride : 2 232 | pad : 1 233 | } 234 | } 235 | layer : { 236 | name : "conv4_1" 237 | type : "Convolution" 238 | bottom : "pool3" 239 | top : "conv4_1" 240 | convolution_param : { 241 | engine: CAFFE 242 | num_output : 512 243 | pad : 1 244 | kernel_size : 3 245 | } 246 | param : { 247 | lr_mult : 1.0 248 | decay_mult : 1.0 249 | } 250 | param : { 251 | lr_mult : 2.0 252 | decay_mult : 0.0 253 | } 254 | } 255 | layer : { 256 | name : "relu4_1" 257 | type : "ReLU" 258 | bottom : "conv4_1" 259 | top : "conv4_1" 260 | } 261 | layer : { 262 | name : "conv4_2" 263 | type : "Convolution" 264 | bottom : "conv4_1" 265 | top : "conv4_2" 266 | convolution_param : { 267 | engine: CAFFE 268 | num_output : 512 269 | pad : 1 270 | kernel_size : 3 271 | } 272 | param : { 273 | lr_mult : 1.0 274 | decay_mult : 1.0 275 | } 276 | param : { 277 | lr_mult : 2.0 278 | decay_mult : 0.0 279 | } 280 | } 281 | layer : { 282 | name : "relu4_2" 283 | type : "ReLU" 284 | bottom : "conv4_2" 285 | top : "conv4_2" 286 | } 287 | layer : { 288 | name : "conv4_3" 289 | type : "Convolution" 290 | bottom : "conv4_2" 291 | top : "conv4_3" 292 | convolution_param : { 293 | engine: CAFFE 294 | num_output : 512 295 | pad : 1 296 | kernel_size : 3 297 | } 298 | param : { 299 | lr_mult : 1.0 300 | decay_mult : 1.0 301 | } 302 | param : { 303 | lr_mult : 2.0 304 | decay_mult : 0.0 305 | } 306 | } 307 | layer : { 308 | name : "relu4_3" 309 | type : "ReLU" 310 | bottom : "conv4_3" 311 | top : "conv4_3" 312 | } 313 | layer : { 314 | name : "pool4" 315 | type : "Pooling" 316 | bottom : "conv4_3" 317 | top : "pool4" 318 | pooling_param : { 319 | pool : MAX 320 | kernel_size : 3 321 | # stride : 2 322 | stride : 1 323 | pad : 1 324 | } 325 | } 326 | layer : { 327 | name : "conv5_1" 328 | type : "Convolution" 329 | bottom : "pool4" 330 | top : "conv5_1" 331 | convolution_param : { 332 | engine: CAFFE 333 | num_output : 512 334 | #pad: 1 
335 | pad: 2 336 | kernel_size : 3 337 | hole: 2 338 | } 339 | param : { 340 | lr_mult : 1.0 341 | decay_mult : 1.0 342 | } 343 | param : { 344 | lr_mult : 2.0 345 | decay_mult : 0.0 346 | } 347 | } 348 | layer : { 349 | name : "relu5_1" 350 | type : "ReLU" 351 | bottom : "conv5_1" 352 | top : "conv5_1" 353 | } 354 | layer : { 355 | name : "conv5_2" 356 | type : "Convolution" 357 | bottom : "conv5_1" 358 | top : "conv5_2" 359 | convolution_param : { 360 | engine: CAFFE 361 | num_output : 512 362 | #pad: 1 363 | pad: 2 364 | kernel_size : 3 365 | hole: 2 366 | } 367 | param : { 368 | lr_mult : 1.0 369 | decay_mult : 1.0 370 | } 371 | param : { 372 | lr_mult : 2.0 373 | decay_mult : 0.0 374 | } 375 | } 376 | layer : { 377 | name : "relu5_2" 378 | type : "ReLU" 379 | bottom : "conv5_2" 380 | top : "conv5_2" 381 | } 382 | layer : { 383 | name : "conv5_3" 384 | type : "Convolution" 385 | bottom : "conv5_2" 386 | top : "conv5_3" 387 | convolution_param : { 388 | engine: CAFFE 389 | num_output : 512 390 | #pad: 1 391 | pad: 2 392 | kernel_size : 3 393 | hole: 2 394 | } 395 | param : { 396 | lr_mult : 1.0 397 | decay_mult : 1.0 398 | } 399 | param : { 400 | lr_mult : 2.0 401 | decay_mult : 0.0 402 | } 403 | } 404 | layer : { 405 | name : "relu5_3" 406 | type : "ReLU" 407 | bottom : "conv5_3" 408 | top : "conv5_3" 409 | } 410 | layer : { 411 | name : "pool5" 412 | type : "Pooling" 413 | bottom : "conv5_3" 414 | top : "pool5" 415 | pooling_param : { 416 | pool : MAX 417 | kernel_size : 3 418 | # stride : 2 419 | stride : 1 420 | pad : 1 421 | } 422 | } 423 | layer : { 424 | name : "fc6" 425 | bottom : "pool5" 426 | top : "fc6" 427 | type: "Convolution" 428 | convolution_param { 429 | num_output: 4096 430 | engine: CAFFE 431 | pad: 6 432 | kernel_size: 4 433 | hole: 4 434 | } 435 | param : { 436 | lr_mult : 1.0 437 | decay_mult : 1.0 438 | } 439 | param : { 440 | lr_mult : 2.0 441 | decay_mult : 0.0 442 | } 443 | } 444 | layer : { 445 | name : "relu6" 446 | type : "ReLU" 447 | bottom : "fc6" 448 | top : "fc6" 449 | } 450 | layer { 451 | name: "drop6" 452 | type: "Dropout" 453 | bottom: "fc6" 454 | top: "fc6" 455 | dropout_param { 456 | dropout_ratio: 0.5 457 | } 458 | } 459 | layer : { 460 | name : "fc7" 461 | bottom : "fc6" 462 | top : "fc7" 463 | type: "Convolution" 464 | convolution_param { 465 | engine: CAFFE 466 | num_output : 4096 467 | kernel_size: 1 468 | } 469 | param : { 470 | lr_mult : 1.0 471 | decay_mult : 1.0 472 | } 473 | param : { 474 | lr_mult : 2.0 475 | decay_mult : 0.0 476 | } 477 | } 478 | layer : { 479 | name : "relu7" 480 | type : "ReLU" 481 | bottom : "fc7" 482 | top : "fc7" 483 | } 484 | layer { 485 | name: "drop7" 486 | type: "Dropout" 487 | bottom: "fc7" 488 | top: "fc7" 489 | dropout_param { 490 | dropout_ratio: 0.5 491 | } 492 | } 493 | layer { 494 | name: 'fc8_voc12' 495 | bottom: 'fc7' 496 | top: 'fc8' 497 | type: "Convolution" 498 | convolution_param { 499 | engine: CAFFE 500 | num_output: 21 501 | kernel_size: 1 502 | weight_filler { 503 | type: "gaussian" 504 | std: 0.01 505 | } 506 | bias_filler { 507 | type: "constant" 508 | value: 0 509 | } 510 | } 511 | param : { 512 | lr_mult : 1.0 513 | decay_mult : 1.0 514 | } 515 | param : { 516 | lr_mult : 2.0 517 | decay_mult : 0.0 518 | } 519 | } 520 | 521 | # ========== Network architecture end ================ 522 | 523 | layer { 524 | type: 'Python' 525 | name: 'weak_loss' 526 | bottom: 'fc8' 527 | bottom: 'indicator' 528 | bottom: 'indicator_0.10' # bottom: 'flag_3' bottom: 'seg_gt' bottom: 'score_crop' 529 | top: 
'loss' 530 | python_param { 531 | module: 'python_layers' 532 | layer: 'WeakLoss' 533 | } 534 | include { phase: TRAIN } 535 | loss_weight: 1 536 | } 537 | 538 | layer { 539 | name: 'upsample' 540 | bottom: 'fc8' 541 | top: 'score' 542 | type: "Deconvolution" 543 | convolution_param { 544 | engine: CAFFE 545 | group: 21 546 | num_output: 21 547 | kernel_size: 15 548 | stride: 8 549 | weight_filler: { type: "bilinear_upsampling" } 550 | } 551 | param : { 552 | lr_mult : 0 553 | decay_mult : 0 554 | } 555 | include { phase: TEST } 556 | } 557 | layer { 558 | name: 'crop' 559 | type: 'Crop' 560 | bottom: 'score' 561 | bottom: 'data' 562 | top: 'score_crop' 563 | include { phase: TEST } 564 | } 565 | 566 | # =============Dense CRF=========================== 567 | # Uncomment the untransformed data layer code on top and set bottom of DENSECRF layer to bottom: 'data-orig', if you want to use that. But similar results with transformed data as well. 568 | 569 | layer { type: 'DenseCRF' name: 'densecrf' bottom: 'score_crop' bottom: 'data-orig' top: 'upscore-crf' 570 | # Untuned Default Params 571 | # densecrf_param { x_gauss: 6 y_gauss: 6 wt_gauss: 6 572 | # x_bilateral: 50 y_bilateral: 50 r_bilateral: 4 g_bilateral: 4 b_bilateral: 4 wt_bilateral: 5 } 573 | # Tuned Deeplab Params 574 | densecrf_param { x_gauss: 19 y_gauss: 19 wt_gauss: 15 575 | x_bilateral: 61 y_bilateral: 61 r_bilateral: 10 g_bilateral: 10 b_bilateral: 10 wt_bilateral: 35 } 576 | include { phase: TEST } } 577 | # =================================================== 578 | -------------------------------------------------------------------------------- /models/fcn_8s/solver_8s.prototxt: -------------------------------------------------------------------------------- 1 | # Make sure that Loss is NORMALIZED by number of pixels i.e. 
self.normalization=TRUE in src/python_layers.py 2 | lr_policy: "step" 3 | gamma: 0.1 4 | stepsize: 40000 5 | base_lr: 1e-6 6 | display: 20 7 | max_iter: 35000 8 | momentum: 0.99 9 | weight_decay: 0.0000005 10 | snapshot: 30000 11 | -------------------------------------------------------------------------------- /models/fcn_8s/train_8s.prototxt: -------------------------------------------------------------------------------- 1 | name : "FCN-8s" 2 | 3 | # =========== Train Data Layers ========================= 4 | 5 | layer { type: 'Data' name: 'data' top: 'data' 6 | data_param { 7 | source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/images_train_lmdb' 8 | batch_size: 1 backend: LMDB } 9 | transform_param { mean_value: 104.00698793 mean_value: 116.66876762 10 | mean_value: 122.67891434 } 11 | include { phase: TRAIN } } 12 | 13 | layer { type: 'HDF5Data' name: 'gt' 14 | top: 'indicator' top: 'indicator_0.01' top: 'indicator_0.05' top: 'indicator_0.10' 15 | top: 'flag_1' top: 'flag_3' top: 'flag_5' top: 'flag_10' top: 'flag_50' top: 'flag_100' top: 'flag_200' 16 | hdf5_data_param { 17 | source: "/mnt/a/pathak/fcn_mil_cache/VOC2012/indicator_train.txt" 18 | batch_size: 1 } 19 | include { phase: TRAIN } } 20 | 21 | layer { type: 'Silence' 22 | bottom: 'indicator' bottom: 'indicator_0.01' bottom: 'indicator_0.05' bottom: 'indicator_0.10' 23 | bottom: 'flag_1' bottom: 'flag_3' bottom: 'flag_5' bottom: 'flag_10' bottom: 'flag_50' bottom: 'flag_100' bottom: 'flag_200' 24 | include { phase: TRAIN } } 25 | 26 | # =========== Test Data Layers ========================= 27 | 28 | layer { type: 'Data' name: 'data' top: 'data' 29 | data_param { 30 | source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/images_val_lmdb' 31 | # source: '/mnt/a/pathak/fcn_mil_cache/lmdb_dataset_cache/datum_voc2012segtest' 32 | batch_size: 1 backend: LMDB } 33 | transform_param { mean_value: 104.00698793 mean_value: 116.66876762 34 | mean_value: 122.67891434 } 35 | include { phase: TEST } } 36 | 37 | layer { type: 'Data' name: 'data-orig' top: 'data-orig' 38 | data_param { 39 | source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/images_val_lmdb' 40 | # source: '/mnt/a/pathak/fcn_mil_cache/lmdb_dataset_cache/datum_voc2012segtest' 41 | batch_size: 1 backend: LMDB } 42 | include { phase: TEST } } 43 | 44 | layer { type: 'Data' name: 'gt' top: 'gt' 45 | data_param { 46 | source: '/mnt/a/pathak/fcn_mil_cache/VOC2012/segmentation_class_val_lmdb' 47 | batch_size: 1 backend: LMDB } 48 | include { phase: TEST } } 49 | 50 | # ========== Network architecture begin ================ 51 | 52 | layer : { 53 | name : "conv1_1" 54 | type : "Convolution" 55 | bottom : "data" 56 | top : "conv1_1" 57 | convolution_param : { 58 | engine: CAFFE 59 | num_output : 64 60 | pad : 1 61 | kernel_size : 3 62 | } 63 | param : { 64 | lr_mult : 1.0 65 | decay_mult : 1.0 66 | } 67 | param : { 68 | lr_mult : 2.0 69 | decay_mult : 0.0 70 | } 71 | } 72 | layer : { 73 | name : "relu1_1" 74 | type : "ReLU" 75 | bottom : "conv1_1" 76 | top : "conv1_1" 77 | } 78 | layer : { 79 | name : "conv1_2" 80 | type : "Convolution" 81 | bottom : "conv1_1" 82 | top : "conv1_2" 83 | convolution_param : { 84 | engine: CAFFE 85 | num_output : 64 86 | pad : 1 87 | kernel_size : 3 88 | } 89 | param : { 90 | lr_mult : 1.0 91 | decay_mult : 1.0 92 | } 93 | param : { 94 | lr_mult : 2.0 95 | decay_mult : 0.0 96 | } 97 | } 98 | layer : { 99 | name : "relu1_2" 100 | type : "ReLU" 101 | bottom : "conv1_2" 102 | top : "conv1_2" 103 | } 104 | layer : { 105 | name : "pool1" 106 | type : "Pooling" 107 | bottom : 
"conv1_2" 108 | top : "pool1" 109 | pooling_param : { 110 | pool : MAX 111 | kernel_size : 3 112 | stride : 2 113 | pad : 1 114 | } 115 | } 116 | layer : { 117 | name : "conv2_1" 118 | type : "Convolution" 119 | bottom : "pool1" 120 | top : "conv2_1" 121 | convolution_param : { 122 | engine: CAFFE 123 | num_output : 128 124 | pad : 1 125 | kernel_size : 3 126 | } 127 | param : { 128 | lr_mult : 1.0 129 | decay_mult : 1.0 130 | } 131 | param : { 132 | lr_mult : 2.0 133 | decay_mult : 0.0 134 | } 135 | } 136 | layer : { 137 | name : "relu2_1" 138 | type : "ReLU" 139 | bottom : "conv2_1" 140 | top : "conv2_1" 141 | } 142 | layer : { 143 | name : "conv2_2" 144 | type : "Convolution" 145 | bottom : "conv2_1" 146 | top : "conv2_2" 147 | convolution_param : { 148 | engine: CAFFE 149 | num_output : 128 150 | pad : 1 151 | kernel_size : 3 152 | } 153 | param : { 154 | lr_mult : 1.0 155 | decay_mult : 1.0 156 | } 157 | param : { 158 | lr_mult : 2.0 159 | decay_mult : 0.0 160 | } 161 | } 162 | layer : { 163 | name : "relu2_2" 164 | type : "ReLU" 165 | bottom : "conv2_2" 166 | top : "conv2_2" 167 | } 168 | layer : { 169 | name : "pool2" 170 | type : "Pooling" 171 | bottom : "conv2_2" 172 | top : "pool2" 173 | pooling_param : { 174 | pool : MAX 175 | kernel_size : 3 176 | stride : 2 177 | pad : 1 178 | } 179 | } 180 | layer : { 181 | name : "conv3_1" 182 | type : "Convolution" 183 | bottom : "pool2" 184 | top : "conv3_1" 185 | convolution_param : { 186 | engine: CAFFE 187 | num_output : 256 188 | pad : 1 189 | kernel_size : 3 190 | } 191 | param : { 192 | lr_mult : 1.0 193 | decay_mult : 1.0 194 | } 195 | param : { 196 | lr_mult : 2.0 197 | decay_mult : 0.0 198 | } 199 | } 200 | layer : { 201 | name : "relu3_1" 202 | type : "ReLU" 203 | bottom : "conv3_1" 204 | top : "conv3_1" 205 | } 206 | layer : { 207 | name : "conv3_2" 208 | type : "Convolution" 209 | bottom : "conv3_1" 210 | top : "conv3_2" 211 | convolution_param : { 212 | engine: CAFFE 213 | num_output : 256 214 | pad : 1 215 | kernel_size : 3 216 | } 217 | param : { 218 | lr_mult : 1.0 219 | decay_mult : 1.0 220 | } 221 | param : { 222 | lr_mult : 2.0 223 | decay_mult : 0.0 224 | } 225 | } 226 | layer : { 227 | name : "relu3_2" 228 | type : "ReLU" 229 | bottom : "conv3_2" 230 | top : "conv3_2" 231 | } 232 | layer : { 233 | name : "conv3_3" 234 | type : "Convolution" 235 | bottom : "conv3_2" 236 | top : "conv3_3" 237 | convolution_param : { 238 | engine: CAFFE 239 | num_output : 256 240 | pad : 1 241 | kernel_size : 3 242 | } 243 | param : { 244 | lr_mult : 1.0 245 | decay_mult : 1.0 246 | } 247 | param : { 248 | lr_mult : 2.0 249 | decay_mult : 0.0 250 | } 251 | } 252 | layer : { 253 | name : "relu3_3" 254 | type : "ReLU" 255 | bottom : "conv3_3" 256 | top : "conv3_3" 257 | } 258 | layer : { 259 | name : "pool3" 260 | type : "Pooling" 261 | bottom : "conv3_3" 262 | top : "pool3" 263 | pooling_param : { 264 | pool : MAX 265 | kernel_size : 3 266 | stride : 2 267 | pad : 1 268 | } 269 | } 270 | layer : { 271 | name : "conv4_1" 272 | type : "Convolution" 273 | bottom : "pool3" 274 | top : "conv4_1" 275 | convolution_param : { 276 | engine: CAFFE 277 | num_output : 512 278 | pad : 1 279 | kernel_size : 3 280 | } 281 | param : { 282 | lr_mult : 1.0 283 | decay_mult : 1.0 284 | } 285 | param : { 286 | lr_mult : 2.0 287 | decay_mult : 0.0 288 | } 289 | } 290 | layer : { 291 | name : "relu4_1" 292 | type : "ReLU" 293 | bottom : "conv4_1" 294 | top : "conv4_1" 295 | } 296 | layer : { 297 | name : "conv4_2" 298 | type : "Convolution" 299 | bottom : 
"conv4_1" 300 | top : "conv4_2" 301 | convolution_param : { 302 | engine: CAFFE 303 | num_output : 512 304 | pad : 1 305 | kernel_size : 3 306 | } 307 | param : { 308 | lr_mult : 1.0 309 | decay_mult : 1.0 310 | } 311 | param : { 312 | lr_mult : 2.0 313 | decay_mult : 0.0 314 | } 315 | } 316 | layer : { 317 | name : "relu4_2" 318 | type : "ReLU" 319 | bottom : "conv4_2" 320 | top : "conv4_2" 321 | } 322 | layer : { 323 | name : "conv4_3" 324 | type : "Convolution" 325 | bottom : "conv4_2" 326 | top : "conv4_3" 327 | convolution_param : { 328 | engine: CAFFE 329 | num_output : 512 330 | pad : 1 331 | kernel_size : 3 332 | } 333 | param : { 334 | lr_mult : 1.0 335 | decay_mult : 1.0 336 | } 337 | param : { 338 | lr_mult : 2.0 339 | decay_mult : 0.0 340 | } 341 | } 342 | layer : { 343 | name : "relu4_3" 344 | type : "ReLU" 345 | bottom : "conv4_3" 346 | top : "conv4_3" 347 | } 348 | layer : { 349 | name : "pool4" 350 | type : "Pooling" 351 | bottom : "conv4_3" 352 | top : "pool4" 353 | pooling_param : { 354 | pool : MAX 355 | kernel_size : 3 356 | # stride : 2 357 | stride : 1 358 | pad : 1 359 | } 360 | } 361 | layer : { 362 | name : "conv5_1" 363 | type : "Convolution" 364 | bottom : "pool4" 365 | top : "conv5_1" 366 | convolution_param : { 367 | engine: CAFFE 368 | num_output : 512 369 | #pad: 1 370 | pad: 2 371 | kernel_size : 3 372 | hole: 2 373 | } 374 | param : { 375 | lr_mult : 1.0 376 | decay_mult : 1.0 377 | } 378 | param : { 379 | lr_mult : 2.0 380 | decay_mult : 0.0 381 | } 382 | } 383 | layer : { 384 | name : "relu5_1" 385 | type : "ReLU" 386 | bottom : "conv5_1" 387 | top : "conv5_1" 388 | } 389 | layer : { 390 | name : "conv5_2" 391 | type : "Convolution" 392 | bottom : "conv5_1" 393 | top : "conv5_2" 394 | convolution_param : { 395 | engine: CAFFE 396 | num_output : 512 397 | #pad: 1 398 | pad: 2 399 | kernel_size : 3 400 | hole: 2 401 | } 402 | param : { 403 | lr_mult : 1.0 404 | decay_mult : 1.0 405 | } 406 | param : { 407 | lr_mult : 2.0 408 | decay_mult : 0.0 409 | } 410 | } 411 | layer : { 412 | name : "relu5_2" 413 | type : "ReLU" 414 | bottom : "conv5_2" 415 | top : "conv5_2" 416 | } 417 | layer : { 418 | name : "conv5_3" 419 | type : "Convolution" 420 | bottom : "conv5_2" 421 | top : "conv5_3" 422 | convolution_param : { 423 | engine: CAFFE 424 | num_output : 512 425 | #pad: 1 426 | pad: 2 427 | kernel_size : 3 428 | hole: 2 429 | } 430 | param : { 431 | lr_mult : 1.0 432 | decay_mult : 1.0 433 | } 434 | param : { 435 | lr_mult : 2.0 436 | decay_mult : 0.0 437 | } 438 | } 439 | layer : { 440 | name : "relu5_3" 441 | type : "ReLU" 442 | bottom : "conv5_3" 443 | top : "conv5_3" 444 | } 445 | layer : { 446 | name : "pool5" 447 | type : "Pooling" 448 | bottom : "conv5_3" 449 | top : "pool5" 450 | pooling_param : { 451 | pool : MAX 452 | kernel_size : 3 453 | # stride : 2 454 | stride : 1 455 | pad : 1 456 | } 457 | } 458 | layer : { 459 | name : "fc6" 460 | bottom : "pool5" 461 | top : "fc6" 462 | type: "Convolution" 463 | convolution_param { 464 | num_output: 4096 465 | engine: CAFFE 466 | pad: 6 467 | kernel_size: 4 468 | hole: 4 469 | } 470 | param : { 471 | lr_mult : 1.0 472 | decay_mult : 1.0 473 | } 474 | param : { 475 | lr_mult : 2.0 476 | decay_mult : 0.0 477 | } 478 | } 479 | layer : { 480 | name : "relu6" 481 | type : "ReLU" 482 | bottom : "fc6" 483 | top : "fc6" 484 | } 485 | layer { 486 | name: "drop6" 487 | type: "Dropout" 488 | bottom: "fc6" 489 | top: "fc6" 490 | dropout_param { 491 | dropout_ratio: 0.5 492 | } 493 | } 494 | layer : { 495 | name : "fc7" 496 
| bottom : "fc6" 497 | top : "fc7" 498 | type: "Convolution" 499 | convolution_param { 500 | engine: CAFFE 501 | num_output : 4096 502 | kernel_size: 1 503 | } 504 | param : { 505 | lr_mult : 1.0 506 | decay_mult : 1.0 507 | } 508 | param : { 509 | lr_mult : 2.0 510 | decay_mult : 0.0 511 | } 512 | } 513 | layer : { 514 | name : "relu7" 515 | type : "ReLU" 516 | bottom : "fc7" 517 | top : "fc7" 518 | } 519 | layer { 520 | name: "drop7" 521 | type: "Dropout" 522 | bottom: "fc7" 523 | top: "fc7" 524 | dropout_param { 525 | dropout_ratio: 0.5 526 | } 527 | } 528 | layer { 529 | name: 'fc8_voc12' 530 | bottom: 'fc7' 531 | top: 'fc8' 532 | type: "Convolution" 533 | convolution_param { 534 | engine: CAFFE 535 | num_output: 21 536 | kernel_size: 1 537 | weight_filler { 538 | type: "gaussian" 539 | std: 0.01 540 | } 541 | bias_filler { 542 | type: "constant" 543 | value: 0 544 | } 545 | } 546 | param : { 547 | lr_mult : 1.0 548 | decay_mult : 1.0 549 | } 550 | param : { 551 | lr_mult : 2.0 552 | decay_mult : 0.0 553 | } 554 | } 555 | 556 | # ========== Network architecture end ================ 557 | 558 | layer { 559 | type: 'Python' 560 | name: 'weak_loss' 561 | bottom: 'fc8' 562 | bottom: 'indicator' 563 | bottom: 'indicator_0.10' # bottom: 'flag_3' bottom: 'seg_gt' bottom: 'score_crop' 564 | top: 'loss' 565 | python_param { 566 | module: 'python_layers' 567 | layer: 'WeakLoss' 568 | } 569 | include { phase: TRAIN } 570 | loss_weight: 1 571 | } 572 | 573 | layer { 574 | name: 'upsample' 575 | bottom: 'fc8' 576 | top: 'score' 577 | type: "Deconvolution" 578 | convolution_param { 579 | engine: CAFFE 580 | group: 21 581 | num_output: 21 582 | kernel_size: 15 583 | stride: 8 584 | weight_filler: { type: "bilinear_upsampling" } 585 | } 586 | param : { 587 | lr_mult : 0 588 | decay_mult : 0 589 | } 590 | include { phase: TEST } 591 | } 592 | layer { 593 | name: 'crop' 594 | type: 'Crop' 595 | bottom: 'score' 596 | bottom: 'data' 597 | top: 'score_crop' 598 | include { phase: TEST } 599 | } 600 | 601 | # =============Dense CRF=========================== 602 | # Uncomment the untransformed data layer code on top and set bottom of DENSECRF layer to bottom: 'data-orig', if you want to use that. But similar results with transformed data as well. 603 | 604 | layer { type: 'DenseCRF' name: 'densecrf' bottom: 'score_crop' bottom: 'data-orig' top: 'upscore-crf' 605 | # Untuned Default Params 606 | # densecrf_param { x_gauss: 6 y_gauss: 6 wt_gauss: 6 607 | # x_bilateral: 50 y_bilateral: 50 r_bilateral: 4 g_bilateral: 4 b_bilateral: 4 wt_bilateral: 5 } 608 | # Tuned Deeplab Params 609 | densecrf_param { x_gauss: 19 y_gauss: 19 wt_gauss: 15 610 | x_bilateral: 61 y_bilateral: 61 r_bilateral: 10 g_bilateral: 10 b_bilateral: 10 wt_bilateral: 35 } 611 | include { phase: TEST } } 612 | # =================================================== 613 | -------------------------------------------------------------------------------- /models/scripts/download_ccnn_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | FILE=ccnn_models.tar.gz 7 | URL=http://www.cs.berkeley.edu/~pathak/ccnn/$FILE 8 | CHECKSUM=9936ae392acef2512f2f3cf71cf98bdb 9 | 10 | if [ ! -f $FILE ]; then 11 | echo "Downloading all the CCNN models (1.8G)..." 12 | wget $URL -O $FILE 13 | echo "Unzipping..." 14 | tar zxvf $FILE 15 | echo "Downloading Done." 16 | else 17 | echo "File already exists. Checking md5..." 
18 | fi 19 | 20 | os=`uname -s` 21 | if [ "$os" = "Linux" ]; then 22 | checksum=`md5sum $FILE | awk '{ print $1 }'` 23 | elif [ "$os" = "Darwin" ]; then 24 | checksum=`cat $FILE | md5` 25 | elif [ "$os" = "SunOS" ]; then 26 | checksum=`digest -a md5 -v $FILE | awk '{ print $4 }'` 27 | fi 28 | if [ "$checksum" = "$CHECKSUM" ]; then 29 | echo "Checksum is correct. File was correctly downloaded." 30 | exit 0 31 | else 32 | echo "Checksum is incorrect. Delete the file and download again." 33 | fi -------------------------------------------------------------------------------- /models/scripts/download_pretrained_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | FILE=imagenet_pretrained_models.tar.gz 7 | URL=http://www.cs.berkeley.edu/~pathak/ccnn/$FILE 8 | CHECKSUM=1d13ce28cd92a5082974e35b6a3cc187 9 | 10 | if [ ! -f $FILE ]; then 11 | echo "Downloading the ImageNet pre-trained models..." 12 | wget $URL -O $FILE 13 | echo "Unzipping..." 14 | tar zxvf $FILE 15 | echo "Downloading Done." 16 | else 17 | echo "File already exists. Checking md5..." 18 | fi 19 | 20 | os=`uname -s` 21 | if [ "$os" = "Linux" ]; then 22 | checksum=`md5sum $FILE | awk '{ print $1 }'` 23 | elif [ "$os" = "Darwin" ]; then 24 | checksum=`cat $FILE | md5` 25 | elif [ "$os" = "SunOS" ]; then 26 | checksum=`digest -a md5 -v $FILE | awk '{ print $4 }'` 27 | fi 28 | if [ "$checksum" = "$CHECKSUM" ]; then 29 | echo "Checksum is correct. File was correctly downloaded." 30 | exit 0 31 | else 32 | echo "Checksum is incorrect. Delete the file and download again." 33 | fi -------------------------------------------------------------------------------- /src/ccnn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # CCNN 3 | # Copyright (c) 2015 [See LICENSE file for details] 4 | # Written by Deepak Pathak, Philipp Krahenbuhl 5 | # -------------------------------------------------------- 6 | 7 | def __setup_path(): 8 | import os, sys, inspect, numpy as np 9 | paths = ['.','..','../build/','../build/release','../build/debug'] 10 | current_path = os.path.split(inspect.getfile( inspect.currentframe() ))[0] 11 | paths = [os.path.realpath(os.path.abspath(os.path.join(current_path,x))) for x in paths] 12 | paths = list( filter( lambda x: os.path.exists(x+'/lib/python/ccnn.so'), paths ) ) 13 | ptime = [os.path.getmtime(x+'/lib/python/ccnn.so') for x in paths] 14 | if len( ptime ): 15 | path = paths[ np.argmax( ptime ) ] 16 | sys.path.insert(0, path+'/lib') 17 | __setup_path() 18 | del __setup_path 19 | from python.ccnn import * -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # CCNN 3 | # Copyright (c) 2015 [See LICENSE file for details] 4 | # Written by Deepak Pathak, Philipp Krahenbuhl 5 | # -------------------------------------------------------- 6 | 7 | from os import environ 8 | 9 | def tryLoad(name, default): 10 | try: 11 | import user_config 12 | except ImportError: 13 | return default 14 | if hasattr(user_config, name): 15 | return getattr(user_config, name) 16 | return default 17 | 18 | CAFFE_DIR = tryLoad('CAFFE_DIR', '.') 19 | 20 | import sys 21 | import config 22 | PD = CAFFE_DIR + '/python' 23 | if PD not in sys.path: 24 | 
-------------------------------------------------------------------------------- /src/config.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # CCNN
3 | # Copyright (c) 2015 [See LICENSE file for details]
4 | # Written by Deepak Pathak, Philipp Krahenbuhl
5 | # --------------------------------------------------------
6 | 
7 | from os import environ
8 | 
9 | def tryLoad(name, default):
10 |     try:
11 |         import user_config
12 |     except:
13 |         return default
14 |     if hasattr(user_config, name):
15 |         return getattr(user_config, name)
16 |     return default
17 | 
18 | CAFFE_DIR = tryLoad('CAFFE_DIR', '.')
19 | 
20 | import sys
21 | import config
22 | PD = CAFFE_DIR + '/python'
23 | if PD not in sys.path:
24 |     sys.path.append(PD)
25 | 
26 | # if not 'GLOG_minloglevel' in environ:
27 | environ['GLOG_minloglevel'] = '1'
28 | # To suppress the output to the command line you need to increase the loglevel to at least 2. Do it before importing caffe.
29 | # 0 - debug
30 | # 1 - info (still a LOT of outputs)
31 | # 2 - warnings
32 | # 3 - errors
33 | 
34 | import caffe
-------------------------------------------------------------------------------- /src/dataset.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # CCNN
3 | # Copyright (c) 2015 [See LICENSE file for details]
4 | # Written by Deepak Pathak, Philipp Krahenbuhl
5 | # --------------------------------------------------------
6 | 
7 | import numpy as np
8 | 
9 | VOC_DIR = '/mnt/a/pathak/fcn_mil_cache/VOC2012'
10 | CHANNEL_MEAN = np.array([104.00698793,116.66876762,122.67891434])
11 | 
12 | def idsVOC(type='train'):
13 |     if type == 'train':
14 |         return [l.strip() for l in open(VOC_DIR+'/train.txt','r')]
15 |     if type == 'trainval':
16 |         return [l.strip() for l in open(VOC_DIR+'/trainval.txt','r')]
17 |     if type == 'test':
18 |         return [l.strip() for l in open(VOC_DIR+'/test.txt','r')]
19 |     return [l.strip() for l in open(VOC_DIR+'/val.txt','r')]
20 | 
21 | t0,t1 = 0,0
22 | def fetchVOC( id ):
23 |     from skimage import io
24 |     from time import time
25 |     global t0,t1
26 |     t0 += time()
27 |     im = io.imread(VOC_DIR+"/JPEGImages/%s.jpg"%id)
28 |     tim = im[:,:,::-1].transpose((2,0,1))-CHANNEL_MEAN[:,None,None]
29 |     lbl = io.imread(VOC_DIR+"/SegmentationClassGray/%s.png"%id)
30 |     t1 += time()
31 |     return tim[None],lbl[None,None]
32 | 
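33 | # Usage sketch (assuming VOC_DIR above points at a local VOC2012 copy):
34 | #   ids = idsVOC('train')
35 | #   im, lbl = fetchVOC(ids[0])   # im: (1,3,H,W) BGR, mean-subtracted; lbl: (1,1,H,W) labels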
-------------------------------------------------------------------------------- /src/demo.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # CCNN
3 | # Copyright (c) 2015 [See LICENSE file for details]
4 | # Written by Deepak Pathak, Philipp Krahenbuhl
5 | # --------------------------------------------------------
6 | 
7 | from config import *
8 | import numpy as np
9 | from PIL import Image
10 | 
11 | # Function to get VOC color map. Don't change the image being loaded here.
12 | def palette_gt(gt):
13 |     palette_im = Image.open('../models/examples/gt1.png')
14 |     gt.putpalette(palette_im.palette)
15 |     return gt
16 | 
17 | # Network definitions
18 | net_def = '../models/fcn_32s/deploy_32s.prototxt'
19 | weights = '../models/ccnn_models/ccnn_tag_size_train.caffemodel'
20 | 
21 | # Load Network
22 | caffe.set_device(0)
23 | caffe.set_mode_gpu()
24 | net = caffe.Net(net_def, weights, caffe.TEST)
25 | 
26 | # Load Image
27 | im = Image.open('../models/examples/im2.jpg')
28 | im = np.array(im, dtype=np.float32)
29 | im = im[:,:,::-1] # Change to BGR
30 | mean = np.array((104.00698793,116.66876762,122.67891434))
31 | im -= mean # Mean Subtraction
32 | im = im.transpose(2,0,1) # Blob: C x H x W
33 | im = im[None,:,:,:]
34 | 
35 | # Assign Data
36 | net.blobs['data'].reshape(*im.shape)
37 | net.blobs['data'].data[...] = im
38 | net.blobs['data-orig'].reshape(*im.shape)
39 | net.blobs['data-orig'].data[...] = im+mean[None,:,None,None]
40 | 
41 | # Run forward
42 | net.forward()
43 | out = Image.fromarray(net.blobs['upscore-crf'].data[0,0].astype(np.uint8), mode='P')
44 | out = palette_gt(out)
45 | out.save('../models/examples/result.png')
46 | 
47 | # Classes Predicted
48 | print 'Classes Predicted:', np.unique(net.blobs['upscore-crf'].data[0,0].astype(np.uint8))
49 | print 'Result saved'
50 | 
-------------------------------------------------------------------------------- /src/extras/VOClabelcolormap.m: --------------------------------------------------------------------------------
1 | % VOCLABELCOLORMAP Creates a label color map such that adjacent indices have different
2 | % colors. Useful for reading and writing index images which contain large indices,
3 | % by encoding them as RGB images.
4 | %
5 | % CMAP = VOCLABELCOLORMAP(N) creates a label color map with N entries.
6 | function cmap = VOClabelcolormap(N)
7 | 
8 | if nargin==0
9 |     N=256;
10 | end
11 | cmap = zeros(N,3);
12 | for i=1:N
13 |     id = i-1; r=0;g=0;b=0;
14 |     for j=0:7
15 |         r = bitor(r, bitshift(bitget(id,1),7 - j));
16 |         g = bitor(g, bitshift(bitget(id,2),7 - j));
17 |         b = bitor(b, bitshift(bitget(id,3),7 - j));
18 |         id = bitshift(id,-3);
19 |     end
20 |     cmap(i,1)=r; cmap(i,2)=g; cmap(i,3)=b;
21 | end
22 | cmap = cmap / 255;
-------------------------------------------------------------------------------- /src/extras/evaluate_pascal_seg_test.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # CCNN
3 | # Copyright (c) 2015 [See LICENSE file for details]
4 | # Written by Deepak Pathak, Philipp Krahenbuhl
5 | # --------------------------------------------------------
6 | 
7 | from __future__ import division
8 | from pylab import *
9 | from config import *
10 | import os
11 | from PIL import Image
12 | 
13 | # For ccnn fcn32 code trained model : size
14 | net_def = '../../models/fcn_32s/train_32s.prototxt'
15 | weights = '../../models/ccnn_models/ccnn_tag_size_trainval.caffemodel'
16 | save_dir = '/mnt/a/pathak/fcn_mil_cache/visualized_output/seg12test_size_untuned/results/VOC2012/Segmentation/comp6_test_cls/'
17 | 
18 | caffe.set_device(2)
19 | caffe.set_mode_gpu()
20 | test_net = caffe.Net(net_def, weights, caffe.TEST)
21 | 
22 | fnames_test = np.loadtxt('/mnt/a/pathak/fcn_mil_cache/VOC2012/test.txt', str)
23 | 
24 | #os.makedirs(save_dir)
25 | count = 0
26 | for fname in fnames_test:
27 |     count = count + 1
28 |     if count % 10 == 1:
29 |         print count
30 |     # print fname
31 |     test_net.forward()
32 |     im = Image.fromarray(test_net.blobs['upscore-crf'].data[0,0].astype(np.uint8), mode='P')
33 |     im.save(os.path.join(save_dir, fname + '.png'))
34 | print 'Total Images : ',count
35 | print 'Weight File : ', weights
-------------------------------------------------------------------------------- /src/extras/gen_bb_ind_helper.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # CCNN
3 | # Copyright (c) 2015 [See LICENSE file for details]
4 | # Written by Deepak Pathak, Philipp Krahenbuhl
5 | # --------------------------------------------------------
6 | 
7 | 
8 | '''
9 | - Converts the Matlab-generated indicator file to hdf5 format.
It is used after generate_bb_indicator.m 10 | ''' 11 | 12 | from sys import argv 13 | from scipy.io import loadmat 14 | import numpy as np 15 | import h5py 16 | import os 17 | 18 | out_dir = 'trainList_cl12_seg12' 19 | voc_dir = '/mnt/a/pathak/fcn_mil_cache/VOC2012/' 20 | indicatorLabels = np.loadtxt(voc_dir+out_dir+'/train_labels.txt') 21 | 22 | if not os.path.exists(voc_dir+out_dir+'/ClassIndicator'): 23 | print 'Creating Directory : '+voc_dir+out_dir+'/ClassIndicator'; 24 | os.makedirs(voc_dir+out_dir+'/ClassIndicator') 25 | 26 | 27 | imNames = open(voc_dir+out_dir+'/train.txt','r') 28 | i = 0 29 | for line in imNames: 30 | label = indicatorLabels[i,:] 31 | label = label[None,:,None,None] 32 | 33 | f = h5py.File(voc_dir+out_dir+'/ClassIndicator/'+line[:-1]+'.hf5', "w") 34 | f.create_dataset('indicator', label.shape, dtype='f')[...] = label 35 | f.close() 36 | 37 | i = i+1 38 | 39 | imNames.close() 40 | -------------------------------------------------------------------------------- /src/extras/generate_bb_indicator.m: -------------------------------------------------------------------------------- 1 | % -------------------------------------------------------- 2 | % CCNN 3 | % Copyright (c) 2015 [See LICENSE file for details] 4 | % Written by Deepak Pathak, Philipp Krahenbuhl 5 | % -------------------------------------------------------- 6 | 7 | % Matlab script to generate the labels from classification annotations (same as image-level detection labels) set in VOC 8 | % Run this script and then run the python script to generate hdf5 files : python gen_bb_ind_helper.py 9 | 10 | clear all; close all; clc; 11 | 12 | out_dir = 'trainList_cl12_seg12'; 13 | voc_dir = '/mnt/a/pathak/fcn_mil_cache/VOC2012'; 14 | curr_folder = pwd; 15 | cd(voc_dir); 16 | 17 | train_seg = textread('./train.txt','%s'); 18 | trainval_cl = textread('./ImageSets/Main/trainval.txt','%s'); 19 | [train_new, indSeg, indCl] = intersect(train_seg,trainval_cl); 20 | 21 | fprintf('Saving output to directory : %s\n',fullfile(voc_dir,out_dir)); 22 | 23 | fid = fopen(['./' out_dir '/train.txt'],'w'); 24 | for i=1:length(train_new) 25 | fprintf(fid,'%s\n',train_new{i}); 26 | end 27 | fclose(fid); 28 | 29 | fid = fopen(['./' out_dir '/indicator_train.txt'],'w'); 30 | for i=1:length(train_new) 31 | fprintf(fid,'%s\n',['/mnt/a/pathak/fcn_mil_cache/VOC2012/' out_dir '/ClassIndicator/' train_new{i} '.hf5']); 32 | end 33 | fclose(fid); 34 | 35 | 36 | classes = { 'background', 37 | 'aeroplane', 38 | 'bicycle', 39 | 'bird', 40 | 'boat', 41 | 'bottle', 42 | 'bus', 43 | 'car', 44 | 'cat', 45 | 'chair', 46 | 'cow', 47 | 'diningtable', 48 | 'dog', 49 | 'horse', 50 | 'motorbike', 51 | 'person', 52 | 'pottedplant', 53 | 'sheep', 54 | 'sofa', 55 | 'train', 56 | 'tvmonitor' }; 57 | tempIndicator = zeros(length(trainval_cl),21); 58 | tempIndicator(:,1) = ones(length(trainval_cl),1); 59 | for i=2:length(classes) 60 | [~,tempIndicator(:,i)] = textread(['./ImageSets/Main/' classes{i} '_trainval.txt'],'%s %d'); 61 | end 62 | tempIndicator(tempIndicator==-1)=0; % 1: present , -1 : absent , 0 : difficult 63 | 64 | 65 | indicator = tempIndicator(indCl,:); 66 | dlmwrite(['./' out_dir '/train_labels.txt'], indicator, 'delimiter',' '); 67 | 68 | cd(curr_folder); 69 | 70 | 71 | % =========================================================================== 72 | % Shuffling Code : 73 | % =========================================================================== 74 | 75 | clear all; close all; clc; 76 | voc_dir = '/mnt/a/pathak/fcn_mil_cache/VOC2012'; 77 | 
curr_folder = pwd;
78 | cd(voc_dir);
79 | 
80 | datasetName = 'trainval';
81 | rng(2222);
82 | train_seg = textread(['./' datasetName '_notShuffled.txt'],'%s');
83 | randomSeq = randperm(length(train_seg));
84 | 
85 | fid = fopen(['./' datasetName '.txt'],'w');
86 | for i=1:length(train_seg)
87 |     fprintf(fid,'%s\n',train_seg{randomSeq(i)});
88 | end
89 | fclose(fid);
90 | 
91 | fid = fopen(['./indicator_' datasetName '.txt'],'w');
92 | for i=1:length(train_seg)
93 |     fprintf(fid,'%s\n',['/mnt/a/pathak/fcn_mil_cache/VOC2012/SegmentationClassIndicator/' train_seg{randomSeq(i)} '.hf5']);
94 | end
95 | fclose(fid);
96 | 
97 | % ===========================================================================
98 | % ===========================================================================
99 | 
-------------------------------------------------------------------------------- /src/extras/generate_lmbd.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # CCNN
3 | # Copyright (c) 2015 [See LICENSE file for details]
4 | # Written by Deepak Pathak, Philipp Krahenbuhl
5 | # --------------------------------------------------------
6 | 
7 | '''
8 | To generate a compact lmdb using the convert_imageset tool (see /caffe-pathak/fcn_mil/src/create_imagenet.sh) :
9 | - Run the tool twice: once to create the image lmdb and once to create the label lmdb.
10 | Each list-file line is the path to an image and a label that is always 0. Use the encode flag in both runs.
11 | Use the gray flag for the label lmdb.
12 | - The encode flag is not available through the Python route below, which writes raw arrays (an example invocation is sketched at the end of this file).
13 | - See the shuffling code in generate_bb_indicator.m
14 | '''
15 | 
16 | from __future__ import division
17 | from config import *
18 | import lmdb
19 | import numpy as np
20 | import scipy.stats, scipy.io
21 | from PIL import Image
22 | 
23 | dataset = 'val' # train or val or trainval
24 | dirAddress = '/mnt/a/pathak/fcn_mil_cache/VOC2012'
25 | 
26 | inputs = np.loadtxt('{}/{}.txt'.format(dirAddress,dataset), str)
27 | 
28 | # Generate Image LMDB =====================================
29 | image_db = lmdb.open('{}/images_{}_lmdb'.format(dirAddress,dataset), map_size=int(1e12))
30 | with image_db.begin(write=True) as in_txn:
31 |     for in_idx, in_ in enumerate(inputs):
32 | 
33 |         im = np.array(Image.open(dirAddress + '/JPEGImages/' + in_ + '.jpg')) # numpy ndarray
34 |         # Classes present : np.unique(im.astype(np.uint8))
35 | 
36 |         # If rgb image : im = im[:,:,::-1] (RGB to BGR); im = im.transpose((2, 0, 1)) (in caffe channel-height-width)
37 |         # If ground truth single channel image : im = im.astype(np.uint8) and im = im[np.newaxis, :, :]
38 |         im = im[:,:,::-1]
39 |         im = im.transpose((2, 0, 1))
40 |         im_dat = caffe.io.array_to_datum(im)
41 | 
42 |         # Note that the indices are zero padded to preserve their order: LMDB sorts the keys lexicographically so bare integers as strings will be disordered.
43 |         in_txn.put('{:0>10d}'.format(in_idx), im_dat.SerializeToString())
44 | image_db.close()
45 | 
46 | # Generate GT LMDB =======================================
47 | image_db = lmdb.open('{}/segmentation_class_{}_lmdb'.format(dirAddress,dataset), map_size=int(1e12))
48 | with image_db.begin(write=True) as in_txn:
49 |     for in_idx, in_ in enumerate(inputs):
50 | 
51 |         im = np.array(Image.open(dirAddress + '/SegmentationClassPNG/' + in_ + '.png')) # numpy ndarray
52 |         # Classes present : np.unique(im.astype(np.uint8))
53 | 
54 |         # If rgb image : im = im[:,:,::-1] (RGB to BGR); im = im.transpose((2, 0, 1)) (in caffe channel-height-width)
55 |         # If ground truth single channel image : im = im.astype(np.uint8) and im = im[np.newaxis, :, :]
56 |         im = im.astype(np.uint8)
57 |         im = im[np.newaxis, :, :]
58 |         im_dat = caffe.io.array_to_datum(im)
59 | 
60 |         # Note that the indices are zero padded to preserve their order: LMDB sorts the keys lexicographically so bare integers as strings will be disordered.
61 |         in_txn.put('{:0>10d}'.format(in_idx), im_dat.SerializeToString())
62 | image_db.close()
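63 | 
64 | # Example invocation sketch for the convert_imageset route mentioned in the docstring
65 | # (paths are hypothetical; flag names are those of stock Caffe's convert_imageset):
66 | #   convert_imageset --encoded --encode_type=jpg VOC2012/JPEGImages/ train_files.txt images_train_lmdb
67 | #   convert_imageset --encoded --encode_type=png --gray VOC2012/SegmentationClassPNG/ train_files.txt segmentation_class_train_lmdb
68 | # where every line of train_files.txt is "<image name> 0" (the label is always 0, as noted above).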
-------------------------------------------------------------------------------- /src/extras/gray2ind.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # CCNN
3 | # Copyright (c) 2015 [See LICENSE file for details]
4 | # Written by Deepak Pathak, Philipp Krahenbuhl
5 | # --------------------------------------------------------
6 | 
7 | '''
8 | - It is to be run after png2gray.py
9 | - Converts the grayscale segmentation ground truth image to hdf5 format.
10 | '''
11 | 
12 | from sys import argv
13 | from scipy.io import loadmat
14 | import numpy as np
15 | from PIL import Image,ImagePalette
16 | import h5py
17 | import random
18 | random.seed(222)
19 | 
20 | # Code to convert image one by one ============
21 | 
22 | # if len(argv) < 3:
23 | #     print("Usage: %s png hf5"%argv[0])
24 | #     exit(1)
25 | 
26 | # N = 21
27 | # im = Image.open(argv[1])
28 | # I = np.array(im) # shape : (h,w)
29 | # l = I[I>=0] # shape : (hw,1)
30 | # l = l[l<N] # drop the 255 ignore label
31 | # cnt = np.bincount(l,minlength=N) # pixels per class
32 | # cnt = cnt[:,None,None]
33 | 
34 | # f = h5py.File(argv[2], "w")
35 | # f.create_dataset('indicator', cnt.shape, dtype='f')[...] = (cnt > 0).astype(float)
36 | # f.create_dataset('indicator_0.01', cnt.shape, dtype='f')[...] = (cnt > 0.01*l.size).astype(float)
37 | # f.create_dataset('indicator_0.05', cnt.shape, dtype='f')[...] = (cnt > 0.05*l.size).astype(float)
38 | # f.create_dataset('indicator_0.10', cnt.shape, dtype='f')[...] = (cnt > 0.10*l.size).astype(float)
39 | # f.close()
40 | 
41 | # =============================================
42 | # =============================================
43 | 
44 | 
45 | # Code to convert images in batch =============
46 | # =============================================
47 | 
48 | # N = 21
49 | # out_dir = 'SegmentationClassIndicator/'
50 | # voc_dir = '/x/pathak/fcn_mil_cache/VOC2012/'
51 | # imNames = open(voc_dir+out_dir+'val.txt','r')
52 | # for line in imNames:
53 | #     im = Image.open(voc_dir+'SegmentationClassGray/'+line[:-1]+'.png')
54 | #     I = np.array(im) # shape : (h,w)
55 | #     l = I[I>=0] # shape : (hw,1)
56 | #     l = l[l<N]
57 | #     cnt = np.bincount(l,minlength=N)
58 | #     cnt = cnt[:,None,None]
59 | 
60 | #     f = h5py.File(voc_dir+out_dir+line[:-1]+'.hf5', "w")
61 | #     f.create_dataset('indicator', cnt.shape, dtype='f')[...] = (cnt > 0).astype(float)
62 | #     f.create_dataset('indicator_0.01', cnt.shape, dtype='f')[...] = (cnt > 0.01*l.size).astype(float)
63 | #     f.create_dataset('indicator_0.05', cnt.shape, dtype='f')[...] = (cnt > 0.05*l.size).astype(float)
64 | #     f.create_dataset('indicator_0.10', cnt.shape, dtype='f')[...] = (cnt > 0.10*l.size).astype(float)
65 | #     f.close()
66 | 
67 | # =============================================
68 | # =============================================
69 | 
70 | 
71 | # Code to generate annotations with the semi-supervised flags ====
72 | # ================================================================
73 | 
74 | N = 21
75 | out_dir = 'SegmentationClassIndicator/'
76 | voc_dir = '/mnt/a/pathak/fcn_mil_cache/VOC2012/'
77 | data = 'val'
78 | imNames = open(voc_dir+data+'.txt','r')
79 | 
80 | classFreq = np.zeros(N)
81 | for line in imNames:
82 |     im = Image.open(voc_dir+'SegmentationClassGray/'+line[:-1]+'.png')
83 |     I = np.array(im) # shape : (h,w)
84 |     l = I[I>=0] # shape : (hw,1)
85 |     l = l[l<N]
86 |     cnt = np.bincount(l,minlength=N)
87 |     classFreq += (cnt > 0).astype(float)
88 | #print classFreq.shape
89 | print 'Class Frequency: ',classFreq
90 | #classFreq = np.array([ 10578.,586. , 486. , 698. , 461. ,654.,385. ,1086.,1000.,1081. ,264.,528.,1177.,444.,482.,3898.,487.,299.,491.,500.,548.]) for train
91 | 
92 | classIm = []
93 | for i in range(0,N):
94 |     classIm.append([])
95 | imID = 0
96 | imNames = open(voc_dir+data+'.txt','r')
97 | for line in imNames:
98 |     im = Image.open(voc_dir+'SegmentationClassGray/'+line[:-1]+'.png')
99 |     I = np.array(im)
100 |     l = I[I>=0]
101 |     l = l[l<N]
102 |     cnt = np.bincount(l,minlength=N)
103 |     cnt = (cnt > 0).astype(float)
104 |     classesPresent = np.flatnonzero(cnt)
105 |     classChosen = classesPresent[classFreq[classesPresent].argmin()]
106 |     classIm[classChosen].append(imID)
107 |     imID += 1
108 | 
109 | samples = [1,3,5,10,50,100,200] # number of randomly sampled images per class
110 | selectedImages = []
111 | for i in range(0,len(samples)):
112 |     selectedImages.append([])
113 |     for j in range(1,N):
114 |         temp = classIm[j]
115 |         random.shuffle(temp)
116 |         selectedImages[i].extend(temp[0:min(samples[i],len(temp))])
117 |     selectedImages[i].sort()
118 | print 'Images Selected'
119 | 
120 | imID = 0
121 | temp = len(samples)*[0]
122 | imNames = open(voc_dir+data+'.txt','r')
123 | for line in imNames:
124 |     im = Image.open(voc_dir+'SegmentationClassGray/'+line[:-1]+'.png')
125 |     I = np.array(im) # shape : (h,w)
126 |     l = I[I>=0] # shape : (hw,1)
127 |     l = l[l<N]
128 |     cnt = np.bincount(l,minlength=N)
129 |     cnt = cnt[:,None,None]
130 | 
131 |     f = h5py.File(voc_dir+out_dir+line[:-1]+'.hf5', "w")
132 |     f.create_dataset('indicator', cnt.shape, dtype='f')[...] = (cnt > 0).astype(float)
133 |     f.create_dataset('indicator_0.01', cnt.shape, dtype='f')[...] = (cnt > 0.01*l.size).astype(float)
134 |     f.create_dataset('indicator_0.05', cnt.shape, dtype='f')[...] = (cnt > 0.05*l.size).astype(float)
135 |     f.create_dataset('indicator_0.10', cnt.shape, dtype='f')[...] = (cnt > 0.10*l.size).astype(float)
136 | 
137 |     for i in range(0,len(samples)):
138 |         if temp[i]<len(selectedImages[i]) and imID==selectedImages[i][temp[i]]:
139 |             f.create_dataset('flag_'+str(samples[i]), (1,1,1), dtype='f')[...] = 1
140 |             temp[i] += 1
141 |         else:
142 |             f.create_dataset('flag_'+str(samples[i]), (1,1,1), dtype='f')[...] = 0
143 |     f.close()
144 |     imID += 1
145 | 
146 | imNames.close()
-------------------------------------------------------------------------------- /src/extras/mat2png.py: --------------------------------------------------------------------------------
24 | if len(argv)>3:
25 |     im.putpalette( Image.open( argv[3] ).palette )
26 | 
27 | im.save(argv[2])
28 | 
-------------------------------------------------------------------------------- /src/extras/png2gray.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # CCNN
3 | # Copyright (c) 2015 [See LICENSE file for details]
4 | # Written by Deepak Pathak, Philipp Krahenbuhl
5 | # --------------------------------------------------------
6 | 
7 | '''
8 | - It is to be run after mat2png.py
9 | - Converts png segmentation ground truth images to grayscale representing the labels as intensities.
10 | - Use python3 to run.
11 | ''' 12 | 13 | from sys import argv 14 | from scipy.io import loadmat 15 | import numpy as np 16 | from PIL import Image,ImagePalette 17 | 18 | if len(argv) < 3: 19 | print("Usage: %s png png"%argv[0]) 20 | exit(1) 21 | 22 | im = Image.open(argv[1]) 23 | Image.frombytes('L',im.size,im.tobytes()).save(argv[2]) 24 | -------------------------------------------------------------------------------- /src/extras/save_png_colormap_pascal.m: -------------------------------------------------------------------------------- 1 | % -------------------------------------------------------- 2 | % CCNN 3 | % Copyright (c) 2015 [See LICENSE file for details] 4 | % Written by Deepak Pathak, Philipp Krahenbuhl 5 | % -------------------------------------------------------- 6 | 7 | % Script to save png indexed images with the colormap defined by VOCdevkit 8 | 9 | clear all; close all; clc; 10 | 11 | dirAddress = '/mnt/a/pathak/fcn_mil_cache/visualized_output/seg12test_size_untuned/'; 12 | inputImages = dir(fullfile(dirAddress,'results/VOC2012/Segmentation/comp6_test_cls/*.png')); 13 | fprintf('Dir: %s\n',dirAddress); 14 | cmap = VOClabelcolormap(256); 15 | for i=1:length(inputImages) 16 | if mod(i,100)==0 17 | fprintf('Image # %d\n',i); 18 | end 19 | im = imread(fullfile(dirAddress,'results/VOC2012/Segmentation/comp6_test_cls/',inputImages(i).name)); 20 | imwrite(im,cmap,fullfile(dirAddress,'results/VOC2012/Segmentation/comp6_test_cls/',inputImages(i).name)); 21 | end 22 | -------------------------------------------------------------------------------- /src/python_layers.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # CCNN 3 | # Copyright (c) 2015 [See LICENSE file for details] 4 | # Written by Deepak Pathak, Philipp Krahenbuhl 5 | # -------------------------------------------------------- 6 | 7 | from config import * 8 | import numpy as np 9 | 10 | class WeakLoss(caffe.Layer): 11 | def DS(self, I, stride=32, factor=32,N=21 ): 12 | r = np.zeros(( (I.shape[0]-1)/stride+1, (I.shape[1]-1)/stride+1 ), dtype=I.dtype) 13 | for j in range(0,I.shape[0],stride): 14 | for i in range(0,I.shape[1],stride): 15 | p = I[j:j+factor,i:i+factor] 16 | b = np.bincount( p[p>=0],minlength=N ) 17 | r[j/stride,i/stride] = np.argmax(b) 18 | return r 19 | 20 | def setup(self,bottom,top): 21 | 22 | self.bg_lower,self.bg_upper = 0.3,0.7 23 | self.bg_slack = 1e10 # no slack : 1e10 24 | self.fg_lower_hard = 0.01 25 | self.fg_lower = 0.05 26 | self.fg_slack = 2 # no slack : 1e10 27 | self.hardness = 1 # no hardness : 1 and hardness : 1000 28 | 29 | self.semi_supervised = False 30 | self.apply_size_constraint = False 31 | self.normalization = True # models/fcn_8s/solver_8s.prototxt needs the loss to be normalized and solver_32s doesn't 32 | 33 | if self.apply_size_constraint: 34 | self.bg_lower,self.bg_upper = 0.2,0.7 35 | self.bg_slack = 1e10 36 | self.fg_lower_hard = 0.1 37 | self.fg_lower = 0.1 38 | self.fg_slack = 1e10 39 | self.hardness = 1000 40 | self.fg_upper_small = 0.01 # upper bound on small object. Don't make it zero as strictly less than 0 is not satisfiable. Make it epsilon small. 
41 | 
42 |         # self.counter = 1
43 | 
44 |     def reshape(self, bottom, top):
45 |         top[0].reshape(1,1,1,1)
46 | 
47 |     def forward(self, bottom, top):
48 |         # print "first : ",int(np.prod(bottom[0].data.shape[1:]))
49 |         from time import time
50 |         t0 = time()
51 |         D = bottom[0].channels
52 |         from ccnn import constraintloss
53 |         self.diff = []
54 |         loss,w = 0,0
55 |         for i in range(bottom[0].num):
56 |             # print '-------------------------------------'
57 |             # print 'Image Number : ',self.counter
58 | 
59 |             if self.semi_supervised:
60 |                 assert (len(bottom)>4),"Semi Supervised Flag ON, but fully supervised images not supplied as additional bottom !"
61 | 
62 |             if (not self.semi_supervised) or (bottom[3].data[i]==0): # weakly-supervised downsampled training
63 |                 # Setup bottoms
64 |                 f = np.ascontiguousarray(bottom[0].data[i].reshape((D,-1)).T) # f : height*width x channels
65 |                 q = np.exp(f-np.max(f,axis=1)[:,None]) # expAndNormalize across channels
66 |                 q/= np.sum(q,axis=1)[:,None]
67 | 
68 |                 # Setup the constraint softmax
69 |                 csm = constraintloss.ConstraintSoftmax(self.hardness)
70 | 
71 |                 # Add Negative Label constraints
72 |                 if self.apply_size_constraint:
73 |                     L = bottom[2].data[i].flatten() > 0.5
74 |                 else:
75 |                     L = bottom[1].data[i].flatten() > 0.5
76 |                 csm.addZeroConstraint( (~L).astype(np.float32) )
77 | 
78 |                 # Add Small Object Size constraints
79 |                 # L_up = 0*L
80 |                 # if self.apply_size_constraint:
81 |                 #     assert (len(bottom)>2),"Size constraint ON, but size information not supplied as additional bottom !"
82 |                 #     L_up = 1*L
83 |                 #     L = bottom[2].data[i].flatten() > 0.5
84 | 
85 |                 # for l in np.flatnonzero(L_up):
86 |                 #     if l>0 and not L[l]:
87 |                 #         v = np.zeros(D).astype(np.float32); v[l] = 1
88 |                 #         csm.addLinearConstraint( -v, -self.fg_upper_small, self.fg_slack )
89 | 
90 |                 # Apply Positive Label Constraints
91 |                 for l in np.flatnonzero(L):
92 |                     if l>0:
93 |                         v = np.zeros(D).astype(np.float32); v[l] = 1
94 |                         # if self.apply_size_constraint:
95 |                         #     csm.addLinearConstraint( v, self.fg_lower_hard )
96 |                         csm.addLinearConstraint( v, self.fg_lower, self.fg_slack )
97 | 
98 |                 # Add Background Constraints
99 |                 v = np.zeros(D).astype(np.float32); v[0] = 1
100 |                 csm.addLinearConstraint( v, self.bg_lower, self.bg_slack ) # lower bound
101 |                 if (np.sum(L[1:]) > 0): # i.e.
image is not all background 102 | csm.addLinearConstraint( -v, -self.bg_upper ) # upper bound 103 | 104 | # Run constrained optimization 105 | p = csm.compute(f) 106 | 107 | if self.normalization: 108 | self.diff.append( ((q-p).T.reshape(bottom[0].data[i].shape))/np.float32(f.shape[0]) ) # normalize by (f.shape[0]) 109 | else: 110 | self.diff.append( ((q-p).T.reshape(bottom[0].data[i].shape)) ) # unnormalize 111 | 112 | # Debugging Code --------- 113 | # temp = 1 114 | # for l in np.flatnonzero(L_up): 115 | # if l>0 and not L[l]: 116 | # if p[:,l].sum() > self.fg_upper_small: 117 | # print 'Small Object Class Index=',temp,' sumP=',p[:,l].sum(),' sumQ=',q[:,l].sum() 118 | # print '\tP=',repr(p[:,l]) 119 | # print '\tQ=',repr(q[:,l]) 120 | # temp += 1 121 | # print '' 122 | # np.savez('./debug/debug_im'+str(self.counter)+'.npz', hardness=self.hardness, bg_lower = self.bg_lower, bg_upper=self.bg_upper, L=L, L_up=L_up, fg_lower = self.fg_lower, fg_slack=self.fg_slack, fg_upper_small=self.fg_upper_small, f=f,p=p,q=q ) 123 | # self.counter += 1 124 | # ----------------------- 125 | 126 | else: # fully-supervised upsample training 127 | f = np.ascontiguousarray(bottom[5].data[i].reshape((D,-1)).T) # f : height*width x channels 128 | q = np.exp(f-np.max(f,axis=1)[:,None]) # expAndNormalize across channels 129 | q/= np.sum(q,axis=1)[:,None] 130 | 131 | gt = bottom[4].data[i] 132 | # print '\t q : ',q.shape 133 | # print '\t cnn_output_Shape : ',bottom[0].data[i].shape 134 | # print '\t gt_Shape : ',gt.shape 135 | # print '\t gt_resized_Shape : ', (np.float32(self.DS(np.uint8(gt[0,...])))).shape 136 | gt = np.uint8(gt[0,...]) # For downsampling the gt use this : self.DS(np.uint8(gt[0,...])) 137 | gt = np.ascontiguousarray(gt.reshape((1,-1)).T) # gt : height*width x 1 138 | gt = gt.squeeze() 139 | p = np.zeros(q.shape).astype(np.float32) # q,p,f : height*width x channels 140 | ind = np.where(gt!=255) 141 | p[ind,gt[ind]] = 1 142 | ind = np.where(gt==255) 143 | p[ind,:] = q[ind,:] # so that q-p=0 at this position because it is ignore label 144 | 145 | if self.normalization: 146 | self.diff.append( ((q-p).T.reshape(bottom[5].data[i].shape))/np.float32(f.shape[0]) ) # normalize by (f.shape[0]) 147 | else: 148 | self.diff.append((q-p).T.reshape(bottom[5].data[i].shape)) 149 | 150 | if self.normalization: 151 | loss += (np.sum(p*np.log(np.maximum(p,1e-10))) - np.sum(p*np.log(np.maximum(q,1e-10))))/np.float32(f.shape[0]) # normalize by (f.shape[0]) 152 | else: 153 | loss += (np.sum(p*np.log(np.maximum(p,1e-10))) - np.sum(p*np.log(np.maximum(q,1e-10)))) # unnormalize 154 | 155 | # print( np.min(f), np.max(f) ) 156 | # np.set_printoptions(linewidth=150) 157 | # print( L.astype(bool) ) 158 | # print( np.bincount(np.argmax(f,axis=1),minlength=21) ) 159 | # print( np.sum(p[:,~L]), 'P', np.sum(p,axis=0).astype(int)[L], 'H', np.bincount(np.argmax(p,axis=1),minlength=L.size)[L] ) 160 | #print( "===== %f ====="%(time()-t0) ) 161 | top[0].data[0,0,0,0] = loss 162 | self.diff = np.array(self.diff) 163 | 164 | def backward(self, top, propagate_down, bottom): 165 | for i in range(bottom[0].num): 166 | if (not self.semi_supervised) or (bottom[3].data[i]==0): 167 | bottom[0].diff[i] = top[0].diff[0,0,0,0]*self.diff[i] 168 | else: 169 | bottom[5].diff[i] = top[0].diff[0,0,0,0]*self.diff[i] 170 | -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- 1 | # 
-------------------------------------------------------- 2 | # CCNN 3 | # Copyright (c) 2015 [See LICENSE file for details] 4 | # Modified by Deepak Pathak 5 | # Originally written by Jonathan Long 6 | # -------------------------------------------------------- 7 | 8 | from __future__ import division 9 | import numpy as np 10 | import os 11 | from PIL import Image 12 | from datetime import datetime 13 | from config import * 14 | 15 | fnames_val = {'pascal': np.loadtxt('/mnt/a/pathak/fcn_mil_cache/VOC2012/val.txt', str)} 16 | 17 | def prepare(): 18 | save_dir = '/mnt/a/pathak/fcn_mil_cache/visualized_output/2012val_best_tuned/' 19 | return save_dir 20 | 21 | def compute_hist(net, save_dir, dataset): 22 | n_cl = net.blobs['score_crop'].channels 23 | count = 1 24 | hist = np.zeros((n_cl, n_cl)) 25 | for fname in fnames_val[dataset]: 26 | net.forward() 27 | h, _, _ = np.histogram2d(net.blobs['gt'].data[0, 0].flatten(), 28 | net.blobs['upscore-crf'].data[0,0].flatten(), 29 | bins=n_cl, range=[[0, n_cl], [0, n_cl]]) 30 | hist += h 31 | iu = np.zeros(n_cl) 32 | for i in range(n_cl): 33 | iu[i] = hist[i, i] / (hist[i].sum() + hist[:, i].sum() - hist[i, i]) 34 | print 'Image : ',count,' , Name : ',fname, ' , mean IU (till here) : ', np.nanmean(iu)*100 35 | #print '\tClasses Present : ',np.unique(net.blobs['gt'].data[0, 0].astype(np.uint8)) 36 | #print '\tClasses Predicted : ', np.unique(net.blobs['upscore-crf'].data[0,0].astype(np.uint8)) 37 | #print '' 38 | # im = Image.fromarray(net.blobs['upscore-crf'].data[0,0].astype(np.uint8), mode='P') 39 | # im.save(os.path.join(save_dir, fname + '.png')) 40 | count += 1 41 | import sys 42 | sys.stdout.flush() 43 | return hist 44 | 45 | def seg_tests(test_net, save_format, dataset, weights): 46 | print '>>>', datetime.now(), 'Begin seg tests' 47 | n_cl = test_net.blobs['score_crop'].channels 48 | hist = compute_hist(test_net, save_format, dataset) 49 | # overall accuracy 50 | acc = np.diag(hist).sum() / hist.sum() 51 | print '>>>', datetime.now(), 'overall accuracy', acc 52 | # per-class accuracy 53 | acc = np.zeros(n_cl) 54 | for i in range(n_cl): 55 | acc[i] = hist[i, i] / hist[i].sum() 56 | print '>>>', datetime.now(), 'mean accuracy', np.nanmean(acc) 57 | # per-class IU 58 | iu = np.zeros(n_cl) 59 | for i in range(n_cl): 60 | iu[i] = hist[i, i] / (hist[i].sum() + hist[:, i].sum() - hist[i, i]) 61 | print '>>>', datetime.now(), 'mean IU (out of 100)', np.nanmean(iu)*100 62 | iu2 = [ round(100*elem, 1) for elem in iu ] 63 | print '>>>', datetime.now(), 'per-class IU (out of 100)', iu2 64 | freq = hist.sum(1) / hist.sum() 65 | print '>>>', datetime.now(), 'fwavacc', \ 66 | (freq[freq > 0] * iu[freq > 0]).sum() 67 | print 'Weight File', weights 68 | 69 | 70 | # Running the code 71 | dataset = 'pascal' 72 | save_format = prepare() 73 | 74 | net_def = '../models/fcn_32s/train_32s.prototxt' 75 | weights = '../models/ccnn_models/ccnn_tag_size_train.caffemodel' 76 | 77 | caffe.set_device(0) 78 | caffe.set_mode_gpu() 79 | test_net = caffe.Net(net_def, weights, caffe.TEST) 80 | seg_tests(test_net, save_format, dataset, weights) 81 | -------------------------------------------------------------------------------- /src/test_argmax.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # CCNN 3 | # Copyright (c) 2015 [See LICENSE file for details] 4 | # Modified by Deepak Pathak 5 | # Originally written by Jonathan Long 6 | # 
-------------------------------------------------------- 7 | 8 | from __future__ import division 9 | import numpy as np 10 | import os 11 | from PIL import Image 12 | from datetime import datetime 13 | from config import * 14 | 15 | fnames_val = {'pascal': np.loadtxt('/mnt/a/pathak/fcn_mil_cache/VOC2012/val.txt', str)} 16 | 17 | def prepare(): 18 | save_dir = '/mnt/a/pathak/fcn_mil_cache/visualized_output/2012val_best_raw/' 19 | return save_dir 20 | 21 | def compute_hist(net, save_dir, dataset): 22 | n_cl = net.blobs['score_crop'].channels 23 | count = 1 24 | hist = np.zeros((n_cl, n_cl)) 25 | for fname in fnames_val[dataset]: 26 | net.forward() 27 | h, _, _ = np.histogram2d(net.blobs['gt'].data[0, 0].flatten(), 28 | net.blobs['score_crop'].data[0].argmax(0).flatten(), 29 | bins=n_cl, range=[[0, n_cl], [0, n_cl]]) 30 | hist += h 31 | iu = np.zeros(n_cl) 32 | for i in range(n_cl): 33 | iu[i] = hist[i, i] / (hist[i].sum() + hist[:, i].sum() - hist[i, i]) 34 | print 'Image : ',count,' , Name : ',fname,' , mean IU (till here) : ', np.nanmean(iu)*100 35 | #print '\tClasses Present : ',np.unique(net.blobs['gt'].data[0, 0].astype(np.uint8)) 36 | #print '\tClasses Predicted : ', np.unique(net.blobs['score_crop'].data[0].argmax(0).astype(np.uint8)) 37 | #print '' 38 | # im = Image.fromarray(net.blobs['score_crop'].data[0].argmax(0).astype(np.uint8), mode='P') 39 | # im.save(os.path.join(save_dir, fname + '.png')) 40 | count += 1 41 | import sys 42 | sys.stdout.flush() 43 | return hist 44 | 45 | def seg_tests(test_net, save_format, dataset, weights, net_def): 46 | print '>>>', datetime.now(), 'Begin seg tests' 47 | n_cl = test_net.blobs['score_crop'].channels 48 | hist = compute_hist(test_net, save_format, dataset) 49 | # overall accuracy 50 | acc = np.diag(hist).sum() / hist.sum() 51 | print '>>>', datetime.now(), 'overall accuracy', acc 52 | # per-class accuracy 53 | acc = np.zeros(n_cl) 54 | for i in range(n_cl): 55 | acc[i] = hist[i, i] / hist[i].sum() 56 | print '>>>', datetime.now(), 'mean accuracy', np.nanmean(acc) 57 | # per-class IU 58 | iu = np.zeros(n_cl) 59 | for i in range(n_cl): 60 | iu[i] = hist[i, i] / (hist[i].sum() + hist[:, i].sum() - hist[i, i]) 61 | print '>>>', datetime.now(), 'mean IU', np.nanmean(iu)*100 62 | iu2 = [ round(100*elem, 1) for elem in iu ] 63 | print '>>>', datetime.now(), 'per-class IU', iu2 64 | freq = hist.sum(1) / hist.sum() 65 | print '>>>', datetime.now(), 'fwavacc', \ 66 | (freq[freq > 0] * iu[freq > 0]).sum() 67 | print 'Weight File', weights 68 | print 'Proto File', net_def 69 | 70 | 71 | # Running the code 72 | 73 | dataset = 'pascal' 74 | save_format = prepare() 75 | 76 | net_def = '../models/fcn_32s/train_32s.prototxt' 77 | weights = '../models/ccnn_models/ccnn_tag_size_train.caffemodel' 78 | 79 | caffe.set_device(0) 80 | caffe.set_mode_gpu() 81 | test_net = caffe.Net(net_def, weights, caffe.TEST) 82 | seg_tests(test_net, save_format, dataset, weights, net_def) 83 | -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # CCNN 3 | # Copyright (c) 2015 [See LICENSE file for details] 4 | # Written by Deepak Pathak, Philipp Krahenbuhl 5 | # -------------------------------------------------------- 6 | 7 | from config import * 8 | import ccnn 9 | import python_layers, dataset 10 | from glob import glob 11 | import numpy as np 12 | from time import time 13 | from sys 
import argv
14 | 
15 | caffe.set_mode_gpu()
16 | caffe.set_device(0)
17 | 
18 | MODEL_PROTOTXT = '../models/fcn_8s/train_8s.prototxt'
19 | MODEL_INIT = '../models/imagenet_pretrained_models/vgg_init_8s.caffemodel'
20 | 
21 | # MODEL_PROTOTXT = '../models/fcn_32s/train_32s.prototxt'
22 | # MODEL_INIT = '../models/imagenet_pretrained_models/vgg_init_32s.caffemodel'
23 | 
24 | MODEL_SAVE = '../models/ccnn_models/ccnn.caffemodel'
25 | 
26 | if len(argv)>1:
27 |     MODEL_SAVE = argv[1]
28 | doTest = False
29 | 
30 | SOLVER_STR = """train_net: "{TRAIN_NET}"
31 | base_lr: 1e-6
32 | lr_policy: "step"
33 | gamma: 0.1
34 | stepsize: 40000
35 | display: 20
36 | max_iter: 35000
37 | momentum: 0.99
38 | weight_decay: 0.0000005
39 | #average_loss: 1
40 | """
41 | 
42 | SOLVER_STR = SOLVER_STR.replace( "{TRAIN_NET}", MODEL_PROTOTXT )
43 | 
44 | t0 = time()
45 | solver = caffe.get_solver_from_string(SOLVER_STR)
46 | solver.net.copy_from(MODEL_INIT) # Note that this does not copy the interpolation params!
47 | print "Load model %fs"%(time()-t0)
48 | 
49 | for it in range(35):
50 |     t0 = time()
51 |     solver.step(1000)
52 |     t1 = time()
53 |     print "%4d iterations t ="%((it+1)*1000), t1-t0
54 |     solver.net.save(MODEL_SAVE)
55 |     if (it+1)%5==0 and it>10:
56 |         solver.net.save(MODEL_SAVE + '_'+str(it+1))
57 |     if doTest and it>30:
58 |         import subprocess
59 |         try:
60 |             sp.wait()
61 |         except Exception:
62 |             pass
63 |         sp = subprocess.Popen(['python', 'test_argmax.py'])
64 | 
65 | if doTest:
66 |     try:
67 |         sp.wait()
68 |     except Exception:
69 |         pass
-------------------------------------------------------------------------------- /src/user_config.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # CCNN
3 | # Copyright (c) 2015 [See LICENSE file for details]
4 | # Written by Deepak Pathak, Philipp Krahenbuhl
5 | # --------------------------------------------------------
6 | 
7 | from os import path
8 | CAFFE_DIR = path.abspath('./../caffe-ccnn/')
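9 | 
10 | # Example sketch (hypothetical path): any name defined here overrides the default
11 | # looked up by tryLoad() in src/config.py, e.g. to point at a different pycaffe build:
12 | #   CAFFE_DIR = '/opt/caffe'
--------------------------------------------------------------------------------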