├── example
│   ├── MNIST
│   │   ├── .gitignore
│   │   ├── mpi.conf
│   │   ├── run.sh
│   │   ├── MNIST.conf
│   │   ├── MNIST_CONV.conf
│   │   └── mnist.py
│   ├── kaggle_bowl
│   │   ├── gen_tr_va.sh
│   │   ├── gen_test.py
│   │   ├── gen_train.py
│   │   ├── make_submission.py
│   │   ├── gen_img_list.py
│   │   ├── README.md
│   │   ├── pred.conf
│   │   └── bowl.conf
│   ├── multi-machine
│   │   ├── partition.sh
│   │   ├── run.sh
│   │   ├── README.md
│   │   ├── bowl.conf
│   │   └── convert.py
│   ├── README.md
│   └── ImageNet
│       ├── ImageNet.conf
│       ├── README.md
│       └── kaiming.conf
├── wrapper
│   └── matlab
│       ├── make.m
│       ├── README.md
│       ├── DataIter.m
│       ├── example.m
│       ├── example_conv.m
│       └── Net.m
├── bin
│   └── README
├── tools
│   ├── network_maker
│   │   ├── inception.png
│   │   └── README.md
│   ├── Makefile
│   ├── caffe_converter
│   │   ├── convert_mean.cpp
│   │   └── convert.py
│   ├── bin2rec.cc
│   └── imgbin-partition-maker.py
├── windows
│   ├── thirdparty
│   │   ├── OpenCV
│   │   │   └── README.md
│   │   └── OpenBLAS
│   │       └── README.md
│   ├── cxxnet
│   │   ├── im2bin
│   │   │   └── im2bin.vcxproj.filters
│   │   └── cxxnet.sln
│   └── README.md
├── make
│   ├── README.md
│   └── config.mk
├── src
│   ├── local_main.cpp
│   ├── nnet
│   │   ├── nnet_impl.cpp
│   │   ├── nnet_impl.cu
│   │   └── nnet.h
│   ├── layer
│   │   ├── layer_impl.cu
│   │   ├── layer_impl.cpp
│   │   ├── loss
│   │   │   ├── softmax_layer-inl.hpp
│   │   │   ├── multi_logistic_layer-inl.hpp
│   │   │   └── lp_loss_layer-inl.hpp
│   │   ├── flatten_layer-inl.hpp
│   │   ├── split_layer-inl.hpp
│   │   ├── activation_layer-inl.hpp
│   │   ├── xelu_layer-inl.hpp
│   │   ├── op.h
│   │   ├── dropout_layer-inl.hpp
│   │   ├── concat_layer-inl.hpp
│   │   ├── bias_layer-inl.hpp
│   │   ├── layer_impl-inl.hpp
│   │   ├── lrn_layer-inl.hpp
│   │   ├── fixconn_layer-inl.hpp
│   │   ├── insanity_layer-inl.hpp
│   │   └── param.h
│   ├── global.h
│   ├── updater
│   │   ├── updater_impl.cpp
│   │   ├── updater_impl.cu
│   │   ├── nag_updater-inl.hpp
│   │   ├── sgd_updater-inl.hpp
│   │   ├── adam_updater-inl.hpp
│   │   ├── updater_impl-inl.hpp
│   │   └── param.h
│   ├── utils
│   │   ├── random.h
│   │   ├── utils.h
│   │   ├── thread.h
│   │   └── decoder.h
│   ├── io
│   │   ├── image_recordio.h
│   │   ├── iter_mem_buffer-inl.hpp
│   │   ├── inst_vector.h
│   │   ├── iter_attach_txt-inl.hpp
│   │   ├── data.cpp
│   │   ├── iter_csv-inl.hpp
│   │   └── iter_img-inl.hpp
│   └── README.md
├── .gitignore
├── simple_mxnet_converter
│   ├── README.md
│   ├── dump_weight.py
│   └── symbol_converter.py
├── LICENSE
├── doc
│   ├── multigpu.md
│   ├── README.md
│   ├── caffe_converter.md
│   ├── advanced.md
│   ├── debug_perf.md
│   ├── other.md
│   ├── tasks.md
│   ├── updater.md
│   └── global.md
├── README.md
└── Makefile
/example/MNIST/.gitignore:
--------------------------------------------------------------------------------
1 | data
2 | models
3 |
--------------------------------------------------------------------------------
/wrapper/matlab/make.m:
--------------------------------------------------------------------------------
1 | mex -I../ -L../ cxxnet_mex.cpp -lcxxnetwrapper
--------------------------------------------------------------------------------
/bin/README:
--------------------------------------------------------------------------------
1 | This is where the cxxnet binaries will be generated
2 |
--------------------------------------------------------------------------------
/example/kaggle_bowl/gen_tr_va.sh:
--------------------------------------------------------------------------------
1 | sed -n '1,20000p' $1 > tr.lst
2 | sed -n '20001,40000p' $1 > va.lst
3 |
--------------------------------------------------------------------------------
/tools/network_maker/inception.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/cxxnet/HEAD/tools/network_maker/inception.png
--------------------------------------------------------------------------------
/windows/thirdparty/OpenCV/README.md:
--------------------------------------------------------------------------------
1 | This folder is for OpenCV 2.4.10.
2 | This folder should contain:
3 |
4 | ```bash
5 | include
6 | x64
7 | ```
8 |
9 |
--------------------------------------------------------------------------------
/windows/thirdparty/OpenBLAS/README.md:
--------------------------------------------------------------------------------
1 | This folder is for OpenBLAS v0.2.13 x64.
2 | This folder should contain:
3 |
4 | ```bash
5 | bin
6 | include
7 | lib
8 | ```
9 |
--------------------------------------------------------------------------------
/wrapper/matlab/README.md:
--------------------------------------------------------------------------------
1 | To use the two examples directly, you need to put the MNIST data under the current folder. Otherwise, change the data path in the data iterator configuration.
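2 |
3 | For example, the MNIST iterator configuration inside ```example.m``` points at the data like this (a snippet copied from that file):
4 |
5 | ```matlab
6 | ' path_img = "./data/train-images-idx3-ubyte"' char(10)...
7 | ' path_label = "./data/train-labels-idx1-ubyte"' char(10)...
8 | ```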
--------------------------------------------------------------------------------
/example/MNIST/mpi.conf:
--------------------------------------------------------------------------------
1 | num_servers=2
2 | num_workers=2
3 | bin=../../bin/cxxnet.ps
4 | arg="MNIST.conf -app_file MNIST.conf update_on_server=1 param_server=dist silent=1"
5 | network_interface=eth0
6 | network_port=9000
7 | # hostfile=../../../../hosts
8 |
--------------------------------------------------------------------------------
/windows/cxxnet/im2bin/im2bin.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/make/README.md:
--------------------------------------------------------------------------------
1 | Make configuration for CXXNET
2 | ====
3 |
4 | cxxnet is designed to require few third-party libraries. The minimal requirement is MKL/CBLAS/OpenBLAS and mshadow (which can be downloaded automatically). Other dependencies can be configured by editing [make/config.mk](make/config.mk) before running make.
5 |
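6 | For example, the optional Caffe converter is enabled by a few lines in [make/config.mk](make/config.mk) (a sketch; the paths are illustrative, see [doc/caffe_converter.md](../doc/caffe_converter.md)):
7 |
8 | ```bash
9 | # whether to build caffe converter
10 | USE_CAFFE_CONVERTER = 1
11 | CAFFE_ROOT = ~/caffe
12 | CAFFE_INCLUDE = ~/caffe/include/
13 | CAFFE_LIB = ~/caffe/build/lib/
14 | ```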
--------------------------------------------------------------------------------
/src/local_main.cpp:
--------------------------------------------------------------------------------
1 | /*!
2 | * \file local_main.cpp
3 | * \brief local main file that redirects directly to PSMain
4 | * \author Tianqi Chen
5 | */
6 |
7 | int WorkerNodeMain(int argc, char *argv[]);
8 |
9 | int main(int argc, char *argv[]) {
10 | return WorkerNodeMain(argc, argv);
11 | }
12 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | test*
3 | *.o
4 | *.conf
5 | Local*
6 | bin
7 | mshadow
8 | config.mk
9 | layer.h
10 | layer_impl-inl.hpp
11 | .gitignore
12 | *.so
13 | im2bin
14 | *.pyc
15 | example
16 | *.log
17 | ps-lite
18 | dmlc-core
19 | bin
20 | rabit
21 | *.opensdf
22 | *.sdf
23 | *.pdb
24 | *.user
25 | *.suo
26 | *.deps
27 | *.cache
28 | *state
29 | *build
--------------------------------------------------------------------------------
/tools/network_maker/README.md:
--------------------------------------------------------------------------------
1 | This is a script to help you build complex network configurations and visualize the network.
2 |
3 | Visualization requires ```Graphviz```.
4 |
5 | The original script is used to generate the Inception network. To generate your own network, change the code after line 308 in ```config_generator.py```.
6 |
7 |
8 | 
9 |
--------------------------------------------------------------------------------
/src/nnet/nnet_impl.cpp:
--------------------------------------------------------------------------------
1 | #define _CRT_SECURE_NO_WARNINGS
2 | #define _CRT_SECURE_NO_DEPRECATE
3 | // this is where the actual implementations are
4 | #include "nnet_impl-inl.hpp"
5 | // specialize the cpu implementation
6 | namespace cxxnet {
7 | namespace nnet {
8 | template<>
9 | INetTrainer* CreateNet<cpu>(int net_type) {
10 |   return CreateNet_<cpu>(net_type);
11 | }
12 | } // namespace nnet
13 | } // namespace cxxnet
14 |
--------------------------------------------------------------------------------
/src/nnet/nnet_impl.cu:
--------------------------------------------------------------------------------
1 | #define _CRT_SECURE_NO_WARNINGS
2 | #define _CRT_SECURE_NO_DEPRECATE
3 | // this is where the actual implementations are
4 | #include "nnet_impl-inl.hpp"
5 | // specialize the gpu implementation
6 | namespace cxxnet {
7 | namespace nnet {
8 | template<>
9 | INetTrainer* CreateNet<gpu>(int net_type) {
10 |   return CreateNet_<gpu>(net_type);
11 | }
12 | } // namespace nnet
13 | } // namespace cxxnet
14 |
--------------------------------------------------------------------------------
/example/multi-machine/partition.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | if [ $# -lt 2 ]; then
3 |     echo "usage: $0 train.lst im2bin"
4 |     exit -1;
5 | fi
6 |
7 | # set -x
8 | for i in {0..7}; do
9 |     start=$(( $i * 2500 + 1 ))
10 |     end=$(( $start + 2500 - 1 ))
11 |     sed -n "${start}, ${end}p" $1 > tr_${i}.lst
12 |     $2 tr_${i}.lst ./ tr_${i}.bin
13 | done
14 |
15 | end=$(($end + 1))
16 | sed -n "${end}, 40000p" $1 > va.lst
17 | $2 va.lst ./ va.bin
18 |
--------------------------------------------------------------------------------
/example/multi-machine/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | nworker=$1
4 | shift
5 | nserver=$1
6 | shift
7 | config=$1
8 | shift
9 |
10 | set -x
11 |
12 | ../../dmlc-core/tracker/dmlc_mpi.py \
13 | -H hosts -n $nworker -s $nserver \
14 | ../../bin/cxxnet.ps $config update_on_server=1 param_server=dist $@
15 |
16 | # ../../ps-lite/guide/local.sh $nserver $nworker \
17 | # ../../bin/cxxnet.ps $config update_on_server=1 param_server=dist -local
18 |
--------------------------------------------------------------------------------
/example/kaggle_bowl/gen_test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import subprocess
4 |
5 | if len(sys.argv) < 3:
6 |     print "Usage: python gen_test.py input_folder output_folder"
7 |     exit(1)
8 |
9 | fi = sys.argv[1]
10 | fo = sys.argv[2]
11 |
12 | cmd = "convert -resize 48x48\! "
13 | imgs = os.listdir(fi)
14 |
15 |
16 | for img in imgs:
17 |     md = ""
18 |     md += cmd
19 |     md += fi + img
20 |     md += " " + fo + img
21 |     os.system(md)
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/windows/README.md:
--------------------------------------------------------------------------------
1 | This is a sample Visual Studio solution to build cxxnet and im2bin.
2 |
3 | The Windows version supports all functions of the Linux version.
4 |
5 | This sample solution is based on OpenBLAS v0.2.13 x64 and OpenCV 2.4.10.
6 |
7 | Build Guide:
8 | 1. Put the dependency files according to the guides in the ```thirdparty``` folder.
9 | 2. Put ```mshadow``` in the parent (```..```) folder.
10 | 3. Open ```cxxnet.sln```, change ```Solution Configuration``` to ```Release```, then change ```Solution Platform``` to ```x64```.
11 | 4. Start the build.
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/simple_mxnet_converter/README.md:
--------------------------------------------------------------------------------
1 | 1. Convert the conf into a Python symbol file using ```symbol_converter.py```
2 | 2. Dump all weights into a new folder using ```dump_weight.py```
3 | 3. Add an extra script to load the weights and build the new mxnet symbol (see the sample at the end of ```symbol.py```)
4 |
5 | Note:
6 | - This is a toy that contains functions I may use; you need to add whatever you need to it
7 | - Only the ```[a->b]``` format in conf is supported
8 | - Remove all in-place conf, e.g. change ```layer[10->10] = softmax:sm``` to ```layer[10->11] = softmax```
9 | - Every layer must have a NAME
10 |
11 |
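12 | Example invocation for step 2 (a sketch matching the usage string in ```dump_weight.py```; the file names are illustrative):
13 |
14 | ```bash
15 | python dump_weight.py net.conf net.model weights/
16 | ```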
--------------------------------------------------------------------------------
/src/layer/layer_impl.cu:
--------------------------------------------------------------------------------
1 | #define _CRT_SECURE_NO_WARNINGS
2 | #define _CRT_SECURE_NO_DEPRECATE
3 | // include the layer, this is where the actual implementations are
4 | #include "layer_impl-inl.hpp"
5 | // specialize the gpu implementation here
6 | namespace cxxnet {
7 | namespace layer {
8 | template<>
9 | ILayer<gpu>* CreateLayer<gpu>(LayerType type,
10 |                               mshadow::Random<gpu> *p_rnd,
11 |                               const LabelInfo *label_info) {
12 |   return CreateLayer_<gpu>(type, p_rnd, label_info);
13 | }
14 | } // namespace layer
15 | } // namespace cxxnet
16 |
17 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014 by Contributors
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 |
--------------------------------------------------------------------------------
/example/kaggle_bowl/gen_train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import subprocess
4 |
5 | if len(sys.argv) < 3:
6 |     print "Usage: python gen_train.py input_folder output_folder"
7 |     exit(1)
8 |
9 | fi = sys.argv[1]
10 | fo = sys.argv[2]
11 |
12 | cmd = "convert -resize 48x48\! "
13 | classes = os.listdir(fi)
14 |
15 | os.chdir(fo)
16 | for cls in classes:
17 |     try:
18 |         os.mkdir(cls)
19 |     except:
20 |         pass
21 |     imgs = os.listdir(fi + cls)
22 |     for img in imgs:
23 |         md = ""
24 |         md += cmd
25 |         md += fi + cls + "/" + img
26 |         md += " " + fo + cls + "/" + img
27 |         os.system(md)
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/src/layer/layer_impl.cpp:
--------------------------------------------------------------------------------
1 | #define _CRT_SECURE_NO_WARNINGS
2 | #define _CRT_SECURE_NO_DEPRECATE
3 | // macro to hint it is CPU compilation
4 | #define CXXNET_COMPILE_CPU_
5 | // include the layer, this is where the actual implementations are
6 |
7 | #include "layer_impl-inl.hpp"
8 | // specialize the cpu implementation here
9 | namespace cxxnet {
10 | namespace layer {
11 | template<>
12 | ILayer<cpu>* CreateLayer<cpu>(LayerType type,
13 |                               mshadow::Random<cpu> *p_rnd,
14 |                               const LabelInfo *label_info) {
15 |   return CreateLayer_<cpu>(type, p_rnd, label_info);
16 | }
17 | } // namespace layer
18 | } // namespace cxxnet
19 |
--------------------------------------------------------------------------------
/doc/multigpu.md:
--------------------------------------------------------------------------------
1 | Multi-GPU / Distributed Training
2 | ======
3 | This page contains:
4 |
5 | [Set Multi-GPU in configuration file](#set-multi-gpu-in-configuration-file)
6 |
7 | [Make cxxnet work in distributed system](#make-cxxnet-work-in-distributed-system)
8 |
9 | [How it works](#how-it-works)
10 |
11 | [Reference](#reference)
12 |
13 | ### Set Multi-GPU in configuration file
14 | * To use multi-GPU, set the ```dev``` field with the corresponding device ids
15 | ```bash
16 | dev = gpu:0,1,2,3
17 | ```
18 | or
19 | ```bash
20 | dev = gpu:0-3
21 | ```
22 | which indicates that cxxnet will use the first four GPUs for the training task.
23 |
24 | ### Make cxxnet work in distributed system
25 |
26 |
27 | ### How it works
28 | Parameter Server is the backend of the multi-GPU / distributed training part of cxxnet. For multi-GPU, the parameter server runs on the local machine, so you don't need to set it up manually.
29 |
30 | For distributed case TODO
31 |
32 |
33 |
34 | ### Reference
35 |
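36 | For the distributed case, a minimal launch sketch grounded in [example/multi-machine](../example/multi-machine) (assumes a `hosts` file listing machine IPs and the `cxxnet.ps` binary built by `build_ps.sh`; 2 workers and 2 servers):
37 |
38 | ```bash
39 | dmlc-core/tracker/dmlc_mpi.py -H hosts -n 2 -s 2 \
40 |     bin/cxxnet.ps bowl.conf update_on_server=1 param_server=dist
41 | ```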
--------------------------------------------------------------------------------
/example/kaggle_bowl/make_submission.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import sys
3 |
4 | if len(sys.argv) < 5:
5 |     print "Usage: python make_submission.py sample_submission.csv test.lst test.txt out.csv"
6 |     exit(1)
7 |
8 | fc = csv.reader(file(sys.argv[1]))
9 | fl = csv.reader(file(sys.argv[2]), delimiter='\t', lineterminator='\n')
10 | fi = csv.reader(file(sys.argv[3]), delimiter=' ', lineterminator='\n')
11 | fo = csv.writer(open(sys.argv[4], "w"), lineterminator='\n')
12 |
13 | head = fc.next()
14 | fo.writerow(head)
15 |
16 | head = head[1:]
17 |
18 | img_lst = []
19 | for line in fl:
20 |     path = line[-1]
21 |     path = path.split('/')
22 |     path = path[-1]
23 |     img_lst.append(path)
24 |
25 | idx = 0
26 | for line in fi:
27 |     row = [img_lst[idx]]
28 |     idx += 1
29 |     line = line[:-1]
30 |     row.extend(line)
31 |     fo.writerow(row)
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/example/MNIST/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ ! -d "data" ]; then
4 |     mkdir data
5 | fi
6 |
7 | cd data
8 |
9 | if [ ! -f "train-images-idx3-ubyte" ]; then
10 |     wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
11 |     gzip -d train-images-idx3-ubyte.gz
12 | fi
13 |
14 | if [ ! -f "train-labels-idx1-ubyte" ]; then
15 |     wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
16 |     gzip -d train-labels-idx1-ubyte.gz
17 | fi
18 |
19 | if [ ! -f "t10k-images-idx3-ubyte" ]; then
20 |     wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
21 |     gzip -d t10k-images-idx3-ubyte.gz
22 | fi
23 |
24 | if [ ! -f "t10k-labels-idx1-ubyte" ]; then
25 |     wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
26 |     gzip -d t10k-labels-idx1-ubyte.gz
27 | fi
28 |
29 | cd ..
30 |
31 | if [ ! -d "models" ]; then
32 |     mkdir models
33 | fi
34 |
35 |
36 | ../../bin/cxxnet $1
37 |
--------------------------------------------------------------------------------
/tools/Makefile:
--------------------------------------------------------------------------------
1 | # set LD_LIBRARY_PATH
2 | export CC = gcc
3 | export CXX = g++
4 | export NVCC =nvcc
5 |
6 | export CFLAGS = -Wall -O3 -msse3 -Wno-unknown-pragmas -funroll-loops -I../mshadow/ -I.. -DMSHADOW_USE_MKL=0
7 |
8 | export LDFLAGS=
9 | export NVCCFLAGS = -g -O3 -ccbin $(CXX)
10 |
11 | # specify tensor path
12 | BIN = im2rec
13 | OBJ =
14 | CUOBJ =
15 | CUBIN =
16 | .PHONY: clean all
17 |
18 | all: $(BIN) $(OBJ) $(CUBIN) $(CUOBJ)
19 |
20 | im2bin: im2bin.cpp
21 |
22 | $(BIN) :
23 | 	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) $(LDFLAGS)
24 |
25 | $(OBJ) :
26 | 	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
27 |
28 | $(CUOBJ) :
29 | 	$(NVCC) -c -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" $(filter %.cu, $^)
30 |
31 | $(CUBIN) :
32 | 	$(NVCC) -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" -Xlinker "$(LDFLAGS)" $(filter %.cu %.cpp %.o, $^)
33 |
34 | clean:
35 | 	$(RM) $(OBJ) $(BIN) $(CUBIN) $(CUOBJ) *~
36 |
37 |
--------------------------------------------------------------------------------
/src/global.h:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_GLOBAL_H_
2 | #define CXXNET_GLOBAL_H_
3 | /*!
4 |  * \file global.h
5 |  * \brief global configuration of cxxnet, this controls how cxxnet is compiled
6 |  * \author Tianqi Chen
7 |  */
8 | #include <mshadow/tensor.h>
9 | /*! \brief whether to adapt caffe layers */
10 | #ifndef CXXNET_USE_CAFFE_ADAPTOR
11 | #define CXXNET_USE_CAFFE_ADAPTOR 0
12 | #endif
13 |
14 | /*!
15 |  * \brief whether to use opencv support,
16 |  * without it, we will not be able to use the jpg image iterators
17 |  */
18 | #ifndef CXXNET_USE_OPENCV
19 | #define CXXNET_USE_OPENCV 1
20 | #endif
21 |
22 | /*!
23 |  * \brief whether to use the cudnn library for convolution
24 |  */
25 | #ifndef CXXNET_USE_CUDNN
26 | #define CXXNET_USE_CUDNN 0
27 | #endif
28 |
29 | /*! \brief namespace of cxxnet */
30 | namespace cxxnet {
31 | typedef mshadow::cpu cpu;
32 | typedef mshadow::gpu gpu;
33 | typedef mshadow::index_t index_t;
34 | typedef mshadow::default_real_t real_t;
35 | }  // namespace cxxnet
36 |
37 | #endif  // CXXNET_GLOBAL_H_
38 |
--------------------------------------------------------------------------------
/example/README.md:
--------------------------------------------------------------------------------
1 | CXXNET Examples
2 | ====
3 | This folder contains code examples using cxxnet.
4 | * Contributions of examples and benchmarks are more than welcome!
5 | * If you would like to share how you use cxxnet to solve your problem, send a pull request :)
6 |
7 | List of examples
8 | ====
9 | * [Feature Walk-through by using MNIST](MNIST)
10 | - This walks through the basic steps of using cxxnet.
11 | * [Kaggle National Data Science Bowl Example](kaggle_bowl)
12 | - This is an example that shows you how to solve a real Kaggle problem using cxxnet.
13 | - This example also shows the basic steps of using a convolutional neural network.
14 | * [ImageNet Example](ImageNet)
15 | - This is a step-by-step example that shows how to use cxxnet to train an AlexNet for the ImageNet task.
16 | - We also provide a better reference pre-trained model with all configurations.
17 | * [Distributed Example](multi-machine)
18 | - A step-by-step example that shows how to use cxxnet to train kaggle_bowl on
19 | multiple machines.
20 |
--------------------------------------------------------------------------------
/example/kaggle_bowl/gen_img_list.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 | import sys
4 | import random
5 |
6 | if len(sys.argv) < 5:
7 |     print "Usage: gen_img_list.py train/test sample_submission.csv train_folder img.lst"
8 |     exit(1)
9 |
10 | random.seed(888)
11 |
12 | task = sys.argv[1]
13 | fc = csv.reader(file(sys.argv[2]))
14 | fi = sys.argv[3]
15 | fo = csv.writer(open(sys.argv[4], "w"), delimiter='\t', lineterminator='\n')
16 |
17 | # make class map
18 | head = fc.next()
19 | head = head[1:]
20 |
21 | # make image list
22 | img_lst = []
23 | cnt = 0
24 | if task == "train":
25 |     for i in xrange(len(head)):
26 |         path = fi + head[i]
27 |         lst = os.listdir(fi + head[i])
28 |         for img in lst:
29 |             img_lst.append((cnt, i, path + '/' + img))
30 |             cnt += 1
31 | else:
32 |     lst = os.listdir(fi)
33 |     for img in lst:
34 |         img_lst.append((cnt, 0, fi + img))
35 |         cnt += 1
36 |
37 | # shuffle
38 | random.shuffle(img_lst)
39 |
40 | # write
41 | for item in img_lst:
42 | fo.writerow(item)
43 |
--------------------------------------------------------------------------------
/example/MNIST/MNIST.conf:
--------------------------------------------------------------------------------
1 | # example configure file for mnist
2 | # training iterator
3 | data = train
4 | iter = mnist
5 | path_img = "./data/train-images-idx3-ubyte"
6 | path_label = "./data/train-labels-idx1-ubyte"
7 | shuffle = 1
8 | iter = end
9 | # evaluation iterator
10 | eval = test
11 | iter = mnist
12 | path_img = "./data/t10k-images-idx3-ubyte"
13 | path_label = "./data/t10k-labels-idx1-ubyte"
14 | iter = end
15 |
16 | netconfig=start
17 | layer[+1:fc1] = fullc:fc1
18 | nhidden = 100
19 | init_sigma = 0.01
20 | layer[+1:sg1] = sigmoid:se1
21 | layer[sg1->fc2] = fullc:fc2
22 | nhidden = 10
23 | init_sigma = 0.01
24 | layer[+0] = softmax
25 | netconfig=end
26 |
27 | # input shape not including batch
28 | input_shape = 1,1,784
29 | batch_size = 100
30 |
31 | ## global parameters
32 | dev = cpu
33 | save_model = 1
34 | max_round = 15
35 | num_round = 15
36 | train_eval = 15
37 | random_type = gaussian
38 | ## learning parameters
39 | eta = 0.1
40 | momentum = 0.9
41 | wd = 0.0
42 | # evaluation metric
43 | metric[label] = error
44 | # end of config
45 |
--------------------------------------------------------------------------------
/doc/README.md:
--------------------------------------------------------------------------------
1 | Document Home
2 | ====
3 | This is the documentation for cxxnet
4 |
5 | Links of Resources
6 | * [Learning CXXNET by Examples](../example)
7 | * [Python Interface](python.md)
8 | * [Multi-GPU/Distributed Training](multigpu.md)
9 |
10 | Configuration of CXXNET
11 | ====
12 | This section introduces how to set up the configuration file of cxxnet.
13 | In general, a cxxnet configuration file contains several kinds of configurations in a single file:
14 | * [Data Input Iterator Setting](io.md)
15 | - Set input data configurations.
16 | * [Layer Setting](layer.md)
17 | - Configure the network and set up each layer.
18 | * [Updater Setting](updater.md)
19 | - Set parameters (learning rate, momentum) for the learning procedure.
20 | * [Tasks](tasks.md)
21 | - This page includes all four tasks you can run with cxxnet.
22 | * [Other Setting](other.md)
23 | - Set other parameters for the neural network, related to device selection and running control.
24 | * [Caffe Converter](caffe_converter.md)
25 | - Convert a pretrained Caffe model to cxxnet.
26 | * [Advanced Usages](advanced.md)
27 | - Some advanced usages of cxxnet can be found here.
28 |
--------------------------------------------------------------------------------
/simple_mxnet_converter/dump_weight.py:
--------------------------------------------------------------------------------
1 | from cxxnet import Net
2 | import sys
3 | import re
4 | import numpy as np
5 | LAYER_PATTERN = re.compile(r"layer\[(.*)->(.*)\]\s*=\s*(\w+):(\w*)")
6 | dump_dict = {
7 | "fullc":["weight", "bias"],
8 | "conv":["weight", "bias"],
9 | "batch_norm":["gamma", "beta", "moving_mean", "moving_var"]
10 | }
11 |
12 | if len(sys.argv) < 4:
13 |     print("usage: conf model output_folder")
14 |     exit(1)
15 | layers = []
16 |
17 | fi = open(sys.argv[1])
18 | cfg = ""
19 | for line in fi:
20 |     line = line
21 |     cfg += line
22 |     if LAYER_PATTERN.match(line) != None:
23 |         in_ids_str, out_ids_str, layer, name = LAYER_PATTERN.findall(line)[0]
24 |         layers.append((layer, name))
25 |
26 | fi.close()
27 | net = Net(cfg=cfg)
28 | net.init_model()
29 | net.load_model(sys.argv[2])
30 |
31 |
32 | for layer, name in layers:
33 |     path = sys.argv[3]
34 |     if layer in dump_dict:
35 |         for tag in dump_dict[layer]:
36 |             weight = net.get_weight(name, tag)
37 |             if weight is None:
38 |                 print name + '_' + tag
39 |             else:
40 |                 np.save(path + name + '_' + tag, weight)
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/example/kaggle_bowl/README.md:
--------------------------------------------------------------------------------
1 | * Resize all images to 48 x 48
2 | ```
3 | mkdir /home/cxxnet/example/kaggle_bowl/data
4 | python gen_train.py /home/data/bowl/train/ /home/cxxnet/example/kaggle_bowl/data/train/
5 | python gen_test.py /home/data/bowl/test/ /home/cxxnet/example/kaggle_bowl/data/test/
6 | ```
7 |
8 | * Generate img list
9 | ```
10 | python gen_img_list.py train /home/data/bowl/sampleSubmission.csv data/train/ train.lst
11 | python gen_img_list.py test /home/data/bowl/sampleSubmission.csv data/test/ test.lst
12 | ```
13 |
14 | * Generate binary image files using ```im2rec```
15 | ```
16 | ../../bin/im2rec train.lst ./ train.rec
17 | ../../bin/im2rec test.lst ./ test.rec
18 | ```
19 |
20 | * Run CXXNET
21 | ```
22 | mkdir models
23 | ../../bin/cxxnet bowl.conf
24 | ```
25 | It takes about 5 minutes to train a deep conv net model on a GeForce 780.
26 |
27 | * Run Prediction
28 | ```
29 | ../../bin/cxxnet pred.conf
30 | ```
31 | It will write the softmax results to test.txt.
32 |
33 | * Make a submission file
34 |
35 | ```
36 | python make_submission.py /home/data/bowl/sampleSubmission.csv test.lst test.txt out.csv
37 | ```
38 |
39 | * Submit out.csv to get your result.
40 |
41 | * Validation
42 |
43 | Run
44 | ```
45 | sh gen_tr_va.sh train.lst
46 | ```
47 | Then you will have ```tr.lst``` as the training list and ```va.lst``` as the validation list.
48 |
49 |
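50 | To train on the split (a sketch reusing the ```im2rec``` step above; the ```.rec``` names are illustrative):
51 |
52 | ```
53 | ../../bin/im2rec tr.lst ./ tr.rec
54 | ../../bin/im2rec va.lst ./ va.rec
55 | ```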
--------------------------------------------------------------------------------
/src/layer/loss/softmax_layer-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_LAYER_SOFTMAX_LAYER_INL_HPP_
2 | #define CXXNET_LAYER_SOFTMAX_LAYER_INL_HPP_
3 |
4 | #include <mshadow/tensor.h>
5 | #include <vector>
6 | #include "../layer.h"
7 | #include "./loss_layer_base-inl.hpp"
8 |
9 | namespace cxxnet {
10 | namespace layer {
11 | /*! \brief loss function layer */
12 | template<typename xpu>
13 | class SoftmaxLayer: public LossLayerBase<xpu> {
14 |  public:
15 |   SoftmaxLayer(const LabelInfo *label_info)
16 |       : LossLayerBase<xpu>(label_info) {}
17 |   virtual ~SoftmaxLayer(void) {
18 |   }
19 |  protected:
20 |   virtual void Forward_(mshadow::Tensor<xpu, 2> inout_data,
21 |                         mshadow::Stream<xpu> *stream) {
22 |     mshadow::Softmax(inout_data, inout_data);
23 |   }
24 |   virtual void SetGradCPU(mshadow::Tensor<cpu, 2> inout_data,
25 |                           const LabelRecord &label) {
26 |     mshadow::Tensor<cpu, 2> lb = label.label;
27 |     CHECK(lb.size(0) == inout_data.size(0) && lb.size(1) == 1)
28 |         << "SoftmaxLayer: label size mismatch";
29 |     for (mshadow::index_t i = 0; i < inout_data.size(0); ++i) {
30 |       index_t k = static_cast<index_t>(lb[i][0]);
31 |       inout_data[i][k] -= 1.0f;
32 |     }
33 |   }
34 | };
35 | } // namespace layer
36 | } // namespace cxxnet
37 | #endif  // CXXNET_LAYER_SOFTMAX_LAYER_INL_HPP_
38 |
--------------------------------------------------------------------------------
/tools/caffe_converter/convert_mean.cpp:
--------------------------------------------------------------------------------
1 | /*!
2 | * \file convert_mean.cpp
3 | * \brief convert caffe mean file to cxx bin
4 | * \author Zehua Huang, Naiyan Wang
5 | */
6 |
7 | #include <mshadow/tensor.h>
8 | #include <caffe/blob.hpp>
9 | #include <caffe/util/io.hpp>
10 | #include "../../src/utils/io.h"
11 |
12 | void ConvertMean(const char *caffe_file_path, const char *cxx_file_path) {
13 |   // read caffe mean file
14 |   caffe::BlobProto blob_proto;
15 |   caffe::ReadProtoFromBinaryFileOrDie(caffe_file_path, &blob_proto);
16 |   caffe::Blob<float> mean_blob;
17 |   mean_blob.FromProto(blob_proto);
18 |
19 |   mshadow::TensorContainer<mshadow::cpu, 3> img;
20 |   mshadow::Shape<3> shape = mshadow::Shape3(3, mean_blob.height(), mean_blob.width());
21 |   img.Resize(shape);
22 |
23 |   for (size_t y = 0; y < img.size(1); ++y) {
24 |     for (size_t x = 0; x < img.size(2); ++x) {
25 |       // store in BGR order
26 |       img[2][y][x] = mean_blob.data_at(0, 0, y, x);
27 |       img[1][y][x] = mean_blob.data_at(0, 1, y, x);
28 |       img[0][y][x] = mean_blob.data_at(0, 2, y, x);
29 |     }
30 |   }
31 |
32 |   cxxnet::utils::StdFile fo(cxx_file_path, "wb");
33 |   img.SaveBinary(fo);
34 | }
35 |
36 | int main(int argc, char *argv[]) {
37 |   if (argc != 3) {
38 |     printf("usage: convert_mean caffe_mean_file output_file\n");
39 |     return 0;
40 |   }
41 |
42 |   ConvertMean(argv[1], argv[2]);
43 |   return 0;
44 | }
45 |
--------------------------------------------------------------------------------
/example/MNIST/MNIST_CONV.conf:
--------------------------------------------------------------------------------
1 | # example configure file for mnist
2 | # training iterator
3 | data = train
4 | iter = mnist
5 | path_img = "./data/train-images-idx3-ubyte"
6 | path_label = "./data/train-labels-idx1-ubyte"
7 | input_flat = 0
8 | shuffle = 1
9 | iter = end
10 | # evaluation iterator
11 | eval = test
12 | iter = mnist
13 | input_flat = 0
14 | path_img = "./data/t10k-images-idx3-ubyte"
15 | path_label = "./data/t10k-labels-idx1-ubyte"
16 | iter = end
17 |
18 | netconfig=start
19 | layer[0->1] = conv:cv1
20 | kernel_size = 3
21 | pad = 1
22 | stride = 2
23 | nchannel = 32
24 | random_type = xavier
25 | no_bias=0
26 | layer[1->2] = max_pooling
27 | kernel_size = 3
28 | stride = 2
29 | layer[2->3] = flatten
30 | layer[3->3] = dropout
31 | threshold = 0.5
32 | layer[3->4] = fullc:fc1
33 | nhidden = 100
34 | init_sigma = 0.01
35 | layer[4->5] = sigmoid:se1
36 | layer[5->6] = fullc:fc2
37 | nhidden = 10
38 | init_sigma = 0.01
39 | layer[6->6] = softmax
40 | netconfig=end
41 |
42 | # input shape not including batch
43 | input_shape = 1,28,28
44 | batch_size = 100
45 |
46 | ## global parameters
47 | dev = gpu
48 | save_model = 1
49 | max_round = 15
50 | num_round = 15
51 | train_eval = 1
52 | random_type = gaussian
53 | ## learning parameters
54 | eta = 0.1
55 | momentum = 0.9
56 | wd = 0.0
57 | # evaluation metric
58 | metric = error
59 | eval_train = 1
60 | # end of config
61 |
--------------------------------------------------------------------------------
/src/layer/loss/multi_logistic_layer-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_LAYER_MULTISIGMOID_LAYER_INL_HPP_
2 | #define CXXNET_LAYER_MULTISIGMOID_LAYER_INL_HPP_
3 |
4 | #include <mshadow/tensor.h>
5 | #include <vector>
6 | #include "../layer.h"
7 | #include "./loss_layer_base-inl.hpp"
8 |
9 | namespace cxxnet {
10 | namespace layer {
11 | /*! \brief loss function layer */
12 | template<typename xpu>
13 | class MultiLogisticLayer: public LossLayerBase<xpu> {
14 |  public:
15 |   MultiLogisticLayer(const LabelInfo *label_info)
16 |       : LossLayerBase<xpu>(label_info) {}
17 |   virtual ~MultiLogisticLayer(void) {
18 |   }
19 |  protected:
20 |   virtual void Forward_(mshadow::Tensor<xpu, 2> inout_data,
21 |                         mshadow::Stream<xpu> *stream) {
22 |     inout_data = mshadow::expr::F<op::sigmoid>(inout_data);
23 |   }
24 |   virtual void SetGradCPU(mshadow::Tensor<cpu, 2> inout_data,
25 |                           const LabelRecord &label) {
26 |     mshadow::Tensor<cpu, 2> lb = label.label;
27 |     CHECK(lb.size(0) == inout_data.size(0) && lb.size(1) == inout_data.size(1))
28 |         << " MultiLogisticLayer: label size mismatch";
29 |     for (index_t i = 0; i < inout_data.size(0); ++i) {
30 |       for (index_t j = 0; j < inout_data.size(1); ++j) {
31 |         inout_data[i][j] -= lb[i][j];
32 |       }
33 |     }
34 |   }
35 | };
36 | } // namespace layer
37 | } // namespace cxxnet
38 | #endif  // CXXNET_LAYER_MULTISIGMOID_LAYER_INL_HPP_
39 |
--------------------------------------------------------------------------------
/src/updater/updater_impl.cpp:
--------------------------------------------------------------------------------
1 | #define _CRT_SECURE_NO_WARNINGS
2 | #define _CRT_SECURE_NO_DEPRECATE
3 | // this is where the actual implementations are
4 | #include "updater_impl-inl.hpp"
5 | // specialize the cpu implementation
6 | namespace cxxnet {
7 | namespace updater {
8 | template<>
9 | IUpdater<cpu>* CreateUpdater<cpu>(const char *type,
10 |                                   mshadow::Random<cpu> *p_rnd,
11 |                                   mshadow::Tensor<cpu, 2> weight,
12 |                                   mshadow::Tensor<cpu, 2> wgrad,
13 |                                   const char *tag) {
14 |   return CreateUpdater_<cpu>(type, p_rnd, weight, wgrad, tag);
15 | }
16 | template<>
17 | void CreateAsyncUpdaters<cpu>(int layer_index,
18 |                               int device_id,
19 |                               mshadow::ps::ISharedModel<cpu, real_t> *param_server,
20 |                               const char *type,
21 |                               mshadow::Random<cpu> *p_rnd,
22 |                               layer::LayerType layer_type,
23 |                               layer::ILayer<cpu> *p_layer,
24 |                               std::vector<IAsyncUpdater<cpu>*> *out_updaters) {
25 |   CreateAsyncUpdaterVisitor<cpu> visitor(layer_index, device_id, param_server,
26 |                                          type, p_rnd, layer_type, out_updaters);
27 |   p_layer->ApplyVisitor(&visitor);
28 | }
29 | } // namespace updater
30 | } // namespace cxxnet
31 |
--------------------------------------------------------------------------------
/src/updater/updater_impl.cu:
--------------------------------------------------------------------------------
1 | #define _CRT_SECURE_NO_WARNINGS
2 | #define _CRT_SECURE_NO_DEPRECATE
3 | // include the layer, this is where the actual implementations are
4 |
5 | #include "updater_impl-inl.hpp"
6 | // specialize the gpu implementation
7 | namespace cxxnet {
8 | namespace updater {
9 | template<>
10 | IUpdater<gpu>* CreateUpdater<gpu>(const char *type,
11 |                                   mshadow::Random<gpu> *p_rnd,
12 |                                   mshadow::Tensor<gpu, 2> weight,
13 |                                   mshadow::Tensor<gpu, 2> wgrad,
14 |                                   const char *tag) {
15 |   return CreateUpdater_<gpu>(type, p_rnd, weight, wgrad, tag);
16 | }
17 | template<>
18 | void CreateAsyncUpdaters<gpu>(int layer_index,
19 |                               int device_id,
20 |                               mshadow::ps::ISharedModel<gpu, real_t> *param_server,
21 |                               const char *type,
22 |                               mshadow::Random<gpu> *p_rnd,
23 |                               layer::LayerType layer_type,
24 |                               layer::ILayer<gpu> *p_layer,
25 |                               std::vector<IAsyncUpdater<gpu>*> *out_updaters) {
26 |   CreateAsyncUpdaterVisitor<gpu> visitor(layer_index, device_id, param_server,
27 |                                          type, p_rnd, layer_type, out_updaters);
28 |   p_layer->ApplyVisitor(&visitor);
29 | }
30 | } // namespace updater
31 | } // namespace cxxnet
32 |
--------------------------------------------------------------------------------
/example/multi-machine/README.md:
--------------------------------------------------------------------------------
1 |
2 | * Build by using `build_ps.sh` in the root directory. The parameter server and
3 |   its dependencies will be statically linked into the binary.
4 |
5 | * Generate `train.lst` by following instructions in [kaggle_bowl](../kaggle_bowl).
6 |
7 | * Partition the data into 8 parts.
8 |
9 | ```
10 | ./partition.sh train.lst ../../tools/im2bin
11 | ```
12 |
13 | * Assume there are two machines, and their IPs are saved in `hosts`
14 | ```bash
15 | $ cat hosts
16 | 192.168.0.111
17 | 192.168.0.112
18 | ```
19 | Further assume each machine has two GPUs, so we put `dev = gpu:0,1` in
20 | `bowl.conf`. If `mpirun` is installed, then launch `cxxnet` on these two
21 | machines by using 2 workers and 2 servers:
22 | ```
23 | ./run.sh 2 2 bowl.conf
24 | ```
25 |
26 | More advanced usage:
27 |
28 | - put all log files in ./log
29 | ```
30 | ./run.sh 2 2 bowl.conf -log_dir log
31 | ```
32 | - log all network packet information (namely enable all verbose in `system/van.h` and `system/van.cc`)
33 | ```
34 | ./run.sh 2 2 bowl.conf -vmodule van*=1
35 | ```
36 |
37 | ## TODO
38 |
39 | * Data partitioning will not be necessary once dmlc-core is ready.
40 | * The distributed version doesn't support `xavier` initialization. The temporary
41 |   solution is to use `convert.py`. (The current version in this directory is
42 |   buggy, waiting for Bing's patch.)
43 | * Rather than letting the root node do the evaluation, do it in a distributed
44 |   fashion.
45 | * A distributed monitor for better progress printing.
46 | * More testing
47 |
--------------------------------------------------------------------------------
/doc/caffe_converter.md:
--------------------------------------------------------------------------------
1 | #### Introduction
2 | This page will introduce the usage of Caffe converter. It can convert a pretrained model in Caffe to cxxnet format.
3 |
4 | #### Preparation
5 | * Before converting, a recent version of Caffe should be built.
6 | * Currently, no automatic configuration file converter is provided. You need to convert the Caffe config file in prototxt to the cxxnet configuration format by yourself. **Please make sure that all the layers in the original Caffe model have corresponding layers in cxxnet!**
7 | * Converters are provided in both C++ and Python.
8 | - To use the C++ converter, you need to specify the following paths in the config.mk. For example,
9 |
10 | ```bash
11 | # whether to build caffe converter
12 | USE_CAFFE_CONVERTER = 1
13 | CAFFE_ROOT = ~/caffe
14 | CAFFE_INCLUDE = ~/caffe/include/
15 | CAFFE_LIB = ~/caffe/build/lib/
16 | ```
17 |
18 | Then, run ```make all``` in the root of cxxnet. If everything is correct, you will find ```caffe_converter``` and ```caffe_mean_converter``` in the ```bin``` folder.
19 |
20 | - To use the Python converter, you should first make sure the Python wrapper of Caffe is successfully built. Then you need to specify the paths of Caffe and cxxnet in ```tools/caffe_converter/convert.py```.
21 |
22 | #### Convert
23 | * Simply run ```bin/caffe_converter``` or ```tools/caffe_converter/convert.py```, and then follow the instructions.
24 | * To convert the mean file of Caffe, please use the C++ converter ```caffe_mean_converter```. However, we strongly recommend recomputing the mean file in cxxnet due to the different data augmentation methods in Caffe and cxxnet.
25 |
26 |
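27 | For example, the C++ mean converter takes the Caffe mean file and the output path as its two arguments (a sketch matching the argument check in ```tools/caffe_converter/convert_mean.cpp```; the file names are illustrative):
28 |
29 | ```bash
30 | bin/caffe_mean_converter imagenet_mean.binaryproto mean.bin
31 | ```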
--------------------------------------------------------------------------------
/src/layer/loss/lp_loss_layer-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_LAYER_LP_LOSS_LAYER_INL_HPP_
2 | #define CXXNET_LAYER_LP_LOSS_LAYER_INL_HPP_
3 |
4 | #include <mshadow/tensor.h>
5 | #include <cmath>
6 | #include "../layer.h"
7 | #include "./loss_layer_base-inl.hpp"
8 |
9 | namespace cxxnet {
10 | namespace layer {
11 | /*! \brief loss function layer */
12 | template<typename xpu>
13 | class LpLossLayer: public LossLayerBase<xpu> {
14 |  public:
15 |   LpLossLayer(const LabelInfo *label_info)
16 |       : LossLayerBase<xpu>(label_info) {
17 |     p = 2.0;
18 |   }
19 |   virtual ~LpLossLayer(void) {
20 |   }
21 |   virtual void SetParam(const char *name, const char *val) {
22 |     if (!strcmp(name, "p")) p = atof(val);
23 |     LossLayerBase<xpu>::SetParam(name, val);
24 |   }
25 |  protected:
26 |   virtual void Forward_(mshadow::Tensor<xpu, 2> inout_data,
27 |                         mshadow::Stream<xpu> *stream) {
28 |     // Do Nothing
29 |   }
30 |   virtual void SetGradCPU(mshadow::Tensor<cpu, 2> inout_data,
31 |                           const LabelRecord &label) {
32 |     mshadow::Tensor<cpu, 2> lb = label.label;
33 |     CHECK(lb.size(0) == inout_data.size(0) && lb.size(1) == inout_data.size(1));
34 |     for (index_t i = 0; i < inout_data.size(0); ++i) {
35 |       for (index_t j = 0; j < inout_data.size(1); ++j) {
36 |         inout_data[i][j] = p * std::pow(std::abs(inout_data[i][j] - lb[i][j]), p - 1)
37 |             * (inout_data[i][j] > lb[i][j] ? 1 : -1);
38 |       }
39 |     }
40 |   }
41 |  private:
42 |   // L_p loss
43 |   float p;
44 | };
45 | } // namespace layer
46 | } // namespace cxxnet
47 | #endif  // CXXNET_LAYER_LP_LOSS_LAYER_INL_HPP_
48 |
--------------------------------------------------------------------------------
/wrapper/matlab/DataIter.m:
--------------------------------------------------------------------------------
1 | classdef DataIter
2 |     properties %(Access = private)
3 |         head_
4 |         tail_
5 |         handle_
6 |     end
7 |
8 |     methods
9 |         function this = DataIter(cfg)
10 |             assert(ischar(cfg));
11 |             this.head_ = true;
12 |             this.tail_ = false;
13 |             this.handle_ = cxxnet_mex('MEXCXNIOCreateFromConfig', cfg);
14 |         end
15 |         function delete(this)
16 |             cxxnet_mex('MEXCXNIOFree', this.handle_);
17 |         end
18 |         function ret = next(this)
19 |             ret = cxxnet_mex('MEXCXNIONext', this.handle_);
20 |             this.head_ = false;
21 |             this.tail_ = ret == 0;
22 |         end
23 |         function before_first(this)
24 |             cxxnet_mex('MEXCXNIOBeforeFirst', this.handle_);
25 |             this.head_ = true;
26 |             this.tail_ = false;
27 |         end
28 |         function check_valid(this)
29 |             assert(this.head_ == true, 'iterator is at head');
30 |             assert(this.tail_ == false, 'iterator is at end');
31 |         end
32 |         function data = get_data(this)
33 |             if this.tail_ == false,
34 |                 data = cxxnet_mex('MEXCXNIOGetData', this.handle_);
35 |             else
36 |                 printf('Iterator is at end\n');
37 |             end
38 |         end
39 |         function label = get_label(this)
40 |             if this.tail_ == false,
41 |                 label = cxxnet_mex('MEXCXNIOGetLabel', this.handle_);
42 |             else
43 |                 printf('Iterator is at end\n');
44 |             end
45 |         end
46 |     end
47 | end
48 |
--------------------------------------------------------------------------------
/src/utils/random.h:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_UTILS_GLOBAL_RANDOM_H_
2 | #define CXXNET_UTILS_GLOBAL_RANDOM_H_
3 | /*!
4 | * \file global_random.h
5 | * \brief global random number utils, used for some preprocessing
6 | * \author Tianqi Chen
7 | */
8 | #include <cstdlib>
9 | #include <cmath>
10 | #include <vector>
11 | #include "./utils.h"
12 |
13 | #if _MSC_VER
14 | #define rand_r(x) rand()
15 | #endif
16 |
17 | namespace cxxnet {
18 | namespace utils {
19 | /*! \brief simple thread dependent random sampler */
20 | class RandomSampler {
21 |  public:
22 |   RandomSampler(void) {
23 |     this->Seed(0);
24 |   }
25 |   /*!
26 |    * \brief seed random number
27 |    * \param seed the random number seed
28 |    */
29 |   inline void Seed(unsigned seed) {
30 |     this->rseed_ = seed;
31 | #if _MSC_VER
32 |     srand(seed);
33 | #endif
34 |   }
35 |   /*! \brief return a real number uniform in [0,1) */
36 |   inline double NextDouble() {
37 |     return static_cast<double>(rand_r(&rseed_)) /
38 |         (static_cast<double>(RAND_MAX) + 1.0);
39 |   }
40 |   /*! \brief return a random number in [0, n) */
41 |   inline uint32_t NextUInt32(uint32_t n) {
42 |     return static_cast<uint32_t>(floor(NextDouble() * n));
43 |   }
44 |   /*! \brief random shuffle data */
45 |   template<typename T>
46 |   inline void Shuffle(T *data, size_t sz) {
47 |     if (sz == 0) return;
48 |     for (uint32_t i = (uint32_t)sz - 1; i > 0; i--) {
49 |       std::swap(data[i], data[NextUInt32(i + 1)]);
50 |     }
51 |   }
52 |   /*! \brief random shuffle data in a vector */
53 |   template<typename T>
54 |   inline void Shuffle(std::vector<T> &data) {
55 |     Shuffle(&data[0], data.size());
56 |   }
57 |
58 |  private:
59 |   unsigned rseed_;
60 | };
61 | } // namespace utils
62 | } // namespace cxxnet
63 | #endif
64 |
--------------------------------------------------------------------------------
/src/layer/flatten_layer-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_LAYER_FLATTEN_LAYER_INL_HPP_
2 | #define CXXNET_LAYER_FLATTEN_LAYER_INL_HPP_
3 |
4 | #include "./layer.h"
5 | #include "./op.h"
6 |
7 | namespace cxxnet {
8 | namespace layer {
9 |
10 | template<typename xpu>
11 | class FlattenLayer : public ILayer<xpu> {
12 |  public:
13 |   virtual ~FlattenLayer(void) {}
14 |   virtual void InitConnection(const std::vector<Node<xpu>*> &nodes_in,
15 |                               const std::vector<Node<xpu>*> &nodes_out,
16 |                               ConnectState<xpu> *p_cstate) {
17 |     utils::Check(nodes_in.size() == 1 && nodes_out.size() == 1,
18 |                  "FlattenLayer: only support 1-1 connection");
19 |     mshadow::Shape<4> ishape = nodes_in[0]->data.shape_;
20 |     nodes_out[0]->data.shape_ =
21 |         mshadow::Shape4(ishape[0], 1, 1, ishape[1] * ishape[2] * ishape[3]);
22 |   }
23 |   virtual void Forward(bool is_train,
24 |                        const std::vector<Node<xpu>*> &nodes_in,
25 |                        const std::vector<Node<xpu>*> &nodes_out,
26 |                        ConnectState<xpu> *p_cstate) {
27 |     using namespace mshadow::expr;
28 |     nodes_out[0]->data = reshape(nodes_in[0]->data, nodes_out[0]->data.shape_);
29 |   }
30 |   virtual void Backprop(bool prop_grad,
31 |                         const std::vector<Node<xpu>*> &nodes_in,
32 |                         const std::vector<Node<xpu>*> &nodes_out,
33 |                         ConnectState<xpu> *p_cstate) {
34 |     using namespace mshadow::expr;
35 |     if (prop_grad) {
36 |       nodes_in[0]->data = reshape(nodes_out[0]->data, nodes_in[0]->data.shape_);
37 |     }
38 |   }
39 | };
40 | } // namespace layer
41 | } // namespace cxxnet
42 | #endif  // CXXNET_LAYER_FLATTEN_LAYER_INL_HPP_
43 |
44 |
--------------------------------------------------------------------------------
/doc/advanced.md:
--------------------------------------------------------------------------------
1 | #### Introduction
2 | This page will introduce some advanced usages in cxxnet, including:
3 | * [Multi-label Training](#multi-label-training)
4 |
5 | #### Multi-label Training
6 | * To use multi-label training, you need the following steps in addition to the single-label case:
7 | - For multi-label training in ```imgrec```, you need to specify the ```image_list``` field to indicate the list file that contains the labels.
8 | - First, you need to specify the number of labels in the network by setting ```label_width``` variable in global settings. The following setting denotes that we have 5 labels in the network.
9 | ```bash
10 | label_width = 5
11 | ```
12 | - In the [image list file](io.md#image-list-file), you need to provide ```label_width``` labels instead of one label. Namely, each line is in the format:
13 | ```
14 | image_index \t label_1 \t label_2 ... \t label_n \t file_name
15 | ```
16 | - In global setting, you need to specify how each field of the labels form a label vector. For example, we are interested in a localization task. In the task, we first need to output the label for one image, and next predict its position denoted by a bounding box. The configuration can be written as:
17 | ```
18 | label_vec[0,1) = class
19 | label_vec[1,5) = bounding_box
20 | ```
21 | - At last, in each loss layer, you need to specify the target of the loss:
22 | ```
23 | layer[19->21] = softmax
24 | target = class
25 | layer[20->22] = l2_loss
26 | target = bounding_box
27 | ```
28 | This means for the first field of the labels, we treat it as a class label, and apply standard softmax loss function on it. For the other four labels, we treat them as the coordinates of the bounding box, and train them using Euclidean loss.
29 |
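30 | For example, with ```label_width = 5``` and the ```label_vec``` setting above, one line of the image list could look like this (values are illustrative; fields are tab-separated):
31 |
32 | ```
33 | 42	3	0.1	0.2	0.8	0.9	images/00042.jpg
34 | ```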
--------------------------------------------------------------------------------
/src/layer/split_layer-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_LAYER_SPLIT_LAYER_INL_HPP_
2 | #define CXXNET_LAYER_SPLIT_LAYER_INL_HPP_
3 |
4 | #include "./layer.h"
5 | #include "./op.h"
6 |
7 |
8 | namespace cxxnet {
9 | namespace layer {
10 |
11 | template<typename xpu>
12 | class SplitLayer : public ILayer<xpu> {
13 |  public:
14 |   virtual void InitConnection(const std::vector<Node<xpu>*> &nodes_in,
15 |                               const std::vector<Node<xpu>*> &nodes_out,
16 |                               ConnectState<xpu> *p_cstate) {
17 |     //utils::Check(nodes_in.size() == 1 && nodes_out.size() > 1,
18 |     //             "Split layer only support 1-n connection");
19 |     mshadow::Shape<4> oshape = nodes_in[0]->data.shape_;
20 |     for (index_t i = 0; i < nodes_out.size(); ++i) {
21 |       nodes_out[i]->data.shape_ = oshape;
22 |     }
23 |   }
24 |   virtual void Forward(bool is_train,
25 |                        const std::vector<Node<xpu>*> &nodes_in,
26 |                        const std::vector<Node<xpu>*> &nodes_out,
27 |                        ConnectState<xpu> *p_cstate) {
28 |     for (index_t i = 0; i < nodes_out.size(); ++i) {
29 |       mshadow::Copy(nodes_out[i]->data, nodes_in[0]->data,
30 |                     nodes_out[i]->data.stream_);
31 |     }
32 |   }
33 |   virtual void Backprop(bool prop_grad,
34 |                         const std::vector<Node<xpu>*> &nodes_in,
35 |                         const std::vector<Node<xpu>*> &nodes_out,
36 |                         ConnectState<xpu> *p_cstate) {
37 |     if (prop_grad) {
38 |       mshadow::Copy(nodes_in[0]->data, nodes_out[0]->data,
39 |                     nodes_in[0]->data.stream_);
40 |       for (index_t i = 1; i < nodes_out.size(); ++i) {
41 |         nodes_in[0]->data += nodes_out[i]->data;
42 |       }
43 |     }
44 |   }
45 | };  // class SplitLayer
46 | } // namespace layer
47 | } // namespace cxxnet
48 | #endif
49 |
--------------------------------------------------------------------------------
/src/layer/activation_layer-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_LAYER_ACTIVATION_LAYER_INL_HPP_
2 | #define CXXNET_LAYER_ACTIVATION_LAYER_INL_HPP_
3 |
4 | #include <mshadow/tensor.h>
5 | #include "./layer.h"
6 | #include "./op.h"
7 |
8 | namespace cxxnet {
9 | namespace layer {
10 |
11 | template<typename xpu, typename ForwardOp, typename BackOp>
12 | class ActivationLayer : public ILayer<xpu> {
13 |  public:
14 |   virtual ~ActivationLayer(void) {}
15 |   virtual void InitConnection(const std::vector<Node<xpu>*> &nodes_in,
16 |                               const std::vector<Node<xpu>*> &nodes_out,
17 |                               ConnectState<xpu> *p_cstate) {
18 |     utils::Check(nodes_in.size() == 1 && nodes_out.size() == 1,
19 |                  "ActivationLayer only supports 1-1 connection");
20 |     nodes_out[0]->data.shape_ = nodes_in[0]->data.shape_;
21 |   }
22 |   virtual void Forward(bool is_train,
23 |                        const std::vector<Node<xpu>*> &nodes_in,
24 |                        const std::vector<Node<xpu>*> &nodes_out,
25 |                        ConnectState<xpu> *p_cstate) {
26 |     using namespace mshadow::expr;
27 |     // InitConnection is already called, no need to check size again
28 |     nodes_in[0]->data = F<ForwardOp>(nodes_in[0]->data);
29 |     mshadow::Copy(nodes_out[0]->data, nodes_in[0]->data, nodes_out[0]->data.stream_);
30 |   }
31 |   virtual void Backprop(bool prop_grad,
32 |                         const std::vector<Node<xpu>*> &nodes_in,
33 |                         const std::vector<Node<xpu>*> &nodes_out,
34 |                         ConnectState<xpu> *p_cstate) {
35 |     using namespace mshadow::expr;
36 |     if (prop_grad) {
37 |       nodes_in[0]->data = F<BackOp>(nodes_in[0]->data) * nodes_out[0]->data;
38 |     }
39 |   }
40 | };
41 | } // namespace layer
42 | } // namespace cxxnet
43 | #endif  // CXXNET_LAYER_ACTIVATION_LAYER_INL_HPP_
44 |
45 |
--------------------------------------------------------------------------------
/doc/debug_perf.md:
--------------------------------------------------------------------------------
1 | ## Debug the performance
2 |
3 | Normally, GPU utilization should be above 95% during training. We can check this
4 | number with `nvidia-smi` for high-end Nvidia GPUs, or by comparing the required FLOPS of the
5 | neural network against the theoretical capacity of the GPUs.
6 |
7 | However, sometimes we don't get the desired performance. Here we list some
8 | common problems:
9 |
10 | 1. Check that yours is the only program using that GPU card. If there are
11 | multiple GPU cards, you can use `gpu:2` to select the 3rd card.
12 |
13 | 2. Check if reading the data is the bottleneck. You can test the reading and
14 | decoding performance by adding `test_io = 1` in your configuration. To improve the
15 | performance, you can
16 | - use `iter = threadbuffer` to do data prefetching
17 | - use a compact binary data format
18 | - change from `iter = imbin` to `iter = imbinx` to use the multithread
19 | decoder
20 | - copy the data into local disk if it sits on a NFS.
21 |
22 | 3. Use a proper minibatch size. A larger minibatch size improves the system
23 | performance, but it requires more memory (`~= model_size + minibatch_size *
24 | const`) and may slow down the convergence. You need to make a trade-off here.
25 |
26 | 4. Check if the memory-to-GPU bandwidth is the bottleneck. It often happens when
27 | using multi-GPUs within a single machine. There are several tools to monitor
28 | the memory bandwidth, such as
29 | [intel-perf-counter](https://software.intel.com/en-us/articles/intel-performance-counter-monitor)
30 |
31 | 5. Check if the network bandwidth is the bottleneck for distributed training
32 | using multiple machines. It often hits the maximal network bandwidth on 1Gbps
33 | clusters. To reduce the network bandwidth, you can
34 | 1. Increase the minibatch size
35 | 2. Use the filters in parameter server, such as converting a float into a 1
36 | (or 2) byte integer, and (or) data compression.
37 |
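38 | For example, to check item 2 above (a sketch; add the line to your existing configuration and rerun):
39 |
40 | ```bash
41 | # measure reading/decoding throughput only, skipping training
42 | test_io = 1
43 | ```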
--------------------------------------------------------------------------------
/src/layer/xelu_layer-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef LAYER_XELU_LAYER_INL_HPP_
2 | #define LAYER_XELU_LAYER_INL_HPP_
3 | #pragma once
4 |
5 | #include <mshadow/tensor.h>
6 | #include "./layer.h"
7 | #include "./param.h"
8 | #include "./op.h"
9 | #include "../utils/utils.h"
10 |
11 | namespace cxxnet {
12 | namespace layer {
13 |
14 | template<typename xpu>
15 | class XeluLayer : public ILayer<xpu> {
16 |  public:
17 |   XeluLayer() { b_ = 5.0f; }
18 |   virtual ~XeluLayer(void) {}
19 |   virtual void SetParam(const char *name, const char* val) {
20 |     if (!strcmp(name, "b")) b_ = atof(val);
21 |   }
22 |   virtual void InitConnection(const std::vector<Node<xpu>*> &nodes_in,
23 |                               const std::vector<Node<xpu>*> &nodes_out,
24 |                               ConnectState<xpu> *p_cstate) {
25 |     utils::Check(nodes_in.size() == 1 && nodes_out.size() == 1,
26 |                  "XeluLayer only supports 1-1 connection");
27 |     nodes_out[0]->data.shape_ = nodes_in[0]->data.shape_;
28 |   }
29 |   virtual void Forward(bool is_train,
30 |                        const std::vector<Node<xpu>*> &nodes_in,
31 |                        const std::vector<Node<xpu>*> &nodes_out,
32 |                        ConnectState<xpu> *p_cstate) {
33 |     using namespace mshadow::expr;
34 |     // InitConnection is already called, no need to check size again
35 |     nodes_in[0]->data = F<op::xelu>(nodes_in[0]->data, b_);
36 |     mshadow::Copy(nodes_out[0]->data, nodes_in[0]->data, nodes_out[0]->data.stream_);
37 |   }
38 |   virtual void Backprop(bool prop_grad,
39 |                         const std::vector<Node<xpu>*> &nodes_in,
40 |                         const std::vector<Node<xpu>*> &nodes_out,
41 |                         ConnectState<xpu> *p_cstate) {
42 |     using namespace mshadow::expr;
43 |     if (prop_grad) {
44 |       nodes_in[0]->data = F<op::xelu_grad>(nodes_in[0]->data, b_) * nodes_out[0]->data;
45 |     }
46 |   }
47 |  private:
48 |   /*! \brief parameters that potentially be useful */
49 |   float b_;
50 | };
51 |
52 | }  // namespace layer
53 | }  // namespace cxxnet
54 |
55 | #endif  // LAYER_XELU_LAYER_INL_HPP_
56 |
--------------------------------------------------------------------------------
/tools/bin2rec.cc:
--------------------------------------------------------------------------------
1 | /*!
2 | * Copyright (c) 2015 by Contributors
3 | * \file im2rec.cc
4 | * \brief convert images into image recordio format
5 | * Image Record Format: zeropad[64bit] imid[64bit] img-binary-content
6 | * The 64bit zero pad was reserved for future purposes
7 | *
8 | * Image List Format: unique-image-index label[s] path-to-image
9 | * \sa dmlc/recordio.h
10 | */
11 | #include <cctype>
12 | #include <cstring>
13 | #include <cstdlib>
14 | #include <cstdio>
15 | #include <string>
16 | #include <vector>
17 | #include <dmlc/base.h>
18 | #include <dmlc/io.h>
19 | #include <dmlc/logging.h>
20 | #include <dmlc/recordio.h>
21 | #include "../src/io/image_recordio.h"
22 | #include "../src/utils/io.h"
23 |
24 |
25 | int main(int argc, char **argv) {
26 |   using namespace cxxnet::utils;
27 |   using namespace dmlc;
28 |   if (argc < 4) {
29 |     printf("usage: bin2rec img_list bin_file rec_file [label_width=1]\n");
30 |     exit(-1);
31 |   }
32 |   FILE *fplst = fopen(argv[1], "r");
33 |   CHECK(fplst != NULL);
34 |   dmlc::Stream *fo = dmlc::Stream::Create(argv[3], "w");
35 |   dmlc::RecordIOWriter writer(fo);
36 |   cxxnet::ImageRecordIO rec;
37 |   std::string blob, fname;
38 |   StdFile fi;
39 |   fi.Open(argv[2], "rb");
40 |   int label_width = 1;
41 |   if (argc > 4) {
42 |     label_width = atoi(argv[4]);
43 |   }
44 |   BinaryPage pg;
45 |   size_t imcnt = 0;
46 |   while (pg.Load(fi)) {
47 |     for (int i = 0; i < pg.Size(); ++i) {
48 |       CHECK(fscanf(fplst, "%lu", &rec.header.image_id[0]) == 1);
49 |       CHECK(fscanf(fplst, "%f", &rec.header.label) == 1);
50 |       for (int k = 1; k < label_width; ++k) {
51 |         float tmp;
52 |         CHECK(fscanf(fplst, "%f", &tmp) == 1);
53 |       }
54 |       CHECK(fscanf(fplst, "%*[^\n]\n") == 0) << "ignore";
55 |       rec.SaveHeader(&blob);
56 |       BinaryPage::Obj obj = pg[i];
57 |       size_t bsize = blob.size();
58 |       blob.resize(bsize + obj.sz);
59 |       memcpy(BeginPtr(blob) + bsize, obj.dptr, obj.sz);
60 |       writer.WriteRecord(BeginPtr(blob), blob.size());
61 |       imcnt++;
62 |     }
63 |   }
64 |   LOG(INFO) << "Total: " << imcnt << " images processed";
65 |   delete fo;
66 |   fclose(fplst);
67 | }
68 |
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
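For reference, the whitespace-separated image list that the `fscanf` loop in `bin2rec.cc` above consumes can be produced with a few lines of Python. This is an illustrative sketch, not part of the repo; `write_image_list` and its arguments are hypothetical names.

```python
def write_image_list(path, entries, label_width=1):
    # entries: iterable of (unique-image-index, [label, ...], path-to-image)
    with open(path, 'w') as fo:
        for image_id, labels, image_path in entries:
            assert len(labels) == label_width
            fields = [str(image_id)] + [str(x) for x in labels] + [image_path]
            fo.write('\t'.join(fields) + '\n')

write_image_list('train.lst', [(0, [3.0], 'img/a.jpg'), (1, [7.0], 'img/b.jpg')])
```
--------------------------------------------------------------------------------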
/wrapper/matlab/example.m:
--------------------------------------------------------------------------------
1 | train_cfg = [
2 | 'iter = mnist' char(10)...
3 | ' path_img = "./data/train-images-idx3-ubyte"' char(10)...
4 | ' path_label = "./data/train-labels-idx1-ubyte"' char(10) ...
5 | ' shuffle = 1' char(10)...
6 | ' flatten = 1' char(10)...
7 | 'iter = end' char(10)...
8 | 'input_shape = 1,1,784' char(10)...
9 | 'batch_size = 100' char(10)];
10 |
11 |
12 | eval_cfg = [
13 | 'iter = mnist' char(10)...
14 | ' path_img = "./data/t10k-images-idx3-ubyte"' char(10)...
15 | ' path_label = "./data/t10k-labels-idx1-ubyte"' char(10)...
16 | ' flatten = 1' char(10)...
17 | 'iter = end' char(10)...
18 | 'input_shape = 1,1,784' char(10)...
19 | 'batch_size = 100' char(10)...
20 | ];
21 |
22 |
23 | cfg = [
24 | 'netconfig=start' char(10)...
25 | 'layer[+1:fc1] = fullc:fc1' char(10)...
26 | ' nhidden = 100' char(10)...
27 | ' init_sigma = 0.01' char(10)...
28 | 'layer[+1:sg1] = sigmoid:se1' char(10)...
29 | 'layer[sg1->fc2] = fullc:fc2' char(10)...
30 | ' nhidden = 10' char(10)...
31 | ' init_sigma = 0.01' char(10)...
32 | 'layer[+0] = softmax' char(10)...
33 | 'netconfig=end' char(10)...
34 | 'input_shape = 1,1,784' char(10)...
35 | 'batch_size = 100' char(10)...
36 | 'random_type = xavier' char(10)...
37 | 'metric[label]=error' char(10)...
38 | 'eta=0.1' char(10)...
39 | 'momentum=0.9' char(10)...
40 | ];
41 |
42 | train = DataIter(train_cfg);
43 | eval = DataIter(eval_cfg);
44 |
45 | net = Net('gpu', cfg);
46 | net.init_model();
47 |
48 | % train 1 epoch
49 | train.before_first();
50 | while train.next() == 1
51 | net.update(train);
52 | end
53 | net.evaluate(eval, 'eval');
54 | train.before_first();
55 | eval.before_first();
56 | w1 = net.get_weight('fc1', 'wmat');
57 | b1 = net.get_weight('fc1', 'bias');
58 | w2 = net.get_weight('fc2', 'wmat');
59 | b2 = net.get_weight('fc2', 'bias');
60 |
61 | % train second epoch
62 |
63 | while train.next() == 1,
64 | d = train.get_data();
65 | l = train.get_label();
66 | net.update(d, l);
67 | end
68 | net.evaluate(eval, 'eval');
69 | eval.before_first();
70 |
71 | % reset weight
72 | net.set_weight(w1, 'fc1', 'wmat');
73 | net.set_weight(b1, 'fc1', 'bias');
74 | net.set_weight(w2, 'fc2', 'wmat');
75 | net.set_weight(b2, 'fc2', 'bias');
76 | net.evaluate(eval, 'eval')
77 |
78 | delete(net);
79 | delete(train);
80 | delete(eval);
81 |
--------------------------------------------------------------------------------
/src/io/image_recordio.h:
--------------------------------------------------------------------------------
1 | /*!
2 | * \file image_recordio.h
3 | * \brief image recordio struct
4 | */
5 | #ifndef IMAGE_RECORDIO_H_
6 | #define IMAGE_RECORDIO_H_
7 |
8 | #include <dmlc/base.h>
9 | #include <dmlc/logging.h>
10 |
11 | namespace cxxnet {
12 | /*! \brief image recordio struct */
13 | struct ImageRecordIO {
14 | /*! \brief header in image recordio */
15 | struct Header {
16 | /*!
17 | * \brief flag of the header,
18 | * used for future extension purposes
19 | */
20 | uint32_t flag;
21 | /*!
22 | * \brief label field that stores the label of the image
23 | * when no image list is provided,
24 | *
25 | * NOTE: users do not need to repack the recordio just to
26 | * change the label field; just supply a list file that
27 | * maps image id to new labels
28 | */
29 | float label;
30 | /*!
31 | * \brief unique image index
32 | * image_id[1] is always set to 0,
33 | * reserved for future purposes for 128bit id
34 | * image_id[0] is used to store image id
35 | */
36 | uint64_t image_id[2];
37 | };
38 | /*! \brief header of image recordio */
39 | Header header;
40 | /*! \brief pointer to data content */
41 | uint8_t *content;
42 | /*! \brief size of the content */
43 | size_t content_size;
44 | /*! \brief constructor */
45 | ImageRecordIO(void)
46 | : content(NULL), content_size(0) {
47 | memset(&header, 0, sizeof(header));
48 | }
49 | /*! \brief get image id from record */
50 | inline uint64_t image_index(void) const {
51 | return header.image_id[0];
52 | }
53 | /*!
54 | * \brief load header from a record content
55 | * \param buf the head of record
56 | * \param size the size of the entire record
57 | */
58 | inline void Load(void *buf, size_t size) {
59 | CHECK(size >= sizeof(header));
60 | std::memcpy(&header, buf, sizeof(header));
61 | content = reinterpret_cast<uint8_t*>(buf) + sizeof(header);
62 | content_size = size - sizeof(header);
63 | }
64 | /*!
65 | * \brief save the record header
66 | */
67 | inline void SaveHeader(std::string *blob) const {
68 | blob->resize(sizeof(header));
69 | std::memcpy(dmlc::BeginPtr(*blob), &header, sizeof(header));
70 | }
71 | };
72 | } // namespace cxxnet
73 | #endif // IMAGE_RECORDIO_H_
74 |
--------------------------------------------------------------------------------
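The `Header` struct in `image_recordio.h` above occupies 24 bytes (4 + 4 + 2*8) when packed without compiler padding. A minimal Python sketch of reading one record, assuming little-endian byte order and no padding (true on the x86 targets cxxnet builds for):

```python
import struct

HEADER_FMT = '<If2Q'                       # flag, label, image_id[2]
HEADER_SIZE = struct.calcsize(HEADER_FMT)  # 24 bytes

def load_record(buf):
    flag, label, image_id0, image_id1 = struct.unpack_from(HEADER_FMT, buf, 0)
    content = buf[HEADER_SIZE:]            # remaining bytes: encoded image
    return flag, label, image_id0, content
```
--------------------------------------------------------------------------------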
/example/kaggle_bowl/pred.conf:
--------------------------------------------------------------------------------
1 | # Prediction configuration for the Kaggle National Data Science Bowl example
2 | # Acknowledgement:
3 | # Ref: http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf
4 | # The scheduling parameters are adapted from Caffe (http://caffe.berkeleyvision.org/)
5 |
6 | data = train
7 | iter = imgbin
8 | image_list = "./train.lst"
9 | image_bin = "./train.bin"
10 | image_mean = "models/image_mean.bin"
11 | rand_mirror=1
12 | iter = threadbuffer
13 | iter = end
14 |
15 | pred = test.txt
16 | iter = imgbin
17 | image_list = "./test.lst"
18 | image_bin = "./test.bin"
19 | image_mean = "models/image_mean.bin"
20 | iter = threadbuffer
21 | iter = end
22 |
23 | task = pred_raw
24 | model_in = ./models/0045.model
25 |
26 |
27 | netconfig=start
28 | layer[0->1] = conv
29 | kernel_size = 5
30 | stride = 4
31 | nchannel = 96
32 | pad = 2
33 | layer[1->2] = relu
34 | layer[2->3] = max_pooling
35 | kernel_size = 3
36 | stride = 2
37 | ###############
38 | layer[3->4] = conv
39 | nchannel = 128
40 | kernel_size = 3
41 | pad = 2
42 | layer[4->5] = relu
43 | #############
44 | layer[5->6] = conv
45 | nchannel = 128
46 | kernel_size = 3
47 | pad = 1
48 | layer[6->7] = relu
49 | layer[7->8] = max_pooling
50 | kernel_size = 3
51 | stride = 2
52 | layer[8->9] = flatten
53 | layer[9->10] = fullc
54 | nhidden = 512
55 | layer[10->11] = relu
56 | layer[11->11] = dropout
57 | threshold = 0.5
58 | layer[11->12] = fullc
59 | nhidden = 512
60 | layer[12->13] = relu
61 | layer[13->13] = dropout
62 | threshold = 0.5
63 | layer[13->14] = fullc
64 | nhidden = 121
65 | layer[14->14] = softmax
66 | netconfig=end
67 |
68 | # evaluation metric
69 | metric = error
70 |
71 | max_round = 45
72 | num_round = 45
73 |
74 | # input shape not including batch
75 | input_shape = 3,48,48
76 |
77 | batch_size = 100
78 |
79 | # global parameters can go in any section outside netconfig and iter
80 | momentum = 0.9
81 | wmat:lr = 0.01
82 | wmat:wd = 0.0005
83 |
84 | bias:wd = 0.000
85 | bias:lr = 0.02
86 |
87 | # all the learning rate schedule starts with lr
88 | lr:schedule = expdecay
89 | lr:gamma = 0.1
90 | lr:step = 10000
91 |
92 | save_model=1
93 | model_dir=models
94 |
95 | # random config
96 | random_type = xavier
97 | init_sigma = 0.01
98 |
99 | # new line
100 |
--------------------------------------------------------------------------------
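To see how `pred.conf` above maps `input_shape = 3,48,48` through its first convolution and pooling layers, the usual output-size arithmetic is floor((n + 2p - k) / s) + 1. A quick sketch; cxxnet's exact rounding for pooling may differ, so treat this as a sanity check rather than a spec:

```python
def out_size(n, k, s=1, p=0):
    # floor((n + 2p - k) / s) + 1
    return (n + 2 * p - k) // s + 1

n = out_size(48, k=5, s=4, p=2)  # first conv   -> 12
n = out_size(n, k=3, s=2)        # max_pooling  -> 5
print n                          # 5
```
--------------------------------------------------------------------------------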
/example/multi-machine/bowl.conf:
--------------------------------------------------------------------------------
1 | # adapted from ../kaggle_bowl/bowl.conf
2 |
3 | data = train
4 | iter = imgbin
5 | image_conf_prefix = "./tr_%1d"
6 | image_conf_ids = 0-7
7 | image_mean = "models/image_mean.bin"
8 | rand_mirror=1
9 | rand_crop=1
10 | max_rotate_angle=180
11 | max_aspect_ratio = 0.5
12 | max_shear_ratio = 0.3
13 | min_crop_size=32
14 | max_crop_size=48
15 | iter = threadbuffer
16 | iter = end
17 |
18 | eval = val
19 | iter = imgbin
20 | image_list = "./va.lst"
21 | image_bin = "./va.bin"
22 | image_mean = "models/image_mean.bin"
23 | iter = threadbuffer
24 | iter = end
25 |
26 | netconfig=start
27 | layer[+1] = conv
28 | kernel_size = 4
29 | stride = 1
30 | nchannel = 48
31 | pad = 2
32 | layer[+1] = relu
33 | layer[+1] = max_pooling
34 | kernel_size = 3
35 | stride = 2
36 | ###############
37 | layer[+1] = conv
38 | nchannel = 96
39 | kernel_size = 3
40 | stride = 1
41 | pad = 1
42 | layer[+1] = relu
43 | layer[+1] = conv
44 | nchannel = 96
45 | kernel_size = 3
46 | stride = 1
47 | pad = 1
48 | layer[+1] = relu
49 | layer[+1] = max_pooling
50 | kernel_size = 3
51 | stride = 2
52 | ##############
53 | layer[+1] = conv
54 | nchannel = 128
55 | kernel_size = 2
56 | stride = 1
57 | layer[+1] = relu
58 | layer[+1] = conv
59 | nchannel = 128
60 | kernel_size = 3
61 | stride = 1
62 | layer[+1] = max_pooling
63 | kernel_size = 3
64 | stride = 2
65 | layer[+1] = flatten
66 | layer[+1] = fullc
67 | nhidden = 256
68 | layer[+0] = dropout
69 | threshold = 0.5
70 | layer[+1] = fullc
71 | nhidden = 121
72 | layer[+0] = softmax
73 | netconfig=end
74 |
75 | # evaluation metric
76 | metric = error
77 |
78 | dev = gpu:0,1
79 | max_round = 100
80 | num_round = 100
81 |
82 | # input shape not including batch
83 | input_shape = 3,40,40
84 |
85 | batch_size = 64
86 |
87 | # global parameters can go in any section outside netconfig and iter
88 | momentum = 0.9
89 | wmat:lr = 0.001
90 | wmat:wd = 0.0005
91 |
92 | bias:wd = 0.000
93 | bias:lr = 0.002
94 |
95 | # all the learning rate schedule starts with lr
96 | lr:schedule = expdecay
97 | lr:gamma = 0.1
98 | lr:step = 20000
99 |
100 | save_model=0
101 | model_dir=models
102 | print_step = 1
103 | # random config
104 | random_type = gaussian
105 | init_sigma = 0.01
106 |
107 | # new line
108 |
--------------------------------------------------------------------------------
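In the multi-machine config above, `image_conf_prefix` and `image_conf_ids` describe a set of data partitions rather than a single list/bin pair. My reading of the two fields, sketched in Python (the real expansion happens inside cxxnet's iterator code):

```python
prefix = "./tr_%1d"   # printf-style pattern from image_conf_prefix
lo, hi = (int(x) for x in "0-7".split('-'))
parts = [prefix % i for i in range(lo, hi + 1)]
print parts           # ['./tr_0', './tr_1', ..., './tr_7']
```
--------------------------------------------------------------------------------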
/tools/imgbin-partition-maker.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import random
4 | import argparse
5 |
6 | random.seed(888)
7 |
8 |
9 | parser = argparse.ArgumentParser(description='Generate a Makefile that builds partitioned imgbin files for cxxnet')
10 | parser.add_argument('--img_list', required=True, help="path to list of all images")
11 | parser.add_argument('--img_root', required=True, help="prefix path to the file path in img_list")
12 | parser.add_argument('--im2rec', default='../bin/im2rec', help="path to im2rec tools")
13 | parser.add_argument('--partition_size', default="256", help="max size of single bin file")
14 | parser.add_argument('--shuffle', default='0', help="Shuffle the list or not")
15 | parser.add_argument('--prefix', required=True, help="Prefix of output image lists and bins")
16 | parser.add_argument('--out', required=True, help="Output folder for image bins and lists")
17 | parser.add_argument('--resize', required=True, help="New size of image (-1 for do nothing)")
18 | parser.add_argument('--makefile', default="Gen.mk", help="name of generated Makefile")
19 |
20 |
21 | args = parser.parse_args()
22 | # im2bin path
23 | IM2BIN = args.im2rec
24 |
25 | new_size = "resize=" + args.resize
26 |
27 | fi = open(args.img_list)
28 | lst = [line for line in fi]
29 |
30 | img_root = args.img_root
31 |
32 | if args.shuffle == "1":
33 | random.shuffle(lst)
34 |
35 | prefix = args.prefix
36 | output_dir = args.out
37 | if output_dir[-1] != '/':
38 | output_dir += '/'
39 |
40 | fo = open(args.makefile, "w")
41 |
42 | objs = []
43 | cmds = []
44 | fw = None
45 | sz = 0
46 | img_cnt = 1
47 | cnt = 1
48 |
49 | for item in lst:
50 | if sz + 10240 > (int(args.partition_size)<<20) or fw is None:
51 | lst_name = output_dir + (prefix % cnt) + '.lst'
52 | bin_name = output_dir + (prefix % cnt) + '.bin'
53 | objs.append(bin_name)
54 | if fw is not None:
55 | fw.close()
56 | fw = open(lst_name, "w")
57 | cmd = "%s: %s\n\t%s %s %s %s %s" % (bin_name, lst_name,
58 | IM2BIN, lst_name, img_root, bin_name, new_size)
59 | cmds.append(cmd)
60 | sz = 0
61 | cnt += 1
62 | img_cnt = 1
63 | path = item.split('\t')[2][:-1]
64 | sz += os.path.getsize(img_root + path) + (img_cnt + 2) * 4
65 | fw.write(item)
66 | img_cnt += 1
67 |
68 | obj = "all: " + ' '.join(objs) + '\n'
69 | fo.write(obj)
70 | fo.write('\n\n'.join(cmds))
71 | fo.close()
72 | fw.close()
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
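Usage note for `imgbin-partition-maker.py` above: the script formats output names with `prefix % cnt`, so `--prefix` must contain a printf-style `%d` pattern. A hypothetical invocation is `python imgbin-partition-maker.py --img_list all.lst --img_root ./images/ --prefix part_%d --out ./bins --resize 256`, after which `make -f Gen.mk -j8` builds all the generated partitions in parallel.
--------------------------------------------------------------------------------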
/src/io/iter_mem_buffer-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_ITER_MEM_BUFFER_INL_HPP_
2 | #define CXXNET_ITER_MEM_BUFFER_INL_HPP_
3 | /*!
4 | * \file iter_mem_buffer-inl.hpp
5 | * \brief iterator that loads a limited number of batches into memory
6 | * and only returns these batches
7 | * \author Tianqi Chen
8 | */
9 | #include <cstring>
10 | #include <vector>
11 | #include "./data.h"
12 | #include "../utils/utils.h"
13 | #include "../utils/io.h"
14 |
15 | namespace cxxnet {
16 | /*! \brief iterator that loads a limited number of batches into memory */
17 | class DenseBufferIterator : public IIterator<DataBatch> {
18 | public:
19 | DenseBufferIterator(IIterator<DataBatch> *base)
20 | : base_(base) {
21 | max_nbatch_ = 100;
22 | data_index_ = 0;
23 | silent_ = 0;
24 | }
25 | virtual void SetParam(const char *name, const char *val) {
26 | base_->SetParam(name, val);
27 | if (!strcmp(name, "max_nbatch")) {
28 | max_nbatch_ = static_cast<size_t>(atol(val));
29 | }
30 | if (!strcmp(name, "silent")) silent_ = atoi(val);
31 | }
32 | virtual void Init(void) {
33 | base_->Init();
34 | while (base_->Next()) {
35 | const DataBatch &batch = base_->Value();
36 | CHECK(batch.label.dptr_ != NULL) << "need dense";
37 | DataBatch v;
38 | v.AllocSpaceDense(batch.data.shape_, batch.batch_size, batch.label.size(1));
39 | v.CopyFromDense(batch);
40 | buffer_.push_back(v);
41 | if (buffer_.size() >= max_nbatch_) break;
42 | }
43 | if (silent_ == 0) {
44 | printf("DenseBufferIterator: load %d batches\n",
45 | static_cast<int>(buffer_.size()));
46 | }
47 | }
48 | virtual void BeforeFirst(void) {
49 | data_index_ = 0;
50 | }
51 | virtual bool Next(void) {
52 | if (data_index_ < buffer_.size()) {
53 | data_index_ += 1;
54 | return true;
55 | } else {
56 | return false;
57 | }
58 | }
59 | virtual const DataBatch &Value(void) const {
60 | CHECK(data_index_ > 0)
61 | << "Iterator.Value: at beginning of iterator";
62 | return buffer_[data_index_ - 1];
63 | }
64 |
65 | private:
66 | /*! \brief silent */
67 | int silent_;
68 | /*! \brief maximum number of batch in buffer */
69 | size_t max_nbatch_;
70 | /*! \brief data index */
71 | size_t data_index_;
72 | /*! \brief base iterator */
73 | IIterator<DataBatch> *base_;
74 | /*! \brief data content */
75 | std::vector<DataBatch> buffer_;
76 | };
77 | } // namespace cxxnet
78 | #endif // CXXNET_ITER_MEM_BUFFER_INL_HPP_
79 |
--------------------------------------------------------------------------------
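All input iterators in `src/io`, including `DenseBufferIterator` above, follow the same `Init`/`BeforeFirst`/`Next`/`Value` contract. A Python analogue of the buffering behavior, purely illustrative:

```python
class BufferIterator(object):
    def __init__(self, base, max_nbatch=100):
        self.buffer = []            # Init: pull up to max_nbatch batches
        for batch in base:
            self.buffer.append(batch)
            if len(self.buffer) >= max_nbatch:
                break
        self.i = 0
    def before_first(self):         # rewind, like BeforeFirst()
        self.i = 0
    def next(self):                 # advance, like Next(); False once exhausted
        if self.i < len(self.buffer):
            self.i += 1
            return True
        return False
    def value(self):                # current batch, like Value()
        assert self.i > 0, 'at beginning of iterator'
        return self.buffer[self.i - 1]
```
--------------------------------------------------------------------------------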
/make/config.mk:
--------------------------------------------------------------------------------
1 | #-----------------------------------------------------
2 | # cxxnet: the configuration compile script
3 | #
4 | # This is the default configuration setup for cxxnet
5 | # If you want to change configuration, do the following steps:
6 | #
7 | # - copy this file to the root folder
8 | # - modify the configuration you want
9 | # - type make or make -j n for parallel build
10 | #----------------------------------------------------
11 |
12 | # choice of compiler
13 | export CC = gcc
14 | export CXX = g++
15 | export NVCC = nvcc
16 |
17 | # whether to use CUDA during compilation
18 | USE_CUDA = 1
19 |
20 | # add the path to the CUDA library to the link and compile flags
21 | # if you have already added them to the environment variables, leave it as NONE
22 | USE_CUDA_PATH = NONE
23 |
24 | # whether to use opencv during compilation
25 | # you can disable it; however, you will not be able to use
26 | # the imbin iterator
27 | USE_OPENCV = 1
28 | USE_OPENCV_DECODER = 1
29 | # whether to use the CUDNN R3 library
30 | USE_CUDNN = 0
31 | # add the path to the CUDNN library to the link and compile flags
32 | # if you do not need it, or do not have it, leave it as NONE
33 | USE_CUDNN_PATH = NONE
34 | # whether to build caffe converter
35 | USE_CAFFE_CONVERTER = 0
36 | CAFFE_ROOT =
37 | CAFFE_INCLUDE =
38 | CAFFE_LIB =
39 | #
40 | # choose the version of blas you want to use
41 | # can be: mkl, blas, atlas, openblas
42 | USE_STATIC_MKL = NONE
43 | USE_BLAS = blas
44 | #
45 | # add the path to the intel library; you may need it
46 | # for MKL if you did not add the path to the environment variables
47 | #
48 | USE_INTEL_PATH = NONE
49 |
50 | # whether to compile with the parameter server
51 | USE_DIST_PS = 0
52 | PS_PATH = NONE
53 | PS_THIRD_PATH = NONE
54 |
55 | # whether to compile with rabit
56 | USE_RABIT_PS = 0
57 | RABIT_PATH = rabit
58 |
59 | # use openmp iterator
60 | USE_OPENMP_ITER = 1
61 | # the additional link flags you want to add
62 | ADD_LDFLAGS = -ljpeg
63 |
64 | # the additional compile flags you want to add
65 | ADD_CFLAGS =
66 | #
67 | # If using MKL, choose static linking automatically to fix the python wrapper
68 | #
69 | ifeq ($(USE_BLAS), mkl)
70 | USE_STATIC_MKL = 1
71 | endif
72 |
73 | #------------------------
74 | # configuration for DMLC
75 | #------------------------
76 | # whether to use HDFS support during compilation
77 | # this will allow cxxnet to directly save/load models from HDFS
78 | USE_HDFS = 0
79 |
80 | # whether to use AWS S3 support during compilation
81 | # this will allow cxxnet to directly save/load models from S3
82 | USE_S3 = 0
83 |
84 | # path to libjvm.so
85 | LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server
86 |
--------------------------------------------------------------------------------
/example/kaggle_bowl/bowl.conf:
--------------------------------------------------------------------------------
1 | # Configuration for the Kaggle National Data Science Bowl example
2 | # Acknowledgement:
3 | # Ref: http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf
4 | # The scheduling parameters are adapted from Caffe (http://caffe.berkeleyvision.org/)
5 |
6 | dev = cpu
7 |
8 | data = train
9 | iter = imgrec
10 | image_list = "./tr.lst"
11 | image_rec = "./tr.rec"
12 | image_mean = "models/image_mean.bin"
13 | rand_mirror=1
14 | rand_crop=1
15 | max_rotate_angle=180
16 | max_aspect_ratio = 0.5
17 | max_shear_ratio = 0.3
18 | min_crop_size=32
19 | max_crop_size=48
20 | iter = threadbuffer
21 | iter = end
22 |
23 | eval = val
24 | iter = imgrec
25 | image_list = "./va.lst"
26 | image_rec = "./va.rec"
27 | image_mean = "models/image_mean.bin"
28 | iter = threadbuffer
29 | iter = end
30 |
31 |
32 | netconfig=start
33 | layer[+1] = conv
34 | kernel_size = 4
35 | stride = 1
36 | nchannel = 48
37 | pad = 2
38 | layer[+1] = relu
39 | layer[+1] = max_pooling
40 | kernel_size = 3
41 | stride = 2
42 | ###############
43 | layer[+1] = conv
44 | nchannel = 96
45 | kernel_size = 3
46 | stride = 1
47 | pad = 1
48 | layer[+1] = relu
49 | layer[+1] = conv
50 | nchannel = 96
51 | kernel_size = 3
52 | stride = 1
53 | pad = 1
54 | layer[+1] = relu
55 | layer[+1] = max_pooling
56 | kernel_size = 3
57 | stride = 2
58 | ##############
59 | layer[+1] = conv
60 | nchannel = 128
61 | kernel_size = 2
62 | stride = 1
63 | layer[+1] = relu
64 | layer[+1] = conv
65 | nchannel = 128
66 | kernel_size = 3
67 | stride = 1
68 | layer[+1] = max_pooling
69 | kernel_size = 3
70 | stride = 2
71 | layer[+1] = flatten
72 | layer[+1] = fullc
73 | nhidden = 256
74 | layer[+0] = dropout
75 | threshold = 0.5
76 | layer[+1] = fullc
77 | nhidden = 121
78 | layer[+0] = softmax
79 | netconfig=end
80 |
81 | # evaluation metric
82 | metric = error
83 |
84 | dev = gpu:1
85 | max_round = 100
86 | num_round = 100
87 |
88 | # input shape not including batch
89 | input_shape = 3,40,40
90 |
91 | batch_size = 64
92 |
93 | # global parameters can go in any section outside netconfig and iter
94 | momentum = 0.9
95 | wmat:lr = 0.001
96 | wmat:wd = 0.0005
97 |
98 | bias:wd = 0.000
99 | bias:lr = 0.002
100 |
101 | # all the learning rate schedule starts with lr
102 | lr:schedule = expdecay
103 | lr:gamma = 0.1
104 | lr:step = 20000
105 |
106 | save_model=1
107 | model_dir=models
108 | print_step = 1
109 | # random config
110 | random_type = xavier
111 | init_sigma = 0.01
112 |
113 | # new line
114 |
--------------------------------------------------------------------------------
/tools/caffe_converter/convert.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 |
6 | # caffe root folder
7 | caffe_root = ''
8 | # cxxnet root folder
9 | cxxnet_root = ''
10 | sys.path.insert(0, os.path.join(caffe_root, 'python'))
11 | sys.path.insert(0, os.path.join(cxxnet_root, 'wrapper'))
12 |
13 | import caffe
14 | import cxxnet
15 |
16 |
17 | def main():
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument("caffe_prototxt",
20 | help="caffe prototxt")
21 | parser.add_argument(
22 | "caffe_model", help="caffe model")
23 | parser.add_argument("cxxnet_conf", help="cxxnet conf")
24 | parser.add_argument("to_save", help="to save, in format like 0090.model")
25 | args = parser.parse_args()
26 | caffe_prototxt = args.caffe_prototxt
27 | caffe_model = args.caffe_model
28 | cxxnet_conf = args.cxxnet_conf
29 | to_save = args.to_save
30 | print 'converting {0} and {1} with {2} into {3}'.format(caffe_prototxt, caffe_model, cxxnet_conf, to_save)
31 | caffe.set_mode_cpu()
32 | net_caffe = caffe.Net(caffe_prototxt, caffe_model, caffe.TEST)
33 | print 'creating cxxnet model'
34 | with open(cxxnet_conf, 'r') as f_in:
35 | cfg = f_in.read()
36 | net_cxxnet = cxxnet.Net(dev='cpu', cfg=cfg)
37 | net_cxxnet.set_param('dev', 'cpu')
38 | net_cxxnet.init_model()
39 |
40 | layer_names = net_caffe._layer_names
41 | first_conv = True
42 | for layer_idx, layer in enumerate(net_caffe.layers):
43 | layer_name = layer_names[layer_idx]
44 | if layer.type == 'Convolution' or layer.type == 'InnerProduct':
45 | assert(len(layer.blobs) == 2)
46 | wmat = layer.blobs[0].data
47 | bias = layer.blobs[1].data
48 | if first_conv:
49 | print 'Swapping BGR of caffe into RGB in cxxnet'
50 | wmat[:, [0, 2], :, :] = wmat[:, [2, 0], :, :]
51 |
52 | assert(wmat.flags['C_CONTIGUOUS'] is True)
53 | assert(bias.flags['C_CONTIGUOUS'] is True)
54 | print 'converting layer {0}, wmat shape = {1}, bias shape = {2}'.format(layer_name, wmat.shape, bias.shape)
55 | wmat = wmat.reshape((wmat.shape[0], -1))
56 | bias = bias.reshape((bias.shape[0], 1))
57 | net_cxxnet.set_weight(wmat, layer_name, 'wmat')
58 | net_cxxnet.set_weight(bias, layer_name, 'bias')
59 | if first_conv and layer.type == 'Convolution':
60 | first_conv = False
61 |
62 | net_cxxnet.save_model(to_save)
63 |
64 | if __name__ == '__main__':
65 | main()
66 |
--------------------------------------------------------------------------------
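The channel swap in `convert.py` above relies on numpy fancy indexing: the right-hand side is evaluated into a copy before assignment, so channels 0 and 2 (caffe's B and R) are exchanged in place without an explicit temporary. A tiny demonstration:

```python
import numpy as np

wmat = np.arange(24, dtype=np.float32).reshape(2, 3, 2, 2)  # (out, in, h, w)
wmat[:, [0, 2], :, :] = wmat[:, [2, 0], :, :]               # swap input channels 0 and 2
```
--------------------------------------------------------------------------------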
/src/layer/op.h:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_LAYER_OP_H_
2 | #define CXXNET_LAYER_OP_H_
3 | #pragma once
4 | /*!
5 | * \file op.h
6 | * \brief extra mshadow operation for cxxnet
7 | * \author Bing Xu
8 | */
9 | #include <mshadow/tensor.h>
10 |
11 | namespace cxxnet {
12 | /*! \brief operations for ActivationLayer */
13 | namespace op {
14 | /*! \brief identity operation */
15 | struct identity {
16 | MSHADOW_XINLINE static real_t Map(real_t a) {
17 | return a;
18 | }
19 | };
20 | struct identity_grad {
21 | MSHADOW_XINLINE static real_t Map(real_t a) {
22 | return 1.0f;
23 | }
24 | };
25 |
26 | /*! \brief sigmoid unit */
27 | struct sigmoid {
28 | MSHADOW_XINLINE static real_t Map(real_t a) {
29 | return 1.0f / (1.0f + expf(-a));
30 | }
31 | };
32 | struct sigmoid_grad {
33 | MSHADOW_XINLINE static real_t Map(real_t a) {
34 | return a * (1.0f - a);
35 | }
36 | };
37 | /*! \brief Rectified Linear Operation */
38 | struct relu {
39 | MSHADOW_XINLINE static real_t Map(real_t a) {
40 | using namespace std;
41 | return max(a, 0.0f);
42 | }
43 | };
44 | struct relu_grad {
45 | MSHADOW_XINLINE static real_t Map(real_t a) {
46 | return a > 0.0f ? 1.0f : 0.0f;
47 | }
48 | };
49 |
50 | /*! \brief Leaky ReLU Operation */
51 | struct xelu {
52 | MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
53 | return a > 0 ? a : a / b;
54 | }
55 | };
56 |
57 | struct xelu_grad {
58 | MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
59 | return a > 0 ? 1 : 1.0f / b;
60 | }
61 | };
62 |
63 | struct tanh {
64 | MSHADOW_XINLINE static real_t Map(real_t a) {
65 | return tanhf( a );
66 | }
67 | };
68 |
69 | struct tanh_grad {
70 | MSHADOW_XINLINE static real_t Map(real_t a) {
71 | return 1.0f - a * a;
72 | }
73 | };
74 |
75 |
76 | struct square {
77 | MSHADOW_XINLINE static real_t Map(real_t a) {
78 | return a * a;
79 | }
80 | };
81 |
82 | /*! \brief used to generate a Bernoulli mask */
83 | struct threshold {
84 | MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
85 | return a < b ? 1.0f : 0.0f;
86 | }
87 | };
88 |
89 | /*! \brief used to generate element-wise power */
90 | struct power {
91 | MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
92 | return powf( a, b );
93 | }
94 | };
95 |
96 | /*! \brief used to generate element-wise sqrt */
97 | struct square_root {
98 | MSHADOW_XINLINE static real_t Map(real_t a) {
99 | return sqrt(a);
100 | }
101 | };
102 |
103 | } // namespace op
104 | } // namespace cxxnet
105 | #endif // CXXNET_LAYER_OP_H_
106 |
--------------------------------------------------------------------------------
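The `xelu` pair in `op.h` above is a leaky-ReLU-style unit whose negative slope is `1/b` (`XeluLayer` defaults `b` to 5). A numpy sketch for checking the forward and gradient maps numerically, illustrative only:

```python
import numpy as np

def xelu(a, b):
    return np.where(a > 0, a, a / b)

def xelu_grad(a, b):
    return np.where(a > 0, 1.0, 1.0 / b)

a = np.array([-2.0, -0.5, 0.5, 2.0])
print xelu(a, 5.0)       # [-0.4 -0.1  0.5  2. ]
print xelu_grad(a, 5.0)  # [ 0.2  0.2  1.   1. ]
```
--------------------------------------------------------------------------------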
/doc/other.md:
--------------------------------------------------------------------------------
1 | #### Introduction
2 | This page introduces other settings in cxxnet, including:
3 | * [Device Selection](#set-working-hardware)
4 | * [Printing Control](#print-information)
5 | * [Training Round](#set-round-of-training)
6 | * [Saving Model and Continue Training](#saving-model-and-continue-training)
7 |
8 |
9 | #### Set working hardware
10 | * To use CPU, set the field
11 | ```bash
12 | dev = cpu
13 | ```
14 | * To use GPU, set the field
15 | ```bash
16 | dev = gpu
17 | ```
18 | We can also select a specific device (say device 1) by using
19 | ```bash
20 | dev = gpu:1
21 | ```
22 | * To use multi-GPU, set the field with the corresponding device id
23 | ```bash
24 | dev = gpu:0,1,2,3
25 | ```
26 | or
27 | ```bash
28 | dev = gpu:0-3
29 | ```
30 | By default, it is `dev=gpu`
31 |
32 |
33 | #### Print information
34 | * To print the training error evaluation, set this field to 1
35 | ```bash
36 | eval_train = 1
37 | ```
38 | * By default this field is 0, which means cxxnet won't print anything about the training error.
39 | * To turn off all information while training, set this field to 1
40 | ```bash
41 | silent = 1
42 | ```
43 | * By default this field is 0
44 | * To control the print frequency, change this field
45 | ```bash
46 | print_step = 100
47 | ```
48 | * By default it prints every 100 batches
49 |
50 |
51 | #### Set round of training
52 | Two fields control the training rounds: _**num_round**_ and _**max_round**_
53 | * _**num_round**_ sets the total number of rounds to train
54 | * _**max_round**_ sets the maximum number of additional rounds to train from now on
55 | ```bash
56 | num_round = 15
57 | max_round = 15
58 | ```
59 | This configuration will make cxxnet train for 15 rounds on the training data.
60 |
61 | Another example:
62 | ```bash
63 | num_round = 50
64 | max_round = 2
65 | ```
66 | If we have a model already trained for 40 rounds and continue training with this configuration, cxxnet will stop after round 42.
67 |
68 |
69 | #### Saving model and continue training
70 | * To save models during training, set this field to the saving frequency (a number of rounds)
71 | ```bash
72 | save_model = 2
73 | model_dir = path_of_dir_to_save_model
74 | ```
75 | * By default, this field is 1, which means cxxnet will save a model every round
76 | * To continue a training process, set model_in to the snapshot you want to continue from
77 | ```conf
78 | model_in = path of model file
79 | ```
80 | * Alternatively, if you save a model every round (save_model=1), you can use the option continue; cxxnet will automatically find the latest model and start from it
81 | ```conf
82 | continue = 1
83 | ```
84 | By default, if neither of the two values is set, cxxnet starts training from scratch.
--------------------------------------------------------------------------------
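Putting the snapshotting fields from `doc/other.md` above together, a typical save-and-resume setup looks like this (values illustrative):

```conf
save_model = 1      # snapshot every round
model_dir = models  # write snapshots into ./models
continue = 1        # resume from the latest snapshot if one exists
```
--------------------------------------------------------------------------------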
/src/updater/nag_updater-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_UPDATER_NAG_UPDATER_INL_HPP_
2 | #define CXXNET_UPDATER_NAG_UPDATER_INL_HPP_
3 | /*!
4 | * \file nag_updater-inl.hpp
5 | * \brief implementation of NAG with momentum
6 | * \author Winsty
7 | */
8 | #include <mshadow/tensor.h>
9 | #include <dmlc/logging.h>
10 | #include "./updater.h"
11 | #include "./param.h"
12 |
13 | namespace cxxnet {
14 | namespace updater {
15 | // SGD updater with momentum
16 | template<typename xpu>
17 | class NAGUpdater : public IUpdater<xpu> {
18 | public:
19 | NAGUpdater(mshadow::Tensor<xpu, 2> w, mshadow::Tensor<xpu, 2> dw, const char *tag)
20 | :w(w), dw(dw) {
21 | param.tag = tag;
22 | }
23 | virtual ~NAGUpdater(void) {}
24 | virtual void Init(void) {
25 | if (param.silent == 0) {
26 | printf("NAGUpdater: eta=%f, mom=%f\n", param.base_lr_, param.momentum);
27 | }
28 | m_w.Resize(w.shape_, 0.0f);
29 | old_m_w.Resize(w.shape_, 0.0f);
30 | }
31 | virtual void SetStream(mshadow::Stream<xpu> *stream) {
32 | w.set_stream(stream);
33 | dw.set_stream(stream);
34 | m_w.set_stream(stream);
35 | old_m_w.set_stream(stream);
36 | }
37 | virtual void Update(long epoch) {
38 | this->ApplyUpdate(epoch, dw);
39 | // dw accumulates gradients instead of storing them
40 | // the updater needs to reset it to 0 after each update
41 | dw = 0.0f;
42 | }
43 | virtual void Update(long epoch, mshadow::Tensor<xpu, 2> grad) {
44 | CHECK(grad.shape_ == w.shape_.FlatTo2D())
45 | << "NAGUpdater: grad must be generated from source of same shape";
46 | this->ApplyUpdate(epoch, mshadow::Tensor<xpu, 2>
47 | (grad.dptr_, w.shape_, grad.stride_, w.stream_));
48 | }
49 | virtual void StartRound(int round) {
50 | param.round = round;
51 | }
52 | virtual void SetParam(const char *name, const char *val) {
53 | param.SetParam(name, val);
54 | }
55 | virtual void ApplyVisitor(typename IUpdater::IVisitor *pvisitor) {
56 | pvisitor->Visit(param.tag.c_str(), w, dw);
57 | }
58 |
59 | protected:
60 | UpdaterParam param;
61 | // variables
62 | mshadow::Tensor<xpu, 2> w, dw;
63 | // momentum variables
64 | mshadow::TensorContainer<xpu, 2> m_w, old_m_w;
65 |
66 | inline void ApplyUpdate(long epoch,
67 | mshadow::Tensor<xpu, 2> grad) {
68 | param.ScheduleEpoch(epoch);
69 | mshadow::Copy(old_m_w, m_w, old_m_w.stream_);
70 | m_w *= param.momentum;
71 | m_w += (-param.learning_rate) * (grad + param.wd * w);
72 | w += (1 + param.momentum) * m_w - param.momentum * old_m_w;
73 | }
74 | }; // class NAGUpdater
75 | } // namespace updater
76 | } // namespace cxxnet
77 | #endif
78 |
79 |
--------------------------------------------------------------------------------
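The update rule in `NAGUpdater::ApplyUpdate` above, restated as a numpy sketch with the same variable names (illustrative only):

```python
import numpy as np

def nag_update(w, grad, m_w, lr, momentum, wd):
    old_m_w = m_w.copy()
    m_w = momentum * m_w - lr * (grad + wd * w)
    w = w + (1 + momentum) * m_w - momentum * old_m_w
    return w, m_w
```
--------------------------------------------------------------------------------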
/example/MNIST/mnist.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('../../wrapper/')
3 | import cxxnet
4 | import numpy as np
5 |
6 | data = cxxnet.DataIter("""
7 | iter = mnist
8 | path_img = "./data/train-images-idx3-ubyte"
9 | path_label = "./data/train-labels-idx1-ubyte"
10 | shuffle = 1
11 | iter = end
12 | input_shape = 1,1,784
13 | batch_size = 100
14 | """)
15 | print 'init data iter'
16 |
17 | deval = cxxnet.DataIter("""
18 | iter = mnist
19 | path_img = "./data/t10k-images-idx3-ubyte"
20 | path_label = "./data/t10k-labels-idx1-ubyte"
21 | iter = end
22 | input_shape = 1,1,784
23 | batch_size = 100
24 | """)
25 |
26 | cfg = """
27 | netconfig=start
28 | layer[+1:fc1] = fullc:fc1
29 | nhidden = 100
30 | init_sigma = 0.01
31 | layer[+1:sg1] = sigmoid:se1
32 | layer[sg1->fc2] = fullc:fc2
33 | nhidden = 10
34 | init_sigma = 0.01
35 | layer[+0] = softmax
36 | netconfig=end
37 |
38 | input_shape = 1,1,784
39 | batch_size = 100
40 |
41 | random_type = gaussian
42 | """
43 |
44 | param = {}
45 | param['eta'] = 0.1
46 | param['dev'] = 'cpu'
47 | param['momentum'] = 0.9
48 | param['metric[label]'] = 'error'
49 |
50 | net = cxxnet.train(cfg, data, 1, param, eval_data = deval)
51 |
52 | weights = []
53 | for layer in ['fc1', 'fc2']:
54 | for tag in ['wmat', 'bias']:
55 | weights.append((layer, tag, net.get_weight(layer, tag)))
56 |
57 | data.before_first()
58 | data.next()
59 | # extract
60 | print 'predict'
61 | pred = net.predict(data)
62 | print 'predict finish'
63 | dbatch = data.get_data()
64 | print dbatch.shape
65 | print 'get data'
66 | pred2 = net.predict(dbatch)
67 |
68 | print np.sum(np.abs(pred - pred2))
69 | print np.sum(np.abs(net.extract(data, 'sg1') - net.extract(dbatch, 'sg1')))
70 |
71 | # evaluate
72 | deval.before_first()
73 | werr = 0
74 | wcnt = 0
75 | while deval.next():
76 | label = deval.get_label()
77 | pred = net.predict(deval)
78 | werr += np.sum(label[:,0] != pred[:])
79 | wcnt += len(label[:,0])
80 | print 'eval-error=%f' % (float(werr) / wcnt)
81 |
82 | # training
83 | data.before_first()
84 | while data.next():
85 | label = data.get_label()
86 | batch = data.get_data()
87 | net.update(batch, label)
88 |
89 | # evaluate
90 | deval.before_first()
91 | werr = 0
92 | wcnt = 0
93 | while deval.next():
94 | label = deval.get_label()
95 | pred = net.predict(deval)
96 | werr += np.sum(label[:,0] != pred[:])
97 | wcnt += len(label[:,0])
98 | print 'eval-error2=%f' % (float(werr) / wcnt)
99 |
100 | for layer, tag, wt in weights:
101 | net.set_weight(wt, layer, tag)
102 |
103 | # evaluate
104 | deval.before_first()
105 | werr = 0
106 | wcnt = 0
107 | while deval.next():
108 | label = deval.get_label()
109 | pred = net.predict(deval)
110 | werr += np.sum(label[:,0] != pred[:])
111 | wcnt += len(label[:,0])
112 | print 'eval-error-after-setback=%f' % (float(werr) / wcnt)
113 |
--------------------------------------------------------------------------------
/src/layer/dropout_layer-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_LAYER_DROPOUT_LAYER_INL_HPP_
2 | #define CXXNET_LAYER_DROPOUT_LAYER_INL_HPP_
3 |
4 | #include <mshadow/tensor.h>
5 | #include "./layer.h"
6 | #include "./op.h"
7 |
8 | namespace cxxnet {
9 | namespace layer {
10 |
11 | template<typename xpu>
12 | class DropoutLayer : public ILayer<xpu> {
13 | public:
14 | DropoutLayer(mshadow::Random<xpu> *p_rnd) : prnd_(p_rnd) {
15 | // setup default value
16 | dropout_threshold = 0.0f;
17 | }
18 | virtual void SetParam(const char *name, const char* val) {
19 | if (!strcmp("threshold", name)) dropout_threshold = static_cast<real_t>(atof(val));
20 | }
21 | virtual void InitConnection(const std::vector<Node<xpu>*> &nodes_in,
22 | const std::vector<Node<xpu>*> &nodes_out,
23 | ConnectState<xpu> *p_cstate) {
24 | utils::Check(nodes_in.size() == 1 && nodes_out.size() == 1,
25 | "DropoutLayer: only support 1-1 connection");
26 | utils::Check(nodes_in[0] == nodes_out[0], "DropoutLayer is a self-loop layer");
27 | utils::Check(dropout_threshold >= 0.0f && dropout_threshold < 1.0f,
28 | "DropoutLayer: invalid dropout_threshold\n");
29 | // use 1 temp state for mask
30 | p_cstate->states.resize(1);
31 | p_cstate->states[0].Resize(nodes_in[0]->data.shape_);
32 | }
33 | virtual void OnBatchSizeChanged(const std::vector<Node<xpu>*> &nodes_in,
34 | const std::vector<Node<xpu>*> &nodes_out,
35 | ConnectState<xpu> *p_cstate) {
36 | p_cstate->states[0].Resize(nodes_in[0]->data.shape_);
37 | }
38 | virtual void Forward(bool is_train,
39 | const std::vector<Node<xpu>*> &nodes_in,
40 | const std::vector<Node<xpu>*> &nodes_out,
41 | ConnectState<xpu> *p_cstate) {
42 | using namespace mshadow::expr;
43 | mshadow::TensorContainer<xpu, 4> &mask = p_cstate->states[0];
44 | const real_t pkeep = 1.0f - dropout_threshold;
45 | if (is_train) {
46 | mask = F<op::threshold>(prnd_->uniform(mask.shape_), pkeep) * (1.0f/pkeep);
47 | nodes_out[0]->data = nodes_out[0]->data * mask;
48 | }
49 | }
50 | virtual void Backprop(bool prop_grad,
51 | const std::vector<Node<xpu>*> &nodes_in,
52 | const std::vector<Node<xpu>*> &nodes_out,
53 | ConnectState<xpu> *p_cstate) {
54 | using namespace mshadow::expr;
55 | mshadow::TensorContainer<xpu, 4> &mask = p_cstate->states[0];
56 | if (prop_grad) {
57 | nodes_out[0]->data *= mask;
58 | }
59 | }
60 |
61 | private:
62 | /*! \brief random number generator */
63 | mshadow::Random<xpu> *prnd_;
64 | /*! \brief dropout */
65 | real_t dropout_threshold;
66 | }; // class DropoutLayer
67 | } // namespace layer
68 | } // namespace cxxnet
69 | #endif // CXXNET_LAYER_DROPOUT_LAYER_INL_HPP_
70 |
71 |
--------------------------------------------------------------------------------
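`DropoutLayer::Forward` above builds an inverted-dropout mask: `op::threshold` keeps a unit when the uniform draw is below `pkeep`, and the `1/pkeep` rescaling keeps the expected activation unchanged, so no extra scaling is needed at test time. A numpy sketch, illustrative only:

```python
import numpy as np

def dropout_forward(x, threshold, rng=np.random):
    pkeep = 1.0 - threshold
    mask = (rng.uniform(size=x.shape) < pkeep) / pkeep
    return x * mask, mask   # keep the mask for backprop
```
--------------------------------------------------------------------------------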
/src/updater/sgd_updater-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_UPDATER_SGD_UPDATER_INL_HPP_
2 | #define CXXNET_UPDATER_SGD_UPDATER_INL_HPP_
3 | /*!
4 | * \file sgd_updater-inl.hpp
5 | * \brief implementation of SGD with momentum
6 | * \author Tianqi Chen
7 | */
8 | #include <cmath>
9 | #include <mshadow/tensor.h>
10 | #include <dmlc/logging.h>
11 | #include "./updater.h"
12 | #include "./param.h"
13 |
14 | namespace cxxnet {
15 | namespace updater {
16 | /*! \brief used for gradient clipping and nan detection */
17 | struct clip {
18 | MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
19 | if (isnan(a)) return 0.0f;
20 | if (a < -b) return -b;
21 | if (a > b) return b;
22 | return a;
23 | }
24 | };
25 |
26 | // SGD updater with momentum
27 | template<typename xpu>
28 | class SGDUpdater : public IUpdater<xpu> {
29 | public:
30 | SGDUpdater(mshadow::Tensor<xpu, 2> w, mshadow::Tensor<xpu, 2> dw, const char *tag)
31 | :w(w), dw(dw) {
32 | param.tag = tag;
33 | }
34 | virtual ~SGDUpdater(void) {}
35 | virtual void Init(void) {
36 | if (param.silent == 0) {
37 | utils::TrackerPrintf("SGDUpdater: eta=%f, mom=%f\n", param.base_lr_, param.momentum);
38 | }
39 | m_w.Resize(w.shape_, 0.0f);
40 | }
41 | virtual void SetStream(mshadow::Stream<xpu> *stream) {
42 | w.set_stream(stream);
43 | dw.set_stream(stream);
44 | m_w.set_stream(stream);
45 | }
46 | virtual void Update(long epoch) {
47 | this->ApplyUpdate(epoch, dw);
48 | // dw accumulates gradients instead of storing them
49 | // the updater needs to reset it to 0 after each update
50 | dw = 0.0f;
51 | }
52 | virtual void Update(long epoch, mshadow::Tensor<xpu, 2> grad) {
53 | CHECK(grad.shape_ == w.shape_.FlatTo2D())
54 | << "SGDUpdater: grad must be generated from source of same shape";
55 | this->ApplyUpdate(epoch, mshadow::Tensor<xpu, 2>
56 | (grad.dptr_, w.shape_, grad.stride_, w.stream_));
57 | }
58 | virtual void StartRound(int round) {
59 | param.round = round;
60 | }
61 | virtual void SetParam(const char *name, const char *val) {
62 | param.SetParam(name, val);
63 | }
64 | virtual void ApplyVisitor(typename IUpdater::IVisitor *pvisitor) {
65 | pvisitor->Visit(param.tag.c_str(), w, dw);
66 | }
67 |
68 | protected:
69 | UpdaterParam param;
70 | // variables
71 | mshadow::Tensor<xpu, 2> w, dw;
72 | // momentum variable
73 | mshadow::TensorContainer<xpu, 2> m_w;
74 | // update function
75 | virtual void ApplyUpdate(long epoch,
76 | mshadow::Tensor<xpu, 2> grad) {
77 | using namespace mshadow::expr;
78 | param.ScheduleEpoch(epoch);
79 | m_w *= param.momentum;
80 | if (param.clip_gradient != 0.0f) {
81 | m_w += (-param.learning_rate) * (F<clip>(grad, param.clip_gradient) + param.wd * w);
82 | } else {
83 | m_w += (-param.learning_rate) * (grad + param.wd * w);
84 | }
85 | w += m_w;
86 | }
87 | }; // class SGDUpdater
88 | } // namespace updater
89 | } // namespace cxxnet
90 | #endif
91 |
--------------------------------------------------------------------------------
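`SGDUpdater::ApplyUpdate` above in numpy form, including the `clip` op that zeroes NaNs and clamps each gradient entry to `[-c, c]` (illustrative only):

```python
import numpy as np

def sgd_update(w, grad, m_w, lr, momentum, wd, clip_gradient=0.0):
    if clip_gradient != 0.0:
        # nan_to_num maps NaNs to 0, like the clip op above
        grad = np.clip(np.nan_to_num(grad), -clip_gradient, clip_gradient)
    m_w = momentum * m_w - lr * (grad + wd * w)
    w = w + m_w
    return w, m_w
```
--------------------------------------------------------------------------------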
/README.md:
--------------------------------------------------------------------------------
1 | # We move forward to [MXNet](https://github.com/dmlc/mxnet) !
2 | ----
3 |
4 | Dear users,
5 |
6 | Thanks for using and supporting cxxnet. Today, we finally make a hard but exciting decision: **we decide to deprecate cxxnet and fully move forward to next generation toolkit [MXNet](https://github.com/dmlc/mxnet).**
7 |
8 | Please check the feature [highlights](https://github.com/dmlc/mxnet#features), [speed/memory comparison](https://github.com/dmlc/mxnet/tree/master/example/imagenet) and [examples](https://github.com/dmlc/mxnet/tree/master/example) in MXNet.
9 |
10 |
11 | cxxnet developers,
12 |
13 | 28th, Sep, 2015
14 |
15 |
16 | -----
17 | Note: We provide a very simple converter to MXNet. Check [guide](simple_mxnet_converter) to see whether your model is able to be converted.
18 |
19 | ------
20 |
21 | # cxxnet
22 |
23 |
24 | CXXNET is a fast, concise, distributed deep learning framework.
25 |
26 | Contributors: https://github.com/antinucleon/cxxnet/graphs/contributors
27 |
28 | * [Documentation](doc)
29 | * [Learning to use cxxnet by examples](example)
30 | * [Note on Code](src)
31 | * User Group(TODO)
32 |
33 | ### Feature Highlights
34 |
35 | * Lightweight: small but sharp knife
36 | - cxxnet contains concise implementations of state-of-the-art deep learning models
37 | - The project maintains minimal dependencies, which makes it portable and easy to build
38 | * Scale beyond single GPU and single machine
39 | - The library works on multiple GPUs, with nearly linear speedup
40 | - The library works distributedly, backed by a distributed parameter server
41 | * Easy extensibility with no requirement on GPU programming
42 | - cxxnet is built on [mshadow](#backbone-library)
43 | - developers can write numpy-style template expressions once to extend the library
44 | - mshadow will generate high performance CUDA and CPU code for users
45 | - It brings concise and readable code, with performance matching hand-crafted kernels
46 | * Convenient interface for other languages
47 | - Python interface for training from numpy array, and prediction/extraction to numpy array
48 | - Matlab interface
49 |
50 | ### News
51 | * 24-May, 2015: Pretrained [Inception model](example/ImageNet/Inception-BN.conf) with 89.9% Top-5 Correctness is ready to use.
52 | * 09-Apr, 2015: Matlab Interface is ready to use
53 |
54 |
55 | ### Backbone Library
56 | CXXNET is built on [MShadow: Lightweight CPU/GPU Tensor Template Library](https://github.com/tqchen/mshadow)
57 | * MShadow is an efficient, device invariant and simple tensor library
58 | - MShadow allows users to write machine learning expressions while still providing high performance
59 | - This means developers do not need knowledge of CUDA kernels to extend cxxnet.
60 | * MShadow also provides a parameter interface for Multi-GPU and distributed deep learning
61 | - Improvements to cxxnet naturally run on multiple GPUs and in distributed settings
62 |
63 | ### Build
64 |
65 | * Copy ```make/config.mk``` to the root folder of the project
66 | * Modify the config to adjust your environment settings
67 | * Type ```./build.sh``` to build cxxnet
68 |
--------------------------------------------------------------------------------
/src/io/inst_vector.h:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_INST_VECTOR_H_
2 | #define CXXNET_INST_VECTOR_H_
3 | /*!
4 | * \file inst_vector.h
5 | * \brief holder of a sequence of DataInst in CPU
6 | * that are not necessarily of same shape
7 | */
8 | #include "./data.h"
9 | #include <vector>
10 | #include <mshadow/tensor.h>
11 | #include <dmlc/base.h>
12 | namespace cxxnet {
13 | /*!
14 | * \brief tensor vector that can store sequence of tensor
15 | * in a memory compact way, tensors do not have to be of same shape
16 | */
17 | template<int dim, typename DType>
18 | class TensorVector {
19 | public:
20 | TensorVector(void) {
21 | this->Clear();
22 | }
23 | // get i-th tensor
24 | inline mshadow::Tensor<cpu, dim, DType>
25 | operator[](size_t i) const {
26 | CHECK(i + 1 < offset_.size());
27 | CHECK(shape_[i].Size() == offset_[i + 1] - offset_[i]);
28 | return mshadow::Tensor<cpu, dim, DType>
29 | ((DType*)BeginPtr(content_) + offset_[i], shape_[i]);
30 | }
31 | inline mshadow::Tensor<cpu, dim, DType> Back() const {
32 | return (*this)[Size() - 1];
33 | }
34 | inline size_t Size(void) const {
35 | return shape_.size();
36 | }
37 | // push a tensor of certain shape
38 | // return the reference of the pushed tensor
39 | inline void Push(mshadow::Shape<dim> shape) {
40 | shape_.push_back(shape);
41 | offset_.push_back(offset_.back() + shape.Size());
42 | content_.resize(offset_.back());
43 | }
44 | inline void Clear(void) {
45 | offset_.clear();
46 | offset_.push_back(0);
47 | content_.clear();
48 | shape_.clear();
49 | }
50 | private:
51 | // offset of the data content
52 | std::vector<size_t> offset_;
53 | // data content
54 | std::vector<DType> content_;
55 | // shape of data
56 | std::vector<mshadow::Shape<dim> > shape_;
57 | };
58 |
59 | /*!
60 | * \brief instance vector that can holds
61 | * non-uniform shape data instance in a shape efficient way
62 | */
63 | class InstVector {
64 | public:
65 | inline size_t Size(void) const {
66 | return index_.size();
67 | }
68 | // instance
69 | inline DataInst operator[](size_t i) const {
70 | DataInst inst;
71 | inst.index = index_[i];
72 | inst.data = data_[i];
73 | inst.label = label_[i];
74 | return inst;
75 | }
76 | // get back of instance vector
77 | inline DataInst Back() const {
78 | return (*this)[Size() - 1];
79 | }
80 | inline void Clear(void) {
81 | index_.clear();
82 | data_.Clear();
83 | label_.Clear();
84 | }
85 | inline void Push(unsigned index,
86 | mshadow::Shape<3> dshape,
87 | mshadow::Shape<1> lshape) {
88 | index_.push_back(index);
89 | data_.Push(dshape);
90 | label_.Push(lshape);
91 | }
92 |
93 | private:
94 | /*! \brief index of the data */
95 | std::vector<unsigned> index_;
96 | // data
97 | TensorVector<3, real_t> data_;
98 | // label
99 | TensorVector<1, real_t> label_;
100 | };
101 | } // namespace cxxnet
102 | #endif // CXXNET_INST_VECTOR_H_
103 |
--------------------------------------------------------------------------------
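`TensorVector` above packs tensors of different shapes into one flat buffer plus a prefix-offset array. A Python analogue of the storage scheme (illustrative only):

```python
import numpy as np

class TensorVector(object):
    def __init__(self):
        self.offsets = [0]     # offset_ : prefix offsets into the flat buffer
        self.shapes = []       # shape_  : per-tensor shapes
        self.content = np.zeros(0, dtype=np.float32)  # content_ : flat storage
    def push(self, shape):
        n = int(np.prod(shape))
        self.offsets.append(self.offsets[-1] + n)
        self.shapes.append(shape)
        self.content = np.concatenate([self.content, np.zeros(n, np.float32)])
    def __getitem__(self, i):
        lo, hi = self.offsets[i], self.offsets[i + 1]
        return self.content[lo:hi].reshape(self.shapes[i])
```
--------------------------------------------------------------------------------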
/windows/cxxnet/cxxnet.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Express 2013 for Windows Desktop
4 | VisualStudioVersion = 12.0.31101.0
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cxxnet", "cxxnet\cxxnet.vcxproj", "{11779A85-CDB2-4692-AD04-331DFE7F4796}"
7 | EndProject
8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "im2bin", "im2bin\im2bin.vcxproj", "{32E2DEBE-ECE1-4BDB-ABE3-38ED24159E31}"
9 | EndProject
10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cxxnet_wrapper", "cxxnet_wrapper\cxxnet_wrapper.vcxproj", "{C0589109-60ED-482D-BE59-F338EB941EA5}"
11 | ProjectSection(ProjectDependencies) = postProject
12 | {11779A85-CDB2-4692-AD04-331DFE7F4796} = {11779A85-CDB2-4692-AD04-331DFE7F4796}
13 | EndProjectSection
14 | EndProject
15 | Global
16 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
17 | Debug|Win32 = Debug|Win32
18 | Debug|x64 = Debug|x64
19 | Release|Win32 = Release|Win32
20 | Release|x64 = Release|x64
21 | EndGlobalSection
22 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
23 | {11779A85-CDB2-4692-AD04-331DFE7F4796}.Debug|Win32.ActiveCfg = Debug|Win32
24 | {11779A85-CDB2-4692-AD04-331DFE7F4796}.Debug|Win32.Build.0 = Debug|Win32
25 | {11779A85-CDB2-4692-AD04-331DFE7F4796}.Debug|x64.ActiveCfg = Debug|x64
26 | {11779A85-CDB2-4692-AD04-331DFE7F4796}.Debug|x64.Build.0 = Debug|x64
27 | {11779A85-CDB2-4692-AD04-331DFE7F4796}.Release|Win32.ActiveCfg = Release|Win32
28 | {11779A85-CDB2-4692-AD04-331DFE7F4796}.Release|Win32.Build.0 = Release|Win32
29 | {11779A85-CDB2-4692-AD04-331DFE7F4796}.Release|x64.ActiveCfg = Release|x64
30 | {11779A85-CDB2-4692-AD04-331DFE7F4796}.Release|x64.Build.0 = Release|x64
31 | {32E2DEBE-ECE1-4BDB-ABE3-38ED24159E31}.Debug|Win32.ActiveCfg = Debug|Win32
32 | {32E2DEBE-ECE1-4BDB-ABE3-38ED24159E31}.Debug|Win32.Build.0 = Debug|Win32
33 | {32E2DEBE-ECE1-4BDB-ABE3-38ED24159E31}.Debug|x64.ActiveCfg = Debug|x64
34 | {32E2DEBE-ECE1-4BDB-ABE3-38ED24159E31}.Debug|x64.Build.0 = Debug|x64
35 | {32E2DEBE-ECE1-4BDB-ABE3-38ED24159E31}.Release|Win32.ActiveCfg = Release|Win32
36 | {32E2DEBE-ECE1-4BDB-ABE3-38ED24159E31}.Release|Win32.Build.0 = Release|Win32
37 | {32E2DEBE-ECE1-4BDB-ABE3-38ED24159E31}.Release|x64.ActiveCfg = Release|x64
38 | {32E2DEBE-ECE1-4BDB-ABE3-38ED24159E31}.Release|x64.Build.0 = Release|x64
39 | {C0589109-60ED-482D-BE59-F338EB941EA5}.Debug|Win32.ActiveCfg = Debug|Win32
40 | {C0589109-60ED-482D-BE59-F338EB941EA5}.Debug|Win32.Build.0 = Debug|Win32
41 | {C0589109-60ED-482D-BE59-F338EB941EA5}.Debug|x64.ActiveCfg = Debug|Win32
42 | {C0589109-60ED-482D-BE59-F338EB941EA5}.Release|Win32.ActiveCfg = Release|Win32
43 | {C0589109-60ED-482D-BE59-F338EB941EA5}.Release|Win32.Build.0 = Release|Win32
44 | {C0589109-60ED-482D-BE59-F338EB941EA5}.Release|x64.ActiveCfg = Release|x64
45 | {C0589109-60ED-482D-BE59-F338EB941EA5}.Release|x64.Build.0 = Release|x64
46 | EndGlobalSection
47 | GlobalSection(SolutionProperties) = preSolution
48 | HideSolutionNode = FALSE
49 | EndGlobalSection
50 | EndGlobal
51 |
--------------------------------------------------------------------------------
/src/updater/adam_updater-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_UPDATER_ADAM_UPDATER_INL_HPP_
2 | #define CXXNET_UPDATER_ADAM_UPDATER_INL_HPP_
3 | /*!
4 | * \file adam_updater-inl.hpp
5 | * \brief implementation of the Adam update rule
6 | * \author Bing Xu
7 | */
8 | #include <mshadow/tensor.h>
9 | #include <dmlc/logging.h>
10 | #include "./updater.h"
11 | #include "./param.h"
12 | #include "../layer/op.h"
13 |
14 | namespace cxxnet {
15 | namespace updater {
16 | // Adam updater with momentum
17 | template<typename xpu>
18 | class AdamUpdater : public IUpdater<xpu> {
19 | public:
20 | AdamUpdater(mshadow::Tensor<xpu, 2> w, mshadow::Tensor<xpu, 2> dw, const char *tag)
21 | :w(w), dw(dw) {
22 | param.tag = tag;
23 | decay1 = 0.1f;
24 | decay2 = 0.001f;
25 | }
26 | virtual ~AdamUpdater(void) {}
27 | virtual void Init(void) {
28 | if (param.silent == 0) {
29 | printf("AdamUpdater: eta=%f, beta1=%f, beta2=%f\n", param.base_lr_, decay1, decay2);
30 | }
31 | m_w1.Resize(w.shape_, 0.0f);
32 | m_w2.Resize(w.shape_, 0.0f);
33 | }
34 | virtual void SetStream(mshadow::Stream<xpu> *stream) {
35 | w.set_stream(stream);
36 | dw.set_stream(stream);
37 | m_w1.set_stream(stream);
38 | m_w2.set_stream(stream);
39 | }
40 | virtual void Update(long epoch) {
41 | this->ApplyUpdate(epoch, dw);
42 | // dw accumulates gradients instead of storing them
43 | // the updater needs to reset it to 0 after each update
44 | dw = 0.0f;
45 | }
46 | virtual void Update(long epoch, mshadow::Tensor<xpu, 2> grad) {
47 | CHECK(grad.shape_ == w.shape_.FlatTo2D())
48 | << "AdamUpdater: grad must be generated from source of same shape";
49 | this->ApplyUpdate(epoch, mshadow::Tensor<xpu, 2>
50 | (grad.dptr_, w.shape_, grad.stride_, w.stream_));
51 | }
52 | virtual void StartRound(int round) {
53 | param.round = round;
54 | }
55 | virtual void SetParam(const char *name, const char *val) {
56 | param.SetParam(name, val);
57 | if (!strcmp(name, "beta1")) decay1 = atof(val);
58 | if (!strcmp(name, "beta2")) decay2 = atof(val);
59 | }
60 | virtual void ApplyVisitor(typename IUpdater::IVisitor *pvisitor) {
61 | pvisitor->Visit(param.tag.c_str(), w, dw);
62 | }
63 |
64 | protected:
65 | UpdaterParam param;
66 | // variables
67 | mshadow::Tensor<xpu, 2> w, dw;
68 | // first and second moment estimates
69 | mshadow::TensorContainer<xpu, 2> m_w1;
70 | mshadow::TensorContainer<xpu, 2> m_w2;
71 | float decay1;
72 | float decay2;
73 | // update function
74 | virtual void ApplyUpdate(long epoch,
75 | mshadow::Tensor<xpu, 2> grad) {
76 | if (param.wd > 0.0f) grad -= param.wd * w;
77 | float fix1 = 1.0f - powf(1.0f - decay1, epoch + 1);
78 | float fix2 = 1.0f - powf(1.0f - decay2, epoch + 1);
79 | float lr_t = param.base_lr_ * sqrt(fix2) / fix1;
80 | m_w1 += decay1 * (grad - m_w1);
81 | m_w2 += decay2 * (mshadow::expr::F<op::square>(grad) - m_w2);
82 | w -= lr_t * (m_w1 / (mshadow::expr::F<op::square_root>(m_w2) + 1e-8f));
83 | }
84 | }; // class AdamUpdater
85 | } // namespace updater
86 | } // namespace cxxnet
87 | #endif
88 |
89 |
--------------------------------------------------------------------------------
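`AdamUpdater::ApplyUpdate` above in numpy form. Note this variant parameterizes the moment decays as `decay1`/`decay2` (roughly `1 - beta1` / `1 - beta2` of the published Adam) and folds weight decay into the gradient exactly as the C++ does (illustrative only):

```python
import numpy as np

def adam_update(w, grad, m_w1, m_w2, base_lr, decay1, decay2, wd, epoch):
    if wd > 0.0:
        grad = grad - wd * w                  # mirrors the C++ above
    fix1 = 1.0 - (1.0 - decay1) ** (epoch + 1)
    fix2 = 1.0 - (1.0 - decay2) ** (epoch + 1)
    lr_t = base_lr * np.sqrt(fix2) / fix1
    m_w1 = m_w1 + decay1 * (grad - m_w1)      # first moment estimate
    m_w2 = m_w2 + decay2 * (grad ** 2 - m_w2) # second moment estimate
    w = w - lr_t * (m_w1 / (np.sqrt(m_w2) + 1e-8))
    return w, m_w1, m_w2
```
--------------------------------------------------------------------------------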
/src/layer/concat_layer-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_LAYER_CONCAT_LAYER_INL_HPP_
2 | #define CXXNET_LAYER_CONCAT_LAYER_INL_HPP_
3 |
4 | #include "./layer.h"
5 | #include "./op.h"
6 |
7 |
8 | namespace cxxnet {
9 | namespace layer {
10 |
11 | template<typename xpu, int dim>
12 | class ConcatLayer : public ILayer<xpu> {
13 | public:
14 | virtual void InitConnection(const std::vector<Node<xpu>*> &nodes_in,
15 | const std::vector<Node<xpu>*> &nodes_out,
16 | ConnectState<xpu> *p_cstate) {
17 | utils::Check(nodes_in.size() > 1 && nodes_out.size() == 1,
18 | "Concat layer only support n-1 connection");
19 | utils::Check(nodes_in.size() <= 4, "More than 4 input nodes is unsupported");
20 | mshadow::Shape<4> oshape = nodes_in[0]->data.shape_;
21 | mshadow::index_t out_ch = 0;
22 | for (mshadow::index_t i = 0; i < nodes_in.size(); ++i) {
23 | out_ch += nodes_in[i]->data.shape_[dim];
24 | for (mshadow::index_t j = 0; j < 4; ++j) {
25 | if (j == dim) continue;
26 | utils::Check(nodes_in[i]->data.shape_[j] == oshape[j],
27 | "Concat shape doesn't match");
28 | }
29 | }
30 | oshape[dim] = out_ch;
31 | nodes_out[0]->data.shape_ = oshape;
32 | }
33 | virtual void Forward(bool is_train,
34 | const std::vector<Node<xpu>*> &nodes_in,
35 | const std::vector<Node<xpu>*> &nodes_out,
36 | ConnectState<xpu> *p_cstate) {
37 | using namespace mshadow::expr;
38 | switch(nodes_in.size()) {
39 | case 2:
40 | nodes_out[0]->data = concat<dim>(nodes_in[0]->data, nodes_in[1]->data);
41 | break;
42 | case 3:
43 | nodes_out[0]->data = concat<dim>(nodes_in[0]->data,
44 | concat<dim>(nodes_in[1]->data, nodes_in[2]->data));
45 | break;
46 | case 4:
47 | nodes_out[0]->data = concat<dim>(concat<dim>(nodes_in[0]->data, nodes_in[1]->data),
48 | concat<dim>(nodes_in[2]->data, nodes_in[3]->data));
49 | break;
50 | default:
51 | utils::Error("Too many nodes to concat");
52 | break;
53 | };
54 | }
55 | virtual void Backprop(bool prop_grad,
56 | const std::vector<Node<xpu>*> &nodes_in,
57 | const std::vector<Node<xpu>*> &nodes_out,
58 | ConnectState<xpu> *p_cstate) {
59 | using namespace mshadow::expr;
60 | if (prop_grad) {
61 | switch(nodes_in.size()) {
62 | case 2:
63 | concat<dim>(nodes_in[0]->data, nodes_in[1]->data) = nodes_out[0]->data;
64 | break;
65 | case 3:
66 | concat<dim>(nodes_in[0]->data,
67 | concat<dim>(nodes_in[1]->data, nodes_in[2]->data)) = nodes_out[0]->data;
68 | break;
69 | case 4:
70 | concat<dim>(concat<dim>(nodes_in[0]->data, nodes_in[1]->data),
71 | concat<dim>(nodes_in[2]->data, nodes_in[3]->data)) = nodes_out[0]->data;
72 | break;
73 | default:
74 | utils::Error("Too many nodes to concat");
75 | break;
76 | };
77 | }
78 | }
79 | }; //class ConcatLayer
80 | } // namespace layer
81 | } // namespace cxxnet
82 | #endif
83 |
--------------------------------------------------------------------------------
/example/ImageNet/ImageNet.conf:
--------------------------------------------------------------------------------
1 | # Configuration for ImageNet
2 | # Acknowledgement:
3 | # Ref: http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf
4 | # The scheduling parameters are adapted from Caffe (http://caffe.berkeleyvision.org/)
5 |
6 | data = train
7 | iter = imgrec
8 | # image_list = "../../NameList.train"
9 | image_rec = "../../TRAIN.BIN"
10 | # image_root = "../../data/resize256/"
11 | image_mean = "models/image_net_mean.bin"
12 | rand_crop=1
13 | rand_mirror=1
14 | iter = threadbuffer
15 | iter = end
16 |
17 | eval = test
18 | iter = imgrec
19 | # image_list = "../../NameList.test"
20 | image_rec = "../../TEST.BIN"
21 | # image_root = "../../data/resize256/"
22 | image_mean = "models/image_net_mean.bin"
23 | # no random crop and mirror in test
24 | iter = end
25 |
26 | netconfig=start
27 | layer[0->1] = conv:conv1
28 | kernel_size = 11
29 | stride = 4
30 | nchannel = 96
31 | layer[1->2] = relu:relu1
32 | layer[2->3] = max_pooling:pool1
33 | kernel_size = 3
34 | stride = 2
35 | layer[3->4] = lrn:lrn1
36 | local_size = 5
37 | alpha = 0.0001
38 | beta = 0.75
39 | knorm = 1
40 | ###############
41 | layer[4->5] = conv:conv2
42 | ngroup = 2
43 | nchannel = 256
44 | kernel_size = 5
45 | pad = 2
46 | layer[5->6] = relu:relu2
47 | layer[6->7] = max_pooling:pool2
48 | kernel_size = 3
49 | stride = 2
50 | layer[7->8] = lrn:lrn2
51 | local_size = 5
52 | alpha = 0.0001
53 | beta = 0.75
54 | knorm = 1
55 | #############
56 | layer[8->9] = conv:conv3
57 | nchannel = 384
58 | kernel_size = 3
59 | pad = 1
60 | layer[9->10]= relu:relu3
61 | layer[10->11] = conv:conv4
62 | nchannel = 384
63 | ngroup = 2
64 | kernel_size = 3
65 | pad = 1
66 | layer[11->12] = relu:relu4
67 | layer[12->13] = conv:conv5
68 | nchannel = 256
69 | ngroup = 2
70 | kernel_size = 3
71 | pad = 1
72 | init_bias = 1.0
73 | layer[13->14] = relu:relu5
74 | layer[14->15] = max_pooling:pool5
75 | kernel_size = 3
76 | stride = 2
77 | layer[15->16] = flatten:flatten1
78 | layer[16->17] = fullc:fc6
79 | nhidden = 4096
80 | init_sigma = 0.005
81 | init_bias = 1.0
82 | layer[17->18] = relu:relu6
83 | layer[18->18] = dropout:dropout1
84 | threshold = 0.5
85 | layer[18->19] = fullc:fc7
86 | nhidden = 4096
87 | init_sigma = 0.005
88 | init_bias = 1.0
89 | layer[19->20] = relu:relu7
90 | layer[20->20] = dropout:dropout2
91 | threshold = 0.5
92 | layer[20->21] = fullc:fc8
93 | nhidden = 1000
94 | layer[21->21] = softmax:softmax1
95 | netconfig=end
96 |
97 | # evaluation metric
98 | metric = error
99 | metric = rec@1
100 | metric = rec@5
101 |
102 | max_round = 45
103 | num_round = 45
104 |
105 | # input shape not including batch
106 | input_shape = 3,227,227
107 |
108 | batch_size = 256
109 |
110 | # global parameters, valid in any section outside netconfig and iter
111 | momentum = 0.9
112 | wmat:lr = 0.01
113 | wmat:wd = 0.0005
114 |
115 | bias:wd = 0.000
116 | bias:lr = 0.02
117 |
118 | # all learning rate schedule parameters start with lr
119 | lr:schedule = expdecay
120 | lr:gamma = 0.1
121 | lr:step = 100000
122 |
123 | save_model=1
124 | model_dir=models
125 |
126 | # random config
127 | random_type = xavier
128 |
129 |
130 | # new line
131 |
--------------------------------------------------------------------------------
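On the schedule in this config: with `lr:schedule = expdecay`, `lr:gamma = 0.1`, and `lr:step = 100000`, the learning rate is scaled by `gamma` once every `step` weight updates. A sketch of that reading (`expdecay_lr` is an illustrative helper, not a cxxnet API):

```python
def expdecay_lr(base_lr, gamma, step, num_updates):
    """Step-wise exponential decay: lr is scaled by gamma every `step` updates."""
    return base_lr * gamma ** (num_updates // step)

print(expdecay_lr(0.01, 0.1, 100000, 0))        # 0.01   (wmat base lr)
print(expdecay_lr(0.01, 0.1, 100000, 100000))   # ~0.001
print(expdecay_lr(0.01, 0.1, 100000, 200000))   # ~0.0001
```
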
/src/README.md:
--------------------------------------------------------------------------------
1 | Coding Guide
2 | ======
3 | This file is intended to be notes about the code structure in cxxnet.
4 | * The project follows Google's C++ code style
5 |   - All module interfaces are heavily documented in doxygen format
6 | * Contributions to the code and to this note are welcome!
7 | * If you have questions about the code, open a GitHub issue
8 |   - If you want to help improve this note, send a pull request
9 |
10 | Getting Started
11 | ======
12 | * In each folder under src, you can find a ```.h``` file with the same name as the folder
13 |   - These are the interfaces of that module, heavily documented with doxygen comments
14 |   - Start with these interface headers to understand each module
15 | * The rest of the ```-inl.hpp``` files are implementations of the interfaces
16 |   - These are invisible to other modules
17 |   - They are templatized classes with a parameter ```xpu``` that stands for CPU or GPU
18 | * The project depends on [mshadow](http://github.com/dmlc/mshadow) for tensor operations
19 |   - You can find the documentation on mshadow in its repo.
20 |
21 | Project Logical Layout
22 | =======
23 | * Dependency order: nnet->updater->layer
24 |   - All modules depend on global.h and utils
25 |   - io is an independent module
26 | * layer implements the neural net layers and defines forward and backward propagation
27 | * updater is the parameter-updating module; it defines the update rules for the weights
28 |   - AsyncUpdater is a special updater that handles asynchronous communication and updates
29 |   - It uses [mshadow-ps](http://github.com/dmlc/mshadow/guide/mshadow-ps) to do async communication
30 | * nnet is the neural net structure that combines layers together to form a neural net
31 |   - Dependency in nnet: CXXNetThreadTrainer->NeuralNetThread->NeuralNet
32 | * io is the input module that handles reading various data formats and preprocessing
33 |   - io uses the iterator pattern to build the data processing pipeline
34 |   - The pipeline can be multi-threaded using the threadbuffer trick
35 |
36 | How They Work Together
37 | ======
38 | * Data is pulled from the io module and fed into nnet
39 | * nnet contains one thread per GPU; each thread takes a part of the data and calls the layer objects to do forward and backward propagation
40 | * For each weight, an updater is created
41 |   - AsyncUpdater.AfterBackprop is called after the backprop of the corresponding layer to push the gradient out
42 |   - AsyncUpdater.UpdateWait is called before the forward pass of that layer
43 |   - mshadow-ps does the async trick of parameter communication
44 | * AsyncUpdater calls an IUpdater, which performs the actual update
45 |   - If update_on_server is on, the IUpdater is created on the server side instead
46 |
47 | File Naming Convention
48 | =======
49 | * .h files are data structures and interfaces
50 |   - In each folder, there is one .h file with the same name as the folder; it defines everything other modules need to use this module
51 |   - Interface headers: layer/layer.h, updater/updater.h
52 | * -inl.hpp files are implementations of the interfaces, like the .cpp files in most projects
53 |   - You only need to understand the interface file to understand the usage of a module
54 | * In each folder, there can be a .cpp file and a .cu file that compile the module
55 |   - The .cpp and .cu files do not contain the implementation themselves; they reuse the common implementation in the file ending with _impl-inl.hpp
56 |
--------------------------------------------------------------------------------
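The AsyncUpdater call order described above (UpdateWait before a layer's forward, AfterBackprop right after its backprop, so parameter communication overlaps with the rest of the backward pass) can be sketched as follows; the class and method names below are illustrative Python stand-ins, not the actual cxxnet C++ API:

```python
class AsyncUpdaterStub:
    """Stand-in for an AsyncUpdater owning one layer's weights."""
    def update_wait(self):      # block until the pulled-back weights arrive
        pass
    def after_backprop(self):   # push this layer's gradient out asynchronously
        pass

class LayerStub:
    def forward(self, batch):  pass
    def backprop(self, batch): pass

def train_batch(batch, layers, updaters):
    # UpdateWait must finish before a layer's forward uses its weights...
    for layer, upd in zip(layers, updaters):
        upd.update_wait()
        layer.forward(batch)
    # ...and AfterBackprop fires as soon as that layer's gradient is ready,
    # overlapping communication with the remaining backward computation.
    for layer, upd in reversed(list(zip(layers, updaters))):
        layer.backprop(batch)
        upd.after_backprop()

train_batch(batch=None, layers=[LayerStub()], updaters=[AsyncUpdaterStub()])
```
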
/example/multi-machine/convert.py:
--------------------------------------------------------------------------------
1 | import re
2 | import math
3 | import argparse
4 |
5 |
6 | parser = argparse.ArgumentParser(description='random type converter for distributed cxxnet')
7 | parser.add_argument('--input_conf', required=True, help="path to input conf file")
8 | parser.add_argument('--output_conf', required=True, help="path to output conf")
9 | parser.add_argument('--type', default="xavier", help="use [xavier/kaiming] for convert. default: xavier")
10 | parser.add_argument('--a', default="0", help="slope term a used in the (1 + a^2) factor of the init")
11 |
12 | args = parser.parse_args()
13 |
14 | class Param:
15 |     def __init__(self):
16 |         self.clear()
17 |
18 |     def clear(self):
19 |         self.kernel_size = 1
20 |         self.nchannel = 1
21 |         self.nhidden = 1
22 |         self.type = None
23 |
24 | task = args.type
25 |
26 |
27 | def gen_str(p, a):
28 |     # emit random_type/init lines for conv and fully-connected layers only
29 |     res = ""
30 |     if p.type == 'conv' or p.type == 'fullc':
31 |         if task == "kaiming":
32 |             res += " random_type=gaussian\n"
33 |             res += " init_sigma="
34 |         else:
35 |             res += " random_type=uniform\n"
36 |             res += " init_uniform="
37 |         res += str(math.sqrt(2.0 / (1 + a * a) / p.kernel_size / p.kernel_size / p.nchannel / p.nhidden))
38 |         res += '\n'
39 |     return res
40 |
41 |
42 | param = Param()
43 |
44 | START = re.compile(r"\s*netconfig\s*=\s*start\s*")
45 | LAYER = re.compile(r"\s*layer\[.*\]\s*=\s*(\w+):*\w*\s*")
46 | END = re.compile(r"\s*netconfig\s*=\s*end\s*")
47 |
48 | KERNEL_SIZE = re.compile(r"\s*kernel_size\s*=\s*(\d+)\s*")
49 | NCHANNEL = re.compile(r"\s*nchannel\s*=\s*(\d+)\s*")
50 | NHIDDEN = re.compile(r"\s*nhidden\s*=\s*(\d+)\s*")
51 |
52 | extra = float(args.a)
53 | state = 0
54 | fi = open(args.input_conf)
55 | fo = open(args.output_conf, "w")
56 |
57 | cfg = [line for line in fi]
58 | loc = 0
59 |
60 | # state 0: outside netconfig, 1: inside netconfig, 2: inside a layer section
61 | while loc < len(cfg):
62 |     line = cfg[loc]
63 |     # first pass: record the parameters carried by the current line
64 |     if state == 1:  # inside network conf
65 |         if len(LAYER.findall(line)) > 0:
66 |             param.clear()
67 |             param.type = LAYER.findall(line)[0]
68 |     elif state == 2:  # inside layer
69 |         if len(KERNEL_SIZE.findall(line)) > 0:
70 |             param.kernel_size = int(KERNEL_SIZE.findall(line)[0])
71 |         if len(NCHANNEL.findall(line)) > 0:
72 |             param.nchannel = int(NCHANNEL.findall(line)[0])
73 |         if len(NHIDDEN.findall(line)) > 0:
74 |             param.nhidden = int(NHIDDEN.findall(line)[0])
75 |     # second pass: drive the state machine and write the output conf
76 |     if state == 0:
77 |         if START.match(line) != None:
78 |             state = 1
79 |         fo.write(line)
80 |         loc += 1
81 |     elif state == 1:
82 |         if len(LAYER.findall(line)) > 0:
83 |             state = 2
84 |         fo.write(line)
85 |         loc += 1
86 |     elif state == 2:
87 |         if LAYER.match(line) != None or END.match(line) != None:
88 |             # the layer section just ended: flush the generated init lines first
89 |             res = gen_str(param, extra)
90 |             fo.write(res)
91 |             if END.match(line) != None:
92 |                 state = 0
93 |                 fo.write(line)
94 |                 loc += 1
95 |             else:
96 |                 state = 1  # reprocess this layer line in state 1
97 |         else:
98 |             fo.write(line)
99 |             loc += 1
100 |
101 | fi.close()
102 | fo.close()
--------------------------------------------------------------------------------
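`gen_str` above computes sqrt(2 / ((1 + a^2) * kernel_size^2 * nchannel * nhidden)), which matches the He/MSRA initialization scale with `a` as the rectifier's negative slope (with a = 0 it reduces to sqrt(2/fan)); the `xavier` branch reuses the same number as a uniform bound. A quick sanity check against layers from the ImageNet config above (`init_scale` is an illustrative helper, not part of the script):

```python
import math

def init_scale(kernel_size, nchannel, nhidden=1, a=0.0):
    """The scale gen_str would emit for a conv or fullc layer."""
    fan = (1 + a * a) * kernel_size ** 2 * nchannel * nhidden
    return math.sqrt(2.0 / fan)

print(init_scale(11, 96))              # conv1 (kernel 11, 96 channels): ~0.0131
print(init_scale(1, 1, nhidden=4096))  # fc6 (4096 hidden units):        ~0.0221
```
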
/src/layer/bias_layer-inl.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CXXNET_LAYER_BIAS_LAYER_INL_HPP_
2 | #define CXXNET_LAYER_BIAS_LAYER_INL_HPP_
3 |
4 | #include <mshadow/tensor.h>
5 | #include "./layer.h"
6 | #include "./param.h"
7 | #include "../utils/utils.h"
8 |
9 | namespace cxxnet {
10 | namespace layer {
11 |
12 | /*! \brief a simple layer that adds a bias to every node in the batch; this is a self-loop layer */
13 | template<typename xpu>
14 | class BiasLayer : public ILayer<xpu>