├── .gitignore ├── .gitmodules ├── .travis.yml ├── CMakeLists.txt ├── INSTALL.md ├── LICENSE ├── MEM.notes ├── README.md ├── TODO.cnn ├── cmake ├── FindCNN.cmake └── FindEigen3.cmake ├── cnn ├── CMakeLists.txt ├── aligned-mem-pool.cc ├── aligned-mem-pool.h ├── c2w.h ├── cfsm-builder.cc ├── cfsm-builder.h ├── cnn-helper.h ├── cnn.cc ├── cnn.h ├── conv.cc ├── conv.h ├── cuda.cc ├── cuda.h ├── deep-lstm.cc ├── deep-lstm.h ├── devices.cc ├── devices.h ├── dict.cc ├── dict.h ├── dim.cc ├── dim.h ├── except.h ├── exec.cc ├── exec.h ├── expr.cc ├── expr.h ├── fast-lstm.cc ├── fast-lstm.h ├── functors.h ├── gpu-kernels.h ├── gpu-ops.cu ├── gpu-ops.h ├── grad-check.cc ├── grad-check.h ├── graph.cc ├── graph.h ├── gru.cc ├── gru.h ├── hsm-builder.cc ├── hsm-builder.h ├── init.cc ├── init.h ├── lstm.cc ├── lstm.h ├── mem.cc ├── mem.h ├── model.cc ├── model.h ├── mp.cc ├── mp.h ├── nodes-common.cc ├── nodes.cc ├── nodes.h ├── param-nodes.cc ├── param-nodes.h ├── random.h ├── rnn-state-machine.cc ├── rnn-state-machine.h ├── rnn.cc ├── rnn.h ├── saxe-init.cc ├── saxe-init.h ├── shadow-params.cc ├── shadow-params.h ├── simd-functors.h ├── tensor.cc ├── tensor.h ├── tests │ ├── test_edges.cc │ ├── test_init.cc │ └── test_utils.h ├── timing.h ├── training.cc └── training.h ├── config.h.cmake ├── examples ├── CMakeLists.txt ├── embed-cl.cc ├── encdec.cc ├── example-data │ ├── clusters-hsm.txt │ ├── dev-hsm.txt │ ├── dev-poi.txt │ ├── fin-dev.txt │ ├── fin-toy.txt │ ├── fin-words-dev.txt │ ├── fin-words.txt │ ├── fin.txt │ ├── seg-sup.dev.txt │ ├── textcat.txt │ ├── train-hsm.txt │ └── train-poi.txt ├── mlc.cc ├── nlm.cc ├── poisson-regression.cc ├── read-write.cc ├── rnnlm-aevb.cc ├── rnnlm-batch.cc ├── rnnlm-cfsm.cc ├── rnnlm-givenbag.cc ├── rnnlm-mp.cc ├── rnnlm.cc ├── rnnlm.h ├── rnnlm2.cc ├── segrnn-sup.cc ├── skiprnnlm.cc ├── tag-bilstm.cc ├── textcat.cc ├── tok-embed.cc ├── xor-batch-lookup.cc ├── xor-batch.cc ├── xor-xent.cc └── xor.cc ├── pycnn ├── INSTALL ├── makefile ├── pycnn.pxd ├── pycnn.pyx ├── pycnn_viz.py ├── setup.py └── setup_gpu.py ├── pyexamples ├── attention.py ├── bilstmtagger.py ├── cpu_vs_gpu.py ├── minibatch.py ├── rnnlm.py ├── rnnlm_transduce.py ├── tutorials │ ├── API.ipynb │ ├── RNNs.ipynb │ └── tutorial-1-xor.ipynb ├── util.py └── xor.py ├── rnnlm ├── CMakeLists.txt ├── README.md ├── install-examples.sh └── lm.cc └── tests ├── CMakeLists.txt ├── README.md ├── test-cnn.cc └── test-nodes.cc /.gitignore: -------------------------------------------------------------------------------- 1 | # cmake stuff 2 | build/ 3 | Testing/ 4 | cnn/Testing/ 5 | cnn/tests.bin/ 6 | CTestTestfile.cmake 7 | config.h 8 | Makefile 9 | CMakeCache.txt 10 | CMakeFiles 11 | cmake_install.cmake 12 | pycnn/pycnn.cpp 13 | pycnn/dist/ 14 | pycnn/pyCNN.egg-info/ 15 | 16 | # binaries 17 | examples/embed-cl 18 | examples/encdec 19 | examples/xor 20 | examples/xor-xent 21 | examples/rnnlm 22 | examples/nlm 23 | 24 | #data 25 | rnnlm/ptb-mikolov/ 26 | 27 | # Compiled Object files 28 | *.slo 29 | *.lo 30 | *.o 31 | *.obj 32 | 33 | # Precompiled Headers 34 | *.gch 35 | *.pch 36 | 37 | # Compiled Dynamic libraries 38 | *.so 39 | *.dylib 40 | *.dll 41 | 42 | # Fortran module files 43 | *.mod 44 | *.smod 45 | 46 | # Compiled Static libraries 47 | *.lai 48 | *.la 49 | *.a 50 | *.lib 51 | 52 | # Executables 53 | *.exe 54 | *.out 55 | *.app 56 | 57 | .RData 58 | .RHistory 59 | -------------------------------------------------------------------------------- /.gitmodules: 
-------------------------------------------------------------------------------- 1 | [submodule "external/easyloggingpp"] 2 | path = external/easyloggingpp 3 | url = https://github.com/easylogging/easyloggingpp.git 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | compiler: 3 | - gcc 4 | 5 | install: 6 | - if [ "$CXX" = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi 7 | addons: 8 | apt: 9 | sources: 10 | - ubuntu-toolchain-r-test 11 | packages: 12 | - gcc-4.8 13 | - g++-4.8 14 | 15 | before_script: 16 | - sudo apt-get update -qq 17 | - sudo apt-get install libboost-filesystem-dev libboost-program-options-dev libboost-serialization-dev libboost-test-dev libboost-regex-dev 18 | - hg clone https://bitbucket.org/eigen/eigen 19 | - mkdir build 20 | - cd build 21 | - cmake .. -DEIGEN3_INCLUDE_DIR=eigen 22 | 23 | script: 24 | - make 25 | - make test 26 | 27 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(cnn) 2 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR) 3 | 4 | set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) 5 | 6 | # CNN uses Eigen which exploits modern CPU architectures. To get the 7 | # best possible performance, the following are recommended: 8 | # 1. use very recent versions of gcc or Clang to build 9 | # 2. use very recent versions of Eigen (ideally the dev version) 10 | # 3. try compiler options like -march=native or other architecture 11 | # flags (the compiler does not always make the best configuration 12 | # decisions without help) 13 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops -Wall -std=c++11 -Ofast -g -DEIGEN_FAST_MATH -march=native") 14 | 15 | enable_testing() 16 | 17 | include_directories(${CMAKE_CURRENT_SOURCE_DIR} 18 | ${PROJECT_SOURCE_DIR}/external/easyloggingpp/src) 19 | 20 | function(find_cudnn) 21 | set(CUDNN_ROOT "" CACHE PATH "CUDNN root path") 22 | find_path(CUDNN_INCLUDE_DIRS cudnn.h 23 | PATHS ${CUDNN_ROOT} 24 | ${CUDNN_ROOT}/include 25 | DOC "CUDNN include path") 26 | find_library(CUDNN_LIBRARIES NAMES libcudnn.so 27 | PATHS ${CUDNN_ROOT} 28 | ${CUDNN_ROOT}/lib 29 | ${CUDNN_ROOT}/lib64 30 | DOC "CUDNN library path") 31 | if(CUDNN_INCLUDE_DIRS AND CUDNN_LIBRARIES) 32 | set(CUDNN_FOUND TRUE PARENT_SCOPE) 33 | message(STATUS "Found CUDNN (include: ${CUDNN_INCLUDE_DIRS}, library: ${CUDNN_LIBRARIES})") 34 | mark_as_advanced(CUDNN_INCLUDE_DIRS CUDNN_LIBRARIES) 35 | else() 36 | MESSAGE(FATAL_ERROR "Failed to find CUDNN in path: ${CUDNN_ROOT} (Did you set CUDNN_ROOT properly?)") 37 | endif() 38 | endfunction() 39 | 40 | # look for Boost 41 | if(DEFINED ENV{BOOST_ROOT}) 42 | set(Boost_NO_SYSTEM_PATHS ON) 43 | endif() 44 | set(Boost_REALPATH ON) 45 | find_package(Boost COMPONENTS program_options regex serialization REQUIRED) 46 | include_directories(${Boost_INCLUDE_DIR}) 47 | set(LIBS ${LIBS} ${Boost_LIBRARIES}) 48 | # troubleshooting: 49 | # if the boost library cannot be found, then in addition to installing boost, 50 | # check that the environment variables are set 51 | # 52 | # to set the boost root and its library root as environment variables, use, 53 | # for example 54 | # echo "export BOOST_LIBRARYDIR=/usr/local/lib" >> ~/.bashrc 55 | # echo "export BOOST_ROOT=/cygdrive/d/tools/boost_1_58_0/boost_1_58_0" >> ~/.bashrc 56 | # then run source ~/.bashrc to have
those environment variables take effect immediately 57 | 58 | if(BACKEND) 59 | message("-- BACKEND: ${BACKEND}") 60 | else() 61 | message("-- BACKEND not specified, defaulting to eigen.") 62 | set(BACKEND "eigen") 63 | endif() 64 | 65 | if(BACKEND MATCHES "^eigen$") 66 | set(WITH_EIGEN_BACKEND 1) 67 | elseif(BACKEND MATCHES "^cuda$") 68 | set(WITH_CUDA_BACKEND 1) 69 | else() 70 | message(SEND_ERROR "BACKEND must be eigen or cuda") 71 | endif() 72 | 73 | if (WITH_CUDA_BACKEND) 74 | find_package(CUDA REQUIRED) 75 | set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_ROOT}) 76 | include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) 77 | add_definitions(-DHAVE_CUDA) 78 | #list(APPEND CUDA_LIBRARIES /usr/lib64/libpthread.so) 79 | MESSAGE("CUDA_LIBRARIES: ${CUDA_LIBRARIES}") 80 | list(REMOVE_ITEM CUDA_LIBRARIES -lpthread) 81 | set(LIBS ${LIBS} ${CUDA_LIBRARIES}) 82 | #find_cudnn() 83 | #include_directories(SYSTEM ${CUDNN_INCLUDE_DIRS}) 84 | endif() 85 | 86 | # look for Eigen 87 | find_package(Eigen3 REQUIRED) 88 | include_directories(${EIGEN3_INCLUDE_DIR}) 89 | 90 | FIND_PACKAGE(Threads REQUIRED) 91 | set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT}) 92 | 93 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h) 94 | include_directories(${CMAKE_CURRENT_BINARY_DIR}) 95 | 96 | add_subdirectory(cnn) 97 | add_subdirectory(tests) 98 | add_subdirectory(examples) 99 | add_subdirectory(rnnlm) 100 | enable_testing() 101 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installing the pyCNN module 2 | 3 | (For instructions on installing on a computer with a GPU, see below.) 4 | 5 | First, get CNN: 6 | 7 | ```bash 8 | cd $HOME 9 | git clone https://github.com/clab/cnn.git 10 | cd cnn 11 | git submodule init # To be consistent with CNN's installation instructions. 12 | git submodule update # To be consistent with CNN's installation instructions. 13 | ``` 14 | 15 | Then get Eigen: 16 | 17 | ```bash 18 | cd $HOME 19 | cd cnn 20 | # The latest version (17.03.16) of Eigen fails to compile, so we use "-r" to revert to the last stable revision. 21 | # Otherwise, we could use "hg clone https://bitbucket.org/eigen/eigen/" 22 | hg clone https://bitbucket.org/eigen/eigen/ -r 47fa289dda2dc13e0eea70adfc8671e93627d466 23 | ``` 24 | 25 | To simplify the following steps, we can set bash variables to hold where we have saved the main directories of `cnn` and `eigen`. In case you obtained `cnn` and `eigen` differently from the instructions above and saved them in different locations, these variables will be helpful: 26 | 27 | ```bash 28 | PATH_TO_CNN=$HOME/cnn/ 29 | PATH_TO_EIGEN=$HOME/cnn/eigen/ 30 | ``` 31 | 32 | Compile CNN. 33 | (Modify the command below to point to the correct boost location.) 34 | 35 | ```bash 36 | cd $PATH_TO_CNN 37 | mkdir build 38 | cd build 39 | cmake ..
-DEIGEN3_INCLUDE_DIR=$PATH_TO_EIGEN -DBOOST_ROOT=$HOME/.local/boost_1_58_0 -DBoost_NO_BOOST_CMAKE=ON 40 | make -j 2 41 | ``` 42 | 43 | If CNN fails to compile and throws an error like this: 44 | 45 | ```bash 46 | $ make -j 2 47 | Scanning dependencies of target cnn 48 | Scanning dependencies of target cnn_shared 49 | [ 1%] [ 2%] Building CXX object cnn/CMakeFiles/cnn.dir/cfsm-builder.cc.o 50 | Building CXX object cnn/CMakeFiles/cnn_shared.dir/cfsm-builder.cc.o 51 | In file included from /home/user/cnn/cnn/cnn.h:13:0, 52 | from /home/user/cnn/cnn/cfsm-builder.h:6, 53 | from /home/user/cnn/cnn/cfsm-builder.cc:1: 54 | /home/user/cnn/cnn/tensor.h:22:42: fatal error: unsupported/Eigen/CXX11/Tensor: No such file or directory 55 | #include <unsupported/Eigen/CXX11/Tensor> 56 | ^ 57 | compilation terminated. 58 | ``` 59 | 60 | then you can download a stable version of Eigen and rebuild CNN as follows: 61 | 62 | ```bash 63 | cd $PATH_TO_CNN 64 | wget u.cs.biu.ac.il/~yogo/eigen.tgz 65 | tar zxvf eigen.tgz # or "dtrx eigen.tgz" if you have dtrx installed 66 | mkdir build 67 | cd build 68 | cmake .. -DEIGEN3_INCLUDE_DIR=$PATH_TO_EIGEN -DBOOST_ROOT=$HOME/.local/boost_1_58_0 -DBoost_NO_BOOST_CMAKE=ON 69 | make -j 2 70 | ``` 71 | 72 | Now that CNN is compiled, we need to compile the pycnn module. 73 | This requires having cython installed. 74 | If you don't have cython, it can be installed with either `pip install cython` or, better yet, `conda install cython`. 75 | 76 | ```bash 77 | pip2 install cython --user 78 | ``` 79 | 80 | Customize `setup.py` to include (i) the parent directory where the main `cnn` directory is saved and (ii) the path to the main `eigen` directory: 81 | 82 | ```bash 83 | cd $PATH_TO_CNN/pycnn 84 | sed -i "s|..\/..\/cnn\/|$PATH_TO_CNN|g" setup.py 85 | sed -i "s|..\/..\/eigen\/|$PATH_TO_EIGEN|g" setup.py 86 | make 87 | make install 88 | ``` 89 | 90 | We are almost there. 91 | We need to tell the environment where to find the compiled cnn shared library. 92 | pyCNN's `make` fetches a copy of `libcnn_shared.so` and puts it in the `pycnn` directory. 93 | 94 | Add the following line to your profile (`.zshrc` or `.bashrc`), changing 95 | the path according to your installation location. 96 | 97 | ```bash 98 | export LD_LIBRARY_PATH=$PATH_TO_CNN/pycnn 99 | ``` 100 | 101 | Now, check that everything works: 102 | 103 | ```bash 104 | # check that it works: 105 | cd $PATH_TO_CNN 106 | cd pyexamples 107 | python2 xor.py 108 | python2 rnnlm.py rnnlm.py 109 | ``` 110 | 111 | Alternatively, if the following script runs without errors, your installation is likely to be working: 112 | ``` 113 | from pycnn import * 114 | model = Model() 115 | ``` 116 | 117 | ## Installing with GPU support 118 | 119 | To install on a computer with a GPU, first install CUDA. 120 | Here, we assume CUDA is installed in `/usr/local/cuda-7.5` 121 | 122 | There are two modules: `pycnn`, which is the regular CPU module, and `gpycnn`, which is the GPU 123 | module. You can import either of them; they are two independent modules. The GPU support 124 | is incomplete: some operations (e.g. `huber_distance`) are not available for the GPU. 125 | 126 | The first step is to build the CNN modules. 127 | Check out and go to the `build` directory (same instructions as above). Then: 128 | 129 | To build a CPU version on a computer with CUDA: 130 | ```bash 131 | cmake .. -DEIGEN3_INCLUDE_DIR=../eigen -DBACKEND=eigen 132 | make -j 4 133 | ``` 134 | 135 | To build a GPU version on a computer with CUDA: 136 | ```bash 137 | cmake ..
-DBACKEND=cuda -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-7.5/ 138 | make -j 4 139 | ``` 140 | 141 | Now, build the python modules (as above, we assume cython is installed): 142 | 143 | The GPU module (gpycnn): 144 | ```bash 145 | cd ../pycnn 146 | make gpycnn.so 147 | make ginstall 148 | ``` 149 | 150 | The CPU module (pycnn): 151 | ```bash 152 | cd ../pycnn 153 | make pycnn.so 154 | make install 155 | ``` 156 | 157 | Add the following to your env: 158 | `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PATH_TO_CNN/pycnn` 159 | 160 | Once both `pycnn` and `gpycnn` are installed, run `python ../pyexamples/cpu_vs_gpu.py` for a small timing example. 161 | 162 | 163 | -------------------------------------------------------------------------------- /MEM.notes: -------------------------------------------------------------------------------- 1 | The code that computes the l2 norm of the gradient is going to need 2 | scratch space on every device that CNN is using that has a parameter. 3 | 1) devices should know whether they have parameters/gradients 4 | 5 | alignment code is hidden away. it's all hard coded, but it looks like 6 | Intel at least is getting more forgiving about alignment problems so 7 | we might not notice opportunities for speedups if something changes. 8 | GPU memory is aligned mostly by CUDA 9 | 10 | the MP stuff needs to be tested by Austin. 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cnn-v1 2 | Legacy version of the CNN neural net toolkit (now called [dynet](https://github.com/clab/dynet)) 3 | 4 | # cnn 5 | C++ neural network library 6 | 7 | #### Important: Eigen version requirement 8 | 9 | You need the [development version of the Eigen library](https://bitbucket.org/eigen/eigen) for this software to function. **If you use any of the released versions, you may get assertion failures or compile errors.** 10 | 11 | #### Building 12 | 13 | First you need to fetch the dependent libraries 14 | 15 | git submodule init 16 | git submodule update 17 | 18 | In the project root, you first need to use [`cmake`](http://www.cmake.org/) to generate the makefiles 19 | 20 | mkdir build 21 | cd build 22 | cmake .. -DEIGEN3_INCLUDE_DIR=/path/to/eigen 23 | 24 | Then to compile, run 25 | 26 | make -j 2 27 | 28 | To see that things have built properly, you can run 29 | 30 | ./examples/xor 31 | 32 | which will train a multilayer perceptron to predict the xor function. 33 | 34 | #### Building without Eigen installed 35 | 36 | If you don't have Eigen installed, the instructions below will fetch and compile 37 | both `Eigen` and `cnn`. Eigen does not have to be compiled, so “installing” it is easy. 38 | 39 | git clone https://github.com/clab/cnn.git 40 | hg clone https://bitbucket.org/eigen/eigen/ 41 | 42 | cd cnn/ 43 | mkdir build 44 | cd build 45 | cmake .. -DEIGEN3_INCLUDE_DIR=../eigen 46 | make -j 2 47 | 48 | #### Debugging build problems 49 | 50 | If you want to see the compile commands that are used, you can run 51 | 52 | make VERBOSE=1 53 | 54 | #### Training Models 55 | 56 | An illustration of how models are trained (for a simple logistic regression model) is below: 57 | 58 | ```c++ 59 | // *** First, we set up the structure of the model 60 | // Create a model, and an SGD trainer to update its parameters. 61 | Model mod; 62 | SimpleSGDTrainer sgd(&mod); 63 | // Create a "computation graph," which will define the flow of information.
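// Every Expression created below becomes a node in this graph; cg.forward()
// evaluates the nodes and cg.backward() back-propagates through them, as the
// second half of this example shows.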
64 | ComputationGraph cg; 65 | // Initialize a 1x3 parameter vector, and add the parameters to be part of the 66 | // computation graph. 67 | Expression W = parameter(cg, mod.add_parameters({1, 3})); 68 | // Create variables defining the input and output of the regression, and load them 69 | // into the computation graph. Note that we don't need to set concrete values yet. 70 | vector<cnn::real> x_values(3); 71 | Expression x = input(cg, {3}, &x_values); 72 | cnn::real y_value; 73 | Expression y = input(cg, &y_value); 74 | // Next, set up the structure to multiply the input by the weight vector, then run 75 | // the output of this through a logistic sigmoid function (logistic regression). 76 | Expression y_pred = logistic(W*x); 77 | // Finally, we create a function to calculate the loss. The model will be optimized 78 | // to minimize the value of the final function in the computation graph. 79 | Expression l = binary_log_loss(y_pred, y); 80 | // We are now done setting up the graph, and we can print out its structure: 81 | cg.PrintGraphviz(); 82 | 83 | // *** Now, we perform a parameter update for a single example. 84 | // Set the input/output to the values specified by the training data: 85 | x_values = {0.5, 0.3, 0.7}; 86 | y_value = 1.0; 87 | // "forward" propagates values forward through the computation graph, and returns 88 | // the loss. 89 | cnn::real loss = as_scalar(cg.forward()); 90 | // "backward" performs back-propagation, and accumulates the gradients of the 91 | // parameters within the "Model" data structure. 92 | cg.backward(); 93 | // "sgd.update" updates parameters of the model that was passed to its constructor. 94 | // Here 1.0 is the scaling factor that allows us to control the size of the update. 95 | sgd.update(1.0); 96 | ``` 97 | 98 | Note that this is a very simple example that doesn't cover things like memory initialization, reading/writing models, recurrent/LSTM networks, or adding biases to functions. The best way to get an idea of how to use cnn for real is to look in the `examples` directory, particularly starting with the simplest `xor` example. 99 | -------------------------------------------------------------------------------- /TODO.cnn: -------------------------------------------------------------------------------- 1 | PRIORITIES: 2 | 3 | Multiprocessor/single memory version has to get merged with good, clear examples [cdyer needs to try it out, then work with Austin] 4 | 5 | throughout: instead of aborting, throw a proper exception type (this makes life easier 6 | for Yoav's Python wrapper) [volunteer!!!] 7 | 8 | cnn/init.cc [volunteer!!!] 9 | * This is an unlovely place that every CNN program calls as its first thing 10 | * it should read (and remove) any cnn specific arguments from argc, argv 11 | * add a --help argument 12 | * what should the other arguments do? 13 | - configure memory limits 14 | - possibly enable things like initialization strategies for random variables (this 15 | is not trivial, but worth doing) 16 | - set rnd seed behavior 17 | - configure GPU nonsense 18 | - configure multiproc/multithread 19 | 20 | tests/ [volunteer!!!] 21 | * speaks for itself 22 | * we need to report very clear, detailed runtime on lots of things. in PARTICULAR: big M-v and M-M products, but also softmax. these should be as close to the "user expr.h API" as possible since we want these tests to be stable. (a sketch of what such a timing test could look like appears just below)
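      a minimal sketch of the kind of timing test meant above, written against the
      public expr.h API (Model, ComputationGraph, and the Timer in timing.h already
      exist in this repo; the dimensions, the picked index, and the iteration count
      are arbitrary):

        Model m;
        ComputationGraph cg;
        Expression W = parameter(cg, m.add_parameters({1024, 1024}));
        Expression x = parameter(cg, m.add_parameters({1024}));
        Expression y = softmax(W * x);  // big M-v product followed by a softmax
        Expression z = pick(y, 42);     // reduce to a scalar so backward() applies
        {
          Timer t("1000 forward/backward passes completed in");
          for (int i = 0; i < 1000; ++i) { cg.forward(); cg.backward(); }
        }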
23 | * we should have a separate mechanism for testing nodes in isolation, make sure fx can deal with non-zero numbers, make sure dEdxi does the right thing with non-zero numbers (different from fx!). 24 | * we should have one example that calls the finite-difference gradient checker on a non-trivial example 25 | * TODO(wammar): test the basic functionality of LSTMs. 26 | 27 | cnn/tensor.* [volunteer!!!] 28 | * big change: start using the CNN multidim tensor library when it makes sense 29 | * almost as big: make it so memory lives in GPU and CPU, and the scheduler will try to do smart things with CPU memory. This will mean the behavior of where memory lives will not be #if CUDA but rather a runtime property of the tensor. this affects all nodes. 30 | 31 | cnn/exec.cc 32 | * parallel execution of nodes (we've got the whole FSCKING graph). problem is, i don't know anything about how to elegantly put work like we've got into a threadpool or whatever it is the low-overhead kids are doing these days. I'd rather not pollute the Node code with this AT ALL. 33 | * more importantly, auto-batching 34 | 35 | cnn/examples/rnnlm.cc 36 | * add real program options to do something nontrivial 37 | * give an elegant example of a beam search implementation 38 | 39 | -------------------------------------------------------------------------------- /cmake/FindCNN.cmake: -------------------------------------------------------------------------------- 1 | 2 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.7 FATAL_ERROR) 3 | 4 | INCLUDE(FindPackageHandleStandardArgs) 5 | 6 | FIND_LIBRARY(TH_LIBRARY TH) 7 | FIND_PATH(TH_INCLUDE_DIR "TH.h" PATHS "${CMAKE_PREFIX_PATH}/include/TH") 8 | 9 | SET(TH_LIBRARIES ${TH_LIBRARY}) 10 | 11 | FIND_PACKAGE_HANDLE_STANDARD_ARGS( 12 | TH 13 | REQUIRED_VARS 14 | TH_INCLUDE_DIR 15 | TH_LIBRARY) 16 | -------------------------------------------------------------------------------- /cmake/FindEigen3.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Eigen3 lib 2 | # 3 | # This module supports requiring a minimum version, e.g. you can do 4 | # find_package(Eigen3 3.1.2) 5 | # to require version 3.1.2 or newer of Eigen3. 6 | # 7 | # Once done this will define 8 | # 9 | # EIGEN3_FOUND - system has eigen lib with correct version 10 | # EIGEN3_INCLUDE_DIR - the eigen include directory 11 | # EIGEN3_VERSION - eigen version 12 | # 13 | # This module reads hints about search locations from 14 | # the following environment variables: 15 | # 16 | # EIGEN3_ROOT 17 | # EIGEN3_ROOT_DIR 18 | 19 | # Copyright (c) 2006, 2007 Montel Laurent, 20 | # Copyright (c) 2008, 2009 Gael Guennebaud, 21 | # Copyright (c) 2009 Benoit Jacob 22 | # Redistribution and use is allowed according to the terms of the 2-clause BSD license.
23 | 24 | if(NOT Eigen3_FIND_VERSION) 25 | if(NOT Eigen3_FIND_VERSION_MAJOR) 26 | set(Eigen3_FIND_VERSION_MAJOR 2) 27 | endif(NOT Eigen3_FIND_VERSION_MAJOR) 28 | if(NOT Eigen3_FIND_VERSION_MINOR) 29 | set(Eigen3_FIND_VERSION_MINOR 91) 30 | endif(NOT Eigen3_FIND_VERSION_MINOR) 31 | if(NOT Eigen3_FIND_VERSION_PATCH) 32 | set(Eigen3_FIND_VERSION_PATCH 0) 33 | endif(NOT Eigen3_FIND_VERSION_PATCH) 34 | 35 | set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") 36 | endif(NOT Eigen3_FIND_VERSION) 37 | 38 | macro(_eigen3_check_version) 39 | file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) 40 | 41 | string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") 42 | set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") 43 | string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") 44 | set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") 45 | string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}") 46 | set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") 47 | 48 | set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) 49 | if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 50 | set(EIGEN3_VERSION_OK FALSE) 51 | else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 52 | set(EIGEN3_VERSION_OK TRUE) 53 | endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 54 | 55 | if(NOT EIGEN3_VERSION_OK) 56 | 57 | message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " 58 | "but at least version ${Eigen3_FIND_VERSION} is required") 59 | endif(NOT EIGEN3_VERSION_OK) 60 | endmacro(_eigen3_check_version) 61 | 62 | if (EIGEN3_INCLUDE_DIR) 63 | 64 | # in cache already 65 | _eigen3_check_version() 66 | set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) 67 | 68 | else (EIGEN3_INCLUDE_DIR) 69 | 70 | find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library 71 | HINTS 72 | ENV EIGEN3_ROOT 73 | ENV EIGEN3_ROOT_DIR 74 | PATHS 75 | ${CMAKE_INSTALL_PREFIX}/include 76 | ${KDE4_INCLUDE_DIR} 77 | PATH_SUFFIXES eigen3 eigen 78 | ) 79 | 80 | if(EIGEN3_INCLUDE_DIR) 81 | _eigen3_check_version() 82 | endif(EIGEN3_INCLUDE_DIR) 83 | 84 | include(FindPackageHandleStandardArgs) 85 | find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK) 86 | 87 | mark_as_advanced(EIGEN3_INCLUDE_DIR) 88 | 89 | endif(EIGEN3_INCLUDE_DIR) 90 | 91 | -------------------------------------------------------------------------------- /cnn/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ########## cnn library ########## 2 | # Sources: 3 | set(cnn_library_SRCS 4 | cfsm-builder.cc 5 | cnn.cc 6 | conv.cc 7 | deep-lstm.cc 8 | devices.cc 9 | dict.cc 10 | dim.cc 11 | exec.cc 12 | expr.cc 13 | fast-lstm.cc 14 | grad-check.cc 15 | graph.cc 16 | gru.cc 17 | hsm-builder.cc 18 | init.cc 19 | lstm.cc 20 | mem.cc 21 | model.cc 22 | mp.cc 23 | nodes.cc 24 | nodes-common.cc 25 | param-nodes.cc 26 | rnn.cc 27 | rnn-state-machine.cc 28 | saxe-init.cc 29 | shadow-params.cc 30 | tensor.cc 31 | training.cc 32 | ) 33 | 34 | # Headers: 35 | set(cnn_library_HDRS 36 | aligned-mem-pool.h 37 | cfsm-builder.h 38 | c2w.h 39 | cnn.h 40 | conv.h 41 | cuda.h 42 | devices.h 43 | dict.h 44 | dim.h 45 | exec.h 46 | expr.h 47 | fast-lstm.h 48 | functors.h 49 | 
gpu-kernels.h 50 | gpu-ops.h 51 | graph.h 52 | gru.h 53 | hsm-builder.h 54 | init.h 55 | lstm.h 56 | mem.h 57 | model.h 58 | mp.h 59 | nodes.h 60 | param-nodes.h 61 | random.h 62 | rnn-state-machine.h 63 | rnn.h 64 | saxe-init.h 65 | shadow-params.h 66 | simd-functors.h 67 | tensor.h 68 | timing.h 69 | training.h 70 | ) 71 | 72 | if(WITH_CUDA_BACKEND) 73 | list(APPEND cnn_library_SRCS 74 | cuda.cc) 75 | endif(WITH_CUDA_BACKEND) 76 | 77 | file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} tests/*.cc) 78 | 79 | #foreach(test_src ${TEST_SRCS}) 80 | #Extract the filename without an extension (NAME_WE) 81 | # get_filename_component(testName ${test_src} NAME_WE) 82 | 83 | #Add compile target 84 | # add_executable(${testName} ${test_src}) 85 | 86 | #link to Boost libraries AND your targets and dependencies 87 | # target_link_libraries(${testName} cnn ${LIBS}) 88 | 89 | # set_target_properties(${testName} PROPERTIES 90 | # RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests.bin) 91 | 92 | #Finally add it to test execution - 93 | #Notice the WORKING_DIRECTORY and COMMAND 94 | # add_test(NAME ${testName} 95 | # WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests.bin 96 | # COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tests.bin/${testName} ) 97 | #endforeach(test_src) 98 | 99 | # actual target: 100 | add_library(cnn STATIC ${cnn_library_SRCS} ${cnn_library_HDRS}) 101 | target_link_libraries(cnn ${LIBS}) 102 | if(WITH_CUDA_BACKEND) 103 | add_library(gcnn_shared SHARED ${cnn_library_SRCS} ${cnn_library_HDRS}) 104 | target_link_libraries(gcnn_shared ${LIBS}) 105 | else() 106 | add_library(cnn_shared SHARED ${cnn_library_SRCS} ${cnn_library_HDRS}) 107 | target_link_libraries(cnn_shared ${LIBS}) 108 | endif(WITH_CUDA_BACKEND) 109 | #add_library(cnn ${cnn_library_SRCS} ${cnn_library_HDRS} ${LIBS}) 110 | if(WITH_CUDA_BACKEND) 111 | set(CUDA_SEPARABLE_COMPILATION ON) 112 | list(APPEND CUDA_NVCC_FLAGS "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_52,code=compute_52;-std=c++11;-O2;-DVERBOSE;-Xcompiler;-fpic") 113 | SET(CUDA_PROPAGATE_HOST_FLAGS OFF) 114 | cuda_add_library(cnncuda STATIC gpu-ops.cu) 115 | cuda_add_library(cnncuda_shared SHARED gpu-ops.cu) 116 | endif(WITH_CUDA_BACKEND) 117 | 118 | install(FILES ${cnn_library_HDRS} DESTINATION include/cnn) 119 | install(TARGETS cnn DESTINATION lib) 120 | 121 | # target_compile_features(cnn PRIVATE cxx_range_for) 122 | 123 | -------------------------------------------------------------------------------- /cnn/aligned-mem-pool.cc: -------------------------------------------------------------------------------- 1 | #include "aligned-mem-pool.h" 2 | -------------------------------------------------------------------------------- /cnn/aligned-mem-pool.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_ALIGNED_MEM_POOL_H 2 | #define CNN_ALIGNED_MEM_POOL_H 3 | 4 | #include <iostream> 5 | #include "cnn/mem.h" 6 | 7 | namespace cnn { 8 | 9 | class AlignedMemoryPool { 10 | public: 11 | explicit AlignedMemoryPool(size_t cap, MemAllocator* a) : a(a) { 12 | sys_alloc(cap); 13 | zero_all(); 14 | } 15 | 16 | void* allocate(size_t n) { 17 | auto rounded_n = a->round_up_align(n); 18 | if (rounded_n + used > capacity) { 19 | std::cerr << "cnn is out of memory, try increasing with --cnn-mem\n"; 20 | abort(); 21 | } 22 | void* res = static_cast<char*>(mem) +
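// bump-pointer allocation: the next free chunk begins `used` bytes past the start of the pool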
used; 23 | used += rounded_n; 24 | return res; 25 | } 26 | void free() { 27 | //std::cerr << "freeing " << used << " bytes\n"; 28 | used = 0; 29 | } 30 | // zeros out the memory that has been allocated so far 31 | void zero_allocated_memory() { 32 | if (used == 0) return; 33 | a->zero(mem, used); 34 | } 35 | 36 | bool is_shared() { 37 | return shared; 38 | } 39 | private: 40 | void sys_alloc(size_t cap) { 41 | capacity = a->round_up_align(cap); 42 | //std::cerr << "Allocating " << capacity << " ...\n"; 43 | mem = a->malloc(capacity); 44 | if (!mem) { std::cerr << "Failed to allocate " << capacity << std::endl; abort(); } 45 | used = 0; 46 | } 47 | void zero_all() { 48 | a->zero(mem, capacity); 49 | } 50 | size_t capacity; 51 | size_t used; 52 | bool shared; 53 | MemAllocator* a; 54 | void* mem; 55 | }; 56 | 57 | } // namespace cnn 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /cnn/c2w.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_C2W_H_ 2 | #define CNN_C2W_H_ 3 | 4 | #include <vector> 5 | #include <map> 6 | 7 | #include "cnn/cnn.h" 8 | #include "cnn/model.h" 9 | #include "cnn/lstm.h" 10 | 11 | namespace cnn { 12 | 13 | // computes a representation of a word by reading characters 14 | // one at a time 15 | struct C2WBuilder { 16 | LSTMBuilder fc2w; 17 | LSTMBuilder rc2w; 18 | LookupParameters* p_lookup; 19 | std::vector<VariableIndex> words; 20 | std::map<int, VariableIndex> wordid2vi; 21 | explicit C2WBuilder(int vocab_size, 22 | unsigned layers, 23 | unsigned input_dim, 24 | unsigned hidden_dim, 25 | Model* m) : 26 | fc2w(layers, input_dim, hidden_dim, m), 27 | rc2w(layers, input_dim, hidden_dim, m), 28 | p_lookup(m->add_lookup_parameters(vocab_size, {input_dim})) { 29 | } 30 | void new_graph(ComputationGraph* cg) { 31 | words.clear(); 32 | fc2w.new_graph(cg); 33 | rc2w.new_graph(cg); 34 | } 35 | // compute a composed representation of a word out of characters 36 | // wordid should be a unique index for each word *type* in the graph being built 37 | VariableIndex add_word(int word_id, const std::vector<int>& chars, ComputationGraph* cg) { 38 | auto it = wordid2vi.find(word_id); 39 | if (it == wordid2vi.end()) { 40 | fc2w.start_new_sequence(cg); 41 | rc2w.start_new_sequence(cg); 42 | std::vector<VariableIndex> ins(chars.size()); 43 | std::map<int, VariableIndex> c2i; 44 | for (unsigned i = 0; i < ins.size(); ++i) { 45 | VariableIndex& v = c2i[chars[i]]; 46 | if (!v) v = cg->add_lookup(p_lookup, chars[i]); 47 | ins[i] = v; 48 | fc2w.add_input(v, cg); 49 | } 50 | for (int i = ins.size() - 1; i >= 0; --i) 51 | rc2w.add_input(ins[i], cg); 52 | VariableIndex i_concat = cg->add_function<Concatenate>({fc2w.back(), rc2w.back()}); 53 | it = wordid2vi.insert(std::make_pair(word_id, i_concat)).first; 54 | } 55 | return it->second; 56 | } 57 | }; 58 | 59 | } // namespace cnn 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /cnn/cfsm-builder.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_CFSMBUILDER_H 2 | #define CNN_CFSMBUILDER_H 3 | 4 | #include <vector> 5 | #include <string> 6 | #include "cnn/cnn.h" 7 | #include "cnn/expr.h" 8 | #include "cnn/dict.h" 9 | 10 | namespace cnn { 11 | 12 | struct Parameters; 13 | 14 | class FactoredSoftmaxBuilder { 15 | public: 16 | // call this once per ComputationGraph 17 | virtual void new_graph(ComputationGraph& cg) = 0; 18 | 19 | // -log(p(c | rep) * p(w | c, rep)) 20 | virtual expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx) = 0; 21 | 22 | // samples a
word from p(w,c | rep) 23 | virtual unsigned sample(const expr::Expression& rep) = 0; 24 | }; 25 | 26 | class NonFactoredSoftmaxBuilder : public FactoredSoftmaxBuilder { 27 | public: 28 | NonFactoredSoftmaxBuilder(unsigned rep_dim, unsigned vocab_size, Model* model); 29 | void new_graph(ComputationGraph& cg); 30 | expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx); 31 | unsigned sample(const expr::Expression& rep); 32 | private: 33 | Parameters* p_w; 34 | Parameters* p_b; 35 | expr::Expression w; 36 | expr::Expression b; 37 | ComputationGraph* pcg; 38 | }; 39 | 40 | // helps with implementation of hierarchical softmax 41 | // read a file with lines of the following format 42 | // CLASSID word [freq] 43 | class ClassFactoredSoftmaxBuilder : public FactoredSoftmaxBuilder { 44 | public: 45 | ClassFactoredSoftmaxBuilder(unsigned rep_dim, 46 | const std::string& cluster_file, 47 | Dict* word_dict, 48 | Model* model); 49 | 50 | void new_graph(ComputationGraph& cg); 51 | expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx); 52 | unsigned sample(const expr::Expression& rep); 53 | 54 | private: 55 | void ReadClusterFile(const std::string& cluster_file, Dict* word_dict); 56 | Dict cdict; 57 | std::vector widx2cidx; // will be -1 if not present 58 | std::vector widx2cwidx; // word index to word index inside of cluster 59 | std::vector> cidx2words; 60 | std::vector singleton_cluster; // does cluster contain a single word type? 61 | 62 | // parameters 63 | Parameters* p_r2c; 64 | Parameters* p_cbias; 65 | std::vector p_rc2ws; // len = number of classes 66 | std::vector p_rcwbiases; // len = number of classes 67 | 68 | // Expressions for current graph 69 | inline expr::Expression& get_rc2w(unsigned cluster_idx) { 70 | expr::Expression& e = rc2ws[cluster_idx]; 71 | if (!e.pg) 72 | e = expr::parameter(*pcg, p_rc2ws[cluster_idx]); 73 | return e; 74 | } 75 | inline expr::Expression& get_rc2wbias(unsigned cluster_idx) { 76 | expr::Expression& e = rc2biases[cluster_idx]; 77 | if (!e.pg) 78 | e = expr::parameter(*pcg, p_rcwbiases[cluster_idx]); 79 | return e; 80 | } 81 | ComputationGraph* pcg; 82 | expr::Expression r2c; 83 | expr::Expression cbias; 84 | std::vector rc2ws; 85 | std::vector rc2biases; 86 | }; 87 | 88 | } // namespace cnn 89 | 90 | #endif 91 | -------------------------------------------------------------------------------- /cnn/cnn-helper.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_HELPER_H_ 2 | #define CNN_HELPER_H_ 3 | 4 | #include 5 | 6 | /// helper functions 7 | 8 | namespace cnn { 9 | 10 | /** 11 | this fix a compilation problem in cygwin 12 | */ 13 | #if defined(__CYGWIN__) 14 | template 15 | inline std::string to_string(T value) 16 | { 17 | std::ostringstream os; 18 | os << value; 19 | return os.str(); 20 | } 21 | #endif 22 | 23 | } // namespace cnn 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /cnn/conv.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_CONV_H_ 2 | #define CNN_CONV_H_ 3 | 4 | #include "cnn/cnn.h" 5 | 6 | namespace cnn { 7 | 8 | struct AddVectorToAllColumns : public Node { 9 | explicit AddVectorToAllColumns(const std::initializer_list& a) : Node(a) {} 10 | std::string as_string(const std::vector& arg_names) const override; 11 | Dim dim_forward(const std::vector& xs) const override; 12 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 13 | 
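// given dEdf, the derivative of the loss with respect to this node's output fx,
// backward_impl accumulates into dEdxi the derivative with respect to input argument i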
void backward_impl(const std::vector& xs, 14 | const Tensor& fx, 15 | const Tensor& dEdf, 16 | unsigned i, 17 | Tensor& dEdxi) const override; 18 | }; 19 | 20 | struct KMaxPooling : public Node { 21 | explicit KMaxPooling(const std::initializer_list& a, unsigned k = 1) : Node(a), k(k) {} 22 | std::string as_string(const std::vector& arg_names) const override; 23 | Dim dim_forward(const std::vector& xs) const override; 24 | size_t aux_storage_size() const override; 25 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 26 | void backward_impl(const std::vector& xs, 27 | const Tensor& fx, 28 | const Tensor& dEdf, 29 | unsigned i, 30 | Tensor& dEdxi) const override; 31 | unsigned k; 32 | }; 33 | 34 | struct FoldRows : public Node { 35 | explicit FoldRows(const std::initializer_list& a, unsigned nrows) : Node(a), nrows(nrows) {} 36 | std::string as_string(const std::vector& arg_names) const override; 37 | Dim dim_forward(const std::vector& xs) const override; 38 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 39 | void backward_impl(const std::vector& xs, 40 | const Tensor& fx, 41 | const Tensor& dEdf, 42 | unsigned i, 43 | Tensor& dEdxi) const override; 44 | unsigned nrows; 45 | }; 46 | 47 | // y = x_1 *conv x_2 48 | // x_1 \in R^{d x s} (input) 49 | // x_2 \in R^{d x m} (filter) 50 | struct Conv1DNarrow : public Node { 51 | explicit Conv1DNarrow(const std::initializer_list& a) : Node(a) {} 52 | std::string as_string(const std::vector& arg_names) const override; 53 | Dim dim_forward(const std::vector& xs) const override; 54 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 55 | void backward_impl(const std::vector& xs, 56 | const Tensor& fx, 57 | const Tensor& dEdf, 58 | unsigned i, 59 | Tensor& dEdxi) const override; 60 | }; 61 | 62 | // y = x_1 *conv x_2 63 | // x_1 \in R^{d x s} (input) 64 | // x_2 \in R^{d x m} (filter) 65 | struct Conv1DWide : public Node { 66 | explicit Conv1DWide(const std::initializer_list& a) : Node(a) {} 67 | std::string as_string(const std::vector& arg_names) const override; 68 | Dim dim_forward(const std::vector& xs) const override; 69 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 70 | void backward_impl(const std::vector& xs, 71 | const Tensor& fx, 72 | const Tensor& dEdf, 73 | unsigned i, 74 | Tensor& dEdxi) const override; 75 | }; 76 | 77 | } // namespace cnn 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /cnn/cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cnn/cnn.h" 6 | #include "cnn/cuda.h" 7 | 8 | using namespace std; 9 | 10 | namespace cnn { 11 | 12 | cublasHandle_t cublas_handle; 13 | 14 | static void RemoveArgs(int& argc, char**& argv, int& argi, int n) { 15 | for (int i = argi + n; i < argc; ++i) 16 | argv[i - n] = argv[i]; 17 | argc -= n; 18 | assert(argc >= 0); 19 | } 20 | 21 | #define MAX_GPUS 256 22 | 23 | vector Initialize_GPU(int& argc, char**& argv) { 24 | int nDevices; 25 | CUDA_CHECK(cudaGetDeviceCount(&nDevices)); 26 | if (nDevices < 1) { 27 | cerr << "[cnn] No GPUs found, recompile without DENABLE_CUDA=1\n"; 28 | throw std::runtime_error("No GPUs found but CNN compiled with CUDA support."); 29 | } 30 | // logic: no flags, you get 1 GPU 31 | // or you request a certain number of GPUs explicitly 32 | // or you request the device ids 33 | int requested_gpus = -1; 34 | vector gpu_mask(MAX_GPUS); 35 | int 
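// argi scans the argument list; cnn-specific flags are consumed and then stripped
// out of argv via RemoveArgs above, so user code never sees them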
argi = 1; 36 | bool ngpus_requested = false; 37 | bool ids_requested = false; 38 | for( ;argi < argc; ++argi) { 39 | string arg = argv[argi]; 40 | if (arg == "--cnn_gpus" || arg == "--cnn-gpus") { 41 | if ((argi + 1) > argc) { 42 | cerr << "[cnn] --cnn-gpus expects an argument (number of GPUs to use)\n"; 43 | abort(); 44 | } else { 45 | if (ngpus_requested) { 46 | cerr << "Multiple instances of --cnn-gpus" << endl; abort(); 47 | } 48 | ngpus_requested = true; 49 | string a2 = argv[argi+1]; 50 | istringstream c(a2); c >> requested_gpus; 51 | RemoveArgs(argc, argv, argi, 2); 52 | } 53 | } else if (arg == "--cnn_gpu_ids" || arg == "--cnn-gpu-ids") { 54 | if ((argi + 1) > argc) { 55 | cerr << "[cnn] --cnn-gpu-ids expects an argument (comma separated list of physical GPU ids to use)\n"; 56 | abort(); 57 | } else { 58 | string a2 = argv[argi+1]; 59 | if (ids_requested) { 60 | cerr << "Multiple instances of --cnn-gpu-ids" << endl; abort(); 61 | } 62 | ids_requested = true; 63 | if (a2.size() % 2 != 1) { 64 | cerr << "Bad argument to --cnn-gpu-ids: " << a2 << endl; abort(); 65 | } 66 | for (unsigned i = 0; i < a2.size(); ++i) { 67 | if ((i % 2 == 0 && (a2[i] < '0' || a2[i] > '9')) || 68 | (i % 2 == 1 && a2[i] != ',')) { 69 | cerr << "Bad argument to --cnn-gpu-ids: " << a2 << endl; abort(); 70 | } 71 | if (i % 2 == 0) { 72 | int gpu_id = a2[i] - '0'; 73 | if (gpu_id >= nDevices) { 74 | cerr << "You requested GPU id " << gpu_id << " but system only reports up to " << nDevices << endl; 75 | abort(); 76 | } 77 | if (gpu_id >= MAX_GPUS) { cerr << "Raise MAX_GPUS\n"; abort(); } 78 | gpu_mask[gpu_id]++; 79 | requested_gpus++; 80 | if (gpu_mask[gpu_id] != 1) { 81 | cerr << "Bad argument to --cnn-gpu-ids: " << a2 << endl; abort(); 82 | } 83 | } 84 | } 85 | RemoveArgs(argc, argv, argi, 2); 86 | } 87 | } 88 | } 89 | if (ids_requested && ngpus_requested) { 90 | cerr << "Use only --cnn_gpus or --cnn_gpu_ids, not both\n"; 91 | abort(); 92 | } 93 | if (ngpus_requested || requested_gpus == -1) { 94 | if (requested_gpus == -1) requested_gpus = 1; 95 | cerr << "Request for " << requested_gpus << " GPU" << (requested_gpus == 1 ? "" : "s") << " ...\n"; 96 | for (int i = 0; i < MAX_GPUS; ++i) gpu_mask[i] = 1; 97 | } else if (ids_requested) { 98 | requested_gpus++; 99 | cerr << "[cnn] Request for " << requested_gpus << " specific GPU" << (requested_gpus == 1 ? 
"" : "s") << " ...\n"; 100 | } 101 | 102 | vector gpudevices; 103 | if (requested_gpus == 0) return gpudevices; 104 | if (requested_gpus > nDevices) { 105 | cerr << "You requested " << requested_gpus << " GPUs but system only reports " << nDevices << endl; 106 | abort(); 107 | } 108 | 109 | // after all that, requested_gpus is the number of GPUs to reserve 110 | // we now pick the ones that are both requested by the user or have 111 | // the most memory free 112 | 113 | vector gpu_free_mem(MAX_GPUS, 0); 114 | vector gpus(MAX_GPUS, 0); 115 | for (int i = 0; i < MAX_GPUS; ++i) gpus[i] = i; 116 | size_t free_bytes, total_bytes, max_free = 0; 117 | int selected = 0; 118 | for (int i = 0; i < nDevices; i++) { 119 | if (!gpu_mask[i]) continue; 120 | cudaDeviceProp prop; 121 | CUDA_CHECK(cudaGetDeviceProperties(&prop, i)); 122 | cerr << "[cnn] Device Number: " << i << endl; 123 | cerr << "[cnn] Device name: " << prop.name << endl; 124 | cerr << "[cnn] Memory Clock Rate (KHz): " << prop.memoryClockRate << endl; 125 | cerr << "[cnn] Memory Bus Width (bits): " << prop.memoryBusWidth << endl; 126 | cerr << "[cnn] Peak Memory Bandwidth (GB/s): " << (2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6) << endl; 127 | if (!prop.unifiedAddressing) { 128 | cerr << "[cnn] GPU does not support unified addressing.\n"; 129 | abort(); 130 | } 131 | CUDA_CHECK(cudaSetDevice(i)); 132 | CUDA_CHECK(cudaMemGetInfo( &free_bytes, &total_bytes )); 133 | CUDA_CHECK(cudaDeviceReset()); 134 | cerr << "[cnn] Memory Free (GB): " << free_bytes/1.0e9 << "/" << total_bytes/1.0e9 << endl; 135 | cerr << "[cnn]" << endl; 136 | gpu_free_mem[i] = free_bytes; 137 | } 138 | stable_sort(gpus.begin(), gpus.end(), [&](int a, int b) -> bool { return gpu_free_mem[a] > gpu_free_mem[b]; }); 139 | gpus.resize(requested_gpus); 140 | cerr << "[cnn] Device(s) selected:"; 141 | for (int i = 0; i < requested_gpus; ++i) { 142 | cerr << ' ' << gpus[i]; 143 | int mb = 512; 144 | Device* d = new Device_GPU(mb, gpus[i]); 145 | gpudevices.push_back(d); 146 | } 147 | cerr << endl; 148 | 149 | // eventually kill the global handle 150 | CUDA_CHECK(cudaSetDevice(gpus[0])); 151 | CUBLAS_CHECK(cublasCreate(&cublas_handle)); 152 | CUBLAS_CHECK(cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE)); 153 | return gpudevices; 154 | } 155 | 156 | } // namespace cnn 157 | -------------------------------------------------------------------------------- /cnn/cuda.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_CUDA_H 2 | #define CNN_CUDA_H 3 | #if HAVE_CUDA 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "cnn/except.h" 14 | 15 | #define CUDA_CHECK(stmt) do { \ 16 | cudaError_t err = stmt; \ 17 | if (err != cudaSuccess) { \ 18 | std::cerr << "CUDA failure in " << #stmt << std::endl\ 19 | << cudaGetErrorString(err) << std::endl; \ 20 | throw cnn::cuda_exception(#stmt); \ 21 | } \ 22 | } while(0) 23 | 24 | #define CUBLAS_CHECK(stmt) do { \ 25 | cublasStatus_t stat = stmt; \ 26 | if (stat != CUBLAS_STATUS_SUCCESS) { \ 27 | std::cerr << "CUBLAS failure in " << #stmt \ 28 | << std::endl << stat << std::endl; \ 29 | throw cnn::cuda_exception(#stmt); \ 30 | } \ 31 | } while(0) 32 | 33 | namespace cnn { 34 | 35 | struct Device; 36 | 37 | inline std::pair SizeToBlockThreadPair(int n) { 38 | assert(n); 39 | int logn; 40 | asm("\tbsr %1, %0\n" 41 | : "=r"(logn) 42 | : "r" (n-1)); 43 | logn = logn > 9 ? 9 : (logn < 4 ? 
4 : logn); 44 | ++logn; 45 | int threads = 1 << logn; 46 | int blocks = (n + threads - 1) >> logn; 47 | blocks = blocks > 128 ? 128 : blocks; 48 | return std::make_pair(blocks, threads); 49 | } 50 | 51 | std::vector Initialize_GPU(int& argc, char**& argv); 52 | extern cublasHandle_t cublas_handle; 53 | 54 | } // namespace cnn 55 | 56 | #endif 57 | #endif 58 | -------------------------------------------------------------------------------- /cnn/deep-lstm.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/deep-lstm.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | 10 | using namespace std; 11 | using namespace cnn::expr; 12 | 13 | namespace cnn { 14 | 15 | enum { X2I, H2I, C2I, BI, X2O, H2O, C2O, BO, X2C, H2C, BC }; 16 | 17 | DeepLSTMBuilder::DeepLSTMBuilder(unsigned layers, 18 | unsigned input_dim, 19 | unsigned hidden_dim, 20 | Model* model) : layers(layers) { 21 | unsigned layer_input_dim = input_dim; 22 | for (unsigned i = 0; i < layers; ++i) { 23 | // i 24 | Parameters* p_x2i = model->add_parameters({hidden_dim, layer_input_dim}); 25 | Parameters* p_h2i = model->add_parameters({hidden_dim, hidden_dim}); 26 | Parameters* p_c2i = model->add_parameters({hidden_dim, hidden_dim}); 27 | Parameters* p_bi = model->add_parameters({hidden_dim}); 28 | 29 | // o 30 | Parameters* p_x2o = model->add_parameters({hidden_dim, layer_input_dim}); 31 | Parameters* p_h2o = model->add_parameters({hidden_dim, hidden_dim}); 32 | Parameters* p_c2o = model->add_parameters({hidden_dim, hidden_dim}); 33 | Parameters* p_bo = model->add_parameters({hidden_dim}); 34 | 35 | // c 36 | Parameters* p_x2c = model->add_parameters({hidden_dim, layer_input_dim}); 37 | Parameters* p_h2c = model->add_parameters({hidden_dim, hidden_dim}); 38 | Parameters* p_bc = model->add_parameters({hidden_dim}); 39 | layer_input_dim = hidden_dim + input_dim; // output (hidden) from 1st layer is input to next 40 | 41 | vector ps = {p_x2i, p_h2i, p_c2i, p_bi, p_x2o, p_h2o, p_c2o, p_bo, p_x2c, p_h2c, p_bc}; 42 | params.push_back(ps); 43 | } // layers 44 | } 45 | 46 | void DeepLSTMBuilder::new_graph_impl(ComputationGraph& cg){ 47 | param_vars.clear(); 48 | 49 | for (unsigned i = 0; i < layers; ++i){ 50 | auto& p = params[i]; 51 | 52 | //i 53 | Expression i_x2i = parameter(cg,p[X2I]); 54 | Expression i_h2i = parameter(cg,p[H2I]); 55 | Expression i_c2i = parameter(cg,p[C2I]); 56 | Expression i_bi = parameter(cg,p[BI]); 57 | //o 58 | Expression i_x2o = parameter(cg,p[X2O]); 59 | Expression i_h2o = parameter(cg,p[H2O]); 60 | Expression i_c2o = parameter(cg,p[C2O]); 61 | Expression i_bo = parameter(cg,p[BO]); 62 | //c 63 | Expression i_x2c = parameter(cg,p[X2C]); 64 | Expression i_h2c = parameter(cg,p[H2C]); 65 | Expression i_bc = parameter(cg,p[BC]); 66 | 67 | vector vars = {i_x2i, i_h2i, i_c2i, i_bi, i_x2o, i_h2o, i_c2o, i_bo, i_x2c, i_h2c, i_bc}; 68 | param_vars.push_back(vars); 69 | } 70 | } 71 | 72 | // layout: 0..layers = c 73 | // layers+1..2*layers = h 74 | void DeepLSTMBuilder::start_new_sequence_impl(const vector& hinit) { 75 | h.clear(); 76 | c.clear(); 77 | if (hinit.size() > 0) { 78 | assert(layers*2 == hinit.size()); 79 | h0.resize(layers); 80 | c0.resize(layers); 81 | for (unsigned i = 0; i < layers; ++i) { 82 | c0[i] = hinit[i]; 83 | h0[i] = hinit[i + layers]; 84 | } 85 | has_initial_state = true; 86 | } else { 87 | has_initial_state = false; 88 | } 89 | } 90 | 91 | Expression DeepLSTMBuilder::add_input_impl(int prev, const Expression& x) { 
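// each timestep records one h and one c per layer; layer 0 reads x directly, and
// every deeper layer reads the previous layer's output concatenated with x (cf.
// layer_input_dim = hidden_dim + input_dim in the constructor). the expression
// returned at the end is the concatenation of all layers' hidden states.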
92 | h.push_back(vector(layers)); 93 | c.push_back(vector(layers)); 94 | o.push_back(Expression()); 95 | vector& ht = h.back(); 96 | vector& ct = c.back(); 97 | Expression& ot = o.back(); 98 | Expression in = x; 99 | vector cc(layers); 100 | for (unsigned i = 0; i < layers; ++i) { 101 | if (i > 0) 102 | in = concatenate({in, x}); 103 | const vector& vars = param_vars[i]; 104 | Expression i_h_tm1, i_c_tm1; 105 | bool has_prev_state = (prev >= 0 || has_initial_state); 106 | if (prev < 0) { 107 | if (has_initial_state) { 108 | // intial value for h and c at timestep 0 in layer i 109 | // defaults to zero matrix input if not set in add_parameter_edges 110 | i_h_tm1 = h0[i]; 111 | i_c_tm1 = c0[i]; 112 | } 113 | } else { // t > 0 114 | i_h_tm1 = h[prev][i]; 115 | i_c_tm1 = c[prev][i]; 116 | } 117 | // input 118 | Expression i_ait; 119 | if (has_prev_state) 120 | // i_ait = vars[BI] + vars[X2I] * in + vars[H2I]*i_h_tm1 + vars[C2I] * i_c_tm1; 121 | i_ait = affine_transform({vars[BI], vars[X2I], in, vars[H2I], i_h_tm1, vars[C2I], i_c_tm1}); 122 | else 123 | // i_ait = vars[BI] + vars[X2I] * in; 124 | i_ait = affine_transform({vars[BI], vars[X2I], in}); 125 | Expression i_it = logistic(i_ait); 126 | // forget 127 | Expression i_ft = 1.f - i_it; 128 | // write memory cell 129 | Expression i_awt; 130 | if (has_prev_state) 131 | // i_awt = vars[BC] + vars[X2C] * in + vars[H2C]*i_h_tm1; 132 | i_awt = affine_transform({vars[BC], vars[X2C], in, vars[H2C], i_h_tm1}); 133 | else 134 | // i_awt = vars[BC] + vars[X2C] * in; 135 | i_awt = affine_transform({vars[BC], vars[X2C], in}); 136 | Expression i_wt = tanh(i_awt); 137 | // output 138 | if (has_prev_state) { 139 | Expression i_nwt = cwise_multiply(i_it,i_wt); 140 | Expression i_crt = cwise_multiply(i_ft,i_c_tm1); 141 | ct[i] = i_crt + i_nwt; 142 | } else { 143 | ct[i] = cwise_multiply(i_it,i_wt); 144 | } 145 | 146 | Expression i_aot; 147 | if (has_prev_state) 148 | // i_aot = vars[BO] + vars[X2O] * in + vars[H2O] * i_h_tm1 + vars[C2O] * ct[i]; 149 | i_aot = affine_transform({vars[BO], vars[X2O], in, vars[H2O], i_h_tm1, vars[C2O], ct[i]}); 150 | else 151 | // i_aot = vars[BO] + vars[X2O] * in; 152 | i_aot = affine_transform({vars[BO], vars[X2O], in}); 153 | Expression i_ot = logistic(i_aot); 154 | Expression ph_t = tanh(ct[i]); 155 | in = ht[i] = cwise_multiply(i_ot,ph_t); 156 | cc[i] = in; 157 | } 158 | ot = concatenate(cc); 159 | return ot; 160 | } 161 | 162 | } // namespace cnn 163 | -------------------------------------------------------------------------------- /cnn/deep-lstm.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_DEEP_LSTM_H_ 2 | #define CNN_DEEP_LSTM_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn.h" 6 | #include "cnn/expr.h" 7 | 8 | using namespace cnn::expr; 9 | 10 | namespace cnn { 11 | 12 | class Model; 13 | 14 | struct DeepLSTMBuilder : public RNNBuilder { 15 | DeepLSTMBuilder() = default; 16 | explicit DeepLSTMBuilder(unsigned layers, 17 | unsigned input_dim, 18 | unsigned hidden_dim, 19 | Model* model); 20 | 21 | Expression back() const override { return h.back().back(); } 22 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 23 | std::vector final_s() const override { 24 | std::vector ret = (c.size() == 0 ? 
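// layout (see the note in deep-lstm.cc): cell states c come first, hidden states h are appended after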
c0 : c.back()); 25 | for(auto my_h : final_h()) ret.push_back(my_h); 26 | return ret; 27 | } 28 | protected: 29 | void new_graph_impl(ComputationGraph& cg) override; 30 | void start_new_sequence_impl(const std::vector& h0) override; 31 | Expression add_input_impl(int prev, const Expression& x) override; 32 | 33 | public: 34 | // first index is layer, then ... 35 | std::vector> params; 36 | 37 | // first index is layer, then ... 38 | std::vector> param_vars; 39 | 40 | // first index is time, second is layer 41 | std::vector> h, c; 42 | std::vector o; 43 | 44 | // initial values of h and c at each layer 45 | // - both default to zero matrix input 46 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 47 | std::vector h0; 48 | std::vector c0; 49 | unsigned layers; 50 | }; 51 | 52 | } // namespace cnn 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /cnn/devices.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/devices.h" 2 | 3 | #include 4 | 5 | #include "cnn/cuda.h" 6 | 7 | using namespace std; 8 | 9 | namespace cnn { 10 | 11 | Device::~Device() {} 12 | 13 | #if HAVE_CUDA 14 | Device_GPU::Device_GPU(int mb, int device_id) : 15 | Device(DeviceType::GPU, &gpu_mem), cuda_device_id(device_id), gpu_mem(device_id) { 16 | CUDA_CHECK(cudaSetDevice(device_id)); 17 | CUBLAS_CHECK(cublasCreate(&cublas_handle)); 18 | CUBLAS_CHECK(cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE)); 19 | kSCALAR_MINUSONE = (float*)gpu_mem.malloc(sizeof(float)); 20 | kSCALAR_ONE = (float*)gpu_mem.malloc(sizeof(float)); 21 | kSCALAR_ZERO = (float*)gpu_mem.malloc(sizeof(float)); 22 | float minusone = -1; 23 | CUDA_CHECK(cudaMemcpyAsync(kSCALAR_MINUSONE, &minusone, sizeof(float), cudaMemcpyHostToDevice)); 24 | float one = 1; 25 | CUDA_CHECK(cudaMemcpyAsync(kSCALAR_ONE, &one, sizeof(float), cudaMemcpyHostToDevice)); 26 | float zero = 0; 27 | CUDA_CHECK(cudaMemcpyAsync(kSCALAR_ZERO, &zero, sizeof(float), cudaMemcpyHostToDevice)); 28 | 29 | // this is the big memory allocation 30 | 31 | size_t byte_count = (size_t)mb << 20; 32 | fxs = new AlignedMemoryPool(byte_count, mem); // memory for node values 33 | dEdfs = new AlignedMemoryPool(byte_count, mem); // memory for node gradients 34 | ps = new AlignedMemoryPool(byte_count, mem); // memory for parameters 35 | 36 | } 37 | 38 | Device_GPU::~Device_GPU() {} 39 | #endif 40 | 41 | // TODO we should be able to configure this carefully with a configuration 42 | // script 43 | // CPU -- 0 params 44 | // -- 50mb fxs 45 | // -- 50mb dEdfx 46 | Device_CPU::Device_CPU(int mb, bool shared) : 47 | Device(DeviceType::CPU, &cpu_mem), shmem(mem) { 48 | if (shared) shmem = new SharedAllocator(); 49 | kSCALAR_MINUSONE = (float*) mem->malloc(sizeof(float)); 50 | *kSCALAR_MINUSONE = -1; 51 | kSCALAR_ONE = (float*) mem->malloc(sizeof(float)); 52 | *kSCALAR_ONE = 1; 53 | kSCALAR_ZERO = (float*) mem->malloc(sizeof(float)); 54 | *kSCALAR_ZERO = 0; 55 | 56 | // this is the big memory allocation: the pools 57 | 58 | size_t byte_count = (size_t)mb << 20; 59 | fxs = new AlignedMemoryPool(byte_count, mem); // memory for node values 60 | dEdfs = new AlignedMemoryPool(byte_count, mem); // memory for node gradients 61 | ps = new AlignedMemoryPool(byte_count, mem); // memory for parameters 62 | 63 | } 64 | 65 | Device_CPU::~Device_CPU() {} 66 | 67 | } // namespace cnn 68 | -------------------------------------------------------------------------------- /cnn/devices.h: 
-------------------------------------------------------------------------------- 1 | #ifndef CNN_DEVICES_H 2 | #define CNN_DEVICES_H 3 | 4 | #include 5 | #include "cnn/aligned-mem-pool.h" 6 | #include "cnn/cuda.h" 7 | 8 | namespace cnn { 9 | 10 | enum class DeviceType {CPU, GPU}; 11 | 12 | class Device { 13 | protected: 14 | Device(DeviceType t, MemAllocator* m) : type(t), mem(m) {} 15 | Device(const Device&) = delete; 16 | Device& operator=(const Device&) = delete; 17 | virtual ~Device(); 18 | public: 19 | DeviceType type; 20 | MemAllocator* mem; 21 | AlignedMemoryPool* fxs; 22 | AlignedMemoryPool* dEdfs; 23 | AlignedMemoryPool* ps; 24 | float* kSCALAR_MINUSONE; 25 | float* kSCALAR_ONE; 26 | float* kSCALAR_ZERO; 27 | std::string name; 28 | }; 29 | 30 | #if HAVE_CUDA 31 | class Device_GPU : public Device { 32 | public: 33 | explicit Device_GPU(int mb, int device_id); 34 | ~Device_GPU(); 35 | int cuda_device_id; 36 | cublasHandle_t cublas_handle; 37 | GPUAllocator gpu_mem; 38 | }; 39 | #endif 40 | 41 | class Device_CPU : public Device { 42 | public: 43 | explicit Device_CPU(int mb, bool shared); 44 | ~Device_CPU(); 45 | CPUAllocator cpu_mem; 46 | MemAllocator* shmem; 47 | }; 48 | 49 | } // namespace cnn 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /cnn/dict.cc: -------------------------------------------------------------------------------- 1 | #include "dict.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | namespace cnn { 10 | 11 | std::vector ReadSentence(const std::string& line, Dict* sd) { 12 | std::istringstream in(line); 13 | std::string word; 14 | std::vector res; 15 | while(in) { 16 | in >> word; 17 | if (!in || word.empty()) break; 18 | res.push_back(sd->Convert(word)); 19 | } 20 | return res; 21 | } 22 | 23 | void ReadSentencePair(const std::string& line, std::vector* s, Dict* sd, std::vector* t, Dict* td) { 24 | std::istringstream in(line); 25 | std::string word; 26 | std::string sep = "|||"; 27 | Dict* d = sd; 28 | std::vector* v = s; 29 | while(in) { 30 | in >> word; 31 | if (!in) break; 32 | if (word == sep) { d = td; v = t; continue; } 33 | v->push_back(d->Convert(word)); 34 | } 35 | } 36 | 37 | } // namespace cnn 38 | 39 | -------------------------------------------------------------------------------- /cnn/dict.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_DICT_H_ 2 | #define CNN_DICT_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #if BOOST_VERSION >= 105600 15 | #include 16 | #include 17 | #endif 18 | 19 | namespace cnn { 20 | 21 | class Dict { 22 | typedef std::unordered_map Map; 23 | public: 24 | Dict() : frozen(false), map_unk(false), unk_id(-1) { 25 | } 26 | 27 | inline unsigned size() const { return words_.size(); } 28 | 29 | inline bool Contains(const std::string& words) { 30 | return !(d_.find(words) == d_.end()); 31 | } 32 | 33 | void Freeze() { frozen = true; } 34 | bool is_frozen() { return frozen; } 35 | 36 | inline int Convert(const std::string& word) { 37 | auto i = d_.find(word); 38 | if (i == d_.end()) { 39 | if (frozen) { 40 | if (map_unk) { 41 | return unk_id; 42 | } 43 | else { 44 | std::cerr << map_unk << std::endl; 45 | std::cerr << "Unknown word encountered: " << word << std::endl; 46 | throw std::runtime_error("Unknown word encountered in frozen dictionary: " + word); 47 | } 48 | } 49 | words_.push_back(word); 50 | 
return d_[word] = words_.size() - 1; 51 | } else { 52 | return i->second; 53 | } 54 | } 55 | 56 | inline const std::string& Convert(const int& id) const { 57 | assert(id < (int)words_.size()); 58 | return words_[id]; 59 | } 60 | 61 | void SetUnk(const std::string& word) { 62 | if (!frozen) 63 | throw std::runtime_error("Please call SetUnk() only after dictionary is frozen"); 64 | if (map_unk) 65 | throw std::runtime_error("SetUnk() was called more than once"); 66 | 67 | // temporarily unfreeze the dictionary to allow the UNK token to be added 68 | frozen = false; 69 | unk_id = Convert(word); 70 | frozen = true; 71 | 72 | map_unk = true; 73 | } 74 | 75 | void clear() { words_.clear(); d_.clear(); } 76 | 77 | private: 78 | bool frozen; 79 | bool map_unk; // if true, map unknown word to unk_id 80 | int unk_id; 81 | std::vector<std::string> words_; 82 | Map d_; 83 | 84 | friend class boost::serialization::access; 85 | #if BOOST_VERSION >= 105600 86 | template<class Archive> void serialize(Archive& ar, const unsigned int) { 87 | ar & frozen; 88 | ar & map_unk; 89 | ar & unk_id; 90 | ar & words_; 91 | ar & d_; 92 | } 93 | #else 94 | template<class Archive> void serialize(Archive& ar, const unsigned int) { 95 | throw std::invalid_argument("Serializing dictionaries is only supported on versions of boost 1.56 or higher"); 96 | } 97 | #endif 98 | }; 99 | 100 | std::vector<int> ReadSentence(const std::string& line, Dict* sd); 101 | void ReadSentencePair(const std::string& line, std::vector<int>* s, Dict* sd, std::vector<int>* t, Dict* td); 102 | 103 | } // namespace cnn 104 | 105 | #endif 106 | -------------------------------------------------------------------------------- /cnn/dim.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/dim.h" 2 | 3 | #include 4 | 5 | using namespace std; 6 | 7 | namespace cnn { 8 | 9 | ostream& operator<<(ostream& os, const Dim& d) { 10 | os << '{'; 11 | for (unsigned i = 0; i < d.nd; ++i) { 12 | if (i) os << ','; 13 | os << d.d[i]; 14 | } 15 | if(d.bd != 1) os << 'X' << d.bd; 16 | return os << '}'; 17 | } 18 | 19 | ostream& operator<<(ostream& os, const vector<Dim>& ds) { 20 | os << '['; 21 | for (unsigned i = 0; i < ds.size(); ++i) 22 | os << (i ? " " : "") << ds[i]; 23 | return os << ']'; 24 | } 25 | 26 | } // namespace cnn 27 | 28 | -------------------------------------------------------------------------------- /cnn/dim.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_DIM_H 2 | #define CNN_DIM_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define CNN_MAX_TENSOR_DIM 7 13 | 14 | namespace boost { namespace serialization { class access; } } 15 | 16 | namespace cnn { 17 | 18 | struct Dim { 19 | Dim() : nd(), bd(1) {} 20 | // explicit Dim(unsigned int m) : nd(1), bd(1) { d[0] = m; } 21 | // TODO: The constructors for dimensions w/ and w/o batches are not intuitive. 22 | // can this be fixed in some way?
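// As an illustration of the TODO above, a minimal sketch of how the two
// constructor families behave (a hedged example; it assumes the template
// arguments stripped by extraction are unsigned int):
//
//   Dim m({3, 4});          // a 3x4 matrix; m.batch_elems() == 1, m.size() == 12
//   Dim b({3, 4}, 8);       // same shape over a minibatch of 8; b.size() == 96
//   Dim t = b.transpose();  // {4, 3}; the batch is preserved: t.batch_elems() == 8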
23 | // Dim(unsigned int m, unsigned int n) : nd(2), bd(1) { d[0] = m; d[1] = n; } 24 | Dim(std::initializer_list<unsigned int> x) : nd(), bd(1) { 25 | for(auto v : x) d[nd++] = v; 26 | } 27 | Dim(std::initializer_list<unsigned int> x, unsigned int b) : nd(), bd(b) { 28 | for(auto v : x) d[nd++] = v; 29 | } 30 | Dim(const std::vector<unsigned int>& x) : nd(), bd(1) { 31 | for(auto v : x) d[nd++] = v; 32 | } 33 | Dim(const std::vector<unsigned int>& x, unsigned int b) : nd(), bd(b) { 34 | for(auto v : x) d[nd++] = v; 35 | } 36 | inline unsigned int size() const { 37 | return batch_size() * bd; 38 | } 39 | inline unsigned int batch_size() const { 40 | unsigned int p = 1; 41 | for (unsigned int i = 0; i < nd; ++i) p *= d[i]; 42 | return p; 43 | } 44 | inline unsigned int sum_dims() const { 45 | unsigned int p = 0; 46 | for (unsigned int i = 0; i < nd; ++i) p += d[i]; 47 | return p; 48 | } 49 | inline Dim truncate() const { 50 | Dim r = *this; 51 | unsigned int m = 1; 52 | unsigned int s = size(); 53 | for (unsigned int i = 1; i < s; ++i) 54 | if (size(i) > 1) m = i + 1; 55 | r.resize(m); 56 | return r; 57 | } 58 | inline Dim single_batch() const { 59 | Dim r = *this; 60 | r.bd = 1; 61 | return r; 62 | } 63 | inline void resize(unsigned int i) { nd = i; } 64 | inline unsigned int ndims() const { return nd; } 65 | inline unsigned int rows() const { return d[0]; } 66 | inline unsigned int cols() const { return nd > 1 ? d[1] : 1; } 67 | inline unsigned int batch_elems() const { return bd; } 68 | inline void set(unsigned int i, unsigned int s) { assert(i < nd); assert(s > 0); d[i] = s; } 69 | inline unsigned int operator[](unsigned int i) const { return i < nd ? d[i] : 1; } 70 | inline unsigned int size(unsigned int i) const { return (*this)[i]; } 71 | inline Dim transpose() const { 72 | if (nd == 1) { return Dim({1, d[0]}, bd); } 73 | else if (nd == 2) { return Dim({d[1], d[0]}, bd); } 74 | throw std::invalid_argument("Cannot transpose Dim object with more than 2 dimensions"); 75 | } 76 | unsigned int d[CNN_MAX_TENSOR_DIM]; 77 | unsigned int nd; 78 | unsigned int bd; 79 | private: 80 | friend class boost::serialization::access; 81 | template<class Archive> void serialize(Archive& ar, const unsigned int) { 82 | ar & nd; 83 | ar & d; 84 | } 85 | }; 86 | 87 | //static_assert(std::is_trivially_copyable<Dim>::value, "Dim must be trivially copyable"); 88 | 89 | inline bool operator==(const Dim& a, const Dim& b) { 90 | if (a.nd != b.nd || a.bd != b.bd) return false; 91 | return std::memcmp(a.d, b.d, a.nd * sizeof(unsigned int)) == 0; // compare nd elements; memcmp counts bytes 92 | } 93 | 94 | inline bool operator!=(const Dim& a, const Dim& b) { return !(a == b); } 95 | 96 | std::ostream& operator<<(std::ostream& os, const Dim& d); 97 | std::ostream& operator<<(std::ostream& os, const std::vector<Dim>& ds); 98 | 99 | } // namespace cnn 100 | 101 | #endif 102 | -------------------------------------------------------------------------------- /cnn/except.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_EXCEPT_H_ 2 | #define CNN_EXCEPT_H_ 3 | 4 | #include 5 | 6 | namespace cnn { 7 | 8 | // if CNN exhausts its memory pool 9 | class out_of_memory : public std::runtime_error { 10 | public: 11 | out_of_memory(const std::string& what_arg) : runtime_error(what_arg) {} 12 | }; 13 | 14 | // this error occurs when logic that has not been 15 | // implemented for the CUDA backend is 16 | // executed.
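// A hedged sketch of how callers can trap these errors; guarded_run and
// build_and_train are hypothetical application code (assumes <iostream>),
// and only the catch clauses are specific to cnn:
//
//   void build_and_train();  // hypothetical entry point, defined elsewhere
//
//   int guarded_run() {
//     try {
//       build_and_train();
//     } catch (const cnn::out_of_memory& e) {
//       std::cerr << "pool exhausted: " << e.what() << "\n";  // rerun with a larger --cnn-mem
//       return 1;
//     } catch (const std::logic_error& e) {
//       std::cerr << "not implemented: " << e.what() << "\n"; // e.g. cuda_not_implemented, declared below
//       return 1;
//     }
//     return 0;
//   }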
17 | class cuda_not_implemented : public std::logic_error { 18 | public: 19 | cuda_not_implemented(const std::string& what_arg) : logic_error(what_arg) {} 20 | }; 21 | 22 | // this is thrown when cuda returns an error (bad arguments, memory, state, etc) 23 | class cuda_exception : public std::runtime_error { 24 | public: 25 | cuda_exception(const std::string& what_arg) : runtime_error(what_arg) {} 26 | }; 27 | 28 | } // namespace cnn 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /cnn/exec.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/exec.h" 2 | 3 | #include "cnn/param-nodes.h" 4 | 5 | using namespace std; 6 | 7 | namespace cnn { 8 | 9 | ExecutionEngine::~ExecutionEngine() {} 10 | 11 | void SimpleExecutionEngine::invalidate() { 12 | num_nodes_evaluated = 0; 13 | } 14 | 15 | const Tensor& SimpleExecutionEngine::forward() { 16 | const VariableIndex node_max_index = (VariableIndex)(cg.nodes.size() - 1); 17 | return forward(node_max_index); 18 | } 19 | 20 | const Tensor& SimpleExecutionEngine::forward(VariableIndex i) { 21 | invalidate(); 22 | return incremental_forward(i); 23 | } 24 | 25 | const Tensor& SimpleExecutionEngine::get_value(VariableIndex i) { 26 | assert(i < cg.nodes.size()); 27 | if (i >= num_nodes_evaluated) { 28 | incremental_forward(); 29 | } 30 | return nfxs[i]; 31 | } 32 | 33 | const Tensor& SimpleExecutionEngine::incremental_forward() { 34 | const VariableIndex node_max_index = (VariableIndex)(cg.nodes.size() - 1); 35 | return incremental_forward(node_max_index); 36 | } 37 | 38 | const Tensor& SimpleExecutionEngine::incremental_forward(VariableIndex i) { 39 | assert(i < cg.nodes.size()); 40 | 41 | // free any old memory if this is a new CG 42 | if (num_nodes_evaluated == 0) fxs->free(); 43 | 44 | if (i >= num_nodes_evaluated) { 45 | nfxs.resize(i + 1); 46 | 47 | //vector dummy(5, "x"); 48 | vector xs(16); 49 | for (; num_nodes_evaluated <= i; ++num_nodes_evaluated) { 50 | const Node* node = cg.nodes[num_nodes_evaluated]; 51 | xs.resize(node->arity()); 52 | unsigned ai = 0; 53 | for (VariableIndex arg : node->args) { 54 | xs[ai] = &nfxs[arg]; 55 | ++ai; 56 | } 57 | nfxs[num_nodes_evaluated].d = node->dim; 58 | nfxs[num_nodes_evaluated].v = static_cast(fxs->allocate(node->dim.size() * sizeof(float))); 59 | if (nfxs[num_nodes_evaluated].v == nullptr) { 60 | cerr << "out of memory\n"; 61 | abort(); 62 | } 63 | void* aux_mem = nullptr; 64 | size_t aux_size = node->aux_storage_size(); 65 | if (aux_size) { 66 | aux_mem = fxs->allocate(aux_size); 67 | if (!aux_mem) { 68 | cerr << "aux out of memory\n"; 69 | abort(); 70 | } 71 | } 72 | node->aux_mem = aux_mem; 73 | node->forward(xs, nfxs[num_nodes_evaluated]); 74 | } 75 | } 76 | return nfxs[i]; 77 | } 78 | 79 | void SimpleExecutionEngine::backward() { 80 | assert(nfxs.size() == cg.nodes.size()); 81 | backward((VariableIndex)(cg.nodes.size()-1)); 82 | } 83 | 84 | // TODO what is happening with parameter nodes if from_where > param_node_id ? 
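// For reference, a hedged sketch of the calling convention this engine
// expects: the graph's final node must be a scalar before backward() runs.
// p_W, x_val, y_val, and the trainer are assumed to be created elsewhere
// (parameter/input/squared_distance come from expr.h, SimpleSGDTrainer from
// training.h):
//
//   ComputationGraph cg;                  // owns a SimpleExecutionEngine
//   Expression W = parameter(cg, p_W);    // ParameterNode: tracked in cg.parameter_nodes
//   Expression x = input(cg, Dim({2}), &x_val);
//   Expression y = input(cg, &y_val);
//   Expression loss = squared_distance(W * x, y);  // scalar final node
//   float l = as_scalar(cg.forward());    // forward to the last node
//   cg.backward();                        // reverse pass; accumulates into p_W->g
//   trainer.update(1.0f);                 // apply and clear the gradients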
85 | void SimpleExecutionEngine::backward(VariableIndex from_where) { 86 | assert(from_where+1 <= nfxs.size()); 87 | assert(from_where+1 <= cg.nodes.size()); 88 | if (nfxs[from_where].d.size() != 1) { 89 | cerr << "backward() called on non-scalar node.\n"; 90 | abort(); 91 | } 92 | 93 | const unsigned num_nodes = from_where+1; 94 | ndEdfs.resize(num_nodes); 95 | dEdfs->free(); 96 | for (unsigned i = 0; i < num_nodes; ++i) { 97 | const auto dim = nfxs[i].d; 98 | ndEdfs[i].d = dim; 99 | ndEdfs[i].v = static_cast(dEdfs->allocate(dim.size() * sizeof(float))); 100 | if (!ndEdfs[i].v) { 101 | cerr << "out of memory while attempting to allocate space for derivatives\n"; 102 | abort(); 103 | } 104 | } 105 | dEdfs->zero_allocated_memory(); 106 | // initialize dE/dE = 1 107 | ndEdfs.back().v = kSCALAR_ONE; 108 | 109 | // here we find constant paths to avoid doing extra work 110 | // by default, a node is constant unless 111 | // 1) it is a parameter node 112 | // 2) it depends on a non-constant node 113 | // (thus, functions of constants and inputs end up being 114 | // false in this computation) 115 | vector needs_derivative(num_nodes, false); 116 | for (auto i : cg.parameter_nodes) 117 | needs_derivative[i] = true; 118 | 119 | for (unsigned ni = 0; ni < num_nodes; ++ni) { 120 | bool nd = needs_derivative[ni]; 121 | for (auto arg : cg.nodes[ni]->args) 122 | nd |= needs_derivative[arg]; 123 | needs_derivative[ni] = nd; 124 | } 125 | 126 | // loop in reverse topological order 127 | // consider only nodes that participate in the computation. 128 | vector in_computation(num_nodes, false); 129 | in_computation[num_nodes - 1] = true; 130 | vector xs; 131 | for (int i = num_nodes - 1; i >= 0; --i) { 132 | if (!in_computation[i]) continue; 133 | const Node* node = cg.nodes[i]; 134 | xs.resize(node->arity()); 135 | unsigned ai = 0; 136 | for (VariableIndex arg : node->args) { 137 | in_computation[arg] = true; 138 | xs[ai] = &nfxs[arg]; 139 | ++ai; 140 | } 141 | ai = 0; 142 | for (VariableIndex arg : node->args) { 143 | if (needs_derivative[arg]) { 144 | node->backward(xs, nfxs[i], ndEdfs[i], ai, ndEdfs[arg]); 145 | } 146 | ++ai; 147 | } 148 | } 149 | 150 | // accumulate gradients into parameters 151 | // this is simpler than you might find in some other frameworks 152 | // since we assume parameters come into the graph as a "function" 153 | // that returns the current value of the parameters 154 | for (VariableIndex i : cg.parameter_nodes) 155 | static_cast(cg.nodes[i])->accumulate_grad(ndEdfs[i]); 156 | } 157 | 158 | } // namespace cnn 159 | -------------------------------------------------------------------------------- /cnn/exec.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_EXEC_H 2 | #define CNN_EXEC_H 3 | 4 | #include "cnn/cnn.h" 5 | 6 | namespace cnn { 7 | 8 | class ExecutionEngine { 9 | public: 10 | virtual ~ExecutionEngine(); 11 | virtual void invalidate() = 0; 12 | virtual const Tensor& forward() = 0; 13 | virtual const Tensor& forward(VariableIndex i) = 0; 14 | virtual const Tensor& incremental_forward() = 0; // if you want to add nodes and evaluate just the new parts 15 | virtual const Tensor& incremental_forward(VariableIndex i) = 0; 16 | virtual const Tensor& get_value(VariableIndex i) = 0; 17 | virtual void backward() = 0; 18 | virtual void backward(VariableIndex i) = 0; 19 | protected: 20 | explicit ExecutionEngine(const ComputationGraph& cg) : cg(cg) {} 21 | const ComputationGraph& cg; 22 | }; 23 | 24 | class SimpleExecutionEngine : 
public ExecutionEngine { 25 | public: 26 | explicit SimpleExecutionEngine(const ComputationGraph& cg) : ExecutionEngine(cg) {} 27 | void invalidate() override; 28 | const Tensor& forward() override; 29 | const Tensor& forward(VariableIndex i) override; 30 | const Tensor& incremental_forward() override; // if you want to add nodes and evaluate just the new parts 31 | const Tensor& incremental_forward(VariableIndex i) override; 32 | const Tensor& get_value(VariableIndex i) override; 33 | void backward() override; 34 | void backward(VariableIndex i) override; 35 | private: 36 | std::vector<Tensor> nfxs; 37 | std::vector<Tensor> ndEdfs; 38 | VariableIndex num_nodes_evaluated; 39 | }; 40 | 41 | } // namespace cnn 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /cnn/fast-lstm.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_FAST_LSTM_H_ 2 | #define CNN_FAST_LSTM_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn.h" 6 | #include "cnn/expr.h" 7 | 8 | using namespace cnn::expr; 9 | 10 | namespace cnn { 11 | 12 | class Model; 13 | 14 | /* 15 | FastLSTM replaces the matrices from the cell to the other units with diagonal matrices. 16 | */ 17 | struct FastLSTMBuilder : public RNNBuilder { 18 | FastLSTMBuilder() = default; 19 | explicit FastLSTMBuilder(unsigned layers, 20 | unsigned input_dim, 21 | unsigned hidden_dim, 22 | Model* model); 23 | 24 | Expression back() const override { return (cur == -1? h0.back() : h[cur].back()); } 25 | std::vector<Expression> final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 26 | std::vector<Expression> final_s() const override { 27 | std::vector<Expression> ret = (c.size() == 0 ? c0 : c.back()); 28 | for(auto my_h : final_h()) ret.push_back(my_h); 29 | return ret; 30 | } 31 | unsigned num_h0_components() const override { return 2 * layers; } 32 | 33 | std::vector<Expression> get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 34 | std::vector<Expression> get_s(RNNPointer i) const override { 35 | std::vector<Expression> ret = (i == -1 ? c0 : c[i]); 36 | for(auto my_h : get_h(i)) ret.push_back(my_h); 37 | return ret; 38 | } 39 | 40 | void copy(const RNNBuilder & params) override; 41 | protected: 42 | void new_graph_impl(ComputationGraph& cg) override; 43 | void start_new_sequence_impl(const std::vector<Expression>& h0) override; 44 | Expression add_input_impl(int prev, const Expression& x) override; 45 | 46 | public: 47 | // first index is layer, then ... 48 | std::vector<std::vector<Parameters*>> params; 49 | 50 | // first index is layer, then ...
51 | std::vector> param_vars; 52 | 53 | // first index is time, second is layer 54 | std::vector> h, c; 55 | 56 | // initial values of h and c at each layer 57 | // - both default to zero matrix input 58 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 59 | std::vector h0; 60 | std::vector c0; 61 | unsigned layers; 62 | }; 63 | 64 | } // namespace cnn 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /cnn/gpu-kernels.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GPU_KERNELS_H 2 | #define CNN_GPU_KERNELS_H 3 | 4 | #include "cnn/cuda.h" 5 | 6 | namespace cnn { 7 | namespace gpu { 8 | 9 | template 10 | __global__ void unaryExprKernel(int n, const float* x, float* y, Func func) { 11 | int i = threadIdx.x + blockIdx.x * blockDim.x; 12 | while (i < n) { 13 | y[i] = func(x[i]); 14 | i += gridDim.x * blockDim.x; 15 | } 16 | } 17 | 18 | template 19 | __global__ void accUnaryExprKernel(int n, const float* x, float* y, Func func) { 20 | int i = threadIdx.x + blockIdx.x * blockDim.x; 21 | while (i < n) { 22 | y[i] += func(x[i]); 23 | i += gridDim.x * blockDim.x; 24 | } 25 | } 26 | 27 | template 28 | __global__ void binaryExprKernel(int n, const float* x0, const float* x1, float* y, Func func) { 29 | int i = threadIdx.x + blockIdx.x * blockDim.x; 30 | while (i < n) { 31 | y[i] = func(x0[i], x1[i]); 32 | i += gridDim.x * blockDim.x; 33 | } 34 | } 35 | 36 | template 37 | __global__ void accBinaryExprKernel(int n, const float* x0, const float* x1, float* y, Func func) { 38 | int i = threadIdx.x + blockIdx.x * blockDim.x; 39 | while (i < n) { 40 | y[i] += func(x0[i], x1[i]); 41 | i += gridDim.x * blockDim.x; 42 | } 43 | } 44 | 45 | template 46 | __global__ void slowReduceKernel(int n, const float* x0, const float* x1, float* y, Func func) { 47 | float ty = 0; 48 | // THIS IS BAD - FIX THIS TO MAKE IT FAST 49 | for (int i = 0; i < n; ++i) 50 | ty += func(x0[i], x1[i]); 51 | y[0] = ty; 52 | } 53 | 54 | } // namespace gpu 55 | } // namespace cnn 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /cnn/gpu-ops.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GPU_OPS_H 2 | #define CNN_GPU_OPS_H 3 | 4 | namespace cnn { 5 | namespace gpu { 6 | 7 | void vpairwise_rank_loss(int n, float margin, const float* xgood, const float* xbad, float* y); 8 | void vpairwise_rank_loss_backward(int n, bool d_wrt_correct, const float* fx, const float* dEdf, float* dEdx); 9 | void vcwise_product(int n, const float* x0, const float* x1, float* y); 10 | void vcwise_product_backward(int n, const float* dEdy, const float* x_other, float* dEdx); 11 | void vconstant_minusx(int n, float c, const float* x, float* y); 12 | void vnegate(int n, const float* x, float* y); 13 | void vnegate_backward(int n, const float* dEdf, float* dEdx); 14 | void vrelu(int n, const float* x, float* y); 15 | void vrelu_backward(int n, const float* fx, const float* dEdf, float* dEdx); 16 | void vtanh(int n, const float* x, float* y); 17 | void vtanh_backward(int n, const float* fx, const float* dEdf, float* dEdx); 18 | void vlog(int n, const float* x, float* y); 19 | void vlog_backward(int n, const float* fx, const float* dEdf, float* dEdx); 20 | void vlogistic(int n, const float* x, float* y); 21 | void vlogistic_backward(int n, const float* fx, const float* dEdf, float* dEdx); 22 | void l2_norm_reducer(int n, const float* x0, float* 
y, bool square, bool accumulate); 23 | void sqeucdist(int n, const float* x0, const float *x1, float* y); 24 | void sqeucdist_backward(int n, const float* dEdy, const float* x0, const float* x1, float* dEdx, int i); 25 | void softmax(int n, const float* x0, float* y); 26 | void softmax_backward(int n, const float* x0, const float* dEdf, float* dEdx); 27 | void pnlsoftmax(int n, int elem_idx, const float* x0, float* y, float* logz); 28 | void pnlsoftmax_backward(int n, int elem_idx, const float* x0, const float* dEdf, const float* logz, float* dEdx); 29 | 30 | void sgd_update(int n, const float* g, float* x, float scale, float lambda); 31 | 32 | } // namespace gpu 33 | } // namespace cnn 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /cnn/grad-check.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/grad-check.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "cnn/model.h" 7 | #include "cnn/cnn.h" 8 | #include "cnn/tensor.h" 9 | 10 | using namespace std; 11 | 12 | namespace cnn { 13 | 14 | bool CheckGrad(Model& m, ComputationGraph& g, int verbosity) { 15 | // Clear the parameters first 16 | const vector& params = m.parameters_list(); 17 | const vector& lookup_params = m.lookup_parameters_list(); 18 | for (auto pp : params) 19 | pp->clear(); 20 | for (auto pp : lookup_params) 21 | pp->clear(); 22 | 23 | // Perform forward and backward steps 24 | float alpha = 5e-4; 25 | g.forward(); 26 | g.backward(); 27 | 28 | // Check 29 | bool flag = false, curr_flag = false; 30 | for (auto pp : params) { 31 | if(verbosity > 1) 32 | cerr << endl << "PARAMETERS " << pp << endl; 33 | Parameters& p = *pp; 34 | size_t ts = p.dim.size(); 35 | for (size_t i = 0; i < ts; ++i) { 36 | float old = TensorTools::AccessElement(p.values, i); 37 | TensorTools::SetElement(p.values, i, old - alpha); 38 | float E_left = as_scalar(g.forward()); 39 | TensorTools::SetElement(p.values, i, old + alpha); 40 | float E_right = as_scalar(g.forward()); 41 | TensorTools::SetElement(p.values, i, old); 42 | float g = (E_right - E_left) / (2 * alpha); 43 | float g_act = TensorTools::AccessElement(p.g, i); 44 | float f = fabs(g - g_act); 45 | float m = max(fabs(g), fabs(g_act)); 46 | if (f > 0.1 && m > 0.f) f /= m; 47 | if (f > 0.1 || std::isnan(f)) { flag = true; if(verbosity > 0) { curr_flag = true; cerr << "***[" << f << "] "; } } 48 | if(verbosity + (curr_flag ? 
1 : 0) > 1) { 49 | cerr << g_act << ' ' << g << endl; 50 | curr_flag = false; 51 | } 52 | } 53 | } 54 | 55 | for (auto pp : lookup_params) { 56 | if(verbosity > 1) 57 | cerr << endl << "LOOKUP PARAMETERS " << pp << endl; 58 | LookupParameters& p = *pp; 59 | size_t ts = p.dim.size(); 60 | for (unsigned j : p.non_zero_grads) { 61 | if(verbosity > 1) 62 | cerr << "OBJECT=" << j << endl; 63 | Tensor& v = p.values[j]; 64 | Tensor& ag = p.grads[j]; 65 | for (size_t i = 0; i < ts; ++i) { 66 | float old = TensorTools::AccessElement(v, i); 67 | TensorTools::SetElement(v, i, old - alpha); 68 | float E_left = as_scalar(g.forward()); 69 | TensorTools::SetElement(v, i, old + alpha); 70 | float E_right = as_scalar(g.forward()); 71 | TensorTools::SetElement(v, i, old); 72 | float g = (E_right - E_left) / (2 * alpha); 73 | float g_act = TensorTools::AccessElement(ag, i); 74 | float f = fabs(g - g_act); 75 | float m = max(fabs(g), fabs(g_act)); 76 | if (f > 0.1 && m > 0.f) f /= m; 77 | if (f > 0.1 || std::isnan(f)) { flag = true; if(verbosity > 0) { curr_flag = true; cerr << "***[" << f << "] "; } } 78 | if(verbosity + (curr_flag ? 1 : 0) > 1) { 79 | cerr << g_act << ' ' << g << endl; 80 | curr_flag = false; 81 | } 82 | } 83 | } 84 | } 85 | 86 | if (flag) { 87 | if (verbosity > 1) 88 | cerr << endl << "*** GRADIENT CHECK FAILED ***" << endl; 89 | } else { 90 | if (verbosity > 0) 91 | cerr << endl << "GRADIENT CHECK PASSED" << endl; 92 | } 93 | return !flag; 94 | } 95 | 96 | } 97 | 98 | -------------------------------------------------------------------------------- /cnn/grad-check.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GRAD_CHECK_H 2 | #define CNN_GRAD_CHECK_H 3 | 4 | namespace cnn { 5 | 6 | class Model; 7 | struct ComputationGraph; 8 | 9 | // verbosity is zero for silence, one for only printing errors, two for everything 10 | bool CheckGrad(Model& m, ComputationGraph& g, int verbosity = 1); 11 | 12 | } // namespace cnn 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /cnn/graph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/graph.h" 2 | #include "cnn/cnn.h" 3 | #include 4 | #include "cnn/cnn-helper.h" 5 | 6 | using namespace std; 7 | 8 | namespace cnn { 9 | 10 | void GraphOptimize(ComputationGraph* cg) { 11 | // topo sort 12 | vector<Node*>& nodes = cg->nodes; 13 | vector<int> longest_paths(nodes.size()); 14 | for (unsigned i = 0; i < nodes.size(); ++i) { 15 | auto& v = *nodes[i]; // vertex v_i 16 | auto& lp = longest_paths[i]; // distance to v_i 17 | for (auto e : v.args) { 18 | int weight = 0; 19 | if (v.args.size() == 7) weight = 1; 20 | int pte = longest_paths[e] + weight; 21 | if (pte > lp) lp = pte; 22 | } 23 | } 24 | for (unsigned i = 0; i < nodes.size(); ++i) { 25 | vector<string> x; 26 | for (auto e : nodes[i]->args) { 27 | x.push_back(string("x") + to_string(e)); 28 | } 29 | cerr << "LONGEST PATH: " << longest_paths[i] << "\tx" << i << " = " << nodes[i]->as_string(x) << endl; 30 | } 31 | abort(); // DEBUGGING 32 | } 33 | 34 | } // namespace cnn 35 | -------------------------------------------------------------------------------- /cnn/graph.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GRAPH_H 2 | #define CNN_GRAPH_H 3 | 4 | namespace cnn { 5 | struct ComputationGraph; 6 | void GraphOptimize(ComputationGraph* cg); 7 | } // namespace cnn 8 | 9 | #endif 10 |
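The gradient checker above is typically wired into a small standalone graph whose final node is a scalar. A minimal sketch (the shapes, input values, and choice of loss are arbitrary; all calls are from cnn's expr.h, grad-check.h, and model.h):

#include "cnn/cnn.h"
#include "cnn/expr.h"
#include "cnn/grad-check.h"
using namespace cnn;
using namespace cnn::expr;

bool gradients_ok(Model& m) {
  Parameters* p_W = m.add_parameters({4, 3});
  std::vector<float> x_val = {1.f, -0.5f, 0.25f};
  ComputationGraph cg;
  Expression W = parameter(cg, p_W);          // parameter node: gets checked
  Expression x = input(cg, Dim({3}), &x_val); // constant input: skipped
  Expression h = tanh(W * x);
  Expression loss = dot_product(h, h);        // scalar: becomes the final node
  return CheckGrad(m, cg, /*verbosity=*/1);   // finite differences vs. backprop
}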
-------------------------------------------------------------------------------- /cnn/gru.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/gru.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | #include "cnn/training.h" 10 | 11 | using namespace std; 12 | 13 | namespace cnn { 14 | 15 | enum { X2Z, H2Z, BZ, X2R, H2R, BR, X2H, H2H, BH }; 16 | 17 | GRUBuilder::GRUBuilder(unsigned layers, 18 | unsigned input_dim, 19 | unsigned hidden_dim, 20 | Model* model) : hidden_dim(hidden_dim), layers(layers) { 21 | unsigned layer_input_dim = input_dim; 22 | for (unsigned i = 0; i < layers; ++i) { 23 | // z 24 | Parameters* p_x2z = model->add_parameters({hidden_dim, layer_input_dim}); 25 | Parameters* p_h2z = model->add_parameters({hidden_dim, hidden_dim}); 26 | Parameters* p_bz = model->add_parameters({hidden_dim}); 27 | 28 | // r 29 | Parameters* p_x2r = model->add_parameters({hidden_dim, layer_input_dim}); 30 | Parameters* p_h2r = model->add_parameters({hidden_dim, hidden_dim}); 31 | Parameters* p_br = model->add_parameters({hidden_dim}); 32 | 33 | // h 34 | Parameters* p_x2h = model->add_parameters({hidden_dim, layer_input_dim}); 35 | Parameters* p_h2h = model->add_parameters({hidden_dim, hidden_dim}); 36 | Parameters* p_bh = model->add_parameters({hidden_dim}); 37 | layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next 38 | 39 | vector ps = {p_x2z, p_h2z, p_bz, p_x2r, p_h2r, p_br, p_x2h, p_h2h, p_bh}; 40 | params.push_back(ps); 41 | } // layers 42 | } 43 | 44 | void GRUBuilder::new_graph_impl(ComputationGraph& cg) { 45 | param_vars.clear(); 46 | for (unsigned i = 0; i < layers; ++i) { 47 | auto& p = params[i]; 48 | 49 | // z 50 | Expression x2z = parameter(cg,p[X2Z]); 51 | Expression h2z = parameter(cg,p[H2Z]); 52 | Expression bz = parameter(cg,p[BZ]); 53 | 54 | // r 55 | Expression x2r = parameter(cg,p[X2R]); 56 | Expression h2r = parameter(cg,p[H2R]); 57 | Expression br = parameter(cg,p[BR]); 58 | 59 | // h 60 | Expression x2h = parameter(cg,p[X2H]); 61 | Expression h2h = parameter(cg,p[H2H]); 62 | Expression bh = parameter(cg,p[BH]); 63 | 64 | vector vars = {x2z, h2z, bz, x2r, h2r, br, x2h, h2h, bh}; 65 | param_vars.push_back(vars); 66 | } 67 | } 68 | 69 | void GRUBuilder::start_new_sequence_impl(const std::vector& h_0) { 70 | h.clear(); 71 | h0 = h_0; 72 | if (!h0.empty()) { 73 | assert (h0.size() == layers); 74 | } 75 | } 76 | 77 | Expression GRUBuilder::add_input_impl(int prev, const Expression& x) { 78 | const bool has_initial_state = (h0.size() > 0); 79 | h.push_back(vector(layers)); 80 | vector& ht = h.back(); 81 | Expression in = x; 82 | for (unsigned i = 0; i < layers; ++i) { 83 | const vector& vars = param_vars[i]; 84 | Expression h_tprev; 85 | // prev_zero means that h_tprev should be treated as 0 86 | bool prev_zero = false; 87 | if (prev >= 0 || has_initial_state) { 88 | h_tprev = (prev < 0) ? 
h0[i] : h[prev][i]; 89 | } else { prev_zero = true; } 90 | // update gate 91 | Expression zt; 92 | if (prev_zero) 93 | zt = affine_transform({vars[BZ], vars[X2Z], in}); 94 | else 95 | zt = affine_transform({vars[BZ], vars[X2Z], in, vars[H2Z], h_tprev}); 96 | zt = logistic(zt); 97 | // forget 98 | Expression ft = 1.f - zt; 99 | // reset gate 100 | Expression rt; 101 | if (prev_zero) 102 | rt = affine_transform({vars[BR], vars[X2R], in}); 103 | else 104 | rt = affine_transform({vars[BR], vars[X2R], in, vars[H2R], h_tprev}); 105 | rt = logistic(rt); 106 | 107 | // candidate activation 108 | Expression ct; 109 | if (prev_zero) { 110 | ct = affine_transform({vars[BH], vars[X2H], in}); 111 | ct = tanh(ct); 112 | Expression nwt = cwise_multiply(zt, ct); 113 | in = ht[i] = nwt; 114 | } else { 115 | Expression ght = cwise_multiply(rt, h_tprev); 116 | ct = affine_transform({vars[BH], vars[X2H], in, vars[H2H], ght}); 117 | ct = tanh(ct); 118 | Expression nwt = cwise_multiply(zt, ct); 119 | Expression crt = cwise_multiply(ft, h_tprev); 120 | in = ht[i] = crt + nwt; 121 | } 122 | } 123 | return ht.back(); 124 | } 125 | 126 | void GRUBuilder::copy(const RNNBuilder & rnn) { 127 | const GRUBuilder & rnn_gru = (const GRUBuilder&)rnn; 128 | assert(params.size() == rnn_gru.params.size()); 129 | for(size_t i = 0; i < params.size(); ++i) 130 | for(size_t j = 0; j < params[i].size(); ++j) 131 | params[i][j]->copy(*rnn_gru.params[i][j]); 132 | } 133 | 134 | } // namespace cnn 135 | -------------------------------------------------------------------------------- /cnn/gru.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GRU_H_ 2 | #define CNN_GRU_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn.h" 6 | 7 | namespace cnn { 8 | 9 | class Model; 10 | 11 | struct GRUBuilder : public RNNBuilder { 12 | GRUBuilder() = default; 13 | explicit GRUBuilder(unsigned layers, 14 | unsigned input_dim, 15 | unsigned hidden_dim, 16 | Model* model); 17 | Expression back() const override { return (cur == -1? h0.back() : h[cur].back()); } 18 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 19 | std::vector final_s() const override { return final_h(); } 20 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 21 | std::vector get_s(RNNPointer i) const override { return get_h(i); } 22 | unsigned num_h0_components() const override { return layers; } 23 | void copy(const RNNBuilder & params) override; 24 | 25 | protected: 26 | void new_graph_impl(ComputationGraph& cg) override; 27 | void start_new_sequence_impl(const std::vector& h0) override; 28 | Expression add_input_impl(int prev, const Expression& x) override; 29 | 30 | // first index is layer, then ... 31 | std::vector> params; 32 | 33 | // first index is layer, then ... 
34 | std::vector> param_vars; 35 | 36 | // first index is time, second is layer 37 | std::vector> h; 38 | 39 | // initial values of h at each layer 40 | // - default to zero matrix input 41 | std::vector h0; 42 | 43 | unsigned hidden_dim; 44 | unsigned layers; 45 | }; 46 | 47 | } // namespace cnn 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /cnn/hsm-builder.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_HSMBUILDER_H 2 | #define CNN_HSMBUILDER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "cnn/cnn.h" 8 | #include "cnn/expr.h" 9 | #include "cnn/dict.h" 10 | #include "cnn/cfsm-builder.h" 11 | 12 | namespace cnn { 13 | 14 | struct Parameters; 15 | 16 | class Cluster { 17 | private: 18 | std::vector children; 19 | std::vector path; 20 | std::vector terminals; 21 | std::unordered_map word2ind; 22 | Parameters* p_weights; 23 | Parameters* p_bias; 24 | mutable expr::Expression weights; 25 | mutable expr::Expression bias; 26 | bool initialized; 27 | unsigned output_size; 28 | 29 | expr::Expression predict(expr::Expression h, ComputationGraph& cg) const; 30 | 31 | public: 32 | Cluster(); 33 | Cluster* add_child(unsigned sym); 34 | void add_word(unsigned word); 35 | void initialize(unsigned rep_dim, Model* model); 36 | 37 | void new_graph(ComputationGraph& cg); 38 | unsigned sample(expr::Expression h, ComputationGraph& cg) const; 39 | expr::Expression neg_log_softmax(expr::Expression h, unsigned r, ComputationGraph& cg) const; 40 | 41 | unsigned get_index(unsigned word) const; 42 | unsigned get_word(unsigned index) const; 43 | unsigned num_children() const; 44 | const Cluster* get_child(unsigned i) const; 45 | const std::vector& get_path() const; 46 | expr::Expression get_weights(ComputationGraph& cg) const; 47 | expr::Expression get_bias(ComputationGraph& cg) const; 48 | 49 | std::string toString() const; 50 | }; 51 | 52 | // helps with implementation of hierarchical softmax 53 | // read a file with lines of the following format 54 | // CLASSID word [freq] 55 | class HierarchicalSoftmaxBuilder : public FactoredSoftmaxBuilder { 56 | public: 57 | HierarchicalSoftmaxBuilder(unsigned rep_dim, 58 | const std::string& cluster_file, 59 | Dict* word_dict, 60 | Model* model); 61 | ~HierarchicalSoftmaxBuilder(); 62 | // call this once per ComputationGraph 63 | void new_graph(ComputationGraph& cg); 64 | 65 | // -log(p(c | rep) * p(w | c, rep)) 66 | expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx); 67 | 68 | // samples a word from p(w,c | rep) 69 | unsigned sample(const expr::Expression& rep); 70 | 71 | private: 72 | Cluster* ReadClusterFile(const std::string& cluster_file, Dict* word_dict); 73 | std::vector widx2path; // will be NULL if not found 74 | Dict path_symbols; 75 | 76 | ComputationGraph* pcg; 77 | Cluster* root; 78 | }; 79 | 80 | } // namespace cnn 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /cnn/init.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/init.h" 2 | #include "cnn/aligned-mem-pool.h" 3 | #include "cnn/cnn.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #if HAVE_CUDA 10 | #include "cnn/cuda.h" 11 | #include 12 | #endif 13 | 14 | using namespace std; 15 | 16 | namespace cnn { 17 | 18 | // these should maybe live in a file called globals.cc or something 19 | AlignedMemoryPool* fxs = nullptr; 20 | AlignedMemoryPool* dEdfs = 
nullptr; 21 | AlignedMemoryPool* ps = nullptr; 22 | mt19937* rndeng = nullptr; 23 | std::vector<Device*> devices; 24 | Device* default_device = nullptr; 25 | 26 | static void RemoveArgs(int& argc, char**& argv, int& argi, int n) { 27 | for (int i = argi + n; i < argc; ++i) 28 | argv[i - n] = argv[i]; 29 | argc -= n; 30 | assert(argc >= 0); 31 | } 32 | 33 | void Initialize(int& argc, char**& argv, unsigned random_seed, bool shared_parameters) { 34 | vector<Device*> gpudevices; 35 | #if HAVE_CUDA 36 | cerr << "[cnn] initializing CUDA\n"; 37 | gpudevices = Initialize_GPU(argc, argv); 38 | #endif 39 | unsigned long num_mb = 512UL; 40 | int argi = 1; 41 | while(argi < argc) { 42 | string arg = argv[argi]; 43 | if (arg == "--cnn-mem" || arg == "--cnn_mem") { 44 | if ((argi + 1) >= argc) { // argv[argi+1] must exist 45 | cerr << "[cnn] --cnn-mem expects an argument (the memory, in megabytes, to reserve)\n"; 46 | abort(); 47 | } else { 48 | string a2 = argv[argi+1]; 49 | istringstream c(a2); c >> num_mb; 50 | RemoveArgs(argc, argv, argi, 2); 51 | } 52 | } else if (arg == "--cnn-seed" || arg == "--cnn_seed") { 53 | if ((argi + 1) >= argc) { 54 | cerr << "[cnn] --cnn-seed expects an argument (the random number seed)\n"; 55 | abort(); 56 | } else { 57 | string a2 = argv[argi+1]; 58 | istringstream c(a2); c >> random_seed; 59 | RemoveArgs(argc, argv, argi, 2); 60 | } 61 | } else if (arg.find("--cnn") == 0) { 62 | cerr << "[cnn] Bad command line argument: " << arg << endl; 63 | abort(); 64 | } else { break; } 65 | } 66 | if (random_seed == 0) { 67 | random_device rd; 68 | random_seed = rd(); 69 | } 70 | cerr << "[cnn] random seed: " << random_seed << endl; 71 | rndeng = new mt19937(random_seed); 72 | 73 | cerr << "[cnn] allocating memory: " << num_mb << "MB\n"; 74 | devices.push_back(new Device_CPU(num_mb, shared_parameters)); 75 | int default_index = 0; 76 | if (gpudevices.size() > 0) { 77 | for (auto gpu : gpudevices) 78 | devices.push_back(gpu); 79 | default_index++; 80 | } 81 | default_device = devices[default_index]; 82 | 83 | // TODO these should be accessed through the relevant device and removed here 84 | fxs = default_device->fxs; 85 | dEdfs = default_device->dEdfs; 86 | ps = default_device->ps; 87 | kSCALAR_MINUSONE = default_device->kSCALAR_MINUSONE; 88 | kSCALAR_ONE = default_device->kSCALAR_ONE; 89 | kSCALAR_ZERO = default_device->kSCALAR_ZERO; 90 | cerr << "[cnn] memory allocation done.\n"; 91 | } 92 | 93 | void Cleanup() { 94 | delete rndeng; 95 | delete fxs; 96 | delete dEdfs; 97 | delete ps; 98 | } 99 | 100 | } // namespace cnn 101 | 102 | -------------------------------------------------------------------------------- /cnn/init.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_EIGEN_INIT_H 2 | #define CNN_EIGEN_INIT_H 3 | 4 | namespace cnn { 5 | 6 | void Initialize(int& argc, char**& argv, unsigned random_seed = 0, bool shared_parameters = false); 7 | void Cleanup(); 8 | 9 | } // namespace cnn 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /cnn/lstm.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/lstm.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | 10 | using namespace std; 11 | using namespace cnn::expr; 12 | 13 | namespace cnn { 14 | 15 | enum { X2I, H2I, C2I, BI, X2O, H2O, C2O, BO, X2C, H2C, BC }; 16 | 17 | LSTMBuilder::LSTMBuilder(unsigned layers, 18 | unsigned input_dim, 19 | unsigned hidden_dim, 20 | Model* model) : 
layers(layers) { 21 | unsigned layer_input_dim = input_dim; 22 | for (unsigned i = 0; i < layers; ++i) { 23 | // i 24 | Parameters* p_x2i = model->add_parameters({hidden_dim, layer_input_dim}); 25 | Parameters* p_h2i = model->add_parameters({hidden_dim, hidden_dim}); 26 | Parameters* p_c2i = model->add_parameters({hidden_dim, hidden_dim}); 27 | Parameters* p_bi = model->add_parameters({hidden_dim}); 28 | 29 | // o 30 | Parameters* p_x2o = model->add_parameters({hidden_dim, layer_input_dim}); 31 | Parameters* p_h2o = model->add_parameters({hidden_dim, hidden_dim}); 32 | Parameters* p_c2o = model->add_parameters({hidden_dim, hidden_dim}); 33 | Parameters* p_bo = model->add_parameters({hidden_dim}); 34 | 35 | // c 36 | Parameters* p_x2c = model->add_parameters({hidden_dim, layer_input_dim}); 37 | Parameters* p_h2c = model->add_parameters({hidden_dim, hidden_dim}); 38 | Parameters* p_bc = model->add_parameters({hidden_dim}); 39 | layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next 40 | 41 | vector<Parameters*> ps = {p_x2i, p_h2i, p_c2i, p_bi, p_x2o, p_h2o, p_c2o, p_bo, p_x2c, p_h2c, p_bc}; 42 | params.push_back(ps); 43 | } // layers 44 | dropout_rate = 0.0f; 45 | } 46 | 47 | void LSTMBuilder::new_graph_impl(ComputationGraph& cg){ 48 | param_vars.clear(); 49 | 50 | for (unsigned i = 0; i < layers; ++i){ 51 | auto& p = params[i]; 52 | 53 | //i 54 | Expression i_x2i = parameter(cg,p[X2I]); 55 | Expression i_h2i = parameter(cg,p[H2I]); 56 | Expression i_c2i = parameter(cg,p[C2I]); 57 | Expression i_bi = parameter(cg,p[BI]); 58 | //o 59 | Expression i_x2o = parameter(cg,p[X2O]); 60 | Expression i_h2o = parameter(cg,p[H2O]); 61 | Expression i_c2o = parameter(cg,p[C2O]); 62 | Expression i_bo = parameter(cg,p[BO]); 63 | //c 64 | Expression i_x2c = parameter(cg,p[X2C]); 65 | Expression i_h2c = parameter(cg,p[H2C]); 66 | Expression i_bc = parameter(cg,p[BC]); 67 | 68 | vector<Expression> vars = {i_x2i, i_h2i, i_c2i, i_bi, i_x2o, i_h2o, i_c2o, i_bo, i_x2c, i_h2c, i_bc}; 69 | param_vars.push_back(vars); 70 | } 71 | } 72 | 73 | // layout: 0..layers = c 74 | // layers+1..2*layers = h 75 | void LSTMBuilder::start_new_sequence_impl(const vector<Expression>& hinit) { 76 | h.clear(); 77 | c.clear(); 78 | if (hinit.size() > 0) { 79 | assert(layers*2 == hinit.size()); 80 | h0.resize(layers); 81 | c0.resize(layers); 82 | for (unsigned i = 0; i < layers; ++i) { 83 | c0[i] = hinit[i]; 84 | h0[i] = hinit[i + layers]; 85 | } 86 | has_initial_state = true; 87 | } else { 88 | has_initial_state = false; 89 | } 90 | } 91 | 92 | Expression LSTMBuilder::add_input_impl(int prev, const Expression& x) { 93 | h.push_back(vector<Expression>(layers)); 94 | c.push_back(vector<Expression>(layers)); 95 | vector<Expression>& ht = h.back(); 96 | vector<Expression>& ct = c.back(); 97 | Expression in = x; 98 | for (unsigned i = 0; i < layers; ++i) { 99 | const vector<Expression>& vars = param_vars[i]; 100 | Expression i_h_tm1, i_c_tm1; 101 | bool has_prev_state = (prev >= 0 || has_initial_state); 102 | if (prev < 0) { 103 | if (has_initial_state) { 104 | // initial value for h and c at timestep 0 in layer i 105 | // defaults to zero matrix input if not set in add_parameter_edges 106 | i_h_tm1 = h0[i]; 107 | i_c_tm1 = c0[i]; 108 | } 109 | } else { // t > 0 110 | i_h_tm1 = h[prev][i]; 111 | i_c_tm1 = c[prev][i]; 112 | } 113 | // apply dropout according to http://arxiv.org/pdf/1409.2329v5.pdf 114 | if (dropout_rate) in = dropout(in, dropout_rate); 115 | // input 116 | Expression i_ait; 117 | if (has_prev_state) 118 | i_ait = affine_transform({vars[BI], vars[X2I], in, vars[H2I], i_h_tm1, vars[C2I], 
i_c_tm1}); 119 | else 120 | i_ait = affine_transform({vars[BI], vars[X2I], in}); 121 | Expression i_it = logistic(i_ait); 122 | // forget 123 | Expression i_ft = 1.f - i_it; 124 | // write memory cell 125 | Expression i_awt; 126 | if (has_prev_state) 127 | i_awt = affine_transform({vars[BC], vars[X2C], in, vars[H2C], i_h_tm1}); 128 | else 129 | i_awt = affine_transform({vars[BC], vars[X2C], in}); 130 | Expression i_wt = tanh(i_awt); 131 | // output 132 | if (has_prev_state) { 133 | Expression i_nwt = cwise_multiply(i_it,i_wt); 134 | Expression i_crt = cwise_multiply(i_ft,i_c_tm1); 135 | ct[i] = i_crt + i_nwt; 136 | } else { 137 | ct[i] = cwise_multiply(i_it,i_wt); 138 | } 139 | 140 | Expression i_aot; 141 | if (has_prev_state) 142 | i_aot = affine_transform({vars[BO], vars[X2O], in, vars[H2O], i_h_tm1, vars[C2O], ct[i]}); 143 | else 144 | i_aot = affine_transform({vars[BO], vars[X2O], in, vars[C2O], ct[i]}); 145 | Expression i_ot = logistic(i_aot); 146 | Expression ph_t = tanh(ct[i]); 147 | in = ht[i] = cwise_multiply(i_ot,ph_t); 148 | } 149 | if (dropout_rate) return dropout(ht.back(), dropout_rate); 150 | else return ht.back(); 151 | } 152 | 153 | void LSTMBuilder::copy(const RNNBuilder & rnn) { 154 | const LSTMBuilder & rnn_lstm = (const LSTMBuilder&)rnn; 155 | assert(params.size() == rnn_lstm.params.size()); 156 | for(size_t i = 0; i < params.size(); ++i) 157 | for(size_t j = 0; j < params[i].size(); ++j) 158 | params[i][j]->copy(*rnn_lstm.params[i][j]); 159 | } 160 | 161 | } // namespace cnn 162 | -------------------------------------------------------------------------------- /cnn/lstm.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_LSTM_H_ 2 | #define CNN_LSTM_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn.h" 6 | #include "cnn/expr.h" 7 | 8 | using namespace cnn::expr; 9 | 10 | namespace cnn { 11 | 12 | class Model; 13 | 14 | struct LSTMBuilder : public RNNBuilder { 15 | LSTMBuilder() = default; 16 | explicit LSTMBuilder(unsigned layers, 17 | unsigned input_dim, 18 | unsigned hidden_dim, 19 | Model* model); 20 | 21 | void set_dropout(float d) { dropout_rate = d; } 22 | // in general, you should disable dropout at test time 23 | void disable_dropout() { dropout_rate = 0; } 24 | 25 | Expression back() const override { return (cur == -1? h0.back() : h[cur].back()); } 26 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 27 | std::vector final_s() const override { 28 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 29 | for(auto my_h : final_h()) ret.push_back(my_h); 30 | return ret; 31 | } 32 | unsigned num_h0_components() const override { return 2 * layers; } 33 | 34 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 35 | std::vector get_s(RNNPointer i) const override { 36 | std::vector ret = (i == -1 ? c0 : c[i]); 37 | for(auto my_h : get_h(i)) ret.push_back(my_h); 38 | return ret; 39 | } 40 | 41 | void copy(const RNNBuilder & params) override; 42 | protected: 43 | void new_graph_impl(ComputationGraph& cg) override; 44 | void start_new_sequence_impl(const std::vector& h0) override; 45 | Expression add_input_impl(int prev, const Expression& x) override; 46 | 47 | public: 48 | // first index is layer, then ... 49 | std::vector> params; 50 | 51 | // first index is layer, then ... 
52 | std::vector> param_vars; 53 | 54 | // first index is time, second is layer 55 | std::vector> h, c; 56 | 57 | // initial values of h and c at each layer 58 | // - both default to zero matrix input 59 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 60 | std::vector h0; 61 | std::vector c0; 62 | unsigned layers; 63 | float dropout_rate; 64 | }; 65 | 66 | } // namespace cnn 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /cnn/mem.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/mem.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "cnn/except.h" 11 | #if HAVE_CUDA 12 | #include "cnn/cuda.h" 13 | #include 14 | #include 15 | #endif 16 | 17 | using namespace std; 18 | 19 | namespace cnn { 20 | 21 | MemAllocator::~MemAllocator() {} 22 | 23 | void* CPUAllocator::malloc(size_t n) { 24 | void* ptr = _mm_malloc(n, align); 25 | if (!ptr) { 26 | cerr << "CPU memory allocation failed n=" << n << " align=" << align << endl; 27 | throw cnn::out_of_memory("CPU memory allocation failed"); 28 | } 29 | return ptr; 30 | } 31 | 32 | void CPUAllocator::free(void* mem) { 33 | _mm_free(mem); 34 | } 35 | 36 | void CPUAllocator::zero(void* p, size_t n) { 37 | memset(p, 0, n); 38 | } 39 | 40 | void* SharedAllocator::malloc(size_t n) { 41 | void* ptr = mmap(NULL, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_SHARED, -1, 0); 42 | if (!ptr) { 43 | cerr << "Shared memory allocation failed n=" << n << endl; 44 | throw cnn::out_of_memory("Shared memory allocation failed"); 45 | } 46 | return ptr; 47 | } 48 | 49 | void SharedAllocator::free(void* mem) { 50 | // munmap(mem, n); 51 | } 52 | 53 | void SharedAllocator::zero(void* p, size_t n) { 54 | memset(p, 0, n); 55 | } 56 | 57 | #if HAVE_CUDA 58 | void* GPUAllocator::malloc(size_t n) { 59 | void* ptr = nullptr; 60 | CUDA_CHECK(cudaSetDevice(devid)); 61 | CUDA_CHECK(cudaMalloc(&ptr, n)); 62 | if (!ptr) { 63 | cerr << "GPU memory allocation failed n=" << n << endl; 64 | throw cnn::out_of_memory("GPU memory allocation failed"); 65 | } 66 | return ptr; 67 | } 68 | 69 | void GPUAllocator::free(void* mem) { 70 | CUDA_CHECK(cudaFree(mem)); 71 | } 72 | 73 | void GPUAllocator::zero(void* p, size_t n) { 74 | CUDA_CHECK(cudaSetDevice(devid)); 75 | CUDA_CHECK(cudaMemsetAsync(p, 0, n)); 76 | } 77 | 78 | #endif 79 | 80 | } // namespace cnn 81 | -------------------------------------------------------------------------------- /cnn/mem.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_MEM_H 2 | #define CNN_MEM_H 3 | 4 | #include 5 | 6 | namespace cnn { 7 | 8 | // allocates memory from the device (CPU, GPU) 9 | // only used to create the memory pools 10 | // creates alignment appropriate for that device 11 | struct MemAllocator { 12 | explicit MemAllocator(int align) : align(align) {} 13 | MemAllocator(const MemAllocator&) = delete; 14 | MemAllocator& operator=(const MemAllocator&) = delete; 15 | virtual ~MemAllocator(); 16 | virtual void* malloc(std::size_t n) = 0; 17 | virtual void free(void* mem) = 0; 18 | virtual void zero(void* p, std::size_t n) = 0; 19 | inline std::size_t round_up_align(std::size_t n) const { 20 | if (align < 2) return n; 21 | return ((n + align - 1) / align) * align; 22 | } 23 | const int align; 24 | }; 25 | 26 | struct CPUAllocator : public MemAllocator { 27 | CPUAllocator() : MemAllocator(32) {} 28 | void* malloc(std::size_t n) 
override; 29 | void free(void* mem) override; 30 | void zero(void* p, std::size_t n) override; 31 | }; 32 | 33 | struct SharedAllocator : public MemAllocator { 34 | SharedAllocator() : MemAllocator(32) {} 35 | void* malloc(std::size_t n) override; 36 | void free(void* mem) override; 37 | void zero(void* p, std::size_t n) override; 38 | }; 39 | 40 | #if HAVE_CUDA 41 | struct GPUAllocator : public MemAllocator { 42 | explicit GPUAllocator(int devid) : MemAllocator(256), devid(devid) {} 43 | void* malloc(std::size_t n) override; 44 | void free(void* mem) override; 45 | void zero(void* p, std::size_t n) override; 46 | const int devid; 47 | }; 48 | #endif 49 | 50 | } // namespace cnn 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /cnn/model.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_PARAMS_H_ 2 | #define CNN_PARAMS_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include 10 | #include 11 | 12 | #include "cnn/tensor.h" 13 | 14 | namespace cnn { 15 | 16 | // to deal with sparse updates, there are two parameter classes: 17 | // * Parameters represents a vector, matrix, (eventually higher order tensors) 18 | // of parameters. These are densely updated. 19 | // * LookupParameters represents a table of vectors that are used to embed a 20 | // set of discrete objects. These are sparsely updated. 21 | 22 | struct ParametersBase { 23 | friend class Model; 24 | virtual void scale_parameters(float a) = 0; 25 | virtual void squared_l2norm(float* sqnorm) const = 0; 26 | virtual void g_squared_l2norm(float* sqnorm) const = 0; 27 | virtual size_t size() const = 0; 28 | virtual ~ParametersBase(); 29 | }; 30 | 31 | // represents parameters (e.g., a weight matrix) that will be optimized 32 | struct Parameters : public ParametersBase { 33 | friend class Model; 34 | void scale_parameters(float a) override; 35 | void squared_l2norm(float* sqnorm) const override; 36 | void g_squared_l2norm(float* sqnorm) const override; 37 | size_t size() const override; 38 | 39 | void copy(const Parameters & val); 40 | void accumulate_grad(const Tensor& g); 41 | void clear(); 42 | 43 | Dim dim; 44 | Tensor values; 45 | Tensor g; 46 | private: 47 | Parameters() {} 48 | explicit Parameters(const Dim& d, float minmax); // initialize with ~U(-minmax,+minmax) 49 | // or Glorot initialization if minmax = 0 50 | friend class boost::serialization::access; 51 | template void serialize(Archive& ar, const unsigned int) { 52 | ar & dim; 53 | ar & values; 54 | } 55 | }; 56 | 57 | // represents a matrix/vector embedding of a discrete set 58 | struct LookupParameters : public ParametersBase { 59 | friend class Model; 60 | void scale_parameters(float a) override; 61 | void squared_l2norm(float* sqnorm) const override; 62 | void g_squared_l2norm(float* sqnorm) const override; 63 | size_t size() const override; 64 | void Initialize(unsigned index, const std::vector& val); 65 | 66 | void copy(const LookupParameters & val); 67 | void accumulate_grad(unsigned index, const Tensor& g); 68 | void clear(); 69 | 70 | Dim dim; 71 | std::vector values; 72 | std::vector grads; 73 | // gradients are sparse, so track which components are nonzero 74 | std::unordered_set non_zero_grads; 75 | private: 76 | LookupParameters() {} 77 | LookupParameters(unsigned n, const Dim& d); 78 | friend class boost::serialization::access; 79 | template 80 | void save(Archive& ar, const unsigned int) const { 81 | ar & dim; 82 | int nv = values.size(); 83 | ar & 
nv; 84 | for (unsigned i = 0; i < values.size(); ++i) 85 | ar & values[i]; 86 | } 87 | template 88 | void load(Archive& ar, const unsigned int) { 89 | ar & dim; 90 | int nv; 91 | ar & nv; 92 | assert(nv == (int)values.size()); 93 | for (unsigned i = 0; i < values.size(); ++i) 94 | ar & values[i]; 95 | } 96 | BOOST_SERIALIZATION_SPLIT_MEMBER() 97 | }; 98 | 99 | // this is a collection of parameters 100 | // if you need a matrix of parameters, or a lookup table - ask an instance of this class 101 | // this knows how to serialize itself 102 | // parameters know how to track their gradients, but any extra information (like velocity) will live here 103 | class Model { 104 | public: 105 | Model() : gradient_norm_scratch() {} 106 | ~Model(); 107 | float gradient_l2_norm() const; 108 | void reset_gradient(); 109 | // set scale to use custom initialization 110 | Parameters* add_parameters(const Dim& d, float scale = 0.0f); 111 | LookupParameters* add_lookup_parameters(unsigned n, const Dim& d); 112 | // project weights so their L2 norm = radius 113 | void project_weights(float radius = 1.0f); 114 | 115 | const std::vector& all_parameters_list() const { return all_params; } 116 | const std::vector& parameters_list() const { return params; } 117 | const std::vector& lookup_parameters_list() const { return lookup_params; } 118 | 119 | private: 120 | friend class boost::serialization::access; 121 | template 122 | void save(Archive& ar, const unsigned int) const { 123 | int np = params.size(); 124 | int nlp = lookup_params.size(); 125 | ar & np; 126 | ar & nlp; 127 | for (unsigned i = 0; i < params.size(); ++i) 128 | ar & *params[i]; 129 | for (unsigned i = 0; i < lookup_params.size(); ++i) 130 | ar & *lookup_params[i]; 131 | } 132 | template 133 | void load(Archive& ar, const unsigned int) { 134 | int np, nlp; 135 | ar & np; 136 | ar & nlp; 137 | assert(np == (int)params.size()); 138 | assert(nlp == (int)lookup_params.size()); 139 | for (unsigned i = 0; i < params.size(); ++i) 140 | ar & *params[i]; 141 | for (unsigned i = 0; i < lookup_params.size(); ++i) 142 | ar & *lookup_params[i]; 143 | all_params.clear(); 144 | for (auto p : params) all_params.push_back(p); 145 | for (auto p : lookup_params) all_params.push_back(p); 146 | } 147 | BOOST_SERIALIZATION_SPLIT_MEMBER() 148 | 149 | std::vector all_params; 150 | std::vector params; 151 | std::vector lookup_params; 152 | mutable float* gradient_norm_scratch; 153 | }; 154 | 155 | void save_cnn_model(std::string filename, Model* model); 156 | void load_cnn_model(std::string filename, Model* model); 157 | 158 | } // namespace cnn 159 | 160 | #endif 161 | -------------------------------------------------------------------------------- /cnn/mp.cc: -------------------------------------------------------------------------------- 1 | #include "mp.h" 2 | using namespace std; 3 | using namespace boost::interprocess; 4 | 5 | namespace cnn { 6 | namespace mp { 7 | // TODO: Pass these around instead of having them be global 8 | std::string queue_name = "cnn_mp_work_queue"; 9 | std::string shared_memory_name = "cnn_mp_shared_memory"; 10 | timespec start_time; 11 | bool stop_requested = false; 12 | SharedObject* shared_object = nullptr; 13 | 14 | std::string GenerateQueueName() { 15 | std::ostringstream ss; 16 | ss << "cnn_mp_work_queue"; 17 | ss << rand(); 18 | return ss.str(); 19 | } 20 | 21 | std::string GenerateSharedMemoryName() { 22 | std::ostringstream ss; 23 | ss << "cnn_mp_shared_memory"; 24 | ss << rand(); 25 | return ss.str(); 26 | } 27 | 28 | cnn::real 
SumValues(const std::vector& values) { 29 | return accumulate(values.begin(), values.end(), 0.0); 30 | } 31 | 32 | cnn::real Mean(const std::vector& values) { 33 | return SumValues(values) / values.size(); 34 | } 35 | 36 | std::string ElapsedTimeString(const timespec& start, const timespec& end) { 37 | std::ostringstream ss; 38 | time_t secs = end.tv_sec - start.tv_sec; 39 | long nsec = end.tv_nsec - start.tv_nsec; 40 | ss << secs << " seconds and " << nsec << "nseconds"; 41 | return ss.str(); 42 | } 43 | 44 | unsigned SpawnChildren(std::vector& workloads) { 45 | const unsigned num_children = workloads.size(); 46 | assert (workloads.size() == num_children); 47 | pid_t pid; 48 | unsigned cid; 49 | for (cid = 0; cid < num_children; ++cid) { 50 | pid = fork(); 51 | if (pid == -1) { 52 | std::cerr << "Fork failed. Exiting ..." << std::endl; 53 | return 1; 54 | } 55 | else if (pid == 0) { 56 | // children shouldn't continue looping 57 | break; 58 | } 59 | workloads[cid].pid = pid; 60 | } 61 | return cid; 62 | } 63 | 64 | std::vector CreateWorkloads(unsigned num_children) { 65 | int err; 66 | std::vector workloads(num_children); 67 | for (unsigned cid = 0; cid < num_children; cid++) { 68 | err = pipe(workloads[cid].p2c); 69 | assert (err == 0); 70 | err = pipe(workloads[cid].c2p); 71 | assert (err == 0); 72 | } 73 | return workloads; 74 | } 75 | 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /cnn/param-nodes.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/param-nodes.h" 2 | #include "cnn/tensor.h" 3 | 4 | #include 5 | 6 | using namespace std; 7 | 8 | namespace cnn { 9 | 10 | string ConstParameterNode::as_string(const vector& arg_names) const { 11 | ostringstream s; 12 | s << "const_parameters(" << dim << ", " << params << ')'; 13 | return s.str(); 14 | } 15 | 16 | Dim ConstParameterNode::dim_forward(const vector& xs) const { 17 | assert(xs.size() == 0); 18 | return dim; 19 | } 20 | 21 | void ConstParameterNode::forward_impl(const vector& xs, Tensor& fx) const { 22 | assert(xs.size() == 0); 23 | fx.v = params->values.v; 24 | } 25 | 26 | void ConstParameterNode::backward_impl(const vector& xs, 27 | const Tensor& fx, 28 | const Tensor& dEdf, 29 | unsigned i, 30 | Tensor& dEdxi) const { 31 | cerr << "called backward() on arity 0 node: i = " << i << endl; 32 | abort(); 33 | } 34 | 35 | string ParameterNode::as_string(const vector& arg_names) const { 36 | ostringstream s; 37 | s << "parameters(" << dim << ", " << params << ')'; 38 | return s.str(); 39 | } 40 | 41 | Dim ParameterNode::dim_forward(const vector& xs) const { 42 | assert(xs.size() == 0); 43 | return dim; 44 | } 45 | 46 | void ParameterNode::forward_impl(const vector& xs, Tensor& fx) const { 47 | assert(xs.size() == 0); 48 | fx.v = params->values.v; 49 | } 50 | 51 | void ParameterNode::backward_impl(const vector& xs, 52 | const Tensor& fx, 53 | const Tensor& dEdf, 54 | unsigned i, 55 | Tensor& dEdxi) const { 56 | cerr << "called backward() on arity 0 node: i = " << i << endl; 57 | abort(); 58 | } 59 | 60 | void ParameterNode::accumulate_grad(const Tensor& g) { 61 | params->accumulate_grad(g); 62 | } 63 | 64 | string InputNode::as_string(const vector& arg_names) const { 65 | ostringstream s; 66 | s << "constant(" << dim << ')'; 67 | return s.str(); 68 | } 69 | 70 | Dim InputNode::dim_forward(const vector& xs) const { 71 | return dim; 72 | } 73 | 74 | void InputNode::forward_impl(const vector& xs, Tensor& fx) const { 75 | 
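// What follows copies the user-owned buffer *pdata into this node's output:
// on the CUDA build it is an asynchronous host-to-device memcpy; on the CPU
// build the data is memcpy'd because the caller's vector is not guaranteed to
// satisfy the memory pool's alignment, and the zero-copy aliasing fast path
// is deliberately disabled (is_input_address_aligned is hard-coded to false).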
assert(xs.size() == 0); 76 | #if HAVE_CUDA 77 | cudaMemcpyAsync(fx.v, &pdata->front(), dim.size() * sizeof(float), cudaMemcpyHostToDevice); 78 | #else 79 | // TODO memcpy is only necessary if pdata->front() points to an unaligned location 80 | // need to compute this value 81 | bool is_input_address_aligned = false; 82 | if (!is_input_address_aligned) { 83 | memcpy(fx.v, &pdata->front(), dim.size() * sizeof(float)); 84 | } else { 85 | fx.v = const_cast(&pdata->front()); 86 | } 87 | #endif 88 | } 89 | 90 | void InputNode::backward_impl(const vector& xs, 91 | const Tensor& fx, 92 | const Tensor& dEdf, 93 | unsigned i, 94 | Tensor& dEdxi) const { 95 | cerr << "called backward() on arity 0 node\n"; 96 | abort(); 97 | } 98 | 99 | string ScalarInputNode::as_string(const vector& arg_names) const { 100 | ostringstream s; 101 | s << "scalar_constant(" << pdata << ')'; 102 | return s.str(); 103 | } 104 | 105 | Dim ScalarInputNode::dim_forward(const vector& xs) const { 106 | return Dim({1}); 107 | } 108 | 109 | void ScalarInputNode::forward_impl(const vector& xs, Tensor& fx) const { 110 | assert(xs.size() == 0); 111 | #if HAVE_CUDA 112 | cudaMemcpyAsync(fx.v, pdata, 1 * sizeof(float), cudaMemcpyHostToDevice); 113 | #else 114 | fx.v[0] = *pdata; 115 | #endif 116 | } 117 | 118 | void ScalarInputNode::backward_impl(const vector& xs, 119 | const Tensor& fx, 120 | const Tensor& dEdf, 121 | unsigned i, 122 | Tensor& dEdxi) const { 123 | cerr << "called backward() on arity 0 node\n"; 124 | abort(); 125 | } 126 | 127 | string LookupNode::as_string(const vector& arg_names) const { 128 | ostringstream s; 129 | s << "lookup_parameters(|x|=" << params->values.size() << " --> " << dim << ')'; 130 | return s.str(); 131 | } 132 | 133 | Dim LookupNode::dim_forward(const vector& xs) const { 134 | return dim; 135 | } 136 | 137 | void LookupNode::forward_impl(const vector& xs, Tensor& fx) const { 138 | assert(xs.size() == 0); 139 | if(pindex) { 140 | assert(*pindex < params->values.size()); 141 | assert (fx.d.batch_elems() == 1); 142 | fx.v = params->values[*pindex].v; 143 | } else { 144 | assert (pindices); 145 | assert (fx.d.batch_elems() == pindices->size()); 146 | for (unsigned b = 0; b < pindices->size(); ++b) { 147 | unsigned i = pindices->at(b); 148 | assert (i < params->values.size()); 149 | float* v = fx.v + fx.d.batch_size() * (b % fx.d.batch_elems()); 150 | #if HAVE_CUDA 151 | cudaMemcpyAsync(v, params->values[i].v, fx.d.batch_size() * sizeof(float), cudaMemcpyDeviceToDevice); 152 | #else 153 | memcpy(v, params->values[i].v, fx.d.batch_size() * sizeof(float)); 154 | #endif 155 | } 156 | } 157 | } 158 | 159 | void LookupNode::backward_impl(const vector& xs, 160 | const Tensor& fx, 161 | const Tensor& dEdf, 162 | unsigned i, 163 | Tensor& dEdxi) const { 164 | cerr << "called backward() on arity 0 node\n"; 165 | abort(); 166 | } 167 | 168 | void LookupNode::accumulate_grad(const Tensor& g) { 169 | if(pindex) { 170 | params->accumulate_grad(*pindex, g); 171 | } else { 172 | assert (pindices); 173 | const vector& gb = g.batch_elems(); 174 | for (unsigned b = 0; b < pindices->size(); ++b) { 175 | unsigned i = pindices->at(b); 176 | assert (i < params->values.size()); 177 | params->accumulate_grad(i, gb[b]); 178 | } 179 | } 180 | } 181 | 182 | } // namespace cnn 183 | -------------------------------------------------------------------------------- /cnn/param-nodes.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_PARAM_NODES_H_ 2 | #define CNN_PARAM_NODES_H_ 3 | 4 | 
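These node classes are normally reached through the cnn::expr wrappers rather than constructed directly; a minimal sketch mirroring the usage in examples/read-write.cc (in real code the buffers must outlive the graph):

#include "cnn/cnn.h"
#include "cnn/expr.h"
#include <vector>

void input_sketch(cnn::ComputationGraph& cg) {
  std::vector<cnn::real> x_values = {1.0, -1.0};
  // InputNode: re-reads *pdata on every forward(), so overwriting x_values
  // changes the next evaluation without rebuilding the graph
  cnn::expr::Expression x = cnn::expr::input(cg, {2}, &x_values);
  cnn::real y_value = 0.5;
  cnn::expr::Expression y = cnn::expr::input(cg, &y_value);  // ScalarInputNode
}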
#include "cnn/cnn.h" 5 | #include "cnn/model.h" 6 | 7 | namespace cnn { 8 | 9 | struct ParameterNodeBase : public Node { 10 | virtual void accumulate_grad(const Tensor& g) = 0; 11 | }; 12 | 13 | // represents optimizable parameters 14 | struct ParameterNode : public ParameterNodeBase { 15 | explicit ParameterNode(Parameters* p) : dim(p->dim), params(p) {} 16 | std::string as_string(const std::vector& arg_names) const override; 17 | Dim dim_forward(const std::vector& xs) const override; 18 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 19 | void backward_impl(const std::vector& xs, 20 | const Tensor& fx, 21 | const Tensor& dEdf, 22 | unsigned i, 23 | Tensor& dEdxi) const override; 24 | void accumulate_grad(const Tensor& g) override; 25 | Dim dim; 26 | Parameters* params; 27 | }; 28 | 29 | // represents optimizable parameters that are being held constant 30 | struct ConstParameterNode : public Node { 31 | explicit ConstParameterNode(Parameters* p) : dim(p->dim), params(p) {} 32 | std::string as_string(const std::vector& arg_names) const override; 33 | Dim dim_forward(const std::vector& xs) const override; 34 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 35 | void backward_impl(const std::vector& xs, 36 | const Tensor& fx, 37 | const Tensor& dEdf, 38 | unsigned i, 39 | Tensor& dEdxi) const override; 40 | Dim dim; 41 | Parameters* params; 42 | }; 43 | 44 | // represents specified (not learned) inputs to the network 45 | struct InputNode : public Node { 46 | explicit InputNode(const Dim& d, const std::vector& dat) : dim(d), data(dat), pdata(&data) {} 47 | explicit InputNode(const Dim& d, const std::vector* pdat) : dim(d), data(), pdata(pdat) {} 48 | std::string as_string(const std::vector& arg_names) const override; 49 | Dim dim_forward(const std::vector& xs) const override; 50 | virtual bool supports_multibatch() const override { return true; } 51 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 52 | void backward_impl(const std::vector& xs, 53 | const Tensor& fx, 54 | const Tensor& dEdf, 55 | unsigned i, 56 | Tensor& dEdxi) const override; 57 | Dim dim; 58 | const std::vector data; 59 | const std::vector* pdata; 60 | }; 61 | 62 | // represents specified (not learned) scalar inputs to the network 63 | struct ScalarInputNode : public Node { 64 | explicit ScalarInputNode(real s) : data(s), pdata(&data) {} 65 | explicit ScalarInputNode(const real* ps) : data(), pdata(ps) {} 66 | std::string as_string(const std::vector& arg_names) const override; 67 | Dim dim_forward(const std::vector& xs) const override; 68 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 69 | void backward_impl(const std::vector& xs, 70 | const Tensor& fx, 71 | const Tensor& dEdf, 72 | unsigned i, 73 | Tensor& dEdxi) const override; 74 | const cnn::real data; 75 | const cnn::real* pdata; 76 | }; 77 | 78 | // represents a matrix/vector embedding of an item of a discrete set (1-hot coding) 79 | struct LookupNode : public ParameterNodeBase { 80 | LookupNode(LookupParameters* p, unsigned ind) : dim(p->dim), index(ind), pindex(&index), indices(), pindices(), params(p) {} 81 | LookupNode(LookupParameters* p, const unsigned* pind) : dim(p->dim), index(), pindex(pind), indices(), pindices(), params(p) {} 82 | LookupNode(LookupParameters* p, const std::vector& indices) : dim(p->dim), index(), pindex(), indices(indices), pindices(&this->indices), params(p) { 83 | dim.bd = pindices->size(); 84 | } 85 | LookupNode(LookupParameters* p, const 
std::vector* pindices) : dim(p->dim), index(), pindex(), indices(), pindices(pindices), params(p) { 86 | dim.bd = pindices->size(); 87 | } 88 | std::string as_string(const std::vector& arg_names) const override; 89 | Dim dim_forward(const std::vector& xs) const override; 90 | virtual bool supports_multibatch() const override { return true; } 91 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 92 | void backward_impl(const std::vector& xs, 93 | const Tensor& fx, 94 | const Tensor& dEdf, 95 | unsigned i, 96 | Tensor& dEdxi) const override; 97 | void accumulate_grad(const Tensor& g) override; 98 | Dim dim; 99 | unsigned index; 100 | const unsigned* pindex; 101 | std::vector indices; 102 | const std::vector* pindices; 103 | LookupParameters* params; 104 | }; 105 | 106 | } // namespace cnn 107 | 108 | #endif 109 | -------------------------------------------------------------------------------- /cnn/random.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_EIGEN_RANDOM_H 2 | #define CNN_EIGEN_RANDOM_H 3 | 4 | #include 5 | 6 | namespace cnn { 7 | 8 | extern std::mt19937* rndeng; 9 | 10 | } // namespace cnn 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /cnn/rnn-state-machine.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/rnn-state-machine.h" 2 | 3 | #include 4 | #include "cnn/cnn.h" 5 | 6 | using namespace std; 7 | 8 | namespace cnn { 9 | 10 | void RNNStateMachine::failure(RNNOp op) { 11 | cerr << "State transition error: currently in state " << q_ << " but received operation " << op << endl; 12 | abort(); 13 | } 14 | 15 | } // namespace cnn 16 | 17 | -------------------------------------------------------------------------------- /cnn/rnn-state-machine.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_RNN_STATE_MACHINE_H_ 2 | #define CNN_RNN_STATE_MACHINE_H_ 3 | 4 | namespace cnn { 5 | 6 | // CURRENT STATE | ACTION | NEXT STATE 7 | // --------------+---------------------+----------------- 8 | // CREATED | new_graph | GRAPH_READY 9 | // GRAPH_READY | start_new_sequence | READING_INPUT 10 | // READING_INPUT | add_input | READING_INPUT 11 | // READING_INPUT | start_new_seqeunce | READING_INPUT 12 | // READING_INPUT | new_graph | GRAPH_READY 13 | 14 | enum RNNState {CREATED, GRAPH_READY, READING_INPUT}; 15 | enum RNNOp {new_graph, start_new_sequence, add_input}; 16 | 17 | class RNNStateMachine { 18 | public: 19 | RNNStateMachine() : q_(RNNState::CREATED) {} 20 | void failure(RNNOp op); 21 | void transition(RNNOp op) { 22 | switch (q_) { 23 | case RNNState::CREATED: 24 | if (op == RNNOp::new_graph) { q_ = RNNState::GRAPH_READY; break; } 25 | failure(op); 26 | case RNNState::GRAPH_READY: 27 | if (op == RNNOp::new_graph) { break; } 28 | if (op == RNNOp::start_new_sequence) { q_ = RNNState::READING_INPUT; break; } 29 | failure(op); 30 | case RNNState::READING_INPUT: 31 | if (op == RNNOp::add_input) { break; } 32 | if (op == RNNOp::start_new_sequence) { break; } 33 | if (op == RNNOp::new_graph) { q_ = RNNState::GRAPH_READY; break; } 34 | failure(op); 35 | } 36 | } 37 | private: 38 | RNNState q_; 39 | }; 40 | 41 | } // namespace cnn 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /cnn/rnn.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/rnn.h" 2 | 3 | #include 4 | 
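The transition table in rnn-state-machine.h is enforced at runtime; any out-of-order call lands in failure(), which aborts. A legal sequence, spelled out:

cnn::RNNStateMachine sm;
sm.transition(cnn::RNNOp::new_graph);           // CREATED     -> GRAPH_READY
sm.transition(cnn::RNNOp::start_new_sequence);  // GRAPH_READY -> READING_INPUT
sm.transition(cnn::RNNOp::add_input);           // stays in READING_INPUT
// calling add_input from CREATED or GRAPH_READY would abort via failure()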
#include 5 | #include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | #include "cnn/expr.h" 10 | 11 | using namespace std; 12 | using namespace cnn::expr; 13 | using namespace cnn; 14 | 15 | namespace cnn { 16 | 17 | enum { X2H=0, H2H, HB, L2H }; 18 | 19 | RNNBuilder::~RNNBuilder() {} 20 | 21 | SimpleRNNBuilder::SimpleRNNBuilder(unsigned layers, 22 | unsigned input_dim, 23 | unsigned hidden_dim, 24 | Model* model, 25 | bool support_lags) : layers(layers), lagging(support_lags) { 26 | unsigned layer_input_dim = input_dim; 27 | for (unsigned i = 0; i < layers; ++i) { 28 | Parameters* p_x2h = model->add_parameters({hidden_dim, layer_input_dim}); 29 | Parameters* p_h2h = model->add_parameters({hidden_dim, hidden_dim}); 30 | Parameters* p_hb = model->add_parameters({hidden_dim}); 31 | vector ps = {p_x2h, p_h2h, p_hb}; 32 | if (lagging) 33 | ps.push_back(model->add_parameters({hidden_dim, hidden_dim})); 34 | params.push_back(ps); 35 | layer_input_dim = hidden_dim; 36 | } 37 | } 38 | 39 | void SimpleRNNBuilder::new_graph_impl(ComputationGraph& cg) { 40 | param_vars.clear(); 41 | for (unsigned i = 0; i < layers; ++i) { 42 | Parameters* p_x2h = params[i][X2H]; 43 | Parameters* p_h2h = params[i][H2H]; 44 | Parameters* p_hb = params[i][HB]; 45 | Expression i_x2h = parameter(cg,p_x2h); 46 | Expression i_h2h = parameter(cg,p_h2h); 47 | Expression i_hb = parameter(cg,p_hb); 48 | vector vars = {i_x2h, i_h2h, i_hb}; 49 | 50 | if (lagging) { 51 | Parameters* p_l2h = params[i][L2H]; 52 | Expression i_l2h = parameter(cg,p_l2h); 53 | vars.push_back(i_l2h); 54 | } 55 | 56 | param_vars.push_back(vars); 57 | } 58 | } 59 | 60 | void SimpleRNNBuilder::start_new_sequence_impl(const vector& h_0) { 61 | h.clear(); 62 | h0 = h_0; 63 | if (h0.size()) { assert(h0.size() == layers); } 64 | } 65 | 66 | Expression SimpleRNNBuilder::add_input_impl(int prev, const Expression &in) { 67 | const unsigned t = h.size(); 68 | h.push_back(vector(layers)); 69 | 70 | Expression x = in; 71 | 72 | for (unsigned i = 0; i < layers; ++i) { 73 | const vector& vars = param_vars[i]; 74 | 75 | // y <--- f(x) 76 | Expression y = affine_transform({vars[2], vars[0], x}); 77 | 78 | // y <--- g(y_prev) 79 | if (prev == -1 && h0.size() > 0) 80 | y = affine_transform({y, vars[1], h0[i]}); 81 | else if (prev >= 0) 82 | y = affine_transform({y, vars[1], h[prev][i]}); 83 | 84 | // x <--- tanh(y) 85 | x = h[t][i] = tanh(y); 86 | } 87 | return h[t].back(); 88 | } 89 | 90 | Expression SimpleRNNBuilder::add_auxiliary_input(const Expression &in, const Expression &aux) { 91 | const unsigned t = h.size(); 92 | h.push_back(vector(layers)); 93 | 94 | Expression x = in; 95 | 96 | for (unsigned i = 0; i < layers; ++i) { 97 | const vector& vars = param_vars[i]; 98 | assert(vars.size() >= L2H + 1); 99 | 100 | Expression y = affine_transform({vars[HB], vars[X2H], x, vars[L2H], aux}); 101 | 102 | if (t == 0 && h0.size() > 0) 103 | y = affine_transform({y, vars[H2H], h0[i]}); 104 | else if (t >= 1) 105 | y = affine_transform({y, vars[H2H], h[t-1][i]}); 106 | 107 | x = h[t][i] = tanh(y); 108 | } 109 | return h[t].back(); 110 | } 111 | 112 | void SimpleRNNBuilder::copy(const RNNBuilder & rnn) { 113 | const SimpleRNNBuilder & rnn_simple = (const SimpleRNNBuilder&)rnn; 114 | assert(params.size() == rnn_simple.params.size()); 115 | for(size_t i = 0; i < rnn_simple.params.size(); ++i) { 116 | params[i][0]->copy(*rnn_simple.params[i][0]); 117 | params[i][1]->copy(*rnn_simple.params[i][1]); 118 | params[i][2]->copy(*rnn_simple.params[i][2]); 119 | } 120 | } 121 | 122 | } // 
namespace cnn 123 | -------------------------------------------------------------------------------- /cnn/rnn.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_RNN_H_ 2 | #define CNN_RNN_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn-state-machine.h" 6 | #include "cnn/expr.h" 7 | 8 | using namespace cnn::expr; 9 | 10 | namespace cnn { 11 | 12 | class Model; 13 | 14 | BOOST_STRONG_TYPEDEF(int, RNNPointer) 15 | inline void swap(RNNPointer& i1, RNNPointer& i2) { 16 | RNNPointer t = i1; i1 = i2; i2 = t; 17 | } 18 | 19 | // interface for constructing an RNN, LSTM, GRU, etc. 20 | struct RNNBuilder { 21 | RNNBuilder() : cur(-1) {} 22 | virtual ~RNNBuilder(); 23 | 24 | RNNPointer state() const { return cur; } 25 | 26 | // call this to reset the builder when you are working with a newly 27 | // created ComputationGraph object 28 | void new_graph(ComputationGraph& cg) { 29 | sm.transition(RNNOp::new_graph); 30 | new_graph_impl(cg); 31 | } 32 | 33 | // Reset for new sequence 34 | // call this before add_input and after new_graph, 35 | // when starting a new sequence on the same hypergraph. 36 | // h_0 is used to initialize hidden layers at timestep 0 to given values 37 | void start_new_sequence(const std::vector& h_0={}) { 38 | sm.transition(RNNOp::start_new_sequence); 39 | cur = RNNPointer(-1); 40 | head.clear(); 41 | start_new_sequence_impl(h_0); 42 | } 43 | 44 | // add another timestep by reading in the variable x 45 | // return the hidden representation of the deepest layer 46 | Expression add_input(const Expression& x) { 47 | sm.transition(RNNOp::add_input); 48 | head.push_back(cur); 49 | int rcp = cur; 50 | cur = head.size() - 1; 51 | return add_input_impl(rcp, x); 52 | } 53 | 54 | // add another timestep, but define recurrent connection to prev 55 | // rather than to head[cur] 56 | // this can be used to construct trees, implement beam search, etc. 57 | Expression add_input(const RNNPointer& prev, const Expression& x) { 58 | sm.transition(RNNOp::add_input); 59 | head.push_back(prev); 60 | cur = head.size() - 1; 61 | return add_input_impl(prev, x); 62 | } 63 | 64 | // rewind the last timestep - this DOES NOT remove the variables 65 | // from the computation graph, it just means the next time step will 66 | // see a different previous state. You can remind as many times as 67 | // you want. 
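Taken together, this interface supports both plain left-to-right processing and branching histories (trees, beam search) via RNNPointer. A usage sketch with SimpleRNNBuilder, where x1, x2 and x_alt stand for previously built input Expressions (illustrative, not defined here):

using namespace cnn;
using namespace cnn::expr;

Model model;
SimpleRNNBuilder rnn(2 /*layers*/, 8 /*input_dim*/, 24 /*hidden_dim*/, &model);
ComputationGraph cg;
rnn.new_graph(cg);
rnn.start_new_sequence();
Expression h1 = rnn.add_input(x1);         // h_t = tanh(W_x x + W_h h_{t-1} + b)
RNNPointer p = rnn.state();                // remember this timestep
Expression h2 = rnn.add_input(x2);
Expression alt = rnn.add_input(p, x_alt);  // branch: recur from p, not from h2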
68 | void rewind_one_step() { 69 | cur = head[cur]; 70 | } 71 | 72 | // returns node (index) of most recent output 73 | virtual Expression back() const = 0; 74 | // access the final output of each hidden layer 75 | virtual std::vector final_h() const = 0; 76 | virtual std::vector get_h(RNNPointer i) const = 0; 77 | // access the state of each hidden layer, in a format that can be used in 78 | // start_new_sequence 79 | virtual std::vector final_s() const = 0; 80 | virtual unsigned num_h0_components() const = 0; 81 | virtual std::vector get_s(RNNPointer i) const = 0; 82 | // copy the parameters of another builder 83 | virtual void copy(const RNNBuilder & params) = 0; 84 | protected: 85 | virtual void new_graph_impl(ComputationGraph& cg) = 0; 86 | virtual void start_new_sequence_impl(const std::vector& h_0) = 0; 87 | virtual Expression add_input_impl(int prev, const Expression& x) = 0; 88 | RNNPointer cur; 89 | private: 90 | // the state machine ensures that the caller is behaving 91 | RNNStateMachine sm; 92 | std::vector head; // head[i] returns the head position 93 | }; 94 | 95 | struct SimpleRNNBuilder : public RNNBuilder { 96 | SimpleRNNBuilder() = default; 97 | explicit SimpleRNNBuilder(unsigned layers, 98 | unsigned input_dim, 99 | unsigned hidden_dim, 100 | Model* model, 101 | bool support_lags=false); 102 | 103 | protected: 104 | void new_graph_impl(ComputationGraph& cg) override; 105 | void start_new_sequence_impl(const std::vector& h_0) override; 106 | Expression add_input_impl(int prev, const Expression& x) override; 107 | 108 | public: 109 | Expression add_auxiliary_input(const Expression& x, const Expression &aux); 110 | 111 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 112 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 113 | std::vector final_s() const override { return final_h(); } 114 | 115 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? 
h0 : h[i]); } 116 | std::vector get_s(RNNPointer i) const override { return get_h(i); } 117 | void copy(const RNNBuilder & params) override; 118 | 119 | unsigned num_h0_components() const override { return layers; } 120 | 121 | private: 122 | // first index is layer, then x2h h2h hb 123 | std::vector> params; 124 | 125 | // first index is layer, then x2h h2h hb 126 | std::vector> param_vars; 127 | 128 | // first index is time, second is layer 129 | std::vector> h; 130 | 131 | // initial value of h 132 | // defaults to zero matrix input 133 | std::vector h0; 134 | 135 | unsigned layers; 136 | bool lagging; 137 | }; 138 | 139 | } // namespace cnn 140 | 141 | #endif 142 | -------------------------------------------------------------------------------- /cnn/saxe-init.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/saxe-init.h" 2 | #include "cnn/tensor.h" 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | using namespace std; 10 | 11 | namespace cnn { 12 | 13 | void OrthonormalRandom(unsigned dd, float g, Tensor& x) { 14 | Tensor t; 15 | t.d = Dim({dd, dd}); 16 | t.v = new float[dd * dd]; 17 | normal_distribution distribution(0, 0.01); 18 | auto b = [&] () {return distribution(*rndeng);}; 19 | generate(t.v, t.v + dd*dd, b); 20 | Eigen::JacobiSVD svd(*t, Eigen::ComputeFullU); 21 | *x = svd.matrixU(); 22 | delete[] t.v; 23 | } 24 | 25 | } 26 | 27 | -------------------------------------------------------------------------------- /cnn/saxe-init.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_SAXE_INIT_H_ 2 | #define CNN_SAXE_INIT_H_ 3 | 4 | namespace cnn { 5 | 6 | struct Tensor; 7 | 8 | void OrthonormalRandom(unsigned dim, float g, Tensor& x); 9 | 10 | } 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /cnn/shadow-params.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/cnn.h" 2 | #include "cnn/shadow-params.h" 3 | #include "cnn/tensor.h" 4 | #include "cnn/aligned-mem-pool.h" 5 | #include "cnn/model.h" 6 | 7 | using namespace std; 8 | 9 | namespace cnn { 10 | 11 | ShadowParameters::ShadowParameters(const Parameters& p) : h(p.values) { 12 | h.v = (float*)default_device->mem->malloc(h.d.size() * sizeof(float)); 13 | TensorTools::Zero(h); 14 | } 15 | 16 | ShadowLookupParameters::ShadowLookupParameters(const LookupParameters& lp) : h(lp.values) { 17 | for (auto& t : h) { 18 | t.v = (float*)default_device->mem->malloc(t.d.size() * sizeof(float)); 19 | TensorTools::Zero(t); 20 | } 21 | } 22 | 23 | vector AllocateShadowParameters(const Model& m) { 24 | vector v; 25 | v.reserve(m.parameters_list().size()); 26 | for (auto& p : m.parameters_list()) 27 | v.emplace_back(*p); 28 | return v; 29 | } 30 | 31 | vector AllocateShadowLookupParameters(const Model& m) { 32 | vector v; 33 | v.reserve(m.lookup_parameters_list().size()); 34 | for (auto& p : m.lookup_parameters_list()) 35 | v.emplace_back(*p); 36 | return v; 37 | } 38 | 39 | } // namespace cnn 40 | 41 | -------------------------------------------------------------------------------- /cnn/shadow-params.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_SHADOW_PARAMS_H 2 | #define CNN_SHADOW_PARAMS_H 3 | 4 | #include 5 | #include "cnn/tensor.h" 6 | 7 | // if your learner needs to keep track of an extra set of values (one per 8 | // parameter), use the Shadow classes. 
this can be used to implement, e.g., 9 | // momentum or adagrad 10 | 11 | namespace cnn { 12 | 13 | class Model; 14 | struct Parameters; 15 | struct LookupParameters; 16 | 17 | struct ShadowParameters { 18 | explicit ShadowParameters(const Parameters& p); 19 | Tensor h; 20 | }; 21 | 22 | struct ShadowLookupParameters { 23 | explicit ShadowLookupParameters(const LookupParameters& lp); 24 | std::vector h; 25 | }; 26 | 27 | // one per element in model.parameters_list 28 | std::vector AllocateShadowParameters(const Model& model); 29 | // one per element in model.lookup_parameters_list 30 | std::vector AllocateShadowLookupParameters(const Model& model); 31 | 32 | } // namespace cnn 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /cnn/tensor.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/tensor.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #if HAVE_CUDA 8 | #include "cnn/cuda.h" 9 | #endif 10 | 11 | using namespace std; 12 | 13 | namespace cnn { 14 | 15 | ostream& operator<<(ostream& os, const Tensor& t) { 16 | #if HAVE_CUDA 17 | vector vt = as_vector(t); 18 | Eigen::Map m(&vt[0], t.d.rows(), t.d.cols()); 19 | os << m; 20 | #else 21 | os << (*t); 22 | #endif 23 | return os; 24 | } 25 | 26 | real as_scalar(const Tensor& t) { 27 | assert(t.d.size() == 1); 28 | #if HAVE_CUDA 29 | float res; 30 | CUDA_CHECK(cudaMemcpy(&res, t.v, sizeof(float), cudaMemcpyDeviceToHost)); 31 | return res; 32 | #else 33 | return t.v[0]; 34 | #endif 35 | } 36 | 37 | vector as_vector(const Tensor& v) { 38 | vector res(v.d.size()); 39 | #if HAVE_CUDA 40 | CUDA_CHECK(cudaMemcpy(&res[0], v.v, sizeof(real) * res.size(), cudaMemcpyDeviceToHost)); 41 | #else 42 | memcpy(&res[0], v.v, sizeof(real) * res.size()); 43 | #endif 44 | return res; 45 | } 46 | 47 | float TensorTools::AccessElement(const Tensor& v, int index) { 48 | #if HAVE_CUDA 49 | float ret; 50 | cudaMemcpyAsync(&ret, &v.v[index], sizeof(real), cudaMemcpyDeviceToHost); 51 | return ret; 52 | #else 53 | return v.v[index]; 54 | #endif 55 | } 56 | 57 | float TensorTools::AccessElement(const Tensor& v, const Dim& index) { 58 | #if HAVE_CUDA 59 | abort(); 60 | #else 61 | return (*v)(index[0], index[1]); 62 | #endif 63 | } 64 | 65 | void TensorTools::SetElement(const Tensor& v, int index, float value) { 66 | #if HAVE_CUDA 67 | cudaMemcpyAsync(&v.v[index], &value, sizeof(real), cudaMemcpyHostToDevice); 68 | #else 69 | v.v[index] = value; 70 | #endif 71 | } 72 | 73 | void TensorTools::SetElements(const Tensor& v, const vector& vec) { 74 | #if HAVE_CUDA 75 | cudaMemcpyAsync(v.v, &vec[0], sizeof(real) * vec.size(), cudaMemcpyHostToDevice); 76 | #else 77 | memcpy(v.v, &vec[0], sizeof(real) * vec.size()); 78 | #endif 79 | } 80 | 81 | void TensorTools::CopyElements(const Tensor& v, const Tensor& v_src) { 82 | #if HAVE_CUDA 83 | cudaMemcpyAsync(v.v, v_src.v, sizeof(real) * v.d.size(), cudaMemcpyDeviceToDevice); 84 | #else 85 | memcpy(v.v, v_src.v, sizeof(real) * v.d.size()); 86 | #endif 87 | } 88 | 89 | void TensorTools::Constant(Tensor& d, float c) { 90 | #if HAVE_CUDA 91 | if (!c) { 92 | CUDA_CHECK(cudaMemsetAsync(d.v, 0, d.d.size() * sizeof(float))); 93 | } else { 94 | fill(d.v, d.v + d.d.size(), c); 95 | } 96 | #else 97 | if (!c) { 98 | memset(d.v, c, d.d.size() * sizeof(float)); 99 | } else { 100 | fill(d.v, d.v + d.d.size(), c); 101 | } 102 | #endif 103 | } 104 | 105 | void TensorTools::Zero(Tensor& d) { 106 | Constant(d, 0); 107 | } 108 | 109 | void 
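The Shadow classes above give a learner one extra zero-initialized tensor per parameter, which is exactly what momentum-style updates need. Allocation sketch:

std::vector<cnn::ShadowParameters> vp = cnn::AllocateShadowParameters(model);
// vp[i].h has the same Dim as model.parameters_list()[i]->values and starts
// at zero; a momentum trainer folds each gradient into vp[i].h and then
// applies vp[i].h to the parameter values (cf. MomentumSGDTrainer in
// training.h).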
TensorTools::Randomize(Tensor& val, real scale) { 110 | uniform_real_distribution distribution(-scale,scale); 111 | auto b = [&] {return distribution(*rndeng);}; 112 | #if HAVE_CUDA 113 | float* t = new float[val.d.size()]; 114 | generate(t, t + val.d.size(), b); 115 | CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); 116 | delete[] t; 117 | #else 118 | generate(val.v, val.v + val.d.size(), b); 119 | #endif 120 | } 121 | 122 | void TensorTools::Randomize(Tensor& d) { 123 | Randomize(d, sqrt(6) / sqrt(d.d.sum_dims())); 124 | } 125 | 126 | void TensorTools::RandomBernoulli(Tensor& val, real p, real scale) { 127 | bernoulli_distribution distribution(p); 128 | auto b = [&] {return distribution(*rndeng) * scale;}; 129 | #if HAVE_CUDA 130 | float* t = new float[val.d.size()]; 131 | generate(t, t + val.d.size(), b); 132 | CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); 133 | delete[] t; 134 | #else 135 | generate(val.v, val.v + val.d.size(), b); 136 | #endif 137 | } 138 | 139 | void TensorTools::RandomizeNormal(real mean, real stddev, Tensor& val) { 140 | normal_distribution distribution(mean, stddev); 141 | auto b = [&] {return distribution(*rndeng);}; 142 | #if HAVE_CUDA 143 | float* t = new float[val.d.size()]; 144 | generate(t, t + val.d.size(), b); 145 | CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); 146 | delete[] t; 147 | #else 148 | generate(val.v, val.v + val.d.size(), b); 149 | #endif 150 | } 151 | 152 | real rand01() { 153 | uniform_real_distribution distribution(0, 1); 154 | return distribution(*rndeng); 155 | } 156 | 157 | int rand0n(int n) { 158 | assert(n > 0); 159 | int x = rand01() * n; 160 | while(n == x) { x = rand01() * n; } 161 | return x; 162 | } 163 | 164 | real rand_normal() { 165 | normal_distribution distribution(0, 1); 166 | return distribution(*rndeng); 167 | } 168 | 169 | } // namespace cnn 170 | -------------------------------------------------------------------------------- /cnn/tests/test_init.cc: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "CNNInit" 3 | #include 4 | 5 | #include 6 | 7 | #include "cnn/tests/test_utils.h" 8 | #include "cnn/tensor.h" 9 | #include "cnn/saxe-init.h" 10 | 11 | using namespace std; 12 | using namespace cnn; 13 | 14 | BOOST_GLOBAL_FIXTURE(TestTensorSetup) 15 | 16 | BOOST_AUTO_TEST_CASE(EOrthonormalRandom) 17 | { 18 | for (int d = 4; d < 128; d += 2) { 19 | Tensor Q = OrthonormalRandom(d, 1.0); 20 | // BOOST_REQUIRE_EQUAL(size(Q), Dim({d,d})); 21 | 22 | // check that this is actually returning orthogonal matrices 23 | #if MINERVA_BACKEND 24 | Tensor I = Q.Trans() * Q; 25 | #endif 26 | #if THPP_BACKEND 27 | Tensor QT = Q; 28 | QT.transpose(); 29 | //cerr << str(Q) << endl << str(QT) << endl; 30 | Tensor I = Zero({d,d}); 31 | I.addmm(0, 1, Q, QT); 32 | //cerr << str(I) << endl; 33 | #endif 34 | #if EIGEN_BACKEND 35 | Tensor I = Q.transpose() * Q; 36 | #endif 37 | double eps = 1e-1; 38 | for (int i = 0; i < d; ++i) 39 | for (int j = 0; j < d; ++j) 40 | BOOST_CHECK_CLOSE(t(I,i,j) + 1., (i == j ? 2. 
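For reference, the initializers in tensor.cc cover uniform, Glorot-style, Bernoulli and normal draws; a host-side sketch following the same manual Tensor setup used by saxe-init.cc (the 4x4 shape is illustrative):

cnn::Tensor t;
t.d = cnn::Dim({4, 4});
t.v = new float[t.d.size()];
cnn::TensorTools::Randomize(t);                 // U(-s, s), s = sqrt(6)/sqrt(sum_dims)
cnn::TensorTools::RandomizeNormal(0.f, 1.f, t); // N(0, 1)
float u = cnn::rand01();                        // uniform in [0, 1)
int k = cnn::rand0n(10);                        // uniform integer in [0, 10)
delete[] t.v;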
: 1.), eps); 41 | } 42 | cerr << "Finished\n"; 43 | } 44 | 45 | BOOST_AUTO_TEST_CASE(BernoulliInit) { 46 | Tensor r = RandomBernoulli(Dim({1000,1000}), 0.5f); 47 | int tot = 0; 48 | for (int i = 0; i < 1000; ++i) 49 | for (int j = 0; j < 1000; ++j) 50 | if (t(r,i,j)) ++tot; 51 | BOOST_CHECK_GT(tot, 490000); 52 | BOOST_CHECK_LT(tot, 510000); 53 | } 54 | 55 | BOOST_AUTO_TEST_CASE(Rand01) { 56 | cnn::real tot = 0; 57 | for (unsigned i = 0; i < 1000000; ++i) 58 | tot += cnn::rand01(); 59 | BOOST_CHECK_GT(tot, 490000.); 60 | BOOST_CHECK_LT(tot, 510000.); 61 | } 62 | 63 | 64 | -------------------------------------------------------------------------------- /cnn/tests/test_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_TEST_UTILS_H_ 2 | #define CNN_TEST_UTILS_H_ 3 | 4 | #include "cnn/tensor.h" 5 | 6 | namespace cnn { 7 | 8 | #if WITH_MINERVA_BACKEND 9 | 10 | struct TestTensorSetup { 11 | TestTensorSetup() { 12 | int argc = 1; 13 | char* foo = "foo"; 14 | char** argv = {&foo}; 15 | minerva::MinervaSystem::Initialize(&argc, &argv); 16 | #if HAS_CUDA 17 | minerva::MinervaSystem::Instance().device_manager().CreateGpuDevice(0); 18 | #else 19 | minerva::MinervaSystem::Instance().device_manager().CreateCpuDevice(); 20 | #endif 21 | } 22 | }; 23 | 24 | double t(const Tensor& T, unsigned i, unsigned j) { 25 | int m = T.Size(0); 26 | return T.Get().get()[j * m + i]; 27 | } 28 | 29 | std::ostream& operator<<(std::ostream& os, const Tensor& T) { 30 | if (T.Size().NumDims() == 2) { 31 | int m = T.Size(0); 32 | int n = T.Size(1); 33 | for (int i = 0; i < m; ++i) { 34 | for (int j = 0; j < n; ++j) { 35 | os << '\t' << t(T,i,j); 36 | } 37 | os << std::endl; 38 | } 39 | return os; 40 | } else { 41 | os << T.Size() << ": "; 42 | minerva::FileFormat ff; ff.binary = false; 43 | T.ToStream(os, ff); 44 | return os; 45 | } 46 | } 47 | 48 | #else 49 | 50 | struct TestTensorSetup { 51 | TestTensorSetup() { 52 | int argc = 1; 53 | char* p = "foo"; 54 | char** argv = {&p}; 55 | cnn::Initialize(argc, argv); 56 | } 57 | }; 58 | 59 | double t(const Tensor& T, unsigned i, unsigned j) { 60 | #if WITH_THPP_BACKEND 61 | return T.at({i,j}); 62 | #else 63 | return T(i, j); 64 | #endif 65 | } 66 | 67 | double t(const Tensor& T, unsigned i) { 68 | #if WITH_THPP_BACKEND 69 | return T.at({i}); 70 | #else 71 | return T(i, 0); 72 | #endif 73 | } 74 | 75 | #endif 76 | 77 | } // namespace cnn 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /cnn/timing.h: -------------------------------------------------------------------------------- 1 | #ifndef _TIMING_H_ 2 | #define _TIMING_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace cnn { 9 | 10 | struct Timer { 11 | Timer(const std::string& msg) : msg(msg), start(std::chrono::high_resolution_clock::now()) {} 12 | ~Timer() { 13 | auto stop = std::chrono::high_resolution_clock::now(); 14 | std::cerr << '[' << msg << ' ' << std::chrono::duration(stop-start).count() << " ms]\n"; 15 | } 16 | std::string msg; 17 | std::chrono::high_resolution_clock::time_point start; 18 | }; 19 | 20 | } // namespace cnn 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /cnn/training.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_TRAINING_H_ 2 | #define CNN_TRAINING_H_ 3 | 4 | #include 5 | #include "cnn/model.h" 6 | #include "cnn/shadow-params.h" 7 | 8 | namespace cnn { 9 | 10 | struct 
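timing.h's Timer is pure RAII: construction records a start time and the destructor prints the elapsed milliseconds to stderr, which is how the examples wrap each epoch. A sketch, with run_one_epoch() standing in for arbitrary work:

{
  cnn::Timer t("epoch completed in");
  run_one_epoch();   // hypothetical workload
}                    // destructor prints: [epoch completed in <n> ms]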
Trainer { 11 | explicit Trainer(Model* m, real lam, real e0) : 12 | eta0(e0), eta(e0), eta_decay(), epoch(), lambda(lam), clipping_enabled(true), clip_threshold(5), clips(), updates(), model(m) {} 13 | virtual ~Trainer(); 14 | 15 | virtual void update(real scale = 1.0) = 0; 16 | void update_epoch(real r = 1) { 17 | epoch += r; 18 | eta = eta0 / (1 + epoch * eta_decay); 19 | } 20 | 21 | // if clipping is enabled and the gradient is too big, return the amount to 22 | // scale the gradient by (otherwise 1) 23 | float clip_gradients(); 24 | 25 | // learning rates 26 | real eta0; 27 | real eta; 28 | real eta_decay; 29 | real epoch; 30 | 31 | real lambda; // weight regularization (l2) 32 | 33 | // clipping 34 | real clipping_enabled; 35 | real clip_threshold; 36 | real clips; 37 | real updates; 38 | 39 | void status() { 40 | std::cerr << "[epoch=" << epoch << " eta=" << eta << " clips=" << clips << " updates=" << updates << "] "; 41 | updates = clips = 0; 42 | } 43 | 44 | Model* model; // parameters and gradients live here 45 | }; 46 | 47 | struct SimpleSGDTrainer : public Trainer { 48 | explicit SimpleSGDTrainer(Model* m, real lam = 1e-6, real e0 = 0.1) : Trainer(m, lam, e0) {} 49 | void update(real scale) override; 50 | void update(const std::vector &lookup_params, const std::vector ¶ms, real scale = 1); 51 | }; 52 | 53 | struct MomentumSGDTrainer : public Trainer { 54 | explicit MomentumSGDTrainer(Model* m, real lam = 1e-6, real e0 = 0.01, real mom = 0.9) : 55 | Trainer(m, lam, e0), momentum(mom), velocity_allocated(false) {} 56 | void update(real scale) override; 57 | 58 | real momentum; 59 | 60 | bool velocity_allocated; 61 | 62 | // the following represent the current velocity 63 | std::vector vp; 64 | std::vector vlp; 65 | //std::unordered_map vp; 66 | //std::unordered_map> vl; 67 | }; 68 | 69 | struct AdagradTrainer : public Trainer { 70 | explicit AdagradTrainer(Model* m, real lam = 1e-6, real e0 = 0.1, real eps = 1e-20) : 71 | Trainer(m, lam, e0), epsilon(eps), shadow_params_allocated(false) {} 72 | void update(real scale) override; 73 | 74 | real epsilon; 75 | bool shadow_params_allocated; 76 | std::vector vp; 77 | std::vector vlp; 78 | }; 79 | 80 | struct AdadeltaTrainer : public Trainer { 81 | explicit AdadeltaTrainer(Model* m, real lam = 1e-6, real eps = 1e-6, real rho = 0.95) : 82 | Trainer(m, lam, 1.0), epsilon(eps), rho(rho), shadow_params_allocated(false) {} 83 | void update(real scale) override; 84 | 85 | real epsilon; 86 | real rho; 87 | bool shadow_params_allocated; 88 | std::vector hg; // History of gradients 89 | std::vector hlg; 90 | std::vector hd; // History of deltas 91 | std::vector hld; 92 | }; 93 | 94 | struct RmsPropTrainer : public Trainer { 95 | explicit RmsPropTrainer(Model* m, real lam = 1e-6, real e0 = 0.1, real eps = 1e-20, real rho = 0.95) : 96 | Trainer(m, lam, e0), epsilon(eps), rho(rho), shadow_params_allocated(false) {} 97 | void update(real scale) override; 98 | 99 | real epsilon; 100 | real rho; 101 | bool shadow_params_allocated; 102 | std::vector hg; // History of gradients 103 | std::vector > hlg; 104 | }; 105 | 106 | struct AdamTrainer : public Trainer { 107 | explicit AdamTrainer(Model* m, float lambda = 1e-6, float alpha = 0.001, float beta_1 = 0.9, float beta_2 = 0.999, float eps = 1e-8) : 108 | Trainer(m, lambda, alpha), beta_1(beta_1), beta_2(beta_2), eps(eps), shadow_params_allocated(false) {} 109 | 110 | void update(real scale) override; 111 | 112 | float beta_1; 113 | float beta_2; 114 | float eps; 115 | bool shadow_params_allocated; 116 | 
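All of the concrete trainers share the Trainer bookkeeping above: eta decays as eta = eta0 / (1 + epoch * eta_decay), and gradients are clipped at clip_threshold (5 by default) when clipping_enabled. A typical driver loop, shapes of which appear throughout examples/ (the loss construction is elided):

cnn::Model m;
cnn::SimpleSGDTrainer sgd(&m, 1e-6 /*lambda*/, 0.1 /*eta0*/);
for (unsigned iter = 0; iter < 10; ++iter) {
  cnn::ComputationGraph cg;
  // ... build the loss Expression for one training instance here ...
  cg.forward();
  cg.backward();
  sgd.update(1.0);
}
sgd.update_epoch();  // advances epoch, recomputes eta
sgd.status();        // prints [epoch=... eta=... clips=... updates=...]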
std::vector m; // History of gradients 117 | std::vector lm; 118 | std::vector v; // History of deltas 119 | std::vector lv; 120 | }; 121 | 122 | } // namespace cnn 123 | 124 | #endif 125 | -------------------------------------------------------------------------------- /config.h.cmake: -------------------------------------------------------------------------------- 1 | #ifndef CNN_CONFIG_H_ 2 | #define CNN_CONFIG_H_ 3 | 4 | #cmakedefine WITH_MINERVA_BACKEND @WITH_MINERVA_BACKEND@ 5 | #cmakedefine WITH_THPP_BACKEND @WITH_THPP_BACKEND@ 6 | #cmakedefine WITH_EIGEN_BACKEND @WITH_EIGEN_BACKEND@ 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8) 2 | 3 | foreach(TARGET mlc tok-embed segrnn-sup poisson-regression tag-bilstm embed-cl encdec xor xor-xent xor-batch xor-batch-lookup rnnlm rnnlm-aevb rnnlm-cfsm rnnlm-batch nlm textcat rnnlm2 rnnlm-mp read-write) 4 | ADD_EXECUTABLE(${TARGET} ${TARGET}.cc) 5 | target_link_libraries(${TARGET} cnn ${LIBS} pthread) 6 | if(UNIX AND NOT APPLE) 7 | target_link_libraries(${TARGET} rt) 8 | endif() 9 | if (WITH_CUDA_BACKEND) 10 | add_dependencies(${TARGET} cnncuda) 11 | target_link_libraries(${TARGET} cnncuda) 12 | CUDA_ADD_CUBLAS_TO_TARGET(${TARGET}) 13 | endif (WITH_CUDA_BACKEND) 14 | endforeach() 15 | 16 | -------------------------------------------------------------------------------- /examples/example-data/dev-hsm.txt: -------------------------------------------------------------------------------- 1 | i think this is wrong . can you check it ? 2 | fine . 3 | i went back to my hotel room and found a here . 4 | it would be one hundred sixty- eight dollars per night for a twin , with tax and service charge . 5 | i would like to sit down and take a rest for a little while . 6 | i am . 7 | number three . 8 | who are you ? 9 | which is the biggest department store in this city ? 10 | can i see your ticket , sir ? 11 | -------------------------------------------------------------------------------- /examples/example-data/dev-poi.txt: -------------------------------------------------------------------------------- 1 | i 'll take it . ||| 5 2 | we want to have a table near the window . ||| 10 3 | how can i help you ? ||| 6 4 | would you , please ? 
||| 5 5 | -------------------------------------------------------------------------------- /examples/example-data/fin-toy.txt: -------------------------------------------------------------------------------- 1 | 21 11 7 6 7 22 22 1 24 1 21 22 11 2 | 21 3 13 21 3 14 3 11 21 14 7 10 22 11 7 16 3 | 24 3 14 11 22 22 3 24 3 21 22 3 4 | 12 17 10 3 16 16 3 3 5 | 14 3 11 15 7 16 22 3 15 11 21 7 13 21 11 6 | 20 23 23 13 11 16 13 23 12 3 14 14 3 7 | 27 6 11 16 24 3 13 3 24 3 20 3 11 21 23 23 6 7 16 8 | 8 17 14 13 10 1 14 21 3 16 11 16 9 | 27 17 11 9 17 3 10 | 20 3 21 11 22 22 11 24 3 22 11 | 14 3 23 14 3 15 11 16 7 16 12 | 22 23 17 10 11 14 3 15 15 7 14 22 3 13 | 10 7 14 21 11 16 13 11 14 1 11 21 13 7 11 14 3 3 12 3 14 | 15 3 13 21 3 16 7 11 6 7 16 15 | 21 23 18 7 20 27 13 21 11 13 2 16 16 | 21 27 27 22 22 1 12 1 21 22 1 17 | 18 1 11 10 6 7 17 16 9 7 14 15 11 11 16 18 | 21 11 11 22 17 21 7 16 19 | 13 11 10 3 20 17 11 22 3 20 | 22 23 10 13 3 11 21 7 16 21 | 20 23 15 3 3 16 22 | 21 23 15 15 11 21 21 3 23 | 18 1 11 10 6 7 3 21 11 17 11 21 22 3 24 | 7 21 11 17 22 22 7 14 23 11 21 21 3 3 16 25 | 24 3 11 24 3 23 22 23 16 23 22 22 3 26 | 15 7 16 11 13 2 27 | 4 3 5 13 7 11 14 14 3 28 | 15 11 9 20 7 7 16 11 16 29 | 20 3 13 13 3 23 21 21 23 10 22 7 11 21 21 3 30 | 17 4 12 7 13 22 11 21 22 3 31 | 21 17 21 11 3 14 11 21 22 11 18 20 7 21 11 6 7 16 22 22 11 32 | 22 27 10 12 11 7 16 33 | 21 10 7 16 28 10 7 16 11 14 1 11 16 7 16 34 | 3 14 3 13 3 21 22 11 11 16 35 | 10 3 23 21 13 3 16 18 11 22 17 3 36 | 15 23 17 22 22 11 11 16 37 | 3 20 17 15 3 3 22 22 11 21 7 22 38 | 12 1 20 24 7 16 22 11 7 39 | 17 15 11 21 22 7 11 21 7 14 14 7 40 | 21 23 24 3 11 22 21 7 24 3 11 16 7 16 41 | 10 7 20 13 11 21 22 27 11 16 42 | 22 17 11 15 7 22 22 17 15 3 16 3 43 | 22 3 13 7 20 22 23 23 44 | 22 23 17 22 3 16 22 17 24 3 11 10 7 7 21 21 3 45 | 24 3 18 3 3 13 3 18 18 3 14 7 17 11 13 7 23 21 46 | 22 11 7 22 7 7 16 22 7 13 11 12 1 22 47 | 15 3 23 14 3 24 11 20 22 3 48 | 27 2 18 3 13 13 3 21 11 3 49 | 18 7 14 11 3 12 3 21 21 3 50 | 15 1 16 16 11 13 2 11 21 22 1 51 | -------------------------------------------------------------------------------- /examples/example-data/seg-sup.dev.txt: -------------------------------------------------------------------------------- 1 | a a 0 1 a ||| O:1 O:1 N:2 O:1 2 | x y 1 a ||| O:1 O:1 N:1 O:1 3 | a a a ||| O:1 O:1 O:1 4 | 1 0 1 a ||| N:3 O:1 5 | a 1 a 1 ||| O:1 N:1 O:1 N:1 6 | 0 0 a a 0 0 a a ||| N:2 O:1 O:1 N:2 O:1 O:1 7 | -------------------------------------------------------------------------------- /examples/example-data/textcat.txt: -------------------------------------------------------------------------------- 1 | what is your name ? ||| Q 2 | where do you live ? ||| Q 3 | i live in pittsburgh . ||| D 4 | my name is bond . ||| D 5 | wtf ? she asked . ||| D 6 | did she say " yes . " ? 
||| Q 7 | -------------------------------------------------------------------------------- /examples/mlc.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/timing.h" 2 | #include "cnn/nodes.h" 3 | #include "cnn/cnn.h" 4 | #include "cnn/training.h" 5 | #include "cnn/expr.h" 6 | #include "cnn/grad-check.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | using namespace cnn; 16 | using namespace cnn::expr; 17 | 18 | struct TrainingInstance { 19 | TrainingInstance() {} 20 | TrainingInstance(const vector>& x, const vector& y) : feats(x), labels(y) {} 21 | vector> feats; // sparse representation of x vector 22 | vector labels; // labels 23 | }; 24 | 25 | void swap(TrainingInstance& a, TrainingInstance& b) { 26 | using std::swap; 27 | swap(a.feats, b.feats); 28 | swap(a.labels, b.labels); 29 | } 30 | 31 | // X: 0 864:0.0497399 1523:0.0446641 1681:0.0673872 2293:0.0718105 2845:0.0657134 2867:0.0653402 3240:0.0795168 4125:0.0423215 4271:0.0691369 4665:0.0500863 5216:0.252185 5573:0.0672562 5699:0.0594998 5794:0.0737821 6222:0.124501 6592:0.101431 7227:0.194091 7975:0.0766401 32 | // Y: 0 35:1 60:1 94:1 95:1 103:1 33 | vector ReadFiles(const char* xfname, const char* yfname, unsigned& maxfeat, unsigned& maxlabel) { 34 | maxfeat = 0; 35 | maxlabel = 0; 36 | vector d; 37 | ifstream inx(xfname); 38 | assert(inx); 39 | ifstream iny(yfname); 40 | assert(iny); 41 | string linex, liney; 42 | string tok; 43 | while(getline(inx,linex)) { 44 | getline(iny,liney); 45 | 46 | vector> v; 47 | istringstream isx(linex); 48 | isx >> tok; 49 | assert(tok == "0"); 50 | while (isx) { 51 | isx >> tok; 52 | if (!isx) break; 53 | size_t pos = tok.find(':'); 54 | assert(pos != string::npos); 55 | tok[pos] = 0; 56 | unsigned fi = atoi(&tok[0]); 57 | if (fi > maxfeat) maxfeat = fi; 58 | float fv = strtof(&tok[pos+1], 0); 59 | v.emplace_back(make_pair(fi, fv)); 60 | } 61 | vector y; 62 | istringstream isy(liney); 63 | isy >> tok; 64 | assert(tok == "0"); 65 | while (isy) { 66 | isy >> tok; 67 | if (!isy) break; 68 | size_t pos = tok.find(':'); 69 | assert(pos != string::npos); 70 | tok[pos] = 0; 71 | unsigned yi = atoi(&tok[0]); 72 | if (yi > maxlabel) maxlabel = yi; 73 | y.push_back(yi); 74 | } 75 | d.emplace_back(v, y); 76 | } 77 | return d; 78 | } 79 | 80 | struct MLCBuilder { 81 | explicit MLCBuilder(Model& m, unsigned nfeats, unsigned labels) { 82 | unsigned HIDDEN_SIZE = 200; 83 | p_xe = m.add_lookup_parameters(nfeats, {HIDDEN_SIZE}); 84 | p_bh = m.add_parameters({HIDDEN_SIZE}); 85 | p_h2y = m.add_parameters({labels, HIDDEN_SIZE}); 86 | p_by = m.add_parameters({labels}); 87 | } 88 | // output will be a vector of scores that can be 'softmaxed' or 'sparsemaxed' 89 | // into a probability distribution, or it can be compared with a target 90 | // distribution and a loss will be computed 91 | Expression BuildPredictionScores(ComputationGraph& cg, const vector>& feats) const { 92 | vector fe(feats.size() + 1); 93 | unsigned fi = 0; 94 | for (auto& xi : feats) { 95 | fe[fi++] = lookup(cg, p_xe, xi.first) * xi.second; // xi.second is the input feature value 96 | } 97 | fe[fi] = parameter(cg, p_bh); // put bias term at the end 98 | Expression h = tanh(sum(fe)); 99 | Expression h2y = parameter(cg, p_h2y); 100 | Expression by = parameter(cg, p_by); 101 | return affine_transform({by, h2y, h}); 102 | } 103 | LookupParameters* p_xe; 104 | Parameters* p_bh; 105 | Parameters* p_h2y; 106 | Parameters* p_by; 107 | }; 108 | 109 
| int main(int argc, char** argv) { 110 | cnn::Initialize(argc, argv); 111 | 112 | if (argc != 5) { 113 | cerr << "Usage: " << argv[0] << " x.train.txt y.train.txt x.dev.txt y.dev.txt\n"; 114 | return 1; 115 | } 116 | vector train, dev; 117 | unsigned max_xi, max_yi, dxi, dyi; 118 | train = ReadFiles(argv[1], argv[2], max_xi, max_yi); 119 | cerr << "Maximum feature index: " << max_xi << endl; 120 | cerr << "Maximum label index: " << max_yi << endl; 121 | dev = ReadFiles(argv[3], argv[4], dxi, dyi); 122 | assert(dxi <= max_xi); 123 | assert(dyi <= max_yi); 124 | max_xi++; 125 | max_yi++; 126 | 127 | // parameters 128 | Model m; 129 | MLCBuilder mlc(m, max_xi, max_yi); 130 | 131 | //AdadeltaTrainer sgd(&m); 132 | SimpleSGDTrainer sgd(&m); 133 | sgd.eta0 = 0.001; 134 | sgd.eta = 0.001; 135 | 136 | unsigned report_every_i = 50; 137 | unsigned si = train.size(); 138 | bool first = true; 139 | vector order(train.size()); 140 | for (unsigned i = 0; i < order.size(); ++i) order[i] = i; 141 | double ti = 0; 142 | while(1) { 143 | Timer iteration("completed in"); 144 | double loss = 0; 145 | unsigned instances = 0; 146 | for (unsigned i = 0; i < report_every_i; ++i) { 147 | if (si == train.size()) { 148 | si = 0; 149 | if (first) { first = false; } else { sgd.update_epoch(); } 150 | cerr << "**SHUFFLE\n"; 151 | shuffle(order.begin(), order.end(), *rndeng); 152 | } 153 | // build graph for this instance 154 | ComputationGraph cg; 155 | auto& xy = train[order[si]]; 156 | ++si; 157 | ++instances; 158 | ++ti; 159 | Expression u = mlc.BuildPredictionScores(cg, xy.feats); 160 | 161 | if (rand01() < 0.004) { 162 | sparsemax(u * 1.5); // this increases sparsity at test time, which Andre found the be useful 163 | vector p = as_vector(cg.incremental_forward()); 164 | for (unsigned j = 0; j < p.size(); ++j) 165 | if (p[j] > 0) cerr << j << ' '; 166 | cerr << " |||"; 167 | for (auto y : xy.labels) 168 | cerr << ' ' << y; 169 | cerr << endl; 170 | } 171 | sparsemax_loss(u, &xy.labels); 172 | loss += as_scalar(cg.forward()); 173 | cg.backward(); 174 | sgd.update(1.0); 175 | } 176 | cerr << "[epoch=" << (ti / train.size()) << "] E=" << (loss / instances) << ' '; 177 | } 178 | } 179 | 180 | -------------------------------------------------------------------------------- /examples/nlm.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/training.h" 4 | #include "cnn/timing.h" 5 | #include "cnn/expr.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace cnn; 14 | using namespace cnn::expr; 15 | 16 | int main(int argc, char** argv) { 17 | cnn::Initialize(argc, argv); 18 | 19 | unsigned CONTEXT = 3; 20 | unsigned DIM = 100; 21 | unsigned VOCAB_SIZE = 29; 22 | 23 | // parameters 24 | Model model; 25 | SimpleSGDTrainer sgd(&model); 26 | LookupParameters* p_c = model.add_lookup_parameters(VOCAB_SIZE, {DIM}); 27 | 28 | ComputationGraph cg; 29 | 30 | vector in_c(CONTEXT); // set these to set the context words 31 | vector c(CONTEXT); 32 | for (int i=0; i> corpus; 56 | string line; 57 | while(getline(in, line)) { 58 | istringstream is(line); 59 | vector x(CONTEXT+1); 60 | for (unsigned i = 0; i <= CONTEXT; ++i) { 61 | is >> x[i]; 62 | assert(x[i] < VOCAB_SIZE); 63 | } 64 | corpus.push_back(x); 65 | } 66 | 67 | 68 | // train the parameters 69 | for (unsigned iter = 0; iter < 100; ++iter) { 70 | Timer iteration("epoch completed in"); 71 | double loss = 0; 72 | unsigned n 
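// Note that nlm.cc builds its ComputationGraph once, before the training
// loop: each iteration only overwrites in_c (the context words) and ytrue
// (the target) and calls forward()/backward() again, reusing the same graph
// instead of rebuilding it per instance as the other examples do.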
= 0; 73 | for (auto& ci : corpus) { 74 | copy(ci.begin(), ci.begin()+CONTEXT, in_c.begin()); 75 | ytrue = ci.back(); 76 | loss += as_scalar(cg.forward()); 77 | cg.backward(); 78 | ++n; 79 | sgd.update(1.0); 80 | if (n == 2500) break; 81 | } 82 | loss /= n; 83 | cerr << "E = " << loss << ' '; 84 | } 85 | } 86 | 87 | -------------------------------------------------------------------------------- /examples/read-write.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/training.h" 4 | #include "cnn/gpu-ops.h" 5 | #include "cnn/expr.h" 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace cnn; 14 | using namespace cnn::expr; 15 | 16 | 17 | // This is a sample class which implements the xor model from xor.cc 18 | // Everything in this class is just as you would do the usual except for 19 | // parts with provided comments. 20 | class XORModel { 21 | public: 22 | unsigned hidden_size; 23 | 24 | Expression W, b, V, a; 25 | Parameters *pW, *pb, *pV, *pa; 26 | 27 | // It is important to have a null default constructor for the class, as 28 | // we would first need to read the class object from the file, followed by 29 | // the cnn model which has saved parameters. 30 | XORModel() {} 31 | 32 | XORModel(const unsigned& hidden_len, Model *m) { 33 | hidden_size = hidden_len; 34 | InitParams(m); 35 | } 36 | 37 | void InitParams(Model *m) { 38 | pW = m->add_parameters({hidden_size, 2}); 39 | pb = m->add_parameters({hidden_size}); 40 | pV = m->add_parameters({1, hidden_size}); 41 | pa = m->add_parameters({1}); 42 | } 43 | 44 | void AddParamsToCG(ComputationGraph *cg) { 45 | W = parameter(*cg, pW); 46 | b = parameter(*cg, pb); 47 | V = parameter(*cg, pV); 48 | a = parameter(*cg, pa); 49 | } 50 | 51 | float Train(vector &input, cnn::real &gold_output, 52 | SimpleSGDTrainer *sgd) { 53 | ComputationGraph cg; 54 | AddParamsToCG(&cg); 55 | 56 | Expression x = cnn::expr::input(cg, {(unsigned int)input.size()}, &input); 57 | Expression y = cnn::expr::input(cg, &gold_output); 58 | 59 | Expression h = tanh(W*x + b); 60 | Expression y_pred = V*h + a; 61 | Expression loss = squared_distance(y_pred, y); 62 | float return_loss = as_scalar(cg.forward()); 63 | cg.backward(); 64 | sgd->update(1.0); 65 | return return_loss; 66 | } 67 | 68 | float Decode(vector &input) { 69 | ComputationGraph cg; 70 | AddParamsToCG(&cg); 71 | 72 | Expression x = cnn::expr::input(cg, {(unsigned int)input.size()}, &input); 73 | Expression h = tanh(W*x + b); 74 | Expression y_pred = V*h + a; 75 | return as_scalar(cg.forward()); 76 | } 77 | 78 | // This function should save all those variables in the archive, which 79 | // determine the size of other members of the class, here: hidden_size 80 | friend class boost::serialization::access; 81 | template void serialize(Archive& ar, const unsigned int) { 82 | 83 | // This can either save or read the value of hidden_size from ar, 84 | // depending on whether its the output or input archive. 85 | ar & hidden_size; 86 | } 87 | }; 88 | 89 | void WriteToFile(string& filename, XORModel &model, Model &cnn_model) { 90 | ofstream outfile(filename); 91 | if (!outfile.is_open()) { 92 | cerr << "File opening failed" << endl; 93 | } 94 | 95 | boost::archive::text_oarchive oa(outfile); 96 | oa & model; // Write down your class object. 97 | oa & cnn_model; // Write down the cnn::Model object. 
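// The read path must mirror this order exactly: ReadFromFile() below first
// restores XORModel (recovering hidden_size), then calls InitParams() so the
// cnn::Model has the same parameter layout, and only then streams in the
// cnn::Model archive itself.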
98 | outfile.close(); 99 | } 100 | 101 | void ReadFromFile(string& filename, XORModel *model, Model *cnn_model) { 102 | ifstream infile(filename); 103 | if (!infile.is_open()) { 104 | cerr << "File opening failed" << endl; 105 | } 106 | 107 | boost::archive::text_iarchive ia(infile); 108 | ia & *model; // Read your class object 109 | 110 | // Now determine structure of cnn::Model depending on the 111 | // the structure of your class object 112 | model->InitParams(cnn_model); 113 | ia & *cnn_model; // Read the cnn::Model 114 | 115 | infile.close(); 116 | } 117 | 118 | 119 | int main(int argc, char** argv) { 120 | cnn::Initialize(argc, argv); 121 | 122 | const unsigned HIDDEN = 8; 123 | const unsigned ITERATIONS = 20; 124 | Model m; 125 | SimpleSGDTrainer sgd(&m); 126 | XORModel model(HIDDEN, &m); 127 | 128 | vector x_values(2); // set x_values to change the inputs 129 | cnn::real y_value; // set y_value to change the target output 130 | 131 | // Train the model 132 | for (unsigned iter = 0; iter < ITERATIONS; ++iter) { 133 | double loss = 0; 134 | for (unsigned mi = 0; mi < 4; ++mi) { 135 | bool x1 = mi % 2; 136 | bool x2 = (mi / 2) % 2; 137 | x_values[0] = x1 ? 1 : -1; 138 | x_values[1] = x2 ? 1 : -1; 139 | y_value = (x1 != x2) ? 1 : -1; 140 | loss += model.Train(x_values, y_value, &sgd); 141 | } 142 | loss /= 4; 143 | cerr << "E = " << loss << endl; 144 | } 145 | 146 | string outfile = "out.txt"; 147 | cerr << "Written model to File: " << outfile << endl; 148 | WriteToFile(outfile, model, m); // Writing objects to file 149 | 150 | // New objects in which the written archive will be read 151 | Model read_cnn_model; 152 | XORModel read_model; 153 | 154 | cerr << "Reading model from File: " << outfile << endl; 155 | ReadFromFile(outfile, &read_model, &read_cnn_model); // Reading from file 156 | cerr << "Output for the input: " << x_values[0] << " " << x_values[1] << endl; 157 | cerr << read_model.Decode(x_values); // Checking output for sanity 158 | } 159 | 160 | -------------------------------------------------------------------------------- /examples/rnnlm-mp.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/cnn.h" 2 | #include "cnn/training.h" 3 | #include "cnn/expr.h" 4 | #include "cnn/lstm.h" 5 | #include "cnn/mp.h" 6 | #include "rnnlm.h" 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | /* 15 | TODO: 16 | - The shadow params in the trainers need to be shared. 
17 | */ 18 | 19 | using namespace std; 20 | using namespace cnn; 21 | using namespace cnn::expr; 22 | using namespace cnn::mp; 23 | using namespace boost::interprocess; 24 | 25 | typedef vector<int> Datum; 26 | 27 | vector<Datum> ReadData(string filename) { 28 | vector<Datum> data; 29 | ifstream fs(filename); 30 | if (!fs.is_open()) { 31 | cerr << "ERROR: Unable to open " << filename << endl; 32 | exit(1); 33 | } 34 | string line; 35 | while (getline(fs, line)) { 36 | data.push_back(ReadSentence(line, &d)); 37 | } 38 | return data; 39 | } 40 | 41 | template <class D> 42 | class Learner : public ILearner<D> { 43 | public: 44 | explicit Learner(RNNLanguageModel<LSTMBuilder>& rnnlm, unsigned data_size) : rnnlm(rnnlm) {} 45 | ~Learner() {} 46 | 47 | cnn::real LearnFromDatum(const D& datum, bool learn) { 48 | ComputationGraph cg; 49 | rnnlm.BuildLMGraph(datum, cg); 50 | cnn::real loss = as_scalar(cg.forward()); 51 | if (learn) { 52 | cg.backward(); 53 | } 54 | return loss; 55 | } 56 | 57 | void SaveModel() {} 58 | 59 | private: 60 | RNNLanguageModel<LSTMBuilder>& rnnlm; 61 | }; 62 | 63 | int main(int argc, char** argv) { 64 | if (argc < 4) { 65 | cerr << "Usage: " << argv[0] << " cores corpus.txt dev.txt [iterations]" << endl; 66 | return 1; 67 | } 68 | srand(time(NULL)); 69 | unsigned num_children = atoi(argv[1]); 70 | assert (num_children <= 64); 71 | vector<Datum> data = ReadData(argv[2]); 72 | vector<Datum> dev_data = ReadData(argv[3]); 73 | unsigned num_iterations = (argc >= 5) ? atoi(argv[4]) : UINT_MAX; 74 | unsigned dev_frequency = 5000; 75 | unsigned report_frequency = 10; 76 | 77 | cnn::Initialize(argc, argv, 1, true); 78 | 79 | Model model; 80 | SimpleSGDTrainer sgd(&model, 0.0, 0.2); 81 | //AdagradTrainer sgd(&model, 0.0); 82 | //AdamTrainer sgd(&model, 0.0); 83 | 84 | RNNLanguageModel<LSTMBuilder> rnnlm(model); 85 | 86 | Learner<Datum> learner(rnnlm, data.size()); 87 | RunMultiProcess(num_children, &learner, &sgd, data, dev_data, num_iterations, dev_frequency, report_frequency); 88 | } 89 | -------------------------------------------------------------------------------- /examples/rnnlm.h: -------------------------------------------------------------------------------- 1 | #include "cnn/cnn.h" 2 | #include "cnn/expr.h" 3 | #include "cnn/dict.h" 4 | #include "cnn/lstm.h" 5 | 6 | #include <iostream> 7 | 8 | using namespace std; 9 | using namespace cnn; 10 | using namespace cnn::expr; 11 | 12 | unsigned LAYERS = 2; 13 | unsigned INPUT_DIM = 8; //256 14 | unsigned HIDDEN_DIM = 24; // 1024 15 | unsigned VOCAB_SIZE = 5500; 16 | 17 | cnn::Dict d; 18 | int kSOS; 19 | int kEOS; 20 | 21 | template <class Builder> 22 | struct RNNLanguageModel { 23 | LookupParameters* p_c; 24 | Parameters* p_R; 25 | Parameters* p_bias; 26 | Builder builder; 27 | explicit RNNLanguageModel(Model& model) : builder(LAYERS, INPUT_DIM, HIDDEN_DIM, &model) { 28 | kSOS = d.Convert("<s>"); 29 | kEOS = d.Convert("</s>"); 30 | p_c = model.add_lookup_parameters(VOCAB_SIZE, {INPUT_DIM}); 31 | p_R = model.add_parameters({VOCAB_SIZE, HIDDEN_DIM}); 32 | p_bias = model.add_parameters({VOCAB_SIZE}); 33 | } 34 | 35 | // return Expression of total loss 36 | Expression BuildLMGraph(const vector<int>& sent, ComputationGraph& cg) { 37 | const unsigned slen = sent.size() - 1; 38 | builder.new_graph(cg); // reset RNN builder for new graph 39 | builder.start_new_sequence(); 40 | Expression i_R = parameter(cg, p_R); // hidden -> word rep parameter 41 | Expression i_bias = parameter(cg, p_bias); // word bias 42 | vector<Expression> errs; 43 | for (unsigned t = 0; t < slen; ++t) { 44 | Expression i_x_t = lookup(cg, p_c, sent[t]); 45 | // y_t = RNN(x_t) 46 | Expression i_y_t =
builder.add_input(i_x_t); 47 | Expression i_r_t = i_bias + i_R * i_y_t; 48 | 49 | // LogSoftmax followed by PickElement can be written in one step 50 | // using PickNegLogSoftmax 51 | Expression i_err = pickneglogsoftmax(i_r_t, sent[t+1]); 52 | errs.push_back(i_err); 53 | } 54 | Expression i_nerr = sum(errs); 55 | return i_nerr; 56 | } 57 | 58 | // return Expression for total loss 59 | void RandomSample(int max_len = 150) { 60 | cerr << endl; 61 | ComputationGraph cg; 62 | builder.new_graph(cg); // reset RNN builder for new graph 63 | builder.start_new_sequence(); 64 | 65 | Expression i_R = parameter(cg, p_R); 66 | Expression i_bias = parameter(cg, p_bias); 67 | vector errs; 68 | int len = 0; 69 | int cur = kSOS; 70 | while(len < max_len && cur != kEOS) { 71 | ++len; 72 | Expression i_x_t = lookup(cg, p_c, cur); 73 | // y_t = RNN(x_t) 74 | Expression i_y_t = builder.add_input(i_x_t); 75 | Expression i_r_t = i_bias + i_R * i_y_t; 76 | 77 | Expression ydist = softmax(i_r_t); 78 | 79 | unsigned w = 0; 80 | while (w == 0 || (int)w == kSOS) { 81 | auto dist = as_vector(cg.incremental_forward()); 82 | double p = rand01(); 83 | for (; w < dist.size(); ++w) { 84 | p -= dist[w]; 85 | if (p < 0.0) { break; } 86 | } 87 | if (w == dist.size()) w = kEOS; 88 | } 89 | cerr << (len == 1 ? "" : " ") << d.Convert(w); 90 | cur = w; 91 | } 92 | cerr << endl; 93 | } 94 | }; 95 | -------------------------------------------------------------------------------- /examples/xor-batch-lookup.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/training.h" 4 | #include "cnn/gpu-ops.h" 5 | #include "cnn/expr.h" 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace cnn; 14 | using namespace cnn::expr; 15 | 16 | int main(int argc, char** argv) { 17 | cnn::Initialize(argc, argv); 18 | 19 | // parameters 20 | const unsigned HIDDEN_SIZE = 8; 21 | const unsigned ITERATIONS = 200; 22 | Model m; 23 | SimpleSGDTrainer sgd(&m); 24 | 25 | ComputationGraph cg; 26 | 27 | Expression W = parameter(cg, m.add_parameters({HIDDEN_SIZE, 2})); 28 | Expression b = parameter(cg, m.add_parameters({HIDDEN_SIZE})); 29 | Expression V = parameter(cg, m.add_parameters({1, HIDDEN_SIZE})); 30 | Expression a = parameter(cg, m.add_parameters({1})); 31 | 32 | LookupParameters* x_values = m.add_lookup_parameters(4, {2}); 33 | LookupParameters* y_values = m.add_lookup_parameters(4, {1}); 34 | x_values->Initialize(0, {1.0, 1.0}); 35 | x_values->Initialize(1, {-1.0, 1.0}); 36 | x_values->Initialize(2, {1.0, -1.0}); 37 | x_values->Initialize(3, {-1.0, -1.0}); 38 | y_values->Initialize(0, {-1.0}); 39 | y_values->Initialize(1, {1.0}); 40 | y_values->Initialize(2, {1.0}); 41 | y_values->Initialize(3, {-1.0}); 42 | 43 | Expression x = const_lookup(cg, x_values, {0, 1, 2, 3}); 44 | Expression y = const_lookup(cg, y_values, {0, 1, 2, 3}); 45 | 46 | cerr << "x is " << x.value().d << ", y is " << y.value().d << endl; 47 | Expression h = tanh(W*x + b); 48 | //Expression h = softsign(W*x + b); 49 | Expression y_pred = V*h + a; 50 | Expression loss = squared_distance(y_pred, y); 51 | Expression sum_loss = sum_batches(loss); 52 | 53 | cg.PrintGraphviz(); 54 | if (argc == 2) { 55 | ifstream in(argv[1]); 56 | boost::archive::text_iarchive ia(in); 57 | ia >> m; 58 | } 59 | 60 | // train the parameters 61 | for (unsigned iter = 0; iter < ITERATIONS; ++iter) { 62 | vector losses = as_vector(cg.forward()); 63 | 
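// cg.forward() evaluates up to the most recently added expression --
// here sum_loss, which folds the per-instance losses of the mini-batch
// into a single value -- and cg.backward() runs backprop from that node.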
cg.backward(); 64 | sgd.update(0.25); 65 | sgd.update_epoch(); 66 | float loss = 0; 67 | for(auto l : losses) 68 | loss += l; 69 | loss /= 4; 70 | cerr << "E = " << loss << endl; 71 | } 72 | //boost::archive::text_oarchive oa(cout); 73 | //oa << m; 74 | } 75 | 76 | -------------------------------------------------------------------------------- /examples/xor-batch.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/training.h" 4 | #include "cnn/gpu-ops.h" 5 | #include "cnn/expr.h" 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace cnn; 14 | using namespace cnn::expr; 15 | 16 | int main(int argc, char** argv) { 17 | cnn::Initialize(argc, argv); 18 | 19 | // parameters 20 | const unsigned HIDDEN_SIZE = 8; 21 | const unsigned ITERATIONS = 200; 22 | Model m; 23 | SimpleSGDTrainer sgd(&m); 24 | 25 | ComputationGraph cg; 26 | 27 | Expression W = parameter(cg, m.add_parameters({HIDDEN_SIZE, 2})); 28 | Expression b = parameter(cg, m.add_parameters({HIDDEN_SIZE})); 29 | Expression V = parameter(cg, m.add_parameters({1, HIDDEN_SIZE})); 30 | Expression a = parameter(cg, m.add_parameters({1})); 31 | 32 | // set x_values to change the inputs to the network 33 | Dim x_dim({2}, 4), y_dim({1}, 4); 34 | cerr << "x_dim=" << x_dim << ", y_dim=" << y_dim << endl; 35 | vector x_values = {1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, -1.0}; 36 | Expression x = input(cg, x_dim, &x_values); 37 | // set y_values expressing the output 38 | vector y_values = {-1.0, 1.0, 1.0, -1.0}; 39 | Expression y = input(cg, y_dim, &y_values); 40 | 41 | Expression h = tanh(W*x + b); 42 | //Expression h = tanh(affine_transform({b, W, x})); 43 | //Expression h = softsign(W*x + b); 44 | Expression y_pred = V*h + a; 45 | Expression loss = squared_distance(y_pred, y); 46 | Expression sum_loss = sum_batches(loss); 47 | 48 | cg.PrintGraphviz(); 49 | if (argc == 2) { 50 | ifstream in(argv[1]); 51 | boost::archive::text_iarchive ia(in); 52 | ia >> m; 53 | } 54 | 55 | // train the parameters 56 | for (unsigned iter = 0; iter < ITERATIONS; ++iter) { 57 | float my_loss = as_scalar(cg.forward()) / 4; 58 | cg.backward(); 59 | sgd.update(0.25); 60 | sgd.update_epoch(); 61 | cerr << "E = " << my_loss << endl; 62 | } 63 | //boost::archive::text_oarchive oa(cout); 64 | //oa << m; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /examples/xor-xent.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/training.h" 4 | #include "cnn/expr.h" 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace cnn; 14 | using namespace cnn::expr; 15 | 16 | int main(int argc, char** argv) { 17 | cnn::Initialize(argc, argv); 18 | 19 | // parameters 20 | const unsigned HIDDEN_SIZE = 8; 21 | Model m; 22 | SimpleSGDTrainer sgd(&m); 23 | //MomentumSGDTrainer sgd(&m); 24 | 25 | ComputationGraph cg; 26 | 27 | Expression W = parameter(cg, m.add_parameters({HIDDEN_SIZE, 2})); 28 | Expression b = parameter(cg, m.add_parameters({HIDDEN_SIZE})); 29 | Expression V = parameter(cg, m.add_parameters({1, HIDDEN_SIZE})); 30 | Expression a = parameter(cg, m.add_parameters({1})); 31 | 32 | vector x_values(2); // set x_values to change the inputs to the network 33 | Expression x = input(cg, {2}, &x_values); 34 | cnn::real y_value; // 
set y_value to change the target output 35 | Expression y = input(cg, &y_value); 36 | 37 | Expression h = tanh(W*x + b); 38 | Expression y_pred = logistic(V*h + a); 39 | Expression loss = binary_log_loss(y_pred, y); 40 | 41 | cg.PrintGraphviz(); 42 | //if (argc == 2) { 43 | // ifstream in(argv[1]); 44 | // boost::archive::text_iarchive ia(in); 45 | // ia >> m; 46 | //} 47 | 48 | // train the parameters 49 | for (unsigned iter = 0; iter < 2000; ++iter) { 50 | double loss = 0; 51 | for (unsigned mi = 0; mi < 4; ++mi) { 52 | bool x1 = mi % 2; 53 | bool x2 = (mi / 2) % 2; 54 | x_values[0] = x1 ? 1 : 0; 55 | x_values[1] = x2 ? 1 : 0; 56 | y_value = (x1 != x2) ? 1 : 0; 57 | loss += as_scalar(cg.forward()); 58 | cg.backward(); 59 | sgd.update(1.0); 60 | } 61 | sgd.update_epoch(); 62 | loss /= 4; 63 | cerr << "E = " << loss << endl; 64 | } 65 | boost::archive::text_oarchive oa(cout); 66 | oa << m; 67 | } 68 | 69 | -------------------------------------------------------------------------------- /examples/xor.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/training.h" 4 | #include "cnn/gpu-ops.h" 5 | #include "cnn/expr.h" 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace cnn; 14 | using namespace cnn::expr; 15 | 16 | int main(int argc, char** argv) { 17 | cnn::Initialize(argc, argv); 18 | 19 | // parameters 20 | const unsigned HIDDEN_SIZE = 8; 21 | const unsigned ITERATIONS = 30; 22 | Model m; 23 | SimpleSGDTrainer sgd(&m); 24 | //MomentumSGDTrainer sgd(&m); 25 | 26 | ComputationGraph cg; 27 | 28 | Expression W = parameter(cg, m.add_parameters({HIDDEN_SIZE, 2})); 29 | Expression b = parameter(cg, m.add_parameters({HIDDEN_SIZE})); 30 | Expression V = parameter(cg, m.add_parameters({1, HIDDEN_SIZE})); 31 | Expression a = parameter(cg, m.add_parameters({1})); 32 | 33 | vector x_values(2); // set x_values to change the inputs to the network 34 | Expression x = input(cg, {2}, &x_values); 35 | cnn::real y_value; // set y_value to change the target output 36 | Expression y = input(cg, &y_value); 37 | 38 | Expression h = tanh(W*x + b); 39 | //Expression h = tanh(affine_transform({b, W, x})); 40 | //Expression h = softsign(W*x + b); 41 | Expression y_pred = V*h + a; 42 | Expression loss = squared_distance(y_pred, y); 43 | 44 | cg.PrintGraphviz(); 45 | if (argc == 2) { 46 | ifstream in(argv[1]); 47 | boost::archive::text_iarchive ia(in); 48 | ia >> m; 49 | } 50 | 51 | // train the parameters 52 | for (unsigned iter = 0; iter < ITERATIONS; ++iter) { 53 | double loss = 0; 54 | for (unsigned mi = 0; mi < 4; ++mi) { 55 | bool x1 = mi % 2; 56 | bool x2 = (mi / 2) % 2; 57 | x_values[0] = x1 ? 1 : -1; 58 | x_values[1] = x2 ? 1 : -1; 59 | y_value = (x1 != x2) ? 
1 : -1; 60 | loss += as_scalar(cg.forward()); 61 | cg.backward(); 62 | sgd.update(1.0); 63 | } 64 | sgd.update_epoch(); 65 | loss /= 4; 66 | cerr << "E = " << loss << endl; 67 | } 68 | boost::archive::text_oarchive oa(cout); 69 | oa << m; 70 | } 71 | 72 | -------------------------------------------------------------------------------- /pycnn/INSTALL: -------------------------------------------------------------------------------- 1 | See installation instruction in ../INSTALL.md 2 | -------------------------------------------------------------------------------- /pycnn/makefile: -------------------------------------------------------------------------------- 1 | 2 | PYTHON := python 3 | UNAME := $(shell uname) 4 | ifeq ($(UNAME),Darwin) 5 | DYSUF := dylib 6 | else 7 | DYSUF := so 8 | endif 9 | 10 | pycnn.${DYSUF}: ../build/cnn/libcnn_shared.${DYSUF} pycnn.pyx pycnn.pxd setup.py 11 | cp ../build/cnn/libcnn_shared.${DYSUF} . 12 | ${PYTHON} setup.py build_ext --inplace 13 | 14 | gpycnn.${DYSUF}: ../build/cnn/libgcnn_shared.${DYSUF} pycnn.pyx pycnn.pxd setup_gpu.py 15 | cp ../build/cnn/libgcnn_shared.${DYSUF} . 16 | cp ../build/cnn/libcnncuda_shared.${DYSUF} . 17 | cp pycnn.pyx gpycnn.pyx 18 | cp pycnn.pxd gpycnn.pxd 19 | ${PYTHON} setup_gpu.py build_ext --inplace 20 | 21 | ginstall: gpycnn.${DYSUF} 22 | ${PYTHON} setup_gpu.py install --user 23 | 24 | install: pycnn.${DYSUF} 25 | ${PYTHON} setup.py install --user 26 | 27 | clean: 28 | rm *.${DYSUF} *.cpp 29 | 30 | 31 | -------------------------------------------------------------------------------- /pycnn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools.extension import Extension 3 | from Cython.Distutils import build_ext 4 | 5 | 6 | # Remove the "-Wstrict-prototypes" compiler option, which isn't valid for C++. 7 | import distutils.sysconfig 8 | cfg_vars = distutils.sysconfig.get_config_vars() 9 | if "CFLAGS" in cfg_vars: 10 | cfg_vars["CFLAGS"] = cfg_vars["CFLAGS"].replace("-Wstrict-prototypes", "") 11 | 12 | ext = Extension( 13 | "pycnn", # name of extension 14 | ["pycnn.pyx"], # filename of our Pyrex/Cython source 15 | language="c++", # this causes Pyrex/Cython to create C++ source 16 | include_dirs=["../../cnn/", # this is the location of the main cnn directory. 17 | "../../eigen/"], # this is the directory where eigen is saved. 18 | libraries=['cnn_shared'], # ditto 19 | library_dirs=["."], 20 | #extra_link_args=["-L/home/yogo/Vork/Research/cnn/cnn/build/cnn"], # if needed 21 | extra_compile_args=["-std=c++11"], 22 | runtime_library_dirs=["$ORIGIN/./"], 23 | ) 24 | 25 | setup(ext_modules = [ext], 26 | cmdclass = {'build_ext': build_ext}, 27 | name="pyCNN", 28 | ) 29 | -------------------------------------------------------------------------------- /pycnn/setup_gpu.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools.extension import Extension 3 | from Cython.Distutils import build_ext 4 | 5 | 6 | # Remove the "-Wstrict-prototypes" compiler option, which isn't valid for C++. 
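# (distutils compiles extensions with the CFLAGS Python itself was built
# with; "-Wstrict-prototypes" only applies to C, so g++ warns on every
# file unless the flag is stripped here -- the same workaround as in
# setup.py above.)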
7 | import distutils.sysconfig 8 | cfg_vars = distutils.sysconfig.get_config_vars() 9 | if "CFLAGS" in cfg_vars: 10 | cfg_vars["CFLAGS"] = cfg_vars["CFLAGS"].replace("-Wstrict-prototypes", "") 11 | 12 | ext = Extension( 13 | "gpycnn", # name of extension 14 | ["gpycnn.pyx"], # filename of our Pyrex/Cython source 15 | language="c++", # this causes Pyrex/Cython to create C++ source 16 | include_dirs=["../../cnn/", # this is the location of the main cnn directory. 17 | "../../eigen/"], # this is the directory where eigen is saved. 18 | #libraries=['cnn','cnncuda'], #,'cnncuda_shared'], # ditto 19 | libraries=['gcnn_shared','cnncuda_shared'], # ditto 20 | library_dirs=["."], 21 | #extra_link_args=["-L/home/yogo/Vork/Research/cnn/cnn/build/cnn"], # if needed 22 | extra_compile_args=["-std=c++11","-fPIC"],#,"-lcudart","-lcublas"], 23 | extra_link_args=["-L/usr/local/cuda-7.5/lib64","-lcudart","-lcublas"], 24 | #extra_objects=["libcnncuda.a"], 25 | ) 26 | 27 | setup(ext_modules = [ext], 28 | cmdclass = {'build_ext': build_ext}, 29 | name="gpyCNN", 30 | ) 31 | -------------------------------------------------------------------------------- /pyexamples/attention.py: -------------------------------------------------------------------------------- 1 | import pycnn as pc 2 | import random 3 | 4 | EOS = "" 5 | characters = list("abcdefghijklmnopqrstuvwxyz ") 6 | characters.append(EOS) 7 | 8 | int2char = list(characters) 9 | char2int = {c:i for i,c in enumerate(characters)} 10 | 11 | VOCAB_SIZE = len(characters) 12 | 13 | LSTM_NUM_OF_LAYERS = 2 14 | EMBEDDINGS_SIZE = 32 15 | STATE_SIZE = 32 16 | ATTENTION_SIZE = 32 17 | 18 | model = pc.Model() 19 | 20 | enc_fwd_lstm = pc.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model) 21 | enc_bwd_lstm = pc.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model) 22 | 23 | dec_lstm = pc.LSTMBuilder(LSTM_NUM_OF_LAYERS, STATE_SIZE*2, STATE_SIZE, model) 24 | 25 | model.add_lookup_parameters("lookup", (VOCAB_SIZE, EMBEDDINGS_SIZE)) 26 | model.add_parameters("attention_w1", (ATTENTION_SIZE, STATE_SIZE*2)) 27 | model.add_parameters("attention_w2", (ATTENTION_SIZE, STATE_SIZE*LSTM_NUM_OF_LAYERS*2)) 28 | model.add_parameters("attention_v", (1, ATTENTION_SIZE)) 29 | model.add_parameters("decoder_w", (VOCAB_SIZE, STATE_SIZE)) 30 | model.add_parameters("decoder_b", (VOCAB_SIZE)) 31 | 32 | 33 | def embedd_sentence(model, sentence): 34 | sentence = [EOS] + list(sentence) + [EOS] 35 | sentence = [char2int[c] for c in sentence] 36 | 37 | lookup = model["lookup"] 38 | 39 | return [lookup[char] for char in sentence] 40 | 41 | 42 | def run_lstm(model, init_state, input_vecs): 43 | s = init_state 44 | 45 | out_vectors = [] 46 | for vector in input_vecs: 47 | s = s.add_input(vector) 48 | out_vector = s.output() 49 | out_vectors.append(out_vector) 50 | return out_vectors 51 | 52 | 53 | def encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, sentence): 54 | sentence_rev = [sentence[i] for i in range(len(sentence)-1, -1, -1)] 55 | 56 | fwd_vectors = run_lstm(model, enc_fwd_lstm.initial_state(), sentence) 57 | bwd_vectors = run_lstm(model, enc_bwd_lstm.initial_state(), sentence_rev) 58 | bwd_vectors = [bwd_vectors[i] for i in range(len(bwd_vectors)-1, -1, -1)] 59 | vectors = [pc.concatenate(list(p)) for p in zip(fwd_vectors, bwd_vectors)] 60 | 61 | return vectors 62 | 63 | 64 | def attend(model, input_vectors, state): 65 | w1 = pc.parameter(model['attention_w1']) 66 | w2 = pc.parameter(model['attention_w2']) 67 | v = pc.parameter(model['attention_v']) 68 | 
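    # Scores follow the usual MLP attention form: for each encoded input
    # vector e_i and the current decoder state s, the unnormalized weight is
    #   v * tanh(w1 * e_i + w2 * concat(s));
    # w2dt below is computed once, since it does not depend on i, and a
    # softmax over the scores then gives the attention distribution.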
attention_weights = [] 69 | 70 | w2dt = w2*pc.concatenate(list(state.s())) 71 | for input_vector in input_vectors: 72 | attention_weight = v*pc.tanh(w1*input_vector + w2dt) 73 | attention_weights.append(attention_weight) 74 | attention_weights = pc.softmax(pc.concatenate(attention_weights)) 75 | output_vectors = pc.esum([vector*attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)]) 76 | return output_vectors 77 | 78 | 79 | def decode(model, dec_lstm, vectors, output): 80 | output = [EOS] + list(output) + [EOS] 81 | output = [char2int[c] for c in output] 82 | 83 | w = pc.parameter(model["decoder_w"]) 84 | b = pc.parameter(model["decoder_b"]) 85 | 86 | s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE*2)) 87 | 88 | loss = [] 89 | for char in output: 90 | vector = attend(model, vectors, s) 91 | 92 | s = s.add_input(vector) 93 | out_vector = w * s.output() + b 94 | probs = pc.softmax(out_vector) 95 | loss.append(-pc.log(pc.pick(probs, char))) 96 | loss = pc.esum(loss) 97 | return loss 98 | 99 | 100 | def generate(model, input, enc_fwd_lstm, enc_bwd_lstm, dec_lstm): 101 | def sample(probs): 102 | rnd = random.random() 103 | for i, p in enumerate(probs): 104 | rnd -= p 105 | if rnd <= 0: break 106 | return i 107 | 108 | embedded = embedd_sentence(model, input) 109 | encoded = encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, embedded) 110 | 111 | w = pc.parameter(model["decoder_w"]) 112 | b = pc.parameter(model["decoder_b"]) 113 | 114 | s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE * 2)) 115 | out = '' 116 | count_EOS = 0 117 | for i in range(len(input)*2): 118 | if count_EOS == 2: break 119 | vector = attend(model, encoded, s) 120 | 121 | s = s.add_input(vector) 122 | out_vector = w * s.output() + b 123 | probs = pc.softmax(out_vector) 124 | probs = probs.vec_value() 125 | next_char = sample(probs) 126 | if int2char[next_char] == EOS: 127 | count_EOS += 1 128 | continue 129 | 130 | out += int2char[next_char] 131 | return out 132 | 133 | 134 | def get_loss(model, input_sentence, output_sentence, enc_fwd_lstm, enc_bwd_lstm, dec_lstm): 135 | pc.renew_cg() 136 | embedded = embedd_sentence(model, input_sentence) 137 | encoded = encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, embedded) 138 | return decode(model, dec_lstm, encoded, output_sentence) 139 | 140 | 141 | def train(model, sentence): 142 | trainer = pc.SimpleSGDTrainer(model) 143 | for i in xrange(600): 144 | loss = get_loss(model, sentence, sentence, enc_fwd_lstm, enc_bwd_lstm, dec_lstm) 145 | loss_value = loss.value() 146 | loss.backward() 147 | trainer.update() 148 | if i % 20 == 0: 149 | print loss_value 150 | print generate(model, sentence, enc_fwd_lstm, enc_bwd_lstm, dec_lstm) 151 | 152 | 153 | train(model, "it is working") 154 | 155 | 156 | -------------------------------------------------------------------------------- /pyexamples/bilstmtagger.py: -------------------------------------------------------------------------------- 1 | from pycnn import * 2 | from collections import Counter 3 | import random 4 | 5 | import util 6 | 7 | # format of files: each line is "wordtag", blank line is new sentence. 
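# i.e. two whitespace-separated columns per line, for example
# (illustrative words/tags; read() below just expects word-tag pairs):
#   The     DT
#   dog     NN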
8 | train_file="/home/yogo/Vork/Research/corpora/pos/WSJ.TRAIN" 9 | test_file="/home/yogo/Vork/Research/corpora/pos/WSJ.TEST" 10 | 11 | 12 | MLP=True 13 | 14 | def read(fname): 15 | sent = [] 16 | for line in file(fname): 17 | line = line.strip().split() 18 | if not line: 19 | if sent: yield sent 20 | sent = [] 21 | else: 22 | w,p = line 23 | sent.append((w,p)) 24 | 25 | train=list(read(train_file)) 26 | test=list(read(test_file)) 27 | words=[] 28 | tags=[] 29 | wc=Counter() 30 | for s in train: 31 | for w,p in s: 32 | words.append(w) 33 | tags.append(p) 34 | wc[w]+=1 35 | words.append("_UNK_") 36 | #words=[w if wc[w] > 1 else "_UNK_" for w in words] 37 | tags.append("_START_") 38 | 39 | for s in test: 40 | for w,p in s: 41 | words.append(w) 42 | 43 | vw = util.Vocab.from_corpus([words]) 44 | vt = util.Vocab.from_corpus([tags]) 45 | UNK = vw.w2i["_UNK_"] 46 | 47 | nwords = vw.size() 48 | ntags = vt.size() 49 | 50 | model = Model() 51 | sgd = SimpleSGDTrainer(model) 52 | 53 | model.add_lookup_parameters("lookup", (nwords, 128)) 54 | model.add_lookup_parameters("tl", (ntags, 30)) 55 | if MLP: 56 | pH = model.add_parameters("HID", (32, 50*2)) 57 | pO = model.add_parameters("OUT", (ntags, 32)) 58 | else: 59 | pO = model.add_parameters("OUT", (ntags, 50*2)) 60 | 61 | builders=[ 62 | LSTMBuilder(1, 128, 50, model), 63 | LSTMBuilder(1, 128, 50, model), 64 | ] 65 | 66 | def build_tagging_graph(words, tags, model, builders): 67 | renew_cg() 68 | f_init, b_init = [b.initial_state() for b in builders] 69 | 70 | wembs = [lookup(model["lookup"], w) for w in words] 71 | wembs = [noise(we,0.1) for we in wembs] 72 | 73 | fw = [x.output() for x in f_init.add_inputs(wembs)] 74 | bw = [x.output() for x in b_init.add_inputs(reversed(wembs))] 75 | 76 | if MLP: 77 | H = parameter(pH) 78 | O = parameter(pO) 79 | else: 80 | O = parameter(pO) 81 | errs = [] 82 | for f,b,t in zip(fw, reversed(bw), tags): 83 | f_b = concatenate([f,b]) 84 | if MLP: 85 | r_t = O*(tanh(H * f_b)) 86 | else: 87 | r_t = O * f_b 88 | err = pickneglogsoftmax(r_t, t) 89 | errs.append(err) 90 | return esum(errs) 91 | 92 | def tag_sent(sent, model, builders): 93 | renew_cg() 94 | f_init, b_init = [b.initial_state() for b in builders] 95 | wembs = [lookup(model["lookup"], vw.w2i.get(w, UNK)) for w,t in sent] 96 | 97 | fw = [x.output() for x in f_init.add_inputs(wembs)] 98 | bw = [x.output() for x in b_init.add_inputs(reversed(wembs))] 99 | 100 | if MLP: 101 | H = parameter(pH) 102 | O = parameter(pO) 103 | else: 104 | O = parameter(pO) 105 | tags=[] 106 | for f,b,(w,t) in zip(fw,reversed(bw),sent): 107 | if MLP: 108 | r_t = O*(tanh(H * concatenate([f,b]))) 109 | else: 110 | r_t = O*concatenate([f,b]) 111 | out = softmax(r_t) 112 | chosen = np.argmax(out.npvalue()) 113 | tags.append(vt.i2w[chosen]) 114 | return tags 115 | 116 | 117 | tagged = loss = 0 118 | for ITER in xrange(50): 119 | random.shuffle(train) 120 | for i,s in enumerate(train,1): 121 | if i % 5000 == 0: 122 | sgd.status() 123 | print loss / tagged 124 | loss = 0 125 | tagged = 0 126 | if i % 10000 == 0: 127 | good = bad = 0.0 128 | for sent in test: 129 | tags = tag_sent(sent, model, builders) 130 | golds = [t for w,t in sent] 131 | for go,gu in zip(golds,tags): 132 | if go == gu: good +=1 133 | else: bad+=1 134 | print good/(good+bad) 135 | ws = [vw.w2i.get(w, UNK) for w,p in s] 136 | ps = [vt.w2i[p] for w,p in s] 137 | sum_errs = build_tagging_graph(ws,ps,model,builders) 138 | squared = -sum_errs# * sum_errs 139 | loss += sum_errs.scalar_value() 140 | tagged += len(ps) 141 | 
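        # scalar_value() above already triggered the forward pass;
        # backward() below accumulates gradients through the whole tagging
        # graph, and sgd.update() then applies them to the parameters.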
sum_errs.backward() 142 | sgd.update() 143 | 144 | 145 | -------------------------------------------------------------------------------- /pyexamples/cpu_vs_gpu.py: -------------------------------------------------------------------------------- 1 | import gpycnn as G 2 | print 3 | import pycnn as C 4 | 5 | cm = C.Model() 6 | gm = G.Model() 7 | 8 | cpW = cm.add_parameters("W",(1000,1000)) 9 | gpW = gm.add_parameters("W",(1000,1000)) 10 | 11 | def do_cpu(): 12 | C.renew_cg() 13 | W = C.parameter(cpW) 14 | W = W*W*W*W*W*W*W 15 | z = C.squared_distance(W,W) 16 | z.value() 17 | z.backward() 18 | 19 | def do_gpu(): 20 | G.renew_cg() 21 | W = G.parameter(gpW) 22 | W = W*W*W*W*W*W*W 23 | z = G.squared_distance(W,W) 24 | z.value() 25 | z.backward() 26 | 27 | import time 28 | s = time.time() 29 | do_cpu() 30 | print "CPU time:",time.time() - s 31 | 32 | s = time.time() 33 | do_gpu() 34 | print "GPU time:",time.time() - s 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /pyexamples/minibatch.py: -------------------------------------------------------------------------------- 1 | from pycnn import * 2 | import numpy as np 3 | 4 | m = Model() 5 | lp = m.add_lookup_parameters("a",(100,10)) 6 | 7 | # regular lookup 8 | a = lp[1].npvalue() 9 | b = lp[2].npvalue() 10 | c = lp[3].npvalue() 11 | 12 | # batch lookup instead of single elements. 13 | # two ways of doing this. 14 | abc1 = lookup_batch(lp, [1,2,3]) 15 | print abc1.npvalue() 16 | 17 | abc2 = lp.batch([1,2,3]) 18 | print abc2.npvalue() 19 | 20 | print np.hstack([a,b,c]) 21 | 22 | 23 | # use pick and pickneglogsoftmax in batch mode 24 | # (must be used in conjunction with lookup_batch): 25 | print "\nPick" 26 | W = parameter( m.add_parameters("W", (5, 10)) ) 27 | h = W * lp.batch([1,2,3]) 28 | print h.npvalue() 29 | print pick_batch(h,[1,2,3]).npvalue() 30 | print pick(W*lp[1],1).value(), pick(W*lp[2],2).value(), pick(W*lp[3],3).value() 31 | 32 | # using pickneglogsoftmax_batch 33 | print "\nPick neg log softmax" 34 | print (-log(softmax(h))).npvalue() 35 | print pickneglogsoftmax_batch(h,[1,2,3]).npvalue() 36 | -------------------------------------------------------------------------------- /pyexamples/rnnlm.py: -------------------------------------------------------------------------------- 1 | from pycnn import * 2 | import time 3 | import random 4 | 5 | LAYERS = 2 6 | INPUT_DIM = 50 #256 7 | HIDDEN_DIM = 50 #1024 8 | VOCAB_SIZE = 0 9 | 10 | from collections import defaultdict 11 | from itertools import count 12 | import sys 13 | import util 14 | 15 | class RNNLanguageModel: 16 | def __init__(self, model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=SimpleRNNBuilder): 17 | self.m = model 18 | self.builder = builder(LAYERS, INPUT_DIM, HIDDEN_DIM, model) 19 | 20 | model.add_lookup_parameters("lookup", (VOCAB_SIZE, INPUT_DIM)) 21 | model.add_parameters("R", (VOCAB_SIZE, HIDDEN_DIM)) 22 | model.add_parameters("bias", (VOCAB_SIZE)) 23 | 24 | def BuildLMGraph(self, sent): 25 | renew_cg() 26 | init_state = self.builder.initial_state() 27 | 28 | R = parameter(self.m["R"]) 29 | bias = parameter(self.m["bias"]) 30 | errs = [] # will hold expressions 31 | es=[] 32 | state = init_state 33 | for (cw,nw) in zip(sent,sent[1:]): 34 | # assume word is already a word-id 35 | x_t = lookup(self.m["lookup"], int(cw)) 36 | state = state.add_input(x_t) 37 | y_t = state.output() 38 | r_t = bias + (R * y_t) 39 | err = pickneglogsoftmax(r_t, int(nw)) 40 | errs.append(err) 41 | nerr = esum(errs) 42 | return 
nerr 43 | 44 | def sample(self, first=1, nchars=0, stop=-1): 45 | res = [first] 46 | renew_cg() 47 | state = self.builder.initial_state() 48 | 49 | R = parameter(self.m["R"]) 50 | bias = parameter(self.m["bias"]) 51 | cw = first 52 | while True: 53 | x_t = lookup(self.m["lookup"], cw) 54 | state = state.add_input(x_t) 55 | y_t = state.output() 56 | r_t = bias + (R * y_t) 57 | ydist = softmax(r_t) 58 | dist = ydist.vec_value() 59 | rnd = random.random() 60 | for i,p in enumerate(dist): 61 | rnd -= p 62 | if rnd <= 0: break 63 | res.append(i) 64 | cw = i 65 | if cw == stop: break 66 | if nchars and len(res) > nchars: break 67 | return res 68 | 69 | if __name__ == '__main__': 70 | train = util.CharsCorpusReader(sys.argv[1],begin="") 71 | vocab = util.Vocab.from_corpus(train) 72 | 73 | VOCAB_SIZE = vocab.size() 74 | 75 | model = Model() 76 | sgd = SimpleSGDTrainer(model) 77 | 78 | #lm = RNNLanguageModel(model, builder=LSTMBuilder) 79 | lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=SimpleRNNBuilder) 80 | 81 | train = list(train) 82 | 83 | chars = loss = 0.0 84 | for ITER in xrange(100): 85 | random.shuffle(train) 86 | for i,sent in enumerate(train): 87 | _start = time.time() 88 | if i % 50 == 0: 89 | sgd.status() 90 | if chars > 0: print loss / chars, 91 | for _ in xrange(1): 92 | samp = lm.sample(first=vocab.w2i[""],stop=vocab.w2i["\n"]) 93 | print "".join([vocab.i2w[c] for c in samp]).strip() 94 | loss = 0.0 95 | chars = 0.0 96 | 97 | chars += len(sent)-1 98 | isent = [vocab.w2i[w] for w in sent] 99 | errs = lm.BuildLMGraph(isent) 100 | loss += errs.scalar_value() 101 | errs.backward() 102 | sgd.update(1.0) 103 | #print "TM:",(time.time() - _start)/len(sent) 104 | print "ITER",ITER,loss 105 | sgd.status() 106 | sgd.update_epoch(1.0) 107 | -------------------------------------------------------------------------------- /pyexamples/rnnlm_transduce.py: -------------------------------------------------------------------------------- 1 | # a version rnnlm.py using the transduce() interface. 2 | from pycnn import * 3 | import time 4 | import random 5 | 6 | LAYERS = 2 7 | INPUT_DIM = 50 #256 8 | HIDDEN_DIM = 50 #1024 9 | VOCAB_SIZE = 0 10 | 11 | from collections import defaultdict 12 | from itertools import count, izip 13 | import sys 14 | import util 15 | 16 | class RNNLanguageModel: 17 | def __init__(self, model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=SimpleRNNBuilder): 18 | self.m = model 19 | self.builder = builder(LAYERS, INPUT_DIM, HIDDEN_DIM, model) 20 | 21 | model.add_lookup_parameters("lookup", (VOCAB_SIZE, INPUT_DIM)) 22 | model.add_parameters("R", (VOCAB_SIZE, HIDDEN_DIM)) 23 | model.add_parameters("bias", (VOCAB_SIZE)) 24 | 25 | def BuildLMGraph(self, sent): 26 | renew_cg() 27 | init_state = self.builder.initial_state() 28 | 29 | R = parameter(self.m["R"]) 30 | bias = parameter(self.m["bias"]) 31 | errs = [] # will hold expressions 32 | es=[] 33 | state = init_state 34 | lookup = self.m["lookup"] 35 | inputs = [lookup[int(cw)] for cw in sent[:-1]] 36 | expected_outputs = [int(nw) for nw in sent[1:]] 37 | outputs = state.transduce(inputs) 38 | r_ts = ((bias + (R * y_t)) for y_t in outputs) 39 | errs = [pickneglogsoftmax(r_t, eo) for r_t, eo in izip(r_ts, expected_outputs)] 40 | nerr = esum(errs) 41 | return nerr 42 | 43 | def sample(self, first=1, nchars=0, stop=-1): 44 | # sampling must use the regular incremental interface. 
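        # (transduce() maps a fully-known input sequence to outputs in one
        # call; when sampling, each chosen character becomes the next
        # input, so the state must be advanced one add_input() at a time.)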
45 | res = [first] 46 | renew_cg() 47 | state = self.builder.initial_state() 48 | 49 | R = parameter(self.m["R"]) 50 | bias = parameter(self.m["bias"]) 51 | cw = first 52 | while True: 53 | x_t = lookup(self.m["lookup"], cw) 54 | state = state.add_input(x_t) 55 | y_t = state.output() 56 | r_t = bias + (R * y_t) 57 | ydist = softmax(r_t) 58 | dist = ydist.vec_value() 59 | rnd = random.random() 60 | for i,p in enumerate(dist): 61 | rnd -= p 62 | if rnd <= 0: break 63 | res.append(i) 64 | cw = i 65 | if cw == stop: break 66 | if nchars and len(res) > nchars: break 67 | return res 68 | 69 | if __name__ == '__main__': 70 | train = util.CharsCorpusReader(sys.argv[1],begin="") 71 | vocab = util.Vocab.from_corpus(train) 72 | 73 | VOCAB_SIZE = vocab.size() 74 | 75 | model = Model() 76 | sgd = SimpleSGDTrainer(model) 77 | 78 | #lm = RNNLanguageModel(model, builder=LSTMBuilder) 79 | lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=SimpleRNNBuilder) 80 | 81 | train = list(train) 82 | 83 | chars = loss = 0.0 84 | for ITER in xrange(100): 85 | random.shuffle(train) 86 | for i,sent in enumerate(train): 87 | _start = time.time() 88 | if i % 50 == 0: 89 | sgd.status() 90 | if chars > 0: print loss / chars, 91 | for _ in xrange(1): 92 | samp = lm.sample(first=vocab.w2i[""],stop=vocab.w2i["\n"]) 93 | print "".join([vocab.i2w[c] for c in samp]).strip() 94 | loss = 0.0 95 | chars = 0.0 96 | 97 | chars += len(sent)-1 98 | isent = [vocab.w2i[w] for w in sent] 99 | errs = lm.BuildLMGraph(isent) 100 | loss += errs.scalar_value() 101 | errs.backward() 102 | sgd.update(1.0) 103 | #print "TM:",(time.time() - _start)/len(sent) 104 | print "ITER",ITER,loss 105 | sgd.status() 106 | sgd.update_epoch(1.0) 107 | -------------------------------------------------------------------------------- /pyexamples/util.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from itertools import count 3 | class Vocab: 4 | def __init__(self, w2i=None): 5 | if w2i is None: w2i = defaultdict(count(0).next) 6 | self.w2i = dict(w2i) 7 | self.i2w = {i:w for w,i in w2i.iteritems()} 8 | @classmethod 9 | def from_corpus(cls, corpus): 10 | w2i = defaultdict(count(0).next) 11 | for sent in corpus: 12 | [w2i[word] for word in sent] 13 | return Vocab(w2i) 14 | 15 | def size(self): return len(self.w2i.keys()) 16 | 17 | class CorpusReader: 18 | def __init__(self, fname): 19 | self.fname = fname 20 | def __iter__(self): 21 | for line in file(self.fname): 22 | line = line.strip().split() 23 | #line = [' ' if x == '' else x for x in line] 24 | yield line 25 | 26 | class CharsCorpusReader: 27 | def __init__(self, fname, begin=None): 28 | self.fname = fname 29 | self.begin = begin 30 | def __iter__(self): 31 | begin = self.begin 32 | for line in file(self.fname): 33 | line = list(line) 34 | if begin: 35 | line = [begin] + line 36 | yield line 37 | -------------------------------------------------------------------------------- /pyexamples/xor.py: -------------------------------------------------------------------------------- 1 | from pycnn import * 2 | 3 | #xsent = True 4 | xsent = False 5 | 6 | HIDDEN_SIZE = 8 7 | ITERATIONS = 2000 8 | 9 | m = Model() 10 | sgd = SimpleSGDTrainer(m) 11 | 12 | m.add_parameters("W",(HIDDEN_SIZE, 2)) 13 | m.add_parameters("b",HIDDEN_SIZE) 14 | m.add_parameters("V",(1, HIDDEN_SIZE)) 15 | m.add_parameters("a",1) 16 | 17 | W = parameter(m["W"]) 18 | b = parameter(m["b"]) 19 | V = parameter(m["V"]) 20 | a = 
parameter(m["a"]) 21 | 22 | x = vecInput(2) 23 | y = scalarInput(0) 24 | h = tanh((W*x) + b) 25 | if xsent: 26 | y_pred = logistic((V*h) + a) 27 | loss = binary_log_loss(y_pred, y) 28 | T = 1 29 | F = 0 30 | else: 31 | y_pred = (V*h) + a 32 | loss = squared_distance(y_pred, y) 33 | T = 1 34 | F = -1 35 | 36 | 37 | for iter in xrange(ITERATIONS): 38 | mloss = 0.0 39 | for mi in xrange(4): 40 | x1 = mi % 2 41 | x2 = (mi / 2) % 2 42 | x.set([T if x1 else F, T if x2 else F]) 43 | y.set(T if x1 != x2 else F) 44 | #mloss += cg().forward_scalar() 45 | mloss += loss.scalar_value() 46 | #cg().backward() 47 | loss.backward() 48 | sgd.update(1.0) 49 | sgd.update_epoch(); 50 | mloss /= 4. 51 | print "loss: %0.9f" % mloss 52 | 53 | x.set([F,T]) 54 | z = -(-y_pred) 55 | print z.scalar_value() 56 | #print y_pred.scalar() 57 | 58 | renew_cg() 59 | W = parameter(m["W"]) 60 | b = parameter(m["b"]) 61 | V = parameter(m["V"]) 62 | a = parameter(m["a"]) 63 | 64 | x = vecInput(2) 65 | y = scalarInput(0) 66 | h = tanh((W*x) + b) 67 | if xsent: 68 | y_pred = logistic((V*h) + a) 69 | else: 70 | y_pred = (V*h) + a 71 | x.set([T,F]) 72 | print "TF",y_pred.scalar_value() 73 | x.set([F,F]) 74 | print "FF",y_pred.scalar_value() 75 | x.set([T,T]) 76 | print "TT",y_pred.scalar_value() 77 | x.set([F,T]) 78 | print "FT",y_pred.scalar_value() 79 | 80 | -------------------------------------------------------------------------------- /rnnlm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8) 2 | 3 | foreach(TARGET lm) 4 | ADD_EXECUTABLE(${TARGET} ${TARGET}.cc) 5 | target_link_libraries(${TARGET} cnn ${LIBS} pthread) 6 | if(UNIX AND NOT APPLE) 7 | target_link_libraries(${TARGET} rt) 8 | endif() 9 | if (WITH_CUDA_BACKEND) 10 | add_dependencies(${TARGET} cnncuda) 11 | target_link_libraries(${TARGET} cnncuda) 12 | CUDA_ADD_CUBLAS_TO_TARGET(${TARGET}) 13 | endif (WITH_CUDA_BACKEND) 14 | endforeach() 15 | 16 | -------------------------------------------------------------------------------- /rnnlm/README.md: -------------------------------------------------------------------------------- 1 | ### Obtaining LM data 2 | 3 | ./install-examples.sh 4 | 5 | This downloads the data used in this tutorial. 
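The archive unpacks into `ptb-mikolov/` under this directory. The training and evaluation commands below assume the following files inside it:

    ptb-mikolov/
      train.txt            # training corpus
      valid.txt            # development/validation set
      test.txt             # evaluation set
      clusters-mkcls.txt   # mkcls word clusters for the class-factored softmax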
6 | 7 | ### Training example 8 | 9 | Train an LSTM LM using a class-factor softmax: 10 | 11 | ./rnnlm/lm -x -s -t ../rnnlm/ptb-mikolov/train.txt -d ../rnnlm/ptb-mikolov/valid.txt \ 12 | -c ../rnnlm/ptb-mikolov/clusters-mkcls.txt -D 0.3 -H 256 --eta_decay_onset_epoch 10 --eta_decay_rate 0.5 13 | 14 | Train an LSTM LM with a standard softmax: 15 | 16 | ./rnnlm/lm -x -s -t ../rnnlm/ptb-mikolov/train.txt -d ../rnnlm/ptb-mikolov/valid.txt \ 17 | -D 0.3 -H 256 --eta_decay_onset_epoch 10 --eta_decay_rate 0.5 18 | 19 | ### Evaluation example 20 | 21 | Evaluate a trained model: 22 | 23 | ./rnnlm/lm -t ../rnnlm/ptb-mikolov/train.txt -c ../rnnlm/ptb-mikolov/clusters-mkcls.txt \ 24 | -m lm_0.3_2_128_256-pid7865.params -H 256 -p ../rnnlm/ptb-mikolov/test.txt 25 | 26 | ### PTB Baselines 27 | 28 | | Model | dev | test | 29 | | ----- | ---:| ----:| 30 | | 5-gram KN | 188.0 | 178.9 | 31 | | 2x128, dropout=0.3, class-factored softmax | 164.4 | 157.7 | 32 | | 2x256, dropout=0.3, CFSM, decay 0.5@>10 | 129.7 | 125.4 | 33 | -------------------------------------------------------------------------------- /rnnlm/install-examples.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | data_version=ptb-mikolov-01.tar.gz 5 | 6 | ROOTDIR=`dirname $0` 7 | cd $ROOTDIR 8 | 9 | rm -f $data_version 10 | rm -rf ptb-mikolov 11 | curl -f http://demo.clab.cs.cmu.edu/cdyer/$data_version -o $data_version 12 | tar xzf $data_version 13 | rm -f $data_version 14 | 15 | echo SUCCESS. 1>&2 16 | 17 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package (Boost COMPONENTS system filesystem unit_test_framework REQUIRED) 2 | include_directories (${TEST_SOURCE_DIR}/src 3 | ${Boost_INCLUDE_DIRS} 4 | ) 5 | 6 | add_definitions (-DBOOST_TEST_DYN_LINK) 7 | 8 | # Sources: 9 | set(test_cnn_SRCS 10 | test-nodes.cc 11 | ) 12 | 13 | add_executable (test-cnn test-cnn.cc ${test_cnn_SRCS}) 14 | target_link_libraries (test-cnn cnn ${LIBS} 15 | ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} 16 | ) 17 | if (WITH_CUDA_BACKEND) 18 | add_dependencies(test-cnn cnncuda) 19 | target_link_libraries(test-cnn cnncuda) 20 | CUDA_ADD_CUBLAS_TO_TARGET(test-cnn) 21 | endif (WITH_CUDA_BACKEND) 22 | 23 | add_test(test-cnn test-cnn) 24 | 25 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | Use `test-cnn.cc` as a reference for how to set up subsequent tests. 
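For example, a new test file needs no `BOOST_TEST_MODULE` or fixture of its own -- `test-cnn.cc` already defines the module and the global `ConfigureCNNTest` fixture that initializes cnn once per test binary. A minimal sketch of an added file (a hypothetical `test-dim.cc`, which would also be appended to `test_cnn_SRCS` in `CMakeLists.txt`; it assumes `cnn::Dim::size()` returns the total element count):

    #include <boost/test/unit_test.hpp>
    #include "cnn/dim.h"

    BOOST_AUTO_TEST_CASE( dim_element_count ) {
      cnn::Dim d({2, 3});               // dimensions of a 2x3 matrix
      BOOST_CHECK_EQUAL(d.size(), 6u);  // 2 * 3 elements in total
    }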
2 | -------------------------------------------------------------------------------- /tests/test-cnn.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/cnn.h" 2 | #define BOOST_TEST_MODULE CNNBasicTest 3 | #include <boost/test/unit_test.hpp> 4 | 5 | struct ConfigureCNNTest { 6 | ConfigureCNNTest() { 7 | // set up some dummy arguments to cnn 8 | for (auto x : {"ConfigureCNNTest", "--cnn-mem", "10"}) { 9 | av.push_back(strdup(x)); 10 | } 11 | char **argv = &av[0]; 12 | int argc = av.size(); 13 | cnn::Initialize(argc, argv); 14 | } 15 | ~ConfigureCNNTest() { 16 | for (auto x : av) free(x); 17 | } 18 | std::vector<char*> av; 19 | }; 20 | 21 | // configure CNN 22 | BOOST_GLOBAL_FIXTURE(ConfigureCNNTest); 23 | 24 | BOOST_AUTO_TEST_CASE( aligned_allocator ) { 25 | cnn::CPUAllocator a; 26 | void* mem = a.malloc(1024); 27 | BOOST_CHECK_EQUAL(((unsigned long)(mem) & 0x1f), 0); 28 | ((char*)mem)[0] = 99; 29 | a.zero(mem, 1024); 30 | BOOST_CHECK_EQUAL(((char*)mem)[0], 0); 31 | a.free(mem); 32 | } 33 | 34 | --------------------------------------------------------------------------------