├── .gitignore ├── .gitmodules ├── .travis.yml ├── CMakeLists.txt ├── INSTALL.md ├── LICENSE ├── MEM.notes ├── README.md ├── TODO.cnn ├── cmake ├── FindCNN.cmake └── FindEigen3.cmake ├── cnn ├── CMakeLists.txt ├── aligned-mem-pool.cc ├── aligned-mem-pool.h ├── c2w.h ├── cfsm-builder.cc ├── cfsm-builder.h ├── cnn-helper.h ├── cnn.cc ├── cnn.h ├── conv.cc ├── conv.h ├── cuda.cc ├── cuda.h ├── deep-lstm.cc ├── deep-lstm.h ├── devices.cc ├── devices.h ├── dict.cc ├── dict.h ├── dim.cc ├── dim.h ├── except.h ├── exec.cc ├── exec.h ├── expr.cc ├── expr.h ├── fast-lstm.cc ├── fast-lstm.h ├── functors.h ├── gpu-kernels.h ├── gpu-ops.cu ├── gpu-ops.h ├── grad-check.cc ├── grad-check.h ├── graph.cc ├── graph.h ├── gru.cc ├── gru.h ├── hsm-builder.cc ├── hsm-builder.h ├── init.cc ├── init.h ├── lstm.cc ├── lstm.h ├── mem.cc ├── mem.h ├── model.cc ├── model.h ├── mp.cc ├── mp.h ├── nodes-common.cc ├── nodes.cc ├── nodes.h ├── param-nodes.cc ├── param-nodes.h ├── random.h ├── rnn-state-machine.cc ├── rnn-state-machine.h ├── rnn.cc ├── rnn.h ├── saxe-init.cc ├── saxe-init.h ├── shadow-params.cc ├── shadow-params.h ├── simd-functors.h ├── tensor.cc ├── tensor.h ├── tests │ ├── test_edges.cc │ ├── test_init.cc │ └── test_utils.h ├── timing.h ├── training.cc └── training.h ├── config.h.cmake ├── examples ├── CMakeLists.txt ├── embed-cl.cc ├── encdec.cc ├── example-data │ ├── clusters-hsm.txt │ ├── dev-hsm.txt │ ├── dev-poi.txt │ ├── fin-dev.txt │ ├── fin-toy.txt │ ├── fin-words-dev.txt │ ├── fin-words.txt │ ├── fin.txt │ ├── seg-sup.dev.txt │ ├── textcat.txt │ ├── train-hsm.txt │ └── train-poi.txt ├── mlc.cc ├── nlm.cc ├── poisson-regression.cc ├── read-write.cc ├── rnnlm-aevb.cc ├── rnnlm-batch.cc ├── rnnlm-cfsm.cc ├── rnnlm-givenbag.cc ├── rnnlm-mp.cc ├── rnnlm.cc ├── rnnlm.h ├── rnnlm2.cc ├── segrnn-sup.cc ├── skiprnnlm.cc ├── tag-bilstm.cc ├── textcat.cc ├── tok-embed.cc ├── xor-batch-lookup.cc ├── xor-batch.cc ├── xor-xent.cc └── xor.cc ├── pycnn ├── INSTALL ├── makefile ├── pycnn.pxd ├── pycnn.pyx ├── pycnn_viz.py ├── setup.py └── setup_gpu.py ├── pyexamples ├── attention.py ├── bilstmtagger.py ├── cpu_vs_gpu.py ├── minibatch.py ├── rnnlm.py ├── rnnlm_transduce.py ├── tutorials │ ├── API.ipynb │ ├── RNNs.ipynb │ └── tutorial-1-xor.ipynb ├── util.py └── xor.py ├── rnnlm ├── CMakeLists.txt ├── README.md ├── install-examples.sh └── lm.cc └── tests ├── CMakeLists.txt ├── README.md ├── test-cnn.cc └── test-nodes.cc /.gitignore: -------------------------------------------------------------------------------- 1 | # cmake stuff 2 | build/ 3 | Testing/ 4 | cnn/Testing/ 5 | cnn/tests.bin/ 6 | CTestTestfile.cmake 7 | config.h 8 | Makefile 9 | CMakeCache.txt 10 | CMakeFiles 11 | cmake_install.cmake 12 | pycnn/pycnn.cpp 13 | pycnn/dist/ 14 | pycnn/pyCNN.egg-info/ 15 | 16 | # binaries 17 | examples/embed-cl 18 | examples/encdec 19 | examples/xor 20 | examples/xor-xent 21 | examples/rnnlm 22 | examples/nlm 23 | 24 | #data 25 | rnnlm/ptb-mikolov/ 26 | 27 | # Compiled Object files 28 | *.slo 29 | *.lo 30 | *.o 31 | *.obj 32 | 33 | # Precompiled Headers 34 | *.gch 35 | *.pch 36 | 37 | # Compiled Dynamic libraries 38 | *.so 39 | *.dylib 40 | *.dll 41 | 42 | # Fortran module files 43 | *.mod 44 | *.smod 45 | 46 | # Compiled Static libraries 47 | *.lai 48 | *.la 49 | *.a 50 | *.lib 51 | 52 | # Executables 53 | *.exe 54 | *.out 55 | *.app 56 | 57 | .RData 58 | .RHistory 59 | -------------------------------------------------------------------------------- /.gitmodules: 
-------------------------------------------------------------------------------- 1 | [submodule "external/easyloggingpp"] 2 | path = external/easyloggingpp 3 | url = https://github.com/easylogging/easyloggingpp.git 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | compiler: 3 | - gcc 4 | 5 | install: 6 | - if [ "$CXX" = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi 7 | addons: 8 | apt: 9 | sources: 10 | - ubuntu-toolchain-r-test 11 | packages: 12 | - gcc-4.8 13 | - g++-4.8 14 | 15 | before_script: 16 | - sudo apt-get update -qq 17 | - sudo apt-get install libboost-filesystem-dev libboost-program-options-dev libboost-serialization-dev libboost-test-dev libboost-regex-dev 18 | - hg clone https://bitbucket.org/eigen/eigen 19 | - mkdir build 20 | - cd build 21 | - cmake .. -DEIGEN3_INCLUDE_DIR=eigen 22 | 23 | script: 24 | - make 25 | - make test 26 | 27 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(cnn) 2 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR) 3 | 4 | set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) 5 | 6 | # CNN uses Eigen which exploits modern CPU architectures. To get the 7 | # best possible performance, the following are recommended: 8 | # 1. use very recent versions of gcc or Clang to build 9 | # 2. use very recent versions of Eigen (ideally the dev version) 10 | # 3. try compiler options like -march=native or other architecture 11 | # flags (the compiler does not always make the best configuration 12 | # decisions without help) 13 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops -Wall -std=c++11 -Ofast -g -DEIGEN_FAST_MATH -march=native") 14 | 15 | enable_testing() 16 | 17 | include_directories(${CMAKE_CURRENT_SOURCE_DIR} 18 | ${PROJECT_SOURCE_DIR}/external/easyloggingpp/src) 19 | 20 | function(find_cudnn) 21 | set(CUDNN_ROOT "" CACHE PATH "CUDNN root path") 22 | find_path(CUDNN_INCLUDE_DIRS cudnn.h 23 | PATHS ${CUDNN_ROOT} 24 | ${CUDNN_ROOT}/include 25 | DOC "CUDNN include path") 26 | find_library(CUDNN_LIBRARIES NAMES libcudnn.so 27 | PATHS ${CUDNN_ROOT} 28 | ${CUDNN_ROOT}/lib 29 | ${CUDNN_ROOT}/lib64 30 | DOC "CUDNN library path") 31 | if(CUDNN_INCLUDE_DIRS AND CUDNN_LIBRARIES) 32 | set(CUDNN_FOUND TRUE PARENT_SCOPE) 33 | message(STATUS "Found CUDNN (include: ${CUDNN_INCLUDE_DIRS}, library: ${CUDNN_LIBRARIES})") 34 | mark_as_advanced(CUDNN_INCLUDE_DIRS CUDNN_LIBRARIES) 35 | else() 36 | MESSAGE(FATAL_ERROR "Failed to find CUDNN in path: ${CUDNN_ROOT} (Did you set CUDNN_ROOT properly?)") 37 | endif() 38 | endfunction() 39 | 40 | # look for Boost 41 | if(DEFINED ENV{BOOST_ROOT}) 42 | set(Boost_NO_SYSTEM_PATHS ON) 43 | endif() 44 | set(Boost_REALPATH ON) 45 | find_package(Boost COMPONENTS program_options regex serialization REQUIRED) 46 | include_directories(${Boost_INCLUDE_DIR}) 47 | set(LIBS ${LIBS} ${Boost_LIBRARIES}) 48 | # troubleshooting: 49 | # if the boost library cannot be found, then in addition to installing boost, 50 | # check that the environment variables are set 51 | # 52 | # to set the boost root and its library root as environment variables, use, 53 | # for example 54 | # echo "export BOOST_LIBRARYDIR=/usr/local/lib" >> ~/.bashrc 55 | # echo "export BOOST_ROOT=/cygdrive/d/tools/boost_1_58_0/boost_1_58_0" >> ~/.bashrc 56 | # then run source ~/.bashrc to have
those environment variables take effect immediately 57 | 58 | if(BACKEND) 59 | message("-- BACKEND: ${BACKEND}") 60 | else() 61 | message("-- BACKEND not specified, defaulting to eigen.") 62 | set(BACKEND "eigen") 63 | endif() 64 | 65 | if(BACKEND MATCHES "^eigen$") 66 | set(WITH_EIGEN_BACKEND 1) 67 | elseif(BACKEND MATCHES "^cuda$") 68 | set(WITH_CUDA_BACKEND 1) 69 | else() 70 | message(SEND_ERROR "BACKEND must be eigen or cuda") 71 | endif() 72 | 73 | if (WITH_CUDA_BACKEND) 74 | find_package(CUDA REQUIRED) 75 | set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_ROOT}) 76 | include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) 77 | add_definitions(-DHAVE_CUDA) 78 | #list(APPEND CUDA_LIBRARIES /usr/lib64/libpthread.so) 79 | MESSAGE("CUDA_LIBRARIES: ${CUDA_LIBRARIES}") 80 | list(REMOVE_ITEM CUDA_LIBRARIES -lpthread) 81 | set(LIBS ${LIBS} ${CUDA_LIBRARIES}) 82 | #find_cudnn() 83 | #include_directories(SYSTEM ${CUDNN_INCLUDE_DIRS}) 84 | endif() 85 | 86 | # look for Eigen 87 | find_package(Eigen3 REQUIRED) 88 | include_directories(${EIGEN3_INCLUDE_DIR}) 89 | 90 | FIND_PACKAGE(Threads REQUIRED) 91 | set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT}) 92 | 93 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h) 94 | include_directories(${CMAKE_CURRENT_BINARY_DIR}) 95 | 96 | add_subdirectory(cnn) 97 | add_subdirectory(tests) 98 | add_subdirectory(examples) 99 | add_subdirectory(rnnlm) 100 | enable_testing() 101 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installing the pyCNN module 2 | 3 | (For instructions on installing on a computer with a GPU, see below.) 4 | 5 | First, get CNN: 6 | 7 | ```bash 8 | cd $HOME 9 | git clone https://github.com/clab/cnn.git 10 | cd cnn 11 | git submodule init # To be consistent with CNN's installation instructions. 12 | git submodule update # To be consistent with CNN's installation instructions. 13 | ``` 14 | 15 | Then get Eigen: 16 | 17 | ```bash 18 | cd $HOME 19 | cd cnn 20 | # The latest version (17.03.16) of Eigen fails to compile, so we use "-r" to revert to the last stable revision. 21 | # Otherwise, we could use "hg clone https://bitbucket.org/eigen/eigen/" 22 | hg clone https://bitbucket.org/eigen/eigen/ -r 47fa289dda2dc13e0eea70adfc8671e93627d466 23 | ``` 24 | 25 | To simplify the following steps, we can set bash variables to hold where we have saved the main directories of `cnn` and `eigen`. In case you obtained `cnn` and `eigen` differently from the instructions above and saved them in different locations, these variables will be helpful: 26 | 27 | ```bash 28 | PATH_TO_CNN=$HOME/cnn/ 29 | PATH_TO_EIGEN=$HOME/cnn/eigen/ 30 | ``` 31 | 32 | Compile CNN. 33 | (Modify the command below to point to the correct boost location.) 34 | 35 | ```bash 36 | cd $PATH_TO_CNN 37 | mkdir build 38 | cd build 39 | cmake ..
-DEIGEN3_INCLUDE_DIR=$PATH_TO_EIGEN -DBOOST_ROOT=$HOME/.local/boost_1_58_0 -DBoost_NO_BOOST_CMAKE=ON 40 | make -j 2 41 | ``` 42 | 43 | If CNN fails to compile and throws an error like this: 44 | 45 | ```bash 46 | $ make -j 2 47 | Scanning dependencies of target cnn 48 | Scanning dependencies of target cnn_shared 49 | [ 1%] [ 2%] Building CXX object cnn/CMakeFiles/cnn.dir/cfsm-builder.cc.o 50 | Building CXX object cnn/CMakeFiles/cnn_shared.dir/cfsm-builder.cc.o 51 | In file included from /home/user/cnn/cnn/cnn.h:13:0, 52 | from /home/user/cnn/cnn/cfsm-builder.h:6, 53 | from /home/user/cnn/cnn/cfsm-builder.cc:1: 54 | /home/user/cnn/cnn/tensor.h:22:42: fatal error: unsupported/Eigen/CXX11/Tensor: No such file or directory 55 | #include <unsupported/Eigen/CXX11/Tensor> 56 | ^ 57 | compilation terminated. 58 | ``` 59 | 60 | then you can download a stable version of Eigen and rebuild CNN as follows: 61 | 62 | ```bash 63 | cd $PATH_TO_CNN 64 | wget u.cs.biu.ac.il/~yogo/eigen.tgz 65 | tar zxvf eigen.tgz # or "dtrx eigen.tgz" if you have dtrx installed 66 | mkdir build 67 | cd build 68 | cmake .. -DEIGEN3_INCLUDE_DIR=$PATH_TO_EIGEN -DBOOST_ROOT=$HOME/.local/boost_1_58_0 -DBoost_NO_BOOST_CMAKE=ON 69 | make -j 2 70 | ``` 71 | 72 | Now that CNN is compiled, we need to compile the pycnn module. 73 | This requires having cython installed. 74 | If you don't have cython, it can be installed with either `pip install cython` or, better yet, `conda install cython`. 75 | 76 | ```bash 77 | pip2 install cython --user 78 | ``` 79 | 80 | Customize `setup.py` to include (i) the parent directory where the main `cnn` directory is saved and (ii) the path to the main `eigen` directory: 81 | 82 | ```bash 83 | cd $PATH_TO_CNN/pycnn 84 | sed -i "s|..\/..\/cnn\/|$PATH_TO_CNN|g" setup.py 85 | sed -i "s|..\/..\/eigen\/|$PATH_TO_EIGEN|g" setup.py 86 | make 87 | make install 88 | ``` 89 | 90 | We are almost there. 91 | We need to tell the environment where to find the compiled cnn shared library. 92 | pyCNN's `make` fetches a copy of `libcnn_shared.so` and puts it in the `pycnn` directory. 93 | 94 | Add the following line to your profile (`.zshrc` or `.bashrc`), changing 95 | the path according to your installation location. 96 | 97 | ```bash 98 | export LD_LIBRARY_PATH=$PATH_TO_CNN/pycnn 99 | ``` 100 | 101 | Now, check that everything works: 102 | 103 | ```bash 104 | # check that it works: 105 | cd $PATH_TO_CNN 106 | cd pyexamples 107 | python2 xor.py 108 | python2 rnnlm.py rnnlm.py 109 | ``` 110 | 111 | Alternatively, if the following script runs without errors, your installation is likely to be working: 112 | ``` 113 | from pycnn import * 114 | model = Model() 115 | ``` 116 | 117 | ## Installing with GPU support 118 | 119 | To install on a computer with a GPU, first install CUDA. 120 | Here, we assume CUDA is installed in `/usr/local/cuda-7.5` 121 | 122 | There are two modules: `pycnn`, which is the regular CPU module, and `gpycnn`, which is the GPU 123 | module. You can import either of them; they are two independent modules. The GPU support 124 | is incomplete: some operations (e.g. `huber_distance`) are not available for the GPU. 125 | 126 | The first step is to build the CNN modules. 127 | Check out and go to the `build` directory (same instructions as above). Then: 128 | 129 | To build a CPU version on a computer with CUDA: 130 | ```bash 131 | cmake .. -DEIGEN3_INCLUDE_DIR=../eigen -DBACKEND=eigen 132 | make -j 4 133 | ``` 134 | 135 | To build a GPU version on a computer with CUDA: 136 | ```bash 137 | cmake ..
-DBACKEND=cuda -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-7.5/ 138 | make -j 4 139 | ``` 140 | 141 | Now, build the python modules (as above, we assume cython is installed): 142 | 143 | The GPU module (gpycnn): 144 | ```bash 145 | cd ../pycnn 146 | make gpycnn.so 147 | make ginstall 148 | ``` 149 | 150 | The CPU module (pycnn): 151 | ```bash 152 | cd ../pycnn 153 | make pycnn.so 154 | make install 155 | ``` 156 | 157 | Add the following to your env: 158 | `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PATH_TO_CNN/pycnn` 159 | 160 | Once both `pycnn` and `gpycnn` are installed, run `python ../pyexamples/cpu_vs_gpu.py` for a small timing example. 161 | 162 | 163 | -------------------------------------------------------------------------------- /MEM.notes: -------------------------------------------------------------------------------- 1 | The code that computes the l2 norm of the gradient is going to need 2 | scratch space on every device that CNN is using that has a parameter. 3 | 1) devices should know whether they have parameters/gradients 4 | 5 | alignment code is hidden away. it's all hard coded, but it looks like 6 | Intel at least is getting more forgiving about alignment problems so 7 | we might not notice opportunities for speedups if something changes. 8 | GPU memory is aligned mostly by CUDA 9 | 10 | the MP stuff needs to be tested by Austin. 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cnn-v1 2 | Legacy version of the CNN neural net toolkit (now called [dynet](https://github.com/clab/dynet)) 3 | 4 | # cnn 5 | C++ neural network library 6 | 7 | #### Important: Eigen version requirement 8 | 9 | You need the [development version of the Eigen library](https://bitbucket.org/eigen/eigen) for this software to function. **If you use any of the released versions, you may get assertion failures or compile errors.** 10 | 11 | #### Building 12 | 13 | First you need to fetch the dependent libraries 14 | 15 | git submodule init 16 | git submodule update 17 | 18 | In the project root, you first need to use [`cmake`](http://www.cmake.org/) to generate the makefiles 19 | 20 | mkdir build 21 | cd build 22 | cmake .. -DEIGEN3_INCLUDE_DIR=/path/to/eigen 23 | 24 | Then to compile, run 25 | 26 | make -j 2 27 | 28 | To see that things have built properly, you can run 29 | 30 | ./examples/xor 31 | 32 | which will train a multilayer perceptron to predict the xor function. 33 | 34 | #### Building without Eigen installed 35 | 36 | If you don't have Eigen installed, the instructions below will fetch and compile 37 | both `Eigen` and `cnn`. Eigen does not have to be compiled, so “installing” it is easy. 38 | 39 | git clone https://github.com/clab/cnn.git 40 | hg clone https://bitbucket.org/eigen/eigen/ 41 | 42 | cd cnn/ 43 | mkdir build 44 | cd build 45 | cmake .. -DEIGEN3_INCLUDE_DIR=../eigen 46 | make -j 2 47 | 48 | #### Debugging build problems 49 | 50 | If you want to see the compile commands that are used, you can run 51 | 52 | make VERBOSE=1 53 | 54 | #### Training Models 55 | 56 | An illustration of how models are trained (for a simple logistic regression model) is below: 57 | 58 | ```c++ 59 | // *** First, we set up the structure of the model 60 | // Create a model, and an SGD trainer to update its parameters. 61 | Model mod; 62 | SimpleSGDTrainer sgd(&mod); 63 | // Create a "computation graph," which will define the flow of information.
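// Every Expression created below becomes a node in this graph; cg.forward()
// evaluates the nodes and cg.backward() back-propagates through them, as the
// second half of this example shows.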
64 | ComputationGraph cg; 65 | // Initialize a 1x3 parameter vector, and add the parameters to be part of the 66 | // computation graph. 67 | Expression W = parameter(cg, mod.add_parameters({1, 3})); 68 | // Create variables defining the input and output of the regression, and load them 69 | // into the computation graph. Note that we don't need to set concrete values yet. 70 | vector<cnn::real> x_values(3); 71 | Expression x = input(cg, {3}, &x_values); 72 | cnn::real y_value; 73 | Expression y = input(cg, &y_value); 74 | // Next, set up the structure to multiply the input by the weight vector, then run 75 | // the output of this through a logistic sigmoid function (logistic regression). 76 | Expression y_pred = logistic(W*x); 77 | // Finally, we create a function to calculate the loss. The model will be optimized 78 | // to minimize the value of the final function in the computation graph. 79 | Expression l = binary_log_loss(y_pred, y); 80 | // We are now done setting up the graph, and we can print out its structure: 81 | cg.PrintGraphviz(); 82 | 83 | // *** Now, we perform a parameter update for a single example. 84 | // Set the input/output to the values specified by the training data: 85 | x_values = {0.5, 0.3, 0.7}; 86 | y_value = 1.0; 87 | // "forward" propagates values forward through the computation graph, and returns 88 | // the loss. 89 | cnn::real loss = as_scalar(cg.forward()); 90 | // "backward" performs back-propagation, and accumulates the gradients of the 91 | // parameters within the "Model" data structure. 92 | cg.backward(); 93 | // "sgd.update" updates parameters of the model that was passed to its constructor. 94 | // Here 1.0 is the scaling factor that allows us to control the size of the update. 95 | sgd.update(1.0); 96 | ``` 97 | 98 | Note that this is a very simple example that doesn't cover things like memory initialization, reading/writing models, recurrent/LSTM networks, or adding biases to functions. The best way to get an idea of how to use cnn for real is to look in the `examples` directory, particularly starting with the simplest `xor` example. 99 | -------------------------------------------------------------------------------- /TODO.cnn: -------------------------------------------------------------------------------- 1 | PRIORITIES: 2 | 3 | Multiprocessor/single memory version has to get merged with good, clear examples [cdyer needs to try it out, then work with Austin] 4 | 5 | throughout: instead of aborting, throw a proper exception type (this makes life easier 6 | for Yoav's Python wrapper) [volunteer!!!] 7 | 8 | cnn/init.cc [volunteer!!!] 9 | * This is an unlovely place that every CNN program calls as its first thing 10 | * it should read (and remove) any cnn specific arguments from argc, argv 11 | * add a --help argument 12 | * what should the other arguments do? 13 | - configure memory limits 14 | - possibly enable things like initialization strategies for random variables (this 15 | is not trivial, but worth doing) 16 | - set rnd seed behavior 17 | - configure GPU nonsense 18 | - configure multiproc/multithread 19 | 20 | tests/ [volunteer!!!] 21 | * speaks for itself 22 | * we need to report very clear, detailed runtime on lots of things. in PARTICULAR: big M-v and M-M products, but also softmax. these should be as close to the "user expr.h API" as possible since we want these tests to be stable. (a sketch of what such a timing test could look like appears just below)
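      a minimal sketch of the kind of timing test meant above, written against the
      public expr.h API (Model, ComputationGraph, and the Timer in timing.h already
      exist in this repo; the dimensions, the picked index, and the iteration count
      are arbitrary):

        Model m;
        ComputationGraph cg;
        Expression W = parameter(cg, m.add_parameters({1024, 1024}));
        Expression x = parameter(cg, m.add_parameters({1024}));
        Expression y = softmax(W * x);  // big M-v product followed by a softmax
        Expression z = pick(y, 42);     // reduce to a scalar so backward() applies
        {
          Timer t("1000 forward/backward passes completed in");
          for (int i = 0; i < 1000; ++i) { cg.forward(); cg.backward(); }
        }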
23 | * we should have a separate mechanism for testing nodes in isolation, make sure fx can deal with non-zero numbers, make sure dEdxi does the right thing with non-zero numbers (different from fx!). 24 | * we should have one example that calls the finite-difference gradient checker on a non-trivial example 25 | * TODO(wammar): test the basic functionality of LSTMs. 26 | 27 | cnn/tensor.* [volunteer!!!] 28 | * big change: start using the CNN multidim tensor library when it makes sense 29 | * almost as big: make it so memory lives in GPU and CPU, and the scheduler will try to do smart things with CPU memory. This will mean the behavior of where memory lives will not be #if CUDA but rather a runtime property of the tensor. this affects all nodes. 30 | 31 | cnn/exec.cc 32 | * parallel execution of nodes (we've got the whole FSCKING graph). problem is, i don't know anything about how to elegantly put work like we've got into a threadpool or whatever it is the low-overhead kids are doing these days. I'd rather not pollute the Node code with this AT ALL. 33 | * more importantly, auto-batching 34 | 35 | cnn/examples/rnnlm.cc 36 | * add real program options to do something nontrivial 37 | * give an elegant example of a beam search implementation 38 | 39 | -------------------------------------------------------------------------------- /cmake/FindCNN.cmake: -------------------------------------------------------------------------------- 1 | 2 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.7 FATAL_ERROR) 3 | 4 | INCLUDE(FindPackageHandleStandardArgs) 5 | 6 | FIND_LIBRARY(TH_LIBRARY TH) 7 | FIND_PATH(TH_INCLUDE_DIR "TH.h" PATHS "${CMAKE_PREFIX_PATH}/include/TH") 8 | 9 | SET(TH_LIBRARIES ${TH_LIBRARY}) 10 | 11 | FIND_PACKAGE_HANDLE_STANDARD_ARGS( 12 | TH 13 | REQUIRED_VARS 14 | TH_INCLUDE_DIR 15 | TH_LIBRARY) 16 | -------------------------------------------------------------------------------- /cmake/FindEigen3.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Eigen3 lib 2 | # 3 | # This module supports requiring a minimum version, e.g. you can do 4 | # find_package(Eigen3 3.1.2) 5 | # to require version 3.1.2 or newer of Eigen3. 6 | # 7 | # Once done this will define 8 | # 9 | # EIGEN3_FOUND - system has eigen lib with correct version 10 | # EIGEN3_INCLUDE_DIR - the eigen include directory 11 | # EIGEN3_VERSION - eigen version 12 | # 13 | # This module reads hints about search locations from 14 | # the following environment variables: 15 | # 16 | # EIGEN3_ROOT 17 | # EIGEN3_ROOT_DIR 18 | 19 | # Copyright (c) 2006, 2007 Montel Laurent, 20 | # Copyright (c) 2008, 2009 Gael Guennebaud, 21 | # Copyright (c) 2009 Benoit Jacob 22 | # Redistribution and use is allowed according to the terms of the 2-clause BSD license.
23 | 24 | if(NOT Eigen3_FIND_VERSION) 25 | if(NOT Eigen3_FIND_VERSION_MAJOR) 26 | set(Eigen3_FIND_VERSION_MAJOR 2) 27 | endif(NOT Eigen3_FIND_VERSION_MAJOR) 28 | if(NOT Eigen3_FIND_VERSION_MINOR) 29 | set(Eigen3_FIND_VERSION_MINOR 91) 30 | endif(NOT Eigen3_FIND_VERSION_MINOR) 31 | if(NOT Eigen3_FIND_VERSION_PATCH) 32 | set(Eigen3_FIND_VERSION_PATCH 0) 33 | endif(NOT Eigen3_FIND_VERSION_PATCH) 34 | 35 | set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") 36 | endif(NOT Eigen3_FIND_VERSION) 37 | 38 | macro(_eigen3_check_version) 39 | file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) 40 | 41 | string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") 42 | set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") 43 | string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") 44 | set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") 45 | string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}") 46 | set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") 47 | 48 | set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) 49 | if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 50 | set(EIGEN3_VERSION_OK FALSE) 51 | else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 52 | set(EIGEN3_VERSION_OK TRUE) 53 | endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 54 | 55 | if(NOT EIGEN3_VERSION_OK) 56 | 57 | message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " 58 | "but at least version ${Eigen3_FIND_VERSION} is required") 59 | endif(NOT EIGEN3_VERSION_OK) 60 | endmacro(_eigen3_check_version) 61 | 62 | if (EIGEN3_INCLUDE_DIR) 63 | 64 | # in cache already 65 | _eigen3_check_version() 66 | set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) 67 | 68 | else (EIGEN3_INCLUDE_DIR) 69 | 70 | find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library 71 | HINTS 72 | ENV EIGEN3_ROOT 73 | ENV EIGEN3_ROOT_DIR 74 | PATHS 75 | ${CMAKE_INSTALL_PREFIX}/include 76 | ${KDE4_INCLUDE_DIR} 77 | PATH_SUFFIXES eigen3 eigen 78 | ) 79 | 80 | if(EIGEN3_INCLUDE_DIR) 81 | _eigen3_check_version() 82 | endif(EIGEN3_INCLUDE_DIR) 83 | 84 | include(FindPackageHandleStandardArgs) 85 | find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK) 86 | 87 | mark_as_advanced(EIGEN3_INCLUDE_DIR) 88 | 89 | endif(EIGEN3_INCLUDE_DIR) 90 | 91 | -------------------------------------------------------------------------------- /cnn/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ########## cnn library ########## 2 | # Sources: 3 | set(cnn_library_SRCS 4 | cfsm-builder.cc 5 | cnn.cc 6 | conv.cc 7 | deep-lstm.cc 8 | devices.cc 9 | dict.cc 10 | dim.cc 11 | exec.cc 12 | expr.cc 13 | fast-lstm.cc 14 | grad-check.cc 15 | graph.cc 16 | gru.cc 17 | hsm-builder.cc 18 | init.cc 19 | lstm.cc 20 | mem.cc 21 | model.cc 22 | mp.cc 23 | nodes.cc 24 | nodes-common.cc 25 | param-nodes.cc 26 | rnn.cc 27 | rnn-state-machine.cc 28 | saxe-init.cc 29 | shadow-params.cc 30 | tensor.cc 31 | training.cc 32 | ) 33 | 34 | # Headers: 35 | set(cnn_library_HDRS 36 | aligned-mem-pool.h 37 | cfsm-builder.h 38 | c2w.h 39 | cnn.h 40 | conv.h 41 | cuda.h 42 | devices.h 43 | dict.h 44 | dim.h 45 | exec.h 46 | expr.h 47 | fast-lstm.h 48 | functors.h 49 | 
gpu-kernels.h 50 | gpu-ops.h 51 | graph.h 52 | gru.h 53 | hsm-builder.h 54 | init.h 55 | lstm.h 56 | mem.h 57 | model.h 58 | mp.h 59 | nodes.h 60 | param-nodes.h 61 | random.h 62 | rnn-state-machine.h 63 | rnn.h 64 | saxe-init.h 65 | shadow-params.h 66 | simd-functors.h 67 | tensor.h 68 | timing.h 69 | training.h 70 | ) 71 | 72 | if(WITH_CUDA_BACKEND) 73 | list(APPEND cnn_library_SRCS 74 | cuda.cc) 75 | endif(WITH_CUDA_BACKEND) 76 | 77 | file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} tests/*.cc) 78 | 79 | #foreach(test_src ${TEST_SRCS}) 80 | #Extract the filename without an extension (NAME_WE) 81 | # get_filename_component(testName ${test_src} NAME_WE) 82 | 83 | #Add compile target 84 | # add_executable(${testName} ${test_src}) 85 | 86 | #link to Boost libraries AND your targets and dependencies 87 | # target_link_libraries(${testName} cnn ${LIBS}) 88 | 89 | # set_target_properties(${testName} PROPERTIES 90 | # RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests.bin) 91 | 92 | #Finally add it to test execution - 93 | #Notice the WORKING_DIRECTORY and COMMAND 94 | # add_test(NAME ${testName} 95 | # WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests.bin 96 | # COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tests.bin/${testName} ) 97 | #endforeach(test_src) 98 | 99 | # actual target: 100 | add_library(cnn STATIC ${cnn_library_SRCS} ${cnn_library_HDRS}) 101 | target_link_libraries(cnn ${LIBS}) 102 | if(WITH_CUDA_BACKEND) 103 | add_library(gcnn_shared SHARED ${cnn_library_SRCS} ${cnn_library_HDRS}) 104 | target_link_libraries(gcnn_shared ${LIBS}) 105 | else() 106 | add_library(cnn_shared SHARED ${cnn_library_SRCS} ${cnn_library_HDRS}) 107 | target_link_libraries(cnn_shared ${LIBS}) 108 | endif(WITH_CUDA_BACKEND) 109 | #add_library(cnn ${cnn_library_SRCS} ${cnn_library_HDRS} ${LIBS}) 110 | if(WITH_CUDA_BACKEND) 111 | set(CUDA_SEPARABLE_COMPILATION ON) 112 | list(APPEND CUDA_NVCC_FLAGS "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_52,code=compute_52;-std=c++11;-O2;-DVERBOSE;-Xcompiler;-fpic") 113 | SET(CUDA_PROPAGATE_HOST_FLAGS OFF) 114 | cuda_add_library(cnncuda STATIC gpu-ops.cu) 115 | cuda_add_library(cnncuda_shared SHARED gpu-ops.cu) 116 | endif(WITH_CUDA_BACKEND) 117 | 118 | install(FILES ${cnn_library_HDRS} DESTINATION include/cnn) 119 | install(TARGETS cnn DESTINATION lib) 120 | 121 | # target_compile_features(cnn PRIVATE cxx_range_for) 122 | 123 | -------------------------------------------------------------------------------- /cnn/aligned-mem-pool.cc: -------------------------------------------------------------------------------- 1 | #include "aligned-mem-pool.h" 2 | -------------------------------------------------------------------------------- /cnn/aligned-mem-pool.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_ALIGNED_MEM_POOL_H 2 | #define CNN_ALIGNED_MEM_POOL_H 3 | 4 | #include <iostream> 5 | #include "cnn/mem.h" 6 | 7 | namespace cnn { 8 | 9 | class AlignedMemoryPool { 10 | public: 11 | explicit AlignedMemoryPool(size_t cap, MemAllocator* a) : a(a) { 12 | sys_alloc(cap); 13 | zero_all(); 14 | } 15 | 16 | void* allocate(size_t n) { 17 | auto rounded_n = a->round_up_align(n); 18 | if (rounded_n + used > capacity) { 19 | std::cerr << "cnn is out of memory, try increasing with --cnn-mem\n"; 20 | abort(); 21 | } 22 | void* res = static_cast<char*>(mem) +
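// bump-pointer allocation: the next free chunk begins `used` bytes past the start of the pool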
used; 23 | used += rounded_n; 24 | return res; 25 | } 26 | void free() { 27 | //std::cerr << "freeing " << used << " bytes\n"; 28 | used = 0; 29 | } 30 | // zeros out the memory that has been allocated so far 31 | void zero_allocated_memory() { 32 | if (used == 0) return; 33 | a->zero(mem, used); 34 | } 35 | 36 | bool is_shared() { 37 | return shared; 38 | } 39 | private: 40 | void sys_alloc(size_t cap) { 41 | capacity = a->round_up_align(cap); 42 | //std::cerr << "Allocating " << capacity << " ...\n"; 43 | mem = a->malloc(capacity); 44 | if (!mem) { std::cerr << "Failed to allocate " << capacity << std::endl; abort(); } 45 | used = 0; 46 | } 47 | void zero_all() { 48 | a->zero(mem, capacity); 49 | } 50 | size_t capacity; 51 | size_t used; 52 | bool shared; 53 | MemAllocator* a; 54 | void* mem; 55 | }; 56 | 57 | } // namespace cnn 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /cnn/c2w.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_C2W_H_ 2 | #define CNN_C2W_H_ 3 | 4 | #include <vector> 5 | #include <map> 6 | 7 | #include "cnn/cnn.h" 8 | #include "cnn/model.h" 9 | #include "cnn/lstm.h" 10 | 11 | namespace cnn { 12 | 13 | // computes a representation of a word by reading characters 14 | // one at a time 15 | struct C2WBuilder { 16 | LSTMBuilder fc2w; 17 | LSTMBuilder rc2w; 18 | LookupParameters* p_lookup; 19 | std::vector<VariableIndex> words; 20 | std::map<int, VariableIndex> wordid2vi; 21 | explicit C2WBuilder(int vocab_size, 22 | unsigned layers, 23 | unsigned input_dim, 24 | unsigned hidden_dim, 25 | Model* m) : 26 | fc2w(layers, input_dim, hidden_dim, m), 27 | rc2w(layers, input_dim, hidden_dim, m), 28 | p_lookup(m->add_lookup_parameters(vocab_size, {input_dim})) { 29 | } 30 | void new_graph(ComputationGraph* cg) { 31 | words.clear(); 32 | fc2w.new_graph(cg); 33 | rc2w.new_graph(cg); 34 | } 35 | // compute a composed representation of a word out of characters 36 | // wordid should be a unique index for each word *type* in the graph being built 37 | VariableIndex add_word(int word_id, const std::vector<int>& chars, ComputationGraph* cg) { 38 | auto it = wordid2vi.find(word_id); 39 | if (it == wordid2vi.end()) { 40 | fc2w.start_new_sequence(cg); 41 | rc2w.start_new_sequence(cg); 42 | std::vector<VariableIndex> ins(chars.size()); 43 | std::map<int, VariableIndex> c2i; 44 | for (unsigned i = 0; i < ins.size(); ++i) { 45 | VariableIndex& v = c2i[chars[i]]; 46 | if (!v) v = cg->add_lookup(p_lookup, chars[i]); 47 | ins[i] = v; 48 | fc2w.add_input(v, cg); 49 | } 50 | for (int i = ins.size() - 1; i >= 0; --i) 51 | rc2w.add_input(ins[i], cg); 52 | VariableIndex i_concat = cg->add_function<Concatenate>({fc2w.back(), rc2w.back()}); 53 | it = wordid2vi.insert(std::make_pair(word_id, i_concat)).first; 54 | } 55 | return it->second; 56 | } 57 | }; 58 | 59 | } // namespace cnn 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /cnn/cfsm-builder.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_CFSMBUILDER_H 2 | #define CNN_CFSMBUILDER_H 3 | 4 | #include <vector> 5 | #include <string> 6 | #include "cnn/cnn.h" 7 | #include "cnn/expr.h" 8 | #include "cnn/dict.h" 9 | 10 | namespace cnn { 11 | 12 | struct Parameters; 13 | 14 | class FactoredSoftmaxBuilder { 15 | public: 16 | // call this once per ComputationGraph 17 | virtual void new_graph(ComputationGraph& cg) = 0; 18 | 19 | // -log(p(c | rep) * p(w | c, rep)) 20 | virtual expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx) = 0; 21 | 22 | // samples a
word from p(w,c | rep) 23 | virtual unsigned sample(const expr::Expression& rep) = 0; 24 | }; 25 | 26 | class NonFactoredSoftmaxBuilder : public FactoredSoftmaxBuilder { 27 | public: 28 | NonFactoredSoftmaxBuilder(unsigned rep_dim, unsigned vocab_size, Model* model); 29 | void new_graph(ComputationGraph& cg); 30 | expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx); 31 | unsigned sample(const expr::Expression& rep); 32 | private: 33 | Parameters* p_w; 34 | Parameters* p_b; 35 | expr::Expression w; 36 | expr::Expression b; 37 | ComputationGraph* pcg; 38 | }; 39 | 40 | // helps with implementation of hierarchical softmax 41 | // read a file with lines of the following format 42 | // CLASSID word [freq] 43 | class ClassFactoredSoftmaxBuilder : public FactoredSoftmaxBuilder { 44 | public: 45 | ClassFactoredSoftmaxBuilder(unsigned rep_dim, 46 | const std::string& cluster_file, 47 | Dict* word_dict, 48 | Model* model); 49 | 50 | void new_graph(ComputationGraph& cg); 51 | expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx); 52 | unsigned sample(const expr::Expression& rep); 53 | 54 | private: 55 | void ReadClusterFile(const std::string& cluster_file, Dict* word_dict); 56 | Dict cdict; 57 | std::vector widx2cidx; // will be -1 if not present 58 | std::vector widx2cwidx; // word index to word index inside of cluster 59 | std::vector> cidx2words; 60 | std::vector singleton_cluster; // does cluster contain a single word type? 61 | 62 | // parameters 63 | Parameters* p_r2c; 64 | Parameters* p_cbias; 65 | std::vector p_rc2ws; // len = number of classes 66 | std::vector p_rcwbiases; // len = number of classes 67 | 68 | // Expressions for current graph 69 | inline expr::Expression& get_rc2w(unsigned cluster_idx) { 70 | expr::Expression& e = rc2ws[cluster_idx]; 71 | if (!e.pg) 72 | e = expr::parameter(*pcg, p_rc2ws[cluster_idx]); 73 | return e; 74 | } 75 | inline expr::Expression& get_rc2wbias(unsigned cluster_idx) { 76 | expr::Expression& e = rc2biases[cluster_idx]; 77 | if (!e.pg) 78 | e = expr::parameter(*pcg, p_rcwbiases[cluster_idx]); 79 | return e; 80 | } 81 | ComputationGraph* pcg; 82 | expr::Expression r2c; 83 | expr::Expression cbias; 84 | std::vector rc2ws; 85 | std::vector rc2biases; 86 | }; 87 | 88 | } // namespace cnn 89 | 90 | #endif 91 | -------------------------------------------------------------------------------- /cnn/cnn-helper.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_HELPER_H_ 2 | #define CNN_HELPER_H_ 3 | 4 | #include 5 | 6 | /// helper functions 7 | 8 | namespace cnn { 9 | 10 | /** 11 | this fix a compilation problem in cygwin 12 | */ 13 | #if defined(__CYGWIN__) 14 | template 15 | inline std::string to_string(T value) 16 | { 17 | std::ostringstream os; 18 | os << value; 19 | return os.str(); 20 | } 21 | #endif 22 | 23 | } // namespace cnn 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /cnn/conv.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_CONV_H_ 2 | #define CNN_CONV_H_ 3 | 4 | #include "cnn/cnn.h" 5 | 6 | namespace cnn { 7 | 8 | struct AddVectorToAllColumns : public Node { 9 | explicit AddVectorToAllColumns(const std::initializer_list& a) : Node(a) {} 10 | std::string as_string(const std::vector& arg_names) const override; 11 | Dim dim_forward(const std::vector& xs) const override; 12 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 13 | 
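// given dEdf, the derivative of the loss with respect to this node's output fx,
// backward_impl accumulates into dEdxi the derivative with respect to input argument i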
void backward_impl(const std::vector& xs, 14 | const Tensor& fx, 15 | const Tensor& dEdf, 16 | unsigned i, 17 | Tensor& dEdxi) const override; 18 | }; 19 | 20 | struct KMaxPooling : public Node { 21 | explicit KMaxPooling(const std::initializer_list& a, unsigned k = 1) : Node(a), k(k) {} 22 | std::string as_string(const std::vector& arg_names) const override; 23 | Dim dim_forward(const std::vector& xs) const override; 24 | size_t aux_storage_size() const override; 25 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 26 | void backward_impl(const std::vector& xs, 27 | const Tensor& fx, 28 | const Tensor& dEdf, 29 | unsigned i, 30 | Tensor& dEdxi) const override; 31 | unsigned k; 32 | }; 33 | 34 | struct FoldRows : public Node { 35 | explicit FoldRows(const std::initializer_list& a, unsigned nrows) : Node(a), nrows(nrows) {} 36 | std::string as_string(const std::vector& arg_names) const override; 37 | Dim dim_forward(const std::vector& xs) const override; 38 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 39 | void backward_impl(const std::vector& xs, 40 | const Tensor& fx, 41 | const Tensor& dEdf, 42 | unsigned i, 43 | Tensor& dEdxi) const override; 44 | unsigned nrows; 45 | }; 46 | 47 | // y = x_1 *conv x_2 48 | // x_1 \in R^{d x s} (input) 49 | // x_2 \in R^{d x m} (filter) 50 | struct Conv1DNarrow : public Node { 51 | explicit Conv1DNarrow(const std::initializer_list& a) : Node(a) {} 52 | std::string as_string(const std::vector& arg_names) const override; 53 | Dim dim_forward(const std::vector& xs) const override; 54 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 55 | void backward_impl(const std::vector& xs, 56 | const Tensor& fx, 57 | const Tensor& dEdf, 58 | unsigned i, 59 | Tensor& dEdxi) const override; 60 | }; 61 | 62 | // y = x_1 *conv x_2 63 | // x_1 \in R^{d x s} (input) 64 | // x_2 \in R^{d x m} (filter) 65 | struct Conv1DWide : public Node { 66 | explicit Conv1DWide(const std::initializer_list& a) : Node(a) {} 67 | std::string as_string(const std::vector& arg_names) const override; 68 | Dim dim_forward(const std::vector& xs) const override; 69 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 70 | void backward_impl(const std::vector& xs, 71 | const Tensor& fx, 72 | const Tensor& dEdf, 73 | unsigned i, 74 | Tensor& dEdxi) const override; 75 | }; 76 | 77 | } // namespace cnn 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /cnn/cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cnn/cnn.h" 6 | #include "cnn/cuda.h" 7 | 8 | using namespace std; 9 | 10 | namespace cnn { 11 | 12 | cublasHandle_t cublas_handle; 13 | 14 | static void RemoveArgs(int& argc, char**& argv, int& argi, int n) { 15 | for (int i = argi + n; i < argc; ++i) 16 | argv[i - n] = argv[i]; 17 | argc -= n; 18 | assert(argc >= 0); 19 | } 20 | 21 | #define MAX_GPUS 256 22 | 23 | vector Initialize_GPU(int& argc, char**& argv) { 24 | int nDevices; 25 | CUDA_CHECK(cudaGetDeviceCount(&nDevices)); 26 | if (nDevices < 1) { 27 | cerr << "[cnn] No GPUs found, recompile without DENABLE_CUDA=1\n"; 28 | throw std::runtime_error("No GPUs found but CNN compiled with CUDA support."); 29 | } 30 | // logic: no flags, you get 1 GPU 31 | // or you request a certain number of GPUs explicitly 32 | // or you request the device ids 33 | int requested_gpus = -1; 34 | vector gpu_mask(MAX_GPUS); 35 | int 
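// argi scans the argument list; cnn-specific flags are consumed and then stripped
// out of argv via RemoveArgs above, so user code never sees them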
argi = 1; 36 | bool ngpus_requested = false; 37 | bool ids_requested = false; 38 | for( ;argi < argc; ++argi) { 39 | string arg = argv[argi]; 40 | if (arg == "--cnn_gpus" || arg == "--cnn-gpus") { 41 | if ((argi + 1) > argc) { 42 | cerr << "[cnn] --cnn-gpus expects an argument (number of GPUs to use)\n"; 43 | abort(); 44 | } else { 45 | if (ngpus_requested) { 46 | cerr << "Multiple instances of --cnn-gpus" << endl; abort(); 47 | } 48 | ngpus_requested = true; 49 | string a2 = argv[argi+1]; 50 | istringstream c(a2); c >> requested_gpus; 51 | RemoveArgs(argc, argv, argi, 2); 52 | } 53 | } else if (arg == "--cnn_gpu_ids" || arg == "--cnn-gpu-ids") { 54 | if ((argi + 1) > argc) { 55 | cerr << "[cnn] --cnn-gpu-ids expects an argument (comma separated list of physical GPU ids to use)\n"; 56 | abort(); 57 | } else { 58 | string a2 = argv[argi+1]; 59 | if (ids_requested) { 60 | cerr << "Multiple instances of --cnn-gpu-ids" << endl; abort(); 61 | } 62 | ids_requested = true; 63 | if (a2.size() % 2 != 1) { 64 | cerr << "Bad argument to --cnn-gpu-ids: " << a2 << endl; abort(); 65 | } 66 | for (unsigned i = 0; i < a2.size(); ++i) { 67 | if ((i % 2 == 0 && (a2[i] < '0' || a2[i] > '9')) || 68 | (i % 2 == 1 && a2[i] != ',')) { 69 | cerr << "Bad argument to --cnn-gpu-ids: " << a2 << endl; abort(); 70 | } 71 | if (i % 2 == 0) { 72 | int gpu_id = a2[i] - '0'; 73 | if (gpu_id >= nDevices) { 74 | cerr << "You requested GPU id " << gpu_id << " but system only reports up to " << nDevices << endl; 75 | abort(); 76 | } 77 | if (gpu_id >= MAX_GPUS) { cerr << "Raise MAX_GPUS\n"; abort(); } 78 | gpu_mask[gpu_id]++; 79 | requested_gpus++; 80 | if (gpu_mask[gpu_id] != 1) { 81 | cerr << "Bad argument to --cnn-gpu-ids: " << a2 << endl; abort(); 82 | } 83 | } 84 | } 85 | RemoveArgs(argc, argv, argi, 2); 86 | } 87 | } 88 | } 89 | if (ids_requested && ngpus_requested) { 90 | cerr << "Use only --cnn_gpus or --cnn_gpu_ids, not both\n"; 91 | abort(); 92 | } 93 | if (ngpus_requested || requested_gpus == -1) { 94 | if (requested_gpus == -1) requested_gpus = 1; 95 | cerr << "Request for " << requested_gpus << " GPU" << (requested_gpus == 1 ? "" : "s") << " ...\n"; 96 | for (int i = 0; i < MAX_GPUS; ++i) gpu_mask[i] = 1; 97 | } else if (ids_requested) { 98 | requested_gpus++; 99 | cerr << "[cnn] Request for " << requested_gpus << " specific GPU" << (requested_gpus == 1 ? 
"" : "s") << " ...\n"; 100 | } 101 | 102 | vector gpudevices; 103 | if (requested_gpus == 0) return gpudevices; 104 | if (requested_gpus > nDevices) { 105 | cerr << "You requested " << requested_gpus << " GPUs but system only reports " << nDevices << endl; 106 | abort(); 107 | } 108 | 109 | // after all that, requested_gpus is the number of GPUs to reserve 110 | // we now pick the ones that are both requested by the user or have 111 | // the most memory free 112 | 113 | vector gpu_free_mem(MAX_GPUS, 0); 114 | vector gpus(MAX_GPUS, 0); 115 | for (int i = 0; i < MAX_GPUS; ++i) gpus[i] = i; 116 | size_t free_bytes, total_bytes, max_free = 0; 117 | int selected = 0; 118 | for (int i = 0; i < nDevices; i++) { 119 | if (!gpu_mask[i]) continue; 120 | cudaDeviceProp prop; 121 | CUDA_CHECK(cudaGetDeviceProperties(&prop, i)); 122 | cerr << "[cnn] Device Number: " << i << endl; 123 | cerr << "[cnn] Device name: " << prop.name << endl; 124 | cerr << "[cnn] Memory Clock Rate (KHz): " << prop.memoryClockRate << endl; 125 | cerr << "[cnn] Memory Bus Width (bits): " << prop.memoryBusWidth << endl; 126 | cerr << "[cnn] Peak Memory Bandwidth (GB/s): " << (2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6) << endl; 127 | if (!prop.unifiedAddressing) { 128 | cerr << "[cnn] GPU does not support unified addressing.\n"; 129 | abort(); 130 | } 131 | CUDA_CHECK(cudaSetDevice(i)); 132 | CUDA_CHECK(cudaMemGetInfo( &free_bytes, &total_bytes )); 133 | CUDA_CHECK(cudaDeviceReset()); 134 | cerr << "[cnn] Memory Free (GB): " << free_bytes/1.0e9 << "/" << total_bytes/1.0e9 << endl; 135 | cerr << "[cnn]" << endl; 136 | gpu_free_mem[i] = free_bytes; 137 | } 138 | stable_sort(gpus.begin(), gpus.end(), [&](int a, int b) -> bool { return gpu_free_mem[a] > gpu_free_mem[b]; }); 139 | gpus.resize(requested_gpus); 140 | cerr << "[cnn] Device(s) selected:"; 141 | for (int i = 0; i < requested_gpus; ++i) { 142 | cerr << ' ' << gpus[i]; 143 | int mb = 512; 144 | Device* d = new Device_GPU(mb, gpus[i]); 145 | gpudevices.push_back(d); 146 | } 147 | cerr << endl; 148 | 149 | // eventually kill the global handle 150 | CUDA_CHECK(cudaSetDevice(gpus[0])); 151 | CUBLAS_CHECK(cublasCreate(&cublas_handle)); 152 | CUBLAS_CHECK(cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE)); 153 | return gpudevices; 154 | } 155 | 156 | } // namespace cnn 157 | -------------------------------------------------------------------------------- /cnn/cuda.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_CUDA_H 2 | #define CNN_CUDA_H 3 | #if HAVE_CUDA 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "cnn/except.h" 14 | 15 | #define CUDA_CHECK(stmt) do { \ 16 | cudaError_t err = stmt; \ 17 | if (err != cudaSuccess) { \ 18 | std::cerr << "CUDA failure in " << #stmt << std::endl\ 19 | << cudaGetErrorString(err) << std::endl; \ 20 | throw cnn::cuda_exception(#stmt); \ 21 | } \ 22 | } while(0) 23 | 24 | #define CUBLAS_CHECK(stmt) do { \ 25 | cublasStatus_t stat = stmt; \ 26 | if (stat != CUBLAS_STATUS_SUCCESS) { \ 27 | std::cerr << "CUBLAS failure in " << #stmt \ 28 | << std::endl << stat << std::endl; \ 29 | throw cnn::cuda_exception(#stmt); \ 30 | } \ 31 | } while(0) 32 | 33 | namespace cnn { 34 | 35 | struct Device; 36 | 37 | inline std::pair SizeToBlockThreadPair(int n) { 38 | assert(n); 39 | int logn; 40 | asm("\tbsr %1, %0\n" 41 | : "=r"(logn) 42 | : "r" (n-1)); 43 | logn = logn > 9 ? 9 : (logn < 4 ? 
4 : logn); 44 | ++logn; 45 | int threads = 1 << logn; 46 | int blocks = (n + threads - 1) >> logn; 47 | blocks = blocks > 128 ? 128 : blocks; 48 | return std::make_pair(blocks, threads); 49 | } 50 | 51 | std::vector Initialize_GPU(int& argc, char**& argv); 52 | extern cublasHandle_t cublas_handle; 53 | 54 | } // namespace cnn 55 | 56 | #endif 57 | #endif 58 | -------------------------------------------------------------------------------- /cnn/deep-lstm.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/deep-lstm.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | 10 | using namespace std; 11 | using namespace cnn::expr; 12 | 13 | namespace cnn { 14 | 15 | enum { X2I, H2I, C2I, BI, X2O, H2O, C2O, BO, X2C, H2C, BC }; 16 | 17 | DeepLSTMBuilder::DeepLSTMBuilder(unsigned layers, 18 | unsigned input_dim, 19 | unsigned hidden_dim, 20 | Model* model) : layers(layers) { 21 | unsigned layer_input_dim = input_dim; 22 | for (unsigned i = 0; i < layers; ++i) { 23 | // i 24 | Parameters* p_x2i = model->add_parameters({hidden_dim, layer_input_dim}); 25 | Parameters* p_h2i = model->add_parameters({hidden_dim, hidden_dim}); 26 | Parameters* p_c2i = model->add_parameters({hidden_dim, hidden_dim}); 27 | Parameters* p_bi = model->add_parameters({hidden_dim}); 28 | 29 | // o 30 | Parameters* p_x2o = model->add_parameters({hidden_dim, layer_input_dim}); 31 | Parameters* p_h2o = model->add_parameters({hidden_dim, hidden_dim}); 32 | Parameters* p_c2o = model->add_parameters({hidden_dim, hidden_dim}); 33 | Parameters* p_bo = model->add_parameters({hidden_dim}); 34 | 35 | // c 36 | Parameters* p_x2c = model->add_parameters({hidden_dim, layer_input_dim}); 37 | Parameters* p_h2c = model->add_parameters({hidden_dim, hidden_dim}); 38 | Parameters* p_bc = model->add_parameters({hidden_dim}); 39 | layer_input_dim = hidden_dim + input_dim; // output (hidden) from 1st layer is input to next 40 | 41 | vector ps = {p_x2i, p_h2i, p_c2i, p_bi, p_x2o, p_h2o, p_c2o, p_bo, p_x2c, p_h2c, p_bc}; 42 | params.push_back(ps); 43 | } // layers 44 | } 45 | 46 | void DeepLSTMBuilder::new_graph_impl(ComputationGraph& cg){ 47 | param_vars.clear(); 48 | 49 | for (unsigned i = 0; i < layers; ++i){ 50 | auto& p = params[i]; 51 | 52 | //i 53 | Expression i_x2i = parameter(cg,p[X2I]); 54 | Expression i_h2i = parameter(cg,p[H2I]); 55 | Expression i_c2i = parameter(cg,p[C2I]); 56 | Expression i_bi = parameter(cg,p[BI]); 57 | //o 58 | Expression i_x2o = parameter(cg,p[X2O]); 59 | Expression i_h2o = parameter(cg,p[H2O]); 60 | Expression i_c2o = parameter(cg,p[C2O]); 61 | Expression i_bo = parameter(cg,p[BO]); 62 | //c 63 | Expression i_x2c = parameter(cg,p[X2C]); 64 | Expression i_h2c = parameter(cg,p[H2C]); 65 | Expression i_bc = parameter(cg,p[BC]); 66 | 67 | vector vars = {i_x2i, i_h2i, i_c2i, i_bi, i_x2o, i_h2o, i_c2o, i_bo, i_x2c, i_h2c, i_bc}; 68 | param_vars.push_back(vars); 69 | } 70 | } 71 | 72 | // layout: 0..layers = c 73 | // layers+1..2*layers = h 74 | void DeepLSTMBuilder::start_new_sequence_impl(const vector& hinit) { 75 | h.clear(); 76 | c.clear(); 77 | if (hinit.size() > 0) { 78 | assert(layers*2 == hinit.size()); 79 | h0.resize(layers); 80 | c0.resize(layers); 81 | for (unsigned i = 0; i < layers; ++i) { 82 | c0[i] = hinit[i]; 83 | h0[i] = hinit[i + layers]; 84 | } 85 | has_initial_state = true; 86 | } else { 87 | has_initial_state = false; 88 | } 89 | } 90 | 91 | Expression DeepLSTMBuilder::add_input_impl(int prev, const Expression& x) { 
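// each timestep records one h and one c per layer; layer 0 reads x directly, and
// every deeper layer reads the previous layer's output concatenated with x (cf.
// layer_input_dim = hidden_dim + input_dim in the constructor). the expression
// returned at the end is the concatenation of all layers' hidden states.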
92 | h.push_back(vector(layers)); 93 | c.push_back(vector(layers)); 94 | o.push_back(Expression()); 95 | vector& ht = h.back(); 96 | vector& ct = c.back(); 97 | Expression& ot = o.back(); 98 | Expression in = x; 99 | vector cc(layers); 100 | for (unsigned i = 0; i < layers; ++i) { 101 | if (i > 0) 102 | in = concatenate({in, x}); 103 | const vector& vars = param_vars[i]; 104 | Expression i_h_tm1, i_c_tm1; 105 | bool has_prev_state = (prev >= 0 || has_initial_state); 106 | if (prev < 0) { 107 | if (has_initial_state) { 108 | // intial value for h and c at timestep 0 in layer i 109 | // defaults to zero matrix input if not set in add_parameter_edges 110 | i_h_tm1 = h0[i]; 111 | i_c_tm1 = c0[i]; 112 | } 113 | } else { // t > 0 114 | i_h_tm1 = h[prev][i]; 115 | i_c_tm1 = c[prev][i]; 116 | } 117 | // input 118 | Expression i_ait; 119 | if (has_prev_state) 120 | // i_ait = vars[BI] + vars[X2I] * in + vars[H2I]*i_h_tm1 + vars[C2I] * i_c_tm1; 121 | i_ait = affine_transform({vars[BI], vars[X2I], in, vars[H2I], i_h_tm1, vars[C2I], i_c_tm1}); 122 | else 123 | // i_ait = vars[BI] + vars[X2I] * in; 124 | i_ait = affine_transform({vars[BI], vars[X2I], in}); 125 | Expression i_it = logistic(i_ait); 126 | // forget 127 | Expression i_ft = 1.f - i_it; 128 | // write memory cell 129 | Expression i_awt; 130 | if (has_prev_state) 131 | // i_awt = vars[BC] + vars[X2C] * in + vars[H2C]*i_h_tm1; 132 | i_awt = affine_transform({vars[BC], vars[X2C], in, vars[H2C], i_h_tm1}); 133 | else 134 | // i_awt = vars[BC] + vars[X2C] * in; 135 | i_awt = affine_transform({vars[BC], vars[X2C], in}); 136 | Expression i_wt = tanh(i_awt); 137 | // output 138 | if (has_prev_state) { 139 | Expression i_nwt = cwise_multiply(i_it,i_wt); 140 | Expression i_crt = cwise_multiply(i_ft,i_c_tm1); 141 | ct[i] = i_crt + i_nwt; 142 | } else { 143 | ct[i] = cwise_multiply(i_it,i_wt); 144 | } 145 | 146 | Expression i_aot; 147 | if (has_prev_state) 148 | // i_aot = vars[BO] + vars[X2O] * in + vars[H2O] * i_h_tm1 + vars[C2O] * ct[i]; 149 | i_aot = affine_transform({vars[BO], vars[X2O], in, vars[H2O], i_h_tm1, vars[C2O], ct[i]}); 150 | else 151 | // i_aot = vars[BO] + vars[X2O] * in; 152 | i_aot = affine_transform({vars[BO], vars[X2O], in}); 153 | Expression i_ot = logistic(i_aot); 154 | Expression ph_t = tanh(ct[i]); 155 | in = ht[i] = cwise_multiply(i_ot,ph_t); 156 | cc[i] = in; 157 | } 158 | ot = concatenate(cc); 159 | return ot; 160 | } 161 | 162 | } // namespace cnn 163 | -------------------------------------------------------------------------------- /cnn/deep-lstm.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_DEEP_LSTM_H_ 2 | #define CNN_DEEP_LSTM_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn.h" 6 | #include "cnn/expr.h" 7 | 8 | using namespace cnn::expr; 9 | 10 | namespace cnn { 11 | 12 | class Model; 13 | 14 | struct DeepLSTMBuilder : public RNNBuilder { 15 | DeepLSTMBuilder() = default; 16 | explicit DeepLSTMBuilder(unsigned layers, 17 | unsigned input_dim, 18 | unsigned hidden_dim, 19 | Model* model); 20 | 21 | Expression back() const override { return h.back().back(); } 22 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 23 | std::vector final_s() const override { 24 | std::vector ret = (c.size() == 0 ? 
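// layout (see the note in deep-lstm.cc): cell states c come first, hidden states h are appended after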
c0 : c.back()); 25 | for(auto my_h : final_h()) ret.push_back(my_h); 26 | return ret; 27 | } 28 | protected: 29 | void new_graph_impl(ComputationGraph& cg) override; 30 | void start_new_sequence_impl(const std::vector& h0) override; 31 | Expression add_input_impl(int prev, const Expression& x) override; 32 | 33 | public: 34 | // first index is layer, then ... 35 | std::vector> params; 36 | 37 | // first index is layer, then ... 38 | std::vector> param_vars; 39 | 40 | // first index is time, second is layer 41 | std::vector> h, c; 42 | std::vector o; 43 | 44 | // initial values of h and c at each layer 45 | // - both default to zero matrix input 46 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 47 | std::vector h0; 48 | std::vector c0; 49 | unsigned layers; 50 | }; 51 | 52 | } // namespace cnn 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /cnn/devices.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/devices.h" 2 | 3 | #include 4 | 5 | #include "cnn/cuda.h" 6 | 7 | using namespace std; 8 | 9 | namespace cnn { 10 | 11 | Device::~Device() {} 12 | 13 | #if HAVE_CUDA 14 | Device_GPU::Device_GPU(int mb, int device_id) : 15 | Device(DeviceType::GPU, &gpu_mem), cuda_device_id(device_id), gpu_mem(device_id) { 16 | CUDA_CHECK(cudaSetDevice(device_id)); 17 | CUBLAS_CHECK(cublasCreate(&cublas_handle)); 18 | CUBLAS_CHECK(cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE)); 19 | kSCALAR_MINUSONE = (float*)gpu_mem.malloc(sizeof(float)); 20 | kSCALAR_ONE = (float*)gpu_mem.malloc(sizeof(float)); 21 | kSCALAR_ZERO = (float*)gpu_mem.malloc(sizeof(float)); 22 | float minusone = -1; 23 | CUDA_CHECK(cudaMemcpyAsync(kSCALAR_MINUSONE, &minusone, sizeof(float), cudaMemcpyHostToDevice)); 24 | float one = 1; 25 | CUDA_CHECK(cudaMemcpyAsync(kSCALAR_ONE, &one, sizeof(float), cudaMemcpyHostToDevice)); 26 | float zero = 0; 27 | CUDA_CHECK(cudaMemcpyAsync(kSCALAR_ZERO, &zero, sizeof(float), cudaMemcpyHostToDevice)); 28 | 29 | // this is the big memory allocation 30 | 31 | size_t byte_count = (size_t)mb << 20; 32 | fxs = new AlignedMemoryPool(byte_count, mem); // memory for node values 33 | dEdfs = new AlignedMemoryPool(byte_count, mem); // memory for node gradients 34 | ps = new AlignedMemoryPool(byte_count, mem); // memory for parameters 35 | 36 | } 37 | 38 | Device_GPU::~Device_GPU() {} 39 | #endif 40 | 41 | // TODO we should be able to configure this carefully with a configuration 42 | // script 43 | // CPU -- 0 params 44 | // -- 50mb fxs 45 | // -- 50mb dEdfx 46 | Device_CPU::Device_CPU(int mb, bool shared) : 47 | Device(DeviceType::CPU, &cpu_mem), shmem(mem) { 48 | if (shared) shmem = new SharedAllocator(); 49 | kSCALAR_MINUSONE = (float*) mem->malloc(sizeof(float)); 50 | *kSCALAR_MINUSONE = -1; 51 | kSCALAR_ONE = (float*) mem->malloc(sizeof(float)); 52 | *kSCALAR_ONE = 1; 53 | kSCALAR_ZERO = (float*) mem->malloc(sizeof(float)); 54 | *kSCALAR_ZERO = 0; 55 | 56 | // this is the big memory allocation: the pools 57 | 58 | size_t byte_count = (size_t)mb << 20; 59 | fxs = new AlignedMemoryPool(byte_count, mem); // memory for node values 60 | dEdfs = new AlignedMemoryPool(byte_count, mem); // memory for node gradients 61 | ps = new AlignedMemoryPool(byte_count, mem); // memory for parameters 62 | 63 | } 64 | 65 | Device_CPU::~Device_CPU() {} 66 | 67 | } // namespace cnn 68 | -------------------------------------------------------------------------------- /cnn/devices.h: 
-------------------------------------------------------------------------------- 1 | #ifndef CNN_DEVICES_H 2 | #define CNN_DEVICES_H 3 | 4 | #include 5 | #include "cnn/aligned-mem-pool.h" 6 | #include "cnn/cuda.h" 7 | 8 | namespace cnn { 9 | 10 | enum class DeviceType {CPU, GPU}; 11 | 12 | class Device { 13 | protected: 14 | Device(DeviceType t, MemAllocator* m) : type(t), mem(m) {} 15 | Device(const Device&) = delete; 16 | Device& operator=(const Device&) = delete; 17 | virtual ~Device(); 18 | public: 19 | DeviceType type; 20 | MemAllocator* mem; 21 | AlignedMemoryPool* fxs; 22 | AlignedMemoryPool* dEdfs; 23 | AlignedMemoryPool* ps; 24 | float* kSCALAR_MINUSONE; 25 | float* kSCALAR_ONE; 26 | float* kSCALAR_ZERO; 27 | std::string name; 28 | }; 29 | 30 | #if HAVE_CUDA 31 | class Device_GPU : public Device { 32 | public: 33 | explicit Device_GPU(int mb, int device_id); 34 | ~Device_GPU(); 35 | int cuda_device_id; 36 | cublasHandle_t cublas_handle; 37 | GPUAllocator gpu_mem; 38 | }; 39 | #endif 40 | 41 | class Device_CPU : public Device { 42 | public: 43 | explicit Device_CPU(int mb, bool shared); 44 | ~Device_CPU(); 45 | CPUAllocator cpu_mem; 46 | MemAllocator* shmem; 47 | }; 48 | 49 | } // namespace cnn 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /cnn/dict.cc: -------------------------------------------------------------------------------- 1 | #include "dict.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | namespace cnn { 10 | 11 | std::vector ReadSentence(const std::string& line, Dict* sd) { 12 | std::istringstream in(line); 13 | std::string word; 14 | std::vector res; 15 | while(in) { 16 | in >> word; 17 | if (!in || word.empty()) break; 18 | res.push_back(sd->Convert(word)); 19 | } 20 | return res; 21 | } 22 | 23 | void ReadSentencePair(const std::string& line, std::vector* s, Dict* sd, std::vector* t, Dict* td) { 24 | std::istringstream in(line); 25 | std::string word; 26 | std::string sep = "|||"; 27 | Dict* d = sd; 28 | std::vector* v = s; 29 | while(in) { 30 | in >> word; 31 | if (!in) break; 32 | if (word == sep) { d = td; v = t; continue; } 33 | v->push_back(d->Convert(word)); 34 | } 35 | } 36 | 37 | } // namespace cnn 38 | 39 | -------------------------------------------------------------------------------- /cnn/dict.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_DICT_H_ 2 | #define CNN_DICT_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #if BOOST_VERSION >= 105600 15 | #include 16 | #include 17 | #endif 18 | 19 | namespace cnn { 20 | 21 | class Dict { 22 | typedef std::unordered_map Map; 23 | public: 24 | Dict() : frozen(false), map_unk(false), unk_id(-1) { 25 | } 26 | 27 | inline unsigned size() const { return words_.size(); } 28 | 29 | inline bool Contains(const std::string& words) { 30 | return !(d_.find(words) == d_.end()); 31 | } 32 | 33 | void Freeze() { frozen = true; } 34 | bool is_frozen() { return frozen; } 35 | 36 | inline int Convert(const std::string& word) { 37 | auto i = d_.find(word); 38 | if (i == d_.end()) { 39 | if (frozen) { 40 | if (map_unk) { 41 | return unk_id; 42 | } 43 | else { 44 | std::cerr << map_unk << std::endl; 45 | std::cerr << "Unknown word encountered: " << word << std::endl; 46 | throw std::runtime_error("Unknown word encountered in frozen dictionary: " + word); 47 | } 48 | } 49 | words_.push_back(word); 50 | 
return d_[word] = words_.size() - 1; 51 | } else { 52 | return i->second; 53 | } 54 | } 55 | 56 | inline const std::string& Convert(const int& id) const { 57 | assert(id < (int)words_.size()); 58 | return words_[id]; 59 | } 60 | 61 | void SetUnk(const std::string& word) { 62 | if (!frozen) 63 | throw std::runtime_error("Please call SetUnk() only after dictionary is frozen"); 64 | if (map_unk) 65 | throw std::runtime_error("SetUnk() was called more than once"); 66 | 67 | // temporarily unfreeze the dictionary to allow the UNK token to be added 68 | frozen = false; 69 | unk_id = Convert(word); 70 | frozen = true; 71 | 72 | map_unk = true; 73 | } 74 | 75 | void clear() { words_.clear(); d_.clear(); } 76 | 77 | private: 78 | bool frozen; 79 | bool map_unk; // if true, map unknown word to unk_id 80 | int unk_id; 81 | std::vector<std::string> words_; 82 | Map d_; 83 | 84 | friend class boost::serialization::access; 85 | #if BOOST_VERSION >= 105600 86 | template<class Archive> void serialize(Archive& ar, const unsigned int) { 87 | ar & frozen; 88 | ar & map_unk; 89 | ar & unk_id; 90 | ar & words_; 91 | ar & d_; 92 | } 93 | #else 94 | template<class Archive> void serialize(Archive& ar, const unsigned int) { 95 | throw std::invalid_argument("Serializing dictionaries is only supported on versions of boost 1.56 or higher"); 96 | } 97 | #endif 98 | }; 99 | 100 | std::vector<int> ReadSentence(const std::string& line, Dict* sd); 101 | void ReadSentencePair(const std::string& line, std::vector<int>* s, Dict* sd, std::vector<int>* t, Dict* td); 102 | 103 | } // namespace cnn 104 | 105 | #endif 106 | -------------------------------------------------------------------------------- /cnn/dim.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/dim.h" 2 | 3 | #include 4 | 5 | using namespace std; 6 | 7 | namespace cnn { 8 | 9 | ostream& operator<<(ostream& os, const Dim& d) { 10 | os << '{'; 11 | for (unsigned i = 0; i < d.nd; ++i) { 12 | if (i) os << ','; 13 | os << d.d[i]; 14 | } 15 | if(d.bd != 1) os << 'X' << d.bd; 16 | return os << '}'; 17 | } 18 | 19 | ostream& operator<<(ostream& os, const vector<Dim>& ds) { 20 | os << '['; 21 | for (unsigned i = 0; i < ds.size(); ++i) 22 | os << (i ? " " : "") << ds[i]; 23 | return os << ']'; 24 | } 25 | 26 | } // namespace cnn 27 | 28 | -------------------------------------------------------------------------------- /cnn/dim.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_DIM_H 2 | #define CNN_DIM_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define CNN_MAX_TENSOR_DIM 7 13 | 14 | namespace boost { namespace serialization { class access; } } 15 | 16 | namespace cnn { 17 | 18 | struct Dim { 19 | Dim() : nd(), bd(1) {} 20 | // explicit Dim(unsigned int m) : nd(1), bd(1) { d[0] = m; } 21 | // TODO: The constructors for dimensions w/ and w/o batches are not intuitive. 22 | // can this be fixed in some way?
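// As an illustration of the TODO above, a minimal sketch of how the two
// constructor families behave (a hedged example; it assumes the template
// arguments stripped by extraction are unsigned int):
//
//   Dim m({3, 4});          // a 3x4 matrix; m.batch_elems() == 1, m.size() == 12
//   Dim b({3, 4}, 8);       // same shape over a minibatch of 8; b.size() == 96
//   Dim t = b.transpose();  // {4, 3}; the batch is preserved: t.batch_elems() == 8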
23 | // Dim(unsigned int m, unsigned int n) : nd(2), bd(1) { d[0] = m; d[1] = n; } 24 | Dim(std::initializer_list<unsigned int> x) : nd(), bd(1) { 25 | for(auto v : x) d[nd++] = v; 26 | } 27 | Dim(std::initializer_list<unsigned int> x, unsigned int b) : nd(), bd(b) { 28 | for(auto v : x) d[nd++] = v; 29 | } 30 | Dim(const std::vector<unsigned int>& x) : nd(), bd(1) { 31 | for(auto v : x) d[nd++] = v; 32 | } 33 | Dim(const std::vector<unsigned int>& x, unsigned int b) : nd(), bd(b) { 34 | for(auto v : x) d[nd++] = v; 35 | } 36 | inline unsigned int size() const { 37 | return batch_size() * bd; 38 | } 39 | inline unsigned int batch_size() const { 40 | unsigned int p = 1; 41 | for (unsigned int i = 0; i < nd; ++i) p *= d[i]; 42 | return p; 43 | } 44 | inline unsigned int sum_dims() const { 45 | unsigned int p = 0; 46 | for (unsigned int i = 0; i < nd; ++i) p += d[i]; 47 | return p; 48 | } 49 | inline Dim truncate() const { 50 | Dim r = *this; 51 | unsigned int m = 1; 52 | unsigned int s = size(); 53 | for (unsigned int i = 1; i < s; ++i) 54 | if (size(i) > 1) m = i + 1; 55 | r.resize(m); 56 | return r; 57 | } 58 | inline Dim single_batch() const { 59 | Dim r = *this; 60 | r.bd = 1; 61 | return r; 62 | } 63 | inline void resize(unsigned int i) { nd = i; } 64 | inline unsigned int ndims() const { return nd; } 65 | inline unsigned int rows() const { return d[0]; } 66 | inline unsigned int cols() const { return nd > 1 ? d[1] : 1; } 67 | inline unsigned int batch_elems() const { return bd; } 68 | inline void set(unsigned int i, unsigned int s) { assert(i < nd); assert(s > 0); d[i] = s; } 69 | inline unsigned int operator[](unsigned int i) const { return i < nd ? d[i] : 1; } 70 | inline unsigned int size(unsigned int i) const { return (*this)[i]; } 71 | inline Dim transpose() const { 72 | if (nd == 1) { return Dim({1, d[0]}, bd); } 73 | else if (nd == 2) { return Dim({d[1], d[0]}, bd); } 74 | throw std::invalid_argument("Cannot transpose Dim object with more than 2 dimensions"); 75 | } 76 | unsigned int d[CNN_MAX_TENSOR_DIM]; 77 | unsigned int nd; 78 | unsigned int bd; 79 | private: 80 | friend class boost::serialization::access; 81 | template<class Archive> void serialize(Archive& ar, const unsigned int) { 82 | ar & nd; 83 | ar & d; 84 | } 85 | }; 86 | 87 | //static_assert(std::is_trivially_copyable<Dim>::value, "Dim must be trivially copyable"); 88 | 89 | inline bool operator==(const Dim& a, const Dim& b) { 90 | if (a.nd != b.nd || a.bd != b.bd) return false; 91 | return std::memcmp(a.d, b.d, a.nd * sizeof(unsigned int)) == 0; // compare nd elements; memcmp counts bytes 92 | } 93 | 94 | inline bool operator!=(const Dim& a, const Dim& b) { return !(a == b); } 95 | 96 | std::ostream& operator<<(std::ostream& os, const Dim& d); 97 | std::ostream& operator<<(std::ostream& os, const std::vector<Dim>& ds); 98 | 99 | } // namespace cnn 100 | 101 | #endif 102 | -------------------------------------------------------------------------------- /cnn/except.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_EXCEPT_H_ 2 | #define CNN_EXCEPT_H_ 3 | 4 | #include 5 | 6 | namespace cnn { 7 | 8 | // if CNN exhausts its memory pool 9 | class out_of_memory : public std::runtime_error { 10 | public: 11 | out_of_memory(const std::string& what_arg) : runtime_error(what_arg) {} 12 | }; 13 | 14 | // this error occurs when logic that has not been 15 | // implemented for the CUDA backend is 16 | // executed.
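// A hedged sketch of how callers can trap these errors; guarded_run and
// build_and_train are hypothetical application code (assumes <iostream>),
// and only the catch clauses are specific to cnn:
//
//   void build_and_train();  // hypothetical entry point, defined elsewhere
//
//   int guarded_run() {
//     try {
//       build_and_train();
//     } catch (const cnn::out_of_memory& e) {
//       std::cerr << "pool exhausted: " << e.what() << "\n";  // rerun with a larger --cnn-mem
//       return 1;
//     } catch (const std::logic_error& e) {
//       std::cerr << "not implemented: " << e.what() << "\n"; // e.g. cuda_not_implemented, declared below
//       return 1;
//     }
//     return 0;
//   }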
17 | class cuda_not_implemented : public std::logic_error { 18 | public: 19 | cuda_not_implemented(const std::string& what_arg) : logic_error(what_arg) {} 20 | }; 21 | 22 | // this is thrown when cuda returns an error (bad arguments, memory, state, etc) 23 | class cuda_exception : public std::runtime_error { 24 | public: 25 | cuda_exception(const std::string& what_arg) : runtime_error(what_arg) {} 26 | }; 27 | 28 | } // namespace cnn 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /cnn/exec.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/exec.h" 2 | 3 | #include "cnn/param-nodes.h" 4 | 5 | using namespace std; 6 | 7 | namespace cnn { 8 | 9 | ExecutionEngine::~ExecutionEngine() {} 10 | 11 | void SimpleExecutionEngine::invalidate() { 12 | num_nodes_evaluated = 0; 13 | } 14 | 15 | const Tensor& SimpleExecutionEngine::forward() { 16 | const VariableIndex node_max_index = (VariableIndex)(cg.nodes.size() - 1); 17 | return forward(node_max_index); 18 | } 19 | 20 | const Tensor& SimpleExecutionEngine::forward(VariableIndex i) { 21 | invalidate(); 22 | return incremental_forward(i); 23 | } 24 | 25 | const Tensor& SimpleExecutionEngine::get_value(VariableIndex i) { 26 | assert(i < cg.nodes.size()); 27 | if (i >= num_nodes_evaluated) { 28 | incremental_forward(); 29 | } 30 | return nfxs[i]; 31 | } 32 | 33 | const Tensor& SimpleExecutionEngine::incremental_forward() { 34 | const VariableIndex node_max_index = (VariableIndex)(cg.nodes.size() - 1); 35 | return incremental_forward(node_max_index); 36 | } 37 | 38 | const Tensor& SimpleExecutionEngine::incremental_forward(VariableIndex i) { 39 | assert(i < cg.nodes.size()); 40 | 41 | // free any old memory if this is a new CG 42 | if (num_nodes_evaluated == 0) fxs->free(); 43 | 44 | if (i >= num_nodes_evaluated) { 45 | nfxs.resize(i + 1); 46 | 47 | //vector dummy(5, "x"); 48 | vector xs(16); 49 | for (; num_nodes_evaluated <= i; ++num_nodes_evaluated) { 50 | const Node* node = cg.nodes[num_nodes_evaluated]; 51 | xs.resize(node->arity()); 52 | unsigned ai = 0; 53 | for (VariableIndex arg : node->args) { 54 | xs[ai] = &nfxs[arg]; 55 | ++ai; 56 | } 57 | nfxs[num_nodes_evaluated].d = node->dim; 58 | nfxs[num_nodes_evaluated].v = static_cast(fxs->allocate(node->dim.size() * sizeof(float))); 59 | if (nfxs[num_nodes_evaluated].v == nullptr) { 60 | cerr << "out of memory\n"; 61 | abort(); 62 | } 63 | void* aux_mem = nullptr; 64 | size_t aux_size = node->aux_storage_size(); 65 | if (aux_size) { 66 | aux_mem = fxs->allocate(aux_size); 67 | if (!aux_mem) { 68 | cerr << "aux out of memory\n"; 69 | abort(); 70 | } 71 | } 72 | node->aux_mem = aux_mem; 73 | node->forward(xs, nfxs[num_nodes_evaluated]); 74 | } 75 | } 76 | return nfxs[i]; 77 | } 78 | 79 | void SimpleExecutionEngine::backward() { 80 | assert(nfxs.size() == cg.nodes.size()); 81 | backward((VariableIndex)(cg.nodes.size()-1)); 82 | } 83 | 84 | // TODO what is happening with parameter nodes if from_where > param_node_id ? 
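// For reference, a hedged sketch of the calling convention this engine
// expects: the graph's final node must be a scalar before backward() runs.
// p_W, x_val, y_val, and the trainer are assumed to be created elsewhere
// (parameter/input/squared_distance come from expr.h, SimpleSGDTrainer from
// training.h):
//
//   ComputationGraph cg;                  // owns a SimpleExecutionEngine
//   Expression W = parameter(cg, p_W);    // ParameterNode: tracked in cg.parameter_nodes
//   Expression x = input(cg, Dim({2}), &x_val);
//   Expression y = input(cg, &y_val);
//   Expression loss = squared_distance(W * x, y);  // scalar final node
//   float l = as_scalar(cg.forward());    // forward to the last node
//   cg.backward();                        // reverse pass; accumulates into p_W->g
//   trainer.update(1.0f);                 // apply and clear the gradients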
85 | void SimpleExecutionEngine::backward(VariableIndex from_where) { 86 | assert(from_where+1 <= nfxs.size()); 87 | assert(from_where+1 <= cg.nodes.size()); 88 | if (nfxs[from_where].d.size() != 1) { 89 | cerr << "backward() called on non-scalar node.\n"; 90 | abort(); 91 | } 92 | 93 | const unsigned num_nodes = from_where+1; 94 | ndEdfs.resize(num_nodes); 95 | dEdfs->free(); 96 | for (unsigned i = 0; i < num_nodes; ++i) { 97 | const auto dim = nfxs[i].d; 98 | ndEdfs[i].d = dim; 99 | ndEdfs[i].v = static_cast(dEdfs->allocate(dim.size() * sizeof(float))); 100 | if (!ndEdfs[i].v) { 101 | cerr << "out of memory while attempting to allocate space for derivatives\n"; 102 | abort(); 103 | } 104 | } 105 | dEdfs->zero_allocated_memory(); 106 | // initialize dE/dE = 1 107 | ndEdfs.back().v = kSCALAR_ONE; 108 | 109 | // here we find constant paths to avoid doing extra work 110 | // by default, a node is constant unless 111 | // 1) it is a parameter node 112 | // 2) it depends on a non-constant node 113 | // (thus, functions of constants and inputs end up being 114 | // false in this computation) 115 | vector needs_derivative(num_nodes, false); 116 | for (auto i : cg.parameter_nodes) 117 | needs_derivative[i] = true; 118 | 119 | for (unsigned ni = 0; ni < num_nodes; ++ni) { 120 | bool nd = needs_derivative[ni]; 121 | for (auto arg : cg.nodes[ni]->args) 122 | nd |= needs_derivative[arg]; 123 | needs_derivative[ni] = nd; 124 | } 125 | 126 | // loop in reverse topological order 127 | // consider only nodes that participate in the computation. 128 | vector in_computation(num_nodes, false); 129 | in_computation[num_nodes - 1] = true; 130 | vector xs; 131 | for (int i = num_nodes - 1; i >= 0; --i) { 132 | if (!in_computation[i]) continue; 133 | const Node* node = cg.nodes[i]; 134 | xs.resize(node->arity()); 135 | unsigned ai = 0; 136 | for (VariableIndex arg : node->args) { 137 | in_computation[arg] = true; 138 | xs[ai] = &nfxs[arg]; 139 | ++ai; 140 | } 141 | ai = 0; 142 | for (VariableIndex arg : node->args) { 143 | if (needs_derivative[arg]) { 144 | node->backward(xs, nfxs[i], ndEdfs[i], ai, ndEdfs[arg]); 145 | } 146 | ++ai; 147 | } 148 | } 149 | 150 | // accumulate gradients into parameters 151 | // this is simpler than you might find in some other frameworks 152 | // since we assume parameters come into the graph as a "function" 153 | // that returns the current value of the parameters 154 | for (VariableIndex i : cg.parameter_nodes) 155 | static_cast(cg.nodes[i])->accumulate_grad(ndEdfs[i]); 156 | } 157 | 158 | } // namespace cnn 159 | -------------------------------------------------------------------------------- /cnn/exec.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_EXEC_H 2 | #define CNN_EXEC_H 3 | 4 | #include "cnn/cnn.h" 5 | 6 | namespace cnn { 7 | 8 | class ExecutionEngine { 9 | public: 10 | virtual ~ExecutionEngine(); 11 | virtual void invalidate() = 0; 12 | virtual const Tensor& forward() = 0; 13 | virtual const Tensor& forward(VariableIndex i) = 0; 14 | virtual const Tensor& incremental_forward() = 0; // if you want to add nodes and evaluate just the new parts 15 | virtual const Tensor& incremental_forward(VariableIndex i) = 0; 16 | virtual const Tensor& get_value(VariableIndex i) = 0; 17 | virtual void backward() = 0; 18 | virtual void backward(VariableIndex i) = 0; 19 | protected: 20 | explicit ExecutionEngine(const ComputationGraph& cg) : cg(cg) {} 21 | const ComputationGraph& cg; 22 | }; 23 | 24 | class SimpleExecutionEngine : 
public ExecutionEngine { 25 | public: 26 | explicit SimpleExecutionEngine(const ComputationGraph& cg) : ExecutionEngine(cg) {} 27 | void invalidate() override; 28 | const Tensor& forward() override; 29 | const Tensor& forward(VariableIndex i) override; 30 | const Tensor& incremental_forward() override; // if you want to add nodes and evaluate just the new parts 31 | const Tensor& incremental_forward(VariableIndex i) override; 32 | const Tensor& get_value(VariableIndex i) override; 33 | void backward() override; 34 | void backward(VariableIndex i) override; 35 | private: 36 | std::vector<Tensor> nfxs; 37 | std::vector<Tensor> ndEdfs; 38 | VariableIndex num_nodes_evaluated; 39 | }; 40 | 41 | } // namespace cnn 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /cnn/fast-lstm.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_FAST_LSTM_H_ 2 | #define CNN_FAST_LSTM_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn.h" 6 | #include "cnn/expr.h" 7 | 8 | using namespace cnn::expr; 9 | 10 | namespace cnn { 11 | 12 | class Model; 13 | 14 | /* 15 | FastLSTM replaces the matrices from the cell to the other units with diagonal matrices. 16 | */ 17 | struct FastLSTMBuilder : public RNNBuilder { 18 | FastLSTMBuilder() = default; 19 | explicit FastLSTMBuilder(unsigned layers, 20 | unsigned input_dim, 21 | unsigned hidden_dim, 22 | Model* model); 23 | 24 | Expression back() const override { return (cur == -1? h0.back() : h[cur].back()); } 25 | std::vector<Expression> final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 26 | std::vector<Expression> final_s() const override { 27 | std::vector<Expression> ret = (c.size() == 0 ? c0 : c.back()); 28 | for(auto my_h : final_h()) ret.push_back(my_h); 29 | return ret; 30 | } 31 | unsigned num_h0_components() const override { return 2 * layers; } 32 | 33 | std::vector<Expression> get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 34 | std::vector<Expression> get_s(RNNPointer i) const override { 35 | std::vector<Expression> ret = (i == -1 ? c0 : c[i]); 36 | for(auto my_h : get_h(i)) ret.push_back(my_h); 37 | return ret; 38 | } 39 | 40 | void copy(const RNNBuilder & params) override; 41 | protected: 42 | void new_graph_impl(ComputationGraph& cg) override; 43 | void start_new_sequence_impl(const std::vector<Expression>& h0) override; 44 | Expression add_input_impl(int prev, const Expression& x) override; 45 | 46 | public: 47 | // first index is layer, then ... 48 | std::vector<std::vector<Parameters*>> params; 49 | 50 | // first index is layer, then ...
51 | std::vector> param_vars; 52 | 53 | // first index is time, second is layer 54 | std::vector> h, c; 55 | 56 | // initial values of h and c at each layer 57 | // - both default to zero matrix input 58 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 59 | std::vector h0; 60 | std::vector c0; 61 | unsigned layers; 62 | }; 63 | 64 | } // namespace cnn 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /cnn/gpu-kernels.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GPU_KERNELS_H 2 | #define CNN_GPU_KERNELS_H 3 | 4 | #include "cnn/cuda.h" 5 | 6 | namespace cnn { 7 | namespace gpu { 8 | 9 | template 10 | __global__ void unaryExprKernel(int n, const float* x, float* y, Func func) { 11 | int i = threadIdx.x + blockIdx.x * blockDim.x; 12 | while (i < n) { 13 | y[i] = func(x[i]); 14 | i += gridDim.x * blockDim.x; 15 | } 16 | } 17 | 18 | template 19 | __global__ void accUnaryExprKernel(int n, const float* x, float* y, Func func) { 20 | int i = threadIdx.x + blockIdx.x * blockDim.x; 21 | while (i < n) { 22 | y[i] += func(x[i]); 23 | i += gridDim.x * blockDim.x; 24 | } 25 | } 26 | 27 | template 28 | __global__ void binaryExprKernel(int n, const float* x0, const float* x1, float* y, Func func) { 29 | int i = threadIdx.x + blockIdx.x * blockDim.x; 30 | while (i < n) { 31 | y[i] = func(x0[i], x1[i]); 32 | i += gridDim.x * blockDim.x; 33 | } 34 | } 35 | 36 | template 37 | __global__ void accBinaryExprKernel(int n, const float* x0, const float* x1, float* y, Func func) { 38 | int i = threadIdx.x + blockIdx.x * blockDim.x; 39 | while (i < n) { 40 | y[i] += func(x0[i], x1[i]); 41 | i += gridDim.x * blockDim.x; 42 | } 43 | } 44 | 45 | template 46 | __global__ void slowReduceKernel(int n, const float* x0, const float* x1, float* y, Func func) { 47 | float ty = 0; 48 | // THIS IS BAD - FIX THIS TO MAKE IT FAST 49 | for (int i = 0; i < n; ++i) 50 | ty += func(x0[i], x1[i]); 51 | y[0] = ty; 52 | } 53 | 54 | } // namespace gpu 55 | } // namespace cnn 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /cnn/gpu-ops.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GPU_OPS_H 2 | #define CNN_GPU_OPS_H 3 | 4 | namespace cnn { 5 | namespace gpu { 6 | 7 | void vpairwise_rank_loss(int n, float margin, const float* xgood, const float* xbad, float* y); 8 | void vpairwise_rank_loss_backward(int n, bool d_wrt_correct, const float* fx, const float* dEdf, float* dEdx); 9 | void vcwise_product(int n, const float* x0, const float* x1, float* y); 10 | void vcwise_product_backward(int n, const float* dEdy, const float* x_other, float* dEdx); 11 | void vconstant_minusx(int n, float c, const float* x, float* y); 12 | void vnegate(int n, const float* x, float* y); 13 | void vnegate_backward(int n, const float* dEdf, float* dEdx); 14 | void vrelu(int n, const float* x, float* y); 15 | void vrelu_backward(int n, const float* fx, const float* dEdf, float* dEdx); 16 | void vtanh(int n, const float* x, float* y); 17 | void vtanh_backward(int n, const float* fx, const float* dEdf, float* dEdx); 18 | void vlog(int n, const float* x, float* y); 19 | void vlog_backward(int n, const float* fx, const float* dEdf, float* dEdx); 20 | void vlogistic(int n, const float* x, float* y); 21 | void vlogistic_backward(int n, const float* fx, const float* dEdf, float* dEdx); 22 | void l2_norm_reducer(int n, const float* x0, float* 
y, bool square, bool accumulate); 23 | void sqeucdist(int n, const float* x0, const float *x1, float* y); 24 | void sqeucdist_backward(int n, const float* dEdy, const float* x0, const float* x1, float* dEdx, int i); 25 | void softmax(int n, const float* x0, float* y); 26 | void softmax_backward(int n, const float* x0, const float* dEdf, float* dEdx); 27 | void pnlsoftmax(int n, int elem_idx, const float* x0, float* y, float* logz); 28 | void pnlsoftmax_backward(int n, int elem_idx, const float* x0, const float* dEdf, const float* logz, float* dEdx); 29 | 30 | void sgd_update(int n, const float* g, float* x, float scale, float lambda); 31 | 32 | } // namespace gpu 33 | } // namespace cnn 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /cnn/grad-check.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/grad-check.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "cnn/model.h" 7 | #include "cnn/cnn.h" 8 | #include "cnn/tensor.h" 9 | 10 | using namespace std; 11 | 12 | namespace cnn { 13 | 14 | bool CheckGrad(Model& m, ComputationGraph& g, int verbosity) { 15 | // Clear the parameters first 16 | const vector& params = m.parameters_list(); 17 | const vector& lookup_params = m.lookup_parameters_list(); 18 | for (auto pp : params) 19 | pp->clear(); 20 | for (auto pp : lookup_params) 21 | pp->clear(); 22 | 23 | // Perform forward and backward steps 24 | float alpha = 5e-4; 25 | g.forward(); 26 | g.backward(); 27 | 28 | // Check 29 | bool flag = false, curr_flag = false; 30 | for (auto pp : params) { 31 | if(verbosity > 1) 32 | cerr << endl << "PARAMETERS " << pp << endl; 33 | Parameters& p = *pp; 34 | size_t ts = p.dim.size(); 35 | for (size_t i = 0; i < ts; ++i) { 36 | float old = TensorTools::AccessElement(p.values, i); 37 | TensorTools::SetElement(p.values, i, old - alpha); 38 | float E_left = as_scalar(g.forward()); 39 | TensorTools::SetElement(p.values, i, old + alpha); 40 | float E_right = as_scalar(g.forward()); 41 | TensorTools::SetElement(p.values, i, old); 42 | float g = (E_right - E_left) / (2 * alpha); 43 | float g_act = TensorTools::AccessElement(p.g, i); 44 | float f = fabs(g - g_act); 45 | float m = max(fabs(g), fabs(g_act)); 46 | if (f > 0.1 && m > 0.f) f /= m; 47 | if (f > 0.1 || std::isnan(f)) { flag = true; if(verbosity > 0) { curr_flag = true; cerr << "***[" << f << "] "; } } 48 | if(verbosity + (curr_flag ? 
1 : 0) > 1) { 49 | cerr << g_act << ' ' << g << endl; 50 | curr_flag = false; 51 | } 52 | } 53 | } 54 | 55 | for (auto pp : lookup_params) { 56 | if(verbosity > 1) 57 | cerr << endl << "LOOKUP PARAMETERS " << pp << endl; 58 | LookupParameters& p = *pp; 59 | size_t ts = p.dim.size(); 60 | for (unsigned j : p.non_zero_grads) { 61 | if(verbosity > 1) 62 | cerr << "OBJECT=" << j << endl; 63 | Tensor& v = p.values[j]; 64 | Tensor& ag = p.grads[j]; 65 | for (size_t i = 0; i < ts; ++i) { 66 | float old = TensorTools::AccessElement(v, i); 67 | TensorTools::SetElement(v, i, old - alpha); 68 | float E_left = as_scalar(g.forward()); 69 | TensorTools::SetElement(v, i, old + alpha); 70 | float E_right = as_scalar(g.forward()); 71 | TensorTools::SetElement(v, i, old); 72 | float g = (E_right - E_left) / (2 * alpha); 73 | float g_act = TensorTools::AccessElement(ag, i); 74 | float f = fabs(g - g_act); 75 | float m = max(fabs(g), fabs(g_act)); 76 | if (f > 0.1 && m > 0.f) f /= m; 77 | if (f > 0.1 || std::isnan(f)) { flag = true; if(verbosity > 0) { curr_flag = true; cerr << "***[" << f << "] "; } } 78 | if(verbosity + (curr_flag ? 1 : 0) > 1) { 79 | cerr << g_act << ' ' << g << endl; 80 | curr_flag = false; 81 | } 82 | } 83 | } 84 | } 85 | 86 | if (flag) { 87 | if (verbosity > 1) 88 | cerr << endl << "*** GRADIENT CHECK FAILED ***" << endl; 89 | } else { 90 | if (verbosity > 0) 91 | cerr << endl << "GRADIENT CHECK PASSED" << endl; 92 | } 93 | return !flag; 94 | } 95 | 96 | } 97 | 98 | -------------------------------------------------------------------------------- /cnn/grad-check.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GRAD_CHECK_H 2 | #define CNN_GRAD_CHECK_H 3 | 4 | namespace cnn { 5 | 6 | class Model; 7 | struct ComputationGraph; 8 | 9 | // verbosity is zero for silence, one for only printing errors, two for everything 10 | bool CheckGrad(Model& m, ComputationGraph& g, int verbosity = 1); 11 | 12 | } // namespace cnn 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /cnn/graph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/graph.h" 2 | #include "cnn/cnn.h" 3 | #include 4 | #include "cnn/cnn-helper.h" 5 | 6 | using namespace std; 7 | 8 | namespace cnn { 9 | 10 | void GraphOptimize(ComputationGraph* cg) { 11 | // topo sort 12 | vector<Node*>& nodes = cg->nodes; 13 | vector<int> longest_paths(nodes.size()); 14 | for (unsigned i = 0; i < nodes.size(); ++i) { 15 | auto& v = *nodes[i]; // vertex v_i 16 | auto& lp = longest_paths[i]; // distance to v_i 17 | for (auto e : v.args) { 18 | int weight = 0; 19 | if (v.args.size() == 7) weight = 1; 20 | int pte = longest_paths[e] + weight; 21 | if (pte > lp) lp = pte; 22 | } 23 | } 24 | for (unsigned i = 0; i < nodes.size(); ++i) { 25 | vector<string> x; 26 | for (auto e : nodes[i]->args) { 27 | x.push_back(string("x") + to_string(e)); 28 | } 29 | cerr << "LONGEST PATH: " << longest_paths[i] << "\tx" << i << " = " << nodes[i]->as_string(x) << endl; 30 | } 31 | abort(); // DEBUGGING 32 | } 33 | 34 | } // namespace cnn 35 | -------------------------------------------------------------------------------- /cnn/graph.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GRAPH_H 2 | #define CNN_GRAPH_H 3 | 4 | namespace cnn { 5 | struct ComputationGraph; 6 | void GraphOptimize(ComputationGraph* cg); 7 | } // namespace cnn 8 | 9 | #endif 10 |
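The gradient checker above is typically wired into a small standalone graph whose final node is a scalar. A minimal sketch (the shapes, input values, and choice of loss are arbitrary; all calls are from cnn's expr.h, grad-check.h, and model.h):

#include "cnn/cnn.h"
#include "cnn/expr.h"
#include "cnn/grad-check.h"
using namespace cnn;
using namespace cnn::expr;

bool gradients_ok(Model& m) {
  Parameters* p_W = m.add_parameters({4, 3});
  std::vector<float> x_val = {1.f, -0.5f, 0.25f};
  ComputationGraph cg;
  Expression W = parameter(cg, p_W);          // parameter node: gets checked
  Expression x = input(cg, Dim({3}), &x_val); // constant input: skipped
  Expression h = tanh(W * x);
  Expression loss = dot_product(h, h);        // scalar: becomes the final node
  return CheckGrad(m, cg, /*verbosity=*/1);   // finite differences vs. backprop
}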
-------------------------------------------------------------------------------- /cnn/gru.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/gru.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | #include "cnn/training.h" 10 | 11 | using namespace std; 12 | 13 | namespace cnn { 14 | 15 | enum { X2Z, H2Z, BZ, X2R, H2R, BR, X2H, H2H, BH }; 16 | 17 | GRUBuilder::GRUBuilder(unsigned layers, 18 | unsigned input_dim, 19 | unsigned hidden_dim, 20 | Model* model) : hidden_dim(hidden_dim), layers(layers) { 21 | unsigned layer_input_dim = input_dim; 22 | for (unsigned i = 0; i < layers; ++i) { 23 | // z 24 | Parameters* p_x2z = model->add_parameters({hidden_dim, layer_input_dim}); 25 | Parameters* p_h2z = model->add_parameters({hidden_dim, hidden_dim}); 26 | Parameters* p_bz = model->add_parameters({hidden_dim}); 27 | 28 | // r 29 | Parameters* p_x2r = model->add_parameters({hidden_dim, layer_input_dim}); 30 | Parameters* p_h2r = model->add_parameters({hidden_dim, hidden_dim}); 31 | Parameters* p_br = model->add_parameters({hidden_dim}); 32 | 33 | // h 34 | Parameters* p_x2h = model->add_parameters({hidden_dim, layer_input_dim}); 35 | Parameters* p_h2h = model->add_parameters({hidden_dim, hidden_dim}); 36 | Parameters* p_bh = model->add_parameters({hidden_dim}); 37 | layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next 38 | 39 | vector ps = {p_x2z, p_h2z, p_bz, p_x2r, p_h2r, p_br, p_x2h, p_h2h, p_bh}; 40 | params.push_back(ps); 41 | } // layers 42 | } 43 | 44 | void GRUBuilder::new_graph_impl(ComputationGraph& cg) { 45 | param_vars.clear(); 46 | for (unsigned i = 0; i < layers; ++i) { 47 | auto& p = params[i]; 48 | 49 | // z 50 | Expression x2z = parameter(cg,p[X2Z]); 51 | Expression h2z = parameter(cg,p[H2Z]); 52 | Expression bz = parameter(cg,p[BZ]); 53 | 54 | // r 55 | Expression x2r = parameter(cg,p[X2R]); 56 | Expression h2r = parameter(cg,p[H2R]); 57 | Expression br = parameter(cg,p[BR]); 58 | 59 | // h 60 | Expression x2h = parameter(cg,p[X2H]); 61 | Expression h2h = parameter(cg,p[H2H]); 62 | Expression bh = parameter(cg,p[BH]); 63 | 64 | vector vars = {x2z, h2z, bz, x2r, h2r, br, x2h, h2h, bh}; 65 | param_vars.push_back(vars); 66 | } 67 | } 68 | 69 | void GRUBuilder::start_new_sequence_impl(const std::vector& h_0) { 70 | h.clear(); 71 | h0 = h_0; 72 | if (!h0.empty()) { 73 | assert (h0.size() == layers); 74 | } 75 | } 76 | 77 | Expression GRUBuilder::add_input_impl(int prev, const Expression& x) { 78 | const bool has_initial_state = (h0.size() > 0); 79 | h.push_back(vector(layers)); 80 | vector& ht = h.back(); 81 | Expression in = x; 82 | for (unsigned i = 0; i < layers; ++i) { 83 | const vector& vars = param_vars[i]; 84 | Expression h_tprev; 85 | // prev_zero means that h_tprev should be treated as 0 86 | bool prev_zero = false; 87 | if (prev >= 0 || has_initial_state) { 88 | h_tprev = (prev < 0) ? 
h0[i] : h[prev][i]; 89 | } else { prev_zero = true; } 90 | // update gate 91 | Expression zt; 92 | if (prev_zero) 93 | zt = affine_transform({vars[BZ], vars[X2Z], in}); 94 | else 95 | zt = affine_transform({vars[BZ], vars[X2Z], in, vars[H2Z], h_tprev}); 96 | zt = logistic(zt); 97 | // forget 98 | Expression ft = 1.f - zt; 99 | // reset gate 100 | Expression rt; 101 | if (prev_zero) 102 | rt = affine_transform({vars[BR], vars[X2R], in}); 103 | else 104 | rt = affine_transform({vars[BR], vars[X2R], in, vars[H2R], h_tprev}); 105 | rt = logistic(rt); 106 | 107 | // candidate activation 108 | Expression ct; 109 | if (prev_zero) { 110 | ct = affine_transform({vars[BH], vars[X2H], in}); 111 | ct = tanh(ct); 112 | Expression nwt = cwise_multiply(zt, ct); 113 | in = ht[i] = nwt; 114 | } else { 115 | Expression ght = cwise_multiply(rt, h_tprev); 116 | ct = affine_transform({vars[BH], vars[X2H], in, vars[H2H], ght}); 117 | ct = tanh(ct); 118 | Expression nwt = cwise_multiply(zt, ct); 119 | Expression crt = cwise_multiply(ft, h_tprev); 120 | in = ht[i] = crt + nwt; 121 | } 122 | } 123 | return ht.back(); 124 | } 125 | 126 | void GRUBuilder::copy(const RNNBuilder & rnn) { 127 | const GRUBuilder & rnn_gru = (const GRUBuilder&)rnn; 128 | assert(params.size() == rnn_gru.params.size()); 129 | for(size_t i = 0; i < params.size(); ++i) 130 | for(size_t j = 0; j < params[i].size(); ++j) 131 | params[i][j]->copy(*rnn_gru.params[i][j]); 132 | } 133 | 134 | } // namespace cnn 135 | -------------------------------------------------------------------------------- /cnn/gru.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GRU_H_ 2 | #define CNN_GRU_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn.h" 6 | 7 | namespace cnn { 8 | 9 | class Model; 10 | 11 | struct GRUBuilder : public RNNBuilder { 12 | GRUBuilder() = default; 13 | explicit GRUBuilder(unsigned layers, 14 | unsigned input_dim, 15 | unsigned hidden_dim, 16 | Model* model); 17 | Expression back() const override { return (cur == -1? h0.back() : h[cur].back()); } 18 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 19 | std::vector final_s() const override { return final_h(); } 20 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 21 | std::vector get_s(RNNPointer i) const override { return get_h(i); } 22 | unsigned num_h0_components() const override { return layers; } 23 | void copy(const RNNBuilder & params) override; 24 | 25 | protected: 26 | void new_graph_impl(ComputationGraph& cg) override; 27 | void start_new_sequence_impl(const std::vector& h0) override; 28 | Expression add_input_impl(int prev, const Expression& x) override; 29 | 30 | // first index is layer, then ... 31 | std::vector> params; 32 | 33 | // first index is layer, then ... 
34 | std::vector> param_vars; 35 | 36 | // first index is time, second is layer 37 | std::vector> h; 38 | 39 | // initial values of h at each layer 40 | // - default to zero matrix input 41 | std::vector h0; 42 | 43 | unsigned hidden_dim; 44 | unsigned layers; 45 | }; 46 | 47 | } // namespace cnn 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /cnn/hsm-builder.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_HSMBUILDER_H 2 | #define CNN_HSMBUILDER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "cnn/cnn.h" 8 | #include "cnn/expr.h" 9 | #include "cnn/dict.h" 10 | #include "cnn/cfsm-builder.h" 11 | 12 | namespace cnn { 13 | 14 | struct Parameters; 15 | 16 | class Cluster { 17 | private: 18 | std::vector children; 19 | std::vector path; 20 | std::vector terminals; 21 | std::unordered_map word2ind; 22 | Parameters* p_weights; 23 | Parameters* p_bias; 24 | mutable expr::Expression weights; 25 | mutable expr::Expression bias; 26 | bool initialized; 27 | unsigned output_size; 28 | 29 | expr::Expression predict(expr::Expression h, ComputationGraph& cg) const; 30 | 31 | public: 32 | Cluster(); 33 | Cluster* add_child(unsigned sym); 34 | void add_word(unsigned word); 35 | void initialize(unsigned rep_dim, Model* model); 36 | 37 | void new_graph(ComputationGraph& cg); 38 | unsigned sample(expr::Expression h, ComputationGraph& cg) const; 39 | expr::Expression neg_log_softmax(expr::Expression h, unsigned r, ComputationGraph& cg) const; 40 | 41 | unsigned get_index(unsigned word) const; 42 | unsigned get_word(unsigned index) const; 43 | unsigned num_children() const; 44 | const Cluster* get_child(unsigned i) const; 45 | const std::vector& get_path() const; 46 | expr::Expression get_weights(ComputationGraph& cg) const; 47 | expr::Expression get_bias(ComputationGraph& cg) const; 48 | 49 | std::string toString() const; 50 | }; 51 | 52 | // helps with implementation of hierarchical softmax 53 | // read a file with lines of the following format 54 | // CLASSID word [freq] 55 | class HierarchicalSoftmaxBuilder : public FactoredSoftmaxBuilder { 56 | public: 57 | HierarchicalSoftmaxBuilder(unsigned rep_dim, 58 | const std::string& cluster_file, 59 | Dict* word_dict, 60 | Model* model); 61 | ~HierarchicalSoftmaxBuilder(); 62 | // call this once per ComputationGraph 63 | void new_graph(ComputationGraph& cg); 64 | 65 | // -log(p(c | rep) * p(w | c, rep)) 66 | expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx); 67 | 68 | // samples a word from p(w,c | rep) 69 | unsigned sample(const expr::Expression& rep); 70 | 71 | private: 72 | Cluster* ReadClusterFile(const std::string& cluster_file, Dict* word_dict); 73 | std::vector widx2path; // will be NULL if not found 74 | Dict path_symbols; 75 | 76 | ComputationGraph* pcg; 77 | Cluster* root; 78 | }; 79 | 80 | } // namespace cnn 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /cnn/init.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/init.h" 2 | #include "cnn/aligned-mem-pool.h" 3 | #include "cnn/cnn.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #if HAVE_CUDA 10 | #include "cnn/cuda.h" 11 | #include 12 | #endif 13 | 14 | using namespace std; 15 | 16 | namespace cnn { 17 | 18 | // these should maybe live in a file called globals.cc or something 19 | AlignedMemoryPool* fxs = nullptr; 20 | AlignedMemoryPool* dEdfs = 
nullptr; 21 | AlignedMemoryPool* ps = nullptr; 22 | mt19937* rndeng = nullptr; 23 | std::vector<Device*> devices; 24 | Device* default_device = nullptr; 25 | 26 | static void RemoveArgs(int& argc, char**& argv, int& argi, int n) { 27 | for (int i = argi + n; i < argc; ++i) 28 | argv[i - n] = argv[i]; 29 | argc -= n; 30 | assert(argc >= 0); 31 | } 32 | 33 | void Initialize(int& argc, char**& argv, unsigned random_seed, bool shared_parameters) { 34 | vector<Device*> gpudevices; 35 | #if HAVE_CUDA 36 | cerr << "[cnn] initializing CUDA\n"; 37 | gpudevices = Initialize_GPU(argc, argv); 38 | #endif 39 | unsigned long num_mb = 512UL; 40 | int argi = 1; 41 | while(argi < argc) { 42 | string arg = argv[argi]; 43 | if (arg == "--cnn-mem" || arg == "--cnn_mem") { 44 | if ((argi + 1) >= argc) { // argv[argi+1] must exist 45 | cerr << "[cnn] --cnn-mem expects an argument (the memory, in megabytes, to reserve)\n"; 46 | abort(); 47 | } else { 48 | string a2 = argv[argi+1]; 49 | istringstream c(a2); c >> num_mb; 50 | RemoveArgs(argc, argv, argi, 2); 51 | } 52 | } else if (arg == "--cnn-seed" || arg == "--cnn_seed") { 53 | if ((argi + 1) >= argc) { 54 | cerr << "[cnn] --cnn-seed expects an argument (the random number seed)\n"; 55 | abort(); 56 | } else { 57 | string a2 = argv[argi+1]; 58 | istringstream c(a2); c >> random_seed; 59 | RemoveArgs(argc, argv, argi, 2); 60 | } 61 | } else if (arg.find("--cnn") == 0) { 62 | cerr << "[cnn] Bad command line argument: " << arg << endl; 63 | abort(); 64 | } else { break; } 65 | } 66 | if (random_seed == 0) { 67 | random_device rd; 68 | random_seed = rd(); 69 | } 70 | cerr << "[cnn] random seed: " << random_seed << endl; 71 | rndeng = new mt19937(random_seed); 72 | 73 | cerr << "[cnn] allocating memory: " << num_mb << "MB\n"; 74 | devices.push_back(new Device_CPU(num_mb, shared_parameters)); 75 | int default_index = 0; 76 | if (gpudevices.size() > 0) { 77 | for (auto gpu : gpudevices) 78 | devices.push_back(gpu); 79 | default_index++; 80 | } 81 | default_device = devices[default_index]; 82 | 83 | // TODO these should be accessed through the relevant device and removed here 84 | fxs = default_device->fxs; 85 | dEdfs = default_device->dEdfs; 86 | ps = default_device->ps; 87 | kSCALAR_MINUSONE = default_device->kSCALAR_MINUSONE; 88 | kSCALAR_ONE = default_device->kSCALAR_ONE; 89 | kSCALAR_ZERO = default_device->kSCALAR_ZERO; 90 | cerr << "[cnn] memory allocation done.\n"; 91 | } 92 | 93 | void Cleanup() { 94 | delete rndeng; 95 | delete fxs; 96 | delete dEdfs; 97 | delete ps; 98 | } 99 | 100 | } // namespace cnn 101 | 102 | -------------------------------------------------------------------------------- /cnn/init.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_EIGEN_INIT_H 2 | #define CNN_EIGEN_INIT_H 3 | 4 | namespace cnn { 5 | 6 | void Initialize(int& argc, char**& argv, unsigned random_seed = 0, bool shared_parameters = false); 7 | void Cleanup(); 8 | 9 | } // namespace cnn 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /cnn/lstm.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/lstm.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | 10 | using namespace std; 11 | using namespace cnn::expr; 12 | 13 | namespace cnn { 14 | 15 | enum { X2I, H2I, C2I, BI, X2O, H2O, C2O, BO, X2C, H2C, BC }; 16 | 17 | LSTMBuilder::LSTMBuilder(unsigned layers, 18 | unsigned input_dim, 19 | unsigned hidden_dim, 20 | Model* model) : 
layers(layers) { 21 | unsigned layer_input_dim = input_dim; 22 | for (unsigned i = 0; i < layers; ++i) { 23 | // i 24 | Parameters* p_x2i = model->add_parameters({hidden_dim, layer_input_dim}); 25 | Parameters* p_h2i = model->add_parameters({hidden_dim, hidden_dim}); 26 | Parameters* p_c2i = model->add_parameters({hidden_dim, hidden_dim}); 27 | Parameters* p_bi = model->add_parameters({hidden_dim}); 28 | 29 | // o 30 | Parameters* p_x2o = model->add_parameters({hidden_dim, layer_input_dim}); 31 | Parameters* p_h2o = model->add_parameters({hidden_dim, hidden_dim}); 32 | Parameters* p_c2o = model->add_parameters({hidden_dim, hidden_dim}); 33 | Parameters* p_bo = model->add_parameters({hidden_dim}); 34 | 35 | // c 36 | Parameters* p_x2c = model->add_parameters({hidden_dim, layer_input_dim}); 37 | Parameters* p_h2c = model->add_parameters({hidden_dim, hidden_dim}); 38 | Parameters* p_bc = model->add_parameters({hidden_dim}); 39 | layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next 40 | 41 | vector<Parameters*> ps = {p_x2i, p_h2i, p_c2i, p_bi, p_x2o, p_h2o, p_c2o, p_bo, p_x2c, p_h2c, p_bc}; 42 | params.push_back(ps); 43 | } // layers 44 | dropout_rate = 0.0f; 45 | } 46 | 47 | void LSTMBuilder::new_graph_impl(ComputationGraph& cg){ 48 | param_vars.clear(); 49 | 50 | for (unsigned i = 0; i < layers; ++i){ 51 | auto& p = params[i]; 52 | 53 | //i 54 | Expression i_x2i = parameter(cg,p[X2I]); 55 | Expression i_h2i = parameter(cg,p[H2I]); 56 | Expression i_c2i = parameter(cg,p[C2I]); 57 | Expression i_bi = parameter(cg,p[BI]); 58 | //o 59 | Expression i_x2o = parameter(cg,p[X2O]); 60 | Expression i_h2o = parameter(cg,p[H2O]); 61 | Expression i_c2o = parameter(cg,p[C2O]); 62 | Expression i_bo = parameter(cg,p[BO]); 63 | //c 64 | Expression i_x2c = parameter(cg,p[X2C]); 65 | Expression i_h2c = parameter(cg,p[H2C]); 66 | Expression i_bc = parameter(cg,p[BC]); 67 | 68 | vector<Expression> vars = {i_x2i, i_h2i, i_c2i, i_bi, i_x2o, i_h2o, i_c2o, i_bo, i_x2c, i_h2c, i_bc}; 69 | param_vars.push_back(vars); 70 | } 71 | } 72 | 73 | // layout: 0..layers = c 74 | // layers+1..2*layers = h 75 | void LSTMBuilder::start_new_sequence_impl(const vector<Expression>& hinit) { 76 | h.clear(); 77 | c.clear(); 78 | if (hinit.size() > 0) { 79 | assert(layers*2 == hinit.size()); 80 | h0.resize(layers); 81 | c0.resize(layers); 82 | for (unsigned i = 0; i < layers; ++i) { 83 | c0[i] = hinit[i]; 84 | h0[i] = hinit[i + layers]; 85 | } 86 | has_initial_state = true; 87 | } else { 88 | has_initial_state = false; 89 | } 90 | } 91 | 92 | Expression LSTMBuilder::add_input_impl(int prev, const Expression& x) { 93 | h.push_back(vector<Expression>(layers)); 94 | c.push_back(vector<Expression>(layers)); 95 | vector<Expression>& ht = h.back(); 96 | vector<Expression>& ct = c.back(); 97 | Expression in = x; 98 | for (unsigned i = 0; i < layers; ++i) { 99 | const vector<Expression>& vars = param_vars[i]; 100 | Expression i_h_tm1, i_c_tm1; 101 | bool has_prev_state = (prev >= 0 || has_initial_state); 102 | if (prev < 0) { 103 | if (has_initial_state) { 104 | // initial value for h and c at timestep 0 in layer i 105 | // defaults to zero matrix input if not set in add_parameter_edges 106 | i_h_tm1 = h0[i]; 107 | i_c_tm1 = c0[i]; 108 | } 109 | } else { // t > 0 110 | i_h_tm1 = h[prev][i]; 111 | i_c_tm1 = c[prev][i]; 112 | } 113 | // apply dropout according to http://arxiv.org/pdf/1409.2329v5.pdf 114 | if (dropout_rate) in = dropout(in, dropout_rate); 115 | // input 116 | Expression i_ait; 117 | if (has_prev_state) 118 | i_ait = affine_transform({vars[BI], vars[X2I], in, vars[H2I], i_h_tm1, vars[C2I], 
i_c_tm1}); 119 | else 120 | i_ait = affine_transform({vars[BI], vars[X2I], in}); 121 | Expression i_it = logistic(i_ait); 122 | // forget 123 | Expression i_ft = 1.f - i_it; 124 | // write memory cell 125 | Expression i_awt; 126 | if (has_prev_state) 127 | i_awt = affine_transform({vars[BC], vars[X2C], in, vars[H2C], i_h_tm1}); 128 | else 129 | i_awt = affine_transform({vars[BC], vars[X2C], in}); 130 | Expression i_wt = tanh(i_awt); 131 | // output 132 | if (has_prev_state) { 133 | Expression i_nwt = cwise_multiply(i_it,i_wt); 134 | Expression i_crt = cwise_multiply(i_ft,i_c_tm1); 135 | ct[i] = i_crt + i_nwt; 136 | } else { 137 | ct[i] = cwise_multiply(i_it,i_wt); 138 | } 139 | 140 | Expression i_aot; 141 | if (has_prev_state) 142 | i_aot = affine_transform({vars[BO], vars[X2O], in, vars[H2O], i_h_tm1, vars[C2O], ct[i]}); 143 | else 144 | i_aot = affine_transform({vars[BO], vars[X2O], in, vars[C2O], ct[i]}); 145 | Expression i_ot = logistic(i_aot); 146 | Expression ph_t = tanh(ct[i]); 147 | in = ht[i] = cwise_multiply(i_ot,ph_t); 148 | } 149 | if (dropout_rate) return dropout(ht.back(), dropout_rate); 150 | else return ht.back(); 151 | } 152 | 153 | void LSTMBuilder::copy(const RNNBuilder & rnn) { 154 | const LSTMBuilder & rnn_lstm = (const LSTMBuilder&)rnn; 155 | assert(params.size() == rnn_lstm.params.size()); 156 | for(size_t i = 0; i < params.size(); ++i) 157 | for(size_t j = 0; j < params[i].size(); ++j) 158 | params[i][j]->copy(*rnn_lstm.params[i][j]); 159 | } 160 | 161 | } // namespace cnn 162 | -------------------------------------------------------------------------------- /cnn/lstm.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_LSTM_H_ 2 | #define CNN_LSTM_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn.h" 6 | #include "cnn/expr.h" 7 | 8 | using namespace cnn::expr; 9 | 10 | namespace cnn { 11 | 12 | class Model; 13 | 14 | struct LSTMBuilder : public RNNBuilder { 15 | LSTMBuilder() = default; 16 | explicit LSTMBuilder(unsigned layers, 17 | unsigned input_dim, 18 | unsigned hidden_dim, 19 | Model* model); 20 | 21 | void set_dropout(float d) { dropout_rate = d; } 22 | // in general, you should disable dropout at test time 23 | void disable_dropout() { dropout_rate = 0; } 24 | 25 | Expression back() const override { return (cur == -1? h0.back() : h[cur].back()); } 26 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 27 | std::vector final_s() const override { 28 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 29 | for(auto my_h : final_h()) ret.push_back(my_h); 30 | return ret; 31 | } 32 | unsigned num_h0_components() const override { return 2 * layers; } 33 | 34 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 35 | std::vector get_s(RNNPointer i) const override { 36 | std::vector ret = (i == -1 ? c0 : c[i]); 37 | for(auto my_h : get_h(i)) ret.push_back(my_h); 38 | return ret; 39 | } 40 | 41 | void copy(const RNNBuilder & params) override; 42 | protected: 43 | void new_graph_impl(ComputationGraph& cg) override; 44 | void start_new_sequence_impl(const std::vector& h0) override; 45 | Expression add_input_impl(int prev, const Expression& x) override; 46 | 47 | public: 48 | // first index is layer, then ... 49 | std::vector> params; 50 | 51 | // first index is layer, then ... 
52 | std::vector> param_vars; 53 | 54 | // first index is time, second is layer 55 | std::vector> h, c; 56 | 57 | // initial values of h and c at each layer 58 | // - both default to zero matrix input 59 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 60 | std::vector h0; 61 | std::vector c0; 62 | unsigned layers; 63 | float dropout_rate; 64 | }; 65 | 66 | } // namespace cnn 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /cnn/mem.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/mem.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "cnn/except.h" 11 | #if HAVE_CUDA 12 | #include "cnn/cuda.h" 13 | #include 14 | #include 15 | #endif 16 | 17 | using namespace std; 18 | 19 | namespace cnn { 20 | 21 | MemAllocator::~MemAllocator() {} 22 | 23 | void* CPUAllocator::malloc(size_t n) { 24 | void* ptr = _mm_malloc(n, align); 25 | if (!ptr) { 26 | cerr << "CPU memory allocation failed n=" << n << " align=" << align << endl; 27 | throw cnn::out_of_memory("CPU memory allocation failed"); 28 | } 29 | return ptr; 30 | } 31 | 32 | void CPUAllocator::free(void* mem) { 33 | _mm_free(mem); 34 | } 35 | 36 | void CPUAllocator::zero(void* p, size_t n) { 37 | memset(p, 0, n); 38 | } 39 | 40 | void* SharedAllocator::malloc(size_t n) { 41 | void* ptr = mmap(NULL, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_SHARED, -1, 0); 42 | if (!ptr) { 43 | cerr << "Shared memory allocation failed n=" << n << endl; 44 | throw cnn::out_of_memory("Shared memory allocation failed"); 45 | } 46 | return ptr; 47 | } 48 | 49 | void SharedAllocator::free(void* mem) { 50 | // munmap(mem, n); 51 | } 52 | 53 | void SharedAllocator::zero(void* p, size_t n) { 54 | memset(p, 0, n); 55 | } 56 | 57 | #if HAVE_CUDA 58 | void* GPUAllocator::malloc(size_t n) { 59 | void* ptr = nullptr; 60 | CUDA_CHECK(cudaSetDevice(devid)); 61 | CUDA_CHECK(cudaMalloc(&ptr, n)); 62 | if (!ptr) { 63 | cerr << "GPU memory allocation failed n=" << n << endl; 64 | throw cnn::out_of_memory("GPU memory allocation failed"); 65 | } 66 | return ptr; 67 | } 68 | 69 | void GPUAllocator::free(void* mem) { 70 | CUDA_CHECK(cudaFree(mem)); 71 | } 72 | 73 | void GPUAllocator::zero(void* p, size_t n) { 74 | CUDA_CHECK(cudaSetDevice(devid)); 75 | CUDA_CHECK(cudaMemsetAsync(p, 0, n)); 76 | } 77 | 78 | #endif 79 | 80 | } // namespace cnn 81 | -------------------------------------------------------------------------------- /cnn/mem.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_MEM_H 2 | #define CNN_MEM_H 3 | 4 | #include 5 | 6 | namespace cnn { 7 | 8 | // allocates memory from the device (CPU, GPU) 9 | // only used to create the memory pools 10 | // creates alignment appropriate for that device 11 | struct MemAllocator { 12 | explicit MemAllocator(int align) : align(align) {} 13 | MemAllocator(const MemAllocator&) = delete; 14 | MemAllocator& operator=(const MemAllocator&) = delete; 15 | virtual ~MemAllocator(); 16 | virtual void* malloc(std::size_t n) = 0; 17 | virtual void free(void* mem) = 0; 18 | virtual void zero(void* p, std::size_t n) = 0; 19 | inline std::size_t round_up_align(std::size_t n) const { 20 | if (align < 2) return n; 21 | return ((n + align - 1) / align) * align; 22 | } 23 | const int align; 24 | }; 25 | 26 | struct CPUAllocator : public MemAllocator { 27 | CPUAllocator() : MemAllocator(32) {} 28 | void* malloc(std::size_t n) 
override; 29 | void free(void* mem) override; 30 | void zero(void* p, std::size_t n) override; 31 | }; 32 | 33 | struct SharedAllocator : public MemAllocator { 34 | SharedAllocator() : MemAllocator(32) {} 35 | void* malloc(std::size_t n) override; 36 | void free(void* mem) override; 37 | void zero(void* p, std::size_t n) override; 38 | }; 39 | 40 | #if HAVE_CUDA 41 | struct GPUAllocator : public MemAllocator { 42 | explicit GPUAllocator(int devid) : MemAllocator(256), devid(devid) {} 43 | void* malloc(std::size_t n) override; 44 | void free(void* mem) override; 45 | void zero(void* p, std::size_t n) override; 46 | const int devid; 47 | }; 48 | #endif 49 | 50 | } // namespace cnn 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /cnn/model.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_PARAMS_H_ 2 | #define CNN_PARAMS_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include 10 | #include 11 | 12 | #include "cnn/tensor.h" 13 | 14 | namespace cnn { 15 | 16 | // to deal with sparse updates, there are two parameter classes: 17 | // * Parameters represents a vector, matrix, (eventually higher order tensors) 18 | // of parameters. These are densely updated. 19 | // * LookupParameters represents a table of vectors that are used to embed a 20 | // set of discrete objects. These are sparsely updated. 21 | 22 | struct ParametersBase { 23 | friend class Model; 24 | virtual void scale_parameters(float a) = 0; 25 | virtual void squared_l2norm(float* sqnorm) const = 0; 26 | virtual void g_squared_l2norm(float* sqnorm) const = 0; 27 | virtual size_t size() const = 0; 28 | virtual ~ParametersBase(); 29 | }; 30 | 31 | // represents parameters (e.g., a weight matrix) that will be optimized 32 | struct Parameters : public ParametersBase { 33 | friend class Model; 34 | void scale_parameters(float a) override; 35 | void squared_l2norm(float* sqnorm) const override; 36 | void g_squared_l2norm(float* sqnorm) const override; 37 | size_t size() const override; 38 | 39 | void copy(const Parameters & val); 40 | void accumulate_grad(const Tensor& g); 41 | void clear(); 42 | 43 | Dim dim; 44 | Tensor values; 45 | Tensor g; 46 | private: 47 | Parameters() {} 48 | explicit Parameters(const Dim& d, float minmax); // initialize with ~U(-minmax,+minmax) 49 | // or Glorot initialization if minmax = 0 50 | friend class boost::serialization::access; 51 | template void serialize(Archive& ar, const unsigned int) { 52 | ar & dim; 53 | ar & values; 54 | } 55 | }; 56 | 57 | // represents a matrix/vector embedding of a discrete set 58 | struct LookupParameters : public ParametersBase { 59 | friend class Model; 60 | void scale_parameters(float a) override; 61 | void squared_l2norm(float* sqnorm) const override; 62 | void g_squared_l2norm(float* sqnorm) const override; 63 | size_t size() const override; 64 | void Initialize(unsigned index, const std::vector& val); 65 | 66 | void copy(const LookupParameters & val); 67 | void accumulate_grad(unsigned index, const Tensor& g); 68 | void clear(); 69 | 70 | Dim dim; 71 | std::vector values; 72 | std::vector grads; 73 | // gradients are sparse, so track which components are nonzero 74 | std::unordered_set non_zero_grads; 75 | private: 76 | LookupParameters() {} 77 | LookupParameters(unsigned n, const Dim& d); 78 | friend class boost::serialization::access; 79 | template 80 | void save(Archive& ar, const unsigned int) const { 81 | ar & dim; 82 | int nv = values.size(); 83 | ar & 
nv; 84 | for (unsigned i = 0; i < values.size(); ++i) 85 | ar & values[i]; 86 | } 87 | template 88 | void load(Archive& ar, const unsigned int) { 89 | ar & dim; 90 | int nv; 91 | ar & nv; 92 | assert(nv == (int)values.size()); 93 | for (unsigned i = 0; i < values.size(); ++i) 94 | ar & values[i]; 95 | } 96 | BOOST_SERIALIZATION_SPLIT_MEMBER() 97 | }; 98 | 99 | // this is a collection of parameters 100 | // if you need a matrix of parameters, or a lookup table - ask an instance of this class 101 | // this knows how to serialize itself 102 | // parameters know how to track their gradients, but any extra information (like velocity) will live here 103 | class Model { 104 | public: 105 | Model() : gradient_norm_scratch() {} 106 | ~Model(); 107 | float gradient_l2_norm() const; 108 | void reset_gradient(); 109 | // set scale to use custom initialization 110 | Parameters* add_parameters(const Dim& d, float scale = 0.0f); 111 | LookupParameters* add_lookup_parameters(unsigned n, const Dim& d); 112 | // project weights so their L2 norm = radius 113 | void project_weights(float radius = 1.0f); 114 | 115 | const std::vector& all_parameters_list() const { return all_params; } 116 | const std::vector& parameters_list() const { return params; } 117 | const std::vector& lookup_parameters_list() const { return lookup_params; } 118 | 119 | private: 120 | friend class boost::serialization::access; 121 | template 122 | void save(Archive& ar, const unsigned int) const { 123 | int np = params.size(); 124 | int nlp = lookup_params.size(); 125 | ar & np; 126 | ar & nlp; 127 | for (unsigned i = 0; i < params.size(); ++i) 128 | ar & *params[i]; 129 | for (unsigned i = 0; i < lookup_params.size(); ++i) 130 | ar & *lookup_params[i]; 131 | } 132 | template 133 | void load(Archive& ar, const unsigned int) { 134 | int np, nlp; 135 | ar & np; 136 | ar & nlp; 137 | assert(np == (int)params.size()); 138 | assert(nlp == (int)lookup_params.size()); 139 | for (unsigned i = 0; i < params.size(); ++i) 140 | ar & *params[i]; 141 | for (unsigned i = 0; i < lookup_params.size(); ++i) 142 | ar & *lookup_params[i]; 143 | all_params.clear(); 144 | for (auto p : params) all_params.push_back(p); 145 | for (auto p : lookup_params) all_params.push_back(p); 146 | } 147 | BOOST_SERIALIZATION_SPLIT_MEMBER() 148 | 149 | std::vector all_params; 150 | std::vector params; 151 | std::vector lookup_params; 152 | mutable float* gradient_norm_scratch; 153 | }; 154 | 155 | void save_cnn_model(std::string filename, Model* model); 156 | void load_cnn_model(std::string filename, Model* model); 157 | 158 | } // namespace cnn 159 | 160 | #endif 161 | -------------------------------------------------------------------------------- /cnn/mp.cc: -------------------------------------------------------------------------------- 1 | #include "mp.h" 2 | using namespace std; 3 | using namespace boost::interprocess; 4 | 5 | namespace cnn { 6 | namespace mp { 7 | // TODO: Pass these around instead of having them be global 8 | std::string queue_name = "cnn_mp_work_queue"; 9 | std::string shared_memory_name = "cnn_mp_shared_memory"; 10 | timespec start_time; 11 | bool stop_requested = false; 12 | SharedObject* shared_object = nullptr; 13 | 14 | std::string GenerateQueueName() { 15 | std::ostringstream ss; 16 | ss << "cnn_mp_work_queue"; 17 | ss << rand(); 18 | return ss.str(); 19 | } 20 | 21 | std::string GenerateSharedMemoryName() { 22 | std::ostringstream ss; 23 | ss << "cnn_mp_shared_memory"; 24 | ss << rand(); 25 | return ss.str(); 26 | } 27 | 28 | cnn::real 
SumValues(const std::vector& values) { 29 | return accumulate(values.begin(), values.end(), 0.0); 30 | } 31 | 32 | cnn::real Mean(const std::vector& values) { 33 | return SumValues(values) / values.size(); 34 | } 35 | 36 | std::string ElapsedTimeString(const timespec& start, const timespec& end) { 37 | std::ostringstream ss; 38 | time_t secs = end.tv_sec - start.tv_sec; 39 | long nsec = end.tv_nsec - start.tv_nsec; 40 | ss << secs << " seconds and " << nsec << "nseconds"; 41 | return ss.str(); 42 | } 43 | 44 | unsigned SpawnChildren(std::vector& workloads) { 45 | const unsigned num_children = workloads.size(); 46 | assert (workloads.size() == num_children); 47 | pid_t pid; 48 | unsigned cid; 49 | for (cid = 0; cid < num_children; ++cid) { 50 | pid = fork(); 51 | if (pid == -1) { 52 | std::cerr << "Fork failed. Exiting ..." << std::endl; 53 | return 1; 54 | } 55 | else if (pid == 0) { 56 | // children shouldn't continue looping 57 | break; 58 | } 59 | workloads[cid].pid = pid; 60 | } 61 | return cid; 62 | } 63 | 64 | std::vector CreateWorkloads(unsigned num_children) { 65 | int err; 66 | std::vector workloads(num_children); 67 | for (unsigned cid = 0; cid < num_children; cid++) { 68 | err = pipe(workloads[cid].p2c); 69 | assert (err == 0); 70 | err = pipe(workloads[cid].c2p); 71 | assert (err == 0); 72 | } 73 | return workloads; 74 | } 75 | 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /cnn/param-nodes.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/param-nodes.h" 2 | #include "cnn/tensor.h" 3 | 4 | #include 5 | 6 | using namespace std; 7 | 8 | namespace cnn { 9 | 10 | string ConstParameterNode::as_string(const vector& arg_names) const { 11 | ostringstream s; 12 | s << "const_parameters(" << dim << ", " << params << ')'; 13 | return s.str(); 14 | } 15 | 16 | Dim ConstParameterNode::dim_forward(const vector& xs) const { 17 | assert(xs.size() == 0); 18 | return dim; 19 | } 20 | 21 | void ConstParameterNode::forward_impl(const vector& xs, Tensor& fx) const { 22 | assert(xs.size() == 0); 23 | fx.v = params->values.v; 24 | } 25 | 26 | void ConstParameterNode::backward_impl(const vector& xs, 27 | const Tensor& fx, 28 | const Tensor& dEdf, 29 | unsigned i, 30 | Tensor& dEdxi) const { 31 | cerr << "called backward() on arity 0 node: i = " << i << endl; 32 | abort(); 33 | } 34 | 35 | string ParameterNode::as_string(const vector& arg_names) const { 36 | ostringstream s; 37 | s << "parameters(" << dim << ", " << params << ')'; 38 | return s.str(); 39 | } 40 | 41 | Dim ParameterNode::dim_forward(const vector& xs) const { 42 | assert(xs.size() == 0); 43 | return dim; 44 | } 45 | 46 | void ParameterNode::forward_impl(const vector& xs, Tensor& fx) const { 47 | assert(xs.size() == 0); 48 | fx.v = params->values.v; 49 | } 50 | 51 | void ParameterNode::backward_impl(const vector& xs, 52 | const Tensor& fx, 53 | const Tensor& dEdf, 54 | unsigned i, 55 | Tensor& dEdxi) const { 56 | cerr << "called backward() on arity 0 node: i = " << i << endl; 57 | abort(); 58 | } 59 | 60 | void ParameterNode::accumulate_grad(const Tensor& g) { 61 | params->accumulate_grad(g); 62 | } 63 | 64 | string InputNode::as_string(const vector& arg_names) const { 65 | ostringstream s; 66 | s << "constant(" << dim << ')'; 67 | return s.str(); 68 | } 69 | 70 | Dim InputNode::dim_forward(const vector& xs) const { 71 | return dim; 72 | } 73 | 74 | void InputNode::forward_impl(const vector& xs, Tensor& fx) const { 75 | 
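// What follows copies the user-owned buffer *pdata into this node's output:
// on the CUDA build it is an asynchronous host-to-device memcpy; on the CPU
// build the data is memcpy'd because the caller's vector is not guaranteed to
// satisfy the memory pool's alignment, and the zero-copy aliasing fast path
// is deliberately disabled (is_input_address_aligned is hard-coded to false).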
assert(xs.size() == 0); 76 | #if HAVE_CUDA 77 | cudaMemcpyAsync(fx.v, &pdata->front(), dim.size() * sizeof(float), cudaMemcpyHostToDevice); 78 | #else 79 | // TODO memcpy is only necessary if pdata->front() points to an unaligned location 80 | // need to compute this value 81 | bool is_input_address_aligned = false; 82 | if (!is_input_address_aligned) { 83 | memcpy(fx.v, &pdata->front(), dim.size() * sizeof(float)); 84 | } else { 85 | fx.v = const_cast(&pdata->front()); 86 | } 87 | #endif 88 | } 89 | 90 | void InputNode::backward_impl(const vector& xs, 91 | const Tensor& fx, 92 | const Tensor& dEdf, 93 | unsigned i, 94 | Tensor& dEdxi) const { 95 | cerr << "called backward() on arity 0 node\n"; 96 | abort(); 97 | } 98 | 99 | string ScalarInputNode::as_string(const vector& arg_names) const { 100 | ostringstream s; 101 | s << "scalar_constant(" << pdata << ')'; 102 | return s.str(); 103 | } 104 | 105 | Dim ScalarInputNode::dim_forward(const vector& xs) const { 106 | return Dim({1}); 107 | } 108 | 109 | void ScalarInputNode::forward_impl(const vector& xs, Tensor& fx) const { 110 | assert(xs.size() == 0); 111 | #if HAVE_CUDA 112 | cudaMemcpyAsync(fx.v, pdata, 1 * sizeof(float), cudaMemcpyHostToDevice); 113 | #else 114 | fx.v[0] = *pdata; 115 | #endif 116 | } 117 | 118 | void ScalarInputNode::backward_impl(const vector& xs, 119 | const Tensor& fx, 120 | const Tensor& dEdf, 121 | unsigned i, 122 | Tensor& dEdxi) const { 123 | cerr << "called backward() on arity 0 node\n"; 124 | abort(); 125 | } 126 | 127 | string LookupNode::as_string(const vector& arg_names) const { 128 | ostringstream s; 129 | s << "lookup_parameters(|x|=" << params->values.size() << " --> " << dim << ')'; 130 | return s.str(); 131 | } 132 | 133 | Dim LookupNode::dim_forward(const vector& xs) const { 134 | return dim; 135 | } 136 | 137 | void LookupNode::forward_impl(const vector& xs, Tensor& fx) const { 138 | assert(xs.size() == 0); 139 | if(pindex) { 140 | assert(*pindex < params->values.size()); 141 | assert (fx.d.batch_elems() == 1); 142 | fx.v = params->values[*pindex].v; 143 | } else { 144 | assert (pindices); 145 | assert (fx.d.batch_elems() == pindices->size()); 146 | for (unsigned b = 0; b < pindices->size(); ++b) { 147 | unsigned i = pindices->at(b); 148 | assert (i < params->values.size()); 149 | float* v = fx.v + fx.d.batch_size() * (b % fx.d.batch_elems()); 150 | #if HAVE_CUDA 151 | cudaMemcpyAsync(v, params->values[i].v, fx.d.batch_size() * sizeof(float), cudaMemcpyDeviceToDevice); 152 | #else 153 | memcpy(v, params->values[i].v, fx.d.batch_size() * sizeof(float)); 154 | #endif 155 | } 156 | } 157 | } 158 | 159 | void LookupNode::backward_impl(const vector& xs, 160 | const Tensor& fx, 161 | const Tensor& dEdf, 162 | unsigned i, 163 | Tensor& dEdxi) const { 164 | cerr << "called backward() on arity 0 node\n"; 165 | abort(); 166 | } 167 | 168 | void LookupNode::accumulate_grad(const Tensor& g) { 169 | if(pindex) { 170 | params->accumulate_grad(*pindex, g); 171 | } else { 172 | assert (pindices); 173 | const vector& gb = g.batch_elems(); 174 | for (unsigned b = 0; b < pindices->size(); ++b) { 175 | unsigned i = pindices->at(b); 176 | assert (i < params->values.size()); 177 | params->accumulate_grad(i, gb[b]); 178 | } 179 | } 180 | } 181 | 182 | } // namespace cnn 183 | -------------------------------------------------------------------------------- /cnn/param-nodes.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_PARAM_NODES_H_ 2 | #define CNN_PARAM_NODES_H_ 3 | 4 | 
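These node classes are normally reached through the cnn::expr wrappers rather than constructed directly; a minimal sketch mirroring the usage in examples/read-write.cc (in real code the buffers must outlive the graph):

#include "cnn/cnn.h"
#include "cnn/expr.h"
#include <vector>

void input_sketch(cnn::ComputationGraph& cg) {
  std::vector<cnn::real> x_values = {1.0, -1.0};
  // InputNode: re-reads *pdata on every forward(), so overwriting x_values
  // changes the next evaluation without rebuilding the graph
  cnn::expr::Expression x = cnn::expr::input(cg, {2}, &x_values);
  cnn::real y_value = 0.5;
  cnn::expr::Expression y = cnn::expr::input(cg, &y_value);  // ScalarInputNode
}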
#include "cnn/cnn.h" 5 | #include "cnn/model.h" 6 | 7 | namespace cnn { 8 | 9 | struct ParameterNodeBase : public Node { 10 | virtual void accumulate_grad(const Tensor& g) = 0; 11 | }; 12 | 13 | // represents optimizable parameters 14 | struct ParameterNode : public ParameterNodeBase { 15 | explicit ParameterNode(Parameters* p) : dim(p->dim), params(p) {} 16 | std::string as_string(const std::vector& arg_names) const override; 17 | Dim dim_forward(const std::vector& xs) const override; 18 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 19 | void backward_impl(const std::vector& xs, 20 | const Tensor& fx, 21 | const Tensor& dEdf, 22 | unsigned i, 23 | Tensor& dEdxi) const override; 24 | void accumulate_grad(const Tensor& g) override; 25 | Dim dim; 26 | Parameters* params; 27 | }; 28 | 29 | // represents optimizable parameters that are being held constant 30 | struct ConstParameterNode : public Node { 31 | explicit ConstParameterNode(Parameters* p) : dim(p->dim), params(p) {} 32 | std::string as_string(const std::vector& arg_names) const override; 33 | Dim dim_forward(const std::vector& xs) const override; 34 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 35 | void backward_impl(const std::vector& xs, 36 | const Tensor& fx, 37 | const Tensor& dEdf, 38 | unsigned i, 39 | Tensor& dEdxi) const override; 40 | Dim dim; 41 | Parameters* params; 42 | }; 43 | 44 | // represents specified (not learned) inputs to the network 45 | struct InputNode : public Node { 46 | explicit InputNode(const Dim& d, const std::vector& dat) : dim(d), data(dat), pdata(&data) {} 47 | explicit InputNode(const Dim& d, const std::vector* pdat) : dim(d), data(), pdata(pdat) {} 48 | std::string as_string(const std::vector& arg_names) const override; 49 | Dim dim_forward(const std::vector& xs) const override; 50 | virtual bool supports_multibatch() const override { return true; } 51 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 52 | void backward_impl(const std::vector& xs, 53 | const Tensor& fx, 54 | const Tensor& dEdf, 55 | unsigned i, 56 | Tensor& dEdxi) const override; 57 | Dim dim; 58 | const std::vector data; 59 | const std::vector* pdata; 60 | }; 61 | 62 | // represents specified (not learned) scalar inputs to the network 63 | struct ScalarInputNode : public Node { 64 | explicit ScalarInputNode(real s) : data(s), pdata(&data) {} 65 | explicit ScalarInputNode(const real* ps) : data(), pdata(ps) {} 66 | std::string as_string(const std::vector& arg_names) const override; 67 | Dim dim_forward(const std::vector& xs) const override; 68 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 69 | void backward_impl(const std::vector& xs, 70 | const Tensor& fx, 71 | const Tensor& dEdf, 72 | unsigned i, 73 | Tensor& dEdxi) const override; 74 | const cnn::real data; 75 | const cnn::real* pdata; 76 | }; 77 | 78 | // represents a matrix/vector embedding of an item of a discrete set (1-hot coding) 79 | struct LookupNode : public ParameterNodeBase { 80 | LookupNode(LookupParameters* p, unsigned ind) : dim(p->dim), index(ind), pindex(&index), indices(), pindices(), params(p) {} 81 | LookupNode(LookupParameters* p, const unsigned* pind) : dim(p->dim), index(), pindex(pind), indices(), pindices(), params(p) {} 82 | LookupNode(LookupParameters* p, const std::vector& indices) : dim(p->dim), index(), pindex(), indices(indices), pindices(&this->indices), params(p) { 83 | dim.bd = pindices->size(); 84 | } 85 | LookupNode(LookupParameters* p, const 
std::vector* pindices) : dim(p->dim), index(), pindex(), indices(), pindices(pindices), params(p) { 86 | dim.bd = pindices->size(); 87 | } 88 | std::string as_string(const std::vector& arg_names) const override; 89 | Dim dim_forward(const std::vector& xs) const override; 90 | virtual bool supports_multibatch() const override { return true; } 91 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 92 | void backward_impl(const std::vector& xs, 93 | const Tensor& fx, 94 | const Tensor& dEdf, 95 | unsigned i, 96 | Tensor& dEdxi) const override; 97 | void accumulate_grad(const Tensor& g) override; 98 | Dim dim; 99 | unsigned index; 100 | const unsigned* pindex; 101 | std::vector indices; 102 | const std::vector* pindices; 103 | LookupParameters* params; 104 | }; 105 | 106 | } // namespace cnn 107 | 108 | #endif 109 | -------------------------------------------------------------------------------- /cnn/random.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_EIGEN_RANDOM_H 2 | #define CNN_EIGEN_RANDOM_H 3 | 4 | #include 5 | 6 | namespace cnn { 7 | 8 | extern std::mt19937* rndeng; 9 | 10 | } // namespace cnn 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /cnn/rnn-state-machine.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/rnn-state-machine.h" 2 | 3 | #include 4 | #include "cnn/cnn.h" 5 | 6 | using namespace std; 7 | 8 | namespace cnn { 9 | 10 | void RNNStateMachine::failure(RNNOp op) { 11 | cerr << "State transition error: currently in state " << q_ << " but received operation " << op << endl; 12 | abort(); 13 | } 14 | 15 | } // namespace cnn 16 | 17 | -------------------------------------------------------------------------------- /cnn/rnn-state-machine.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_RNN_STATE_MACHINE_H_ 2 | #define CNN_RNN_STATE_MACHINE_H_ 3 | 4 | namespace cnn { 5 | 6 | // CURRENT STATE | ACTION | NEXT STATE 7 | // --------------+---------------------+----------------- 8 | // CREATED | new_graph | GRAPH_READY 9 | // GRAPH_READY | start_new_sequence | READING_INPUT 10 | // READING_INPUT | add_input | READING_INPUT 11 | // READING_INPUT | start_new_seqeunce | READING_INPUT 12 | // READING_INPUT | new_graph | GRAPH_READY 13 | 14 | enum RNNState {CREATED, GRAPH_READY, READING_INPUT}; 15 | enum RNNOp {new_graph, start_new_sequence, add_input}; 16 | 17 | class RNNStateMachine { 18 | public: 19 | RNNStateMachine() : q_(RNNState::CREATED) {} 20 | void failure(RNNOp op); 21 | void transition(RNNOp op) { 22 | switch (q_) { 23 | case RNNState::CREATED: 24 | if (op == RNNOp::new_graph) { q_ = RNNState::GRAPH_READY; break; } 25 | failure(op); 26 | case RNNState::GRAPH_READY: 27 | if (op == RNNOp::new_graph) { break; } 28 | if (op == RNNOp::start_new_sequence) { q_ = RNNState::READING_INPUT; break; } 29 | failure(op); 30 | case RNNState::READING_INPUT: 31 | if (op == RNNOp::add_input) { break; } 32 | if (op == RNNOp::start_new_sequence) { break; } 33 | if (op == RNNOp::new_graph) { q_ = RNNState::GRAPH_READY; break; } 34 | failure(op); 35 | } 36 | } 37 | private: 38 | RNNState q_; 39 | }; 40 | 41 | } // namespace cnn 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /cnn/rnn.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/rnn.h" 2 | 3 | #include 4 | 
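The transition table in rnn-state-machine.h is enforced at runtime; any out-of-order call lands in failure(), which aborts. A legal sequence, spelled out:

cnn::RNNStateMachine sm;
sm.transition(cnn::RNNOp::new_graph);           // CREATED     -> GRAPH_READY
sm.transition(cnn::RNNOp::start_new_sequence);  // GRAPH_READY -> READING_INPUT
sm.transition(cnn::RNNOp::add_input);           // stays in READING_INPUT
// calling add_input from CREATED or GRAPH_READY would abort via failure()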
#include 5 | #include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | #include "cnn/expr.h" 10 | 11 | using namespace std; 12 | using namespace cnn::expr; 13 | using namespace cnn; 14 | 15 | namespace cnn { 16 | 17 | enum { X2H=0, H2H, HB, L2H }; 18 | 19 | RNNBuilder::~RNNBuilder() {} 20 | 21 | SimpleRNNBuilder::SimpleRNNBuilder(unsigned layers, 22 | unsigned input_dim, 23 | unsigned hidden_dim, 24 | Model* model, 25 | bool support_lags) : layers(layers), lagging(support_lags) { 26 | unsigned layer_input_dim = input_dim; 27 | for (unsigned i = 0; i < layers; ++i) { 28 | Parameters* p_x2h = model->add_parameters({hidden_dim, layer_input_dim}); 29 | Parameters* p_h2h = model->add_parameters({hidden_dim, hidden_dim}); 30 | Parameters* p_hb = model->add_parameters({hidden_dim}); 31 | vector ps = {p_x2h, p_h2h, p_hb}; 32 | if (lagging) 33 | ps.push_back(model->add_parameters({hidden_dim, hidden_dim})); 34 | params.push_back(ps); 35 | layer_input_dim = hidden_dim; 36 | } 37 | } 38 | 39 | void SimpleRNNBuilder::new_graph_impl(ComputationGraph& cg) { 40 | param_vars.clear(); 41 | for (unsigned i = 0; i < layers; ++i) { 42 | Parameters* p_x2h = params[i][X2H]; 43 | Parameters* p_h2h = params[i][H2H]; 44 | Parameters* p_hb = params[i][HB]; 45 | Expression i_x2h = parameter(cg,p_x2h); 46 | Expression i_h2h = parameter(cg,p_h2h); 47 | Expression i_hb = parameter(cg,p_hb); 48 | vector vars = {i_x2h, i_h2h, i_hb}; 49 | 50 | if (lagging) { 51 | Parameters* p_l2h = params[i][L2H]; 52 | Expression i_l2h = parameter(cg,p_l2h); 53 | vars.push_back(i_l2h); 54 | } 55 | 56 | param_vars.push_back(vars); 57 | } 58 | } 59 | 60 | void SimpleRNNBuilder::start_new_sequence_impl(const vector& h_0) { 61 | h.clear(); 62 | h0 = h_0; 63 | if (h0.size()) { assert(h0.size() == layers); } 64 | } 65 | 66 | Expression SimpleRNNBuilder::add_input_impl(int prev, const Expression &in) { 67 | const unsigned t = h.size(); 68 | h.push_back(vector(layers)); 69 | 70 | Expression x = in; 71 | 72 | for (unsigned i = 0; i < layers; ++i) { 73 | const vector& vars = param_vars[i]; 74 | 75 | // y <--- f(x) 76 | Expression y = affine_transform({vars[2], vars[0], x}); 77 | 78 | // y <--- g(y_prev) 79 | if (prev == -1 && h0.size() > 0) 80 | y = affine_transform({y, vars[1], h0[i]}); 81 | else if (prev >= 0) 82 | y = affine_transform({y, vars[1], h[prev][i]}); 83 | 84 | // x <--- tanh(y) 85 | x = h[t][i] = tanh(y); 86 | } 87 | return h[t].back(); 88 | } 89 | 90 | Expression SimpleRNNBuilder::add_auxiliary_input(const Expression &in, const Expression &aux) { 91 | const unsigned t = h.size(); 92 | h.push_back(vector(layers)); 93 | 94 | Expression x = in; 95 | 96 | for (unsigned i = 0; i < layers; ++i) { 97 | const vector& vars = param_vars[i]; 98 | assert(vars.size() >= L2H + 1); 99 | 100 | Expression y = affine_transform({vars[HB], vars[X2H], x, vars[L2H], aux}); 101 | 102 | if (t == 0 && h0.size() > 0) 103 | y = affine_transform({y, vars[H2H], h0[i]}); 104 | else if (t >= 1) 105 | y = affine_transform({y, vars[H2H], h[t-1][i]}); 106 | 107 | x = h[t][i] = tanh(y); 108 | } 109 | return h[t].back(); 110 | } 111 | 112 | void SimpleRNNBuilder::copy(const RNNBuilder & rnn) { 113 | const SimpleRNNBuilder & rnn_simple = (const SimpleRNNBuilder&)rnn; 114 | assert(params.size() == rnn_simple.params.size()); 115 | for(size_t i = 0; i < rnn_simple.params.size(); ++i) { 116 | params[i][0]->copy(*rnn_simple.params[i][0]); 117 | params[i][1]->copy(*rnn_simple.params[i][1]); 118 | params[i][2]->copy(*rnn_simple.params[i][2]); 119 | } 120 | } 121 | 122 | } // 
namespace cnn 123 | -------------------------------------------------------------------------------- /cnn/rnn.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_RNN_H_ 2 | #define CNN_RNN_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn-state-machine.h" 6 | #include "cnn/expr.h" 7 | 8 | using namespace cnn::expr; 9 | 10 | namespace cnn { 11 | 12 | class Model; 13 | 14 | BOOST_STRONG_TYPEDEF(int, RNNPointer) 15 | inline void swap(RNNPointer& i1, RNNPointer& i2) { 16 | RNNPointer t = i1; i1 = i2; i2 = t; 17 | } 18 | 19 | // interface for constructing an RNN, LSTM, GRU, etc. 20 | struct RNNBuilder { 21 | RNNBuilder() : cur(-1) {} 22 | virtual ~RNNBuilder(); 23 | 24 | RNNPointer state() const { return cur; } 25 | 26 | // call this to reset the builder when you are working with a newly 27 | // created ComputationGraph object 28 | void new_graph(ComputationGraph& cg) { 29 | sm.transition(RNNOp::new_graph); 30 | new_graph_impl(cg); 31 | } 32 | 33 | // Reset for new sequence 34 | // call this before add_input and after new_graph, 35 | // when starting a new sequence on the same hypergraph. 36 | // h_0 is used to initialize hidden layers at timestep 0 to given values 37 | void start_new_sequence(const std::vector& h_0={}) { 38 | sm.transition(RNNOp::start_new_sequence); 39 | cur = RNNPointer(-1); 40 | head.clear(); 41 | start_new_sequence_impl(h_0); 42 | } 43 | 44 | // add another timestep by reading in the variable x 45 | // return the hidden representation of the deepest layer 46 | Expression add_input(const Expression& x) { 47 | sm.transition(RNNOp::add_input); 48 | head.push_back(cur); 49 | int rcp = cur; 50 | cur = head.size() - 1; 51 | return add_input_impl(rcp, x); 52 | } 53 | 54 | // add another timestep, but define recurrent connection to prev 55 | // rather than to head[cur] 56 | // this can be used to construct trees, implement beam search, etc. 57 | Expression add_input(const RNNPointer& prev, const Expression& x) { 58 | sm.transition(RNNOp::add_input); 59 | head.push_back(prev); 60 | cur = head.size() - 1; 61 | return add_input_impl(prev, x); 62 | } 63 | 64 | // rewind the last timestep - this DOES NOT remove the variables 65 | // from the computation graph, it just means the next time step will 66 | // see a different previous state. You can remind as many times as 67 | // you want. 
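Taken together, this interface supports both plain left-to-right processing and branching histories (trees, beam search) via RNNPointer. A usage sketch with SimpleRNNBuilder, where x1, x2 and x_alt stand for previously built input Expressions (illustrative, not defined here):

using namespace cnn;
using namespace cnn::expr;

Model model;
SimpleRNNBuilder rnn(2 /*layers*/, 8 /*input_dim*/, 24 /*hidden_dim*/, &model);
ComputationGraph cg;
rnn.new_graph(cg);
rnn.start_new_sequence();
Expression h1 = rnn.add_input(x1);         // h_t = tanh(W_x x + W_h h_{t-1} + b)
RNNPointer p = rnn.state();                // remember this timestep
Expression h2 = rnn.add_input(x2);
Expression alt = rnn.add_input(p, x_alt);  // branch: recur from p, not from h2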
68 | void rewind_one_step() { 69 | cur = head[cur]; 70 | } 71 | 72 | // returns node (index) of most recent output 73 | virtual Expression back() const = 0; 74 | // access the final output of each hidden layer 75 | virtual std::vector final_h() const = 0; 76 | virtual std::vector get_h(RNNPointer i) const = 0; 77 | // access the state of each hidden layer, in a format that can be used in 78 | // start_new_sequence 79 | virtual std::vector final_s() const = 0; 80 | virtual unsigned num_h0_components() const = 0; 81 | virtual std::vector get_s(RNNPointer i) const = 0; 82 | // copy the parameters of another builder 83 | virtual void copy(const RNNBuilder & params) = 0; 84 | protected: 85 | virtual void new_graph_impl(ComputationGraph& cg) = 0; 86 | virtual void start_new_sequence_impl(const std::vector& h_0) = 0; 87 | virtual Expression add_input_impl(int prev, const Expression& x) = 0; 88 | RNNPointer cur; 89 | private: 90 | // the state machine ensures that the caller is behaving 91 | RNNStateMachine sm; 92 | std::vector head; // head[i] returns the head position 93 | }; 94 | 95 | struct SimpleRNNBuilder : public RNNBuilder { 96 | SimpleRNNBuilder() = default; 97 | explicit SimpleRNNBuilder(unsigned layers, 98 | unsigned input_dim, 99 | unsigned hidden_dim, 100 | Model* model, 101 | bool support_lags=false); 102 | 103 | protected: 104 | void new_graph_impl(ComputationGraph& cg) override; 105 | void start_new_sequence_impl(const std::vector& h_0) override; 106 | Expression add_input_impl(int prev, const Expression& x) override; 107 | 108 | public: 109 | Expression add_auxiliary_input(const Expression& x, const Expression &aux); 110 | 111 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 112 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 113 | std::vector final_s() const override { return final_h(); } 114 | 115 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? 
h0 : h[i]); } 116 | std::vector get_s(RNNPointer i) const override { return get_h(i); } 117 | void copy(const RNNBuilder & params) override; 118 | 119 | unsigned num_h0_components() const override { return layers; } 120 | 121 | private: 122 | // first index is layer, then x2h h2h hb 123 | std::vector> params; 124 | 125 | // first index is layer, then x2h h2h hb 126 | std::vector> param_vars; 127 | 128 | // first index is time, second is layer 129 | std::vector> h; 130 | 131 | // initial value of h 132 | // defaults to zero matrix input 133 | std::vector h0; 134 | 135 | unsigned layers; 136 | bool lagging; 137 | }; 138 | 139 | } // namespace cnn 140 | 141 | #endif 142 | -------------------------------------------------------------------------------- /cnn/saxe-init.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/saxe-init.h" 2 | #include "cnn/tensor.h" 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | using namespace std; 10 | 11 | namespace cnn { 12 | 13 | void OrthonormalRandom(unsigned dd, float g, Tensor& x) { 14 | Tensor t; 15 | t.d = Dim({dd, dd}); 16 | t.v = new float[dd * dd]; 17 | normal_distribution distribution(0, 0.01); 18 | auto b = [&] () {return distribution(*rndeng);}; 19 | generate(t.v, t.v + dd*dd, b); 20 | Eigen::JacobiSVD svd(*t, Eigen::ComputeFullU); 21 | *x = svd.matrixU(); 22 | delete[] t.v; 23 | } 24 | 25 | } 26 | 27 | -------------------------------------------------------------------------------- /cnn/saxe-init.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_SAXE_INIT_H_ 2 | #define CNN_SAXE_INIT_H_ 3 | 4 | namespace cnn { 5 | 6 | struct Tensor; 7 | 8 | void OrthonormalRandom(unsigned dim, float g, Tensor& x); 9 | 10 | } 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /cnn/shadow-params.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/cnn.h" 2 | #include "cnn/shadow-params.h" 3 | #include "cnn/tensor.h" 4 | #include "cnn/aligned-mem-pool.h" 5 | #include "cnn/model.h" 6 | 7 | using namespace std; 8 | 9 | namespace cnn { 10 | 11 | ShadowParameters::ShadowParameters(const Parameters& p) : h(p.values) { 12 | h.v = (float*)default_device->mem->malloc(h.d.size() * sizeof(float)); 13 | TensorTools::Zero(h); 14 | } 15 | 16 | ShadowLookupParameters::ShadowLookupParameters(const LookupParameters& lp) : h(lp.values) { 17 | for (auto& t : h) { 18 | t.v = (float*)default_device->mem->malloc(t.d.size() * sizeof(float)); 19 | TensorTools::Zero(t); 20 | } 21 | } 22 | 23 | vector AllocateShadowParameters(const Model& m) { 24 | vector v; 25 | v.reserve(m.parameters_list().size()); 26 | for (auto& p : m.parameters_list()) 27 | v.emplace_back(*p); 28 | return v; 29 | } 30 | 31 | vector AllocateShadowLookupParameters(const Model& m) { 32 | vector v; 33 | v.reserve(m.lookup_parameters_list().size()); 34 | for (auto& p : m.lookup_parameters_list()) 35 | v.emplace_back(*p); 36 | return v; 37 | } 38 | 39 | } // namespace cnn 40 | 41 | -------------------------------------------------------------------------------- /cnn/shadow-params.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_SHADOW_PARAMS_H 2 | #define CNN_SHADOW_PARAMS_H 3 | 4 | #include 5 | #include "cnn/tensor.h" 6 | 7 | // if your learner needs to keep track of an extra set of values (one per 8 | // parameter), use the Shadow classes. 
this can be used to implement, e.g., 9 | // momentum or adagrad 10 | 11 | namespace cnn { 12 | 13 | class Model; 14 | struct Parameters; 15 | struct LookupParameters; 16 | 17 | struct ShadowParameters { 18 | explicit ShadowParameters(const Parameters& p); 19 | Tensor h; 20 | }; 21 | 22 | struct ShadowLookupParameters { 23 | explicit ShadowLookupParameters(const LookupParameters& lp); 24 | std::vector h; 25 | }; 26 | 27 | // one per element in model.parameters_list 28 | std::vector AllocateShadowParameters(const Model& model); 29 | // one per element in model.lookup_parameters_list 30 | std::vector AllocateShadowLookupParameters(const Model& model); 31 | 32 | } // namespace cnn 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /cnn/tensor.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/tensor.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #if HAVE_CUDA 8 | #include "cnn/cuda.h" 9 | #endif 10 | 11 | using namespace std; 12 | 13 | namespace cnn { 14 | 15 | ostream& operator<<(ostream& os, const Tensor& t) { 16 | #if HAVE_CUDA 17 | vector vt = as_vector(t); 18 | Eigen::Map m(&vt[0], t.d.rows(), t.d.cols()); 19 | os << m; 20 | #else 21 | os << (*t); 22 | #endif 23 | return os; 24 | } 25 | 26 | real as_scalar(const Tensor& t) { 27 | assert(t.d.size() == 1); 28 | #if HAVE_CUDA 29 | float res; 30 | CUDA_CHECK(cudaMemcpy(&res, t.v, sizeof(float), cudaMemcpyDeviceToHost)); 31 | return res; 32 | #else 33 | return t.v[0]; 34 | #endif 35 | } 36 | 37 | vector as_vector(const Tensor& v) { 38 | vector res(v.d.size()); 39 | #if HAVE_CUDA 40 | CUDA_CHECK(cudaMemcpy(&res[0], v.v, sizeof(real) * res.size(), cudaMemcpyDeviceToHost)); 41 | #else 42 | memcpy(&res[0], v.v, sizeof(real) * res.size()); 43 | #endif 44 | return res; 45 | } 46 | 47 | float TensorTools::AccessElement(const Tensor& v, int index) { 48 | #if HAVE_CUDA 49 | float ret; 50 | cudaMemcpyAsync(&ret, &v.v[index], sizeof(real), cudaMemcpyDeviceToHost); 51 | return ret; 52 | #else 53 | return v.v[index]; 54 | #endif 55 | } 56 | 57 | float TensorTools::AccessElement(const Tensor& v, const Dim& index) { 58 | #if HAVE_CUDA 59 | abort(); 60 | #else 61 | return (*v)(index[0], index[1]); 62 | #endif 63 | } 64 | 65 | void TensorTools::SetElement(const Tensor& v, int index, float value) { 66 | #if HAVE_CUDA 67 | cudaMemcpyAsync(&v.v[index], &value, sizeof(real), cudaMemcpyHostToDevice); 68 | #else 69 | v.v[index] = value; 70 | #endif 71 | } 72 | 73 | void TensorTools::SetElements(const Tensor& v, const vector& vec) { 74 | #if HAVE_CUDA 75 | cudaMemcpyAsync(v.v, &vec[0], sizeof(real) * vec.size(), cudaMemcpyHostToDevice); 76 | #else 77 | memcpy(v.v, &vec[0], sizeof(real) * vec.size()); 78 | #endif 79 | } 80 | 81 | void TensorTools::CopyElements(const Tensor& v, const Tensor& v_src) { 82 | #if HAVE_CUDA 83 | cudaMemcpyAsync(v.v, v_src.v, sizeof(real) * v.d.size(), cudaMemcpyDeviceToDevice); 84 | #else 85 | memcpy(v.v, v_src.v, sizeof(real) * v.d.size()); 86 | #endif 87 | } 88 | 89 | void TensorTools::Constant(Tensor& d, float c) { 90 | #if HAVE_CUDA 91 | if (!c) { 92 | CUDA_CHECK(cudaMemsetAsync(d.v, 0, d.d.size() * sizeof(float))); 93 | } else { 94 | fill(d.v, d.v + d.d.size(), c); 95 | } 96 | #else 97 | if (!c) { 98 | memset(d.v, c, d.d.size() * sizeof(float)); 99 | } else { 100 | fill(d.v, d.v + d.d.size(), c); 101 | } 102 | #endif 103 | } 104 | 105 | void TensorTools::Zero(Tensor& d) { 106 | Constant(d, 0); 107 | } 108 | 109 | void 
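The Shadow classes above give a learner one extra zero-initialized tensor per parameter, which is exactly what momentum-style updates need. Allocation sketch:

std::vector<cnn::ShadowParameters> vp = cnn::AllocateShadowParameters(model);
// vp[i].h has the same Dim as model.parameters_list()[i]->values and starts
// at zero; a momentum trainer folds each gradient into vp[i].h and then
// applies vp[i].h to the parameter values (cf. MomentumSGDTrainer in
// training.h).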
TensorTools::Randomize(Tensor& val, real scale) { 110 | uniform_real_distribution distribution(-scale,scale); 111 | auto b = [&] {return distribution(*rndeng);}; 112 | #if HAVE_CUDA 113 | float* t = new float[val.d.size()]; 114 | generate(t, t + val.d.size(), b); 115 | CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); 116 | delete[] t; 117 | #else 118 | generate(val.v, val.v + val.d.size(), b); 119 | #endif 120 | } 121 | 122 | void TensorTools::Randomize(Tensor& d) { 123 | Randomize(d, sqrt(6) / sqrt(d.d.sum_dims())); 124 | } 125 | 126 | void TensorTools::RandomBernoulli(Tensor& val, real p, real scale) { 127 | bernoulli_distribution distribution(p); 128 | auto b = [&] {return distribution(*rndeng) * scale;}; 129 | #if HAVE_CUDA 130 | float* t = new float[val.d.size()]; 131 | generate(t, t + val.d.size(), b); 132 | CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); 133 | delete[] t; 134 | #else 135 | generate(val.v, val.v + val.d.size(), b); 136 | #endif 137 | } 138 | 139 | void TensorTools::RandomizeNormal(real mean, real stddev, Tensor& val) { 140 | normal_distribution distribution(mean, stddev); 141 | auto b = [&] {return distribution(*rndeng);}; 142 | #if HAVE_CUDA 143 | float* t = new float[val.d.size()]; 144 | generate(t, t + val.d.size(), b); 145 | CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); 146 | delete[] t; 147 | #else 148 | generate(val.v, val.v + val.d.size(), b); 149 | #endif 150 | } 151 | 152 | real rand01() { 153 | uniform_real_distribution distribution(0, 1); 154 | return distribution(*rndeng); 155 | } 156 | 157 | int rand0n(int n) { 158 | assert(n > 0); 159 | int x = rand01() * n; 160 | while(n == x) { x = rand01() * n; } 161 | return x; 162 | } 163 | 164 | real rand_normal() { 165 | normal_distribution distribution(0, 1); 166 | return distribution(*rndeng); 167 | } 168 | 169 | } // namespace cnn 170 | -------------------------------------------------------------------------------- /cnn/tests/test_init.cc: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "CNNInit" 3 | #include 4 | 5 | #include 6 | 7 | #include "cnn/tests/test_utils.h" 8 | #include "cnn/tensor.h" 9 | #include "cnn/saxe-init.h" 10 | 11 | using namespace std; 12 | using namespace cnn; 13 | 14 | BOOST_GLOBAL_FIXTURE(TestTensorSetup) 15 | 16 | BOOST_AUTO_TEST_CASE(EOrthonormalRandom) 17 | { 18 | for (int d = 4; d < 128; d += 2) { 19 | Tensor Q = OrthonormalRandom(d, 1.0); 20 | // BOOST_REQUIRE_EQUAL(size(Q), Dim({d,d})); 21 | 22 | // check that this is actually returning orthogonal matrices 23 | #if MINERVA_BACKEND 24 | Tensor I = Q.Trans() * Q; 25 | #endif 26 | #if THPP_BACKEND 27 | Tensor QT = Q; 28 | QT.transpose(); 29 | //cerr << str(Q) << endl << str(QT) << endl; 30 | Tensor I = Zero({d,d}); 31 | I.addmm(0, 1, Q, QT); 32 | //cerr << str(I) << endl; 33 | #endif 34 | #if EIGEN_BACKEND 35 | Tensor I = Q.transpose() * Q; 36 | #endif 37 | double eps = 1e-1; 38 | for (int i = 0; i < d; ++i) 39 | for (int j = 0; j < d; ++j) 40 | BOOST_CHECK_CLOSE(t(I,i,j) + 1., (i == j ? 2. 
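For reference, the initializers in tensor.cc cover uniform, Glorot-style, Bernoulli and normal draws; a host-side sketch following the same manual Tensor setup used by saxe-init.cc (the 4x4 shape is illustrative):

cnn::Tensor t;
t.d = cnn::Dim({4, 4});
t.v = new float[t.d.size()];
cnn::TensorTools::Randomize(t);                 // U(-s, s), s = sqrt(6)/sqrt(sum_dims)
cnn::TensorTools::RandomizeNormal(0.f, 1.f, t); // N(0, 1)
float u = cnn::rand01();                        // uniform in [0, 1)
int k = cnn::rand0n(10);                        // uniform integer in [0, 10)
delete[] t.v;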
: 1.), eps); 41 | } 42 | cerr << "Finished\n"; 43 | } 44 | 45 | BOOST_AUTO_TEST_CASE(BernoulliInit) { 46 | Tensor r = RandomBernoulli(Dim({1000,1000}), 0.5f); 47 | int tot = 0; 48 | for (int i = 0; i < 1000; ++i) 49 | for (int j = 0; j < 1000; ++j) 50 | if (t(r,i,j)) ++tot; 51 | BOOST_CHECK_GT(tot, 490000); 52 | BOOST_CHECK_LT(tot, 510000); 53 | } 54 | 55 | BOOST_AUTO_TEST_CASE(Rand01) { 56 | cnn::real tot = 0; 57 | for (unsigned i = 0; i < 1000000; ++i) 58 | tot += cnn::rand01(); 59 | BOOST_CHECK_GT(tot, 490000.); 60 | BOOST_CHECK_LT(tot, 510000.); 61 | } 62 | 63 | 64 | -------------------------------------------------------------------------------- /cnn/tests/test_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_TEST_UTILS_H_ 2 | #define CNN_TEST_UTILS_H_ 3 | 4 | #include "cnn/tensor.h" 5 | 6 | namespace cnn { 7 | 8 | #if WITH_MINERVA_BACKEND 9 | 10 | struct TestTensorSetup { 11 | TestTensorSetup() { 12 | int argc = 1; 13 | char* foo = "foo"; 14 | char** argv = {&foo}; 15 | minerva::MinervaSystem::Initialize(&argc, &argv); 16 | #if HAS_CUDA 17 | minerva::MinervaSystem::Instance().device_manager().CreateGpuDevice(0); 18 | #else 19 | minerva::MinervaSystem::Instance().device_manager().CreateCpuDevice(); 20 | #endif 21 | } 22 | }; 23 | 24 | double t(const Tensor& T, unsigned i, unsigned j) { 25 | int m = T.Size(0); 26 | return T.Get().get()[j * m + i]; 27 | } 28 | 29 | std::ostream& operator<<(std::ostream& os, const Tensor& T) { 30 | if (T.Size().NumDims() == 2) { 31 | int m = T.Size(0); 32 | int n = T.Size(1); 33 | for (int i = 0; i < m; ++i) { 34 | for (int j = 0; j < n; ++j) { 35 | os << '\t' << t(T,i,j); 36 | } 37 | os << std::endl; 38 | } 39 | return os; 40 | } else { 41 | os << T.Size() << ": "; 42 | minerva::FileFormat ff; ff.binary = false; 43 | T.ToStream(os, ff); 44 | return os; 45 | } 46 | } 47 | 48 | #else 49 | 50 | struct TestTensorSetup { 51 | TestTensorSetup() { 52 | int argc = 1; 53 | char* p = "foo"; 54 | char** argv = {&p}; 55 | cnn::Initialize(argc, argv); 56 | } 57 | }; 58 | 59 | double t(const Tensor& T, unsigned i, unsigned j) { 60 | #if WITH_THPP_BACKEND 61 | return T.at({i,j}); 62 | #else 63 | return T(i, j); 64 | #endif 65 | } 66 | 67 | double t(const Tensor& T, unsigned i) { 68 | #if WITH_THPP_BACKEND 69 | return T.at({i}); 70 | #else 71 | return T(i, 0); 72 | #endif 73 | } 74 | 75 | #endif 76 | 77 | } // namespace cnn 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /cnn/timing.h: -------------------------------------------------------------------------------- 1 | #ifndef _TIMING_H_ 2 | #define _TIMING_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace cnn { 9 | 10 | struct Timer { 11 | Timer(const std::string& msg) : msg(msg), start(std::chrono::high_resolution_clock::now()) {} 12 | ~Timer() { 13 | auto stop = std::chrono::high_resolution_clock::now(); 14 | std::cerr << '[' << msg << ' ' << std::chrono::duration(stop-start).count() << " ms]\n"; 15 | } 16 | std::string msg; 17 | std::chrono::high_resolution_clock::time_point start; 18 | }; 19 | 20 | } // namespace cnn 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /cnn/training.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_TRAINING_H_ 2 | #define CNN_TRAINING_H_ 3 | 4 | #include 5 | #include "cnn/model.h" 6 | #include "cnn/shadow-params.h" 7 | 8 | namespace cnn { 9 | 10 | struct 
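timing.h's Timer is pure RAII: construction records a start time and the destructor prints the elapsed milliseconds to stderr, which is how the examples wrap each epoch. A sketch, with run_one_epoch() standing in for arbitrary work:

{
  cnn::Timer t("epoch completed in");
  run_one_epoch();   // hypothetical workload
}                    // destructor prints: [epoch completed in <n> ms]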
Trainer { 11 | explicit Trainer(Model* m, real lam, real e0) : 12 | eta0(e0), eta(e0), eta_decay(), epoch(), lambda(lam), clipping_enabled(true), clip_threshold(5), clips(), updates(), model(m) {} 13 | virtual ~Trainer(); 14 | 15 | virtual void update(real scale = 1.0) = 0; 16 | void update_epoch(real r = 1) { 17 | epoch += r; 18 | eta = eta0 / (1 + epoch * eta_decay); 19 | } 20 | 21 | // if clipping is enabled and the gradient is too big, return the amount to 22 | // scale the gradient by (otherwise 1) 23 | float clip_gradients(); 24 | 25 | // learning rates 26 | real eta0; 27 | real eta; 28 | real eta_decay; 29 | real epoch; 30 | 31 | real lambda; // weight regularization (l2) 32 | 33 | // clipping 34 | real clipping_enabled; 35 | real clip_threshold; 36 | real clips; 37 | real updates; 38 | 39 | void status() { 40 | std::cerr << "[epoch=" << epoch << " eta=" << eta << " clips=" << clips << " updates=" << updates << "] "; 41 | updates = clips = 0; 42 | } 43 | 44 | Model* model; // parameters and gradients live here 45 | }; 46 | 47 | struct SimpleSGDTrainer : public Trainer { 48 | explicit SimpleSGDTrainer(Model* m, real lam = 1e-6, real e0 = 0.1) : Trainer(m, lam, e0) {} 49 | void update(real scale) override; 50 | void update(const std::vector &lookup_params, const std::vector ¶ms, real scale = 1); 51 | }; 52 | 53 | struct MomentumSGDTrainer : public Trainer { 54 | explicit MomentumSGDTrainer(Model* m, real lam = 1e-6, real e0 = 0.01, real mom = 0.9) : 55 | Trainer(m, lam, e0), momentum(mom), velocity_allocated(false) {} 56 | void update(real scale) override; 57 | 58 | real momentum; 59 | 60 | bool velocity_allocated; 61 | 62 | // the following represent the current velocity 63 | std::vector vp; 64 | std::vector vlp; 65 | //std::unordered_map vp; 66 | //std::unordered_map> vl; 67 | }; 68 | 69 | struct AdagradTrainer : public Trainer { 70 | explicit AdagradTrainer(Model* m, real lam = 1e-6, real e0 = 0.1, real eps = 1e-20) : 71 | Trainer(m, lam, e0), epsilon(eps), shadow_params_allocated(false) {} 72 | void update(real scale) override; 73 | 74 | real epsilon; 75 | bool shadow_params_allocated; 76 | std::vector vp; 77 | std::vector vlp; 78 | }; 79 | 80 | struct AdadeltaTrainer : public Trainer { 81 | explicit AdadeltaTrainer(Model* m, real lam = 1e-6, real eps = 1e-6, real rho = 0.95) : 82 | Trainer(m, lam, 1.0), epsilon(eps), rho(rho), shadow_params_allocated(false) {} 83 | void update(real scale) override; 84 | 85 | real epsilon; 86 | real rho; 87 | bool shadow_params_allocated; 88 | std::vector hg; // History of gradients 89 | std::vector hlg; 90 | std::vector hd; // History of deltas 91 | std::vector hld; 92 | }; 93 | 94 | struct RmsPropTrainer : public Trainer { 95 | explicit RmsPropTrainer(Model* m, real lam = 1e-6, real e0 = 0.1, real eps = 1e-20, real rho = 0.95) : 96 | Trainer(m, lam, e0), epsilon(eps), rho(rho), shadow_params_allocated(false) {} 97 | void update(real scale) override; 98 | 99 | real epsilon; 100 | real rho; 101 | bool shadow_params_allocated; 102 | std::vector hg; // History of gradients 103 | std::vector > hlg; 104 | }; 105 | 106 | struct AdamTrainer : public Trainer { 107 | explicit AdamTrainer(Model* m, float lambda = 1e-6, float alpha = 0.001, float beta_1 = 0.9, float beta_2 = 0.999, float eps = 1e-8) : 108 | Trainer(m, lambda, alpha), beta_1(beta_1), beta_2(beta_2), eps(eps), shadow_params_allocated(false) {} 109 | 110 | void update(real scale) override; 111 | 112 | float beta_1; 113 | float beta_2; 114 | float eps; 115 | bool shadow_params_allocated; 116 | 
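All of the concrete trainers share the Trainer bookkeeping above: eta decays as eta = eta0 / (1 + epoch * eta_decay), and gradients are clipped at clip_threshold (5 by default) when clipping_enabled. A typical driver loop, shapes of which appear throughout examples/ (the loss construction is elided):

cnn::Model m;
cnn::SimpleSGDTrainer sgd(&m, 1e-6 /*lambda*/, 0.1 /*eta0*/);
for (unsigned iter = 0; iter < 10; ++iter) {
  cnn::ComputationGraph cg;
  // ... build the loss Expression for one training instance here ...
  cg.forward();
  cg.backward();
  sgd.update(1.0);
}
sgd.update_epoch();  // advances epoch, recomputes eta
sgd.status();        // prints [epoch=... eta=... clips=... updates=...]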
std::vector m; // History of gradients 117 | std::vector lm; 118 | std::vector v; // History of deltas 119 | std::vector lv; 120 | }; 121 | 122 | } // namespace cnn 123 | 124 | #endif 125 | -------------------------------------------------------------------------------- /config.h.cmake: -------------------------------------------------------------------------------- 1 | #ifndef CNN_CONFIG_H_ 2 | #define CNN_CONFIG_H_ 3 | 4 | #cmakedefine WITH_MINERVA_BACKEND @WITH_MINERVA_BACKEND@ 5 | #cmakedefine WITH_THPP_BACKEND @WITH_THPP_BACKEND@ 6 | #cmakedefine WITH_EIGEN_BACKEND @WITH_EIGEN_BACKEND@ 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8) 2 | 3 | foreach(TARGET mlc tok-embed segrnn-sup poisson-regression tag-bilstm embed-cl encdec xor xor-xent xor-batch xor-batch-lookup rnnlm rnnlm-aevb rnnlm-cfsm rnnlm-batch nlm textcat rnnlm2 rnnlm-mp read-write) 4 | ADD_EXECUTABLE(${TARGET} ${TARGET}.cc) 5 | target_link_libraries(${TARGET} cnn ${LIBS} pthread) 6 | if(UNIX AND NOT APPLE) 7 | target_link_libraries(${TARGET} rt) 8 | endif() 9 | if (WITH_CUDA_BACKEND) 10 | add_dependencies(${TARGET} cnncuda) 11 | target_link_libraries(${TARGET} cnncuda) 12 | CUDA_ADD_CUBLAS_TO_TARGET(${TARGET}) 13 | endif (WITH_CUDA_BACKEND) 14 | endforeach() 15 | 16 | -------------------------------------------------------------------------------- /examples/example-data/dev-hsm.txt: -------------------------------------------------------------------------------- 1 | i think this is wrong . can you check it ? 2 | fine . 3 | i went back to my hotel room and found a here . 4 | it would be one hundred sixty- eight dollars per night for a twin , with tax and service charge . 5 | i would like to sit down and take a rest for a little while . 6 | i am . 7 | number three . 8 | who are you ? 9 | which is the biggest department store in this city ? 10 | can i see your ticket , sir ? 11 | -------------------------------------------------------------------------------- /examples/example-data/dev-poi.txt: -------------------------------------------------------------------------------- 1 | i 'll take it . ||| 5 2 | we want to have a table near the window . ||| 10 3 | how can i help you ? ||| 6 4 | would you , please ? 
||| 5 5 | -------------------------------------------------------------------------------- /examples/example-data/fin-toy.txt: -------------------------------------------------------------------------------- 1 | 21 11 7 6 7 22 22 1 24 1 21 22 11 2 | 21 3 13 21 3 14 3 11 21 14 7 10 22 11 7 16 3 | 24 3 14 11 22 22 3 24 3 21 22 3 4 | 12 17 10 3 16 16 3 3 5 | 14 3 11 15 7 16 22 3 15 11 21 7 13 21 11 6 | 20 23 23 13 11 16 13 23 12 3 14 14 3 7 | 27 6 11 16 24 3 13 3 24 3 20 3 11 21 23 23 6 7 16 8 | 8 17 14 13 10 1 14 21 3 16 11 16 9 | 27 17 11 9 17 3 10 | 20 3 21 11 22 22 11 24 3 22 11 | 14 3 23 14 3 15 11 16 7 16 12 | 22 23 17 10 11 14 3 15 15 7 14 22 3 13 | 10 7 14 21 11 16 13 11 14 1 11 21 13 7 11 14 3 3 12 3 14 | 15 3 13 21 3 16 7 11 6 7 16 15 | 21 23 18 7 20 27 13 21 11 13 2 16 16 | 21 27 27 22 22 1 12 1 21 22 1 17 | 18 1 11 10 6 7 17 16 9 7 14 15 11 11 16 18 | 21 11 11 22 17 21 7 16 19 | 13 11 10 3 20 17 11 22 3 20 | 22 23 10 13 3 11 21 7 16 21 | 20 23 15 3 3 16 22 | 21 23 15 15 11 21 21 3 23 | 18 1 11 10 6 7 3 21 11 17 11 21 22 3 24 | 7 21 11 17 22 22 7 14 23 11 21 21 3 3 16 25 | 24 3 11 24 3 23 22 23 16 23 22 22 3 26 | 15 7 16 11 13 2 27 | 4 3 5 13 7 11 14 14 3 28 | 15 11 9 20 7 7 16 11 16 29 | 20 3 13 13 3 23 21 21 23 10 22 7 11 21 21 3 30 | 17 4 12 7 13 22 11 21 22 3 31 | 21 17 21 11 3 14 11 21 22 11 18 20 7 21 11 6 7 16 22 22 11 32 | 22 27 10 12 11 7 16 33 | 21 10 7 16 28 10 7 16 11 14 1 11 16 7 16 34 | 3 14 3 13 3 21 22 11 11 16 35 | 10 3 23 21 13 3 16 18 11 22 17 3 36 | 15 23 17 22 22 11 11 16 37 | 3 20 17 15 3 3 22 22 11 21 7 22 38 | 12 1 20 24 7 16 22 11 7 39 | 17 15 11 21 22 7 11 21 7 14 14 7 40 | 21 23 24 3 11 22 21 7 24 3 11 16 7 16 41 | 10 7 20 13 11 21 22 27 11 16 42 | 22 17 11 15 7 22 22 17 15 3 16 3 43 | 22 3 13 7 20 22 23 23 44 | 22 23 17 22 3 16 22 17 24 3 11 10 7 7 21 21 3 45 | 24 3 18 3 3 13 3 18 18 3 14 7 17 11 13 7 23 21 46 | 22 11 7 22 7 7 16 22 7 13 11 12 1 22 47 | 15 3 23 14 3 24 11 20 22 3 48 | 27 2 18 3 13 13 3 21 11 3 49 | 18 7 14 11 3 12 3 21 21 3 50 | 15 1 16 16 11 13 2 11 21 22 1 51 | -------------------------------------------------------------------------------- /examples/example-data/seg-sup.dev.txt: -------------------------------------------------------------------------------- 1 | a a 0 1 a ||| O:1 O:1 N:2 O:1 2 | x y 1 a ||| O:1 O:1 N:1 O:1 3 | a a a ||| O:1 O:1 O:1 4 | 1 0 1 a ||| N:3 O:1 5 | a 1 a 1 ||| O:1 N:1 O:1 N:1 6 | 0 0 a a 0 0 a a ||| N:2 O:1 O:1 N:2 O:1 O:1 7 | -------------------------------------------------------------------------------- /examples/example-data/textcat.txt: -------------------------------------------------------------------------------- 1 | what is your name ? ||| Q 2 | where do you live ? ||| Q 3 | i live in pittsburgh . ||| D 4 | my name is bond . ||| D 5 | wtf ? she asked . ||| D 6 | did she say " yes . " ? 
||| Q 7 | -------------------------------------------------------------------------------- /examples/mlc.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/timing.h" 2 | #include "cnn/nodes.h" 3 | #include "cnn/cnn.h" 4 | #include "cnn/training.h" 5 | #include "cnn/expr.h" 6 | #include "cnn/grad-check.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | using namespace cnn; 16 | using namespace cnn::expr; 17 | 18 | struct TrainingInstance { 19 | TrainingInstance() {} 20 | TrainingInstance(const vector>& x, const vector& y) : feats(x), labels(y) {} 21 | vector> feats; // sparse representation of x vector 22 | vector labels; // labels 23 | }; 24 | 25 | void swap(TrainingInstance& a, TrainingInstance& b) { 26 | using std::swap; 27 | swap(a.feats, b.feats); 28 | swap(a.labels, b.labels); 29 | } 30 | 31 | // X: 0 864:0.0497399 1523:0.0446641 1681:0.0673872 2293:0.0718105 2845:0.0657134 2867:0.0653402 3240:0.0795168 4125:0.0423215 4271:0.0691369 4665:0.0500863 5216:0.252185 5573:0.0672562 5699:0.0594998 5794:0.0737821 6222:0.124501 6592:0.101431 7227:0.194091 7975:0.0766401 32 | // Y: 0 35:1 60:1 94:1 95:1 103:1 33 | vector ReadFiles(const char* xfname, const char* yfname, unsigned& maxfeat, unsigned& maxlabel) { 34 | maxfeat = 0; 35 | maxlabel = 0; 36 | vector d; 37 | ifstream inx(xfname); 38 | assert(inx); 39 | ifstream iny(yfname); 40 | assert(iny); 41 | string linex, liney; 42 | string tok; 43 | while(getline(inx,linex)) { 44 | getline(iny,liney); 45 | 46 | vector> v; 47 | istringstream isx(linex); 48 | isx >> tok; 49 | assert(tok == "0"); 50 | while (isx) { 51 | isx >> tok; 52 | if (!isx) break; 53 | size_t pos = tok.find(':'); 54 | assert(pos != string::npos); 55 | tok[pos] = 0; 56 | unsigned fi = atoi(&tok[0]); 57 | if (fi > maxfeat) maxfeat = fi; 58 | float fv = strtof(&tok[pos+1], 0); 59 | v.emplace_back(make_pair(fi, fv)); 60 | } 61 | vector y; 62 | istringstream isy(liney); 63 | isy >> tok; 64 | assert(tok == "0"); 65 | while (isy) { 66 | isy >> tok; 67 | if (!isy) break; 68 | size_t pos = tok.find(':'); 69 | assert(pos != string::npos); 70 | tok[pos] = 0; 71 | unsigned yi = atoi(&tok[0]); 72 | if (yi > maxlabel) maxlabel = yi; 73 | y.push_back(yi); 74 | } 75 | d.emplace_back(v, y); 76 | } 77 | return d; 78 | } 79 | 80 | struct MLCBuilder { 81 | explicit MLCBuilder(Model& m, unsigned nfeats, unsigned labels) { 82 | unsigned HIDDEN_SIZE = 200; 83 | p_xe = m.add_lookup_parameters(nfeats, {HIDDEN_SIZE}); 84 | p_bh = m.add_parameters({HIDDEN_SIZE}); 85 | p_h2y = m.add_parameters({labels, HIDDEN_SIZE}); 86 | p_by = m.add_parameters({labels}); 87 | } 88 | // output will be a vector of scores that can be 'softmaxed' or 'sparsemaxed' 89 | // into a probability distribution, or it can be compared with a target 90 | // distribution and a loss will be computed 91 | Expression BuildPredictionScores(ComputationGraph& cg, const vector>& feats) const { 92 | vector fe(feats.size() + 1); 93 | unsigned fi = 0; 94 | for (auto& xi : feats) { 95 | fe[fi++] = lookup(cg, p_xe, xi.first) * xi.second; // xi.second is the input feature value 96 | } 97 | fe[fi] = parameter(cg, p_bh); // put bias term at the end 98 | Expression h = tanh(sum(fe)); 99 | Expression h2y = parameter(cg, p_h2y); 100 | Expression by = parameter(cg, p_by); 101 | return affine_transform({by, h2y, h}); 102 | } 103 | LookupParameters* p_xe; 104 | Parameters* p_bh; 105 | Parameters* p_h2y; 106 | Parameters* p_by; 107 | }; 108 | 109 
| int main(int argc, char** argv) { 110 | cnn::Initialize(argc, argv); 111 | 112 | if (argc != 5) { 113 | cerr << "Usage: " << argv[0] << " x.train.txt y.train.txt x.dev.txt y.dev.txt\n"; 114 | return 1; 115 | } 116 | vector train, dev; 117 | unsigned max_xi, max_yi, dxi, dyi; 118 | train = ReadFiles(argv[1], argv[2], max_xi, max_yi); 119 | cerr << "Maximum feature index: " << max_xi << endl; 120 | cerr << "Maximum label index: " << max_yi << endl; 121 | dev = ReadFiles(argv[3], argv[4], dxi, dyi); 122 | assert(dxi <= max_xi); 123 | assert(dyi <= max_yi); 124 | max_xi++; 125 | max_yi++; 126 | 127 | // parameters 128 | Model m; 129 | MLCBuilder mlc(m, max_xi, max_yi); 130 | 131 | //AdadeltaTrainer sgd(&m); 132 | SimpleSGDTrainer sgd(&m); 133 | sgd.eta0 = 0.001; 134 | sgd.eta = 0.001; 135 | 136 | unsigned report_every_i = 50; 137 | unsigned si = train.size(); 138 | bool first = true; 139 | vector order(train.size()); 140 | for (unsigned i = 0; i < order.size(); ++i) order[i] = i; 141 | double ti = 0; 142 | while(1) { 143 | Timer iteration("completed in"); 144 | double loss = 0; 145 | unsigned instances = 0; 146 | for (unsigned i = 0; i < report_every_i; ++i) { 147 | if (si == train.size()) { 148 | si = 0; 149 | if (first) { first = false; } else { sgd.update_epoch(); } 150 | cerr << "**SHUFFLE\n"; 151 | shuffle(order.begin(), order.end(), *rndeng); 152 | } 153 | // build graph for this instance 154 | ComputationGraph cg; 155 | auto& xy = train[order[si]]; 156 | ++si; 157 | ++instances; 158 | ++ti; 159 | Expression u = mlc.BuildPredictionScores(cg, xy.feats); 160 | 161 | if (rand01() < 0.004) { 162 | sparsemax(u * 1.5); // this increases sparsity at test time, which Andre found the be useful 163 | vector p = as_vector(cg.incremental_forward()); 164 | for (unsigned j = 0; j < p.size(); ++j) 165 | if (p[j] > 0) cerr << j << ' '; 166 | cerr << " |||"; 167 | for (auto y : xy.labels) 168 | cerr << ' ' << y; 169 | cerr << endl; 170 | } 171 | sparsemax_loss(u, &xy.labels); 172 | loss += as_scalar(cg.forward()); 173 | cg.backward(); 174 | sgd.update(1.0); 175 | } 176 | cerr << "[epoch=" << (ti / train.size()) << "] E=" << (loss / instances) << ' '; 177 | } 178 | } 179 | 180 | -------------------------------------------------------------------------------- /examples/nlm.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/training.h" 4 | #include "cnn/timing.h" 5 | #include "cnn/expr.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace cnn; 14 | using namespace cnn::expr; 15 | 16 | int main(int argc, char** argv) { 17 | cnn::Initialize(argc, argv); 18 | 19 | unsigned CONTEXT = 3; 20 | unsigned DIM = 100; 21 | unsigned VOCAB_SIZE = 29; 22 | 23 | // parameters 24 | Model model; 25 | SimpleSGDTrainer sgd(&model); 26 | LookupParameters* p_c = model.add_lookup_parameters(VOCAB_SIZE, {DIM}); 27 | 28 | ComputationGraph cg; 29 | 30 | vector in_c(CONTEXT); // set these to set the context words 31 | vector c(CONTEXT); 32 | for (int i=0; i> corpus; 56 | string line; 57 | while(getline(in, line)) { 58 | istringstream is(line); 59 | vector x(CONTEXT+1); 60 | for (unsigned i = 0; i <= CONTEXT; ++i) { 61 | is >> x[i]; 62 | assert(x[i] < VOCAB_SIZE); 63 | } 64 | corpus.push_back(x); 65 | } 66 | 67 | 68 | // train the parameters 69 | for (unsigned iter = 0; iter < 100; ++iter) { 70 | Timer iteration("epoch completed in"); 71 | double loss = 0; 72 | unsigned n 
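// Note that nlm.cc builds its ComputationGraph once, before the training
// loop: each iteration only overwrites in_c (the context words) and ytrue
// (the target) and calls forward()/backward() again, reusing the same graph
// instead of rebuilding it per instance as the other examples do.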
= 0; 73 | for (auto& ci : corpus) { 74 | copy(ci.begin(), ci.begin()+CONTEXT, in_c.begin()); 75 | ytrue = ci.back(); 76 | loss += as_scalar(cg.forward()); 77 | cg.backward(); 78 | ++n; 79 | sgd.update(1.0); 80 | if (n == 2500) break; 81 | } 82 | loss /= n; 83 | cerr << "E = " << loss << ' '; 84 | } 85 | } 86 | 87 | -------------------------------------------------------------------------------- /examples/read-write.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/training.h" 4 | #include "cnn/gpu-ops.h" 5 | #include "cnn/expr.h" 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace cnn; 14 | using namespace cnn::expr; 15 | 16 | 17 | // This is a sample class which implements the xor model from xor.cc 18 | // Everything in this class is just as you would do the usual except for 19 | // parts with provided comments. 20 | class XORModel { 21 | public: 22 | unsigned hidden_size; 23 | 24 | Expression W, b, V, a; 25 | Parameters *pW, *pb, *pV, *pa; 26 | 27 | // It is important to have a null default constructor for the class, as 28 | // we would first need to read the class object from the file, followed by 29 | // the cnn model which has saved parameters. 30 | XORModel() {} 31 | 32 | XORModel(const unsigned& hidden_len, Model *m) { 33 | hidden_size = hidden_len; 34 | InitParams(m); 35 | } 36 | 37 | void InitParams(Model *m) { 38 | pW = m->add_parameters({hidden_size, 2}); 39 | pb = m->add_parameters({hidden_size}); 40 | pV = m->add_parameters({1, hidden_size}); 41 | pa = m->add_parameters({1}); 42 | } 43 | 44 | void AddParamsToCG(ComputationGraph *cg) { 45 | W = parameter(*cg, pW); 46 | b = parameter(*cg, pb); 47 | V = parameter(*cg, pV); 48 | a = parameter(*cg, pa); 49 | } 50 | 51 | float Train(vector &input, cnn::real &gold_output, 52 | SimpleSGDTrainer *sgd) { 53 | ComputationGraph cg; 54 | AddParamsToCG(&cg); 55 | 56 | Expression x = cnn::expr::input(cg, {(unsigned int)input.size()}, &input); 57 | Expression y = cnn::expr::input(cg, &gold_output); 58 | 59 | Expression h = tanh(W*x + b); 60 | Expression y_pred = V*h + a; 61 | Expression loss = squared_distance(y_pred, y); 62 | float return_loss = as_scalar(cg.forward()); 63 | cg.backward(); 64 | sgd->update(1.0); 65 | return return_loss; 66 | } 67 | 68 | float Decode(vector &input) { 69 | ComputationGraph cg; 70 | AddParamsToCG(&cg); 71 | 72 | Expression x = cnn::expr::input(cg, {(unsigned int)input.size()}, &input); 73 | Expression h = tanh(W*x + b); 74 | Expression y_pred = V*h + a; 75 | return as_scalar(cg.forward()); 76 | } 77 | 78 | // This function should save all those variables in the archive, which 79 | // determine the size of other members of the class, here: hidden_size 80 | friend class boost::serialization::access; 81 | template void serialize(Archive& ar, const unsigned int) { 82 | 83 | // This can either save or read the value of hidden_size from ar, 84 | // depending on whether its the output or input archive. 85 | ar & hidden_size; 86 | } 87 | }; 88 | 89 | void WriteToFile(string& filename, XORModel &model, Model &cnn_model) { 90 | ofstream outfile(filename); 91 | if (!outfile.is_open()) { 92 | cerr << "File opening failed" << endl; 93 | } 94 | 95 | boost::archive::text_oarchive oa(outfile); 96 | oa & model; // Write down your class object. 97 | oa & cnn_model; // Write down the cnn::Model object. 
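// The read path must mirror this order exactly: ReadFromFile() below first
// restores XORModel (recovering hidden_size), then calls InitParams() so the
// cnn::Model has the same parameter layout, and only then streams in the
// cnn::Model archive itself.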
98 | outfile.close(); 99 | } 100 | 101 | void ReadFromFile(string& filename, XORModel *model, Model *cnn_model) { 102 | ifstream infile(filename); 103 | if (!infile.is_open()) { 104 | cerr << "File opening failed" << endl; 105 | } 106 | 107 | boost::archive::text_iarchive ia(infile); 108 | ia & *model; // Read your class object 109 | 110 | // Now determine structure of cnn::Model depending on the 111 | // the structure of your class object 112 | model->InitParams(cnn_model); 113 | ia & *cnn_model; // Read the cnn::Model 114 | 115 | infile.close(); 116 | } 117 | 118 | 119 | int main(int argc, char** argv) { 120 | cnn::Initialize(argc, argv); 121 | 122 | const unsigned HIDDEN = 8; 123 | const unsigned ITERATIONS = 20; 124 | Model m; 125 | SimpleSGDTrainer sgd(&m); 126 | XORModel model(HIDDEN, &m); 127 | 128 | vector x_values(2); // set x_values to change the inputs 129 | cnn::real y_value; // set y_value to change the target output 130 | 131 | // Train the model 132 | for (unsigned iter = 0; iter < ITERATIONS; ++iter) { 133 | double loss = 0; 134 | for (unsigned mi = 0; mi < 4; ++mi) { 135 | bool x1 = mi % 2; 136 | bool x2 = (mi / 2) % 2; 137 | x_values[0] = x1 ? 1 : -1; 138 | x_values[1] = x2 ? 1 : -1; 139 | y_value = (x1 != x2) ? 1 : -1; 140 | loss += model.Train(x_values, y_value, &sgd); 141 | } 142 | loss /= 4; 143 | cerr << "E = " << loss << endl; 144 | } 145 | 146 | string outfile = "out.txt"; 147 | cerr << "Written model to File: " << outfile << endl; 148 | WriteToFile(outfile, model, m); // Writing objects to file 149 | 150 | // New objects in which the written archive will be read 151 | Model read_cnn_model; 152 | XORModel read_model; 153 | 154 | cerr << "Reading model from File: " << outfile << endl; 155 | ReadFromFile(outfile, &read_model, &read_cnn_model); // Reading from file 156 | cerr << "Output for the input: " << x_values[0] << " " << x_values[1] << endl; 157 | cerr << read_model.Decode(x_values); // Checking output for sanity 158 | } 159 | 160 | -------------------------------------------------------------------------------- /examples/rnnlm-mp.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/cnn.h" 2 | #include "cnn/training.h" 3 | #include "cnn/expr.h" 4 | #include "cnn/lstm.h" 5 | #include "cnn/mp.h" 6 | #include "rnnlm.h" 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | /* 15 | TODO: 16 | - The shadow params in the trainers need to be shared. 
17 | */ 18 | 19 | using namespace std; 20 | using namespace cnn; 21 | using namespace cnn::expr; 22 | using namespace cnn::mp; 23 | using namespace boost::interprocess; 24 | 25 | typedef vector<int> Datum; 26 | 27 | vector<Datum> ReadData(string filename) { 28 | vector<Datum> data; 29 | ifstream fs(filename); 30 | if (!fs.is_open()) { 31 | cerr << "ERROR: Unable to open " << filename << endl; 32 | exit(1); 33 | } 34 | string line; 35 | while (getline(fs, line)) { 36 | data.push_back(ReadSentence(line, &d)); 37 | } 38 | return data; 39 | } 40 | 41 | template <class D> 42 | class Learner : public ILearner<D> { 43 | public: 44 | explicit Learner(RNNLanguageModel<LSTMBuilder>& rnnlm, unsigned data_size) : rnnlm(rnnlm) {} 45 | ~Learner() {} 46 | 47 | cnn::real LearnFromDatum(const D& datum, bool learn) { 48 | ComputationGraph cg; 49 | rnnlm.BuildLMGraph(datum, cg); 50 | cnn::real loss = as_scalar(cg.forward()); 51 | if (learn) { 52 | cg.backward(); 53 | } 54 | return loss; 55 | } 56 | 57 | void SaveModel() {} 58 | 59 | private: 60 | RNNLanguageModel<LSTMBuilder>& rnnlm; 61 | }; 62 | 63 | int main(int argc, char** argv) { 64 | if (argc < 4) { 65 | cerr << "Usage: " << argv[0] << " cores corpus.txt dev.txt [iterations]" << endl; 66 | return 1; 67 | } 68 | srand(time(NULL)); 69 | unsigned num_children = atoi(argv[1]); 70 | assert (num_children <= 64); 71 | vector<Datum> data = ReadData(argv[2]); 72 | vector<Datum> dev_data = ReadData(argv[3]); 73 | unsigned num_iterations = (argc >= 5) ? atoi(argv[4]) : UINT_MAX; 74 | unsigned dev_frequency = 5000; 75 | unsigned report_frequency = 10; 76 | 77 | cnn::Initialize(argc, argv, 1, true); 78 | 79 | Model model; 80 | SimpleSGDTrainer sgd(&model, 0.0, 0.2); 81 | //AdagradTrainer sgd(&model, 0.0); 82 | //AdamTrainer sgd(&model, 0.0); 83 | 84 | RNNLanguageModel<LSTMBuilder> rnnlm(model); 85 | 86 | Learner<Datum> learner(rnnlm, data.size()); 87 | RunMultiProcess(num_children, &learner, &sgd, data, dev_data, num_iterations, dev_frequency, report_frequency); 88 | } 89 | -------------------------------------------------------------------------------- /examples/rnnlm.h: -------------------------------------------------------------------------------- 1 | #include "cnn/cnn.h" 2 | #include "cnn/expr.h" 3 | #include "cnn/dict.h" 4 | #include "cnn/lstm.h" 5 | 6 | #include <iostream> 7 | 8 | using namespace std; 9 | using namespace cnn; 10 | using namespace cnn::expr; 11 | 12 | unsigned LAYERS = 2; 13 | unsigned INPUT_DIM = 8; //256 14 | unsigned HIDDEN_DIM = 24; // 1024 15 | unsigned VOCAB_SIZE = 5500; 16 | 17 | cnn::Dict d; 18 | int kSOS; 19 | int kEOS; 20 | 21 | template <class Builder> 22 | struct RNNLanguageModel { 23 | LookupParameters* p_c; 24 | Parameters* p_R; 25 | Parameters* p_bias; 26 | Builder builder; 27 | explicit RNNLanguageModel(Model& model) : builder(LAYERS, INPUT_DIM, HIDDEN_DIM, &model) { 28 | kSOS = d.Convert("<s>"); 29 | kEOS = d.Convert("</s>"); 30 | p_c = model.add_lookup_parameters(VOCAB_SIZE, {INPUT_DIM}); 31 | p_R = model.add_parameters({VOCAB_SIZE, HIDDEN_DIM}); 32 | p_bias = model.add_parameters({VOCAB_SIZE}); 33 | } 34 | 35 | // return Expression of total loss 36 | Expression BuildLMGraph(const vector<int>& sent, ComputationGraph& cg) { 37 | const unsigned slen = sent.size() - 1; 38 | builder.new_graph(cg); // reset RNN builder for new graph 39 | builder.start_new_sequence(); 40 | Expression i_R = parameter(cg, p_R); // hidden -> word rep parameter 41 | Expression i_bias = parameter(cg, p_bias); // word bias 42 | vector<Expression> errs; 43 | for (unsigned t = 0; t < slen; ++t) { 44 | Expression i_x_t = lookup(cg, p_c, sent[t]); 45 | // y_t = RNN(x_t) 46 | Expression i_y_t =
builder.add_input(i_x_t); 47 | Expression i_r_t = i_bias + i_R * i_y_t; 48 | 49 | // LogSoftmax followed by PickElement can be written in one step 50 | // using PickNegLogSoftmax 51 | Expression i_err = pickneglogsoftmax(i_r_t, sent[t+1]); 52 | errs.push_back(i_err); 53 | } 54 | Expression i_nerr = sum(errs); 55 | return i_nerr; 56 | } 57 | 58 | // return Expression for total loss 59 | void RandomSample(int max_len = 150) { 60 | cerr << endl; 61 | ComputationGraph cg; 62 | builder.new_graph(cg); // reset RNN builder for new graph 63 | builder.start_new_sequence(); 64 | 65 | Expression i_R = parameter(cg, p_R); 66 | Expression i_bias = parameter(cg, p_bias); 67 | vector errs; 68 | int len = 0; 69 | int cur = kSOS; 70 | while(len < max_len && cur != kEOS) { 71 | ++len; 72 | Expression i_x_t = lookup(cg, p_c, cur); 73 | // y_t = RNN(x_t) 74 | Expression i_y_t = builder.add_input(i_x_t); 75 | Expression i_r_t = i_bias + i_R * i_y_t; 76 | 77 | Expression ydist = softmax(i_r_t); 78 | 79 | unsigned w = 0; 80 | while (w == 0 || (int)w == kSOS) { 81 | auto dist = as_vector(cg.incremental_forward()); 82 | double p = rand01(); 83 | for (; w < dist.size(); ++w) { 84 | p -= dist[w]; 85 | if (p < 0.0) { break; } 86 | } 87 | if (w == dist.size()) w = kEOS; 88 | } 89 | cerr << (len == 1 ? "" : " ") << d.Convert(w); 90 | cur = w; 91 | } 92 | cerr << endl; 93 | } 94 | }; 95 | -------------------------------------------------------------------------------- /examples/xor-batch-lookup.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/training.h" 4 | #include "cnn/gpu-ops.h" 5 | #include "cnn/expr.h" 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace cnn; 14 | using namespace cnn::expr; 15 | 16 | int main(int argc, char** argv) { 17 | cnn::Initialize(argc, argv); 18 | 19 | // parameters 20 | const unsigned HIDDEN_SIZE = 8; 21 | const unsigned ITERATIONS = 200; 22 | Model m; 23 | SimpleSGDTrainer sgd(&m); 24 | 25 | ComputationGraph cg; 26 | 27 | Expression W = parameter(cg, m.add_parameters({HIDDEN_SIZE, 2})); 28 | Expression b = parameter(cg, m.add_parameters({HIDDEN_SIZE})); 29 | Expression V = parameter(cg, m.add_parameters({1, HIDDEN_SIZE})); 30 | Expression a = parameter(cg, m.add_parameters({1})); 31 | 32 | LookupParameters* x_values = m.add_lookup_parameters(4, {2}); 33 | LookupParameters* y_values = m.add_lookup_parameters(4, {1}); 34 | x_values->Initialize(0, {1.0, 1.0}); 35 | x_values->Initialize(1, {-1.0, 1.0}); 36 | x_values->Initialize(2, {1.0, -1.0}); 37 | x_values->Initialize(3, {-1.0, -1.0}); 38 | y_values->Initialize(0, {-1.0}); 39 | y_values->Initialize(1, {1.0}); 40 | y_values->Initialize(2, {1.0}); 41 | y_values->Initialize(3, {-1.0}); 42 | 43 | Expression x = const_lookup(cg, x_values, {0, 1, 2, 3}); 44 | Expression y = const_lookup(cg, y_values, {0, 1, 2, 3}); 45 | 46 | cerr << "x is " << x.value().d << ", y is " << y.value().d << endl; 47 | Expression h = tanh(W*x + b); 48 | //Expression h = softsign(W*x + b); 49 | Expression y_pred = V*h + a; 50 | Expression loss = squared_distance(y_pred, y); 51 | Expression sum_loss = sum_batches(loss); 52 | 53 | cg.PrintGraphviz(); 54 | if (argc == 2) { 55 | ifstream in(argv[1]); 56 | boost::archive::text_iarchive ia(in); 57 | ia >> m; 58 | } 59 | 60 | // train the parameters 61 | for (unsigned iter = 0; iter < ITERATIONS; ++iter) { 62 | vector losses = as_vector(cg.forward()); 63 | 
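// cg.forward() evaluates up to the most recently added expression --
// here sum_loss, which folds the per-instance losses of the mini-batch
// into a single value -- and cg.backward() runs backprop from that node.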
cg.backward(); 64 | sgd.update(0.25); 65 | sgd.update_epoch(); 66 | float loss = 0; 67 | for(auto l : losses) 68 | loss += l; 69 | loss /= 4; 70 | cerr << "E = " << loss << endl; 71 | } 72 | //boost::archive::text_oarchive oa(cout); 73 | //oa << m; 74 | } 75 | 76 | -------------------------------------------------------------------------------- /examples/xor-batch.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/training.h" 4 | #include "cnn/gpu-ops.h" 5 | #include "cnn/expr.h" 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace cnn; 14 | using namespace cnn::expr; 15 | 16 | int main(int argc, char** argv) { 17 | cnn::Initialize(argc, argv); 18 | 19 | // parameters 20 | const unsigned HIDDEN_SIZE = 8; 21 | const unsigned ITERATIONS = 200; 22 | Model m; 23 | SimpleSGDTrainer sgd(&m); 24 | 25 | ComputationGraph cg; 26 | 27 | Expression W = parameter(cg, m.add_parameters({HIDDEN_SIZE, 2})); 28 | Expression b = parameter(cg, m.add_parameters({HIDDEN_SIZE})); 29 | Expression V = parameter(cg, m.add_parameters({1, HIDDEN_SIZE})); 30 | Expression a = parameter(cg, m.add_parameters({1})); 31 | 32 | // set x_values to change the inputs to the network 33 | Dim x_dim({2}, 4), y_dim({1}, 4); 34 | cerr << "x_dim=" << x_dim << ", y_dim=" << y_dim << endl; 35 | vector x_values = {1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, -1.0}; 36 | Expression x = input(cg, x_dim, &x_values); 37 | // set y_values expressing the output 38 | vector y_values = {-1.0, 1.0, 1.0, -1.0}; 39 | Expression y = input(cg, y_dim, &y_values); 40 | 41 | Expression h = tanh(W*x + b); 42 | //Expression h = tanh(affine_transform({b, W, x})); 43 | //Expression h = softsign(W*x + b); 44 | Expression y_pred = V*h + a; 45 | Expression loss = squared_distance(y_pred, y); 46 | Expression sum_loss = sum_batches(loss); 47 | 48 | cg.PrintGraphviz(); 49 | if (argc == 2) { 50 | ifstream in(argv[1]); 51 | boost::archive::text_iarchive ia(in); 52 | ia >> m; 53 | } 54 | 55 | // train the parameters 56 | for (unsigned iter = 0; iter < ITERATIONS; ++iter) { 57 | float my_loss = as_scalar(cg.forward()) / 4; 58 | cg.backward(); 59 | sgd.update(0.25); 60 | sgd.update_epoch(); 61 | cerr << "E = " << my_loss << endl; 62 | } 63 | //boost::archive::text_oarchive oa(cout); 64 | //oa << m; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /examples/xor-xent.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/training.h" 4 | #include "cnn/expr.h" 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace cnn; 14 | using namespace cnn::expr; 15 | 16 | int main(int argc, char** argv) { 17 | cnn::Initialize(argc, argv); 18 | 19 | // parameters 20 | const unsigned HIDDEN_SIZE = 8; 21 | Model m; 22 | SimpleSGDTrainer sgd(&m); 23 | //MomentumSGDTrainer sgd(&m); 24 | 25 | ComputationGraph cg; 26 | 27 | Expression W = parameter(cg, m.add_parameters({HIDDEN_SIZE, 2})); 28 | Expression b = parameter(cg, m.add_parameters({HIDDEN_SIZE})); 29 | Expression V = parameter(cg, m.add_parameters({1, HIDDEN_SIZE})); 30 | Expression a = parameter(cg, m.add_parameters({1})); 31 | 32 | vector x_values(2); // set x_values to change the inputs to the network 33 | Expression x = input(cg, {2}, &x_values); 34 | cnn::real y_value; // 
set y_value to change the target output 35 | Expression y = input(cg, &y_value); 36 | 37 | Expression h = tanh(W*x + b); 38 | Expression y_pred = logistic(V*h + a); 39 | Expression loss = binary_log_loss(y_pred, y); 40 | 41 | cg.PrintGraphviz(); 42 | //if (argc == 2) { 43 | // ifstream in(argv[1]); 44 | // boost::archive::text_iarchive ia(in); 45 | // ia >> m; 46 | //} 47 | 48 | // train the parameters 49 | for (unsigned iter = 0; iter < 2000; ++iter) { 50 | double loss = 0; 51 | for (unsigned mi = 0; mi < 4; ++mi) { 52 | bool x1 = mi % 2; 53 | bool x2 = (mi / 2) % 2; 54 | x_values[0] = x1 ? 1 : 0; 55 | x_values[1] = x2 ? 1 : 0; 56 | y_value = (x1 != x2) ? 1 : 0; 57 | loss += as_scalar(cg.forward()); 58 | cg.backward(); 59 | sgd.update(1.0); 60 | } 61 | sgd.update_epoch(); 62 | loss /= 4; 63 | cerr << "E = " << loss << endl; 64 | } 65 | boost::archive::text_oarchive oa(cout); 66 | oa << m; 67 | } 68 | 69 | -------------------------------------------------------------------------------- /examples/xor.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/training.h" 4 | #include "cnn/gpu-ops.h" 5 | #include "cnn/expr.h" 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace cnn; 14 | using namespace cnn::expr; 15 | 16 | int main(int argc, char** argv) { 17 | cnn::Initialize(argc, argv); 18 | 19 | // parameters 20 | const unsigned HIDDEN_SIZE = 8; 21 | const unsigned ITERATIONS = 30; 22 | Model m; 23 | SimpleSGDTrainer sgd(&m); 24 | //MomentumSGDTrainer sgd(&m); 25 | 26 | ComputationGraph cg; 27 | 28 | Expression W = parameter(cg, m.add_parameters({HIDDEN_SIZE, 2})); 29 | Expression b = parameter(cg, m.add_parameters({HIDDEN_SIZE})); 30 | Expression V = parameter(cg, m.add_parameters({1, HIDDEN_SIZE})); 31 | Expression a = parameter(cg, m.add_parameters({1})); 32 | 33 | vector x_values(2); // set x_values to change the inputs to the network 34 | Expression x = input(cg, {2}, &x_values); 35 | cnn::real y_value; // set y_value to change the target output 36 | Expression y = input(cg, &y_value); 37 | 38 | Expression h = tanh(W*x + b); 39 | //Expression h = tanh(affine_transform({b, W, x})); 40 | //Expression h = softsign(W*x + b); 41 | Expression y_pred = V*h + a; 42 | Expression loss = squared_distance(y_pred, y); 43 | 44 | cg.PrintGraphviz(); 45 | if (argc == 2) { 46 | ifstream in(argv[1]); 47 | boost::archive::text_iarchive ia(in); 48 | ia >> m; 49 | } 50 | 51 | // train the parameters 52 | for (unsigned iter = 0; iter < ITERATIONS; ++iter) { 53 | double loss = 0; 54 | for (unsigned mi = 0; mi < 4; ++mi) { 55 | bool x1 = mi % 2; 56 | bool x2 = (mi / 2) % 2; 57 | x_values[0] = x1 ? 1 : -1; 58 | x_values[1] = x2 ? 1 : -1; 59 | y_value = (x1 != x2) ? 
1 : -1; 60 | loss += as_scalar(cg.forward()); 61 | cg.backward(); 62 | sgd.update(1.0); 63 | } 64 | sgd.update_epoch(); 65 | loss /= 4; 66 | cerr << "E = " << loss << endl; 67 | } 68 | boost::archive::text_oarchive oa(cout); 69 | oa << m; 70 | } 71 | 72 | -------------------------------------------------------------------------------- /pycnn/INSTALL: -------------------------------------------------------------------------------- 1 | See installation instruction in ../INSTALL.md 2 | -------------------------------------------------------------------------------- /pycnn/makefile: -------------------------------------------------------------------------------- 1 | 2 | PYTHON := python 3 | UNAME := $(shell uname) 4 | ifeq ($(UNAME),Darwin) 5 | DYSUF := dylib 6 | else 7 | DYSUF := so 8 | endif 9 | 10 | pycnn.${DYSUF}: ../build/cnn/libcnn_shared.${DYSUF} pycnn.pyx pycnn.pxd setup.py 11 | cp ../build/cnn/libcnn_shared.${DYSUF} . 12 | ${PYTHON} setup.py build_ext --inplace 13 | 14 | gpycnn.${DYSUF}: ../build/cnn/libgcnn_shared.${DYSUF} pycnn.pyx pycnn.pxd setup_gpu.py 15 | cp ../build/cnn/libgcnn_shared.${DYSUF} . 16 | cp ../build/cnn/libcnncuda_shared.${DYSUF} . 17 | cp pycnn.pyx gpycnn.pyx 18 | cp pycnn.pxd gpycnn.pxd 19 | ${PYTHON} setup_gpu.py build_ext --inplace 20 | 21 | ginstall: gpycnn.${DYSUF} 22 | ${PYTHON} setup_gpu.py install --user 23 | 24 | install: pycnn.${DYSUF} 25 | ${PYTHON} setup.py install --user 26 | 27 | clean: 28 | rm *.${DYSUF} *.cpp 29 | 30 | 31 | -------------------------------------------------------------------------------- /pycnn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools.extension import Extension 3 | from Cython.Distutils import build_ext 4 | 5 | 6 | # Remove the "-Wstrict-prototypes" compiler option, which isn't valid for C++. 7 | import distutils.sysconfig 8 | cfg_vars = distutils.sysconfig.get_config_vars() 9 | if "CFLAGS" in cfg_vars: 10 | cfg_vars["CFLAGS"] = cfg_vars["CFLAGS"].replace("-Wstrict-prototypes", "") 11 | 12 | ext = Extension( 13 | "pycnn", # name of extension 14 | ["pycnn.pyx"], # filename of our Pyrex/Cython source 15 | language="c++", # this causes Pyrex/Cython to create C++ source 16 | include_dirs=["../../cnn/", # this is the location of the main cnn directory. 17 | "../../eigen/"], # this is the directory where eigen is saved. 18 | libraries=['cnn_shared'], # ditto 19 | library_dirs=["."], 20 | #extra_link_args=["-L/home/yogo/Vork/Research/cnn/cnn/build/cnn"], # if needed 21 | extra_compile_args=["-std=c++11"], 22 | runtime_library_dirs=["$ORIGIN/./"], 23 | ) 24 | 25 | setup(ext_modules = [ext], 26 | cmdclass = {'build_ext': build_ext}, 27 | name="pyCNN", 28 | ) 29 | -------------------------------------------------------------------------------- /pycnn/setup_gpu.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools.extension import Extension 3 | from Cython.Distutils import build_ext 4 | 5 | 6 | # Remove the "-Wstrict-prototypes" compiler option, which isn't valid for C++. 
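# (distutils compiles extensions with the CFLAGS Python itself was built
# with; "-Wstrict-prototypes" only applies to C, so g++ warns on every
# file unless the flag is stripped here -- the same workaround as in
# setup.py above.)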
7 | import distutils.sysconfig 8 | cfg_vars = distutils.sysconfig.get_config_vars() 9 | if "CFLAGS" in cfg_vars: 10 | cfg_vars["CFLAGS"] = cfg_vars["CFLAGS"].replace("-Wstrict-prototypes", "") 11 | 12 | ext = Extension( 13 | "gpycnn", # name of extension 14 | ["gpycnn.pyx"], # filename of our Pyrex/Cython source 15 | language="c++", # this causes Pyrex/Cython to create C++ source 16 | include_dirs=["../../cnn/", # this is the location of the main cnn directory. 17 | "../../eigen/"], # this is the directory where eigen is saved. 18 | #libraries=['cnn','cnncuda'], #,'cnncuda_shared'], # ditto 19 | libraries=['gcnn_shared','cnncuda_shared'], # ditto 20 | library_dirs=["."], 21 | #extra_link_args=["-L/home/yogo/Vork/Research/cnn/cnn/build/cnn"], # if needed 22 | extra_compile_args=["-std=c++11","-fPIC"],#,"-lcudart","-lcublas"], 23 | extra_link_args=["-L/usr/local/cuda-7.5/lib64","-lcudart","-lcublas"], 24 | #extra_objects=["libcnncuda.a"], 25 | ) 26 | 27 | setup(ext_modules = [ext], 28 | cmdclass = {'build_ext': build_ext}, 29 | name="gpyCNN", 30 | ) 31 | -------------------------------------------------------------------------------- /pyexamples/attention.py: -------------------------------------------------------------------------------- 1 | import pycnn as pc 2 | import random 3 | 4 | EOS = "" 5 | characters = list("abcdefghijklmnopqrstuvwxyz ") 6 | characters.append(EOS) 7 | 8 | int2char = list(characters) 9 | char2int = {c:i for i,c in enumerate(characters)} 10 | 11 | VOCAB_SIZE = len(characters) 12 | 13 | LSTM_NUM_OF_LAYERS = 2 14 | EMBEDDINGS_SIZE = 32 15 | STATE_SIZE = 32 16 | ATTENTION_SIZE = 32 17 | 18 | model = pc.Model() 19 | 20 | enc_fwd_lstm = pc.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model) 21 | enc_bwd_lstm = pc.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model) 22 | 23 | dec_lstm = pc.LSTMBuilder(LSTM_NUM_OF_LAYERS, STATE_SIZE*2, STATE_SIZE, model) 24 | 25 | model.add_lookup_parameters("lookup", (VOCAB_SIZE, EMBEDDINGS_SIZE)) 26 | model.add_parameters("attention_w1", (ATTENTION_SIZE, STATE_SIZE*2)) 27 | model.add_parameters("attention_w2", (ATTENTION_SIZE, STATE_SIZE*LSTM_NUM_OF_LAYERS*2)) 28 | model.add_parameters("attention_v", (1, ATTENTION_SIZE)) 29 | model.add_parameters("decoder_w", (VOCAB_SIZE, STATE_SIZE)) 30 | model.add_parameters("decoder_b", (VOCAB_SIZE)) 31 | 32 | 33 | def embedd_sentence(model, sentence): 34 | sentence = [EOS] + list(sentence) + [EOS] 35 | sentence = [char2int[c] for c in sentence] 36 | 37 | lookup = model["lookup"] 38 | 39 | return [lookup[char] for char in sentence] 40 | 41 | 42 | def run_lstm(model, init_state, input_vecs): 43 | s = init_state 44 | 45 | out_vectors = [] 46 | for vector in input_vecs: 47 | s = s.add_input(vector) 48 | out_vector = s.output() 49 | out_vectors.append(out_vector) 50 | return out_vectors 51 | 52 | 53 | def encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, sentence): 54 | sentence_rev = [sentence[i] for i in range(len(sentence)-1, -1, -1)] 55 | 56 | fwd_vectors = run_lstm(model, enc_fwd_lstm.initial_state(), sentence) 57 | bwd_vectors = run_lstm(model, enc_bwd_lstm.initial_state(), sentence_rev) 58 | bwd_vectors = [bwd_vectors[i] for i in range(len(bwd_vectors)-1, -1, -1)] 59 | vectors = [pc.concatenate(list(p)) for p in zip(fwd_vectors, bwd_vectors)] 60 | 61 | return vectors 62 | 63 | 64 | def attend(model, input_vectors, state): 65 | w1 = pc.parameter(model['attention_w1']) 66 | w2 = pc.parameter(model['attention_w2']) 67 | v = pc.parameter(model['attention_v']) 68 | 
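    # Scores follow the usual MLP attention form: for each encoded input
    # vector e_i and the current decoder state s, the unnormalized weight is
    #   v * tanh(w1 * e_i + w2 * concat(s));
    # w2dt below is computed once, since it does not depend on i, and a
    # softmax over the scores then gives the attention distribution.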
attention_weights = [] 69 | 70 | w2dt = w2*pc.concatenate(list(state.s())) 71 | for input_vector in input_vectors: 72 | attention_weight = v*pc.tanh(w1*input_vector + w2dt) 73 | attention_weights.append(attention_weight) 74 | attention_weights = pc.softmax(pc.concatenate(attention_weights)) 75 | output_vectors = pc.esum([vector*attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)]) 76 | return output_vectors 77 | 78 | 79 | def decode(model, dec_lstm, vectors, output): 80 | output = [EOS] + list(output) + [EOS] 81 | output = [char2int[c] for c in output] 82 | 83 | w = pc.parameter(model["decoder_w"]) 84 | b = pc.parameter(model["decoder_b"]) 85 | 86 | s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE*2)) 87 | 88 | loss = [] 89 | for char in output: 90 | vector = attend(model, vectors, s) 91 | 92 | s = s.add_input(vector) 93 | out_vector = w * s.output() + b 94 | probs = pc.softmax(out_vector) 95 | loss.append(-pc.log(pc.pick(probs, char))) 96 | loss = pc.esum(loss) 97 | return loss 98 | 99 | 100 | def generate(model, input, enc_fwd_lstm, enc_bwd_lstm, dec_lstm): 101 | def sample(probs): 102 | rnd = random.random() 103 | for i, p in enumerate(probs): 104 | rnd -= p 105 | if rnd <= 0: break 106 | return i 107 | 108 | embedded = embedd_sentence(model, input) 109 | encoded = encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, embedded) 110 | 111 | w = pc.parameter(model["decoder_w"]) 112 | b = pc.parameter(model["decoder_b"]) 113 | 114 | s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE * 2)) 115 | out = '' 116 | count_EOS = 0 117 | for i in range(len(input)*2): 118 | if count_EOS == 2: break 119 | vector = attend(model, encoded, s) 120 | 121 | s = s.add_input(vector) 122 | out_vector = w * s.output() + b 123 | probs = pc.softmax(out_vector) 124 | probs = probs.vec_value() 125 | next_char = sample(probs) 126 | if int2char[next_char] == EOS: 127 | count_EOS += 1 128 | continue 129 | 130 | out += int2char[next_char] 131 | return out 132 | 133 | 134 | def get_loss(model, input_sentence, output_sentence, enc_fwd_lstm, enc_bwd_lstm, dec_lstm): 135 | pc.renew_cg() 136 | embedded = embedd_sentence(model, input_sentence) 137 | encoded = encode_sentence(model, enc_fwd_lstm, enc_bwd_lstm, embedded) 138 | return decode(model, dec_lstm, encoded, output_sentence) 139 | 140 | 141 | def train(model, sentence): 142 | trainer = pc.SimpleSGDTrainer(model) 143 | for i in xrange(600): 144 | loss = get_loss(model, sentence, sentence, enc_fwd_lstm, enc_bwd_lstm, dec_lstm) 145 | loss_value = loss.value() 146 | loss.backward() 147 | trainer.update() 148 | if i % 20 == 0: 149 | print loss_value 150 | print generate(model, sentence, enc_fwd_lstm, enc_bwd_lstm, dec_lstm) 151 | 152 | 153 | train(model, "it is working") 154 | 155 | 156 | -------------------------------------------------------------------------------- /pyexamples/bilstmtagger.py: -------------------------------------------------------------------------------- 1 | from pycnn import * 2 | from collections import Counter 3 | import random 4 | 5 | import util 6 | 7 | # format of files: each line is "wordtag", blank line is new sentence. 
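# i.e. two whitespace-separated columns per line, for example
# (illustrative words/tags; read() below just expects word-tag pairs):
#   The     DT
#   dog     NN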
8 | train_file="/home/yogo/Vork/Research/corpora/pos/WSJ.TRAIN" 9 | test_file="/home/yogo/Vork/Research/corpora/pos/WSJ.TEST" 10 | 11 | 12 | MLP=True 13 | 14 | def read(fname): 15 | sent = [] 16 | for line in file(fname): 17 | line = line.strip().split() 18 | if not line: 19 | if sent: yield sent 20 | sent = [] 21 | else: 22 | w,p = line 23 | sent.append((w,p)) 24 | 25 | train=list(read(train_file)) 26 | test=list(read(test_file)) 27 | words=[] 28 | tags=[] 29 | wc=Counter() 30 | for s in train: 31 | for w,p in s: 32 | words.append(w) 33 | tags.append(p) 34 | wc[w]+=1 35 | words.append("_UNK_") 36 | #words=[w if wc[w] > 1 else "_UNK_" for w in words] 37 | tags.append("_START_") 38 | 39 | for s in test: 40 | for w,p in s: 41 | words.append(w) 42 | 43 | vw = util.Vocab.from_corpus([words]) 44 | vt = util.Vocab.from_corpus([tags]) 45 | UNK = vw.w2i["_UNK_"] 46 | 47 | nwords = vw.size() 48 | ntags = vt.size() 49 | 50 | model = Model() 51 | sgd = SimpleSGDTrainer(model) 52 | 53 | model.add_lookup_parameters("lookup", (nwords, 128)) 54 | model.add_lookup_parameters("tl", (ntags, 30)) 55 | if MLP: 56 | pH = model.add_parameters("HID", (32, 50*2)) 57 | pO = model.add_parameters("OUT", (ntags, 32)) 58 | else: 59 | pO = model.add_parameters("OUT", (ntags, 50*2)) 60 | 61 | builders=[ 62 | LSTMBuilder(1, 128, 50, model), 63 | LSTMBuilder(1, 128, 50, model), 64 | ] 65 | 66 | def build_tagging_graph(words, tags, model, builders): 67 | renew_cg() 68 | f_init, b_init = [b.initial_state() for b in builders] 69 | 70 | wembs = [lookup(model["lookup"], w) for w in words] 71 | wembs = [noise(we,0.1) for we in wembs] 72 | 73 | fw = [x.output() for x in f_init.add_inputs(wembs)] 74 | bw = [x.output() for x in b_init.add_inputs(reversed(wembs))] 75 | 76 | if MLP: 77 | H = parameter(pH) 78 | O = parameter(pO) 79 | else: 80 | O = parameter(pO) 81 | errs = [] 82 | for f,b,t in zip(fw, reversed(bw), tags): 83 | f_b = concatenate([f,b]) 84 | if MLP: 85 | r_t = O*(tanh(H * f_b)) 86 | else: 87 | r_t = O * f_b 88 | err = pickneglogsoftmax(r_t, t) 89 | errs.append(err) 90 | return esum(errs) 91 | 92 | def tag_sent(sent, model, builders): 93 | renew_cg() 94 | f_init, b_init = [b.initial_state() for b in builders] 95 | wembs = [lookup(model["lookup"], vw.w2i.get(w, UNK)) for w,t in sent] 96 | 97 | fw = [x.output() for x in f_init.add_inputs(wembs)] 98 | bw = [x.output() for x in b_init.add_inputs(reversed(wembs))] 99 | 100 | if MLP: 101 | H = parameter(pH) 102 | O = parameter(pO) 103 | else: 104 | O = parameter(pO) 105 | tags=[] 106 | for f,b,(w,t) in zip(fw,reversed(bw),sent): 107 | if MLP: 108 | r_t = O*(tanh(H * concatenate([f,b]))) 109 | else: 110 | r_t = O*concatenate([f,b]) 111 | out = softmax(r_t) 112 | chosen = np.argmax(out.npvalue()) 113 | tags.append(vt.i2w[chosen]) 114 | return tags 115 | 116 | 117 | tagged = loss = 0 118 | for ITER in xrange(50): 119 | random.shuffle(train) 120 | for i,s in enumerate(train,1): 121 | if i % 5000 == 0: 122 | sgd.status() 123 | print loss / tagged 124 | loss = 0 125 | tagged = 0 126 | if i % 10000 == 0: 127 | good = bad = 0.0 128 | for sent in test: 129 | tags = tag_sent(sent, model, builders) 130 | golds = [t for w,t in sent] 131 | for go,gu in zip(golds,tags): 132 | if go == gu: good +=1 133 | else: bad+=1 134 | print good/(good+bad) 135 | ws = [vw.w2i.get(w, UNK) for w,p in s] 136 | ps = [vt.w2i[p] for w,p in s] 137 | sum_errs = build_tagging_graph(ws,ps,model,builders) 138 | squared = -sum_errs# * sum_errs 139 | loss += sum_errs.scalar_value() 140 | tagged += len(ps) 141 | 
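        # scalar_value() above already triggered the forward pass;
        # backward() below accumulates gradients through the whole tagging
        # graph, and sgd.update() then applies them to the parameters.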
sum_errs.backward() 142 | sgd.update() 143 | 144 | 145 | -------------------------------------------------------------------------------- /pyexamples/cpu_vs_gpu.py: -------------------------------------------------------------------------------- 1 | import gpycnn as G 2 | print 3 | import pycnn as C 4 | 5 | cm = C.Model() 6 | gm = G.Model() 7 | 8 | cpW = cm.add_parameters("W",(1000,1000)) 9 | gpW = gm.add_parameters("W",(1000,1000)) 10 | 11 | def do_cpu(): 12 | C.renew_cg() 13 | W = C.parameter(cpW) 14 | W = W*W*W*W*W*W*W 15 | z = C.squared_distance(W,W) 16 | z.value() 17 | z.backward() 18 | 19 | def do_gpu(): 20 | G.renew_cg() 21 | W = G.parameter(gpW) 22 | W = W*W*W*W*W*W*W 23 | z = G.squared_distance(W,W) 24 | z.value() 25 | z.backward() 26 | 27 | import time 28 | s = time.time() 29 | do_cpu() 30 | print "CPU time:",time.time() - s 31 | 32 | s = time.time() 33 | do_gpu() 34 | print "GPU time:",time.time() - s 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /pyexamples/minibatch.py: -------------------------------------------------------------------------------- 1 | from pycnn import * 2 | import numpy as np 3 | 4 | m = Model() 5 | lp = m.add_lookup_parameters("a",(100,10)) 6 | 7 | # regular lookup 8 | a = lp[1].npvalue() 9 | b = lp[2].npvalue() 10 | c = lp[3].npvalue() 11 | 12 | # batch lookup instead of single elements. 13 | # two ways of doing this. 14 | abc1 = lookup_batch(lp, [1,2,3]) 15 | print abc1.npvalue() 16 | 17 | abc2 = lp.batch([1,2,3]) 18 | print abc2.npvalue() 19 | 20 | print np.hstack([a,b,c]) 21 | 22 | 23 | # use pick and pickneglogsoftmax in batch mode 24 | # (must be used in conjunction with lookup_batch): 25 | print "\nPick" 26 | W = parameter( m.add_parameters("W", (5, 10)) ) 27 | h = W * lp.batch([1,2,3]) 28 | print h.npvalue() 29 | print pick_batch(h,[1,2,3]).npvalue() 30 | print pick(W*lp[1],1).value(), pick(W*lp[2],2).value(), pick(W*lp[3],3).value() 31 | 32 | # using pickneglogsoftmax_batch 33 | print "\nPick neg log softmax" 34 | print (-log(softmax(h))).npvalue() 35 | print pickneglogsoftmax_batch(h,[1,2,3]).npvalue() 36 | -------------------------------------------------------------------------------- /pyexamples/rnnlm.py: -------------------------------------------------------------------------------- 1 | from pycnn import * 2 | import time 3 | import random 4 | 5 | LAYERS = 2 6 | INPUT_DIM = 50 #256 7 | HIDDEN_DIM = 50 #1024 8 | VOCAB_SIZE = 0 9 | 10 | from collections import defaultdict 11 | from itertools import count 12 | import sys 13 | import util 14 | 15 | class RNNLanguageModel: 16 | def __init__(self, model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=SimpleRNNBuilder): 17 | self.m = model 18 | self.builder = builder(LAYERS, INPUT_DIM, HIDDEN_DIM, model) 19 | 20 | model.add_lookup_parameters("lookup", (VOCAB_SIZE, INPUT_DIM)) 21 | model.add_parameters("R", (VOCAB_SIZE, HIDDEN_DIM)) 22 | model.add_parameters("bias", (VOCAB_SIZE)) 23 | 24 | def BuildLMGraph(self, sent): 25 | renew_cg() 26 | init_state = self.builder.initial_state() 27 | 28 | R = parameter(self.m["R"]) 29 | bias = parameter(self.m["bias"]) 30 | errs = [] # will hold expressions 31 | es=[] 32 | state = init_state 33 | for (cw,nw) in zip(sent,sent[1:]): 34 | # assume word is already a word-id 35 | x_t = lookup(self.m["lookup"], int(cw)) 36 | state = state.add_input(x_t) 37 | y_t = state.output() 38 | r_t = bias + (R * y_t) 39 | err = pickneglogsoftmax(r_t, int(nw)) 40 | errs.append(err) 41 | nerr = esum(errs) 42 | return 
nerr 43 | 44 | def sample(self, first=1, nchars=0, stop=-1): 45 | res = [first] 46 | renew_cg() 47 | state = self.builder.initial_state() 48 | 49 | R = parameter(self.m["R"]) 50 | bias = parameter(self.m["bias"]) 51 | cw = first 52 | while True: 53 | x_t = lookup(self.m["lookup"], cw) 54 | state = state.add_input(x_t) 55 | y_t = state.output() 56 | r_t = bias + (R * y_t) 57 | ydist = softmax(r_t) 58 | dist = ydist.vec_value() 59 | rnd = random.random() 60 | for i,p in enumerate(dist): 61 | rnd -= p 62 | if rnd <= 0: break 63 | res.append(i) 64 | cw = i 65 | if cw == stop: break 66 | if nchars and len(res) > nchars: break 67 | return res 68 | 69 | if __name__ == '__main__': 70 | train = util.CharsCorpusReader(sys.argv[1],begin="") 71 | vocab = util.Vocab.from_corpus(train) 72 | 73 | VOCAB_SIZE = vocab.size() 74 | 75 | model = Model() 76 | sgd = SimpleSGDTrainer(model) 77 | 78 | #lm = RNNLanguageModel(model, builder=LSTMBuilder) 79 | lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=SimpleRNNBuilder) 80 | 81 | train = list(train) 82 | 83 | chars = loss = 0.0 84 | for ITER in xrange(100): 85 | random.shuffle(train) 86 | for i,sent in enumerate(train): 87 | _start = time.time() 88 | if i % 50 == 0: 89 | sgd.status() 90 | if chars > 0: print loss / chars, 91 | for _ in xrange(1): 92 | samp = lm.sample(first=vocab.w2i[""],stop=vocab.w2i["\n"]) 93 | print "".join([vocab.i2w[c] for c in samp]).strip() 94 | loss = 0.0 95 | chars = 0.0 96 | 97 | chars += len(sent)-1 98 | isent = [vocab.w2i[w] for w in sent] 99 | errs = lm.BuildLMGraph(isent) 100 | loss += errs.scalar_value() 101 | errs.backward() 102 | sgd.update(1.0) 103 | #print "TM:",(time.time() - _start)/len(sent) 104 | print "ITER",ITER,loss 105 | sgd.status() 106 | sgd.update_epoch(1.0) 107 | -------------------------------------------------------------------------------- /pyexamples/rnnlm_transduce.py: -------------------------------------------------------------------------------- 1 | # a version rnnlm.py using the transduce() interface. 2 | from pycnn import * 3 | import time 4 | import random 5 | 6 | LAYERS = 2 7 | INPUT_DIM = 50 #256 8 | HIDDEN_DIM = 50 #1024 9 | VOCAB_SIZE = 0 10 | 11 | from collections import defaultdict 12 | from itertools import count, izip 13 | import sys 14 | import util 15 | 16 | class RNNLanguageModel: 17 | def __init__(self, model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=SimpleRNNBuilder): 18 | self.m = model 19 | self.builder = builder(LAYERS, INPUT_DIM, HIDDEN_DIM, model) 20 | 21 | model.add_lookup_parameters("lookup", (VOCAB_SIZE, INPUT_DIM)) 22 | model.add_parameters("R", (VOCAB_SIZE, HIDDEN_DIM)) 23 | model.add_parameters("bias", (VOCAB_SIZE)) 24 | 25 | def BuildLMGraph(self, sent): 26 | renew_cg() 27 | init_state = self.builder.initial_state() 28 | 29 | R = parameter(self.m["R"]) 30 | bias = parameter(self.m["bias"]) 31 | errs = [] # will hold expressions 32 | es=[] 33 | state = init_state 34 | lookup = self.m["lookup"] 35 | inputs = [lookup[int(cw)] for cw in sent[:-1]] 36 | expected_outputs = [int(nw) for nw in sent[1:]] 37 | outputs = state.transduce(inputs) 38 | r_ts = ((bias + (R * y_t)) for y_t in outputs) 39 | errs = [pickneglogsoftmax(r_t, eo) for r_t, eo in izip(r_ts, expected_outputs)] 40 | nerr = esum(errs) 41 | return nerr 42 | 43 | def sample(self, first=1, nchars=0, stop=-1): 44 | # sampling must use the regular incremental interface. 
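        # (transduce() maps a fully-known input sequence to outputs in one
        # call; when sampling, each chosen character becomes the next
        # input, so the state must be advanced one add_input() at a time.)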
45 | res = [first] 46 | renew_cg() 47 | state = self.builder.initial_state() 48 | 49 | R = parameter(self.m["R"]) 50 | bias = parameter(self.m["bias"]) 51 | cw = first 52 | while True: 53 | x_t = lookup(self.m["lookup"], cw) 54 | state = state.add_input(x_t) 55 | y_t = state.output() 56 | r_t = bias + (R * y_t) 57 | ydist = softmax(r_t) 58 | dist = ydist.vec_value() 59 | rnd = random.random() 60 | for i,p in enumerate(dist): 61 | rnd -= p 62 | if rnd <= 0: break 63 | res.append(i) 64 | cw = i 65 | if cw == stop: break 66 | if nchars and len(res) > nchars: break 67 | return res 68 | 69 | if __name__ == '__main__': 70 | train = util.CharsCorpusReader(sys.argv[1],begin="") 71 | vocab = util.Vocab.from_corpus(train) 72 | 73 | VOCAB_SIZE = vocab.size() 74 | 75 | model = Model() 76 | sgd = SimpleSGDTrainer(model) 77 | 78 | #lm = RNNLanguageModel(model, builder=LSTMBuilder) 79 | lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=SimpleRNNBuilder) 80 | 81 | train = list(train) 82 | 83 | chars = loss = 0.0 84 | for ITER in xrange(100): 85 | random.shuffle(train) 86 | for i,sent in enumerate(train): 87 | _start = time.time() 88 | if i % 50 == 0: 89 | sgd.status() 90 | if chars > 0: print loss / chars, 91 | for _ in xrange(1): 92 | samp = lm.sample(first=vocab.w2i[""],stop=vocab.w2i["\n"]) 93 | print "".join([vocab.i2w[c] for c in samp]).strip() 94 | loss = 0.0 95 | chars = 0.0 96 | 97 | chars += len(sent)-1 98 | isent = [vocab.w2i[w] for w in sent] 99 | errs = lm.BuildLMGraph(isent) 100 | loss += errs.scalar_value() 101 | errs.backward() 102 | sgd.update(1.0) 103 | #print "TM:",(time.time() - _start)/len(sent) 104 | print "ITER",ITER,loss 105 | sgd.status() 106 | sgd.update_epoch(1.0) 107 | -------------------------------------------------------------------------------- /pyexamples/util.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from itertools import count 3 | class Vocab: 4 | def __init__(self, w2i=None): 5 | if w2i is None: w2i = defaultdict(count(0).next) 6 | self.w2i = dict(w2i) 7 | self.i2w = {i:w for w,i in w2i.iteritems()} 8 | @classmethod 9 | def from_corpus(cls, corpus): 10 | w2i = defaultdict(count(0).next) 11 | for sent in corpus: 12 | [w2i[word] for word in sent] 13 | return Vocab(w2i) 14 | 15 | def size(self): return len(self.w2i.keys()) 16 | 17 | class CorpusReader: 18 | def __init__(self, fname): 19 | self.fname = fname 20 | def __iter__(self): 21 | for line in file(self.fname): 22 | line = line.strip().split() 23 | #line = [' ' if x == '' else x for x in line] 24 | yield line 25 | 26 | class CharsCorpusReader: 27 | def __init__(self, fname, begin=None): 28 | self.fname = fname 29 | self.begin = begin 30 | def __iter__(self): 31 | begin = self.begin 32 | for line in file(self.fname): 33 | line = list(line) 34 | if begin: 35 | line = [begin] + line 36 | yield line 37 | -------------------------------------------------------------------------------- /pyexamples/xor.py: -------------------------------------------------------------------------------- 1 | from pycnn import * 2 | 3 | #xsent = True 4 | xsent = False 5 | 6 | HIDDEN_SIZE = 8 7 | ITERATIONS = 2000 8 | 9 | m = Model() 10 | sgd = SimpleSGDTrainer(m) 11 | 12 | m.add_parameters("W",(HIDDEN_SIZE, 2)) 13 | m.add_parameters("b",HIDDEN_SIZE) 14 | m.add_parameters("V",(1, HIDDEN_SIZE)) 15 | m.add_parameters("a",1) 16 | 17 | W = parameter(m["W"]) 18 | b = parameter(m["b"]) 19 | V = parameter(m["V"]) 20 | a = 
parameter(m["a"]) 21 | 22 | x = vecInput(2) 23 | y = scalarInput(0) 24 | h = tanh((W*x) + b) 25 | if xsent: 26 | y_pred = logistic((V*h) + a) 27 | loss = binary_log_loss(y_pred, y) 28 | T = 1 29 | F = 0 30 | else: 31 | y_pred = (V*h) + a 32 | loss = squared_distance(y_pred, y) 33 | T = 1 34 | F = -1 35 | 36 | 37 | for iter in xrange(ITERATIONS): 38 | mloss = 0.0 39 | for mi in xrange(4): 40 | x1 = mi % 2 41 | x2 = (mi / 2) % 2 42 | x.set([T if x1 else F, T if x2 else F]) 43 | y.set(T if x1 != x2 else F) 44 | #mloss += cg().forward_scalar() 45 | mloss += loss.scalar_value() 46 | #cg().backward() 47 | loss.backward() 48 | sgd.update(1.0) 49 | sgd.update_epoch(); 50 | mloss /= 4. 51 | print "loss: %0.9f" % mloss 52 | 53 | x.set([F,T]) 54 | z = -(-y_pred) 55 | print z.scalar_value() 56 | #print y_pred.scalar() 57 | 58 | renew_cg() 59 | W = parameter(m["W"]) 60 | b = parameter(m["b"]) 61 | V = parameter(m["V"]) 62 | a = parameter(m["a"]) 63 | 64 | x = vecInput(2) 65 | y = scalarInput(0) 66 | h = tanh((W*x) + b) 67 | if xsent: 68 | y_pred = logistic((V*h) + a) 69 | else: 70 | y_pred = (V*h) + a 71 | x.set([T,F]) 72 | print "TF",y_pred.scalar_value() 73 | x.set([F,F]) 74 | print "FF",y_pred.scalar_value() 75 | x.set([T,T]) 76 | print "TT",y_pred.scalar_value() 77 | x.set([F,T]) 78 | print "FT",y_pred.scalar_value() 79 | 80 | -------------------------------------------------------------------------------- /rnnlm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8) 2 | 3 | foreach(TARGET lm) 4 | ADD_EXECUTABLE(${TARGET} ${TARGET}.cc) 5 | target_link_libraries(${TARGET} cnn ${LIBS} pthread) 6 | if(UNIX AND NOT APPLE) 7 | target_link_libraries(${TARGET} rt) 8 | endif() 9 | if (WITH_CUDA_BACKEND) 10 | add_dependencies(${TARGET} cnncuda) 11 | target_link_libraries(${TARGET} cnncuda) 12 | CUDA_ADD_CUBLAS_TO_TARGET(${TARGET}) 13 | endif (WITH_CUDA_BACKEND) 14 | endforeach() 15 | 16 | -------------------------------------------------------------------------------- /rnnlm/README.md: -------------------------------------------------------------------------------- 1 | ### Obtaining LM data 2 | 3 | ./install-examples.sh 4 | 5 | This downloads the data used in this tutorial. 
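The archive unpacks into `ptb-mikolov/` under this directory. The training and evaluation commands below assume the following files inside it:

    ptb-mikolov/
      train.txt            # training corpus
      valid.txt            # development/validation set
      test.txt             # evaluation set
      clusters-mkcls.txt   # mkcls word clusters for the class-factored softmax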
6 | 7 | ### Training example 8 | 9 | Train an LSTM LM using a class-factor softmax: 10 | 11 | ./rnnlm/lm -x -s -t ../rnnlm/ptb-mikolov/train.txt -d ../rnnlm/ptb-mikolov/valid.txt \ 12 | -c ../rnnlm/ptb-mikolov/clusters-mkcls.txt -D 0.3 -H 256 --eta_decay_onset_epoch 10 --eta_decay_rate 0.5 13 | 14 | Train an LSTM LM with a standard softmax: 15 | 16 | ./rnnlm/lm -x -s -t ../rnnlm/ptb-mikolov/train.txt -d ../rnnlm/ptb-mikolov/valid.txt \ 17 | -D 0.3 -H 256 --eta_decay_onset_epoch 10 --eta_decay_rate 0.5 18 | 19 | ### Evaluation example 20 | 21 | Evaluate a trained model: 22 | 23 | ./rnnlm/lm -t ../rnnlm/ptb-mikolov/train.txt -c ../rnnlm/ptb-mikolov/clusters-mkcls.txt \ 24 | -m lm_0.3_2_128_256-pid7865.params -H 256 -p ../rnnlm/ptb-mikolov/test.txt 25 | 26 | ### PTB Baselines 27 | 28 | | Model | dev | test | 29 | | ----- | ---:| ----:| 30 | | 5-gram KN | 188.0 | 178.9 | 31 | | 2x128, dropout=0.3, class-factored softmax | 164.4 | 157.7 | 32 | | 2x256, dropout=0.3, CFSM, decay 0.5@>10 | 129.7 | 125.4 | 33 | -------------------------------------------------------------------------------- /rnnlm/install-examples.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | data_version=ptb-mikolov-01.tar.gz 5 | 6 | ROOTDIR=`dirname $0` 7 | cd $ROOTDIR 8 | 9 | rm -f $data_version 10 | rm -rf ptb-mikolov 11 | curl -f http://demo.clab.cs.cmu.edu/cdyer/$data_version -o $data_version 12 | tar xzf $data_version 13 | rm -f $data_version 14 | 15 | echo SUCCESS. 1>&2 16 | 17 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package (Boost COMPONENTS system filesystem unit_test_framework REQUIRED) 2 | include_directories (${TEST_SOURCE_DIR}/src 3 | ${Boost_INCLUDE_DIRS} 4 | ) 5 | 6 | add_definitions (-DBOOST_TEST_DYN_LINK) 7 | 8 | # Sources: 9 | set(test_cnn_SRCS 10 | test-nodes.cc 11 | ) 12 | 13 | add_executable (test-cnn test-cnn.cc ${test_cnn_SRCS}) 14 | target_link_libraries (test-cnn cnn ${LIBS} 15 | ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} 16 | ) 17 | if (WITH_CUDA_BACKEND) 18 | add_dependencies(test-cnn cnncuda) 19 | target_link_libraries(test-cnn cnncuda) 20 | CUDA_ADD_CUBLAS_TO_TARGET(test-cnn) 21 | endif (WITH_CUDA_BACKEND) 22 | 23 | add_test(test-cnn test-cnn) 24 | 25 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | Use `test-cnn.cc` as a reference for how to set up subsequent tests. 
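For example, a new test file needs no `BOOST_TEST_MODULE` or fixture of its own -- `test-cnn.cc` already defines the module and the global `ConfigureCNNTest` fixture that initializes cnn once per test binary. A minimal sketch of an added file (a hypothetical `test-dim.cc`, which would also be appended to `test_cnn_SRCS` in `CMakeLists.txt`; it assumes `cnn::Dim::size()` returns the total element count):

    #include <boost/test/unit_test.hpp>
    #include "cnn/dim.h"

    BOOST_AUTO_TEST_CASE( dim_element_count ) {
      cnn::Dim d({2, 3});               // dimensions of a 2x3 matrix
      BOOST_CHECK_EQUAL(d.size(), 6u);  // 2 * 3 elements in total
    }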
2 | -------------------------------------------------------------------------------- /tests/test-cnn.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/cnn.h" 2 | #define BOOST_TEST_MODULE CNNBasicTest 3 | #include <boost/test/unit_test.hpp> 4 | 5 | struct ConfigureCNNTest { 6 | ConfigureCNNTest() { 7 | // set up some dummy arguments to cnn 8 | for (auto x : {"ConfigureCNNTest", "--cnn-mem", "10"}) { 9 | av.push_back(strdup(x)); 10 | } 11 | char **argv = &av[0]; 12 | int argc = av.size(); 13 | cnn::Initialize(argc, argv); 14 | } 15 | ~ConfigureCNNTest() { 16 | for (auto x : av) free(x); 17 | } 18 | std::vector<char*> av; 19 | }; 20 | 21 | // configure CNN 22 | BOOST_GLOBAL_FIXTURE(ConfigureCNNTest); 23 | 24 | BOOST_AUTO_TEST_CASE( aligned_allocator ) { 25 | cnn::CPUAllocator a; 26 | void* mem = a.malloc(1024); 27 | BOOST_CHECK_EQUAL(((unsigned long)(mem) & 0x1f), 0); 28 | ((char*)mem)[0] = 99; 29 | a.zero(mem, 1024); 30 | BOOST_CHECK_EQUAL(((char*)mem)[0], 0); 31 | a.free(mem); 32 | } 33 | 34 | --------------------------------------------------------------------------------