├── cnn ├── aligned-mem-pool.cc ├── graph.h ├── random.h ├── saxe-init.h ├── init.h ├── grad-check.h ├── rnn-state-machine.cc ├── cnn-helper.h ├── dim.cc ├── timing.h ├── saxe-init.cc ├── except.h ├── dict.cc ├── shadow-params.h ├── graph.cc ├── devices.h ├── shadow-params.cc ├── aligned-mem-pool.h ├── rnn-state-machine.h ├── exec.h ├── mem.h ├── gpu-kernels.h ├── gru.h ├── tests │ ├── test_init.cc │ └── test_utils.h ├── deep-lstm.h ├── cuda.h ├── gpu-ops.h ├── mem.cc ├── c2w.h ├── fast-lstm.h ├── lstm.h ├── devices.cc ├── mp.cc ├── hsm-builder.h ├── cfsm-builder.h ├── dict.h ├── init.cc ├── grad-check.cc ├── conv.h ├── dim.h ├── CMakeLists.txt ├── rnn.cc ├── training.h ├── gru.cc ├── tensor.cc ├── param-nodes.h ├── rnn.h ├── exec.cc ├── model.h ├── lstm.cc ├── deep-lstm.cc ├── param-nodes.cc ├── cuda.cc ├── cfsm-builder.cc ├── fast-lstm.cc ├── model.cc ├── simd-functors.h └── hsm-builder.cc ├── EVALB ├── Makefile ├── evalb ├── tgrep_proc.prl ├── LICENSE ├── sample │ ├── sample.tst │ ├── sample.gld │ ├── sample.prm │ └── sample.rsl ├── bug │ ├── bug.rsl-new │ ├── bug.rsl-old │ ├── bug.tst │ └── bug.gld ├── COLLINS.prm ├── COLLINS_ch.prm └── new.prm ├── config.h.cmake ├── impl ├── eval.h ├── pretrained.h ├── CMakeLists.txt ├── eval.cc ├── compressed-fstream.h ├── pretrained.cc ├── oracle.h └── oracle.cc ├── cmake ├── FindCNN.cmake └── FindEigen3.cmake ├── scripts ├── mid2tree.py └── get_dictionary.py ├── CMakeLists.txt └── README.md /cnn/aligned-mem-pool.cc: -------------------------------------------------------------------------------- 1 | #include "aligned-mem-pool.h" 2 | -------------------------------------------------------------------------------- /EVALB/Makefile: -------------------------------------------------------------------------------- 1 | all: evalb 2 | 3 | evalb: evalb.c 4 | gcc -Wall -g -o evalb evalb.c 5 | -------------------------------------------------------------------------------- /EVALB/evalb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeonCrashCode/InOrderParser/HEAD/EVALB/evalb -------------------------------------------------------------------------------- /EVALB/tgrep_proc.prl: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/perl 2 | 3 | while(<>) 4 | { 5 | if(m/TOP/) #skip lines which are blank 6 | { 7 | print; 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /cnn/graph.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GRAPH_H 2 | #define CNN_GRAPH_H 3 | 4 | namespace cnn { 5 | struct ComputationGraph; 6 | void GraphOptimize(ComputationGraph* cg); 7 | } // namespace cnn 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /cnn/random.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_EIGEN_RANDOM_H 2 | #define CNN_EIGEN_RANDOM_H 3 | 4 | #include 5 | 6 | namespace cnn { 7 | 8 | extern std::mt19937* rndeng; 9 | 10 | } // namespace cnn 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /cnn/saxe-init.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_SAXE_INIT_H_ 2 | #define CNN_SAXE_INIT_H_ 3 | 4 | namespace cnn { 5 | 6 | struct Tensor; 7 | 8 | void OrthonormalRandom(unsigned dim, float g, Tensor& x); 9 | 10 | } 11 | 12 | #endif 13 | 
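A minimal usage sketch of the declaration above (not part of the original header). It assumes the Tensor/Dim fields used in cnn/saxe-init.cc and that cnn::Initialize has already been called so the global RNG (cnn::rndeng) exists; example_orthonormal_init and hidden_dim are hypothetical names. OrthonormalRandom overwrites a pre-allocated square tensor with a random orthonormal matrix, with g as the gain argument.

#include "cnn/saxe-init.h"
#include "cnn/tensor.h"

void example_orthonormal_init(unsigned hidden_dim) {
  cnn::Tensor W;
  W.d = cnn::Dim({hidden_dim, hidden_dim});     // must be square
  W.v = new float[hidden_dim * hidden_dim];     // caller-owned storage in this sketch
  cnn::OrthonormalRandom(hidden_dim, 1.0f, W);  // fill W with a random orthonormal matrix
  // ... use W, then release the buffer
  delete[] W.v;
}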
-------------------------------------------------------------------------------- /config.h.cmake: -------------------------------------------------------------------------------- 1 | #ifndef CNN_CONFIG_H_ 2 | #define CNN_CONFIG_H_ 3 | 4 | #cmakedefine WITH_MINERVA_BACKEND @WITH_MINERVA_BACKEND@ 5 | #cmakedefine WITH_THPP_BACKEND @WITH_THPP_BACKEND@ 6 | #cmakedefine WITH_EIGEN_BACKEND @WITH_EIGEN_BACKEND@ 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /cnn/init.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_EIGEN_INIT_H 2 | #define CNN_EIGEN_INIT_H 3 | 4 | namespace cnn { 5 | 6 | void Initialize(int& argc, char**& argv, unsigned random_seed = 0, bool shared_parameters = false); 7 | void Cleanup(); 8 | 9 | } // namespace cnn 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /impl/eval.h: -------------------------------------------------------------------------------- 1 | #ifndef NTPARSER_EVAL_H_ 2 | #define NTPARSER_EVAL_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace parser { 8 | 9 | struct EvalBResults { 10 | float p,r,f; 11 | }; 12 | 13 | EvalBResults Evaluate(const std::string& ref_fname, const std::string& hyp_fname); 14 | 15 | } // namespace parser 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /cnn/grad-check.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GRAD_CHECK_H 2 | #define CNN_GRAD_CHECK_H 3 | 4 | namespace cnn { 5 | 6 | class Model; 7 | struct ComputationGraph; 8 | 9 | // verbosity is zero for silence, one for only printing errors, two for everything 10 | bool CheckGrad(Model& m, ComputationGraph& g, int verbosity = 1); 11 | 12 | } // namespace cnn 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /cnn/rnn-state-machine.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/rnn-state-machine.h" 2 | 3 | #include 4 | #include "cnn/cnn.h" 5 | 6 | using namespace std; 7 | 8 | namespace cnn { 9 | 10 | void RNNStateMachine::failure(RNNOp op) { 11 | cerr << "State transition error: currently in state " << q_ << " but received operation " << op << endl; 12 | abort(); 13 | } 14 | 15 | } // namespace cnn 16 | 17 | -------------------------------------------------------------------------------- /cmake/FindCNN.cmake: -------------------------------------------------------------------------------- 1 | 2 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.7 FATAL_ERROR) 3 | 4 | INCLUDE(FindPackageHandleStandardArgs) 5 | 6 | FIND_LIBRARY(TH_LIBRARY TH) 7 | FIND_PATH(TH_INCLUDE_DIR "TH.h" PATHS "${CMAKE_PREFIX_PATH}/include/TH") 8 | 9 | SET(TH_LIBRARIES ${TH_LIBRARY}) 10 | 11 | FIND_PACKAGE_HANDLE_STANDARD_ARGS( 12 | TH 13 | REQUIRED_ARGS 14 | TH_INCLUDE_DIR 15 | TH_LIBRARY) 16 | -------------------------------------------------------------------------------- /impl/pretrained.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_PRETRAINED_H 2 | #define PARSER_PRETRAINED_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace cnn { struct Dict; } 9 | 10 | namespace parser { 11 | 12 | unsigned ReadEmbeddings_word2vec(const std::string& fname, 13 | cnn::Dict* dict, 14 | std::unordered_map>* pretrained); 15 | 16 | } // namespace parser 17 | 18 | #endif 19 | 
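A rough caller sketch for the declaration above (not part of the original sources; "embeddings.vec" is a placeholder filename and the pretrained map type is inferred from impl/pretrained.cc). It loads word2vec-format embeddings into a cnn::Dict plus an id-to-vector map; the input may also be .gz or .bz2, since the reader uses compressed_ifstream internally, and the binary must link against boost_iostreams.

#include <iostream>
#include <unordered_map>
#include <vector>
#include "cnn/dict.h"
#include "impl/pretrained.h"

int main() {
  cnn::Dict dict;
  std::unordered_map<unsigned, std::vector<float>> pretrained;
  unsigned dims = parser::ReadEmbeddings_word2vec("embeddings.vec", &dict, &pretrained);
  std::cerr << "loaded " << pretrained.size()
            << " vectors of dimension " << dims << "\n";
  return 0;
}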
-------------------------------------------------------------------------------- /cnn/cnn-helper.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_HELPER_H_ 2 | #define CNN_HELPER_H_ 3 | 4 | #include 5 | 6 | /// helper functions 7 | 8 | namespace cnn { 9 | 10 | /** 11 | this fix a compilation problem in cygwin 12 | */ 13 | #if defined(__CYGWIN__) 14 | template 15 | inline std::string to_string(T value) 16 | { 17 | std::ostringstream os; 18 | os << value; 19 | return os.str(); 20 | } 21 | #endif 22 | 23 | } // namespace cnn 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /impl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8) 2 | 3 | foreach(TARGET 4 | Kparser 5 | Kparser-standard 6 | ) 7 | ADD_EXECUTABLE(${TARGET} ${TARGET}.cc pretrained.cc eval.cc oracle.cc) 8 | target_link_libraries(${TARGET} cnn ${LIBS} pthread boost_iostreams) 9 | if(UNIX AND NOT APPLE) 10 | target_link_libraries(${TARGET} rt) 11 | endif() 12 | if (WITH_CUDA_BACKEND) 13 | add_dependencies(${TARGET} cnncuda) 14 | target_link_libraries(${TARGET} cnncuda) 15 | CUDA_ADD_CUBLAS_TO_TARGET(${TARGET}) 16 | endif (WITH_CUDA_BACKEND) 17 | endforeach() 18 | 19 | -------------------------------------------------------------------------------- /cnn/dim.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/dim.h" 2 | 3 | #include 4 | 5 | using namespace std; 6 | 7 | namespace cnn { 8 | 9 | ostream& operator<<(ostream& os, const Dim& d) { 10 | os << '{'; 11 | for (unsigned i = 0; i < d.nd; ++i) { 12 | if (i) os << ','; 13 | os << d.d[i]; 14 | } 15 | if(d.bd != 1) os << 'X' << d.bd; 16 | return os << '}'; 17 | } 18 | 19 | ostream& operator<<(ostream& os, const vector& ds) { 20 | os << '['; 21 | for (unsigned i = 0; i < ds.size(); ++i) 22 | os << (i ? 
" " : "") << ds[i]; 23 | return os << ']'; 24 | } 25 | 26 | } // namespace cnn 27 | 28 | -------------------------------------------------------------------------------- /cnn/timing.h: -------------------------------------------------------------------------------- 1 | #ifndef _TIMING_H_ 2 | #define _TIMING_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace cnn { 9 | 10 | struct Timer { 11 | Timer(const std::string& msg) : msg(msg), start(std::chrono::high_resolution_clock::now()) {} 12 | ~Timer() { 13 | auto stop = std::chrono::high_resolution_clock::now(); 14 | std::cerr << '[' << msg << ' ' << std::chrono::duration(stop-start).count() << " ms]\n"; 15 | } 16 | std::string msg; 17 | std::chrono::high_resolution_clock::time_point start; 18 | }; 19 | 20 | } // namespace cnn 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /cnn/saxe-init.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/saxe-init.h" 2 | #include "cnn/tensor.h" 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | using namespace std; 10 | 11 | namespace cnn { 12 | 13 | void OrthonormalRandom(unsigned dd, float g, Tensor& x) { 14 | Tensor t; 15 | t.d = Dim({dd, dd}); 16 | t.v = new float[dd * dd]; 17 | normal_distribution distribution(0, 0.01); 18 | auto b = [&] () {return distribution(*rndeng);}; 19 | generate(t.v, t.v + dd*dd, b); 20 | Eigen::JacobiSVD svd(*t, Eigen::ComputeFullU); 21 | *x = svd.matrixU(); 22 | delete[] t.v; 23 | } 24 | 25 | } 26 | 27 | -------------------------------------------------------------------------------- /scripts/mid2tree.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | def tree(acts): 4 | btree = [] 5 | openidx = [] 6 | wid = 0 7 | for act in acts: 8 | if act[0] == 'S': 9 | tmp = act.split() 10 | btree.append("("+tmp[1]+" "+tmp[2]+")") 11 | wid += 1 12 | elif act[0] == 'P': 13 | btree.insert(-1,"("+act[3:-1]) 14 | openidx.append(len(btree)-2) 15 | else: 16 | tmp = " ".join(btree[openidx[-1]:])+")" 17 | btree = btree[:openidx[-1]] 18 | btree.append(tmp) 19 | openidx = openidx[:-1] 20 | print btree[0] 21 | 22 | if __name__ == "__main__": 23 | actions = [] 24 | action = [] 25 | for line in open(sys.argv[1]): 26 | line = line.strip() 27 | if line == "": 28 | actions.append(action[:-1]) 29 | action = [] 30 | else: 31 | action.append(line) 32 | 33 | for i in range(len(actions)): 34 | tree(actions[i]); 35 | -------------------------------------------------------------------------------- /impl/eval.cc: -------------------------------------------------------------------------------- 1 | #include "impl/eval.h" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | namespace io = boost::iostreams; 11 | 12 | namespace parser { 13 | 14 | EvalBResults Evaluate(const string& ref_fname, const string& hyp_fname) { 15 | string cmd = "echo " + ref_fname + " " + hyp_fname; 16 | cerr << "COMMAND: " << cmd << endl; 17 | FILE *pipe = popen(cmd.c_str(), "r"); 18 | io::stream_buffer fpstream (fileno(pipe), io::never_close_handle); 19 | istream in(&fpstream); 20 | string line; 21 | while(getline(in, line)) { 22 | cerr << "Got line: " << line << endl; 23 | } 24 | fclose(pipe); 25 | EvalBResults r; 26 | return r; 27 | } 28 | 29 | }; 30 | 31 | -------------------------------------------------------------------------------- /cnn/except.h: 
-------------------------------------------------------------------------------- 1 | #ifndef CNN_EXCEPT_H_ 2 | #define CNN_EXCEPT_H_ 3 | 4 | #include 5 | 6 | namespace cnn { 7 | 8 | // if CNN exhausts its memory pool 9 | class out_of_memory : public std::runtime_error { 10 | public: 11 | out_of_memory(const std::string& what_arg) : runtime_error(what_arg) {} 12 | }; 13 | 14 | // this error occurs when some logic is 15 | // attempted to execut on a CUDA backend but the 16 | // logic has not been implemented. 17 | class cuda_not_implemented : public std::logic_error { 18 | public: 19 | cuda_not_implemented(const std::string& what_arg) : logic_error(what_arg) {} 20 | }; 21 | 22 | // this is thrown when cuda returns an error (bad arguments, memory, state, etc) 23 | class cuda_exception : public std::runtime_error { 24 | public: 25 | cuda_exception(const std::string& what_arg) : runtime_error(what_arg) {} 26 | }; 27 | 28 | } // namespace cnn 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /cnn/dict.cc: -------------------------------------------------------------------------------- 1 | #include "dict.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | namespace cnn { 10 | 11 | std::vector ReadSentence(const std::string& line, Dict* sd) { 12 | std::istringstream in(line); 13 | std::string word; 14 | std::vector res; 15 | while(in) { 16 | in >> word; 17 | if (!in || word.empty()) break; 18 | res.push_back(sd->Convert(word)); 19 | } 20 | return res; 21 | } 22 | 23 | void ReadSentencePair(const std::string& line, std::vector* s, Dict* sd, std::vector* t, Dict* td) { 24 | std::istringstream in(line); 25 | std::string word; 26 | std::string sep = "|||"; 27 | Dict* d = sd; 28 | std::vector* v = s; 29 | while(in) { 30 | in >> word; 31 | if (!in) break; 32 | if (word == sep) { d = td; v = t; continue; } 33 | v->push_back(d->Convert(word)); 34 | } 35 | } 36 | 37 | } // namespace cnn 38 | 39 | -------------------------------------------------------------------------------- /cnn/shadow-params.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_SHADOW_PARAMS_H 2 | #define CNN_SHADOW_PARAMS_H 3 | 4 | #include 5 | #include "cnn/tensor.h" 6 | 7 | // if your learner needs to keep track of an extra set of values (one per 8 | // parameter), use the Shadow classes. 
this can be used to implement, e.g., 9 | // momentum or adagrad 10 | 11 | namespace cnn { 12 | 13 | class Model; 14 | struct Parameters; 15 | struct LookupParameters; 16 | 17 | struct ShadowParameters { 18 | explicit ShadowParameters(const Parameters& p); 19 | Tensor h; 20 | }; 21 | 22 | struct ShadowLookupParameters { 23 | explicit ShadowLookupParameters(const LookupParameters& lp); 24 | std::vector h; 25 | }; 26 | 27 | // one per element in model.parameters_list 28 | std::vector AllocateShadowParameters(const Model& model); 29 | // one per element in model.lookup_parameters_list 30 | std::vector AllocateShadowLookupParameters(const Model& model); 31 | 32 | } // namespace cnn 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /cnn/graph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/graph.h" 2 | #include "cnn/cnn.h" 3 | #include 4 | #include "cnn/cnn-helper.h" 5 | 6 | using namespace std; 7 | 8 | namespace cnn { 9 | 10 | void GraphOptimize(ComputationGraph* cg) { 11 | // topo sort 12 | vector& nodes = cg->nodes; 13 | vector longest_paths(nodes.size()); 14 | for (unsigned i = 0; i < nodes.size(); ++i) { 15 | auto& v = *nodes[i]; // vertex v_i 16 | auto& lp = longest_paths[i]; // distance to v_i 17 | for (auto e : v.args) { 18 | int weight = 0; 19 | if (v.args.size() == 7) weight = 1; 20 | int pte = longest_paths[e] + weight; 21 | if (pte > lp) lp = pte; 22 | } 23 | } 24 | for (unsigned i = 0; i < nodes.size(); ++i) { 25 | vector x; 26 | for (auto e : nodes[i]->args) { 27 | x.push_back(string("x") + to_string(e)); 28 | } 29 | cerr << "LONGEST PATH: " << longest_paths[i] << "\tx" << i << " = " << nodes[i]->as_string(x) << endl; 30 | } 31 | abort();// DEBUGGING 32 | } 33 | 34 | } // namespaiice cnn 35 | -------------------------------------------------------------------------------- /cnn/devices.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_DEVICES_H 2 | #define CNN_DEVICES_H 3 | 4 | #include 5 | #include "cnn/aligned-mem-pool.h" 6 | #include "cnn/cuda.h" 7 | 8 | namespace cnn { 9 | 10 | enum class DeviceType {CPU, GPU}; 11 | 12 | class Device { 13 | protected: 14 | Device(DeviceType t, MemAllocator* m) : type(t), mem(m) {} 15 | Device(const Device&) = delete; 16 | Device& operator=(const Device&) = delete; 17 | virtual ~Device(); 18 | public: 19 | DeviceType type; 20 | MemAllocator* mem; 21 | AlignedMemoryPool* fxs; 22 | AlignedMemoryPool* dEdfs; 23 | AlignedMemoryPool* ps; 24 | float* kSCALAR_MINUSONE; 25 | float* kSCALAR_ONE; 26 | float* kSCALAR_ZERO; 27 | std::string name; 28 | }; 29 | 30 | #if HAVE_CUDA 31 | class Device_GPU : public Device { 32 | public: 33 | explicit Device_GPU(int mb, int device_id); 34 | ~Device_GPU(); 35 | int cuda_device_id; 36 | cublasHandle_t cublas_handle; 37 | GPUAllocator gpu_mem; 38 | }; 39 | #endif 40 | 41 | class Device_CPU : public Device { 42 | public: 43 | explicit Device_CPU(int mb, bool shared); 44 | ~Device_CPU(); 45 | CPUAllocator cpu_mem; 46 | MemAllocator* shmem; 47 | }; 48 | 49 | } // namespace cnn 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /impl/compressed-fstream.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_COMPRESSED_FSTREAM_H_ 2 | #define PARSER_COMPRESSED_FSTREAM_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 
| namespace cnn { 13 | 14 | // acts just like std::ifstream, but decompresses if the filename ends in .gz or .bz2 15 | class compressed_ifstream : public std::istream { 16 | private: 17 | std::ifstream file; 18 | boost::iostreams::filtering_streambuf inbuf; 19 | public: 20 | compressed_ifstream(const std::string& fname) : std::istream(&inbuf), file(fname.c_str()) { 21 | std::size_t pos = fname.rfind('.'); 22 | if (pos != std::string::npos && pos > 0) { 23 | const std::string suf = fname.substr(pos + 1); 24 | if (suf == "gz") { 25 | inbuf.push(boost::iostreams::gzip_decompressor()); 26 | } 27 | else if (suf == "bz2") 28 | inbuf.push(boost::iostreams::bzip2_decompressor()); 29 | } 30 | inbuf.push(file); 31 | } 32 | }; 33 | 34 | }; // namespace cnn 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /cnn/shadow-params.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/cnn.h" 2 | #include "cnn/shadow-params.h" 3 | #include "cnn/tensor.h" 4 | #include "cnn/aligned-mem-pool.h" 5 | #include "cnn/model.h" 6 | 7 | using namespace std; 8 | 9 | namespace cnn { 10 | 11 | ShadowParameters::ShadowParameters(const Parameters& p) : h(p.values) { 12 | h.v = (float*)default_device->mem->malloc(h.d.size() * sizeof(float)); 13 | TensorTools::Zero(h); 14 | } 15 | 16 | ShadowLookupParameters::ShadowLookupParameters(const LookupParameters& lp) : h(lp.values) { 17 | for (auto& t : h) { 18 | t.v = (float*)default_device->mem->malloc(t.d.size() * sizeof(float)); 19 | TensorTools::Zero(t); 20 | } 21 | } 22 | 23 | vector AllocateShadowParameters(const Model& m) { 24 | vector v; 25 | v.reserve(m.parameters_list().size()); 26 | for (auto& p : m.parameters_list()) 27 | v.emplace_back(*p); 28 | return v; 29 | } 30 | 31 | vector AllocateShadowLookupParameters(const Model& m) { 32 | vector v; 33 | v.reserve(m.lookup_parameters_list().size()); 34 | for (auto& p : m.lookup_parameters_list()) 35 | v.emplace_back(*p); 36 | return v; 37 | } 38 | 39 | } // namespace cnn 40 | 41 | -------------------------------------------------------------------------------- /EVALB/LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /cnn/aligned-mem-pool.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_ALIGNED_MEM_POOL_H 2 | #define CNN_ALIGNED_MEM_POOL_H 3 | 4 | #include 5 | #include "cnn/mem.h" 6 | 7 | namespace cnn { 8 | 9 | class AlignedMemoryPool { 10 | public: 11 | explicit AlignedMemoryPool(size_t cap, MemAllocator* a) : a(a) { 12 | sys_alloc(cap); 13 | zero_all(); 14 | } 15 | 16 | void* allocate(size_t n) { 17 | auto rounded_n = a->round_up_align(n); 18 | if (rounded_n + used > capacity) { 19 | std::cerr << "cnn is out of memory, try increasing with --cnn-mem\n"; 20 | abort(); 21 | } 22 | void* res = static_cast(mem) + used; 23 | used += rounded_n; 24 | return res; 25 | } 26 | void free() { 27 | //std::cerr << "freeing " << used << " bytes\n"; 28 | used = 0; 29 | } 30 | // zeros out the amount of allocations 31 | void zero_allocated_memory() { 32 | if (used == 0) return; 33 | a->zero(mem, used); 34 | } 35 | 36 | bool is_shared() { 37 | return shared; 38 | } 39 | private: 40 | void sys_alloc(size_t cap) { 41 | capacity = a->round_up_align(cap); 42 | //std::cerr << "Allocating " << capacity << " ...\n"; 43 | mem = a->malloc(capacity); 44 | if (!mem) { std::cerr << "Failed to allocate " << capacity << std::endl; abort(); } 45 | used = 0; 46 | } 47 | void zero_all() { 48 | a->zero(mem, capacity); 49 | } 50 | size_t capacity; 51 | size_t used; 52 | bool shared; 53 | MemAllocator* a; 54 | void* mem; 55 | }; 56 | 57 | } // namespace cnn 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /cnn/rnn-state-machine.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_RNN_STATE_MACHINE_H_ 2 | #define CNN_RNN_STATE_MACHINE_H_ 3 | 4 | namespace cnn { 5 | 6 | // CURRENT STATE | ACTION | NEXT STATE 7 | // --------------+---------------------+----------------- 8 | // CREATED | new_graph | GRAPH_READY 9 | // GRAPH_READY | start_new_sequence | READING_INPUT 10 | // READING_INPUT | add_input | READING_INPUT 11 | // READING_INPUT | start_new_seqeunce | READING_INPUT 12 | // READING_INPUT | new_graph | GRAPH_READY 13 | 14 | enum RNNState {CREATED, GRAPH_READY, READING_INPUT}; 15 | enum RNNOp {new_graph, start_new_sequence, add_input}; 16 | 17 | class RNNStateMachine { 18 | public: 19 | RNNStateMachine() : q_(RNNState::CREATED) {} 20 | void failure(RNNOp op); 21 | void transition(RNNOp op) { 22 | switch (q_) { 23 | case RNNState::CREATED: 24 | if (op == RNNOp::new_graph) { q_ = RNNState::GRAPH_READY; break; } 25 | failure(op); 26 | case RNNState::GRAPH_READY: 27 | if (op == RNNOp::new_graph) { break; } 28 | if (op == RNNOp::start_new_sequence) { q_ = RNNState::READING_INPUT; break; } 29 | failure(op); 30 | case RNNState::READING_INPUT: 31 | if (op == RNNOp::add_input) { break; } 32 | if (op == RNNOp::start_new_sequence) { break; } 33 | if (op == RNNOp::new_graph) { q_ = RNNState::GRAPH_READY; break; } 34 | failure(op); 35 | } 36 | } 37 | private: 38 | RNNState q_; 39 | }; 40 | 41 | } // namespace cnn 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /impl/pretrained.cc: -------------------------------------------------------------------------------- 1 | #include "impl/pretrained.h" 2 | 3 | #include 4 | #include "cnn/dict.h" 5 | #include "impl/compressed-fstream.h" 6 | 7 | using namespace std; 8 | using 
namespace cnn; 9 | 10 | namespace parser { 11 | 12 | unsigned ReadEmbeddings_word2vec(const string& fname, 13 | Dict* dict, 14 | unordered_map>* pretrained) { 15 | cerr << "Reading pretrained embeddings from " << fname << " ...\n"; 16 | compressed_ifstream in(fname); 17 | string line; 18 | getline(in, line); 19 | bool bad = false; 20 | int spaces = 0; 21 | for (auto c : line) { 22 | if (c == ' ' || c == '\t') ++spaces; 23 | else if (c < '0' || c > '9') bad = true; 24 | } 25 | if (spaces != 1 || bad) { 26 | cerr << "File does not seem to be in word2vec format\n"; 27 | abort(); 28 | } 29 | istringstream iss(line); 30 | unsigned nwords = 0, dims = 0; 31 | iss >> nwords >> dims; 32 | cerr << " file reports " << nwords << " words with " << dims << " dims\n"; 33 | unsigned lc = 1; 34 | string word; 35 | while(getline(in, line)) { 36 | ++lc; 37 | vector v(dims); 38 | istringstream iss(line); 39 | iss >> word; 40 | unsigned wordid = dict->Convert(word); 41 | for (unsigned i = 0; i < dims; ++i) 42 | iss >> v[i]; 43 | (*pretrained)[wordid] = v; 44 | } 45 | if ((lc-1) != nwords) { 46 | cerr << "[WARNING] mismatched number of words reported and loaded\n"; 47 | } 48 | cerr << " done.\n"; 49 | return dims; 50 | } 51 | 52 | } // namespace parser 53 | -------------------------------------------------------------------------------- /cnn/exec.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_EXEC_H 2 | #define CNN_EXEC_H 3 | 4 | #include "cnn/cnn.h" 5 | 6 | namespace cnn { 7 | 8 | class ExecutionEngine { 9 | public: 10 | virtual ~ExecutionEngine(); 11 | virtual void invalidate() = 0; 12 | virtual const Tensor& forward() = 0; 13 | virtual const Tensor& forward(VariableIndex i) = 0; 14 | virtual const Tensor& incremental_forward() = 0; // if you want to add nodes and evaluate just the new parts 15 | virtual const Tensor& incremental_forward(VariableIndex i) = 0; 16 | virtual const Tensor& get_value(VariableIndex i) = 0; 17 | virtual void backward() = 0; 18 | virtual void backward(VariableIndex i) = 0; 19 | protected: 20 | explicit ExecutionEngine(const ComputationGraph& cg) : cg(cg) {} 21 | const ComputationGraph& cg; 22 | }; 23 | 24 | class SimpleExecutionEngine : public ExecutionEngine { 25 | public: 26 | explicit SimpleExecutionEngine(const ComputationGraph& cg) : ExecutionEngine(cg) {} 27 | void invalidate() override; 28 | const Tensor& forward() override; 29 | const Tensor& forward(VariableIndex i) override; 30 | const Tensor& incremental_forward() override; // if you want to add nodes and evaluate just the new parts 31 | const Tensor& incremental_forward(VariableIndex i) override; 32 | const Tensor& get_value(VariableIndex i) override; 33 | void backward() override; 34 | void backward(VariableIndex i) override; 35 | private: 36 | std::vector nfxs; 37 | std::vector ndEdfs; 38 | VariableIndex num_nodes_evaluated; 39 | }; 40 | 41 | } // namespace cnn 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /EVALB/sample/sample.tst: -------------------------------------------------------------------------------- 1 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 2 | (S (A (P this)) (B (Q is) (C (R a) (T test)))) 3 | (S (A (P this)) (B (Q is) (A (R a) (U test)))) 4 | (S (C (P this)) (B (Q is) (A (R a) (U test)))) 5 | (S (A (P this)) (B (Q is) (R a) (A (T test)))) 6 | (S (A (P this) (Q is)) (A (R a) (T test))) 7 | (S (P this) (Q is) (R a) (T test)) 8 | (P this) (Q is) (R a) (T test) 9 | (S (A (P this)) (B (Q is) (A 
(A (R a) (T test))))) 10 | (S (A (P this)) (B (Q is) (A (A (A (A (A (R a) (T test)))))))) 11 | 12 | (S (A (P this)) (B (Q was) (A (A (R a) (T test))))) 13 | (S (A (P this)) (B (Q is) (U not) (A (A (R a) (T test))))) 14 | 15 | (TOP (S (A (P this)) (B (Q is) (A (R a) (T test))))) 16 | (S (A (P this)) (NONE *) (B (Q is) (A (R a) (T test)))) 17 | (S (A (P this)) (S (NONE abc) (A (NONE *))) (B (Q is) (A (R a) (T test)))) 18 | (S (A (P this)) (B (Q is) (A (R a) (TT test)))) 19 | (S (A (P This)) (B (Q is) (A (R a) (T test)))) 20 | (S (A (P That)) (B (Q is) (A (R a) (T test)))) 21 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 22 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test)))) 23 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (-NONE- *)) 24 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (: *)) 25 | -------------------------------------------------------------------------------- /cnn/mem.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_MEM_H 2 | #define CNN_MEM_H 3 | 4 | #include 5 | 6 | namespace cnn { 7 | 8 | // allocates memory from the device (CPU, GPU) 9 | // only used to create the memory pools 10 | // creates alignment appropriate for that device 11 | struct MemAllocator { 12 | explicit MemAllocator(int align) : align(align) {} 13 | MemAllocator(const MemAllocator&) = delete; 14 | MemAllocator& operator=(const MemAllocator&) = delete; 15 | virtual ~MemAllocator(); 16 | virtual void* malloc(std::size_t n) = 0; 17 | virtual void free(void* mem) = 0; 18 | virtual void zero(void* p, std::size_t n) = 0; 19 | inline std::size_t round_up_align(std::size_t n) const { 20 | if (align < 2) return n; 21 | return ((n + align - 1) / align) * align; 22 | } 23 | const int align; 24 | }; 25 | 26 | struct CPUAllocator : public MemAllocator { 27 | CPUAllocator() : MemAllocator(32) {} 28 | void* malloc(std::size_t n) override; 29 | void free(void* mem) override; 30 | void zero(void* p, std::size_t n) override; 31 | }; 32 | 33 | struct SharedAllocator : public MemAllocator { 34 | SharedAllocator() : MemAllocator(32) {} 35 | void* malloc(std::size_t n) override; 36 | void free(void* mem) override; 37 | void zero(void* p, std::size_t n) override; 38 | }; 39 | 40 | #if HAVE_CUDA 41 | struct GPUAllocator : public MemAllocator { 42 | explicit GPUAllocator(int devid) : MemAllocator(256), devid(devid) {} 43 | void* malloc(std::size_t n) override; 44 | void free(void* mem) override; 45 | void zero(void* p, std::size_t n) override; 46 | const int devid; 47 | }; 48 | #endif 49 | 50 | } // namespace cnn 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /cnn/gpu-kernels.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GPU_KERNELS_H 2 | #define CNN_GPU_KERNELS_H 3 | 4 | #include "cnn/cuda.h" 5 | 6 | namespace cnn { 7 | namespace gpu { 8 | 9 | template 10 | __global__ void unaryExprKernel(int n, const float* x, float* y, Func func) { 11 | int i = threadIdx.x + blockIdx.x * blockDim.x; 12 | while (i < n) { 13 | y[i] = func(x[i]); 14 | i += 
gridDim.x * blockDim.x; 15 | } 16 | } 17 | 18 | template 19 | __global__ void accUnaryExprKernel(int n, const float* x, float* y, Func func) { 20 | int i = threadIdx.x + blockIdx.x * blockDim.x; 21 | while (i < n) { 22 | y[i] += func(x[i]); 23 | i += gridDim.x * blockDim.x; 24 | } 25 | } 26 | 27 | template 28 | __global__ void binaryExprKernel(int n, const float* x0, const float* x1, float* y, Func func) { 29 | int i = threadIdx.x + blockIdx.x * blockDim.x; 30 | while (i < n) { 31 | y[i] = func(x0[i], x1[i]); 32 | i += gridDim.x * blockDim.x; 33 | } 34 | } 35 | 36 | template 37 | __global__ void accBinaryExprKernel(int n, const float* x0, const float* x1, float* y, Func func) { 38 | int i = threadIdx.x + blockIdx.x * blockDim.x; 39 | while (i < n) { 40 | y[i] += func(x0[i], x1[i]); 41 | i += gridDim.x * blockDim.x; 42 | } 43 | } 44 | 45 | template 46 | __global__ void slowReduceKernel(int n, const float* x0, const float* x1, float* y, Func func) { 47 | float ty = 0; 48 | // THIS IS BAD - FIX THIS TO MAKE IT FAST 49 | for (int i = 0; i < n; ++i) 50 | ty += func(x0[i], x1[i]); 51 | y[0] = ty; 52 | } 53 | 54 | } // namespace gpu 55 | } // namespace cnn 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /EVALB/sample/sample.gld: -------------------------------------------------------------------------------- 1 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 2 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 3 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 4 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 5 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 6 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 7 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 8 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 9 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 10 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 11 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 12 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 13 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 14 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 15 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 16 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 17 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 18 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 19 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 20 | (S (A (P this)) (B (Q is) (A (R a) (T test)))) 21 | (S (A-SBJ-1 (P this)) (B-WHATEVER (Q is) (A (R a) (T test)))) 22 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test)))) 23 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (-NONE- *)) 24 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (: *)) 25 | -------------------------------------------------------------------------------- /cnn/gru.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GRU_H_ 2 | #define CNN_GRU_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn.h" 6 | 7 | namespace cnn { 8 | 9 | class Model; 10 | 11 | struct GRUBuilder : public RNNBuilder { 12 | GRUBuilder() = default; 13 | explicit GRUBuilder(unsigned layers, 14 | unsigned input_dim, 15 | 
unsigned hidden_dim, 16 | Model* model); 17 | Expression back() const override { return (cur == -1? h0.back() : h[cur].back()); } 18 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 19 | std::vector final_s() const override { return final_h(); } 20 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 21 | std::vector get_s(RNNPointer i) const override { return get_h(i); } 22 | unsigned num_h0_components() const override { return layers; } 23 | void copy(const RNNBuilder & params) override; 24 | 25 | protected: 26 | void new_graph_impl(ComputationGraph& cg) override; 27 | void start_new_sequence_impl(const std::vector& h0) override; 28 | Expression add_input_impl(int prev, const Expression& x) override; 29 | 30 | // first index is layer, then ... 31 | std::vector> params; 32 | 33 | // first index is layer, then ... 34 | std::vector> param_vars; 35 | 36 | // first index is time, second is layer 37 | std::vector> h; 38 | 39 | // initial values of h at each layer 40 | // - default to zero matrix input 41 | std::vector h0; 42 | 43 | unsigned hidden_dim; 44 | unsigned layers; 45 | }; 46 | 47 | } // namespace cnn 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /cnn/tests/test_init.cc: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "CNNInit" 3 | #include 4 | 5 | #include 6 | 7 | #include "cnn/tests/test_utils.h" 8 | #include "cnn/tensor.h" 9 | #include "cnn/saxe-init.h" 10 | 11 | using namespace std; 12 | using namespace cnn; 13 | 14 | BOOST_GLOBAL_FIXTURE(TestTensorSetup) 15 | 16 | BOOST_AUTO_TEST_CASE(EOrthonormalRandom) 17 | { 18 | for (int d = 4; d < 128; d += 2) { 19 | Tensor Q = OrthonormalRandom(d, 1.0); 20 | // BOOST_REQUIRE_EQUAL(size(Q), Dim({d,d})); 21 | 22 | // check that this is actually returning orthogonal matrices 23 | #if MINERVA_BACKEND 24 | Tensor I = Q.Trans() * Q; 25 | #endif 26 | #if THPP_BACKEND 27 | Tensor QT = Q; 28 | QT.transpose(); 29 | //cerr << str(Q) << endl << str(QT) << endl; 30 | Tensor I = Zero({d,d}); 31 | I.addmm(0, 1, Q, QT); 32 | //cerr << str(I) << endl; 33 | #endif 34 | #if EIGEN_BACKEND 35 | Tensor I = Q.transpose() * Q; 36 | #endif 37 | double eps = 1e-1; 38 | for (int i = 0; i < d; ++i) 39 | for (int j = 0; j < d; ++j) 40 | BOOST_CHECK_CLOSE(t(I,i,j) + 1., (i == j ? 2. : 1.), eps); 41 | } 42 | cerr << "Finished\n"; 43 | } 44 | 45 | BOOST_AUTO_TEST_CASE(BernoulliInit) { 46 | Tensor r = RandomBernoulli(Dim({1000,1000}), 0.5f); 47 | int tot = 0; 48 | for (int i = 0; i < 1000; ++i) 49 | for (int j = 0; j < 1000; ++j) 50 | if (t(r,i,j)) ++tot; 51 | BOOST_CHECK_GT(tot, 490000); 52 | BOOST_CHECK_LT(tot, 510000); 53 | } 54 | 55 | BOOST_AUTO_TEST_CASE(Rand01) { 56 | cnn::real tot = 0; 57 | for (unsigned i = 0; i < 1000000; ++i) 58 | tot += cnn::rand01(); 59 | BOOST_CHECK_GT(tot, 490000.); 60 | BOOST_CHECK_LT(tot, 510000.); 61 | } 62 | 63 | 64 | -------------------------------------------------------------------------------- /EVALB/bug/bug.rsl-new: -------------------------------------------------------------------------------- 1 | Sent. Matched Bracket Cross Correct Tag 2 | ID Len. Stat. Recal Prec. 
Bracket gold test Bracket Words Tags Accracy 3 | ============================================================================ 4 | 1 37 0 77.27 65.38 17 22 26 5 34 27 79.41 5 | 2 21 0 69.23 64.29 9 13 14 2 20 16 80.00 6 | 3 47 0 80.00 82.35 28 35 34 4 44 40 90.91 7 | 4 26 0 35.29 37.50 6 17 16 8 25 18 72.00 8 | 5 44 0 42.31 33.33 11 26 33 17 38 28 73.68 9 | ============================================================================ 10 | 62.83 57.72 71 113 123 0 161 129 80.12 11 | === Summary === 12 | 13 | -- All -- 14 | Number of sentence = 5 15 | Number of Error sentence = 0 16 | Number of Skip sentence = 0 17 | Number of Valid sentence = 5 18 | Bracketing Recall = 62.83 19 | Bracketing Precision = 57.72 20 | Bracketing FMeasure = 60.17 21 | Complete match = 0.00 22 | Average crossing = 7.20 23 | No crossing = 0.00 24 | 2 or less crossing = 20.00 25 | Tagging accuracy = 80.12 26 | 27 | -- len<=40 -- 28 | Number of sentence = 3 29 | Number of Error sentence = 0 30 | Number of Skip sentence = 0 31 | Number of Valid sentence = 3 32 | Bracketing Recall = 61.54 33 | Bracketing Precision = 57.14 34 | Bracketing FMeasure = 59.26 35 | Complete match = 0.00 36 | Average crossing = 5.00 37 | No crossing = 0.00 38 | 2 or less crossing = 33.33 39 | Tagging accuracy = 77.22 40 | -------------------------------------------------------------------------------- /cnn/deep-lstm.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_DEEP_LSTM_H_ 2 | #define CNN_DEEP_LSTM_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn.h" 6 | #include "cnn/expr.h" 7 | 8 | using namespace cnn::expr; 9 | 10 | namespace cnn { 11 | 12 | class Model; 13 | 14 | struct DeepLSTMBuilder : public RNNBuilder { 15 | DeepLSTMBuilder() = default; 16 | explicit DeepLSTMBuilder(unsigned layers, 17 | unsigned input_dim, 18 | unsigned hidden_dim, 19 | Model* model); 20 | 21 | Expression back() const override { return h.back().back(); } 22 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 23 | std::vector final_s() const override { 24 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 25 | for(auto my_h : final_h()) ret.push_back(my_h); 26 | return ret; 27 | } 28 | protected: 29 | void new_graph_impl(ComputationGraph& cg) override; 30 | void start_new_sequence_impl(const std::vector& h0) override; 31 | Expression add_input_impl(int prev, const Expression& x) override; 32 | 33 | public: 34 | // first index is layer, then ... 35 | std::vector> params; 36 | 37 | // first index is layer, then ... 
38 | std::vector> param_vars; 39 | 40 | // first index is time, second is layer 41 | std::vector> h, c; 42 | std::vector o; 43 | 44 | // initial values of h and c at each layer 45 | // - both default to zero matrix input 46 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 47 | std::vector h0; 48 | std::vector c0; 49 | unsigned layers; 50 | }; 51 | 52 | } // namespace cnn 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /cnn/tests/test_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_TEST_UTILS_H_ 2 | #define CNN_TEST_UTILS_H_ 3 | 4 | #include "cnn/tensor.h" 5 | 6 | namespace cnn { 7 | 8 | #if WITH_MINERVA_BACKEND 9 | 10 | struct TestTensorSetup { 11 | TestTensorSetup() { 12 | int argc = 1; 13 | char* foo = "foo"; 14 | char** argv = {&foo}; 15 | minerva::MinervaSystem::Initialize(&argc, &argv); 16 | #if HAS_CUDA 17 | minerva::MinervaSystem::Instance().device_manager().CreateGpuDevice(0); 18 | #else 19 | minerva::MinervaSystem::Instance().device_manager().CreateCpuDevice(); 20 | #endif 21 | } 22 | }; 23 | 24 | double t(const Tensor& T, unsigned i, unsigned j) { 25 | int m = T.Size(0); 26 | return T.Get().get()[j * m + i]; 27 | } 28 | 29 | std::ostream& operator<<(std::ostream& os, const Tensor& T) { 30 | if (T.Size().NumDims() == 2) { 31 | int m = T.Size(0); 32 | int n = T.Size(1); 33 | for (int i = 0; i < m; ++i) { 34 | for (int j = 0; j < n; ++j) { 35 | os << '\t' << t(T,i,j); 36 | } 37 | os << std::endl; 38 | } 39 | return os; 40 | } else { 41 | os << T.Size() << ": "; 42 | minerva::FileFormat ff; ff.binary = false; 43 | T.ToStream(os, ff); 44 | return os; 45 | } 46 | } 47 | 48 | #else 49 | 50 | struct TestTensorSetup { 51 | TestTensorSetup() { 52 | int argc = 1; 53 | char* p = "foo"; 54 | char** argv = {&p}; 55 | cnn::Initialize(argc, argv); 56 | } 57 | }; 58 | 59 | double t(const Tensor& T, unsigned i, unsigned j) { 60 | #if WITH_THPP_BACKEND 61 | return T.at({i,j}); 62 | #else 63 | return T(i, j); 64 | #endif 65 | } 66 | 67 | double t(const Tensor& T, unsigned i) { 68 | #if WITH_THPP_BACKEND 69 | return T.at({i}); 70 | #else 71 | return T(i, 0); 72 | #endif 73 | } 74 | 75 | #endif 76 | 77 | } // namespace cnn 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /cnn/cuda.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_CUDA_H 2 | #define CNN_CUDA_H 3 | #if HAVE_CUDA 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "cnn/except.h" 14 | 15 | #define CUDA_CHECK(stmt) do { \ 16 | cudaError_t err = stmt; \ 17 | if (err != cudaSuccess) { \ 18 | std::cerr << "CUDA failure in " << #stmt << std::endl\ 19 | << cudaGetErrorString(err) << std::endl; \ 20 | throw cnn::cuda_exception(#stmt); \ 21 | } \ 22 | } while(0) 23 | 24 | #define CUBLAS_CHECK(stmt) do { \ 25 | cublasStatus_t stat = stmt; \ 26 | if (stat != CUBLAS_STATUS_SUCCESS) { \ 27 | std::cerr << "CUBLAS failure in " << #stmt \ 28 | << std::endl << stat << std::endl; \ 29 | throw cnn::cuda_exception(#stmt); \ 30 | } \ 31 | } while(0) 32 | 33 | namespace cnn { 34 | 35 | struct Device; 36 | 37 | inline std::pair SizeToBlockThreadPair(int n) { 38 | assert(n); 39 | int logn; 40 | asm("\tbsr %1, %0\n" 41 | : "=r"(logn) 42 | : "r" (n-1)); 43 | logn = logn > 9 ? 9 : (logn < 4 ? 
4 : logn); 44 | ++logn; 45 | int threads = 1 << logn; 46 | int blocks = (n + threads - 1) >> logn; 47 | blocks = blocks > 128 ? 128 : blocks; 48 | return std::make_pair(blocks, threads); 49 | } 50 | 51 | std::vector Initialize_GPU(int& argc, char**& argv); 52 | extern cublasHandle_t cublas_handle; 53 | 54 | } // namespace cnn 55 | 56 | #endif 57 | #endif 58 | -------------------------------------------------------------------------------- /EVALB/bug/bug.rsl-old: -------------------------------------------------------------------------------- 1 | Sent. Matched Bracket Cross Correct Tag 2 | ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy 3 | ============================================================================ 4 | 1 : Length unmatch (33|35) 5 | 1 37 1 0.00 0.00 0 0 0 0 0 0 0.00 6 | 2 : Length unmatch (19|21) 7 | 2 21 1 0.00 0.00 0 0 0 0 0 0 0.00 8 | 3 : Length unmatch (44|45) 9 | 3 47 1 0.00 0.00 0 0 0 0 0 0 0.00 10 | 4 : Length unmatch (24|26) 11 | 4 26 1 0.00 0.00 0 0 0 0 0 0 0.00 12 | 5 : Length unmatch (38|39) 13 | 5 44 1 0.00 0.00 0 0 0 0 0 0 0.00 14 | ============================================================================ 15 | 0 0 0.00 16 | 17 | === Summary === 18 | 19 | -- All -- 20 | Number of sentence = 5 21 | Number of Error sentence = 5 22 | Number of Skip sentence = 0 23 | Number of Valid sentence = 0 24 | Bracketing Recall = 0.00 25 | Bracketing Precision = 0.00 26 | Bracketing FMeasure = nan 27 | Complete match = 0.00 28 | Average crossing = 0.00 29 | No crossing = 0.00 30 | 2 or less crossing = 0.00 31 | Tagging accuracy = 0.00 32 | 33 | -- len<=40 -- 34 | Number of sentence = 3 35 | Number of Error sentence = 3 36 | Number of Skip sentence = 0 37 | Number of Valid sentence = 0 38 | Bracketing Recall = 0.00 39 | Bracketing Precision = 0.00 40 | Bracketing FMeasure = nan 41 | Complete match = 0.00 42 | Average crossing = 0.00 43 | No crossing = 0.00 44 | 2 or less crossing = 0.00 45 | Tagging accuracy = 0.00 46 | -------------------------------------------------------------------------------- /cnn/gpu-ops.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_GPU_OPS_H 2 | #define CNN_GPU_OPS_H 3 | 4 | namespace cnn { 5 | namespace gpu { 6 | 7 | void vpairwise_rank_loss(int n, float margin, const float* xgood, const float* xbad, float* y); 8 | void vpairwise_rank_loss_backward(int n, bool d_wrt_correct, const float* fx, const float* dEdf, float* dEdx); 9 | void vcwise_product(int n, const float* x0, const float* x1, float* y); 10 | void vcwise_product_backward(int n, const float* dEdy, const float* x_other, float* dEdx); 11 | void vconstant_minusx(int n, float c, const float* x, float* y); 12 | void vnegate(int n, const float* x, float* y); 13 | void vnegate_backward(int n, const float* dEdf, float* dEdx); 14 | void vrelu(int n, const float* x, float* y); 15 | void vrelu_backward(int n, const float* fx, const float* dEdf, float* dEdx); 16 | void vtanh(int n, const float* x, float* y); 17 | void vtanh_backward(int n, const float* fx, const float* dEdf, float* dEdx); 18 | void vlog(int n, const float* x, float* y); 19 | void vlog_backward(int n, const float* fx, const float* dEdf, float* dEdx); 20 | void vlogistic(int n, const float* x, float* y); 21 | void vlogistic_backward(int n, const float* fx, const float* dEdf, float* dEdx); 22 | void l2_norm_reducer(int n, const float* x0, float* y, bool square, bool accumulate); 23 | void sqeucdist(int n, const float* x0, const float *x1, float* y); 24 | void 
sqeucdist_backward(int n, const float* dEdy, const float* x0, const float* x1, float* dEdx, int i); 25 | void softmax(int n, const float* x0, float* y); 26 | void softmax_backward(int n, const float* x0, const float* dEdf, float* dEdx); 27 | void pnlsoftmax(int n, int elem_idx, const float* x0, float* y, float* logz); 28 | void pnlsoftmax_backward(int n, int elem_idx, const float* x0, const float* dEdf, const float* logz, float* dEdx); 29 | 30 | void sgd_update(int n, const float* g, float* x, float scale, float lambda); 31 | 32 | } // namespace gpu 33 | } // namespace cnn 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /cnn/mem.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/mem.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "cnn/except.h" 11 | #if HAVE_CUDA 12 | #include "cnn/cuda.h" 13 | #include 14 | #include 15 | #endif 16 | 17 | using namespace std; 18 | 19 | namespace cnn { 20 | 21 | MemAllocator::~MemAllocator() {} 22 | 23 | void* CPUAllocator::malloc(size_t n) { 24 | void* ptr = _mm_malloc(n, align); 25 | if (!ptr) { 26 | cerr << "CPU memory allocation failed n=" << n << " align=" << align << endl; 27 | throw cnn::out_of_memory("CPU memory allocation failed"); 28 | } 29 | return ptr; 30 | } 31 | 32 | void CPUAllocator::free(void* mem) { 33 | _mm_free(mem); 34 | } 35 | 36 | void CPUAllocator::zero(void* p, size_t n) { 37 | memset(p, 0, n); 38 | } 39 | 40 | void* SharedAllocator::malloc(size_t n) { 41 | void* ptr = mmap(NULL, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_SHARED, -1, 0); 42 | if (!ptr) { 43 | cerr << "Shared memory allocation failed n=" << n << endl; 44 | throw cnn::out_of_memory("Shared memory allocation failed"); 45 | } 46 | return ptr; 47 | } 48 | 49 | void SharedAllocator::free(void* mem) { 50 | // munmap(mem, n); 51 | } 52 | 53 | void SharedAllocator::zero(void* p, size_t n) { 54 | memset(p, 0, n); 55 | } 56 | 57 | #if HAVE_CUDA 58 | void* GPUAllocator::malloc(size_t n) { 59 | void* ptr = nullptr; 60 | CUDA_CHECK(cudaSetDevice(devid)); 61 | CUDA_CHECK(cudaMalloc(&ptr, n)); 62 | if (!ptr) { 63 | cerr << "GPU memory allocation failed n=" << n << endl; 64 | throw cnn::out_of_memory("GPU memory allocation failed"); 65 | } 66 | return ptr; 67 | } 68 | 69 | void GPUAllocator::free(void* mem) { 70 | CUDA_CHECK(cudaFree(mem)); 71 | } 72 | 73 | void GPUAllocator::zero(void* p, size_t n) { 74 | CUDA_CHECK(cudaSetDevice(devid)); 75 | CUDA_CHECK(cudaMemsetAsync(p, 0, n)); 76 | } 77 | 78 | #endif 79 | 80 | } // namespace cnn 81 | -------------------------------------------------------------------------------- /scripts/get_dictionary.py: -------------------------------------------------------------------------------- 1 | def is_next_open_bracket(line, start_idx): 2 | for char in line[(start_idx + 1):]: 3 | if char == '(': 4 | return True 5 | elif char == ')': 6 | return False 7 | raise IndexError('Bracket possibly not balanced, open bracket not followed by closed bracket') 8 | 9 | def get_between_brackets(line, start_idx): 10 | output = [] 11 | for char in line[(start_idx + 1):]: 12 | if char == ')': 13 | break 14 | assert not(char == '(') 15 | output.append(char) 16 | return ''.join(output) 17 | 18 | def get_dict(lines): 19 | output = [] 20 | for line in lines: 21 | #print 'curr line', line_strip 22 | line_strip = line.rstrip() 23 | #print 'length of the sentence', len(line_strip) 24 | for i in 
range(len(line_strip)): 25 | if i == 0: 26 | assert line_strip[i] == '(' 27 | if line_strip[i] == '(' and not(is_next_open_bracket(line_strip, i)): # fulfilling this condition means this is a terminal symbol 28 | output.append(get_between_brackets(line_strip, i)) 29 | #print 'output:',output 30 | words_dict = {} 31 | for terminal in output: 32 | terminal_split = terminal.split() 33 | assert len(terminal_split) == 2 # each terminal contains a POS tag and word 34 | if not(terminal_split[1] in words_dict): 35 | words_dict[terminal_split[1]] = 1 36 | else: 37 | words_dict[terminal_split[1]] = words_dict[terminal_split[1]] + 1 38 | words_list = [] 39 | for item in words_dict: 40 | if words_dict[item] > 1: 41 | words_list.append(item) 42 | return words_list 43 | 44 | if __name__ == '__main__': 45 | input_file = open('train.con', 'r') 46 | lines = input_file.readlines() 47 | words_list = get_dict(lines) 48 | #print 'number of words', len(words_list) 49 | for word in words_list: 50 | print word 51 | input_file.close() 52 | -------------------------------------------------------------------------------- /cnn/c2w.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_C2W_H_ 2 | #define CNN_C2W_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "cnn/cnn.h" 8 | #include "cnn/model.h" 9 | #include "cnn/lstm.h" 10 | 11 | namespace cnn { 12 | 13 | // computes a representation of a word by reading characters 14 | // one at a time 15 | struct C2WBuilder { 16 | LSTMBuilder fc2w; 17 | LSTMBuilder rc2w; 18 | LookupParameters* p_lookup; 19 | std::vector words; 20 | std::map wordid2vi; 21 | explicit C2WBuilder(int vocab_size, 22 | unsigned layers, 23 | unsigned input_dim, 24 | unsigned hidden_dim, 25 | Model* m) : 26 | fc2w(layers, input_dim, hidden_dim, m), 27 | rc2w(layers, input_dim, hidden_dim, m), 28 | p_lookup(m->add_lookup_parameters(vocab_size, {input_dim})) { 29 | } 30 | void new_graph(ComputationGraph* cg) { 31 | words.clear(); 32 | fc2w.new_graph(cg); 33 | rc2w.new_graph(cg); 34 | } 35 | // compute a composed representation of a word out of characters 36 | // wordid should be a unique index for each word *type* in the graph being built 37 | VariableIndex add_word(int word_id, const std::vector& chars, ComputationGraph* cg) { 38 | auto it = wordid2vi.find(word_id); 39 | if (it == wordid2vi.end()) { 40 | fc2w.start_new_sequence(cg); 41 | rc2w.start_new_sequence(cg); 42 | std::vector ins(chars.size()); 43 | std::map c2i; 44 | for (unsigned i = 0; i < ins.size(); ++i) { 45 | VariableIndex& v = c2i[chars[i]]; 46 | if (!v) v = cg->add_lookup(p_lookup, chars[i]); 47 | ins[i] = v; 48 | fc2w.add_input(v, cg); 49 | } 50 | for (int i = ins.size() - 1; i >= 0; --i) 51 | rc2w.add_input(ins[i], cg); 52 | VariableIndex i_concat = cg->add_function({fc2w.back(), rc2w.back()}); 53 | it = wordid2vi.insert(std::make_pair(word_id, i_concat)).first; 54 | } 55 | return it->second; 56 | } 57 | }; 58 | 59 | } // namespace cnn 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /impl/oracle.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_ORACLE_H_ 2 | #define PARSER_ORACLE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | namespace cnn { class Dict; } 9 | 10 | namespace parser { 11 | 12 | // a sentence can be viewed in 4 different ways: 13 | // raw tokens, UNKed, lowercased, and POS tags 14 | struct Sentence { 15 | bool SizesMatch() const { return raw.size() == 
unk.size() && raw.size() == lc.size() && raw.size() == pos.size(); } 16 | size_t size() const { return raw.size(); } 17 | std::vector raw, unk, lc, pos; 18 | std::vector surfaces; 19 | }; 20 | 21 | // base class for transition based parse oracles 22 | struct Oracle { 23 | virtual ~Oracle(); 24 | Oracle(cnn::Dict* dict, cnn::Dict* adict, cnn::Dict* pdict) : d(dict), ad(adict), pd(pdict), sents() {} 25 | unsigned size() const { return sents.size(); } 26 | cnn::Dict* d; // dictionary of terminal symbols 27 | cnn::Dict* ad; // dictionary of action types 28 | cnn::Dict* pd; // dictionary of POS tags (preterminal symbols) 29 | std::string devdata; 30 | std::vector sents; 31 | std::vector> actions; 32 | protected: 33 | static void ReadSentenceView(const std::string& line, cnn::Dict* dict, std::vector* sent); 34 | }; 35 | 36 | // oracle that predicts nonterminal symbols with a PJ(X) action 37 | // the action PJ(X) effectively introduces an "(X" on the stack 38 | // # (S (NP ... 39 | // raw tokens 40 | // tokens with OOVs replaced 41 | class KOracle : public Oracle { 42 | public: 43 | KOracle(cnn::Dict* termdict, cnn::Dict* adict, cnn::Dict* pdict, cnn::Dict* nontermdict) : 44 | Oracle(termdict, adict, pdict), nd(nontermdict) {} 45 | // if is_training is true, then both the "raw" tokens and the mapped tokens 46 | // will be read, and both will be available. if false, then only the mapped 47 | // tokens will be available 48 | void load_bdata(const std::string& file); 49 | void load_oracle(const std::string& file, bool is_training); 50 | cnn::Dict* nd; // dictionary of nonterminal types 51 | }; 52 | 53 | } // namespace parser 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /cnn/fast-lstm.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_FAST_LSTM_H_ 2 | #define CNN_FAST_LSTM_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn.h" 6 | #include "cnn/expr.h" 7 | 8 | using namespace cnn::expr; 9 | 10 | namespace cnn { 11 | 12 | class Model; 13 | 14 | /* 15 | FastLSTM replaces the matrices from cell to other units, by diagonal matrices. 16 | */ 17 | struct FastLSTMBuilder : public RNNBuilder { 18 | FastLSTMBuilder() = default; 19 | explicit FastLSTMBuilder(unsigned layers, 20 | unsigned input_dim, 21 | unsigned hidden_dim, 22 | Model* model); 23 | 24 | Expression back() const override { return (cur == -1? h0.back() : h[cur].back()); } 25 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 26 | std::vector final_s() const override { 27 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 28 | for(auto my_h : final_h()) ret.push_back(my_h); 29 | return ret; 30 | } 31 | unsigned num_h0_components() const override { return 2 * layers; } 32 | 33 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 34 | std::vector get_s(RNNPointer i) const override { 35 | std::vector ret = (i == -1 ? c0 : c[i]); 36 | for(auto my_h : get_h(i)) ret.push_back(my_h); 37 | return ret; 38 | } 39 | 40 | void copy(const RNNBuilder & params) override; 41 | protected: 42 | void new_graph_impl(ComputationGraph& cg) override; 43 | void start_new_sequence_impl(const std::vector& h0) override; 44 | Expression add_input_impl(int prev, const Expression& x) override; 45 | 46 | public: 47 | // first index is layer, then ... 48 | std::vector> params; 49 | 50 | // first index is layer, then ... 
51 | std::vector> param_vars; 52 | 53 | // first index is time, second is layer 54 | std::vector> h, c; 55 | 56 | // initial values of h and c at each layer 57 | // - both default to zero matrix input 58 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 59 | std::vector h0; 60 | std::vector c0; 61 | unsigned layers; 62 | }; 63 | 64 | } // namespace cnn 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /cnn/lstm.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_LSTM_H_ 2 | #define CNN_LSTM_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn.h" 6 | #include "cnn/expr.h" 7 | 8 | using namespace cnn::expr; 9 | 10 | namespace cnn { 11 | 12 | class Model; 13 | 14 | struct LSTMBuilder : public RNNBuilder { 15 | LSTMBuilder() = default; 16 | explicit LSTMBuilder(unsigned layers, 17 | unsigned input_dim, 18 | unsigned hidden_dim, 19 | Model* model); 20 | 21 | void set_dropout(float d) { dropout_rate = d; } 22 | // in general, you should disable dropout at test time 23 | void disable_dropout() { dropout_rate = 0; } 24 | 25 | Expression back() const override { return (cur == -1? h0.back() : h[cur].back()); } 26 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 27 | std::vector final_s() const override { 28 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 29 | for(auto my_h : final_h()) ret.push_back(my_h); 30 | return ret; 31 | } 32 | unsigned num_h0_components() const override { return 2 * layers; } 33 | 34 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 35 | std::vector get_s(RNNPointer i) const override { 36 | std::vector ret = (i == -1 ? c0 : c[i]); 37 | for(auto my_h : get_h(i)) ret.push_back(my_h); 38 | return ret; 39 | } 40 | 41 | void copy(const RNNBuilder & params) override; 42 | protected: 43 | void new_graph_impl(ComputationGraph& cg) override; 44 | void start_new_sequence_impl(const std::vector& h0) override; 45 | Expression add_input_impl(int prev, const Expression& x) override; 46 | 47 | public: 48 | // first index is layer, then ... 49 | std::vector> params; 50 | 51 | // first index is layer, then ... 
52 | std::vector> param_vars; 53 | 54 | // first index is time, second is layer 55 | std::vector> h, c; 56 | 57 | // initial values of h and c at each layer 58 | // - both default to zero matrix input 59 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 60 | std::vector h0; 61 | std::vector c0; 62 | unsigned layers; 63 | float dropout_rate; 64 | }; 65 | 66 | } // namespace cnn 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /cnn/devices.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/devices.h" 2 | 3 | #include 4 | 5 | #include "cnn/cuda.h" 6 | 7 | using namespace std; 8 | 9 | namespace cnn { 10 | 11 | Device::~Device() {} 12 | 13 | #if HAVE_CUDA 14 | Device_GPU::Device_GPU(int mb, int device_id) : 15 | Device(DeviceType::GPU, &gpu_mem), cuda_device_id(device_id), gpu_mem(device_id) { 16 | CUDA_CHECK(cudaSetDevice(device_id)); 17 | CUBLAS_CHECK(cublasCreate(&cublas_handle)); 18 | CUBLAS_CHECK(cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE)); 19 | kSCALAR_MINUSONE = (float*)gpu_mem.malloc(sizeof(float)); 20 | kSCALAR_ONE = (float*)gpu_mem.malloc(sizeof(float)); 21 | kSCALAR_ZERO = (float*)gpu_mem.malloc(sizeof(float)); 22 | float minusone = -1; 23 | CUDA_CHECK(cudaMemcpyAsync(kSCALAR_MINUSONE, &minusone, sizeof(float), cudaMemcpyHostToDevice)); 24 | float one = 1; 25 | CUDA_CHECK(cudaMemcpyAsync(kSCALAR_ONE, &one, sizeof(float), cudaMemcpyHostToDevice)); 26 | float zero = 0; 27 | CUDA_CHECK(cudaMemcpyAsync(kSCALAR_ZERO, &zero, sizeof(float), cudaMemcpyHostToDevice)); 28 | 29 | // this is the big memory allocation 30 | 31 | size_t byte_count = (size_t)mb << 20; 32 | fxs = new AlignedMemoryPool(byte_count, mem); // memory for node values 33 | dEdfs = new AlignedMemoryPool(byte_count, mem); // memory for node gradients 34 | ps = new AlignedMemoryPool(byte_count, mem); // memory for parameters 35 | 36 | } 37 | 38 | Device_GPU::~Device_GPU() {} 39 | #endif 40 | 41 | // TODO we should be able to configure this carefully with a configuration 42 | // script 43 | // CPU -- 0 params 44 | // -- 50mb fxs 45 | // -- 50mb dEdfx 46 | Device_CPU::Device_CPU(int mb, bool shared) : 47 | Device(DeviceType::CPU, &cpu_mem), shmem(mem) { 48 | if (shared) shmem = new SharedAllocator(); 49 | kSCALAR_MINUSONE = (float*) mem->malloc(sizeof(float)); 50 | *kSCALAR_MINUSONE = -1; 51 | kSCALAR_ONE = (float*) mem->malloc(sizeof(float)); 52 | *kSCALAR_ONE = 1; 53 | kSCALAR_ZERO = (float*) mem->malloc(sizeof(float)); 54 | *kSCALAR_ZERO = 0; 55 | 56 | // this is the big memory allocation: the pools 57 | 58 | size_t byte_count = (size_t)mb << 20; 59 | fxs = new AlignedMemoryPool(byte_count, mem); // memory for node values 60 | dEdfs = new AlignedMemoryPool(byte_count, mem); // memory for node gradients 61 | ps = new AlignedMemoryPool(byte_count, mem); // memory for parameters 62 | 63 | } 64 | 65 | Device_CPU::~Device_CPU() {} 66 | 67 | } // namespace cnn 68 | -------------------------------------------------------------------------------- /cnn/mp.cc: -------------------------------------------------------------------------------- 1 | #include "mp.h" 2 | using namespace std; 3 | using namespace boost::interprocess; 4 | 5 | namespace cnn { 6 | namespace mp { 7 | // TODO: Pass these around instead of having them be global 8 | std::string queue_name = "cnn_mp_work_queue"; 9 | std::string shared_memory_name = "cnn_mp_shared_memory"; 10 | timespec start_time; 11 | bool stop_requested = 
false; 12 | SharedObject* shared_object = nullptr; 13 | 14 | std::string GenerateQueueName() { 15 | std::ostringstream ss; 16 | ss << "cnn_mp_work_queue"; 17 | ss << rand(); 18 | return ss.str(); 19 | } 20 | 21 | std::string GenerateSharedMemoryName() { 22 | std::ostringstream ss; 23 | ss << "cnn_mp_shared_memory"; 24 | ss << rand(); 25 | return ss.str(); 26 | } 27 | 28 | cnn::real SumValues(const std::vector& values) { 29 | return accumulate(values.begin(), values.end(), 0.0); 30 | } 31 | 32 | cnn::real Mean(const std::vector& values) { 33 | return SumValues(values) / values.size(); 34 | } 35 | 36 | std::string ElapsedTimeString(const timespec& start, const timespec& end) { 37 | std::ostringstream ss; 38 | time_t secs = end.tv_sec - start.tv_sec; 39 | long nsec = end.tv_nsec - start.tv_nsec; 40 | ss << secs << " seconds and " << nsec << "nseconds"; 41 | return ss.str(); 42 | } 43 | 44 | unsigned SpawnChildren(std::vector& workloads) { 45 | const unsigned num_children = workloads.size(); 46 | assert (workloads.size() == num_children); 47 | pid_t pid; 48 | unsigned cid; 49 | for (cid = 0; cid < num_children; ++cid) { 50 | pid = fork(); 51 | if (pid == -1) { 52 | std::cerr << "Fork failed. Exiting ..." << std::endl; 53 | return 1; 54 | } 55 | else if (pid == 0) { 56 | // children shouldn't continue looping 57 | break; 58 | } 59 | workloads[cid].pid = pid; 60 | } 61 | return cid; 62 | } 63 | 64 | std::vector CreateWorkloads(unsigned num_children) { 65 | int err; 66 | std::vector workloads(num_children); 67 | for (unsigned cid = 0; cid < num_children; cid++) { 68 | err = pipe(workloads[cid].p2c); 69 | assert (err == 0); 70 | err = pipe(workloads[cid].c2p); 71 | assert (err == 0); 72 | } 73 | return workloads; 74 | } 75 | 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /EVALB/bug/bug.tst: -------------------------------------------------------------------------------- 1 | (S1 (S (NP (DT The) (JJ Thy-1) (NN gene) (NN promoter)) (VP (VP (VBZ resembles) (NP (NP (DT a) (ADJP (CD ") (NN housekeeping)) (NN ") (NN promoter)) (SBAR (WHPP (IN in) (WHNP (WDT that))) (S (NP (PRP it)) (VP (VBZ is) (VP (VBN located) (PP (IN within) (NP (DT a) (JJ methylation-free) (NN island))))))))) (, ,) (VP (VBZ lacks) (NP (DT a) (JJ canonical) (NNP TATA) (NN box))) (, ,) (CC and) (VP (VBZ displays) (NP (NP (NN heterogeneity)) (PP (IN in) (NP (NP (DT the) (JJ 5'-end) (NNS termini)) (PP (IN of) (NP (DT the) (NN mRNA)))))))) (. .))) 2 | (S1 (S (NP (NP (DT The) (JJ latter) (CD ") (JJ nuclear) (NN factor)) (PP (IN for) (NP (VBN activated) (NN T) (NNS cells)))) (VP (VBZ ") (ADJP (JJ likely) (S (VP (VBZ contributes) (PP (TO to) (NP (NP (DT the) (NN tissue) (NN specificity)) (PP (IN of) (NP (JJ IL-2) (NN gene) (NN expression))))))))) (. .))) 3 | (S1 (S (ADVP (RB Thus)) (, ,) (NP (PRP we)) (VP (VBD postulated) (SBAR (SBAR (IN that) (S (NP (NP (DT the) (JJ circadian) (NN modification)) (PP (IN of) (NP (NNP GR)))) (VP (VBD was) (ADJP (JJ independent) (PP (IN of) (NP (DT the) (JJ diurnal) (NNS fluctuations)))) (PP (IN in) (NP (NP (NN plasma) (JJ cortisol) (NN level)) (CC or) (NP (NP (DT the) (JJ circadian) (NNS variations)) (PP (IN in) (NP (JJ environmental) (NN lighting))))))))) (CC and) (SBAR (IN that) (S (NP (DT the) (NN rhythmicity)) (VP (MD might) (VP (VB be) (VP (VBN regulated) (PP (IN by) (NP (DT the) ('' ') (NP (JJ circadian) (NN pacemaker) (POS ')) (VP (VBN located) (PP (IN in) (NP (DT the) (JJ human) (JJ basal) (NN brain))))))))))))) (. 
.))) 4 | (S1 (S (NP (JJ Such) (NN transcription) (NNS factors)) (VP (VBP play) (NP (NP (DT a) (JJ key) (NN role)) (PP (IN in) (NP (NP (DT the) (NN development)) (PP (IN of) (NP (NP (DT the) (JJ mature) (JJ T-cell) (NN phenotype)) (PP (IN by) (NP (NP (NN functioning) (RB as) (POS ')) (NN master) (NNS regulators))))) (PP (IN of) (NP (JJ T-cell) (NN differentiation) (POS '))))))) (. .))) 5 | (S1 (S (NP (NP (DT The) (NN conversion)) (PP (IN of) (NP (DT the)))) (VP (VBD TCEd) (PP (TO to) (NP (NP (DT a) ('' ') (JJ perfect) ('' ') (NN NF-kB)) (SBAR (S (NP (JJ binding) (NN site)) (VP (VBZ leads) (PP (TO to) (NP (NP (NP (DT a) (ADJP (RBR tighter) (JJ binding)) (PP (IN of) (NP (NP (NNS NF-kB)) (PP (PP (TO to) (NP (JJ TCEd) (NN DNA))) (CC and) (PP (, ,) (PP (IN as) (NP (DT a) (JJ functional) (NN consequence))) (, ,) (TO to) (NP (NP (DT the) (NN activity)) (PP (IN of) (NP (DT the)))))))) (POS ')) (JJ converted) ('' ') (JJ TCEd) (NNS motifs)) (PP (IN in) (NP (NNP HeLa) (NNS cells))))))))))) (. .))) 6 | -------------------------------------------------------------------------------- /EVALB/sample/sample.prm: -------------------------------------------------------------------------------- 1 | ##------------------------------------------## 2 | ## Debug mode ## 3 | ## print out data for individual sentence ## 4 | ##------------------------------------------## 5 | DEBUG 0 6 | 7 | ##------------------------------------------## 8 | ## MAX error ## 9 | ## Number of error to stop the process. ## 10 | ## This is useful if there could be ## 11 | ## tokanization error. ## 12 | ## The process will stop when this number## 13 | ## of errors are accumulated. ## 14 | ##------------------------------------------## 15 | MAX_ERROR 10 16 | 17 | ##------------------------------------------## 18 | ## Cut-off length for statistics ## 19 | ## At the end of evaluation, the ## 20 | ## statistics for the senetnces of length## 21 | ## less than or equal to this number will## 22 | ## be shown, on top of the statistics ## 23 | ## for all the sentences ## 24 | ##------------------------------------------## 25 | CUTOFF_LEN 40 26 | 27 | ##------------------------------------------## 28 | ## unlabeled or labeled bracketing ## 29 | ## 0: unlabeled bracketing ## 30 | ## 1: labeled bracketing ## 31 | ##------------------------------------------## 32 | LABELED 1 33 | 34 | ##------------------------------------------## 35 | ## Delete labels ## 36 | ## list of labels to be ignored. ## 37 | ## If it is a pre-terminal label, delete ## 38 | ## the word along with the brackets. ## 39 | ## If it is a non-terminal label, just ## 40 | ## delete the brackets (don't delete ## 41 | ## deildrens). ## 42 | ##------------------------------------------## 43 | DELETE_LABEL TOP 44 | DELETE_LABEL -NONE- 45 | DELETE_LABEL , 46 | DELETE_LABEL : 47 | DELETE_LABEL `` 48 | DELETE_LABEL '' 49 | 50 | ##------------------------------------------## 51 | ## Delete labels for length calculation ## 52 | ## list of labels to be ignored for ## 53 | ## length calculation purpose ## 54 | ##------------------------------------------## 55 | DELETE_LABEL_FOR_LENGTH -NONE- 56 | 57 | 58 | ##------------------------------------------## 59 | ## Equivalent labels, words ## 60 | ## the pairs are considered equivalent ## 61 | ## This is non-directional. 
## 62 | ##------------------------------------------## 63 | EQ_LABEL T TT 64 | 65 | EQ_WORD This this 66 | -------------------------------------------------------------------------------- /EVALB/COLLINS.prm: -------------------------------------------------------------------------------- 1 | ##------------------------------------------## 2 | ## Debug mode ## 3 | ## 0: No debugging ## 4 | ## 1: print data for individual sentence ## 5 | ##------------------------------------------## 6 | DEBUG 0 7 | 8 | ##------------------------------------------## 9 | ## MAX error ## 10 | ## Number of error to stop the process. ## 11 | ## This is useful if there could be ## 12 | ## tokanization error. ## 13 | ## The process will stop when this number## 14 | ## of errors are accumulated. ## 15 | ##------------------------------------------## 16 | MAX_ERROR 10 17 | 18 | ##------------------------------------------## 19 | ## Cut-off length for statistics ## 20 | ## At the end of evaluation, the ## 21 | ## statistics for the senetnces of length## 22 | ## less than or equal to this number will## 23 | ## be shown, on top of the statistics ## 24 | ## for all the sentences ## 25 | ##------------------------------------------## 26 | CUTOFF_LEN 40 27 | 28 | ##------------------------------------------## 29 | ## unlabeled or labeled bracketing ## 30 | ## 0: unlabeled bracketing ## 31 | ## 1: labeled bracketing ## 32 | ##------------------------------------------## 33 | LABELED 1 34 | 35 | ##------------------------------------------## 36 | ## Delete labels ## 37 | ## list of labels to be ignored. ## 38 | ## If it is a pre-terminal label, delete ## 39 | ## the word along with the brackets. ## 40 | ## If it is a non-terminal label, just ## 41 | ## delete the brackets (don't delete ## 42 | ## deildrens). ## 43 | ##------------------------------------------## 44 | DELETE_LABEL TOP 45 | DELETE_LABEL -NONE- 46 | DELETE_LABEL , 47 | DELETE_LABEL : 48 | DELETE_LABEL `` 49 | DELETE_LABEL '' 50 | DELETE_LABEL . 51 | 52 | ##------------------------------------------## 53 | ## Delete labels for length calculation ## 54 | ## list of labels to be ignored for ## 55 | ## length calculation purpose ## 56 | ##------------------------------------------## 57 | DELETE_LABEL_FOR_LENGTH -NONE- 58 | 59 | ##------------------------------------------## 60 | ## Equivalent labels, words ## 61 | ## the pairs are considered equivalent ## 62 | ## This is non-directional. ## 63 | ##------------------------------------------## 64 | EQ_LABEL ADVP PRT 65 | 66 | # EQ_WORD Example example 67 | -------------------------------------------------------------------------------- /EVALB/COLLINS_ch.prm: -------------------------------------------------------------------------------- 1 | ##------------------------------------------## 2 | ## Debug mode ## 3 | ## 0: No debugging ## 4 | ## 1: print data for individual sentence ## 5 | ##------------------------------------------## 6 | DEBUG 0 7 | 8 | ##------------------------------------------## 9 | ## MAX error ## 10 | ## Number of error to stop the process. ## 11 | ## This is useful if there could be ## 12 | ## tokanization error. ## 13 | ## The process will stop when this number## 14 | ## of errors are accumulated. 
## 15 | ##------------------------------------------## 16 | MAX_ERROR 10 17 | 18 | ##------------------------------------------## 19 | ## Cut-off length for statistics ## 20 | ## At the end of evaluation, the ## 21 | ## statistics for the senetnces of length## 22 | ## less than or equal to this number will## 23 | ## be shown, on top of the statistics ## 24 | ## for all the sentences ## 25 | ##------------------------------------------## 26 | CUTOFF_LEN 40 27 | 28 | ##------------------------------------------## 29 | ## unlabeled or labeled bracketing ## 30 | ## 0: unlabeled bracketing ## 31 | ## 1: labeled bracketing ## 32 | ##------------------------------------------## 33 | LABELED 1 34 | 35 | ##------------------------------------------## 36 | ## Delete labels ## 37 | ## list of labels to be ignored. ## 38 | ## If it is a pre-terminal label, delete ## 39 | ## the word along with the brackets. ## 40 | ## If it is a non-terminal label, just ## 41 | ## delete the brackets (don't delete ## 42 | ## deildrens). ## 43 | ##------------------------------------------## 44 | DELETE_LABEL TOP 45 | DELETE_LABEL -NONE- 46 | DELETE_LABEL , 47 | DELETE_LABEL : 48 | DELETE_LABEL `` 49 | DELETE_LABEL '' 50 | DELETE_LABEL . 51 | DELETE_LABEL PU 52 | ##------------------------------------------## 53 | ## Delete labels for length calculation ## 54 | ## list of labels to be ignored for ## 55 | ## length calculation purpose ## 56 | ##------------------------------------------## 57 | DELETE_LABEL_FOR_LENGTH -NONE- 58 | 59 | ##------------------------------------------## 60 | ## Equivalent labels, words ## 61 | ## the pairs are considered equivalent ## 62 | ## This is non-directional. ## 63 | ##------------------------------------------## 64 | EQ_LABEL ADVP PRT 65 | 66 | # EQ_WORD Example example 67 | -------------------------------------------------------------------------------- /cnn/hsm-builder.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_HSMBUILDER_H 2 | #define CNN_HSMBUILDER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "cnn/cnn.h" 8 | #include "cnn/expr.h" 9 | #include "cnn/dict.h" 10 | #include "cnn/cfsm-builder.h" 11 | 12 | namespace cnn { 13 | 14 | struct Parameters; 15 | 16 | class Cluster { 17 | private: 18 | std::vector children; 19 | std::vector path; 20 | std::vector terminals; 21 | std::unordered_map word2ind; 22 | Parameters* p_weights; 23 | Parameters* p_bias; 24 | mutable expr::Expression weights; 25 | mutable expr::Expression bias; 26 | bool initialized; 27 | unsigned output_size; 28 | 29 | expr::Expression predict(expr::Expression h, ComputationGraph& cg) const; 30 | 31 | public: 32 | Cluster(); 33 | Cluster* add_child(unsigned sym); 34 | void add_word(unsigned word); 35 | void initialize(unsigned rep_dim, Model* model); 36 | 37 | void new_graph(ComputationGraph& cg); 38 | unsigned sample(expr::Expression h, ComputationGraph& cg) const; 39 | expr::Expression neg_log_softmax(expr::Expression h, unsigned r, ComputationGraph& cg) const; 40 | 41 | unsigned get_index(unsigned word) const; 42 | unsigned get_word(unsigned index) const; 43 | unsigned num_children() const; 44 | const Cluster* get_child(unsigned i) const; 45 | const std::vector& get_path() const; 46 | expr::Expression get_weights(ComputationGraph& cg) const; 47 | expr::Expression get_bias(ComputationGraph& cg) const; 48 | 49 | std::string toString() const; 50 | }; 51 | 52 | // helps with implementation of hierarchical softmax 53 | // read a file with lines of the 
following format 54 | // CLASSID word [freq] 55 | class HierarchicalSoftmaxBuilder : public FactoredSoftmaxBuilder { 56 | public: 57 | HierarchicalSoftmaxBuilder(unsigned rep_dim, 58 | const std::string& cluster_file, 59 | Dict* word_dict, 60 | Model* model); 61 | ~HierarchicalSoftmaxBuilder(); 62 | // call this once per ComputationGraph 63 | void new_graph(ComputationGraph& cg); 64 | 65 | // -log(p(c | rep) * p(w | c, rep)) 66 | expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx); 67 | 68 | // samples a word from p(w,c | rep) 69 | unsigned sample(const expr::Expression& rep); 70 | 71 | private: 72 | Cluster* ReadClusterFile(const std::string& cluster_file, Dict* word_dict); 73 | std::vector widx2path; // will be NULL if not found 74 | Dict path_symbols; 75 | 76 | ComputationGraph* pcg; 77 | Cluster* root; 78 | }; 79 | 80 | } // namespace cnn 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /EVALB/bug/bug.gld: -------------------------------------------------------------------------------- 1 | (TOP (S (NP-SBJ (DT The) (NN Thy-1) (NN gene) (NN promoter) ) (VP (VBZ resembles) (NP (DT a) (`` ") (JJ housekeeping) ('' ") (NN promoter) ) (PP (IN in) (SBAR (IN that) (S (NP-SBJ-68 (PRP it) ) (VP-COOD (VP (VBZ is) (ADJP-PRD (JJ located) (PP (IN within) (NP (DT a) (JJ methylation-free) (NN island) )))) (, ,) (VP (VBZ lacks) (NP (DT a) (JJ canonical) (NN TATA) (NN box) )) (, ,) (CC and) (VP (VBZ displays) (NP (NN heterogeneity) ) (PP (IN in) (NP (NP (DT the) (JJ 5'-end) (NNS termini) ) (PP (IN of) (NP (DT the) (NN mRNA) )))))))))) (. .) ) ) 2 | (TOP (S (NP-SBJ (DT The) (JJ latter) (`` ") (NP (NP (JJ nuclear) (NN factor) ) (PP (IN for) (NP (VBN activated) (NN T) (NNS cells) ))) ('' ") ) (ADVP (RB likely) ) (VP (VBZ contributes) (PP (TO to) (NP (NP (DT the) (NN tissue) (NN specificity) ) (PP (IN of) (NP (NN IL-2) (NN gene) (NN expression) ))))) (. .) ) ) 3 | (TOP (S (ADVP (RB Thus) ) (, ,) (NP-SBJ (PRP we) ) (VP (VBD postulated) (SBAR-COOD (SBAR (IN that) (S (NP-SBJ (NP (DT the) (JJ circadian) (NN modification) ) (PP (IN of) (NP (NN GR) ))) (VP (VBD was) (ADJP-PRD (JJ independent) (PP (IN of) (NP-COOD (NP (NP (DT the) (JJ diurnal) (NNS fluctuations) ) (PP (IN in) (NP (NN plasma) (NN cortisol) (NN level) ))) (CC or) (NP (NP (DT the) (JJ circadian) (NNS variations) ) (PP (IN in) (NP (JJ environmental) (NN lighting) ))))))))) (CC and) (SBAR (IN that) (S (NP-SBJ-79 (DT the) (NN rhythmicity) ) (VP (MD might) (VP (VB be) (VP (VBN regulated) (NP (-NONE- *-79) ) (PP (IN by) (NP-LGS (NP (DT the) (`` ') (JJ circadian) (NN pacemaker) ('' ') ) (ADJP (JJ located) (PP (IN in) (NP (DT the) (JJ human) (JJ basal) (NN brain) )))))))))))) (. .) ) ) 4 | (TOP (S (NP-SBJ-70 (JJ Such) (NN transcription) (NNS factors) ) (VP (VBP play) (NP (DT a) (JJ key) (NN role) ) (PP (IN in) (NP (NP (DT the) (NN development) ) (PP (IN of) (NP (DT the) (JJ mature) (NN T-cell) (NN phenotype) )))) (PP (IN by) (S (NP-SBJ (-NONE- *-70) ) (VP (VBG functioning) (PP (IN as) (`` ') (NP (NP (JJ master) (NNS regulators) ) (PP (IN of) (NP (NN T-cell) (NN differentiation) ))) ('' ') ))))) (. .) 
) ) 5 | (TOP (S (NP-SBJ (NP (DT The) (NN conversion) ) (PP (IN of) (NP (DT the) (NN TCEd) )) (PP (TO to) (NP (DT a) (`` ') (JJ perfect) ('' ') (NN NF-kB) (NN binding) (NN site) ))) (VP-COOD (VP (VBZ leads) (PP (TO to) (NP-19 (NP (DT a) (JJR tighter) (NN binding) ) (PP (IN of) (NP (NN NF-kB) )) (PP (TO to) (NP (NN TCEd) (NN DNA) ))))) (CC and) (, ,) (VP (PP (IN as) (NP (DT a) (JJ functional) (NN consequence) )) (, ,) (PP (TO to) (NP=19 (NP (DT the) (NN activity) ) (PP (IN of) (NP (DT the) (`` ') (VBN converted) ('' ') (NN TCEd) (NNS motifs) )) (PP (IN in) (NP (NN HeLa) (NNS cells) )))))) (. .) ) ) 6 | -------------------------------------------------------------------------------- /cnn/cfsm-builder.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_CFSMBUILDER_H 2 | #define CNN_CFSMBUILDER_H 3 | 4 | #include 5 | #include 6 | #include "cnn/cnn.h" 7 | #include "cnn/expr.h" 8 | #include "cnn/dict.h" 9 | 10 | namespace cnn { 11 | 12 | struct Parameters; 13 | 14 | class FactoredSoftmaxBuilder { 15 | public: 16 | // call this once per ComputationGraph 17 | virtual void new_graph(ComputationGraph& cg) = 0; 18 | 19 | // -log(p(c | rep) * p(w | c, rep)) 20 | virtual expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx) = 0; 21 | 22 | // samples a word from p(w,c | rep) 23 | virtual unsigned sample(const expr::Expression& rep) = 0; 24 | }; 25 | 26 | class NonFactoredSoftmaxBuilder : public FactoredSoftmaxBuilder { 27 | public: 28 | NonFactoredSoftmaxBuilder(unsigned rep_dim, unsigned vocab_size, Model* model); 29 | void new_graph(ComputationGraph& cg); 30 | expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx); 31 | unsigned sample(const expr::Expression& rep); 32 | private: 33 | Parameters* p_w; 34 | Parameters* p_b; 35 | expr::Expression w; 36 | expr::Expression b; 37 | ComputationGraph* pcg; 38 | }; 39 | 40 | // helps with implementation of hierarchical softmax 41 | // read a file with lines of the following format 42 | // CLASSID word [freq] 43 | class ClassFactoredSoftmaxBuilder : public FactoredSoftmaxBuilder { 44 | public: 45 | ClassFactoredSoftmaxBuilder(unsigned rep_dim, 46 | const std::string& cluster_file, 47 | Dict* word_dict, 48 | Model* model); 49 | 50 | void new_graph(ComputationGraph& cg); 51 | expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx); 52 | unsigned sample(const expr::Expression& rep); 53 | 54 | private: 55 | void ReadClusterFile(const std::string& cluster_file, Dict* word_dict); 56 | Dict cdict; 57 | std::vector widx2cidx; // will be -1 if not present 58 | std::vector widx2cwidx; // word index to word index inside of cluster 59 | std::vector> cidx2words; 60 | std::vector singleton_cluster; // does cluster contain a single word type? 
61 | 62 | // parameters 63 | Parameters* p_r2c; 64 | Parameters* p_cbias; 65 | std::vector p_rc2ws; // len = number of classes 66 | std::vector p_rcwbiases; // len = number of classes 67 | 68 | // Expressions for current graph 69 | inline expr::Expression& get_rc2w(unsigned cluster_idx) { 70 | expr::Expression& e = rc2ws[cluster_idx]; 71 | if (!e.pg) 72 | e = expr::parameter(*pcg, p_rc2ws[cluster_idx]); 73 | return e; 74 | } 75 | inline expr::Expression& get_rc2wbias(unsigned cluster_idx) { 76 | expr::Expression& e = rc2biases[cluster_idx]; 77 | if (!e.pg) 78 | e = expr::parameter(*pcg, p_rcwbiases[cluster_idx]); 79 | return e; 80 | } 81 | ComputationGraph* pcg; 82 | expr::Expression r2c; 83 | expr::Expression cbias; 84 | std::vector rc2ws; 85 | std::vector rc2biases; 86 | }; 87 | 88 | } // namespace cnn 89 | 90 | #endif 91 | -------------------------------------------------------------------------------- /cnn/dict.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_DICT_H_ 2 | #define CNN_DICT_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #if BOOST_VERSION >= 105600 15 | #include 16 | #include 17 | #endif 18 | 19 | namespace cnn { 20 | 21 | class Dict { 22 | typedef std::unordered_map Map; 23 | public: 24 | Dict() : frozen(false), map_unk(false), unk_id(-1) { 25 | } 26 | 27 | inline unsigned size() const { return words_.size(); } 28 | 29 | inline bool Contains(const std::string& words) { 30 | return !(d_.find(words) == d_.end()); 31 | } 32 | 33 | void Freeze() { frozen = true; } 34 | bool is_frozen() { return frozen; } 35 | 36 | inline int Convert(const std::string& word) { 37 | auto i = d_.find(word); 38 | if (i == d_.end()) { 39 | if (frozen) { 40 | if (map_unk) { 41 | return unk_id; 42 | } 43 | else { 44 | std::cerr << map_unk << std::endl; 45 | std::cerr << "Unknown word encountered: " << word << std::endl; 46 | throw std::runtime_error("Unknown word encountered in frozen dictionary: " + word); 47 | } 48 | } 49 | words_.push_back(word); 50 | return d_[word] = words_.size() - 1; 51 | } else { 52 | return i->second; 53 | } 54 | } 55 | 56 | inline const std::string& Convert(const int& id) const { 57 | assert(id < (int)words_.size()); 58 | return words_[id]; 59 | } 60 | 61 | void SetUnk(const std::string& word) { 62 | if (!frozen) 63 | throw std::runtime_error("Please call SetUnk() only after dictionary is frozen"); 64 | if (map_unk) 65 | throw std::runtime_error("Set UNK more than one time"); 66 | 67 | // temporarily unfrozen the dictionary to allow the add of the UNK 68 | frozen = false; 69 | unk_id = Convert(word); 70 | frozen = true; 71 | 72 | map_unk = true; 73 | } 74 | 75 | void clear() { words_.clear(); d_.clear(); } 76 | 77 | private: 78 | bool frozen; 79 | bool map_unk; // if true, map unknown word to unk_id 80 | int unk_id; 81 | std::vector words_; 82 | Map d_; 83 | 84 | friend class boost::serialization::access; 85 | #if BOOST_VERSION >= 105600 86 | template void serialize(Archive& ar, const unsigned int) { 87 | ar & frozen; 88 | ar & map_unk; 89 | ar & unk_id; 90 | ar & words_; 91 | ar & d_; 92 | } 93 | #else 94 | template void serialize(Archive& ar, const unsigned int) { 95 | throw std::invalid_argument("Serializing dictionaries is only supported on versions of boost 1.56 or higher"); 96 | } 97 | #endif 98 | }; 99 | 100 | std::vector ReadSentence(const std::string& line, Dict* sd); 101 | void ReadSentencePair(const std::string& line, 
std::vector* s, Dict* sd, std::vector* t, Dict* td); 102 | 103 | } // namespace cnn 104 | 105 | #endif 106 | -------------------------------------------------------------------------------- /EVALB/sample/sample.rsl: -------------------------------------------------------------------------------- 1 | Sent. Matched Bracket Cross Correct Tag 2 | ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy 3 | ============================================================================ 4 | 1 4 0 100.00 100.00 4 4 4 0 4 4 100.00 5 | 2 4 0 75.00 75.00 3 4 4 0 4 4 100.00 6 | 3 4 0 100.00 100.00 4 4 4 0 4 3 75.00 7 | 4 4 0 75.00 75.00 3 4 4 0 4 3 75.00 8 | 5 4 0 75.00 75.00 3 4 4 0 4 4 100.00 9 | 6 4 0 50.00 66.67 2 4 3 1 4 4 100.00 10 | 7 4 0 25.00 100.00 1 4 1 0 4 4 100.00 11 | 8 4 0 0.00 0.00 0 4 0 0 4 4 100.00 12 | 9 4 0 100.00 80.00 4 4 5 0 4 4 100.00 13 | 10 4 0 100.00 50.00 4 4 8 0 4 4 100.00 14 | 11 4 2 0.00 0.00 0 0 0 0 4 0 0.00 15 | 12 4 1 0.00 0.00 0 0 0 0 4 0 0.00 16 | 13 4 1 0.00 0.00 0 0 0 0 4 0 0.00 17 | 14 4 2 0.00 0.00 0 0 0 0 4 0 0.00 18 | 15 4 0 100.00 100.00 4 4 4 0 4 4 100.00 19 | 16 4 1 0.00 0.00 0 0 0 0 4 0 0.00 20 | 17 4 1 0.00 0.00 0 0 0 0 4 0 0.00 21 | 18 4 0 100.00 100.00 4 4 4 0 4 4 100.00 22 | 19 4 0 100.00 100.00 4 4 4 0 4 4 100.00 23 | 20 4 1 0.00 0.00 0 0 0 0 4 0 0.00 24 | 21 4 0 100.00 100.00 4 4 4 0 4 4 100.00 25 | 22 44 0 100.00 100.00 34 34 34 0 44 44 100.00 26 | 23 4 0 100.00 100.00 4 4 4 0 4 4 100.00 27 | 24 5 0 100.00 100.00 4 4 4 0 4 4 100.00 28 | ============================================================================ 29 | 87.76 90.53 86 98 95 16 108 106 98.15 30 | === Summary === 31 | 32 | -- All -- 33 | Number of sentence = 24 34 | Number of Error sentence = 5 35 | Number of Skip sentence = 2 36 | Number of Valid sentence = 17 37 | Bracketing Recall = 87.76 38 | Bracketing Precision = 90.53 39 | Complete match = 52.94 40 | Average crossing = 0.06 41 | No crossing = 94.12 42 | 2 or less crossing = 100.00 43 | Tagging accuracy = 98.15 44 | 45 | -- len<=40 -- 46 | Number of sentence = 23 47 | Number of Error sentence = 5 48 | Number of Skip sentence = 2 49 | Number of Valid sentence = 16 50 | Bracketing Recall = 81.25 51 | Bracketing Precision = 85.25 52 | Complete match = 50.00 53 | Average crossing = 0.06 54 | No crossing = 93.75 55 | 2 or less crossing = 100.00 56 | Tagging accuracy = 96.88 57 | -------------------------------------------------------------------------------- /cnn/init.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/init.h" 2 | #include "cnn/aligned-mem-pool.h" 3 | #include "cnn/cnn.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #if HAVE_CUDA 10 | #include "cnn/cuda.h" 11 | #include 12 | #endif 13 | 14 | using namespace std; 15 | 16 | namespace cnn { 17 | 18 | // these should maybe live in a file called globals.cc or something 19 | AlignedMemoryPool* fxs = nullptr; 20 | AlignedMemoryPool* dEdfs = nullptr; 21 | AlignedMemoryPool* ps = nullptr; 22 | mt19937* rndeng = nullptr; 23 | std::vector devices; 24 | Device* default_device = nullptr; 25 | 26 | static void RemoveArgs(int& argc, char**& argv, int& argi, int n) { 27 | for (int i = argi + n; i < argc; ++i) 28 | argv[i - n] = argv[i]; 29 | argc -= n; 30 | assert(argc >= 0); 31 | } 32 | 33 | void Initialize(int& argc, char**& argv, unsigned random_seed, bool shared_parameters) { 34 | vector gpudevices; 35 | #if HAVE_CUDA 36 | cerr << "[cnn] initializing CUDA\n"; 37 | gpudevices = Initialize_GPU(argc, argv); 38 | 
#endif 39 | unsigned long num_mb = 512UL; 40 | int argi = 1; 41 | while(argi < argc) { 42 | string arg = argv[argi]; 43 | if (arg == "--cnn-mem" || arg == "--cnn_mem") { 44 | if ((argi + 1) > argc) { 45 | cerr << "[cnn] --cnn-mem expects an argument (the memory, in megabytes, to reserve)\n"; 46 | abort(); 47 | } else { 48 | string a2 = argv[argi+1]; 49 | istringstream c(a2); c >> num_mb; 50 | RemoveArgs(argc, argv, argi, 2); 51 | } 52 | } else if (arg == "--cnn-seed" || arg == "--cnn_seed") { 53 | if ((argi + 1) > argc) { 54 | cerr << "[cnn] --cnn-seed expects an argument (the random number seed)\n"; 55 | abort(); 56 | } else { 57 | string a2 = argv[argi+1]; 58 | istringstream c(a2); c >> random_seed; 59 | RemoveArgs(argc, argv, argi, 2); 60 | } 61 | } else if (arg.find("--cnn") == 0) { 62 | cerr << "[cnn] Bad command line argument: " << arg << endl; 63 | abort(); 64 | } else { break; } 65 | } 66 | if (random_seed == 0) { 67 | random_device rd; 68 | random_seed = rd(); 69 | } 70 | cerr << "[cnn] random seed: " << random_seed << endl; 71 | rndeng = new mt19937(random_seed); 72 | 73 | cerr << "[cnn] allocating memory: " << num_mb << "MB\n"; 74 | devices.push_back(new Device_CPU(num_mb, shared_parameters)); 75 | int default_index = 0; 76 | if (gpudevices.size() > 0) { 77 | for (auto gpu : gpudevices) 78 | devices.push_back(gpu); 79 | default_index++; 80 | } 81 | default_device = devices[default_index]; 82 | 83 | // TODO these should be accessed through the relevant device and removed here 84 | fxs = default_device->fxs; 85 | dEdfs = default_device->dEdfs; 86 | ps = default_device->ps; 87 | kSCALAR_MINUSONE = default_device->kSCALAR_MINUSONE; 88 | kSCALAR_ONE = default_device->kSCALAR_ONE; 89 | kSCALAR_ZERO = default_device->kSCALAR_ZERO; 90 | cerr << "[cnn] memory allocation done.\n"; 91 | } 92 | 93 | void Cleanup() { 94 | delete rndeng; 95 | delete fxs; 96 | delete dEdfs; 97 | delete ps; 98 | } 99 | 100 | } // namespace cnn 101 | 102 | -------------------------------------------------------------------------------- /EVALB/new.prm: -------------------------------------------------------------------------------- 1 | ##------------------------------------------## 2 | ## Debug mode ## 3 | ## 0: No debugging ## 4 | ## 1: print data for individual sentence ## 5 | ## 2: print detailed bracketing info ## 6 | ##------------------------------------------## 7 | DEBUG 0 8 | 9 | ##------------------------------------------## 10 | ## MAX error ## 11 | ## Number of error to stop the process. ## 12 | ## This is useful if there could be ## 13 | ## tokanization error. ## 14 | ## The process will stop when this number## 15 | ## of errors are accumulated. ## 16 | ##------------------------------------------## 17 | MAX_ERROR 10 18 | 19 | ##------------------------------------------## 20 | ## Cut-off length for statistics ## 21 | ## At the end of evaluation, the ## 22 | ## statistics for the senetnces of length## 23 | ## less than or equal to this number will## 24 | ## be shown, on top of the statistics ## 25 | ## for all the sentences ## 26 | ##------------------------------------------## 27 | CUTOFF_LEN 40 28 | 29 | ##------------------------------------------## 30 | ## unlabeled or labeled bracketing ## 31 | ## 0: unlabeled bracketing ## 32 | ## 1: labeled bracketing ## 33 | ##------------------------------------------## 34 | LABELED 1 35 | 36 | ##------------------------------------------## 37 | ## Delete labels ## 38 | ## list of labels to be ignored. 
## 39 | ## If it is a pre-terminal label, delete ## 40 | ## the word along with the brackets. ## 41 | ## If it is a non-terminal label, just ## 42 | ## delete the brackets (don't delete ## 43 | ## deildrens). ## 44 | ##------------------------------------------## 45 | DELETE_LABEL TOP 46 | DELETE_LABEL S1 47 | DELETE_LABEL -NONE- 48 | DELETE_LABEL , 49 | DELETE_LABEL : 50 | DELETE_LABEL `` 51 | DELETE_LABEL '' 52 | DELETE_LABEL . 53 | DELETE_LABEL ? 54 | DELETE_LABEL ! 55 | 56 | ##------------------------------------------## 57 | ## Delete labels for length calculation ## 58 | ## list of labels to be ignored for ## 59 | ## length calculation purpose ## 60 | ##------------------------------------------## 61 | DELETE_LABEL_FOR_LENGTH -NONE- 62 | 63 | ##------------------------------------------## 64 | ## Labels to be considered for misquote ## 65 | ## (could be possesive or quote) ## 66 | ##------------------------------------------## 67 | QUOTE_LABEL `` 68 | QUOTE_LABEL '' 69 | QUOTE_LABEL POS 70 | 71 | ##------------------------------------------## 72 | ## These ones are less common, but ## 73 | ## are on occasion output by parsers: ## 74 | ##------------------------------------------## 75 | QUOTE_LABEL NN 76 | QUOTE_LABEL CD 77 | QUOTE_LABEL VBZ 78 | QUOTE_LABEL : 79 | 80 | ##------------------------------------------## 81 | ## Equivalent labels, words ## 82 | ## the pairs are considered equivalent ## 83 | ## This is non-directional. ## 84 | ##------------------------------------------## 85 | EQ_LABEL ADVP PRT 86 | 87 | # EQ_WORD Example example 88 | -------------------------------------------------------------------------------- /cnn/grad-check.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/grad-check.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "cnn/model.h" 7 | #include "cnn/cnn.h" 8 | #include "cnn/tensor.h" 9 | 10 | using namespace std; 11 | 12 | namespace cnn { 13 | 14 | bool CheckGrad(Model& m, ComputationGraph& g, int verbosity) { 15 | // Clear the parameters first 16 | const vector& params = m.parameters_list(); 17 | const vector& lookup_params = m.lookup_parameters_list(); 18 | for (auto pp : params) 19 | pp->clear(); 20 | for (auto pp : lookup_params) 21 | pp->clear(); 22 | 23 | // Perform forward and backward steps 24 | float alpha = 5e-4; 25 | g.forward(); 26 | g.backward(); 27 | 28 | // Check 29 | bool flag = false, curr_flag = false; 30 | for (auto pp : params) { 31 | if(verbosity > 1) 32 | cerr << endl << "PARAMETERS " << pp << endl; 33 | Parameters& p = *pp; 34 | size_t ts = p.dim.size(); 35 | for (size_t i = 0; i < ts; ++i) { 36 | float old = TensorTools::AccessElement(p.values, i); 37 | TensorTools::SetElement(p.values, i, old - alpha); 38 | float E_left = as_scalar(g.forward()); 39 | TensorTools::SetElement(p.values, i, old + alpha); 40 | float E_right = as_scalar(g.forward()); 41 | TensorTools::SetElement(p.values, i, old); 42 | float g = (E_right - E_left) / (2 * alpha); 43 | float g_act = TensorTools::AccessElement(p.g, i); 44 | float f = fabs(g - g_act); 45 | float m = max(fabs(g), fabs(g_act)); 46 | if (f > 0.1 && m > 0.f) f /= m; 47 | if (f > 0.1 || std::isnan(f)) { flag = true; if(verbosity > 0) { curr_flag = true; cerr << "***[" << f << "] "; } } 48 | if(verbosity + (curr_flag ? 
1 : 0) > 1) { 49 | cerr << g_act << ' ' << g << endl; 50 | curr_flag = false; 51 | } 52 | } 53 | } 54 | 55 | for (auto pp : lookup_params) { 56 | if(verbosity > 1) 57 | cerr << endl << "LOOKUP PARAMETERS " << pp << endl; 58 | LookupParameters& p = *pp; 59 | size_t ts = p.dim.size(); 60 | for (unsigned j : p.non_zero_grads) { 61 | if(verbosity > 1) 62 | cerr << "OBJECT=" << j << endl; 63 | Tensor& v = p.values[j]; 64 | Tensor& ag = p.grads[j]; 65 | for (size_t i = 0; i < ts; ++i) { 66 | float old = TensorTools::AccessElement(v, i); 67 | TensorTools::SetElement(v, i, old - alpha); 68 | float E_left = as_scalar(g.forward()); 69 | TensorTools::SetElement(v, i, old + alpha); 70 | float E_right = as_scalar(g.forward()); 71 | TensorTools::SetElement(v, i, old); 72 | float g = (E_right - E_left) / (2 * alpha); 73 | float g_act = TensorTools::AccessElement(ag, i); 74 | float f = fabs(g - g_act); 75 | float m = max(fabs(g), fabs(g_act)); 76 | if (f > 0.1 && m > 0.f) f /= m; 77 | if (f > 0.1 || std::isnan(f)) { flag = true; if(verbosity > 0) { curr_flag = true; cerr << "***[" << f << "] "; } } 78 | if(verbosity + (curr_flag ? 1 : 0) > 1) { 79 | cerr << g_act << ' ' << g << endl; 80 | curr_flag = false; 81 | } 82 | } 83 | } 84 | } 85 | 86 | if (flag) { 87 | if (verbosity > 1) 88 | cerr << endl << "*** GRADIENT CHECK FAILED ***" << endl; 89 | } else { 90 | if (verbosity > 0) 91 | cerr << endl << "GRADIENT CHECK PASSED" << endl; 92 | } 93 | return !flag; 94 | } 95 | 96 | } 97 | 98 | -------------------------------------------------------------------------------- /cnn/conv.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_CONV_H_ 2 | #define CNN_CONV_H_ 3 | 4 | #include "cnn/cnn.h" 5 | 6 | namespace cnn { 7 | 8 | struct AddVectorToAllColumns : public Node { 9 | explicit AddVectorToAllColumns(const std::initializer_list& a) : Node(a) {} 10 | std::string as_string(const std::vector& arg_names) const override; 11 | Dim dim_forward(const std::vector& xs) const override; 12 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 13 | void backward_impl(const std::vector& xs, 14 | const Tensor& fx, 15 | const Tensor& dEdf, 16 | unsigned i, 17 | Tensor& dEdxi) const override; 18 | }; 19 | 20 | struct KMaxPooling : public Node { 21 | explicit KMaxPooling(const std::initializer_list& a, unsigned k = 1) : Node(a), k(k) {} 22 | std::string as_string(const std::vector& arg_names) const override; 23 | Dim dim_forward(const std::vector& xs) const override; 24 | size_t aux_storage_size() const override; 25 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 26 | void backward_impl(const std::vector& xs, 27 | const Tensor& fx, 28 | const Tensor& dEdf, 29 | unsigned i, 30 | Tensor& dEdxi) const override; 31 | unsigned k; 32 | }; 33 | 34 | struct FoldRows : public Node { 35 | explicit FoldRows(const std::initializer_list& a, unsigned nrows) : Node(a), nrows(nrows) {} 36 | std::string as_string(const std::vector& arg_names) const override; 37 | Dim dim_forward(const std::vector& xs) const override; 38 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 39 | void backward_impl(const std::vector& xs, 40 | const Tensor& fx, 41 | const Tensor& dEdf, 42 | unsigned i, 43 | Tensor& dEdxi) const override; 44 | unsigned nrows; 45 | }; 46 | 47 | // y = x_1 *conv x_2 48 | // x_1 \in R^{d x s} (input) 49 | // x_2 \in R^{d x m} (filter) 50 | struct Conv1DNarrow : public Node { 51 | explicit Conv1DNarrow(const std::initializer_list& a) 
: Node(a) {} 52 | std::string as_string(const std::vector& arg_names) const override; 53 | Dim dim_forward(const std::vector& xs) const override; 54 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 55 | void backward_impl(const std::vector& xs, 56 | const Tensor& fx, 57 | const Tensor& dEdf, 58 | unsigned i, 59 | Tensor& dEdxi) const override; 60 | }; 61 | 62 | // y = x_1 *conv x_2 63 | // x_1 \in R^{d x s} (input) 64 | // x_2 \in R^{d x m} (filter) 65 | struct Conv1DWide : public Node { 66 | explicit Conv1DWide(const std::initializer_list& a) : Node(a) {} 67 | std::string as_string(const std::vector& arg_names) const override; 68 | Dim dim_forward(const std::vector& xs) const override; 69 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 70 | void backward_impl(const std::vector& xs, 71 | const Tensor& fx, 72 | const Tensor& dEdf, 73 | unsigned i, 74 | Tensor& dEdxi) const override; 75 | }; 76 | 77 | } // namespace cnn 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /cmake/FindEigen3.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Eigen3 lib 2 | # 3 | # This module supports requiring a minimum version, e.g. you can do 4 | # find_package(Eigen3 3.1.2) 5 | # to require version 3.1.2 or newer of Eigen3. 6 | # 7 | # Once done this will define 8 | # 9 | # EIGEN3_FOUND - system has eigen lib with correct version 10 | # EIGEN3_INCLUDE_DIR - the eigen include directory 11 | # EIGEN3_VERSION - eigen version 12 | # 13 | # This module reads hints about search locations from 14 | # the following enviroment variables: 15 | # 16 | # EIGEN3_ROOT 17 | # EIGEN3_ROOT_DIR 18 | 19 | # Copyright (c) 2006, 2007 Montel Laurent, 20 | # Copyright (c) 2008, 2009 Gael Guennebaud, 21 | # Copyright (c) 2009 Benoit Jacob 22 | # Redistribution and use is allowed according to the terms of the 2-clause BSD license. 
23 | 24 | if(NOT Eigen3_FIND_VERSION) 25 | if(NOT Eigen3_FIND_VERSION_MAJOR) 26 | set(Eigen3_FIND_VERSION_MAJOR 2) 27 | endif(NOT Eigen3_FIND_VERSION_MAJOR) 28 | if(NOT Eigen3_FIND_VERSION_MINOR) 29 | set(Eigen3_FIND_VERSION_MINOR 91) 30 | endif(NOT Eigen3_FIND_VERSION_MINOR) 31 | if(NOT Eigen3_FIND_VERSION_PATCH) 32 | set(Eigen3_FIND_VERSION_PATCH 0) 33 | endif(NOT Eigen3_FIND_VERSION_PATCH) 34 | 35 | set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") 36 | endif(NOT Eigen3_FIND_VERSION) 37 | 38 | macro(_eigen3_check_version) 39 | file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) 40 | 41 | string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") 42 | set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") 43 | string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") 44 | set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") 45 | string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}") 46 | set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") 47 | 48 | set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) 49 | if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 50 | set(EIGEN3_VERSION_OK FALSE) 51 | else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 52 | set(EIGEN3_VERSION_OK TRUE) 53 | endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 54 | 55 | if(NOT EIGEN3_VERSION_OK) 56 | 57 | message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " 58 | "but at least version ${Eigen3_FIND_VERSION} is required") 59 | endif(NOT EIGEN3_VERSION_OK) 60 | endmacro(_eigen3_check_version) 61 | 62 | if (EIGEN3_INCLUDE_DIR) 63 | 64 | # in cache already 65 | _eigen3_check_version() 66 | set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) 67 | 68 | else (EIGEN3_INCLUDE_DIR) 69 | 70 | find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library 71 | HINTS 72 | ENV EIGEN3_ROOT 73 | ENV EIGEN3_ROOT_DIR 74 | PATHS 75 | ${CMAKE_INSTALL_PREFIX}/include 76 | ${KDE4_INCLUDE_DIR} 77 | PATH_SUFFIXES eigen3 eigen 78 | ) 79 | 80 | if(EIGEN3_INCLUDE_DIR) 81 | _eigen3_check_version() 82 | endif(EIGEN3_INCLUDE_DIR) 83 | 84 | include(FindPackageHandleStandardArgs) 85 | find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK) 86 | 87 | mark_as_advanced(EIGEN3_INCLUDE_DIR) 88 | 89 | endif(EIGEN3_INCLUDE_DIR) 90 | 91 | -------------------------------------------------------------------------------- /cnn/dim.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_DIM_H 2 | #define CNN_DIM_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define CNN_MAX_TENSOR_DIM 7 13 | 14 | namespace boost { namespace serialization { class access; } } 15 | 16 | namespace cnn { 17 | 18 | struct Dim { 19 | Dim() : nd(), bd(1) {} 20 | // explicit Dim(unsigned int m) : nd(1), bd(1) { d[0] = m; } 21 | // TODO: The constructors for dimensions w/ and w/o batches is not intuitive. 22 | // can this be fixed in some way? 
23 | // Dim(unsigned int m, unsigned int n) : nd(2), bd(1) { d[0] = m; d[1] = n; } 24 | Dim(std::initializer_list x) : nd(), bd(1) { 25 | for(auto v : x) d[nd++] = v; 26 | } 27 | Dim(std::initializer_list x, unsigned int b) : nd(), bd(b) { 28 | for(auto v : x) d[nd++] = v; 29 | } 30 | Dim(const std::vector & x) : nd(), bd(1) { 31 | for(auto v : x) d[nd++] = v; 32 | } 33 | Dim(const std::vector & x, unsigned int b) : nd(), bd(b) { 34 | for(auto v : x) d[nd++] = v; 35 | } 36 | inline unsigned int size() const { 37 | return batch_size() * bd; 38 | } 39 | inline unsigned int batch_size() const { 40 | unsigned int p = 1; 41 | for (unsigned int i = 0; i < nd; ++i) p *= d[i]; 42 | return p; 43 | } 44 | inline unsigned int sum_dims() const { 45 | unsigned int p = 0; 46 | for (unsigned int i = 0; i < nd; ++i) p += d[i]; 47 | return p; 48 | } 49 | inline Dim truncate() const { 50 | Dim r = *this; 51 | unsigned int m = 1; 52 | unsigned int s = size(); 53 | for (unsigned int i = 1; i < s; ++i) 54 | if (size(i) > 1) m = i + 1; 55 | r.resize(m); 56 | return r; 57 | } 58 | inline Dim single_batch() const { 59 | Dim r = *this; 60 | r.bd = 1; 61 | return r; 62 | } 63 | inline void resize(unsigned int i) { nd = i; } 64 | inline unsigned int ndims() const { return nd; } 65 | inline unsigned int rows() const { return d[0]; } 66 | inline unsigned int cols() const { return nd > 1 ? d[1] : 1; } 67 | inline unsigned int batch_elems() const { return bd; } 68 | inline void set(unsigned int i, unsigned int s) { assert(i < nd); assert(s > 0); d[i] = s; } 69 | inline unsigned int operator[](unsigned int i) const { return i < nd ? d[i] : 1; } 70 | inline unsigned int size(unsigned int i) const { return (*this)[i]; } 71 | inline Dim transpose() const { 72 | if (nd == 1) { return Dim({1, d[0]}, bd); } 73 | else if (nd == 2) { return Dim({d[1], d[0]}, bd); } 74 | throw std::invalid_argument("Cannot transpose Dim object with more than 2 dimensions"); 75 | } 76 | unsigned int d[CNN_MAX_TENSOR_DIM]; 77 | unsigned int nd; 78 | unsigned int bd; 79 | private: 80 | friend class boost::serialization::access; 81 | template void serialize(Archive& ar, const unsigned int) { 82 | ar & nd; 83 | ar & d; 84 | } 85 | }; 86 | 87 | //static_assert(std::is_trivially_copyable::value, "Dim must be trivially copyable"); 88 | 89 | inline bool operator==(const Dim& a, const Dim& b) { 90 | if (a.nd != b.nd || a.bd != b.bd) return false; 91 | return std::memcmp(a.d, b.d, a.nd) == 0; 92 | } 93 | 94 | inline bool operator!=(const Dim& a, const Dim& b) { return !(a == b); } 95 | 96 | std::ostream& operator<<(std::ostream& os, const Dim& d); 97 | std::ostream& operator<<(std::ostream& os, const std::vector& ds); 98 | 99 | } // namespace cnn 100 | 101 | #endif 102 | -------------------------------------------------------------------------------- /cnn/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ########## cnn library ########## 2 | # Sources: 3 | set(cnn_library_SRCS 4 | cfsm-builder.cc 5 | cnn.cc 6 | conv.cc 7 | deep-lstm.cc 8 | devices.cc 9 | dict.cc 10 | dim.cc 11 | exec.cc 12 | expr.cc 13 | fast-lstm.cc 14 | grad-check.cc 15 | graph.cc 16 | gru.cc 17 | hsm-builder.cc 18 | init.cc 19 | lstm.cc 20 | mem.cc 21 | model.cc 22 | mp.cc 23 | nodes.cc 24 | nodes-common.cc 25 | param-nodes.cc 26 | rnn.cc 27 | rnn-state-machine.cc 28 | saxe-init.cc 29 | shadow-params.cc 30 | tensor.cc 31 | training.cc 32 | ) 33 | 34 | # Headers: 35 | set(cnn_library_HDRS 36 | aligned-mem-pool.h 37 | cfsm-builder.h 38 | 
c2w.h 39 | cnn.h 40 | conv.h 41 | cuda.h 42 | devices.h 43 | dict.h 44 | dim.h 45 | exec.h 46 | expr.h 47 | fast-lstm.h 48 | functors.h 49 | gpu-kernels.h 50 | gpu-ops.h 51 | graph.h 52 | gru.h 53 | hsm-builder.h 54 | init.h 55 | lstm.h 56 | mem.h 57 | model.h 58 | mp.h 59 | nodes.h 60 | param-nodes.h 61 | random.h 62 | rnn-state-machine.h 63 | rnn.h 64 | saxe-init.h 65 | shadow-params.h 66 | simd-functors.h 67 | tensor.h 68 | timing.h 69 | training.h 70 | ) 71 | 72 | if(WITH_CUDA_BACKEND) 73 | list(APPEND cnn_library_SRCS 74 | cuda.cc) 75 | endif(WITH_CUDA_BACKEND) 76 | 77 | file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} tests/*.cc) 78 | 79 | #foreach(test_src ${TEST_SRCS}) 80 | #Extract the filename without an extension (NAME_WE) 81 | # get_filename_component(testName ${test_src} NAME_WE) 82 | 83 | #Add compile target 84 | # add_executable(${testName} ${test_src}) 85 | 86 | #link to Boost libraries AND your targets and dependencies 87 | # target_link_libraries(${testName} cnn ${LIBS}) 88 | 89 | # set_target_properties(${testName} PROPERTIES 90 | # RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests.bin) 91 | 92 | #Finally add it to test execution - 93 | #Notice the WORKING_DIRECTORY and COMMAND 94 | # add_test(NAME ${testName} 95 | # WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests.bin 96 | # COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tests.bin/${testName} ) 97 | #endforeach(test_src) 98 | 99 | # actual target: 100 | add_library(cnn STATIC ${cnn_library_SRCS} ${cnn_library_HDRS}) 101 | target_link_libraries(cnn ${LIBS}) 102 | if(WITH_CUDA_BACKEND) 103 | add_library(gcnn_shared SHARED ${cnn_library_SRCS} ${cnn_library_HDRS}) 104 | target_link_libraries(gcnn_shared ${LIBS}) 105 | else() 106 | add_library(cnn_shared SHARED ${cnn_library_SRCS} ${cnn_library_HDRS}) 107 | target_link_libraries(cnn_shared ${LIBS}) 108 | endif(WITH_CUDA_BACKEND) 109 | #add_library(cnn ${cnn_library_SRCS} ${cnn_library_HDRS} ${LIBS}) 110 | if(WITH_CUDA_BACKEND) 111 | set(CUDA_SEPARABLE_COMPILATION ON) 112 | list(APPEND CUDA_NVCC_FLAGS "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_52,code=compute_52;-std=c++11;-O2;-DVERBOSE;-Xcompiler;-fpic") 113 | SET(CUDA_PROPAGATE_HOST_FLAGS OFF) 114 | cuda_add_library(cnncuda STATIC gpu-ops.cu) 115 | cuda_add_library(cnncuda_shared SHARED gpu-ops.cu) 116 | endif(WITH_CUDA_BACKEND) 117 | 118 | install(FILES ${cnn_library_HDRS} DESTINATION include/cnn) 119 | install(TARGETS cnn DESTINATION lib) 120 | 121 | # target_compile_features(cnn PRIVATE cxx_range_for) 122 | 123 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(cnn) 2 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR) 3 | 4 | set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) 5 | 6 | # CNN uses Eigen which exploits modern CPU architectures. To get the 7 | # best possible performance, the following are recommended: 8 | # 1. use very recent versions of gcc or Clang to build 9 | # 2. use very recent versions of Eigen (ideally the dev version) 10 | # 3. 
try compiler options like -march=native or other architecture 11 | # flags (the compiler does not always make the best configuration 12 | # decisions without help) 13 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops -Wall -std=c++11 -Ofast -g -DEIGEN_FAST_MATH -march=native") 14 | 15 | enable_testing() 16 | 17 | include_directories(${CMAKE_CURRENT_SOURCE_DIR} 18 | ${PROJECT_SOURCE_DIR}/external/easyloggingpp/src) 19 | 20 | function(find_cudnn) 21 | set(CUDNN_ROOT "" CACHE PATH "CUDNN root path") 22 | find_path(CUDNN_INCLUDE_DIRS cudnn.h 23 | PATHS ${CUDNN_ROOT} 24 | ${CUDNN_ROOT}/include 25 | DOC "CUDNN include path") 26 | find_library(CUDNN_LIBRARIES NAMES libcudnn.so 27 | PATHS ${CUDNN_ROOT} 28 | ${CUDNN_ROOT}/lib 29 | ${CUDNN_ROOT}/lib64 30 | DOC "CUDNN library path") 31 | if(CUDNN_INCLUDE_DIRS AND CUDNN_LIBRARIES) 32 | set(CUDNN_FOUND TRUE PARENT_SCOPE) 33 | message(STATUS "Found CUDNN (include: ${CUDNN_INCLUDE_DIRS}, library: ${CUDNN_LIBRARIES})") 34 | mark_as_advanced(CUDNN_INCLUDE_DIRS CUDNN_LIBRARIES) 35 | else() 36 | MESSAGE(FATAL_ERROR "Failed to find CUDNN in path: ${CUDNN_ROOT} (Did you set CUDNN_ROOT properly?)") 37 | endif() 38 | endfunction() 39 | 40 | # look for Boost 41 | if(DEFINED ENV{BOOST_ROOT}) 42 | set(Boost_NO_SYSTEM_PATHS ON) 43 | endif() 44 | set(Boost_REALPATH ON) 45 | find_package(Boost COMPONENTS program_options regex serialization REQUIRED) 46 | include_directories(${Boost_INCLUDE_DIR}) 47 | set(LIBS ${LIBS} ${Boost_LIBRARIES}) 48 | # trouble shooting: 49 | # if boost library cannot be found, in addition to install boost library 50 | # check if environment variables are set 51 | # 52 | # to set boost root and its library root in environment variable, use 53 | # for example 54 | # echo "export BOOST_LIBRARYDIR=/usr/local/lib" >> ~/.bashrc 55 | # echo "export BOOST_ROOT=/cygdrive/d/tools/boost_1_58_0/boost_1_58_0" >> ~/.bashrc 56 | # then run source ~/.bashrc to have those environment variable effective immediately 57 | 58 | if(BACKEND) 59 | message("-- BACKEND: ${BACKEND}") 60 | else() 61 | message("-- BACKEND not specified, defaulting to eigen.") 62 | set(BACKEND "eigen") 63 | endif() 64 | 65 | if(BACKEND MATCHES "^eigen$") 66 | set(WITH_EIGEN_BACKEND 1) 67 | elseif(BACKEND MATCHES "^cuda$") 68 | set(WITH_CUDA_BACKEND 1) 69 | else() 70 | message(SEND_ERROR "BACKEND must be eigen or cuda") 71 | endif() 72 | 73 | if (WITH_CUDA_BACKEND) 74 | find_package(CUDA REQUIRED) 75 | set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_ROOT}) 76 | include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) 77 | add_definitions(-DHAVE_CUDA) 78 | #list(APPEND CUDA_LIBRARIES /usr/lib64/libpthread.so) 79 | MESSAGE("CUDA_LIBRARIES: ${CUDA_LIBRARIES}") 80 | list(REMOVE_ITEM CUDA_LIBRARIES -lpthread) 81 | set(LIBS ${LIBS} ${CUDA_LIBRARIES}) 82 | #find_cudnn() 83 | #include_directories(SYSTEM ${CUDNN_INCLUDE_DIRS}) 84 | endif() 85 | 86 | # look for Eigen 87 | find_package(Eigen3 REQUIRED) 88 | include_directories(${EIGEN3_INCLUDE_DIR}) 89 | 90 | FIND_PACKAGE(Threads REQUIRED) 91 | set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT}) 92 | 93 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h) 94 | include_directories(${CMAKE_CURRENT_BINARY_DIR}) 95 | 96 | add_subdirectory(cnn) 97 | add_subdirectory(impl) 98 | enable_testing() 99 | -------------------------------------------------------------------------------- /cnn/rnn.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/rnn.h" 2 | 3 | #include 4 | #include 5 | 
#include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | #include "cnn/expr.h" 10 | 11 | using namespace std; 12 | using namespace cnn::expr; 13 | using namespace cnn; 14 | 15 | namespace cnn { 16 | 17 | enum { X2H=0, H2H, HB, L2H }; 18 | 19 | RNNBuilder::~RNNBuilder() {} 20 | 21 | SimpleRNNBuilder::SimpleRNNBuilder(unsigned layers, 22 | unsigned input_dim, 23 | unsigned hidden_dim, 24 | Model* model, 25 | bool support_lags) : layers(layers), lagging(support_lags) { 26 | unsigned layer_input_dim = input_dim; 27 | for (unsigned i = 0; i < layers; ++i) { 28 | Parameters* p_x2h = model->add_parameters({hidden_dim, layer_input_dim}); 29 | Parameters* p_h2h = model->add_parameters({hidden_dim, hidden_dim}); 30 | Parameters* p_hb = model->add_parameters({hidden_dim}); 31 | vector ps = {p_x2h, p_h2h, p_hb}; 32 | if (lagging) 33 | ps.push_back(model->add_parameters({hidden_dim, hidden_dim})); 34 | params.push_back(ps); 35 | layer_input_dim = hidden_dim; 36 | } 37 | } 38 | 39 | void SimpleRNNBuilder::new_graph_impl(ComputationGraph& cg) { 40 | param_vars.clear(); 41 | for (unsigned i = 0; i < layers; ++i) { 42 | Parameters* p_x2h = params[i][X2H]; 43 | Parameters* p_h2h = params[i][H2H]; 44 | Parameters* p_hb = params[i][HB]; 45 | Expression i_x2h = parameter(cg,p_x2h); 46 | Expression i_h2h = parameter(cg,p_h2h); 47 | Expression i_hb = parameter(cg,p_hb); 48 | vector vars = {i_x2h, i_h2h, i_hb}; 49 | 50 | if (lagging) { 51 | Parameters* p_l2h = params[i][L2H]; 52 | Expression i_l2h = parameter(cg,p_l2h); 53 | vars.push_back(i_l2h); 54 | } 55 | 56 | param_vars.push_back(vars); 57 | } 58 | } 59 | 60 | void SimpleRNNBuilder::start_new_sequence_impl(const vector& h_0) { 61 | h.clear(); 62 | h0 = h_0; 63 | if (h0.size()) { assert(h0.size() == layers); } 64 | } 65 | 66 | Expression SimpleRNNBuilder::add_input_impl(int prev, const Expression &in) { 67 | const unsigned t = h.size(); 68 | h.push_back(vector(layers)); 69 | 70 | Expression x = in; 71 | 72 | for (unsigned i = 0; i < layers; ++i) { 73 | const vector& vars = param_vars[i]; 74 | 75 | // y <--- f(x) 76 | Expression y = affine_transform({vars[2], vars[0], x}); 77 | 78 | // y <--- g(y_prev) 79 | if (prev == -1 && h0.size() > 0) 80 | y = affine_transform({y, vars[1], h0[i]}); 81 | else if (prev >= 0) 82 | y = affine_transform({y, vars[1], h[prev][i]}); 83 | 84 | // x <--- tanh(y) 85 | x = h[t][i] = tanh(y); 86 | } 87 | return h[t].back(); 88 | } 89 | 90 | Expression SimpleRNNBuilder::add_auxiliary_input(const Expression &in, const Expression &aux) { 91 | const unsigned t = h.size(); 92 | h.push_back(vector(layers)); 93 | 94 | Expression x = in; 95 | 96 | for (unsigned i = 0; i < layers; ++i) { 97 | const vector& vars = param_vars[i]; 98 | assert(vars.size() >= L2H + 1); 99 | 100 | Expression y = affine_transform({vars[HB], vars[X2H], x, vars[L2H], aux}); 101 | 102 | if (t == 0 && h0.size() > 0) 103 | y = affine_transform({y, vars[H2H], h0[i]}); 104 | else if (t >= 1) 105 | y = affine_transform({y, vars[H2H], h[t-1][i]}); 106 | 107 | x = h[t][i] = tanh(y); 108 | } 109 | return h[t].back(); 110 | } 111 | 112 | void SimpleRNNBuilder::copy(const RNNBuilder & rnn) { 113 | const SimpleRNNBuilder & rnn_simple = (const SimpleRNNBuilder&)rnn; 114 | assert(params.size() == rnn_simple.params.size()); 115 | for(size_t i = 0; i < rnn_simple.params.size(); ++i) { 116 | params[i][0]->copy(*rnn_simple.params[i][0]); 117 | params[i][1]->copy(*rnn_simple.params[i][1]); 118 | params[i][2]->copy(*rnn_simple.params[i][2]); 119 | } 120 | } 121 | 122 | } // namespace cnn 
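// ---------------------------------------------------------------------------
// Illustrative usage sketch (added for exposition; not part of the original
// cnn sources). It shows how the SimpleRNNBuilder defined above is typically
// driven through the public API declared in cnn/rnn.h: new_graph() binds the
// builder to a ComputationGraph, start_new_sequence() resets the recurrent
// state, and each add_input() consumes one timestep and returns the deepest
// layer's hidden state. The helper name and dimensions here are arbitrary.
static Expression encode_sequence(SimpleRNNBuilder& rnn,
                                  ComputationGraph& cg,
                                  const std::vector<Expression>& inputs) {
  rnn.new_graph(cg);           // must be called once per new ComputationGraph
  rnn.start_new_sequence();    // empty h_0, so the initial hidden state is zero
  Expression h;
  for (const Expression& x : inputs)
    h = rnn.add_input(x);      // h is the top-layer hidden state at this step
  return h;                    // final state summarizing the whole sequence
}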
123 | -------------------------------------------------------------------------------- /cnn/training.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_TRAINING_H_ 2 | #define CNN_TRAINING_H_ 3 | 4 | #include 5 | #include "cnn/model.h" 6 | #include "cnn/shadow-params.h" 7 | 8 | namespace cnn { 9 | 10 | struct Trainer { 11 | explicit Trainer(Model* m, real lam, real e0) : 12 | eta0(e0), eta(e0), eta_decay(), epoch(), lambda(lam), clipping_enabled(true), clip_threshold(5), clips(), updates(), model(m) {} 13 | virtual ~Trainer(); 14 | 15 | virtual void update(real scale = 1.0) = 0; 16 | void update_epoch(real r = 1) { 17 | epoch += r; 18 | eta = eta0 / (1 + epoch * eta_decay); 19 | } 20 | 21 | // if clipping is enabled and the gradient is too big, return the amount to 22 | // scale the gradient by (otherwise 1) 23 | float clip_gradients(); 24 | 25 | // learning rates 26 | real eta0; 27 | real eta; 28 | real eta_decay; 29 | real epoch; 30 | 31 | real lambda; // weight regularization (l2) 32 | 33 | // clipping 34 | real clipping_enabled; 35 | real clip_threshold; 36 | real clips; 37 | real updates; 38 | 39 | void status() { 40 | std::cerr << "[epoch=" << epoch << " eta=" << eta << " clips=" << clips << " updates=" << updates << "] "; 41 | updates = clips = 0; 42 | } 43 | 44 | Model* model; // parameters and gradients live here 45 | }; 46 | 47 | struct SimpleSGDTrainer : public Trainer { 48 | explicit SimpleSGDTrainer(Model* m, real lam = 1e-6, real e0 = 0.1) : Trainer(m, lam, e0) {} 49 | void update(real scale) override; 50 | void update(const std::vector &lookup_params, const std::vector ¶ms, real scale = 1); 51 | }; 52 | 53 | struct MomentumSGDTrainer : public Trainer { 54 | explicit MomentumSGDTrainer(Model* m, real lam = 1e-6, real e0 = 0.01, real mom = 0.9) : 55 | Trainer(m, lam, e0), momentum(mom), velocity_allocated(false) {} 56 | void update(real scale) override; 57 | 58 | real momentum; 59 | 60 | bool velocity_allocated; 61 | 62 | // the following represent the current velocity 63 | std::vector vp; 64 | std::vector vlp; 65 | //std::unordered_map vp; 66 | //std::unordered_map> vl; 67 | }; 68 | 69 | struct AdagradTrainer : public Trainer { 70 | explicit AdagradTrainer(Model* m, real lam = 1e-6, real e0 = 0.1, real eps = 1e-20) : 71 | Trainer(m, lam, e0), epsilon(eps), shadow_params_allocated(false) {} 72 | void update(real scale) override; 73 | 74 | real epsilon; 75 | bool shadow_params_allocated; 76 | std::vector vp; 77 | std::vector vlp; 78 | }; 79 | 80 | struct AdadeltaTrainer : public Trainer { 81 | explicit AdadeltaTrainer(Model* m, real lam = 1e-6, real eps = 1e-6, real rho = 0.95) : 82 | Trainer(m, lam, 1.0), epsilon(eps), rho(rho), shadow_params_allocated(false) {} 83 | void update(real scale) override; 84 | 85 | real epsilon; 86 | real rho; 87 | bool shadow_params_allocated; 88 | std::vector hg; // History of gradients 89 | std::vector hlg; 90 | std::vector hd; // History of deltas 91 | std::vector hld; 92 | }; 93 | 94 | struct RmsPropTrainer : public Trainer { 95 | explicit RmsPropTrainer(Model* m, real lam = 1e-6, real e0 = 0.1, real eps = 1e-20, real rho = 0.95) : 96 | Trainer(m, lam, e0), epsilon(eps), rho(rho), shadow_params_allocated(false) {} 97 | void update(real scale) override; 98 | 99 | real epsilon; 100 | real rho; 101 | bool shadow_params_allocated; 102 | std::vector hg; // History of gradients 103 | std::vector > hlg; 104 | }; 105 | 106 | struct AdamTrainer : public Trainer { 107 | explicit AdamTrainer(Model* m, 
float lambda = 1e-6, float alpha = 0.001, float beta_1 = 0.9, float beta_2 = 0.999, float eps = 1e-8) : 108 | Trainer(m, lambda, alpha), beta_1(beta_1), beta_2(beta_2), eps(eps), shadow_params_allocated(false) {} 109 | 110 | void update(real scale) override; 111 | 112 | float beta_1; 113 | float beta_2; 114 | float eps; 115 | bool shadow_params_allocated; 116 | std::vector m; // History of gradients 117 | std::vector lm; 118 | std::vector v; // History of deltas 119 | std::vector lv; 120 | }; 121 | 122 | } // namespace cnn 123 | 124 | #endif 125 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # InOrderParser 2 | 3 | This implementation is based on the [cnn library](https://github.com/clab/cnn-v1), which is required for this software to function. The reference paper is "In-Order Transition-based Constituent Parsing", accepted by Transactions of the ACL (TACL). The system achieves state-of-the-art results on the standard PTB and CTB 5.1 benchmarks, obtaining 91.8 F1 and 86.1 F1, respectively. With a sampling and reranking strategy, it achieves 94.2 F1 and 88.0 F1, respectively. By converting to dependencies, it achieves 96.2 UAS (95.2 LAS) and 89.4 UAS (88.4 LAS), respectively. On a single i7 CPU, the speed is 60 sentences per second. 4 | 5 | ## Building 6 | The Boost version used is 1.5.4. 7 | 8 | mkdir build 9 | cd build 10 | cmake .. -DEIGEN3_INCLUDE_DIR=/path/to/eigen 11 | make 12 | 13 | There are two implementations, Kparser and Kparser-standard. Kparser is used for the standard experiments, while Kparser-standard is easier to use. 14 | 15 | ## Experiments 16 | 17 | #### Data 18 | 19 | You can use the scripts to convert the format of the training, development and test data, respectively. 20 | 21 | python ./scripts/get_oracle.py [en|ch] [training data in bracketed format] [training data in bracketed format] > [training oracle] 22 | python ./scripts/get_oracle.py [en|ch] [training data in bracketed format] [development data in bracketed format] > [development oracle] 23 | python ./scripts/get_oracle.py [en|ch] [training data in bracketed format] [test data in bracketed format] > [test oracle] 24 | 25 | If you require the related data, contact us. 26 | 27 | #### Training 28 | 29 | Ensure the related files are linked into the current directory. 30 | 31 | mkdir model/ 32 | ./build/impl/Kparser --cnn-mem 1700 --training_data [training oracle] --dev_data [development oracle] --bracketing_dev_data [development data in bracketed format] -P -t --pretrained_dim 100 -w [pretrained word embeddings] --lstm_input_dim 128 --hidden_dim 128 -D 0.2 33 | 34 | #### Test 35 | 36 | ./build/impl/Kparser --cnn-mem 1700 --training_data [training oracle] --test_data [test oracle] --bracketing_dev_data [test data in bracketed format] -P --pretrained_dim 100 -w [pretrained word embeddings] --lstm_input_dim 128 --hidden_dim 128 -m [model file] 37 | 38 | The automatically generated file test.eval is the result file.
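The repository also bundles the EVALB scorer (see the EVALB/ directory). If you want to score a pair of tree files yourself, the following is a sketch of the direct invocation, assuming gold and predicted trees in bracketed format, one tree per line:

    ./EVALB/evalb -p ./EVALB/COLLINS.prm [gold trees in bracketed format] [predicted trees in bracketed format]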
39 | 40 | We provide the trained models: [English model](https://drive.google.com/file/d/0B1VhP65vISjoWmNjN0pfTmh5Vnc/view?usp=sharing) and pretrained word embeddings [sskip.100.vectors](https://drive.google.com/open?id=0B1VhP65vISjoZ3ppTnR3YXRMd1E) for English; [Chinese model](https://drive.google.com/open?id=0B1VhP65vISjoVjZKT2U1amFXVGc) and pretrained word embeddings [zzgiga.sskip.80.vectors](https://drive.google.com/open?id=0B1VhP65vISjoeGJsX2syOGhLWnc) for Chinese. 41 | 42 | #### Sampling 43 | 44 | ./build/impl/Kparser --cnn-mem 1700 --training_data [training oracle] --test_data [test oracle] --bracketing_dev_data [test data in bracketed format] -P --pretrained_dim 100 -w [pretrained word embeddings] --lstm_input_dim 128 --hidden_dim 128 -m [model file] --alpha 0.8 -s 100 > samples.act 45 | ./mid2tree.py samples.act > samples.trees 46 | 47 | The samples.trees file can then be fed into the following reranking components. 48 | 49 | ## Easy Usage 50 | 51 | Download the [English model](https://drive.google.com/open?id=0B1VhP65vISjoSXRHelVnSVNYSjA) and the [Chinese model](https://drive.google.com/open?id=0B1VhP65vISjodDM2NW9vRFdOQmM). 52 | 53 | ./build/impl/Kparser-standard --cnn-mem 1700 --model_dir [model directory] -w [pretrained word embeddings] --train_dict [model directory]/train_dict --lang [en/ch] < [stdin] > [stdout] 54 | 55 | The standard input should follow the format Word1 POS1 Word2 POS2 ... Wordn POSn. For example: 56 | 57 | No RB , , it PRP was VBD n't RB Black NNP Monday NNP . . 58 | 59 | The standard output is a tree in bracketed format. 60 | 61 | (S (INTJ (RB No)) (, ,) (NP (PRP it)) (VP (VBD was) (RB n't) (NP (NNP Black) (NNP Monday))) (. .)) 62 | 63 | If you want to sample trees, you should add --samples [number of samples] --a [alpha], for example, --samples 100 --a 0.8 64 | 65 | ## Citation 66 | 67 | @article{TACL1199, 68 | author = {Liu, Jiangming and Zhang, Yue}, 69 | title = {In-Order Transition-based Constituent Parsing}, 70 | journal = {Transactions of the Association for Computational Linguistics}, 71 | volume = {5}, 72 | year = {2017}, 73 | issn = {2307-387X}, 74 | pages = {413--424} 75 | } 76 | 77 | ## Contact 78 | 79 | Jiangming Liu, jmliunlp@gmail.com 80 | 81 | Yue Zhang, yue_zhang@sutd.edu.sg 82 | -------------------------------------------------------------------------------- /impl/oracle.cc: -------------------------------------------------------------------------------- 1 | #include "impl/oracle.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "cnn/dict.h" 7 | #include "impl/compressed-fstream.h" 8 | 9 | using namespace std; 10 | 11 | namespace parser { 12 | 13 | 14 | Oracle::~Oracle() {} 15 | 16 | inline bool is_ws(char x) { //check whether the character is a space or tab delimiter 17 | return (x == ' ' || x == '\t'); 18 | } 19 | 20 | inline bool is_not_ws(char x) { 21 | return (x != ' ' && x != '\t'); 22 | } 23 | 24 | void Oracle::ReadSentenceView(const std::string& line, cnn::Dict* dict, vector* sent) { 25 | unsigned cur = 0; 26 | while(cur < line.size()) { 27 | while(cur < line.size() && is_ws(line[cur])) { ++cur; } 28 | unsigned start = cur; 29 | while(cur < line.size() && is_not_ws(line[cur])) { ++cur; } 30 | unsigned end = cur; 31 | if (end > start) { 32 | unsigned x = dict->Convert(line.substr(start, end - start)); 33 | sent->push_back(x); 34 | } 35 | } 36 | assert(sent->size() > 0); // empty sentences not allowed 37 | } 38 | 39 | void KOracle::load_bdata(const string& file) { 40 |
devdata=file; 41 | } 42 | 43 | void KOracle::load_oracle(const string& file, bool is_training) { 44 | cerr << "Loading top-down oracle from " << file << " [" << (is_training ? "training" : "non-training") << "] ...\n"; 45 | cnn::compressed_ifstream in(file.c_str()); 46 | assert(in); 47 | const string kREDUCE = "REDUCE"; 48 | const string kSHIFT = "SHIFT"; 49 | const string kTERM = "TERM"; 50 | const int kREDUCE_INT = ad->Convert("REDUCE"); 51 | const int kSHIFT_INT = ad->Convert("SHIFT"); 52 | const int kTERM_INT = ad->Convert("TERM"); 53 | int lc = 0; 54 | string line; 55 | vector cur_acts; 56 | while(getline(in, line)) { 57 | ++lc; 58 | //cerr << "line number = " << lc << endl; 59 | cur_acts.clear(); 60 | if (line.size() == 0 || (line[0] == '#' && line[2] == '(')) continue; 61 | sents.resize(sents.size() + 1); 62 | auto& cur_sent = sents.back(); 63 | if (is_training) { // at training time, we load both "UNKified" versions of the data, and raw versions 64 | ReadSentenceView(line, pd, &cur_sent.pos); 65 | getline(in, line); 66 | ReadSentenceView(line, d, &cur_sent.raw); 67 | getline(in, line); 68 | ReadSentenceView(line, d, &cur_sent.lc); 69 | getline(in, line); 70 | ReadSentenceView(line, d, &cur_sent.unk); 71 | } else { // at test time, we ignore the raw strings and just use the "UNKified" versions 72 | ReadSentenceView(line, pd, &cur_sent.pos); 73 | getline(in, line); 74 | istrstream istr(line.c_str()); 75 | string word; 76 | while(istr>>word) cur_sent.surfaces.push_back(word); 77 | getline(in, line); 78 | ReadSentenceView(line, d, &cur_sent.lc); 79 | getline(in, line); 80 | ReadSentenceView(line, d, &cur_sent.unk); 81 | cur_sent.raw = cur_sent.unk; 82 | } 83 | lc += 3; 84 | if (!cur_sent.SizesMatch()) { 85 | cerr << "Mismatched lengths of input strings in oracle before line " << lc << endl; 86 | abort(); 87 | } 88 | int termc = 0; 89 | while(getline(in, line)) { 90 | ++lc; 91 | //cerr << "line number = " << lc << endl; 92 | if (line.size() == 0) break; 93 | assert(line.find(' ') == string::npos); 94 | if (line == kREDUCE) { 95 | cur_acts.push_back(kREDUCE_INT); 96 | } else if (line.find("PJ(") == 0) { 97 | // Convert NT 98 | nd->Convert(line.substr(3, line.size() - 4)); 99 | // PJ(X) is put into the actions list as PJ(X) 100 | cur_acts.push_back(ad->Convert(line)); 101 | } else if (line == kSHIFT) { 102 | cur_acts.push_back(kSHIFT_INT); 103 | termc++; 104 | } else if (line == kTERM){ 105 | cur_acts.push_back(kTERM_INT); 106 | } else { 107 | cerr << "Malformed input in line " << lc << endl; 108 | abort(); 109 | } 110 | } 111 | actions.push_back(cur_acts); 112 | if (termc != sents.back().size()) { 113 | cerr << "Mismatched number of tokens and SHIFTs in oracle before line " << lc << endl; 114 | abort(); 115 | } 116 | } 117 | cerr << "Loaded " << sents.size() << " sentences\n"; 118 | cerr << " cumulative action vocab size: " << ad->size() << endl; 119 | cerr << " cumulative terminal vocab size: " << d->size() << endl; 120 | cerr << " cumulative nonterminal vocab size: " << nd->size() << endl; 121 | cerr << " cumulative pos vocab size: " << pd->size() << endl; 122 | } 123 | 124 | } // namespace parser 125 | -------------------------------------------------------------------------------- /cnn/gru.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/gru.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | #include "cnn/training.h" 10 | 11 | using namespace std; 12 | 13 | namespace cnn { 14 | 15 | enum 
{ X2Z, H2Z, BZ, X2R, H2R, BR, X2H, H2H, BH }; 16 | 17 | GRUBuilder::GRUBuilder(unsigned layers, 18 | unsigned input_dim, 19 | unsigned hidden_dim, 20 | Model* model) : hidden_dim(hidden_dim), layers(layers) { 21 | unsigned layer_input_dim = input_dim; 22 | for (unsigned i = 0; i < layers; ++i) { 23 | // z 24 | Parameters* p_x2z = model->add_parameters({hidden_dim, layer_input_dim}); 25 | Parameters* p_h2z = model->add_parameters({hidden_dim, hidden_dim}); 26 | Parameters* p_bz = model->add_parameters({hidden_dim}); 27 | 28 | // r 29 | Parameters* p_x2r = model->add_parameters({hidden_dim, layer_input_dim}); 30 | Parameters* p_h2r = model->add_parameters({hidden_dim, hidden_dim}); 31 | Parameters* p_br = model->add_parameters({hidden_dim}); 32 | 33 | // h 34 | Parameters* p_x2h = model->add_parameters({hidden_dim, layer_input_dim}); 35 | Parameters* p_h2h = model->add_parameters({hidden_dim, hidden_dim}); 36 | Parameters* p_bh = model->add_parameters({hidden_dim}); 37 | layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next 38 | 39 | vector ps = {p_x2z, p_h2z, p_bz, p_x2r, p_h2r, p_br, p_x2h, p_h2h, p_bh}; 40 | params.push_back(ps); 41 | } // layers 42 | } 43 | 44 | void GRUBuilder::new_graph_impl(ComputationGraph& cg) { 45 | param_vars.clear(); 46 | for (unsigned i = 0; i < layers; ++i) { 47 | auto& p = params[i]; 48 | 49 | // z 50 | Expression x2z = parameter(cg,p[X2Z]); 51 | Expression h2z = parameter(cg,p[H2Z]); 52 | Expression bz = parameter(cg,p[BZ]); 53 | 54 | // r 55 | Expression x2r = parameter(cg,p[X2R]); 56 | Expression h2r = parameter(cg,p[H2R]); 57 | Expression br = parameter(cg,p[BR]); 58 | 59 | // h 60 | Expression x2h = parameter(cg,p[X2H]); 61 | Expression h2h = parameter(cg,p[H2H]); 62 | Expression bh = parameter(cg,p[BH]); 63 | 64 | vector vars = {x2z, h2z, bz, x2r, h2r, br, x2h, h2h, bh}; 65 | param_vars.push_back(vars); 66 | } 67 | } 68 | 69 | void GRUBuilder::start_new_sequence_impl(const std::vector& h_0) { 70 | h.clear(); 71 | h0 = h_0; 72 | if (!h0.empty()) { 73 | assert (h0.size() == layers); 74 | } 75 | } 76 | 77 | Expression GRUBuilder::add_input_impl(int prev, const Expression& x) { 78 | const bool has_initial_state = (h0.size() > 0); 79 | h.push_back(vector(layers)); 80 | vector& ht = h.back(); 81 | Expression in = x; 82 | for (unsigned i = 0; i < layers; ++i) { 83 | const vector& vars = param_vars[i]; 84 | Expression h_tprev; 85 | // prev_zero means that h_tprev should be treated as 0 86 | bool prev_zero = false; 87 | if (prev >= 0 || has_initial_state) { 88 | h_tprev = (prev < 0) ? 
h0[i] : h[prev][i]; 89 | } else { prev_zero = true; } 90 | // update gate 91 | Expression zt; 92 | if (prev_zero) 93 | zt = affine_transform({vars[BZ], vars[X2Z], in}); 94 | else 95 | zt = affine_transform({vars[BZ], vars[X2Z], in, vars[H2Z], h_tprev}); 96 | zt = logistic(zt); 97 | // forget 98 | Expression ft = 1.f - zt; 99 | // reset gate 100 | Expression rt; 101 | if (prev_zero) 102 | rt = affine_transform({vars[BR], vars[X2R], in}); 103 | else 104 | rt = affine_transform({vars[BR], vars[X2R], in, vars[H2R], h_tprev}); 105 | rt = logistic(rt); 106 | 107 | // candidate activation 108 | Expression ct; 109 | if (prev_zero) { 110 | ct = affine_transform({vars[BH], vars[X2H], in}); 111 | ct = tanh(ct); 112 | Expression nwt = cwise_multiply(zt, ct); 113 | in = ht[i] = nwt; 114 | } else { 115 | Expression ght = cwise_multiply(rt, h_tprev); 116 | ct = affine_transform({vars[BH], vars[X2H], in, vars[H2H], ght}); 117 | ct = tanh(ct); 118 | Expression nwt = cwise_multiply(zt, ct); 119 | Expression crt = cwise_multiply(ft, h_tprev); 120 | in = ht[i] = crt + nwt; 121 | } 122 | } 123 | return ht.back(); 124 | } 125 | 126 | void GRUBuilder::copy(const RNNBuilder & rnn) { 127 | const GRUBuilder & rnn_gru = (const GRUBuilder&)rnn; 128 | assert(params.size() == rnn_gru.params.size()); 129 | for(size_t i = 0; i < params.size(); ++i) 130 | for(size_t j = 0; j < params[i].size(); ++j) 131 | params[i][j]->copy(*rnn_gru.params[i][j]); 132 | } 133 | 134 | } // namespace cnn 135 | -------------------------------------------------------------------------------- /cnn/tensor.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/tensor.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #if HAVE_CUDA 8 | #include "cnn/cuda.h" 9 | #endif 10 | 11 | using namespace std; 12 | 13 | namespace cnn { 14 | 15 | ostream& operator<<(ostream& os, const Tensor& t) { 16 | #if HAVE_CUDA 17 | vector vt = as_vector(t); 18 | Eigen::Map m(&vt[0], t.d.rows(), t.d.cols()); 19 | os << m; 20 | #else 21 | os << (*t); 22 | #endif 23 | return os; 24 | } 25 | 26 | real as_scalar(const Tensor& t) { 27 | assert(t.d.size() == 1); 28 | #if HAVE_CUDA 29 | float res; 30 | CUDA_CHECK(cudaMemcpy(&res, t.v, sizeof(float), cudaMemcpyDeviceToHost)); 31 | return res; 32 | #else 33 | return t.v[0]; 34 | #endif 35 | } 36 | 37 | vector as_vector(const Tensor& v) { 38 | vector res(v.d.size()); 39 | #if HAVE_CUDA 40 | CUDA_CHECK(cudaMemcpy(&res[0], v.v, sizeof(real) * res.size(), cudaMemcpyDeviceToHost)); 41 | #else 42 | memcpy(&res[0], v.v, sizeof(real) * res.size()); 43 | #endif 44 | return res; 45 | } 46 | 47 | float TensorTools::AccessElement(const Tensor& v, int index) { 48 | #if HAVE_CUDA 49 | float ret; 50 | cudaMemcpyAsync(&ret, &v.v[index], sizeof(real), cudaMemcpyDeviceToHost); 51 | return ret; 52 | #else 53 | return v.v[index]; 54 | #endif 55 | } 56 | 57 | float TensorTools::AccessElement(const Tensor& v, const Dim& index) { 58 | #if HAVE_CUDA 59 | abort(); 60 | #else 61 | return (*v)(index[0], index[1]); 62 | #endif 63 | } 64 | 65 | void TensorTools::SetElement(const Tensor& v, int index, float value) { 66 | #if HAVE_CUDA 67 | cudaMemcpyAsync(&v.v[index], &value, sizeof(real), cudaMemcpyHostToDevice); 68 | #else 69 | v.v[index] = value; 70 | #endif 71 | } 72 | 73 | void TensorTools::SetElements(const Tensor& v, const vector& vec) { 74 | #if HAVE_CUDA 75 | cudaMemcpyAsync(v.v, &vec[0], sizeof(real) * vec.size(), cudaMemcpyHostToDevice); 76 | #else 77 | memcpy(v.v, &vec[0], sizeof(real) * 
vec.size()); 78 | #endif 79 | } 80 | 81 | void TensorTools::CopyElements(const Tensor& v, const Tensor& v_src) { 82 | #if HAVE_CUDA 83 | cudaMemcpyAsync(v.v, v_src.v, sizeof(real) * v.d.size(), cudaMemcpyDeviceToDevice); 84 | #else 85 | memcpy(v.v, v_src.v, sizeof(real) * v.d.size()); 86 | #endif 87 | } 88 | 89 | void TensorTools::Constant(Tensor& d, float c) { 90 | #if HAVE_CUDA 91 | if (!c) { 92 | CUDA_CHECK(cudaMemsetAsync(d.v, 0, d.d.size() * sizeof(float))); 93 | } else { 94 | fill(d.v, d.v + d.d.size(), c); 95 | } 96 | #else 97 | if (!c) { 98 | memset(d.v, c, d.d.size() * sizeof(float)); 99 | } else { 100 | fill(d.v, d.v + d.d.size(), c); 101 | } 102 | #endif 103 | } 104 | 105 | void TensorTools::Zero(Tensor& d) { 106 | Constant(d, 0); 107 | } 108 | 109 | void TensorTools::Randomize(Tensor& val, real scale) { 110 | uniform_real_distribution distribution(-scale,scale); 111 | auto b = [&] {return distribution(*rndeng);}; 112 | #if HAVE_CUDA 113 | float* t = new float[val.d.size()]; 114 | generate(t, t + val.d.size(), b); 115 | CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); 116 | delete[] t; 117 | #else 118 | generate(val.v, val.v + val.d.size(), b); 119 | #endif 120 | } 121 | 122 | void TensorTools::Randomize(Tensor& d) { 123 | Randomize(d, sqrt(6) / sqrt(d.d.sum_dims())); 124 | } 125 | 126 | void TensorTools::RandomBernoulli(Tensor& val, real p, real scale) { 127 | bernoulli_distribution distribution(p); 128 | auto b = [&] {return distribution(*rndeng) * scale;}; 129 | #if HAVE_CUDA 130 | float* t = new float[val.d.size()]; 131 | generate(t, t + val.d.size(), b); 132 | CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); 133 | delete[] t; 134 | #else 135 | generate(val.v, val.v + val.d.size(), b); 136 | #endif 137 | } 138 | 139 | void TensorTools::RandomizeNormal(real mean, real stddev, Tensor& val) { 140 | normal_distribution distribution(mean, stddev); 141 | auto b = [&] {return distribution(*rndeng);}; 142 | #if HAVE_CUDA 143 | float* t = new float[val.d.size()]; 144 | generate(t, t + val.d.size(), b); 145 | CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); 146 | delete[] t; 147 | #else 148 | generate(val.v, val.v + val.d.size(), b); 149 | #endif 150 | } 151 | 152 | real rand01() { 153 | uniform_real_distribution distribution(0, 1); 154 | return distribution(*rndeng); 155 | } 156 | 157 | int rand0n(int n) { 158 | assert(n > 0); 159 | int x = rand01() * n; 160 | while(n == x) { x = rand01() * n; } 161 | return x; 162 | } 163 | 164 | real rand_normal() { 165 | normal_distribution distribution(0, 1); 166 | return distribution(*rndeng); 167 | } 168 | 169 | } // namespace cnn 170 | -------------------------------------------------------------------------------- /cnn/param-nodes.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_PARAM_NODES_H_ 2 | #define CNN_PARAM_NODES_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/model.h" 6 | 7 | namespace cnn { 8 | 9 | struct ParameterNodeBase : public Node { 10 | virtual void accumulate_grad(const Tensor& g) = 0; 11 | }; 12 | 13 | // represents optimizable parameters 14 | struct ParameterNode : public ParameterNodeBase { 15 | explicit ParameterNode(Parameters* p) : dim(p->dim), params(p) {} 16 | std::string as_string(const std::vector& arg_names) const override; 17 | Dim dim_forward(const std::vector& xs) const override; 18 | void forward_impl(const std::vector& xs, Tensor& fx) const 
override; 19 | void backward_impl(const std::vector& xs, 20 | const Tensor& fx, 21 | const Tensor& dEdf, 22 | unsigned i, 23 | Tensor& dEdxi) const override; 24 | void accumulate_grad(const Tensor& g) override; 25 | Dim dim; 26 | Parameters* params; 27 | }; 28 | 29 | // represents optimizable parameters that are being held constant 30 | struct ConstParameterNode : public Node { 31 | explicit ConstParameterNode(Parameters* p) : dim(p->dim), params(p) {} 32 | std::string as_string(const std::vector& arg_names) const override; 33 | Dim dim_forward(const std::vector& xs) const override; 34 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 35 | void backward_impl(const std::vector& xs, 36 | const Tensor& fx, 37 | const Tensor& dEdf, 38 | unsigned i, 39 | Tensor& dEdxi) const override; 40 | Dim dim; 41 | Parameters* params; 42 | }; 43 | 44 | // represents specified (not learned) inputs to the network 45 | struct InputNode : public Node { 46 | explicit InputNode(const Dim& d, const std::vector& dat) : dim(d), data(dat), pdata(&data) {} 47 | explicit InputNode(const Dim& d, const std::vector* pdat) : dim(d), data(), pdata(pdat) {} 48 | std::string as_string(const std::vector& arg_names) const override; 49 | Dim dim_forward(const std::vector& xs) const override; 50 | virtual bool supports_multibatch() const override { return true; } 51 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 52 | void backward_impl(const std::vector& xs, 53 | const Tensor& fx, 54 | const Tensor& dEdf, 55 | unsigned i, 56 | Tensor& dEdxi) const override; 57 | Dim dim; 58 | const std::vector data; 59 | const std::vector* pdata; 60 | }; 61 | 62 | // represents specified (not learned) scalar inputs to the network 63 | struct ScalarInputNode : public Node { 64 | explicit ScalarInputNode(real s) : data(s), pdata(&data) {} 65 | explicit ScalarInputNode(const real* ps) : data(), pdata(ps) {} 66 | std::string as_string(const std::vector& arg_names) const override; 67 | Dim dim_forward(const std::vector& xs) const override; 68 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 69 | void backward_impl(const std::vector& xs, 70 | const Tensor& fx, 71 | const Tensor& dEdf, 72 | unsigned i, 73 | Tensor& dEdxi) const override; 74 | const cnn::real data; 75 | const cnn::real* pdata; 76 | }; 77 | 78 | // represents a matrix/vector embedding of an item of a discrete set (1-hot coding) 79 | struct LookupNode : public ParameterNodeBase { 80 | LookupNode(LookupParameters* p, unsigned ind) : dim(p->dim), index(ind), pindex(&index), indices(), pindices(), params(p) {} 81 | LookupNode(LookupParameters* p, const unsigned* pind) : dim(p->dim), index(), pindex(pind), indices(), pindices(), params(p) {} 82 | LookupNode(LookupParameters* p, const std::vector& indices) : dim(p->dim), index(), pindex(), indices(indices), pindices(&this->indices), params(p) { 83 | dim.bd = pindices->size(); 84 | } 85 | LookupNode(LookupParameters* p, const std::vector* pindices) : dim(p->dim), index(), pindex(), indices(), pindices(pindices), params(p) { 86 | dim.bd = pindices->size(); 87 | } 88 | std::string as_string(const std::vector& arg_names) const override; 89 | Dim dim_forward(const std::vector& xs) const override; 90 | virtual bool supports_multibatch() const override { return true; } 91 | void forward_impl(const std::vector& xs, Tensor& fx) const override; 92 | void backward_impl(const std::vector& xs, 93 | const Tensor& fx, 94 | const Tensor& dEdf, 95 | unsigned i, 96 | Tensor& dEdxi) const 
override; 97 | void accumulate_grad(const Tensor& g) override; 98 | Dim dim; 99 | unsigned index; 100 | const unsigned* pindex; 101 | std::vector indices; 102 | const std::vector* pindices; 103 | LookupParameters* params; 104 | }; 105 | 106 | } // namespace cnn 107 | 108 | #endif 109 | -------------------------------------------------------------------------------- /cnn/rnn.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_RNN_H_ 2 | #define CNN_RNN_H_ 3 | 4 | #include "cnn/cnn.h" 5 | #include "cnn/rnn-state-machine.h" 6 | #include "cnn/expr.h" 7 | 8 | using namespace cnn::expr; 9 | 10 | namespace cnn { 11 | 12 | class Model; 13 | 14 | BOOST_STRONG_TYPEDEF(int, RNNPointer) 15 | inline void swap(RNNPointer& i1, RNNPointer& i2) { 16 | RNNPointer t = i1; i1 = i2; i2 = t; 17 | } 18 | 19 | // interface for constructing an RNN, LSTM, GRU, etc. 20 | struct RNNBuilder { 21 | RNNBuilder() : cur(-1) {} 22 | virtual ~RNNBuilder(); 23 | 24 | RNNPointer state() const { return cur; } 25 | 26 | // call this to reset the builder when you are working with a newly 27 | // created ComputationGraph object 28 | void new_graph(ComputationGraph& cg) { 29 | sm.transition(RNNOp::new_graph); 30 | new_graph_impl(cg); 31 | } 32 | 33 | // Reset for new sequence 34 | // call this before add_input and after new_graph, 35 | // when starting a new sequence on the same hypergraph. 36 | // h_0 is used to initialize hidden layers at timestep 0 to given values 37 | void start_new_sequence(const std::vector& h_0={}) { 38 | sm.transition(RNNOp::start_new_sequence); 39 | cur = RNNPointer(-1); 40 | head.clear(); 41 | start_new_sequence_impl(h_0); 42 | } 43 | 44 | // add another timestep by reading in the variable x 45 | // return the hidden representation of the deepest layer 46 | Expression add_input(const Expression& x) { 47 | sm.transition(RNNOp::add_input); 48 | head.push_back(cur); 49 | int rcp = cur; 50 | cur = head.size() - 1; 51 | return add_input_impl(rcp, x); 52 | } 53 | 54 | // add another timestep, but define recurrent connection to prev 55 | // rather than to head[cur] 56 | // this can be used to construct trees, implement beam search, etc. 57 | Expression add_input(const RNNPointer& prev, const Expression& x) { 58 | sm.transition(RNNOp::add_input); 59 | head.push_back(prev); 60 | cur = head.size() - 1; 61 | return add_input_impl(prev, x); 62 | } 63 | 64 | // rewind the last timestep - this DOES NOT remove the variables 65 | // from the computation graph, it just means the next time step will 66 | // see a different previous state. You can remind as many times as 67 | // you want. 
68 | void rewind_one_step() { 69 | cur = head[cur]; 70 | } 71 | 72 | // returns node (index) of most recent output 73 | virtual Expression back() const = 0; 74 | // access the final output of each hidden layer 75 | virtual std::vector final_h() const = 0; 76 | virtual std::vector get_h(RNNPointer i) const = 0; 77 | // access the state of each hidden layer, in a format that can be used in 78 | // start_new_sequence 79 | virtual std::vector final_s() const = 0; 80 | virtual unsigned num_h0_components() const = 0; 81 | virtual std::vector get_s(RNNPointer i) const = 0; 82 | // copy the parameters of another builder 83 | virtual void copy(const RNNBuilder & params) = 0; 84 | protected: 85 | virtual void new_graph_impl(ComputationGraph& cg) = 0; 86 | virtual void start_new_sequence_impl(const std::vector& h_0) = 0; 87 | virtual Expression add_input_impl(int prev, const Expression& x) = 0; 88 | RNNPointer cur; 89 | private: 90 | // the state machine ensures that the caller is behaving 91 | RNNStateMachine sm; 92 | std::vector head; // head[i] returns the head position 93 | }; 94 | 95 | struct SimpleRNNBuilder : public RNNBuilder { 96 | SimpleRNNBuilder() = default; 97 | explicit SimpleRNNBuilder(unsigned layers, 98 | unsigned input_dim, 99 | unsigned hidden_dim, 100 | Model* model, 101 | bool support_lags=false); 102 | 103 | protected: 104 | void new_graph_impl(ComputationGraph& cg) override; 105 | void start_new_sequence_impl(const std::vector& h_0) override; 106 | Expression add_input_impl(int prev, const Expression& x) override; 107 | 108 | public: 109 | Expression add_auxiliary_input(const Expression& x, const Expression &aux); 110 | 111 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 112 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 113 | std::vector final_s() const override { return final_h(); } 114 | 115 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? 
h0 : h[i]); } 116 | std::vector get_s(RNNPointer i) const override { return get_h(i); } 117 | void copy(const RNNBuilder & params) override; 118 | 119 | unsigned num_h0_components() const override { return layers; } 120 | 121 | private: 122 | // first index is layer, then x2h h2h hb 123 | std::vector> params; 124 | 125 | // first index is layer, then x2h h2h hb 126 | std::vector> param_vars; 127 | 128 | // first index is time, second is layer 129 | std::vector> h; 130 | 131 | // initial value of h 132 | // defaults to zero matrix input 133 | std::vector h0; 134 | 135 | unsigned layers; 136 | bool lagging; 137 | }; 138 | 139 | } // namespace cnn 140 | 141 | #endif 142 | -------------------------------------------------------------------------------- /cnn/exec.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/exec.h" 2 | 3 | #include "cnn/param-nodes.h" 4 | 5 | using namespace std; 6 | 7 | namespace cnn { 8 | 9 | ExecutionEngine::~ExecutionEngine() {} 10 | 11 | void SimpleExecutionEngine::invalidate() { 12 | num_nodes_evaluated = 0; 13 | } 14 | 15 | const Tensor& SimpleExecutionEngine::forward() { 16 | const VariableIndex node_max_index = (VariableIndex)(cg.nodes.size() - 1); 17 | return forward(node_max_index); 18 | } 19 | 20 | const Tensor& SimpleExecutionEngine::forward(VariableIndex i) { 21 | invalidate(); 22 | return incremental_forward(i); 23 | } 24 | 25 | const Tensor& SimpleExecutionEngine::get_value(VariableIndex i) { 26 | assert(i < cg.nodes.size()); 27 | if (i >= num_nodes_evaluated) { 28 | incremental_forward(); 29 | } 30 | return nfxs[i]; 31 | } 32 | 33 | const Tensor& SimpleExecutionEngine::incremental_forward() { 34 | const VariableIndex node_max_index = (VariableIndex)(cg.nodes.size() - 1); 35 | return incremental_forward(node_max_index); 36 | } 37 | 38 | const Tensor& SimpleExecutionEngine::incremental_forward(VariableIndex i) { 39 | assert(i < cg.nodes.size()); 40 | 41 | // free any old memory if this is a new CG 42 | if (num_nodes_evaluated == 0) fxs->free(); 43 | 44 | if (i >= num_nodes_evaluated) { 45 | nfxs.resize(i + 1); 46 | 47 | //vector dummy(5, "x"); 48 | vector xs(16); 49 | for (; num_nodes_evaluated <= i; ++num_nodes_evaluated) { 50 | const Node* node = cg.nodes[num_nodes_evaluated]; 51 | xs.resize(node->arity()); 52 | unsigned ai = 0; 53 | for (VariableIndex arg : node->args) { 54 | xs[ai] = &nfxs[arg]; 55 | ++ai; 56 | } 57 | nfxs[num_nodes_evaluated].d = node->dim; 58 | nfxs[num_nodes_evaluated].v = static_cast(fxs->allocate(node->dim.size() * sizeof(float))); 59 | if (nfxs[num_nodes_evaluated].v == nullptr) { 60 | cerr << "out of memory\n"; 61 | abort(); 62 | } 63 | void* aux_mem = nullptr; 64 | size_t aux_size = node->aux_storage_size(); 65 | if (aux_size) { 66 | aux_mem = fxs->allocate(aux_size); 67 | if (!aux_mem) { 68 | cerr << "aux out of memory\n"; 69 | abort(); 70 | } 71 | } 72 | node->aux_mem = aux_mem; 73 | node->forward(xs, nfxs[num_nodes_evaluated]); 74 | } 75 | } 76 | return nfxs[i]; 77 | } 78 | 79 | void SimpleExecutionEngine::backward() { 80 | assert(nfxs.size() == cg.nodes.size()); 81 | backward((VariableIndex)(cg.nodes.size()-1)); 82 | } 83 | 84 | // TODO what is happening with parameter nodes if from_where > param_node_id ? 
85 | void SimpleExecutionEngine::backward(VariableIndex from_where) { 86 | assert(from_where+1 <= nfxs.size()); 87 | assert(from_where+1 <= cg.nodes.size()); 88 | if (nfxs[from_where].d.size() != 1) { 89 | cerr << "backward() called on non-scalar node.\n"; 90 | abort(); 91 | } 92 | 93 | const unsigned num_nodes = from_where+1; 94 | ndEdfs.resize(num_nodes); 95 | dEdfs->free(); 96 | for (unsigned i = 0; i < num_nodes; ++i) { 97 | const auto dim = nfxs[i].d; 98 | ndEdfs[i].d = dim; 99 | ndEdfs[i].v = static_cast(dEdfs->allocate(dim.size() * sizeof(float))); 100 | if (!ndEdfs[i].v) { 101 | cerr << "out of memory while attempting to allocate space for derivatives\n"; 102 | abort(); 103 | } 104 | } 105 | dEdfs->zero_allocated_memory(); 106 | // initialize dE/dE = 1 107 | ndEdfs.back().v = kSCALAR_ONE; 108 | 109 | // here we find constant paths to avoid doing extra work 110 | // by default, a node is constant unless 111 | // 1) it is a parameter node 112 | // 2) it depends on a non-constant node 113 | // (thus, functions of constants and inputs end up being 114 | // false in this computation) 115 | vector needs_derivative(num_nodes, false); 116 | for (auto i : cg.parameter_nodes) 117 | needs_derivative[i] = true; 118 | 119 | for (unsigned ni = 0; ni < num_nodes; ++ni) { 120 | bool nd = needs_derivative[ni]; 121 | for (auto arg : cg.nodes[ni]->args) 122 | nd |= needs_derivative[arg]; 123 | needs_derivative[ni] = nd; 124 | } 125 | 126 | // loop in reverse topological order 127 | // consider only nodes that participate in the computation. 128 | vector in_computation(num_nodes, false); 129 | in_computation[num_nodes - 1] = true; 130 | vector xs; 131 | for (int i = num_nodes - 1; i >= 0; --i) { 132 | if (!in_computation[i]) continue; 133 | const Node* node = cg.nodes[i]; 134 | xs.resize(node->arity()); 135 | unsigned ai = 0; 136 | for (VariableIndex arg : node->args) { 137 | in_computation[arg] = true; 138 | xs[ai] = &nfxs[arg]; 139 | ++ai; 140 | } 141 | ai = 0; 142 | for (VariableIndex arg : node->args) { 143 | if (needs_derivative[arg]) { 144 | node->backward(xs, nfxs[i], ndEdfs[i], ai, ndEdfs[arg]); 145 | } 146 | ++ai; 147 | } 148 | } 149 | 150 | // accumulate gradients into parameters 151 | // this is simpler than you might find in some other frameworks 152 | // since we assume parameters come into the graph as a "function" 153 | // that returns the current value of the parameters 154 | for (VariableIndex i : cg.parameter_nodes) 155 | static_cast(cg.nodes[i])->accumulate_grad(ndEdfs[i]); 156 | } 157 | 158 | } // namespace cnn 159 | -------------------------------------------------------------------------------- /cnn/model.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_PARAMS_H_ 2 | #define CNN_PARAMS_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include 10 | #include 11 | 12 | #include "cnn/tensor.h" 13 | 14 | namespace cnn { 15 | 16 | // to deal with sparse updates, there are two parameter classes: 17 | // * Parameters represents a vector, matrix, (eventually higher order tensors) 18 | // of parameters. These are densely updated. 19 | // * LookupParameters represents a table of vectors that are used to embed a 20 | // set of discrete objects. These are sparsely updated. 
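// For illustration only (not part of the original header): the distinction
// described above shows up in client code roughly as follows, using the Model
// API declared later in this file (dimensions and vocabulary size are
// arbitrary example values).
//
//   Model model;
//   Parameters* W = model.add_parameters({128, 64});                 // dense weight matrix
//   LookupParameters* E = model.add_lookup_parameters(10000, {64});  // embedding table
//
// W receives a dense gradient on every update, while E accumulates gradients
// only for the rows that were actually looked up (tracked in non_zero_grads).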
21 | 22 | struct ParametersBase { 23 | friend class Model; 24 | virtual void scale_parameters(float a) = 0; 25 | virtual void squared_l2norm(float* sqnorm) const = 0; 26 | virtual void g_squared_l2norm(float* sqnorm) const = 0; 27 | virtual size_t size() const = 0; 28 | virtual ~ParametersBase(); 29 | }; 30 | 31 | // represents parameters (e.g., a weight matrix) that will be optimized 32 | struct Parameters : public ParametersBase { 33 | friend class Model; 34 | void scale_parameters(float a) override; 35 | void squared_l2norm(float* sqnorm) const override; 36 | void g_squared_l2norm(float* sqnorm) const override; 37 | size_t size() const override; 38 | 39 | void copy(const Parameters & val); 40 | void accumulate_grad(const Tensor& g); 41 | void clear(); 42 | 43 | Dim dim; 44 | Tensor values; 45 | Tensor g; 46 | private: 47 | Parameters() {} 48 | explicit Parameters(const Dim& d, float minmax); // initialize with ~U(-minmax,+minmax) 49 | // or Glorot initialization if minmax = 0 50 | friend class boost::serialization::access; 51 | template void serialize(Archive& ar, const unsigned int) { 52 | ar & dim; 53 | ar & values; 54 | } 55 | }; 56 | 57 | // represents a matrix/vector embedding of a discrete set 58 | struct LookupParameters : public ParametersBase { 59 | friend class Model; 60 | void scale_parameters(float a) override; 61 | void squared_l2norm(float* sqnorm) const override; 62 | void g_squared_l2norm(float* sqnorm) const override; 63 | size_t size() const override; 64 | void Initialize(unsigned index, const std::vector& val); 65 | 66 | void copy(const LookupParameters & val); 67 | void accumulate_grad(unsigned index, const Tensor& g); 68 | void clear(); 69 | 70 | Dim dim; 71 | std::vector values; 72 | std::vector grads; 73 | // gradients are sparse, so track which components are nonzero 74 | std::unordered_set non_zero_grads; 75 | private: 76 | LookupParameters() {} 77 | LookupParameters(unsigned n, const Dim& d); 78 | friend class boost::serialization::access; 79 | template 80 | void save(Archive& ar, const unsigned int) const { 81 | ar & dim; 82 | int nv = values.size(); 83 | ar & nv; 84 | for (unsigned i = 0; i < values.size(); ++i) 85 | ar & values[i]; 86 | } 87 | template 88 | void load(Archive& ar, const unsigned int) { 89 | ar & dim; 90 | int nv; 91 | ar & nv; 92 | assert(nv == (int)values.size()); 93 | for (unsigned i = 0; i < values.size(); ++i) 94 | ar & values[i]; 95 | } 96 | BOOST_SERIALIZATION_SPLIT_MEMBER() 97 | }; 98 | 99 | // this is a collection of parameters 100 | // if you need a matrix of parameters, or a lookup table - ask an instance of this class 101 | // this knows how to serialize itself 102 | // parameters know how to track their gradients, but any extra information (like velocity) will live here 103 | class Model { 104 | public: 105 | Model() : gradient_norm_scratch() {} 106 | ~Model(); 107 | float gradient_l2_norm() const; 108 | void reset_gradient(); 109 | // set scale to use custom initialization 110 | Parameters* add_parameters(const Dim& d, float scale = 0.0f); 111 | LookupParameters* add_lookup_parameters(unsigned n, const Dim& d); 112 | // project weights so their L2 norm = radius 113 | void project_weights(float radius = 1.0f); 114 | 115 | const std::vector& all_parameters_list() const { return all_params; } 116 | const std::vector& parameters_list() const { return params; } 117 | const std::vector& lookup_parameters_list() const { return lookup_params; } 118 | 119 | private: 120 | friend class boost::serialization::access; 121 | template 122 | 
void save(Archive& ar, const unsigned int) const { 123 | int np = params.size(); 124 | int nlp = lookup_params.size(); 125 | ar & np; 126 | ar & nlp; 127 | for (unsigned i = 0; i < params.size(); ++i) 128 | ar & *params[i]; 129 | for (unsigned i = 0; i < lookup_params.size(); ++i) 130 | ar & *lookup_params[i]; 131 | } 132 | template 133 | void load(Archive& ar, const unsigned int) { 134 | int np, nlp; 135 | ar & np; 136 | ar & nlp; 137 | assert(np == (int)params.size()); 138 | assert(nlp == (int)lookup_params.size()); 139 | for (unsigned i = 0; i < params.size(); ++i) 140 | ar & *params[i]; 141 | for (unsigned i = 0; i < lookup_params.size(); ++i) 142 | ar & *lookup_params[i]; 143 | all_params.clear(); 144 | for (auto p : params) all_params.push_back(p); 145 | for (auto p : lookup_params) all_params.push_back(p); 146 | } 147 | BOOST_SERIALIZATION_SPLIT_MEMBER() 148 | 149 | std::vector all_params; 150 | std::vector params; 151 | std::vector lookup_params; 152 | mutable float* gradient_norm_scratch; 153 | }; 154 | 155 | void save_cnn_model(std::string filename, Model* model); 156 | void load_cnn_model(std::string filename, Model* model); 157 | 158 | } // namespace cnn 159 | 160 | #endif 161 | -------------------------------------------------------------------------------- /cnn/lstm.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/lstm.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | 10 | using namespace std; 11 | using namespace cnn::expr; 12 | 13 | namespace cnn { 14 | 15 | enum { X2I, H2I, C2I, BI, X2O, H2O, C2O, BO, X2C, H2C, BC }; 16 | 17 | LSTMBuilder::LSTMBuilder(unsigned layers, 18 | unsigned input_dim, 19 | unsigned hidden_dim, 20 | Model* model) : layers(layers) { 21 | unsigned layer_input_dim = input_dim; 22 | for (unsigned i = 0; i < layers; ++i) { 23 | // i 24 | Parameters* p_x2i = model->add_parameters({hidden_dim, layer_input_dim}); 25 | Parameters* p_h2i = model->add_parameters({hidden_dim, hidden_dim}); 26 | Parameters* p_c2i = model->add_parameters({hidden_dim, hidden_dim}); 27 | Parameters* p_bi = model->add_parameters({hidden_dim}); 28 | 29 | // o 30 | Parameters* p_x2o = model->add_parameters({hidden_dim, layer_input_dim}); 31 | Parameters* p_h2o = model->add_parameters({hidden_dim, hidden_dim}); 32 | Parameters* p_c2o = model->add_parameters({hidden_dim, hidden_dim}); 33 | Parameters* p_bo = model->add_parameters({hidden_dim}); 34 | 35 | // c 36 | Parameters* p_x2c = model->add_parameters({hidden_dim, layer_input_dim}); 37 | Parameters* p_h2c = model->add_parameters({hidden_dim, hidden_dim}); 38 | Parameters* p_bc = model->add_parameters({hidden_dim}); 39 | layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next 40 | 41 | vector ps = {p_x2i, p_h2i, p_c2i, p_bi, p_x2o, p_h2o, p_c2o, p_bo, p_x2c, p_h2c, p_bc}; 42 | params.push_back(ps); 43 | } // layers 44 | dropout_rate = 0.0f; 45 | } 46 | 47 | void LSTMBuilder::new_graph_impl(ComputationGraph& cg){ 48 | param_vars.clear(); 49 | 50 | for (unsigned i = 0; i < layers; ++i){ 51 | auto& p = params[i]; 52 | 53 | //i 54 | Expression i_x2i = parameter(cg,p[X2I]); 55 | Expression i_h2i = parameter(cg,p[H2I]); 56 | Expression i_c2i = parameter(cg,p[C2I]); 57 | Expression i_bi = parameter(cg,p[BI]); 58 | //o 59 | Expression i_x2o = parameter(cg,p[X2O]); 60 | Expression i_h2o = parameter(cg,p[H2O]); 61 | Expression i_c2o = parameter(cg,p[C2O]); 62 | Expression i_bo = parameter(cg,p[BO]); 63 | //c 64 | 
Expression i_x2c = parameter(cg,p[X2C]); 65 | Expression i_h2c = parameter(cg,p[H2C]); 66 | Expression i_bc = parameter(cg,p[BC]); 67 | 68 | vector vars = {i_x2i, i_h2i, i_c2i, i_bi, i_x2o, i_h2o, i_c2o, i_bo, i_x2c, i_h2c, i_bc}; 69 | param_vars.push_back(vars); 70 | } 71 | } 72 | 73 | // layout: 0..layers = c 74 | // layers+1..2*layers = h 75 | void LSTMBuilder::start_new_sequence_impl(const vector& hinit) { 76 | h.clear(); 77 | c.clear(); 78 | if (hinit.size() > 0) { 79 | assert(layers*2 == hinit.size()); 80 | h0.resize(layers); 81 | c0.resize(layers); 82 | for (unsigned i = 0; i < layers; ++i) { 83 | c0[i] = hinit[i]; 84 | h0[i] = hinit[i + layers]; 85 | } 86 | has_initial_state = true; 87 | } else { 88 | has_initial_state = false; 89 | } 90 | } 91 | 92 | Expression LSTMBuilder::add_input_impl(int prev, const Expression& x) { 93 | h.push_back(vector(layers)); 94 | c.push_back(vector(layers)); 95 | vector& ht = h.back(); 96 | vector& ct = c.back(); 97 | Expression in = x; 98 | for (unsigned i = 0; i < layers; ++i) { 99 | const vector& vars = param_vars[i]; 100 | Expression i_h_tm1, i_c_tm1; 101 | bool has_prev_state = (prev >= 0 || has_initial_state); 102 | if (prev < 0) { 103 | if (has_initial_state) { 104 | // intial value for h and c at timestep 0 in layer i 105 | // defaults to zero matrix input if not set in add_parameter_edges 106 | i_h_tm1 = h0[i]; 107 | i_c_tm1 = c0[i]; 108 | } 109 | } else { // t > 0 110 | i_h_tm1 = h[prev][i]; 111 | i_c_tm1 = c[prev][i]; 112 | } 113 | // apply dropout according to http://arxiv.org/pdf/1409.2329v5.pdf 114 | if (dropout_rate) in = dropout(in, dropout_rate); 115 | // input 116 | Expression i_ait; 117 | if (has_prev_state) 118 | i_ait = affine_transform({vars[BI], vars[X2I], in, vars[H2I], i_h_tm1, vars[C2I], i_c_tm1}); 119 | else 120 | i_ait = affine_transform({vars[BI], vars[X2I], in}); 121 | Expression i_it = logistic(i_ait); 122 | // forget 123 | Expression i_ft = 1.f - i_it; 124 | // write memory cell 125 | Expression i_awt; 126 | if (has_prev_state) 127 | i_awt = affine_transform({vars[BC], vars[X2C], in, vars[H2C], i_h_tm1}); 128 | else 129 | i_awt = affine_transform({vars[BC], vars[X2C], in}); 130 | Expression i_wt = tanh(i_awt); 131 | // output 132 | if (has_prev_state) { 133 | Expression i_nwt = cwise_multiply(i_it,i_wt); 134 | Expression i_crt = cwise_multiply(i_ft,i_c_tm1); 135 | ct[i] = i_crt + i_nwt; 136 | } else { 137 | ct[i] = cwise_multiply(i_it,i_wt); 138 | } 139 | 140 | Expression i_aot; 141 | if (has_prev_state) 142 | i_aot = affine_transform({vars[BO], vars[X2O], in, vars[H2O], i_h_tm1, vars[C2O], ct[i]}); 143 | else 144 | i_aot = affine_transform({vars[BO], vars[X2O], in, vars[C2O], ct[i]}); 145 | Expression i_ot = logistic(i_aot); 146 | Expression ph_t = tanh(ct[i]); 147 | in = ht[i] = cwise_multiply(i_ot,ph_t); 148 | } 149 | if (dropout_rate) return dropout(ht.back(), dropout_rate); 150 | else return ht.back(); 151 | } 152 | 153 | void LSTMBuilder::copy(const RNNBuilder & rnn) { 154 | const LSTMBuilder & rnn_lstm = (const LSTMBuilder&)rnn; 155 | assert(params.size() == rnn_lstm.params.size()); 156 | for(size_t i = 0; i < params.size(); ++i) 157 | for(size_t j = 0; j < params[i].size(); ++j) 158 | params[i][j]->copy(*rnn_lstm.params[i][j]); 159 | } 160 | 161 | } // namespace cnn 162 | -------------------------------------------------------------------------------- /cnn/deep-lstm.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/deep-lstm.h" 2 | 3 | #include 4 | #include 5 
| #include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | 10 | using namespace std; 11 | using namespace cnn::expr; 12 | 13 | namespace cnn { 14 | 15 | enum { X2I, H2I, C2I, BI, X2O, H2O, C2O, BO, X2C, H2C, BC }; 16 | 17 | DeepLSTMBuilder::DeepLSTMBuilder(unsigned layers, 18 | unsigned input_dim, 19 | unsigned hidden_dim, 20 | Model* model) : layers(layers) { 21 | unsigned layer_input_dim = input_dim; 22 | for (unsigned i = 0; i < layers; ++i) { 23 | // i 24 | Parameters* p_x2i = model->add_parameters({hidden_dim, layer_input_dim}); 25 | Parameters* p_h2i = model->add_parameters({hidden_dim, hidden_dim}); 26 | Parameters* p_c2i = model->add_parameters({hidden_dim, hidden_dim}); 27 | Parameters* p_bi = model->add_parameters({hidden_dim}); 28 | 29 | // o 30 | Parameters* p_x2o = model->add_parameters({hidden_dim, layer_input_dim}); 31 | Parameters* p_h2o = model->add_parameters({hidden_dim, hidden_dim}); 32 | Parameters* p_c2o = model->add_parameters({hidden_dim, hidden_dim}); 33 | Parameters* p_bo = model->add_parameters({hidden_dim}); 34 | 35 | // c 36 | Parameters* p_x2c = model->add_parameters({hidden_dim, layer_input_dim}); 37 | Parameters* p_h2c = model->add_parameters({hidden_dim, hidden_dim}); 38 | Parameters* p_bc = model->add_parameters({hidden_dim}); 39 | layer_input_dim = hidden_dim + input_dim; // output (hidden) from 1st layer is input to next 40 | 41 | vector ps = {p_x2i, p_h2i, p_c2i, p_bi, p_x2o, p_h2o, p_c2o, p_bo, p_x2c, p_h2c, p_bc}; 42 | params.push_back(ps); 43 | } // layers 44 | } 45 | 46 | void DeepLSTMBuilder::new_graph_impl(ComputationGraph& cg){ 47 | param_vars.clear(); 48 | 49 | for (unsigned i = 0; i < layers; ++i){ 50 | auto& p = params[i]; 51 | 52 | //i 53 | Expression i_x2i = parameter(cg,p[X2I]); 54 | Expression i_h2i = parameter(cg,p[H2I]); 55 | Expression i_c2i = parameter(cg,p[C2I]); 56 | Expression i_bi = parameter(cg,p[BI]); 57 | //o 58 | Expression i_x2o = parameter(cg,p[X2O]); 59 | Expression i_h2o = parameter(cg,p[H2O]); 60 | Expression i_c2o = parameter(cg,p[C2O]); 61 | Expression i_bo = parameter(cg,p[BO]); 62 | //c 63 | Expression i_x2c = parameter(cg,p[X2C]); 64 | Expression i_h2c = parameter(cg,p[H2C]); 65 | Expression i_bc = parameter(cg,p[BC]); 66 | 67 | vector vars = {i_x2i, i_h2i, i_c2i, i_bi, i_x2o, i_h2o, i_c2o, i_bo, i_x2c, i_h2c, i_bc}; 68 | param_vars.push_back(vars); 69 | } 70 | } 71 | 72 | // layout: 0..layers = c 73 | // layers+1..2*layers = h 74 | void DeepLSTMBuilder::start_new_sequence_impl(const vector& hinit) { 75 | h.clear(); 76 | c.clear(); 77 | if (hinit.size() > 0) { 78 | assert(layers*2 == hinit.size()); 79 | h0.resize(layers); 80 | c0.resize(layers); 81 | for (unsigned i = 0; i < layers; ++i) { 82 | c0[i] = hinit[i]; 83 | h0[i] = hinit[i + layers]; 84 | } 85 | has_initial_state = true; 86 | } else { 87 | has_initial_state = false; 88 | } 89 | } 90 | 91 | Expression DeepLSTMBuilder::add_input_impl(int prev, const Expression& x) { 92 | h.push_back(vector(layers)); 93 | c.push_back(vector(layers)); 94 | o.push_back(Expression()); 95 | vector& ht = h.back(); 96 | vector& ct = c.back(); 97 | Expression& ot = o.back(); 98 | Expression in = x; 99 | vector cc(layers); 100 | for (unsigned i = 0; i < layers; ++i) { 101 | if (i > 0) 102 | in = concatenate({in, x}); 103 | const vector& vars = param_vars[i]; 104 | Expression i_h_tm1, i_c_tm1; 105 | bool has_prev_state = (prev >= 0 || has_initial_state); 106 | if (prev < 0) { 107 | if (has_initial_state) { 108 | // intial value for h and c at timestep 0 in layer i 109 | // defaults to 
zero matrix input if not set in add_parameter_edges 110 | i_h_tm1 = h0[i]; 111 | i_c_tm1 = c0[i]; 112 | } 113 | } else { // t > 0 114 | i_h_tm1 = h[prev][i]; 115 | i_c_tm1 = c[prev][i]; 116 | } 117 | // input 118 | Expression i_ait; 119 | if (has_prev_state) 120 | // i_ait = vars[BI] + vars[X2I] * in + vars[H2I]*i_h_tm1 + vars[C2I] * i_c_tm1; 121 | i_ait = affine_transform({vars[BI], vars[X2I], in, vars[H2I], i_h_tm1, vars[C2I], i_c_tm1}); 122 | else 123 | // i_ait = vars[BI] + vars[X2I] * in; 124 | i_ait = affine_transform({vars[BI], vars[X2I], in}); 125 | Expression i_it = logistic(i_ait); 126 | // forget 127 | Expression i_ft = 1.f - i_it; 128 | // write memory cell 129 | Expression i_awt; 130 | if (has_prev_state) 131 | // i_awt = vars[BC] + vars[X2C] * in + vars[H2C]*i_h_tm1; 132 | i_awt = affine_transform({vars[BC], vars[X2C], in, vars[H2C], i_h_tm1}); 133 | else 134 | // i_awt = vars[BC] + vars[X2C] * in; 135 | i_awt = affine_transform({vars[BC], vars[X2C], in}); 136 | Expression i_wt = tanh(i_awt); 137 | // output 138 | if (has_prev_state) { 139 | Expression i_nwt = cwise_multiply(i_it,i_wt); 140 | Expression i_crt = cwise_multiply(i_ft,i_c_tm1); 141 | ct[i] = i_crt + i_nwt; 142 | } else { 143 | ct[i] = cwise_multiply(i_it,i_wt); 144 | } 145 | 146 | Expression i_aot; 147 | if (has_prev_state) 148 | // i_aot = vars[BO] + vars[X2O] * in + vars[H2O] * i_h_tm1 + vars[C2O] * ct[i]; 149 | i_aot = affine_transform({vars[BO], vars[X2O], in, vars[H2O], i_h_tm1, vars[C2O], ct[i]}); 150 | else 151 | // i_aot = vars[BO] + vars[X2O] * in; 152 | i_aot = affine_transform({vars[BO], vars[X2O], in}); 153 | Expression i_ot = logistic(i_aot); 154 | Expression ph_t = tanh(ct[i]); 155 | in = ht[i] = cwise_multiply(i_ot,ph_t); 156 | cc[i] = in; 157 | } 158 | ot = concatenate(cc); 159 | return ot; 160 | } 161 | 162 | } // namespace cnn 163 | -------------------------------------------------------------------------------- /cnn/param-nodes.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/param-nodes.h" 2 | #include "cnn/tensor.h" 3 | 4 | #include 5 | 6 | using namespace std; 7 | 8 | namespace cnn { 9 | 10 | string ConstParameterNode::as_string(const vector& arg_names) const { 11 | ostringstream s; 12 | s << "const_parameters(" << dim << ", " << params << ')'; 13 | return s.str(); 14 | } 15 | 16 | Dim ConstParameterNode::dim_forward(const vector& xs) const { 17 | assert(xs.size() == 0); 18 | return dim; 19 | } 20 | 21 | void ConstParameterNode::forward_impl(const vector& xs, Tensor& fx) const { 22 | assert(xs.size() == 0); 23 | fx.v = params->values.v; 24 | } 25 | 26 | void ConstParameterNode::backward_impl(const vector& xs, 27 | const Tensor& fx, 28 | const Tensor& dEdf, 29 | unsigned i, 30 | Tensor& dEdxi) const { 31 | cerr << "called backward() on arity 0 node: i = " << i << endl; 32 | abort(); 33 | } 34 | 35 | string ParameterNode::as_string(const vector& arg_names) const { 36 | ostringstream s; 37 | s << "parameters(" << dim << ", " << params << ')'; 38 | return s.str(); 39 | } 40 | 41 | Dim ParameterNode::dim_forward(const vector& xs) const { 42 | assert(xs.size() == 0); 43 | return dim; 44 | } 45 | 46 | void ParameterNode::forward_impl(const vector& xs, Tensor& fx) const { 47 | assert(xs.size() == 0); 48 | fx.v = params->values.v; 49 | } 50 | 51 | void ParameterNode::backward_impl(const vector& xs, 52 | const Tensor& fx, 53 | const Tensor& dEdf, 54 | unsigned i, 55 | Tensor& dEdxi) const { 56 | cerr << "called backward() on arity 0 node: i = " << 
i << endl; 57 | abort(); 58 | } 59 | 60 | void ParameterNode::accumulate_grad(const Tensor& g) { 61 | params->accumulate_grad(g); 62 | } 63 | 64 | string InputNode::as_string(const vector& arg_names) const { 65 | ostringstream s; 66 | s << "constant(" << dim << ')'; 67 | return s.str(); 68 | } 69 | 70 | Dim InputNode::dim_forward(const vector& xs) const { 71 | return dim; 72 | } 73 | 74 | void InputNode::forward_impl(const vector& xs, Tensor& fx) const { 75 | assert(xs.size() == 0); 76 | #if HAVE_CUDA 77 | cudaMemcpyAsync(fx.v, &pdata->front(), dim.size() * sizeof(float), cudaMemcpyHostToDevice); 78 | #else 79 | // TODO memcpy is only necessary if pdata->front() points to an unaligned location 80 | // need to compute this value 81 | bool is_input_address_aligned = false; 82 | if (!is_input_address_aligned) { 83 | memcpy(fx.v, &pdata->front(), dim.size() * sizeof(float)); 84 | } else { 85 | fx.v = const_cast(&pdata->front()); 86 | } 87 | #endif 88 | } 89 | 90 | void InputNode::backward_impl(const vector& xs, 91 | const Tensor& fx, 92 | const Tensor& dEdf, 93 | unsigned i, 94 | Tensor& dEdxi) const { 95 | cerr << "called backward() on arity 0 node\n"; 96 | abort(); 97 | } 98 | 99 | string ScalarInputNode::as_string(const vector& arg_names) const { 100 | ostringstream s; 101 | s << "scalar_constant(" << pdata << ')'; 102 | return s.str(); 103 | } 104 | 105 | Dim ScalarInputNode::dim_forward(const vector& xs) const { 106 | return Dim({1}); 107 | } 108 | 109 | void ScalarInputNode::forward_impl(const vector& xs, Tensor& fx) const { 110 | assert(xs.size() == 0); 111 | #if HAVE_CUDA 112 | cudaMemcpyAsync(fx.v, pdata, 1 * sizeof(float), cudaMemcpyHostToDevice); 113 | #else 114 | fx.v[0] = *pdata; 115 | #endif 116 | } 117 | 118 | void ScalarInputNode::backward_impl(const vector& xs, 119 | const Tensor& fx, 120 | const Tensor& dEdf, 121 | unsigned i, 122 | Tensor& dEdxi) const { 123 | cerr << "called backward() on arity 0 node\n"; 124 | abort(); 125 | } 126 | 127 | string LookupNode::as_string(const vector& arg_names) const { 128 | ostringstream s; 129 | s << "lookup_parameters(|x|=" << params->values.size() << " --> " << dim << ')'; 130 | return s.str(); 131 | } 132 | 133 | Dim LookupNode::dim_forward(const vector& xs) const { 134 | return dim; 135 | } 136 | 137 | void LookupNode::forward_impl(const vector& xs, Tensor& fx) const { 138 | assert(xs.size() == 0); 139 | if(pindex) { 140 | assert(*pindex < params->values.size()); 141 | assert (fx.d.batch_elems() == 1); 142 | fx.v = params->values[*pindex].v; 143 | } else { 144 | assert (pindices); 145 | assert (fx.d.batch_elems() == pindices->size()); 146 | for (unsigned b = 0; b < pindices->size(); ++b) { 147 | unsigned i = pindices->at(b); 148 | assert (i < params->values.size()); 149 | float* v = fx.v + fx.d.batch_size() * (b % fx.d.batch_elems()); 150 | #if HAVE_CUDA 151 | cudaMemcpyAsync(v, params->values[i].v, fx.d.batch_size() * sizeof(float), cudaMemcpyDeviceToDevice); 152 | #else 153 | memcpy(v, params->values[i].v, fx.d.batch_size() * sizeof(float)); 154 | #endif 155 | } 156 | } 157 | } 158 | 159 | void LookupNode::backward_impl(const vector& xs, 160 | const Tensor& fx, 161 | const Tensor& dEdf, 162 | unsigned i, 163 | Tensor& dEdxi) const { 164 | cerr << "called backward() on arity 0 node\n"; 165 | abort(); 166 | } 167 | 168 | void LookupNode::accumulate_grad(const Tensor& g) { 169 | if(pindex) { 170 | params->accumulate_grad(*pindex, g); 171 | } else { 172 | assert (pindices); 173 | const vector& gb = g.batch_elems(); 174 | for (unsigned b = 
0; b < pindices->size(); ++b) { 175 | unsigned i = pindices->at(b); 176 | assert (i < params->values.size()); 177 | params->accumulate_grad(i, gb[b]); 178 | } 179 | } 180 | } 181 | 182 | } // namespace cnn 183 | -------------------------------------------------------------------------------- /cnn/cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cnn/cnn.h" 6 | #include "cnn/cuda.h" 7 | 8 | using namespace std; 9 | 10 | namespace cnn { 11 | 12 | cublasHandle_t cublas_handle; 13 | 14 | static void RemoveArgs(int& argc, char**& argv, int& argi, int n) { 15 | for (int i = argi + n; i < argc; ++i) 16 | argv[i - n] = argv[i]; 17 | argc -= n; 18 | assert(argc >= 0); 19 | } 20 | 21 | #define MAX_GPUS 256 22 | 23 | vector Initialize_GPU(int& argc, char**& argv) { 24 | int nDevices; 25 | CUDA_CHECK(cudaGetDeviceCount(&nDevices)); 26 | if (nDevices < 1) { 27 | cerr << "[cnn] No GPUs found, recompile without DENABLE_CUDA=1\n"; 28 | throw std::runtime_error("No GPUs found but CNN compiled with CUDA support."); 29 | } 30 | // logic: no flags, you get 1 GPU 31 | // or you request a certain number of GPUs explicitly 32 | // or you request the device ids 33 | int requested_gpus = -1; 34 | vector gpu_mask(MAX_GPUS); 35 | int argi = 1; 36 | bool ngpus_requested = false; 37 | bool ids_requested = false; 38 | for( ;argi < argc; ++argi) { 39 | string arg = argv[argi]; 40 | if (arg == "--cnn_gpus" || arg == "--cnn-gpus") { 41 | if ((argi + 1) > argc) { 42 | cerr << "[cnn] --cnn-gpus expects an argument (number of GPUs to use)\n"; 43 | abort(); 44 | } else { 45 | if (ngpus_requested) { 46 | cerr << "Multiple instances of --cnn-gpus" << endl; abort(); 47 | } 48 | ngpus_requested = true; 49 | string a2 = argv[argi+1]; 50 | istringstream c(a2); c >> requested_gpus; 51 | RemoveArgs(argc, argv, argi, 2); 52 | } 53 | } else if (arg == "--cnn_gpu_ids" || arg == "--cnn-gpu-ids") { 54 | if ((argi + 1) > argc) { 55 | cerr << "[cnn] --cnn-gpu-ids expects an argument (comma separated list of physical GPU ids to use)\n"; 56 | abort(); 57 | } else { 58 | string a2 = argv[argi+1]; 59 | if (ids_requested) { 60 | cerr << "Multiple instances of --cnn-gpu-ids" << endl; abort(); 61 | } 62 | ids_requested = true; 63 | if (a2.size() % 2 != 1) { 64 | cerr << "Bad argument to --cnn-gpu-ids: " << a2 << endl; abort(); 65 | } 66 | for (unsigned i = 0; i < a2.size(); ++i) { 67 | if ((i % 2 == 0 && (a2[i] < '0' || a2[i] > '9')) || 68 | (i % 2 == 1 && a2[i] != ',')) { 69 | cerr << "Bad argument to --cnn-gpu-ids: " << a2 << endl; abort(); 70 | } 71 | if (i % 2 == 0) { 72 | int gpu_id = a2[i] - '0'; 73 | if (gpu_id >= nDevices) { 74 | cerr << "You requested GPU id " << gpu_id << " but system only reports up to " << nDevices << endl; 75 | abort(); 76 | } 77 | if (gpu_id >= MAX_GPUS) { cerr << "Raise MAX_GPUS\n"; abort(); } 78 | gpu_mask[gpu_id]++; 79 | requested_gpus++; 80 | if (gpu_mask[gpu_id] != 1) { 81 | cerr << "Bad argument to --cnn-gpu-ids: " << a2 << endl; abort(); 82 | } 83 | } 84 | } 85 | RemoveArgs(argc, argv, argi, 2); 86 | } 87 | } 88 | } 89 | if (ids_requested && ngpus_requested) { 90 | cerr << "Use only --cnn_gpus or --cnn_gpu_ids, not both\n"; 91 | abort(); 92 | } 93 | if (ngpus_requested || requested_gpus == -1) { 94 | if (requested_gpus == -1) requested_gpus = 1; 95 | cerr << "Request for " << requested_gpus << " GPU" << (requested_gpus == 1 ? 
"" : "s") << " ...\n"; 96 | for (int i = 0; i < MAX_GPUS; ++i) gpu_mask[i] = 1; 97 | } else if (ids_requested) { 98 | requested_gpus++; 99 | cerr << "[cnn] Request for " << requested_gpus << " specific GPU" << (requested_gpus == 1 ? "" : "s") << " ...\n"; 100 | } 101 | 102 | vector gpudevices; 103 | if (requested_gpus == 0) return gpudevices; 104 | if (requested_gpus > nDevices) { 105 | cerr << "You requested " << requested_gpus << " GPUs but system only reports " << nDevices << endl; 106 | abort(); 107 | } 108 | 109 | // after all that, requested_gpus is the number of GPUs to reserve 110 | // we now pick the ones that are both requested by the user or have 111 | // the most memory free 112 | 113 | vector gpu_free_mem(MAX_GPUS, 0); 114 | vector gpus(MAX_GPUS, 0); 115 | for (int i = 0; i < MAX_GPUS; ++i) gpus[i] = i; 116 | size_t free_bytes, total_bytes, max_free = 0; 117 | int selected = 0; 118 | for (int i = 0; i < nDevices; i++) { 119 | if (!gpu_mask[i]) continue; 120 | cudaDeviceProp prop; 121 | CUDA_CHECK(cudaGetDeviceProperties(&prop, i)); 122 | cerr << "[cnn] Device Number: " << i << endl; 123 | cerr << "[cnn] Device name: " << prop.name << endl; 124 | cerr << "[cnn] Memory Clock Rate (KHz): " << prop.memoryClockRate << endl; 125 | cerr << "[cnn] Memory Bus Width (bits): " << prop.memoryBusWidth << endl; 126 | cerr << "[cnn] Peak Memory Bandwidth (GB/s): " << (2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6) << endl; 127 | if (!prop.unifiedAddressing) { 128 | cerr << "[cnn] GPU does not support unified addressing.\n"; 129 | abort(); 130 | } 131 | CUDA_CHECK(cudaSetDevice(i)); 132 | CUDA_CHECK(cudaMemGetInfo( &free_bytes, &total_bytes )); 133 | CUDA_CHECK(cudaDeviceReset()); 134 | cerr << "[cnn] Memory Free (GB): " << free_bytes/1.0e9 << "/" << total_bytes/1.0e9 << endl; 135 | cerr << "[cnn]" << endl; 136 | gpu_free_mem[i] = free_bytes; 137 | } 138 | stable_sort(gpus.begin(), gpus.end(), [&](int a, int b) -> bool { return gpu_free_mem[a] > gpu_free_mem[b]; }); 139 | gpus.resize(requested_gpus); 140 | cerr << "[cnn] Device(s) selected:"; 141 | for (int i = 0; i < requested_gpus; ++i) { 142 | cerr << ' ' << gpus[i]; 143 | int mb = 512; 144 | Device* d = new Device_GPU(mb, gpus[i]); 145 | gpudevices.push_back(d); 146 | } 147 | cerr << endl; 148 | 149 | // eventually kill the global handle 150 | CUDA_CHECK(cudaSetDevice(gpus[0])); 151 | CUBLAS_CHECK(cublasCreate(&cublas_handle)); 152 | CUBLAS_CHECK(cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE)); 153 | return gpudevices; 154 | } 155 | 156 | } // namespace cnn 157 | -------------------------------------------------------------------------------- /cnn/cfsm-builder.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/cfsm-builder.h" 2 | 3 | #include 4 | #include 5 | 6 | using namespace std; 7 | 8 | namespace cnn { 9 | 10 | using namespace expr; 11 | 12 | inline bool is_ws(char x) { return (x == ' ' || x == '\t'); } 13 | inline bool not_ws(char x) { return (x != ' ' && x != '\t'); } 14 | 15 | NonFactoredSoftmaxBuilder::NonFactoredSoftmaxBuilder(unsigned rep_dim, unsigned vocab_size, Model* model) { 16 | p_w = model->add_parameters({vocab_size, rep_dim}); 17 | p_b = model->add_parameters({vocab_size}); 18 | } 19 | 20 | void NonFactoredSoftmaxBuilder::new_graph(ComputationGraph& cg) { 21 | pcg = &cg; 22 | w = parameter(cg, p_w); 23 | b = parameter(cg, p_b); 24 | } 25 | 26 | Expression NonFactoredSoftmaxBuilder::neg_log_softmax(const Expression& rep, unsigned wordidx) { 27 
| return pickneglogsoftmax(affine_transform({b, w, rep}), wordidx); 28 | } 29 | 30 | unsigned NonFactoredSoftmaxBuilder::sample(const expr::Expression& rep) { 31 | softmax(affine_transform({b, w, rep})); 32 | vector dist = as_vector(pcg->incremental_forward()); 33 | unsigned c = 0; 34 | double p = rand01(); 35 | for (; c < dist.size(); ++c) { 36 | p -= dist[c]; 37 | if (p < 0.0) { break; } 38 | } 39 | if (c == dist.size()) { 40 | --c; 41 | } 42 | return c; 43 | } 44 | 45 | ClassFactoredSoftmaxBuilder::ClassFactoredSoftmaxBuilder(unsigned rep_dim, 46 | const std::string& cluster_file, 47 | Dict* word_dict, 48 | Model* model) { 49 | ReadClusterFile(cluster_file, word_dict); 50 | const unsigned num_clusters = cdict.size(); 51 | p_r2c = model->add_parameters({num_clusters, rep_dim}); 52 | p_cbias = model->add_parameters({num_clusters}); 53 | p_rc2ws.resize(num_clusters); 54 | p_rcwbiases.resize(num_clusters); 55 | for (unsigned i = 0; i < num_clusters; ++i) { 56 | auto& words = cidx2words[i]; // vector of word ids 57 | const unsigned num_words_in_cluster = words.size(); 58 | if (num_words_in_cluster > 1) { 59 | // for singleton clusters, we don't need these parameters, so 60 | // we don't create them 61 | p_rc2ws[i] = model->add_parameters({num_words_in_cluster, rep_dim}); 62 | p_rcwbiases[i] = model->add_parameters({num_words_in_cluster}); 63 | } 64 | } 65 | } 66 | 67 | void ClassFactoredSoftmaxBuilder::new_graph(ComputationGraph& cg) { 68 | pcg = &cg; 69 | const unsigned num_clusters = cdict.size(); 70 | r2c = parameter(cg, p_r2c); 71 | cbias = parameter(cg, p_cbias); 72 | rc2ws.clear(); 73 | rc2biases.clear(); 74 | rc2ws.resize(num_clusters); 75 | rc2biases.resize(num_clusters); 76 | } 77 | 78 | Expression ClassFactoredSoftmaxBuilder::neg_log_softmax(const Expression& rep, unsigned wordidx) { 79 | // TODO assert that new_graph has been called 80 | int clusteridx = widx2cidx[wordidx]; 81 | assert(clusteridx >= 0); // if this fails, wordid is missing from clusters 82 | Expression cscores = affine_transform({cbias, r2c, rep}); 83 | Expression cnlp = pickneglogsoftmax(cscores, clusteridx); 84 | if (singleton_cluster[clusteridx]) return cnlp; 85 | // if there is only one word in the cluster, just return -log p(class | rep) 86 | // otherwise predict word too 87 | unsigned wordrow = widx2cwidx[wordidx]; 88 | Expression& cwbias = get_rc2wbias(clusteridx); 89 | Expression& r2cw = get_rc2w(clusteridx); 90 | Expression wscores = affine_transform({cwbias, r2cw, rep}); 91 | Expression wnlp = pickneglogsoftmax(wscores, wordrow); 92 | return cnlp + wnlp; 93 | } 94 | 95 | unsigned ClassFactoredSoftmaxBuilder::sample(const expr::Expression& rep) { 96 | // TODO assert that new_graph has been called 97 | Expression cscores = affine_transform({cbias, r2c, rep}); 98 | softmax(cscores); 99 | auto cdist = as_vector(pcg->incremental_forward()); 100 | unsigned c = 0; 101 | double p = rand01(); 102 | for (; c < cdist.size(); ++c) { 103 | p -= cdist[c]; 104 | if (p < 0.0) { break; } 105 | } 106 | if (c == cdist.size()) --c; 107 | unsigned w = 0; 108 | if (!singleton_cluster[c]) { 109 | Expression& cwbias = get_rc2wbias(c); 110 | Expression& r2cw = get_rc2w(c); 111 | Expression wscores = affine_transform({cwbias, r2cw, rep}); 112 | softmax(wscores); 113 | auto wdist = as_vector(pcg->incremental_forward()); 114 | p = rand01(); 115 | for (; w < wdist.size(); ++w) { 116 | p -= wdist[w]; 117 | if (p < 0.0) { break; } 118 | } 119 | if (w == wdist.size()) --w; 120 | } 121 | return cidx2words[c][w]; 122 | } 123 | 124 | void 
ClassFactoredSoftmaxBuilder::ReadClusterFile(const std::string& cluster_file, Dict* word_dict) { 125 | cerr << "Reading clusters from " << cluster_file << " ...\n"; 126 | ifstream in(cluster_file); 127 | assert(in); 128 | int wc = 0; 129 | string line; 130 | while(getline(in, line)) { 131 | ++wc; 132 | const unsigned len = line.size(); 133 | unsigned startc = 0; 134 | while (is_ws(line[startc]) && startc < len) { ++startc; } 135 | unsigned endc = startc; 136 | while (not_ws(line[endc]) && endc < len) { ++endc; } 137 | unsigned startw = endc; 138 | while (is_ws(line[startw]) && startw < len) { ++startw; } 139 | unsigned endw = startw; 140 | while (not_ws(line[endw]) && endw < len) { ++endw; } 141 | assert(endc > startc); 142 | assert(startw > endc); 143 | assert(endw > startw); 144 | unsigned c = cdict.Convert(line.substr(startc, endc - startc)); 145 | unsigned word = word_dict->Convert(line.substr(startw, endw - startw)); 146 | if (word >= widx2cidx.size()) { 147 | widx2cidx.resize(word + 1, -1); 148 | widx2cwidx.resize(word + 1); 149 | } 150 | widx2cidx[word] = c; 151 | if (c >= cidx2words.size()) cidx2words.resize(c + 1); 152 | auto& clusterwords = cidx2words[c]; 153 | widx2cwidx[word] = clusterwords.size(); 154 | clusterwords.push_back(word); 155 | } 156 | singleton_cluster.resize(cidx2words.size()); 157 | int scs = 0; 158 | for (unsigned i = 0; i < cidx2words.size(); ++i) { 159 | bool sc = cidx2words[i].size() <= 1; 160 | if (sc) scs++; 161 | singleton_cluster[i] = sc; 162 | } 163 | cerr << "Read " << wc << " words in " << cdict.size() << " clusters (" << scs << " singleton clusters)\n"; 164 | } 165 | 166 | } // namespace cnn 167 | -------------------------------------------------------------------------------- /cnn/fast-lstm.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/fast-lstm.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cnn/nodes.h" 9 | 10 | using namespace std; 11 | using namespace cnn::expr; 12 | 13 | namespace cnn { 14 | 15 | enum { X2I, H2I, C2I, BI, X2O, H2O, C2O, BO, X2C, H2C, BC }; 16 | 17 | /* 18 | FastLSTM replaces the matrices from cell to other units, by diagonal matrices. 19 | Namely: C2O, C2I. 
20 | */ 21 | 22 | FastLSTMBuilder::FastLSTMBuilder(unsigned layers, 23 | unsigned input_dim, 24 | unsigned hidden_dim, 25 | Model* model) : layers(layers) { 26 | unsigned layer_input_dim = input_dim; 27 | for (unsigned i = 0; i < layers; ++i) { 28 | // i 29 | Parameters* p_x2i = model->add_parameters({hidden_dim, layer_input_dim}); 30 | Parameters* p_h2i = model->add_parameters({hidden_dim, hidden_dim}); 31 | Parameters* p_c2i = model->add_parameters({hidden_dim, 1}); 32 | Parameters* p_bi = model->add_parameters({hidden_dim}); 33 | 34 | // o 35 | Parameters* p_x2o = model->add_parameters({hidden_dim, layer_input_dim}); 36 | Parameters* p_h2o = model->add_parameters({hidden_dim, hidden_dim}); 37 | Parameters* p_c2o = model->add_parameters({hidden_dim, 1}); 38 | Parameters* p_bo = model->add_parameters({hidden_dim}); 39 | 40 | // c 41 | Parameters* p_x2c = model->add_parameters({hidden_dim, layer_input_dim}); 42 | Parameters* p_h2c = model->add_parameters({hidden_dim, hidden_dim}); 43 | Parameters* p_bc = model->add_parameters({hidden_dim}); 44 | layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next 45 | 46 | vector ps = {p_x2i, p_h2i, p_c2i, p_bi, p_x2o, p_h2o, p_c2o, p_bo, p_x2c, p_h2c, p_bc}; 47 | params.push_back(ps); 48 | } // layers 49 | } 50 | 51 | void FastLSTMBuilder::new_graph_impl(ComputationGraph& cg){ 52 | param_vars.clear(); 53 | 54 | for (unsigned i = 0; i < layers; ++i){ 55 | auto& p = params[i]; 56 | 57 | //i 58 | Expression i_x2i = parameter(cg,p[X2I]); 59 | Expression i_h2i = parameter(cg,p[H2I]); 60 | Expression i_c2i = parameter(cg,p[C2I]); 61 | Expression i_bi = parameter(cg,p[BI]); 62 | //o 63 | Expression i_x2o = parameter(cg,p[X2O]); 64 | Expression i_h2o = parameter(cg,p[H2O]); 65 | Expression i_c2o = parameter(cg,p[C2O]); 66 | Expression i_bo = parameter(cg,p[BO]); 67 | //c 68 | Expression i_x2c = parameter(cg,p[X2C]); 69 | Expression i_h2c = parameter(cg,p[H2C]); 70 | Expression i_bc = parameter(cg,p[BC]); 71 | 72 | vector vars = {i_x2i, i_h2i, i_c2i, i_bi, i_x2o, i_h2o, i_c2o, i_bo, i_x2c, i_h2c, i_bc}; 73 | param_vars.push_back(vars); 74 | } 75 | } 76 | 77 | // layout: 0..layers = c 78 | // layers+1..2*layers = h 79 | void FastLSTMBuilder::start_new_sequence_impl(const vector& hinit) { 80 | h.clear(); 81 | c.clear(); 82 | if (hinit.size() > 0) { 83 | assert(layers*2 == hinit.size()); 84 | h0.resize(layers); 85 | c0.resize(layers); 86 | for (unsigned i = 0; i < layers; ++i) { 87 | c0[i] = hinit[i]; 88 | h0[i] = hinit[i + layers]; 89 | } 90 | has_initial_state = true; 91 | } else { 92 | has_initial_state = false; 93 | } 94 | } 95 | 96 | Expression FastLSTMBuilder::add_input_impl(int prev, const Expression& x) { 97 | h.push_back(vector(layers)); 98 | c.push_back(vector(layers)); 99 | vector& ht = h.back(); 100 | vector& ct = c.back(); 101 | Expression in = x; 102 | for (unsigned i = 0; i < layers; ++i) { 103 | const vector& vars = param_vars[i]; 104 | Expression i_h_tm1, i_c_tm1; 105 | bool has_prev_state = (prev >= 0 || has_initial_state); 106 | if (prev < 0) { 107 | if (has_initial_state) { 108 | // intial value for h and c at timestep 0 in layer i 109 | // defaults to zero matrix input if not set in add_parameter_edges 110 | i_h_tm1 = h0[i]; 111 | i_c_tm1 = c0[i]; 112 | } 113 | } else { // t > 0 114 | i_h_tm1 = h[prev][i]; 115 | i_c_tm1 = c[prev][i]; 116 | } 117 | // input 118 | Expression i_ait; 119 | if (has_prev_state) { 120 | // i_ait = vars[BI] + vars[X2I] * in + vars[H2I]*i_h_tm1 + cwise_multiply(vars[C2I], i_c_tm1); 121 | i_ait 
= affine_transform({vars[BI], vars[X2I], in, vars[H2I], i_h_tm1}) + 122 | cwise_multiply(vars[C2I], i_c_tm1); 123 | } else { 124 | // i_ait = vars[BI] + vars[X2I] * in; 125 | i_ait = affine_transform({vars[BI], vars[X2I], in}); 126 | } 127 | Expression i_it = logistic(i_ait); 128 | // forget 129 | Expression i_ft = 1.f - i_it; 130 | // write memory cell 131 | Expression i_awt; 132 | if (has_prev_state) 133 | // i_awt = vars[BC] + vars[X2C] * in + vars[H2C]*i_h_tm1; 134 | i_awt = affine_transform({vars[BC], vars[X2C], in, vars[H2C], i_h_tm1}); 135 | else 136 | // i_awt = vars[BC] + vars[X2C] * in; 137 | i_awt = affine_transform({vars[BC], vars[X2C], in}); 138 | Expression i_wt = tanh(i_awt); 139 | // output 140 | if (has_prev_state) { 141 | Expression i_nwt = cwise_multiply(i_it,i_wt); 142 | Expression i_crt = cwise_multiply(i_ft,i_c_tm1); 143 | ct[i] = i_crt + i_nwt; 144 | } else { 145 | ct[i] = cwise_multiply(i_it,i_wt); 146 | } 147 | 148 | Expression i_aot; 149 | if (has_prev_state) { 150 | // i_aot = vars[BO] + vars[X2O] * in + vars[H2O] * i_h_tm1 + cwise_multiply(vars[C2O], ct[i]); 151 | i_aot = affine_transform({vars[BO], vars[X2O], in, vars[H2O], i_h_tm1}) + 152 | cwise_multiply(vars[C2O], ct[i]); 153 | } 154 | else { 155 | // i_aot = vars[BO] + vars[X2O] * in; 156 | i_aot = affine_transform({vars[BO], vars[X2O], in}); 157 | } 158 | Expression i_ot = logistic(i_aot); 159 | Expression ph_t = tanh(ct[i]); 160 | in = ht[i] = cwise_multiply(i_ot,ph_t); 161 | } 162 | return ht.back(); 163 | } 164 | 165 | void FastLSTMBuilder::copy(const RNNBuilder & rnn) { 166 | const FastLSTMBuilder & rnn_lstm = (const FastLSTMBuilder&)rnn; 167 | assert(params.size() == rnn_lstm.params.size()); 168 | for(size_t i = 0; i < params.size(); ++i) 169 | for(size_t j = 0; j < params[i].size(); ++j) 170 | params[i][j]->copy(*rnn_lstm.params[i][j]); 171 | } 172 | 173 | } // namespace cnn 174 | -------------------------------------------------------------------------------- /cnn/model.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/model.h" 2 | #include "cnn/tensor.h" 3 | #include "cnn/aligned-mem-pool.h" 4 | #include "cnn/cnn.h" 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #if HAVE_CUDA 15 | #include "cnn/gpu-ops.h" 16 | #include "cnn/cuda.h" 17 | #endif 18 | 19 | using namespace std; 20 | 21 | namespace cnn { 22 | 23 | ParametersBase::~ParametersBase() {} 24 | 25 | Parameters::Parameters(const Dim& d, float scale) : dim(d) { 26 | values.d = g.d = d; 27 | values.v = static_cast(ps->allocate(d.size() * sizeof(float))); 28 | if (scale) { 29 | TensorTools::Randomize(values, scale); 30 | } 31 | else { 32 | TensorTools::Randomize(values); 33 | } 34 | g.v = static_cast(ps->allocate(d.size() * sizeof(float))); 35 | TensorTools::Zero(g); 36 | } 37 | 38 | size_t Parameters::size() const { return dim.size(); } 39 | 40 | void Parameters::scale_parameters(float a) { 41 | (*values) *= a; 42 | } 43 | 44 | void Parameters::squared_l2norm(float* sqnorm) const { 45 | #if HAVE_CUDA 46 | gpu::l2_norm_reducer(values.d.size(), values.v, sqnorm, true, false); 47 | #else 48 | *sqnorm = (*values).squaredNorm(); 49 | #endif 50 | } 51 | 52 | void Parameters::g_squared_l2norm(float* sqnorm) const { 53 | #if HAVE_CUDA 54 | gpu::l2_norm_reducer(g.d.size(), g.v, sqnorm, true, false); 55 | #else 56 | *sqnorm = g.vec().squaredNorm(); 57 | #endif 58 | } 59 | 60 | void Parameters::copy(const Parameters & param) { 61 | assert(dim == 
param.dim); 62 | TensorTools::CopyElements(values, param.values); 63 | } 64 | 65 | void Parameters::accumulate_grad(const Tensor& d) { 66 | #if HAVE_CUDA 67 | CUBLAS_CHECK(cublasSaxpy(cublas_handle, g.d.size(), kSCALAR_ONE, d.v, 1, g.v, 1)); 68 | #else 69 | g.vec() += d.vec(); 70 | #endif 71 | } 72 | 73 | void Parameters::clear() { 74 | TensorTools::Zero(g); 75 | } 76 | 77 | LookupParameters::LookupParameters(unsigned n, const Dim& d) : dim(d), values(n), grads(n) { 78 | for (unsigned i = 0; i < n; ++i) { 79 | auto& v = values[i]; 80 | v.d = d; 81 | v.v = static_cast(ps->allocate(d.size() * sizeof(float))); 82 | TensorTools::Randomize(v); 83 | 84 | auto& g = grads[i]; 85 | g.d = d; 86 | g.v = static_cast(ps->allocate(d.size() * sizeof(float))); 87 | TensorTools::Zero(g); 88 | } 89 | } 90 | 91 | void LookupParameters::scale_parameters(float a) { 92 | for (auto& p : values) 93 | (*p) *= a; 94 | } 95 | 96 | void LookupParameters::Initialize(unsigned index, const vector& val) { 97 | assert(int(val.size()) == int(dim.size())); 98 | #if HAVE_CUDA 99 | cerr << "implement LookupParameters::Initialize\n"; 100 | throw cuda_not_implemented("LookupParameters::Initialize"); 101 | #else 102 | memcpy(values[index].v, &val[0], val.size() * sizeof(float)); 103 | #endif 104 | } 105 | 106 | size_t LookupParameters::size() const { 107 | return values.size() * dim.size(); 108 | } 109 | 110 | void LookupParameters::g_squared_l2norm(float* sqnorm) const { 111 | #if HAVE_CUDA 112 | bool acc = false; 113 | for (auto i : non_zero_grads) { 114 | gpu::l2_norm_reducer(grads[i].d.size(), grads[i].v, sqnorm, true, acc); 115 | acc = true; 116 | } 117 | #else 118 | real a = 0; 119 | for (auto i : non_zero_grads) 120 | a += (*grads[i]).squaredNorm(); 121 | *sqnorm = a; 122 | #endif 123 | } 124 | 125 | void LookupParameters::squared_l2norm(float* sqnorm) const { 126 | #if HAVE_CUDA 127 | bool acc = false; 128 | for (unsigned i = 0; i < values.size(); ++i) { 129 | gpu::l2_norm_reducer(values[i].d.size(), values[i].v, sqnorm, true, acc); 130 | acc = true; 131 | } 132 | #else 133 | float a = 0; 134 | for (unsigned i = 0; i < values.size(); ++i) 135 | a += (*values[i]).squaredNorm(); 136 | *sqnorm = a; 137 | #endif 138 | } 139 | 140 | void LookupParameters::copy(const LookupParameters & param) { 141 | assert(dim == param.dim); 142 | for(size_t i = 0; i < param.values.size(); ++i) 143 | TensorTools::CopyElements(values[i], param.values[i]); 144 | } 145 | 146 | void LookupParameters::accumulate_grad(unsigned index, const Tensor& d) { 147 | non_zero_grads.insert(index); 148 | #if HAVE_CUDA 149 | CUBLAS_CHECK(cublasSaxpy(cublas_handle, d.d.size(), kSCALAR_ONE, d.v, 1, grads[index].v, 1)); 150 | #else 151 | *grads[index] += *d; 152 | #endif 153 | } 154 | 155 | void LookupParameters::clear() { 156 | for (auto i : non_zero_grads) 157 | TensorTools::Zero(grads[i]); 158 | non_zero_grads.clear(); 159 | } 160 | 161 | Model::~Model() { 162 | for (auto p : all_params) delete p; 163 | } 164 | 165 | void Model::project_weights(float radius) { 166 | static float* project_scratch = 0; 167 | if (!project_scratch) 168 | project_scratch = (float*)default_device->mem->malloc(all_params.size() * sizeof(float)); 169 | int pi = 0; 170 | for (auto p : all_params) { 171 | p->squared_l2norm(&project_scratch[pi]); 172 | ++pi; 173 | } 174 | double gg = 0; 175 | for (int i = 0; i < pi; ++i) 176 | gg += project_scratch[i]; 177 | cerr << "NORM: " << sqrt(gg) << endl; 178 | } 179 | 180 | float Model::gradient_l2_norm() const { 181 | if (!gradient_norm_scratch) 
182 | gradient_norm_scratch = (float*)default_device->mem->malloc(all_params.size() * sizeof(float)); 183 | int pi = 0; 184 | for (auto p : all_params) { 185 | p->g_squared_l2norm(&gradient_norm_scratch[pi]); 186 | ++pi; 187 | } 188 | #if HAVE_CUDA 189 | float res = 0; 190 | gpu::l2_norm_reducer(all_params.size(), gradient_norm_scratch, gradient_norm_scratch, false, false); 191 | cudaMemcpy(&res, gradient_norm_scratch, sizeof(float), cudaMemcpyDeviceToHost); 192 | return sqrt(res); 193 | #else 194 | double gg = 0; 195 | for (int i = 0; i < pi; ++i) 196 | gg += gradient_norm_scratch[i]; 197 | return sqrt(gg); 198 | #endif 199 | } 200 | 201 | Parameters* Model::add_parameters(const Dim& d, float scale) { 202 | Parameters* p = new Parameters(d, scale); 203 | all_params.push_back(p); 204 | params.push_back(p); 205 | return p; 206 | } 207 | 208 | LookupParameters* Model::add_lookup_parameters(unsigned n, const Dim& d) { 209 | LookupParameters* p = new LookupParameters(n,d); 210 | all_params.push_back(p); 211 | lookup_params.push_back(p); 212 | return p; 213 | } 214 | 215 | void Model::reset_gradient() { 216 | for (auto p : params) { p->clear(); } 217 | for (auto p : lookup_params) { p->clear(); } 218 | } 219 | 220 | void save_cnn_model(std::string filename, Model* model) { 221 | std::ofstream out(filename); 222 | boost::archive::text_oarchive oa(out); 223 | oa << (*model); 224 | }; 225 | 226 | void load_cnn_model(std::string filename, Model* model) { 227 | std::ifstream in(filename); 228 | boost::archive::text_iarchive ia(in); 229 | ia >> (*model); 230 | }; 231 | 232 | } // namespace cnn 233 | -------------------------------------------------------------------------------- /cnn/simd-functors.h: -------------------------------------------------------------------------------- 1 | #ifndef CNN_XFUNCTORS_H 2 | #define CNN_XFUNCTORS_H 3 | 4 | #include 5 | 6 | #include "cnn/functors.h" 7 | 8 | // these functors are implemented to exploit Eigen's internal logic for doing 9 | // vectorized arithmetic. I'm putting them in a separate file since, if Eigen 10 | // breaks backward compatibility by changing an internal interface, I want 11 | // the necessary changes to be localized. 
12 | // 13 | // to implement your own functor, you need to provide 14 | // 1) operator() implemented on the scalar data type 15 | // 2) packetOp implemented using vector ("packet") type 16 | // 3) the functor_traits specialization for your functor 17 | // that tells the compiler whether your architecture 18 | // has vectorized support for the operations you need 19 | // and an estimate of the cost of the operation 20 | 21 | namespace cnn { 22 | template struct const_add_op { 23 | const_add_op(const Scalar& c) : c(c) {} 24 | CNN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x) const { 25 | return c + x; 26 | } 27 | template 28 | CNN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { 29 | using namespace Eigen::internal; 30 | return padd(pset1(c), x); 31 | } 32 | Scalar c; 33 | }; 34 | } 35 | 36 | namespace Eigen { namespace internal { 37 | template 38 | struct functor_traits > { 39 | enum { 40 | Cost = NumTraits::AddCost * 2, 41 | PacketAccess = packet_traits::HasAdd 42 | }; 43 | }; 44 | } } 45 | 46 | namespace cnn { 47 | template struct const_minus_op { 48 | const_minus_op(const Scalar& c) : c(c) {} 49 | CNN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x) const { 50 | return c - x; 51 | } 52 | template 53 | CNN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { 54 | using namespace Eigen::internal; 55 | return psub(pset1(c), x); 56 | } 57 | Scalar c; 58 | }; 59 | } 60 | 61 | namespace Eigen { namespace internal { 62 | template 63 | struct functor_traits > { 64 | enum { 65 | Cost = NumTraits::AddCost * 2, 66 | PacketAccess = packet_traits::HasSub 67 | }; 68 | }; 69 | } } 70 | 71 | namespace cnn { 72 | template struct scalar_logistic_sigmoid_op { 73 | EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_sigmoid_op) 74 | CNN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x) const { 75 | using std::exp; 76 | const Scalar one = Scalar(1); 77 | return one / (one + exp(-x)); 78 | } 79 | template 80 | CNN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { 81 | using namespace Eigen::internal; 82 | const Packet one = pset1(1); 83 | return pdiv(one, padd(one, pexp(pnegate(x)))); 84 | } 85 | }; 86 | } 87 | 88 | namespace Eigen { namespace internal { 89 | template 90 | struct functor_traits > { 91 | enum { 92 | Cost = NumTraits::AddCost * 2 + NumTraits::MulCost * 6, 93 | PacketAccess = packet_traits::HasAdd && packet_traits::HasDiv && 94 | packet_traits::HasNegate && packet_traits::HasExp 95 | }; 96 | }; 97 | } } 98 | 99 | namespace cnn { 100 | template struct scalar_erf_backward_op { 101 | EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_backward_op) 102 | CNN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& d) const { 103 | using std::exp; 104 | const Scalar sqrt_pi_over2(1.1283791670955125738961589); 105 | return sqrt_pi_over2 * exp(-x * x) * d; 106 | } 107 | template 108 | CNN_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& d) const { 109 | using namespace Eigen::internal; 110 | const Packet sqrt_pi_over2 = pset1(1.1283791670955125738961589); 111 | return pmul(sqrt_pi_over2, pmul(pexp(pnegate(pmul(x, x))), d)); 112 | } 113 | }; 114 | } 115 | 116 | namespace Eigen { namespace internal { 117 | template 118 | struct functor_traits > { 119 | enum { 120 | Cost = NumTraits::MulCost * 8, 121 | PacketAccess = packet_traits::HasExp && packet_traits::HasMul && packet_traits::HasNegate 122 | }; 123 | }; 124 | } } 125 | 126 | namespace cnn { 127 | template struct scalar_logistic_sigmoid_backward_op { 128 | 
EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_sigmoid_backward_op) 129 | CNN_DEVICE_FUNC inline const Scalar operator() (const Scalar& t, const Scalar& d) const { 130 | const Scalar one = Scalar(1); 131 | return (one - t) * t * d; 132 | } 133 | template 134 | CNN_DEVICE_FUNC inline Packet packetOp(const Packet& t, const Packet& d) const { 135 | using namespace Eigen::internal; 136 | const Packet one = pset1(1); 137 | return pmul(psub(one, t), pmul(t, d)); 138 | } 139 | }; 140 | } 141 | 142 | namespace Eigen { namespace internal { 143 | template 144 | struct functor_traits > { 145 | enum { 146 | Cost = NumTraits::AddCost + NumTraits::MulCost * 2, 147 | PacketAccess = packet_traits::HasSub && packet_traits::HasMul 148 | }; 149 | }; 150 | } } 151 | 152 | namespace cnn { 153 | template struct scalar_tanh_op { 154 | EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op) 155 | CNN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); } 156 | template 157 | CNN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return Eigen::internal::ptanh(a); } 158 | }; 159 | } 160 | 161 | namespace Eigen { namespace internal { 162 | template 163 | struct functor_traits > { 164 | enum { 165 | Cost = 5 * NumTraits::MulCost, 166 | PacketAccess = packet_traits::HasTanh 167 | }; 168 | }; 169 | } } 170 | 171 | namespace cnn { 172 | template struct scalar_tanh_backward_op { 173 | EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_backward_op) 174 | CNN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& t, const Scalar& d) const { return (1 - t * t) * d; } 175 | template 176 | CNN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& t, const Packet& d) const { 177 | using namespace Eigen::internal; 178 | const Packet one = pset1(1); 179 | return pmul(psub(one, pmul(t, t)), d); 180 | } 181 | }; 182 | } 183 | 184 | namespace Eigen { namespace internal { 185 | template 186 | struct functor_traits > { 187 | enum { 188 | Cost = NumTraits::AddCost + 2 * NumTraits::MulCost, 189 | PacketAccess = packet_traits::HasSub && packet_traits::HasMul 190 | }; 191 | }; 192 | }} 193 | 194 | namespace cnn { 195 | //this is slower than the dumb implementation, probably because of the pset operations 196 | // which could be factored out into the constructor, but the Packet type isn't used 197 | // then (and I think fixing this would be hard) 198 | template struct scalar_nlsoftmax_backward_op { 199 | scalar_nlsoftmax_backward_op(const Scalar& lz, const Scalar& err) : logz(lz), d(err) {} 200 | CNN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& t) const { 201 | using std::exp; 202 | return exp(t - logz) * d; 203 | } 204 | template 205 | CNN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& t) const { 206 | using namespace Eigen::internal; 207 | const Packet lz = pset1(logz); 208 | const Packet dd = pset1(d); 209 | return pmul(pexp(psub(t, lz)), dd); 210 | } 211 | Scalar logz; 212 | Scalar d; 213 | };} 214 | 215 | namespace Eigen { namespace internal { 216 | template 217 | struct functor_traits > { 218 | enum { 219 | Cost = NumTraits::AddCost + 6 * NumTraits::MulCost, 220 | PacketAccess = packet_traits::HasSub && packet_traits::HasExp 221 | }; 222 | }; 223 | }} 224 | 225 | #endif 226 | -------------------------------------------------------------------------------- /cnn/hsm-builder.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/hsm-builder.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 
#include 7 | 8 | #undef assert 9 | #define assert(x) {} 10 | 11 | using namespace std; 12 | 13 | namespace cnn { 14 | 15 | using namespace expr; 16 | 17 | Cluster::Cluster() : initialized(false) {} 18 | void Cluster::new_graph(ComputationGraph& cg) { 19 | for (Cluster* child : children) { 20 | child->new_graph(cg); 21 | } 22 | bias.pg = NULL; 23 | weights.pg = NULL; 24 | } 25 | 26 | Cluster* Cluster::add_child(unsigned sym) { 27 | assert (!initialized); 28 | assert (terminals.size() == 0); 29 | auto it = word2ind.find(sym); 30 | unsigned i; 31 | if (it == word2ind.end()) { 32 | Cluster* c = new Cluster(); 33 | c->path = path; 34 | c->path.push_back(sym); 35 | i = children.size(); 36 | word2ind.insert(make_pair(sym, i)); 37 | children.push_back(c); 38 | assert (c != NULL); 39 | } 40 | else { 41 | i = it->second; 42 | } 43 | return children[i]; 44 | } 45 | 46 | void Cluster::add_word(unsigned word) { 47 | assert (!initialized); 48 | assert (children.size() == 0); 49 | word2ind[word] = terminals.size(); 50 | terminals.push_back(word); 51 | } 52 | 53 | void Cluster::initialize(unsigned rep_dim, Model* model) { 54 | assert (!initialized); 55 | output_size = (children.size() > 0) ? children.size() : terminals.size(); 56 | assert (output_size > 0); 57 | 58 | if (output_size == 1) { 59 | p_weights = NULL; 60 | p_bias = NULL; 61 | } 62 | else if (output_size == 2) { 63 | p_weights = model->add_parameters({1, rep_dim}); 64 | p_bias = model->add_parameters({1}); 65 | } 66 | else { 67 | p_weights = model->add_parameters({output_size, rep_dim}); 68 | p_bias = model->add_parameters({output_size}); 69 | } 70 | 71 | for (Cluster* child : children) { 72 | child->initialize(rep_dim, model); 73 | } 74 | } 75 | 76 | unsigned Cluster::num_children() const { 77 | return children.size(); 78 | } 79 | 80 | const Cluster* Cluster::get_child(unsigned i) const { 81 | assert (i < children.size()); 82 | assert (children[i] != NULL); 83 | return children[i]; 84 | } 85 | 86 | const vector& Cluster::get_path() const { return path; } 87 | unsigned Cluster::get_index(unsigned word) const { return word2ind.find(word)->second; } 88 | unsigned Cluster::get_word(unsigned index) const { return terminals[index]; } 89 | 90 | Expression Cluster::predict(Expression h, ComputationGraph& cg) const { 91 | if (output_size == 1) { 92 | return input(cg, 1.0f); 93 | } 94 | else { 95 | Expression b = get_bias(cg); 96 | Expression w = get_weights(cg); 97 | return affine_transform({b, w, h}); 98 | } 99 | } 100 | 101 | Expression Cluster::neg_log_softmax(Expression h, unsigned r, ComputationGraph& cg) const { 102 | if (output_size == 1) { 103 | return input(cg, 0.0f); 104 | } 105 | else if (output_size == 2) { 106 | Expression p = logistic(predict(h, cg)); 107 | assert (r == 0 || r == 1); 108 | if (r == 1) { 109 | p = 1 - p; 110 | } 111 | return -log(p); 112 | } 113 | else { 114 | Expression dist = predict(h, cg); 115 | return pickneglogsoftmax(dist, r); 116 | } 117 | } 118 | 119 | unsigned Cluster::sample(expr::Expression h, ComputationGraph& cg) const { 120 | if (output_size == 1) { 121 | return 0; 122 | } 123 | else if (output_size == 2) { 124 | logistic(predict(h, cg)); 125 | double prob0 = as_scalar(cg.incremental_forward()); 126 | double p = rand01(); 127 | if (p < prob0) { 128 | return 0; 129 | } 130 | else { 131 | return 1; 132 | } 133 | } 134 | else { 135 | softmax(predict(h, cg)); 136 | vector dist = as_vector(cg.incremental_forward()); 137 | unsigned c = 0; 138 | double p = rand01(); 139 | for (; c < dist.size(); ++c) { 140 | p -= 
dist[c]; 141 | if (p < 0.0) { break; } 142 | } 143 | if (c == dist.size()) { 144 | --c; 145 | } 146 | return c; 147 | } 148 | } 149 | 150 | Expression Cluster::get_weights(ComputationGraph& cg) const { 151 | if (weights.pg != &cg) { 152 | weights = parameter(cg, p_weights); 153 | } 154 | return weights; 155 | } 156 | 157 | Expression Cluster::get_bias(ComputationGraph& cg) const { 158 | if (bias.pg != &cg) { 159 | bias = parameter(cg, p_bias); 160 | } 161 | return bias; 162 | } 163 | 164 | string Cluster::toString() const { 165 | stringstream ss; 166 | for (unsigned i = 0; i < path.size(); ++i) { 167 | if (i != 0) { 168 | ss << " "; 169 | } 170 | ss << path[i]; 171 | } 172 | return ss.str(); 173 | } 174 | 175 | HierarchicalSoftmaxBuilder::HierarchicalSoftmaxBuilder(unsigned rep_dim, 176 | const std::string& cluster_file, 177 | Dict* word_dict, 178 | Model* model) { 179 | root = ReadClusterFile(cluster_file, word_dict); 180 | root->initialize(rep_dim, model); 181 | } 182 | 183 | HierarchicalSoftmaxBuilder::~HierarchicalSoftmaxBuilder() { 184 | } 185 | 186 | void HierarchicalSoftmaxBuilder::new_graph(ComputationGraph& cg) { 187 | pcg = &cg; 188 | root->new_graph(cg); 189 | } 190 | 191 | Expression HierarchicalSoftmaxBuilder::neg_log_softmax(const Expression& rep, unsigned wordidx) { 192 | assert (pcg != NULL && "You must call new_graph before calling neg_log_softmax!"); 193 | Cluster* path = widx2path[wordidx]; 194 | 195 | unsigned i = 0; 196 | const Cluster* node = root; 197 | assert (root != NULL); 198 | vector log_probs; 199 | Expression lp; 200 | unsigned r; 201 | while (node->num_children() > 0) { 202 | r = node->get_index(path->get_path()[i]); 203 | lp = node->neg_log_softmax(rep, r, *pcg); 204 | log_probs.push_back(lp); 205 | node = node->get_child(r); 206 | assert (node != NULL); 207 | i += 1; 208 | } 209 | 210 | r = path->get_index(wordidx); 211 | lp = node->neg_log_softmax(rep, r, *pcg); 212 | log_probs.push_back(lp); 213 | 214 | return sum(log_probs); 215 | } 216 | 217 | unsigned HierarchicalSoftmaxBuilder::sample(const expr::Expression& rep) { 218 | assert (pcg != NULL && "You must call new_graph before calling sample!"); 219 | 220 | const Cluster* node = root; 221 | vector dist; 222 | unsigned c; 223 | while (node->num_children() > 0) { 224 | c = node->sample(rep, *pcg); 225 | node = node->get_child(c); 226 | } 227 | 228 | c = node->sample(rep, *pcg); 229 | return node->get_word(c); 230 | } 231 | 232 | inline bool is_ws(char x) { return (x == ' ' || x == '\t'); } 233 | inline bool not_ws(char x) { return (x != ' ' && x != '\t'); } 234 | 235 | Cluster* HierarchicalSoftmaxBuilder::ReadClusterFile(const std::string& cluster_file, Dict* word_dict) { 236 | cerr << "Reading clusters from " << cluster_file << " ...\n"; 237 | ifstream in(cluster_file); 238 | assert(in); 239 | int wc = 0; 240 | string line; 241 | vector path; 242 | Cluster* root = new Cluster(); 243 | while(getline(in, line)) { 244 | path.clear(); 245 | ++wc; 246 | const unsigned len = line.size(); 247 | unsigned startp = 0; 248 | unsigned endp = 0; 249 | while (startp < len) { 250 | while (is_ws(line[startp]) && startp < len) { ++startp; } 251 | endp = startp; 252 | while (not_ws(line[endp]) && endp < len) { ++endp; } 253 | string symbol = line.substr(startp, endp - startp); 254 | path.push_back(path_symbols.Convert(symbol)); 255 | if (line[endp] == ' ') { 256 | startp = endp + 1; 257 | continue; 258 | } 259 | else { 260 | break; 261 | } 262 | } 263 | Cluster* node = root; 264 | for (unsigned symbol : path) { 265 | node 
= node->add_child(symbol); 266 | } 267 | 268 | unsigned startw = endp; 269 | while (is_ws(line[startw]) && startw < len) { ++startw; } 270 | unsigned endw = startw; 271 | while (not_ws(line[endw]) && endw < len) { ++endw; } 272 | assert(endp > startp); 273 | assert(startw > endp); 274 | assert(endw > startw); 275 | 276 | string word = line.substr(startw, endw - startw); 277 | unsigned widx = word_dict->Convert(word); 278 | node->add_word(widx); 279 | 280 | if (widx2path.size() <= widx) { 281 | widx2path.resize(widx + 1); 282 | } 283 | widx2path[widx] = node; 284 | } 285 | cerr << "Done reading clusters.\n"; 286 | return root; 287 | } 288 | 289 | } // namespace cnn 290 | --------------------------------------------------------------------------------
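Note (added for orientation; not part of the repository): the files above define the low-level cnn pieces the parser is built on. The sketch below shows how Model, FastLSTMBuilder and ClassFactoredSoftmaxBuilder are typically wired together in a tiny language-model-style loop. It is illustrative only: the vocabulary size, dimensions, toy word ids and the cluster file name "clusters.txt" are hypothetical, and the surrounding calls (Initialize, ComputationGraph, SimpleSGDTrainer, lookup, add_input, start_new_sequence, sum, as_scalar) are assumed from the usual cnn API; only the builder constructors and the new_graph / neg_log_softmax / add_parameters interfaces are taken directly from the files shown above.

// Hedged usage sketch, not a definitive implementation.
#include <iostream>
#include <vector>
#include "cnn/cnn.h"
#include "cnn/init.h"
#include "cnn/expr.h"
#include "cnn/model.h"
#include "cnn/dict.h"
#include "cnn/training.h"
#include "cnn/fast-lstm.h"
#include "cnn/cfsm-builder.h"

using namespace cnn;
using namespace cnn::expr;

int main(int argc, char** argv) {
  Initialize(argc, argv);
  Model model;
  Dict vocab;  // would normally be filled while reading the training data
  const unsigned VOCAB = 10000, EMB = 64, HID = 128, LAYERS = 2;
  LookupParameters* embeddings = model.add_lookup_parameters(VOCAB, {EMB});
  FastLSTMBuilder lstm(LAYERS, EMB, HID, &model);
  // "clusters.txt" (hypothetical): one "<cluster> <word>" pair per line,
  // the format ClassFactoredSoftmaxBuilder::ReadClusterFile parses above.
  ClassFactoredSoftmaxBuilder cfsm(HID, "clusters.txt", &vocab, &model);
  SimpleSGDTrainer sgd(&model);

  std::vector<unsigned> sent = {5, 42, 7, 3};  // toy word ids
  ComputationGraph cg;
  lstm.new_graph(cg);
  cfsm.new_graph(cg);
  lstm.start_new_sequence();
  std::vector<Expression> losses;
  for (size_t t = 0; t + 1 < sent.size(); ++t) {
    Expression x = lookup(cg, embeddings, sent[t]);  // embedding of word t
    Expression h = lstm.add_input(x);                // top-layer hidden state
    losses.push_back(cfsm.neg_log_softmax(h, sent[t + 1]));
  }
  Expression total = sum(losses);        // last node in the graph
  float nll = as_scalar(cg.forward());   // forward() evaluates up to 'total'
  cg.backward();                         // accumulate gradients into the Model
  sgd.update(1.0);
  std::cerr << "nll = " << nll << std::endl;
  return 0;
}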
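Similarly, the comment at the top of cnn/simd-functors.h describes the three pieces a custom vectorized functor needs: a scalar operator(), a packetOp over Eigen packets, and an Eigen::internal::functor_traits specialization. The fragment below shows how such a functor can be applied through Eigen; it is a hedged sketch only, the helper function and matrix type are hypothetical, and cnn itself applies these functors inside its tensor kernels rather than from user code.

// Hedged sketch: applying cnn::const_add_op<float> via Eigen's unaryExpr.
// When functor_traits reports PacketAccess = true for the target architecture,
// Eigen calls packetOp on SIMD packets instead of the scalar operator().
#include <Eigen/Dense>
#include "cnn/simd-functors.h"

Eigen::MatrixXf add_constant(const Eigen::MatrixXf& x, float c) {
  return x.unaryExpr(cnn::const_add_op<float>(c));  // elementwise c + x(i,j)
}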