├── .gitignore
├── CMakeLists.txt
├── README.md
├── command.sh
├── example
│   ├── sarcasm1v1.dev1.nn
│   ├── sarcasm1v1.test1.nn
│   └── sarcasm1v1.train1.nn
└── src
    ├── CMakeLists.txt
    ├── NNWordLocal
    │   ├── CMakeLists.txt
    │   ├── NNWordLocal.cpp
    │   ├── NNWordLocal.h
    │   └── model
    │       ├── ComputionGraph.h
    │       ├── Driver.h
    │       ├── HyperParams.h
    │       └── ModelParams.h
    ├── NNWordLocalContext
    │   ├── CMakeLists.txt
    │   ├── NNWordLocalContext.cpp
    │   ├── NNWordLocalContext.h
    │   └── model
    │       ├── ComputionGraph.h
    │       ├── Driver.h
    │       ├── HyperParams.h
    │       └── ModelParams.h
    ├── NNWordLocalContextSeparate
    │   ├── CMakeLists.txt
    │   ├── NNWordLocalContextSeparate.cpp
    │   ├── NNWordLocalContextSeparate.h
    │   └── model
    │       ├── ComputionGraph.h
    │       ├── Driver.h
    │       ├── HyperParams.h
    │       └── ModelParams.h
    ├── SparseLocalContext
    │   ├── CMakeLists.txt
    │   ├── SparseLocalContext.cpp
    │   ├── SparseLocalContext.h
    │   └── model
    │       ├── ComputionGraph.h
    │       ├── Driver.h
    │       ├── HyperParams.h
    │       └── ModelParams.h
    └── basic
        ├── Example.h
        ├── Instance.h
        ├── InstanceReader.h
        ├── InstanceWriter.h
        ├── Options.h
        ├── Pipe.h
        ├── Reader.h
        ├── Utf.h
        └── Writer.h
/.gitignore:
--------------------------------------------------------------------------------
1 | bin
2 | build
3 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | project(SarcasmDetection)
2 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
3 |
4 | set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
5 |
6 | set (EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)
7 |
8 | if(CMAKE_BUILD_TYPE MATCHES Debug)
9 | SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -msse3 -funroll-loops -std=c++11 -O0 -pg" )
10 | else()
11 | SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops -w -std=c++11 -Ofast -march=native" )
12 | endif()
13 | add_definitions( -DUSE_FLOAT )
14 |
15 | include_directories(${EIGEN3_INCLUDE_DIR})
16 | include_directories(${N3L_INCLUDE_DIR})
17 |
18 | add_subdirectory(src)
19 |
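20 | # Note: EIGEN3_INCLUDE_DIR and N3L_INCLUDE_DIR are not located automatically; they must be
21 | # passed on the cmake command line, e.g. -DEIGEN3_INCLUDE_DIR=... -DN3L_INCLUDE_DIR=... (see README.md and command.sh).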
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SarcasmDetection
2 | ### This is the code for the paper:
3 | Meishan Zhang, Yue Zhang, Guohong Fu. [Tweet Sarcasm Detection Using Deep Neural Network.](http://zhangmeishan.github.io/coling2016-sarcasm.pdf) In Proceedings of COLING 2016. 2016.12.
4 | ## HOW TO COMPILE THIS PROJECT ON WINDOWS
5 | * Step 0: Open cmd and change to the project directory. Use this command: `cd /your/project/path/SarcasmDetection`.
6 | * Step 1: Create a new directory inside SarcasmDetection. For example, use this command: `mkdir build`.
7 | * Step 2: Change into that directory. Use this command: `cd build`.
8 | * Step 3: Generate the Visual Studio solution. Use this command: `cmake .. -DEIGEN3_INCLUDE_DIR=/your/eigen/path -DN3L_INCLUDE_DIR=/your/LibN3L-2.0/path`.
9 | * Step 4: Then you can double-click "SarcasmDetection.sln" to open the project.
10 | * Step 5: Now you can compile the project with Visual Studio.
11 | * Step 6: If you want to run the project, open the project properties and add these arguments:
12 | `-train /your/training/corpus -dev /your/development/corpus -test /your/test/corpus -option /your/option/file -l`
13 |
14 | ## NOTE
15 | Make sure you have Eigen, LibN3L-2.0, CMake, and Visual Studio 2013 (or newer).
16 | * Eigen: http://eigen.tuxfamily.org/index.php?title=Main_Page
17 | * LibN3L-2.0: https://github.com/zhangmeishan/LibN3L-2.0
18 | * CMake: https://cmake.org/
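19 |
20 | ## EXAMPLE (LINUX)
21 | A minimal end-to-end sketch, adapted from `command.sh`. The paths are placeholders: it assumes Eigen and LibN3L-2.0 live under `~/workspace/`, that the binary is run from `bin/` (the root CMakeLists.txt writes executables there), and that `option.word` is an option file like the one referenced in `command.sh`.
22 | ```
23 | mkdir build && cd build
24 | cmake .. -DEIGEN3_INCLUDE_DIR=~/workspace/eigen/ -DN3L_INCLUDE_DIR=~/workspace/LibN3L-2.0/
25 | make
26 | cd ../bin
27 | ./NNWordLocal -l -train ../example/sarcasm1v1.train1.nn -dev ../example/sarcasm1v1.dev1.nn -test ../example/sarcasm1v1.test1.nn -option option.word
28 | ```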
--------------------------------------------------------------------------------
/command.sh:
--------------------------------------------------------------------------------
1 | cmake .. -DEIGEN3_INCLUDE_DIR=/c/eigen/ -DN3L_INCLUDE_DIR=/d/workspace/LibN3L-2.0/
2 | cmake .. -DEIGEN3_INCLUDE_DIR=~/workspace/eigen/ -DN3L_INCLUDE_DIR=~/workspace/LibN3L-2.0/
3 |
4 | #sparse
5 | -l -train D:\data\sarcasm\sarcasm1v1.train1.nn -dev D:\data\sarcasm\sarcasm1v1.dev1.nn -test D:\data\sarcasm\sarcasm1v1.test1.nn -option D:\data\sarcasm\option.sparse
6 | #NNWord
7 | -l -train D:\data\sarcasm\sarcasm1v1.train1.nn -dev D:\data\sarcasm\sarcasm1v1.dev1.nn -test D:\data\sarcasm\sarcasm1v1.test1.nn -option D:\data\sarcasm\option.word
8 |
9 | #sparse
10 | ./SparseDetector -l -train ../newcorpus/1v1/sarcasm1v1.train1.nn -dev ../newcorpus/1v1/sarcasm1v1.dev1.nn -test ../newcorpus/1v1/sarcasm1v1.test1.nn -option option.sparse >sparse.log &
11 | ./NNWordLocal -l -train ../newcorpus/1v1/sarcasm1v1.train1.nn -dev ../newcorpus/1v1/sarcasm1v1.dev1.nn -test ../newcorpus/1v1/sarcasm1v1.test1.nn -option option.word >word.log &
--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | include_directories(
3 | basic
4 | )
5 |
6 |
7 | add_subdirectory(NNWordLocal)
8 | add_subdirectory(NNWordLocalContext)
9 | add_subdirectory(NNWordLocalContextSeparate)
10 | add_subdirectory(SparseLocalContext)
11 |
--------------------------------------------------------------------------------
/src/NNWordLocal/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | include_directories(
3 | model
4 | )
5 |
6 | add_executable(NNWordLocal NNWordLocal.cpp)
7 |
8 |
--------------------------------------------------------------------------------
/src/NNWordLocal/NNWordLocal.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * NNWordLocal.cpp
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #include "NNWordLocal.h"
9 |
10 | #include "Argument_helper.h"
11 |
12 | Detector::Detector(size_t memsize) : m_driver(memsize){
13 | // TODO Auto-generated constructor stub
14 | srand(0);
15 | }
16 |
17 | Detector::~Detector() {
18 | // TODO Auto-generated destructor stub
19 | }
20 |
21 | int Detector::createAlphabet(const vector<Instance>& vecInsts) {
22 | if (vecInsts.size() == 0) {
23 | std::cout << "training set empty" << std::endl;
24 | return -1;
25 | }
26 | cout << "Creating Alphabet..." << endl;
27 |
28 | int numInstance;
29 |
30 | m_driver._modelparams.labelAlpha.clear();
31 | // label alphabet and word statistics
32 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
33 | const Instance *pInstance = &vecInsts[numInstance];
34 |
35 | const vector<vector<string> > &words = pInstance->words;
36 | const string &label = pInstance->label;
37 |
38 | int labelId = m_driver._modelparams.labelAlpha.from_string(label);
39 |
40 | int seq_size = pInstance->seqsize();
41 | for (int i = 0; i < seq_size; i++) {
42 | int wordLength = words[i].size();
43 | for (int j = 0; j < wordLength; ++j) {
44 | string curword = normalize_to_lowerwithdigit(words[i][j]);
45 | m_word_stats[curword]++;
46 | }
47 |
48 | }
49 |
50 | if ((numInstance + 1) % m_options.verboseIter == 0) {
51 | cout << numInstance + 1 << " ";
52 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
53 | cout << std::endl;
54 | cout.flush();
55 | }
56 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
57 | break;
58 | }
59 |
60 | cout << numInstance << " " << endl;
61 | cout << "Label num: " << m_driver._modelparams.labelAlpha.size() << endl;
62 |
63 |
64 | m_driver._modelparams.labelAlpha.set_fixed_flag(true);
65 |
66 | if (m_options.linearfeatCat > 0) {
67 | cout << "Extracting linear features..." << endl;
68 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
69 | const Instance *pInstance = &vecInsts[numInstance];
70 | vector<string> linearfeat;
71 | extractLinearFeatures(linearfeat, pInstance);
72 | for (int i = 0; i < linearfeat.size(); i++)
73 | m_feat_stats[linearfeat[i]] ++;
74 | }
75 | m_feat_stats[unknownkey] = m_options.featCutOff + 1;
76 | cout << "Total feature num: " << m_feat_stats.size() << endl;
77 | m_driver._modelparams.featAlpha.initial(m_feat_stats, m_options.featCutOff);
78 | cout << "Remina feature num:" << m_driver._modelparams.featAlpha.size() << endl;
79 | m_driver._modelparams.featAlpha.set_fixed_flag(true);
80 | }
81 | return 0;
82 | }
83 |
84 | void Detector::addTestAlphabet(const vector<Instance>& vecInsts)
85 | {
86 | cout << "Adding other word Alphabet..." << endl;
87 | int numInstance;
88 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
89 | const Instance *pInstance = &vecInsts[numInstance];
90 |
91 | const vector<vector<string> > &words = pInstance->words;
92 | int seq_size = pInstance->seqsize();
93 | for (int i = 0; i < seq_size; ++i) {
94 | for (int j = 0; j < words[i].size(); j++) {
95 | string curword = normalize_to_lowerwithdigit(words[i][j]);
96 | if (!m_options.wordEmbFineTune)
97 | m_word_stats[curword]++;
98 | }
99 | }
100 |
101 | if ((numInstance + 1) % m_options.verboseIter == 0) {
102 | cout << numInstance + 1 << " ";
103 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
104 | cout << std::endl;
105 | cout.flush();
106 | }
107 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
108 | break;
109 | }
110 |
111 | cout << numInstance << " " << endl;
112 | }
113 |
114 |
115 |
116 | void Detector::extractDenseFeatures(vector<Feature>& features, const Instance* pInstance)
117 | {
118 | features.clear();
119 | const vector<vector<string> >& words = pInstance->words;
120 | int seq_size = pInstance->seqsize();
121 | assert(seq_size < 3);
122 |
123 | //Local and Context sentence dense feature
124 | for (int sentidx = 0; sentidx < seq_size; sentidx++) {
125 | Feature feat;
126 | const vector<string>& curWords = words[sentidx];
127 | int wordnumber = curWords.size();
128 | for (int i = 0; i < wordnumber; i++)
129 | feat.words.push_back(normalize_to_lowerwithdigit(curWords[i]));
130 |
131 | features.push_back(feat);
132 | }
133 | }
134 |
135 | void Detector::extractLinearFeatures(vector<string>& feat, const Instance* pInstance) {
136 | feat.clear();
137 |
138 | const vector<vector<string> >& words = pInstance->words;
139 | int seq_size = pInstance->seqsize();
140 | assert(seq_size < 3);
141 | //Current sent linear feature
142 | const vector<string>& lastWords = words[seq_size - 1];
143 | int wordnumber = lastWords.size();
144 | string strfeat = "", curWord = "", preWord = "", pre2Word = "";
145 | for (int i = 0; i < wordnumber; i++) {
146 | curWord = normalize_to_lowerwithdigit(lastWords[i]);
147 | strfeat = "F1U=" + curWord;
148 | feat.push_back(strfeat);
149 | preWord = i - 1 >= 0 ? lastWords[i - 1] : nullkey;
150 | strfeat = "F2B=" + preWord + seperateKey + curWord;
151 | feat.push_back(strfeat);
152 | pre2Word = i - 2 >= 0 ? lastWords[i - 2] : nullkey;
153 | strfeat = "F3T=" + pre2Word + seperateKey + preWord + seperateKey + curWord;
154 | feat.push_back(strfeat);
155 | }
156 |
157 | //History feature
158 | if (m_options.linearfeatCat > 1 && seq_size == 2) {
159 | const vector<string>& historyWords = words[seq_size - 2];
160 | wordnumber = historyWords.size();
161 | for (int i = 0; i < wordnumber; i++) {
162 | strfeat = "F4U=" + historyWords[i];
163 | feat.push_back(strfeat);
164 | }
165 | }
166 | }
167 |
168 | void Detector::convert2Example(const Instance* pInstance, Example& exam) {
169 | exam.clear();
170 |
171 | const string &instlabel = pInstance->label;
172 | const Alphabet &labelAlpha = m_driver._modelparams.labelAlpha;
173 |
174 | int labelnum = labelAlpha.size();
175 | for (int i = 0; i < labelnum; i++) {
176 | string str = labelAlpha.from_id(i);
177 | if (instlabel.compare(str) == 0)
178 | exam.m_labels.push_back(1.0);
179 | else
180 | exam.m_labels.push_back(0.0);
181 | }
182 |
183 | //dense feature
184 | extractDenseFeatures(exam.m_densefeatures, pInstance);
185 |
186 | //linear feature
187 | if (m_options.linearfeatCat > 0)
188 | extractLinearFeatures(exam.m_linearfeatures, pInstance);
189 |
190 | }
191 |
192 | void Detector::initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams) {
193 | int numInstance;
194 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
195 | const Instance *pInstance = &vecInsts[numInstance];
196 | Example curExam;
197 | convert2Example(pInstance, curExam);
198 | vecExams.push_back(curExam);
199 |
200 | if ((numInstance + 1) % m_options.verboseIter == 0) {
201 | cout << numInstance + 1 << " ";
202 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
203 | cout << std::endl;
204 | cout.flush();
205 | }
206 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
207 | break;
208 | }
209 |
210 | cout << numInstance << " " << endl;
211 | }
212 |
213 | void Detector::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile) {
214 | if (optionFile != "")
215 | m_options.load(optionFile);
216 | m_options.showOptions();
217 | vector<Instance> trainInsts, devInsts, testInsts;
218 | static vector<Instance> decodeInstResults;
219 | static Instance curDecodeInst;
220 | bool bCurIterBetter = false;
221 |
222 | m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
223 | if (devFile != "")
224 | m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
225 | if (testFile != "")
226 | m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);
227 |
228 | std::cout << "Training example number: " << trainInsts.size() << std::endl;
229 | std::cout << "Dev example number: " << trainInsts.size() << std::endl;
230 | std::cout << "Test example number: " << trainInsts.size() << std::endl;
231 |
232 | createAlphabet(trainInsts);
233 | addTestAlphabet(devInsts);
234 | addTestAlphabet(testInsts);
235 |
236 | m_word_stats[unknownkey] = m_options.wordCutOff + 1;
237 | cout << "Total word num: " << m_word_stats.size() << endl;
238 | m_driver._modelparams.wordAlpha.initial(m_word_stats, m_options.wordCutOff);
239 | m_driver._modelparams.wordAlpha.set_fixed_flag(true);
240 | cout << "Remain word num:" << m_driver._modelparams.wordAlpha.size() << endl;
241 |
242 | vector<Example> trainExamples, devExamples, testExamples;
243 |
244 | std::cout << "Instance convert to example... " << std::endl;
245 | initialExamples(trainInsts, trainExamples);
246 | initialExamples(devInsts, devExamples);
247 | initialExamples(testInsts, testExamples);
248 |
249 | if (m_options.wordFile != "") {
250 | m_driver._modelparams.words.initial(&m_driver._modelparams.wordAlpha, m_options.wordFile, m_options.wordEmbFineTune);
251 | }
252 | else{
253 | m_driver._modelparams.words.initial(&m_driver._modelparams.wordAlpha, m_options.wordEmbSize, m_options.wordEmbFineTune);
254 | }
255 |
256 | m_driver._hyperparams.setRequired(m_options);
257 | m_driver.initial();
258 |
259 |
260 |
261 | dtype bestDIS = 0;
262 |
263 | int inputSize = trainExamples.size();
264 |
265 | int batchBlock = inputSize / m_options.batchSize;
266 | if (inputSize % m_options.batchSize != 0)
267 | batchBlock++;
268 |
269 | srand(0);
270 | std::vector<int> indexes;
271 | for (int i = 0; i < inputSize; ++i)
272 | indexes.push_back(i);
273 |
274 | static Metric eval, metric_dev, metric_test;
275 | static vector<Example> subExamples;
276 | int devNum = devExamples.size(), testNum = testExamples.size();
277 | for (int iter = 0; iter < m_options.maxIter; ++iter) {
278 | std::cout << "##### Iteration " << iter << std::endl;
279 |
280 | random_shuffle(indexes.begin(), indexes.end());
281 | eval.reset();
282 | for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
283 | subExamples.clear();
284 | int start_pos = updateIter * m_options.batchSize;
285 | int end_pos = (updateIter + 1) * m_options.batchSize;
286 | if (end_pos > inputSize)
287 | end_pos = inputSize;
288 |
289 | for (int idy = start_pos; idy < end_pos; idy++) {
290 | subExamples.push_back(trainExamples[indexes[idy]]);
291 | }
292 |
293 | int curUpdateIter = iter * batchBlock + updateIter;
294 | dtype cost = m_driver.train(subExamples, curUpdateIter);
295 |
296 | eval.overall_label_count += m_driver._eval.overall_label_count;
297 | eval.correct_label_count += m_driver._eval.correct_label_count;
298 |
299 | if ((curUpdateIter + 1) % m_options.verboseIter == 0) {
300 | //m_driver.checkgrad(subExamples, curUpdateIter + 1);
301 | std::cout << "current: " << updateIter + 1 << ", total block: " << batchBlock << std::endl;
302 | std::cout << "Cost = " << cost << ", Tag Correct(%) = " << eval.getAccuracy() << std::endl;
303 | }
304 | m_driver.updateModel();
305 |
306 | }
307 |
308 | if (devNum > 0) {
309 | bCurIterBetter = false;
310 | if (!m_options.outBest.empty())
311 | decodeInstResults.clear();
312 | metric_dev.reset();
313 | for (int idx = 0; idx < devExamples.size(); idx++) {
314 | string result_label;
315 | predict(devExamples[idx].m_densefeatures, result_label);
316 |
317 | devInsts[idx].Evaluate(result_label, metric_dev);
318 |
319 | if (!m_options.outBest.empty()) {
320 | curDecodeInst.copyValuesFrom(devInsts[idx]);
321 | curDecodeInst.assignLabel(result_label);
322 | decodeInstResults.push_back(curDecodeInst);
323 | }
324 | }
325 |
326 | std::cout << "dev:" << std::endl;
327 | metric_dev.print();
328 |
329 | if (!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS) {
330 | m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults);
331 | bCurIterBetter = true;
332 | }
333 |
334 | if (testNum > 0) {
335 | if (!m_options.outBest.empty())
336 | decodeInstResults.clear();
337 | metric_test.reset();
338 | for (int idx = 0; idx < testExamples.size(); idx++) {
339 | string result_label;
340 | predict(testExamples[idx].m_densefeatures, result_label);
341 |
342 | testInsts[idx].Evaluate(result_label, metric_test);
343 |
344 | if (bCurIterBetter && !m_options.outBest.empty()) {
345 | curDecodeInst.copyValuesFrom(testInsts[idx]);
346 | curDecodeInst.assignLabel(result_label);
347 | decodeInstResults.push_back(curDecodeInst);
348 | }
349 | }
350 | std::cout << "test:" << std::endl;
351 | metric_test.print();
352 |
353 | if (!m_options.outBest.empty() && bCurIterBetter) {
354 | m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults);
355 | }
356 | }
357 |
358 |
359 |
360 | if (m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS) {
361 | std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl;
362 | bestDIS = metric_dev.getAccuracy();
363 | writeModelFile(modelFile);
364 | }
365 |
366 | }
367 | // Clear gradients
368 | }
369 | }
370 |
371 | int Detector::predict(const vector<Feature>& features, string& output) {
372 | int labelIdx;
373 | m_driver.predict(features, labelIdx);
374 | output = m_driver._modelparams.labelAlpha.from_id(labelIdx, nullkey);
375 |
376 | if (output == nullkey)
377 | std::cout << "predict error" << std::endl;
378 | return 0;
379 | }
380 |
381 | void Detector::test(const string& testFile, const string& outputFile, const string& modelFile) {
382 | loadModelFile(modelFile);
383 | vector<Instance> testInsts;
384 | m_pipe.readInstances(testFile, testInsts);
385 |
386 | vector<Example> testExamples;
387 | initialExamples(testInsts, testExamples);
388 |
389 | int testNum = testExamples.size();
390 | vector<Instance> testInstResults;
391 | Metric metric_test;
392 | metric_test.reset();
393 | for (int idx = 0; idx < testExamples.size(); idx++) {
394 | string result_label;
395 | predict(testExamples[idx].m_densefeatures, result_label);
396 | testInsts[idx].Evaluate(result_label, metric_test);
397 | Instance curResultInst;
398 | curResultInst.copyValuesFrom(testInsts[idx]);
399 | curResultInst.assignLabel(result_label);
400 | testInstResults.push_back(curResultInst);
401 | }
402 | std::cout << "test:" << std::endl;
403 | metric_test.print();
404 |
405 | m_pipe.outputAllInstances(outputFile, testInstResults);
406 |
407 | }
408 |
409 |
410 | void Detector::loadModelFile(const string& inputModelFile) {
411 |
412 | }
413 |
414 | void Detector::writeModelFile(const string& outputModelFile) {
415 |
416 | }
417 |
418 | int main(int argc, char* argv[]) {
419 |
420 | std::string trainFile = "", devFile = "", testFile = "", modelFile = "", optionFile = "";
421 | std::string outputFile = "", wordEmbFile = "";
422 | bool bTrain = false;
423 | dsr::Argument_helper ah;
424 | int memsize = 1;
425 |
426 | ah.new_flag("l", "learn", "train or test", bTrain);
427 | ah.new_named_string("train", "trainCorpus", "named_string", "training corpus to train a model, must when training", trainFile);
428 | ah.new_named_string("dev", "devCorpus", "named_string", "development corpus to train a model, optional when training", devFile);
429 | ah.new_named_string("test", "testCorpus", "named_string",
430 | "testing corpus to train a model or input file to test a model, optional when training and must when testing", testFile);
431 | ah.new_named_string("option", "optionFile", "named_string", "option file to train a model, optional when training", optionFile);
432 | ah.new_named_string("model", "modelFile", "named_string", "model file, must when training and testing", modelFile);
433 | ah.new_named_string("output", "outputFile", "named_string", "output file to test, must when testing", outputFile);
434 | ah.new_named_int("mem", "memsize", "named_int", "memory allocated for tensor nodes", memsize);
435 |
436 | ah.process(argc, argv);
437 |
438 | Detector detector(memsize);
439 | detector.m_pipe.max_sentense_size = ComputionGraph::max_sentence_length;
440 | if (bTrain) {
441 | detector.train(trainFile, devFile, testFile, modelFile, optionFile);
442 | }
443 | else {
444 | detector.test(testFile, outputFile, modelFile);
445 | }
446 |
447 | //test(argv);
448 | //ah.write_values(std::cout);
449 | }
450 |
--------------------------------------------------------------------------------
/src/NNWordLocal/NNWordLocal.h:
--------------------------------------------------------------------------------
1 | /*
2 | * NNWordLocal.h
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #ifndef SRC_NNWordLocal_H_
9 | #define SRC_NNWordLocal_H_
10 |
11 |
12 | #include "N3L.h"
13 | #include "Driver.h"
14 | #include "Options.h"
15 | #include "Instance.h"
16 | #include "Example.h"
17 | #include "Pipe.h"
18 | #include "Utf.h"
19 |
20 | using namespace nr;
21 | using namespace std;
22 |
23 | class Detector {
24 |
25 |
26 | public:
27 | unordered_map<string, int> m_feat_stats;
28 | unordered_map<string, int> m_word_stats;
29 |
30 | public:
31 | Options m_options;
32 |
33 | Pipe m_pipe;
34 |
35 | Driver m_driver;
36 |
37 |
38 | public:
39 | Detector(size_t memsize);
40 | virtual ~Detector();
41 |
42 | public:
43 |
44 | int createAlphabet(const vector<Instance>& vecTrainInsts);
45 | void addTestAlphabet(const vector<Instance>& vecInsts);
46 |
47 | void extractDenseFeatures(vector<Feature>& features, const Instance* pInstance);
48 | void extractLinearFeatures(vector<string>& features, const Instance* pInstance);
49 |
50 | void convert2Example(const Instance* pInstance, Example& exam);
51 | void initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams);
52 |
53 | public:
54 | void train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile);
55 | int predict(const vector<Feature>& features, string& outputs);
56 | void test(const string& testFile, const string& outputFile, const string& modelFile);
57 |
58 | void writeModelFile(const string& outputModelFile);
59 | void loadModelFile(const string& inputModelFile);
60 |
61 | };
62 |
63 | #endif /* SRC_NNWordLocal_H_ */
64 |
--------------------------------------------------------------------------------
/src/NNWordLocal/model/ComputionGraph.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ComputionGraph_H_
2 | #define SRC_ComputionGraph_H_
3 |
4 | #include "ModelParams.h"
5 |
6 |
7 | // Each model consists of two parts: building the neural graph (word lookup -> context window -> BiLSTM -> gated pooling -> hidden layer -> linear output) and defining the output losses.
8 | struct ComputionGraph : Graph {
9 | public:
10 | const static int max_sentence_length = 256;
11 |
12 | public:
13 | // node instances
14 | vector<LookupNode> word_inputs;
15 | WindowBuilder word_window;
16 |
17 | LSTM1Builder left_lstm;
18 | LSTM1Builder right_lstm;
19 |
20 | vector<ConcatNode> concat_bilstm;
21 | GatedPoolBuilder gated_pooling;
22 |
23 | UniNode sent_hidden;
24 | LinearNode output;
25 |
26 | public:
27 | ComputionGraph() : Graph() {
28 | }
29 |
30 | ~ComputionGraph() {
31 | clear();
32 | }
33 |
34 | public:
35 | //allocate enough nodes
36 | inline void createNodes(int sent_length) {
37 |
38 | word_inputs.resize(sent_length);
39 | word_window.resize(sent_length);
40 | left_lstm.resize(sent_length);
41 | right_lstm.resize(sent_length);
42 |
43 | concat_bilstm.resize(sent_length);
44 | gated_pooling.resize(sent_length);
45 | }
46 |
47 | inline void clear() {
48 | Graph::clear();
49 | word_inputs.clear();
50 | word_window.clear();
51 | left_lstm.clear();
52 | right_lstm.clear();
53 |
54 | concat_bilstm.clear();
55 | gated_pooling.clear();
56 |
57 | }
58 |
59 | public:
60 | inline void initial(ModelParams& model, HyperParams& opts, AlignedMemoryPool* mem = NULL) {
61 | for (int idx = 0; idx < word_inputs.size(); idx++) {
62 | word_inputs[idx].init(model.words.nDim, opts.dropOut, mem);
63 | word_inputs[idx].setParam(&model.words);
64 | concat_bilstm[idx].init(opts.rnnhiddensize * 2, -1, mem);
65 | }
66 | word_window.init(model.words.nDim, opts.wordcontext, mem);
67 | left_lstm.init(&model.left_lstm_project, opts.dropOut, true, mem);
68 | right_lstm.init(&model.right_lstm_project, opts.dropOut, false, mem);
69 | gated_pooling.init(&model.gatedpool_project, mem);
70 | sent_hidden.init(opts.hiddensize, opts.dropOut, mem);
71 | sent_hidden.setParam(&model.sent_hidden_project);
72 | output.init(opts.labelSize, -1, mem);
73 | output.setParam(&model.olayer_linear);
74 | }
75 |
76 |
77 | public:
78 | // some nodes may behave differently during training and decoding, for example, dropout
79 | inline void forward(const vector<Feature>& features, bool bTrain = false) {
80 | //first step: clear value
81 | clearValue(bTrain); // clearing values is a required step before training, prediction, and cost computation
82 |
83 |
84 | // second step: build graph
85 | int seqsize = features.size();
86 | //forward
87 | // word-level neural networks
88 | const Feature& feature = features[seqsize - 1];
89 | int wordnum = feature.words.size();
90 | if (wordnum > max_sentence_length)
91 | wordnum = max_sentence_length;
92 | for (int idx = 0; idx < wordnum; idx++) {
93 | //input
94 | word_inputs[idx].forward(this, feature.words[idx]);
95 | }
96 |
97 | //windowlized
98 | word_window.forward(this, getPNodes(word_inputs, wordnum));
99 |
100 | left_lstm.forward(this, getPNodes(word_window._outputs, wordnum));
101 | right_lstm.forward(this, getPNodes(word_window._outputs, wordnum));
102 |
103 | for (int idx = 0; idx < wordnum; idx++) {
104 | //feed-forward
105 | concat_bilstm[idx].forward(this, &(left_lstm._hiddens[idx]), &(right_lstm._hiddens[idx]));
106 | }
107 | gated_pooling.forward(this, getPNodes(concat_bilstm, wordnum));
108 | sent_hidden.forward(this, &gated_pooling._output);
109 | output.forward(this, &sent_hidden);
110 | }
111 |
112 | };
113 |
114 | #endif /* SRC_ComputionGraph_H_ */
--------------------------------------------------------------------------------
/src/NNWordLocal/model/Driver.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Driver.h
3 | *
4 | * Created on: Mar 18, 2015
5 | * Author: mszhang
6 | */
7 |
8 | #ifndef SRC_Driver_H_
9 | #define SRC_Driver_H_
10 |
11 | #include <iostream>
12 | #include "ComputionGraph.h"
13 |
14 | // A neural network classifier over dense word features
15 |
16 | class Driver{
17 | public:
18 | Driver(size_t memsize) : aligned_mem(memsize) {
19 | _pcg = NULL;
20 | }
21 |
22 | ~Driver() {
23 | if (_pcg != NULL)
24 | delete _pcg;
25 | _pcg = NULL;
26 | }
27 |
28 | public:
29 | ComputionGraph *_pcg; // build neural graphs
30 | ModelParams _modelparams; // model parameters
31 | HyperParams _hyperparams;
32 |
33 | Metric _eval;
34 | CheckGrad _checkgrad;
35 | ModelUpdate _ada; // model update
36 |
37 | AlignedMemoryPool aligned_mem;
38 |
39 | public:
40 | inline void initial() {
41 | if (!_hyperparams.bValid()){
42 | std::cout << "hyper parameter initialization Error, Please check!" << std::endl;
43 | return;
44 | }
45 | if (!_modelparams.initial(_hyperparams, &aligned_mem)){
46 | std::cout << "model parameter initialization Error, Please check!" << std::endl;
47 | return;
48 | }
49 | _modelparams.exportModelParams(_ada);
50 | _modelparams.exportCheckGradParams(_checkgrad);
51 |
52 | _hyperparams.print();
53 |
54 | _pcg = new ComputionGraph();
55 | _pcg->createNodes(ComputionGraph::max_sentence_length);
56 | _pcg->initial(_modelparams, _hyperparams, &aligned_mem);
57 |
58 | setUpdateParameters(_hyperparams.nnRegular, _hyperparams.adaAlpha, _hyperparams.adaEps);
59 | }
60 |
61 |
62 | inline dtype train(const vector<Example>& examples, int iter) {
63 | _eval.reset();
64 |
65 | int example_num = examples.size();
66 | dtype cost = 0.0;
67 |
68 | for (int count = 0; count < example_num; count++) {
69 | const Example& example = examples[count];
70 |
71 | //forward
72 | _pcg->forward(example.m_densefeatures, true);
73 |
74 | //loss function
75 | int seq_size = example.m_densefeatures.size();
76 | int wordnum = example.m_densefeatures[seq_size - 1].words.size();
77 | cost += _modelparams.loss.loss(&_pcg->output, example.m_labels, _eval, example_num);
78 |
79 | // backward, which exists only for training
80 | _pcg->backward();
81 | }
82 |
83 | if (_eval.getAccuracy() < 0) {
84 | std::cout << "strange" << std::endl;
85 | }
86 |
87 | return cost;
88 | }
89 |
90 | inline void predict(const vector<Feature>& densefeatures, int& results) {
91 | _pcg->forward(densefeatures);
92 | _modelparams.loss.predict(&_pcg->output, results);
93 | }
94 |
95 | inline dtype cost(const Example& example){
96 | _pcg->forward(example.m_densefeatures); //forward here
97 |
98 | int seq_size = example.m_densefeatures.size();
99 |
100 | dtype cost = 0.0;
101 |
102 | cost += _modelparams.loss.cost(&_pcg->output, example.m_labels, 1);
103 |
104 | return cost;
105 | }
106 |
107 | void checkgrad(const vector<Example>& examples, int iter){
108 | ostringstream out;
109 | out << "Iteration: " << iter;
110 | _checkgrad.check(this, examples, out.str());
111 | }
112 |
113 | void updateModel() {
114 | _ada.update();
115 | //_ada.update(5.0);
116 | }
117 |
118 | void writeModel();
119 |
120 | void loadModel();
121 |
122 |
123 |
124 | private:
125 | inline void resetEval() {
126 | _eval.reset();
127 | }
128 |
129 |
130 | inline void setUpdateParameters(dtype nnRegular, dtype adaAlpha, dtype adaEps){
131 | _ada._alpha = adaAlpha;
132 | _ada._eps = adaEps;
133 | _ada._reg = nnRegular;
134 | }
135 |
136 | };
137 |
138 | #endif /* SRC_Driver_H_ */
139 |
--------------------------------------------------------------------------------
/src/NNWordLocal/model/HyperParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_HyperParams_H_
2 | #define SRC_HyperParams_H_
3 |
4 | #include "N3L.h"
5 | #include "Example.h"
6 | #include "Options.h"
7 |
8 | using namespace nr;
9 | using namespace std;
10 |
11 | struct HyperParams{
12 |
13 | // must assign
14 | int wordcontext;
15 | int hiddensize;
16 | int rnnhiddensize;
17 | dtype dropOut;
18 |
19 | // must assign
20 | dtype nnRegular; // for optimization
21 | dtype adaAlpha; // for optimization
22 | dtype adaEps; // for optimization
23 |
24 | //auto generated
25 | int wordwindow;
26 | int wordDim;
27 | int inputsize;
28 | int labelSize;
29 |
30 | public:
31 | HyperParams(){
32 | bAssigned = false;
33 | }
34 |
35 | public:
36 | void setRequired(Options& opt){
37 | wordcontext = opt.wordcontext;
38 | hiddensize = opt.hiddenSize;
39 | rnnhiddensize = opt.rnnHiddenSize;
40 | dropOut = opt.dropProb;
41 |
42 | nnRegular = opt.regParameter;
43 | adaAlpha = opt.adaAlpha;
44 | adaEps = opt.adaEps;
45 |
46 | bAssigned = true;
47 | }
48 |
49 | void clear(){
50 | bAssigned = false;
51 | }
52 |
53 | bool bValid(){
54 | return bAssigned;
55 | }
56 |
57 |
58 | public:
59 |
60 | void print(){
61 |
62 | }
63 |
64 | private:
65 | bool bAssigned;
66 | };
67 |
68 |
69 | #endif /* SRC_HyperParams_H_ */
--------------------------------------------------------------------------------
/src/NNWordLocal/model/ModelParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ModelParams_H_
2 | #define SRC_ModelParams_H_
3 | #include "HyperParams.h"
4 |
5 | // Each model consists of two parts: building the neural graph and defining the output losses.
6 | class ModelParams{
7 |
8 | public:
9 | Alphabet wordAlpha; // should be initialized outside
10 | Alphabet featAlpha; // should be initialized outside
11 | Alphabet labelAlpha; // should be initialized outside
12 | public:
13 | LookupTable words; // should be initialized outside
14 | LSTM1Params left_lstm_project; //left lstm
15 | LSTM1Params right_lstm_project; //right lstm
16 | GatedPoolParam gatedpool_project;
17 | UniParams sent_hidden_project;
18 | UniParams olayer_linear; // output
19 | public:
20 | SoftMaxLoss loss;
21 |
22 |
23 | public:
24 | bool initial(HyperParams& opts , AlignedMemoryPool *mem = NULL){
25 |
26 | // some model parameters should be initialized outside
27 | if (words.nVSize <= 0 || labelAlpha.size() <= 0){
28 | return false;
29 | }
30 | opts.wordDim = words.nDim;
31 | opts.wordwindow = 2 * opts.wordcontext + 1;
32 | opts.inputsize = opts.wordwindow * opts.wordDim;
33 |
34 | left_lstm_project.initial(opts.rnnhiddensize, opts.inputsize, mem);
35 | right_lstm_project.initial(opts.rnnhiddensize, opts.inputsize, mem);
36 | gatedpool_project.initial(opts.rnnhiddensize * 2, opts.rnnhiddensize * 2, mem);
37 | sent_hidden_project.initial(opts.hiddensize, opts.rnnhiddensize * 2, mem);
38 |
39 | opts.labelSize = labelAlpha.size();
40 | olayer_linear.initial(opts.labelSize, opts.hiddensize, false, mem);
41 |
42 | return true;
43 | }
44 |
45 |
46 | void exportModelParams(ModelUpdate& ada){
47 | words.exportAdaParams(ada);
48 | left_lstm_project.exportAdaParams(ada);
49 | right_lstm_project.exportAdaParams(ada);
50 | gatedpool_project.exportAdaParams(ada);
51 | sent_hidden_project.exportAdaParams(ada);
52 | olayer_linear.exportAdaParams(ada);
53 | }
54 |
55 |
56 | void exportCheckGradParams(CheckGrad& checkgrad){
57 | checkgrad.add(&(words.E), "_words.E");
58 | checkgrad.add(&(left_lstm_project.output.W1), "left_lstm_project.output.W1");
59 | checkgrad.add(&(gatedpool_project._uni_gate_param.W), "gatedpool_project._uni_gate_param.W");
60 | checkgrad.add(&(gatedpool_project._uni_gate_param.b), "gatedpool_project._uni_gate_param.b");
61 | checkgrad.add(&(sent_hidden_project.W), "sent_hidden_project.W");
62 | checkgrad.add(&(sent_hidden_project.b), "sent_hidden_project.b");
63 | checkgrad.add(&(olayer_linear.W), "olayer_linear.W");
64 | }
65 |
66 | // will add it later
67 | void saveModel(){
68 |
69 | }
70 |
71 | void loadModel(const string& inFile){
72 |
73 | }
74 |
75 | };
76 |
77 | #endif /* SRC_ModelParams_H_ */
--------------------------------------------------------------------------------
/src/NNWordLocalContext/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | include_directories(
3 | model
4 | )
5 |
6 | add_executable(NNWordLocalContext NNWordLocalContext.cpp)
7 |
8 |
--------------------------------------------------------------------------------
/src/NNWordLocalContext/NNWordLocalContext.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * NNWordLocalContext.cpp
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #include "NNWordLocalContext.h"
9 |
10 | #include "Argument_helper.h"
11 |
12 | Detector::Detector(size_t memsize) : m_driver(memsize){
13 | // TODO Auto-generated constructor stub
14 | srand(0);
15 | }
16 |
17 | Detector::~Detector() {
18 | // TODO Auto-generated destructor stub
19 | }
20 |
21 | int Detector::createAlphabet(const vector<Instance>& vecInsts) {
22 | if (vecInsts.size() == 0) {
23 | std::cout << "training set empty" << std::endl;
24 | return -1;
25 | }
26 | cout << "Creating Alphabet..." << endl;
27 |
28 | m_maxseq_size = -1;
29 | int numInstance;
30 |
31 | m_driver._modelparams.labelAlpha.clear();
32 | // label alphabet and word statistics
33 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
34 | const Instance *pInstance = &vecInsts[numInstance];
35 |
36 | const vector<vector<string> > &words = pInstance->words;
37 | const string &label = pInstance->label;
38 |
39 | int labelId = m_driver._modelparams.labelAlpha.from_string(label);
40 |
41 | int seq_size = pInstance->seqsize();
42 | if (seq_size > m_maxseq_size)
43 | m_maxseq_size = seq_size;
44 | for (int i = 0; i < seq_size; i++) {
45 | int wordLength = words[i].size();
46 | for (int j = 0; j < wordLength; ++j) {
47 | string curword = normalize_to_lowerwithdigit(words[i][j]);
48 | m_word_stats[curword]++;
49 | }
50 |
51 | }
52 |
53 | if ((numInstance + 1) % m_options.verboseIter == 0) {
54 | cout << numInstance + 1 << " ";
55 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
56 | cout << std::endl;
57 | cout.flush();
58 | }
59 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
60 | break;
61 | }
62 |
63 | cout << numInstance << " " << endl;
64 | cout << "Label num: " << m_driver._modelparams.labelAlpha.size() << endl;
65 |
66 |
67 | m_driver._modelparams.labelAlpha.set_fixed_flag(true);
68 |
69 | if (m_options.linearfeatCat > 0) {
70 | cout << "Extracting linear features..." << endl;
71 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
72 | const Instance *pInstance = &vecInsts[numInstance];
73 | vector<string> linearfeat;
74 | extractLinearFeatures(linearfeat, pInstance);
75 | for (int i = 0; i < linearfeat.size(); i++)
76 | m_feat_stats[linearfeat[i]] ++;
77 | }
78 | m_feat_stats[unknownkey] = m_options.featCutOff + 1;
79 | cout << "Total feature num: " << m_feat_stats.size() << endl;
80 | m_driver._modelparams.featAlpha.initial(m_feat_stats, m_options.featCutOff);
81 | cout << "Remina feature num:" << m_driver._modelparams.featAlpha.size() << endl;
82 | m_driver._modelparams.featAlpha.set_fixed_flag(true);
83 | }
84 | return 0;
85 | }
86 |
87 | void Detector::addTestAlphabet(const vector<Instance>& vecInsts)
88 | {
89 | cout << "Adding other word Alphabet..." << endl;
90 | int numInstance;
91 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
92 | const Instance *pInstance = &vecInsts[numInstance];
93 |
94 | const vector<vector<string> > &words = pInstance->words;
95 | int seq_size = pInstance->seqsize();
96 | for (int i = 0; i < seq_size; ++i) {
97 | for (int j = 0; j < words[i].size(); j++) {
98 | string curword = normalize_to_lowerwithdigit(words[i][j]);
99 | if (!m_options.wordEmbFineTune)
100 | m_word_stats[curword]++;
101 | }
102 | }
103 |
104 | if ((numInstance + 1) % m_options.verboseIter == 0) {
105 | cout << numInstance + 1 << " ";
106 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
107 | cout << std::endl;
108 | cout.flush();
109 | }
110 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
111 | break;
112 | }
113 |
114 | cout << numInstance << " " << endl;
115 | }
116 |
117 |
118 |
119 | void Detector::extractDenseFeatures(vector<Feature>& features, const Instance* pInstance)
120 | {
121 | features.clear();
122 | const vector<vector<string> >& words = pInstance->words;
123 | int seq_size = pInstance->seqsize();
124 | assert(seq_size < 3);
125 |
126 | //Local and Context sentence dense feature
127 | for (int sentidx = 0; sentidx < seq_size; sentidx++) {
128 | Feature feat;
129 | const vector<string>& curWords = words[sentidx];
130 | int wordnumber = curWords.size();
131 | for (int i = 0; i < wordnumber; i++)
132 | feat.words.push_back(normalize_to_lowerwithdigit(curWords[i]));
133 |
134 | features.push_back(feat);
135 | }
136 | }
137 |
138 | void Detector::extractLinearFeatures(vector<string>& feat, const Instance* pInstance) {
139 | feat.clear();
140 |
141 | const vector<vector<string> >& words = pInstance->words;
142 | int seq_size = pInstance->seqsize();
143 | assert(seq_size < 3);
144 | //Current sent linear feature
145 | const vector<string>& lastWords = words[seq_size - 1];
146 | int wordnumber = lastWords.size();
147 | string strfeat = "", curWord = "", preWord = "", pre2Word = "";
148 | for (int i = 0; i < wordnumber; i++) {
149 | curWord = normalize_to_lowerwithdigit(lastWords[i]);
150 | strfeat = "F1U=" + curWord;
151 | feat.push_back(strfeat);
152 | preWord = i - 1 >= 0 ? lastWords[i - 1] : nullkey;
153 | strfeat = "F2B=" + preWord + seperateKey + curWord;
154 | feat.push_back(strfeat);
155 | pre2Word = i - 2 >= 0 ? lastWords[i - 2] : nullkey;
156 | strfeat = "F3T=" + pre2Word + seperateKey + preWord + seperateKey + curWord;
157 | feat.push_back(strfeat);
158 | }
159 |
160 | //History feature
161 | if (m_options.linearfeatCat > 1 && seq_size == 2) {
162 | const vector<string>& historyWords = words[seq_size - 2];
163 | wordnumber = historyWords.size();
164 | for (int i = 0; i < wordnumber; i++) {
165 | strfeat = "F4U=" + historyWords[i];
166 | feat.push_back(strfeat);
167 | }
168 | }
169 | }
170 |
171 | void Detector::convert2Example(const Instance* pInstance, Example& exam) {
172 | exam.clear();
173 |
174 | const string &instlabel = pInstance->label;
175 | const Alphabet &labelAlpha = m_driver._modelparams.labelAlpha;
176 |
177 | int labelnum = labelAlpha.size();
178 | for (int i = 0; i < labelnum; i++) {
179 | string str = labelAlpha.from_id(i);
180 | if (instlabel.compare(str) == 0)
181 | exam.m_labels.push_back(1.0);
182 | else
183 | exam.m_labels.push_back(0.0);
184 | }
185 |
186 | //dense feature
187 | extractDenseFeatures(exam.m_densefeatures, pInstance);
188 |
189 | //linear feature
190 | if (m_options.linearfeatCat > 0)
191 | extractLinearFeatures(exam.m_linearfeatures, pInstance);
192 |
193 | }
194 |
195 | void Detector::initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams) {
196 | int numInstance;
197 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
198 | const Instance *pInstance = &vecInsts[numInstance];
199 | Example curExam;
200 | convert2Example(pInstance, curExam);
201 | vecExams.push_back(curExam);
202 |
203 | if ((numInstance + 1) % m_options.verboseIter == 0) {
204 | cout << numInstance + 1 << " ";
205 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
206 | cout << std::endl;
207 | cout.flush();
208 | }
209 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
210 | break;
211 | }
212 |
213 | cout << numInstance << " " << endl;
214 | }
215 |
216 | void Detector::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile) {
217 | if (optionFile != "")
218 | m_options.load(optionFile);
219 | m_options.showOptions();
220 | vector<Instance> trainInsts, devInsts, testInsts;
221 | static vector<Instance> decodeInstResults;
222 | static Instance curDecodeInst;
223 | bool bCurIterBetter = false;
224 |
225 | m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
226 | if (devFile != "")
227 | m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
228 | if (testFile != "")
229 | m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);
230 |
231 | std::cout << "Training example number: " << trainInsts.size() << std::endl;
232 | std::cout << "Dev example number: " << trainInsts.size() << std::endl;
233 | std::cout << "Test example number: " << trainInsts.size() << std::endl;
234 |
235 | createAlphabet(trainInsts);
236 | addTestAlphabet(devInsts);
237 | addTestAlphabet(testInsts);
238 |
239 | m_word_stats[unknownkey] = m_options.wordCutOff + 1;
240 | cout << "Total word num: " << m_word_stats.size() << endl;
241 | m_driver._modelparams.wordAlpha.initial(m_word_stats, m_options.wordCutOff);
242 | m_driver._modelparams.wordAlpha.set_fixed_flag(true);
243 | cout << "Remain word num:" << m_driver._modelparams.wordAlpha.size() << endl;
244 |
245 | vector<Example> trainExamples, devExamples, testExamples;
246 |
247 | std::cout << "Instance convert to example... " << std::endl;
248 | initialExamples(trainInsts, trainExamples);
249 | initialExamples(devInsts, devExamples);
250 | initialExamples(testInsts, testExamples);
251 |
252 | if (m_options.wordFile != "") {
253 | m_driver._modelparams.words.initial(&m_driver._modelparams.wordAlpha, m_options.wordFile, m_options.wordEmbFineTune);
254 | }
255 | else{
256 | m_driver._modelparams.words.initial(&m_driver._modelparams.wordAlpha, m_options.wordEmbSize, m_options.wordEmbFineTune);
257 | }
258 |
259 | m_driver._hyperparams.setRequired(m_options);
260 | m_driver.initial(m_maxseq_size);
261 |
262 |
263 |
264 | dtype bestDIS = 0;
265 |
266 | int inputSize = trainExamples.size();
267 |
268 | int batchBlock = inputSize / m_options.batchSize;
269 | if (inputSize % m_options.batchSize != 0)
270 | batchBlock++;
271 |
272 | srand(0);
273 | std::vector<int> indexes;
274 | for (int i = 0; i < inputSize; ++i)
275 | indexes.push_back(i);
276 |
277 | static Metric eval, metric_dev, metric_test;
278 | static vector<Example> subExamples;
279 | int devNum = devExamples.size(), testNum = testExamples.size();
280 | for (int iter = 0; iter < m_options.maxIter; ++iter) {
281 | std::cout << "##### Iteration " << iter << std::endl;
282 |
283 | random_shuffle(indexes.begin(), indexes.end());
284 | eval.reset();
285 | for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
286 | subExamples.clear();
287 | int start_pos = updateIter * m_options.batchSize;
288 | int end_pos = (updateIter + 1) * m_options.batchSize;
289 | if (end_pos > inputSize)
290 | end_pos = inputSize;
291 |
292 | for (int idy = start_pos; idy < end_pos; idy++) {
293 | subExamples.push_back(trainExamples[indexes[idy]]);
294 | }
295 |
296 | int curUpdateIter = iter * batchBlock + updateIter;
297 | dtype cost = m_driver.train(subExamples, curUpdateIter);
298 |
299 | eval.overall_label_count += m_driver._eval.overall_label_count;
300 | eval.correct_label_count += m_driver._eval.correct_label_count;
301 |
302 | if ((curUpdateIter + 1) % m_options.verboseIter == 0) {
303 | //m_driver.checkgrad(subExamples, curUpdateIter + 1);
304 | std::cout << "current: " << updateIter + 1 << ", total block: " << batchBlock << std::endl;
305 | std::cout << "Cost = " << cost << ", Tag Correct(%) = " << eval.getAccuracy() << std::endl;
306 | }
307 | m_driver.updateModel();
308 |
309 | }
310 |
311 | if (devNum > 0) {
312 | bCurIterBetter = false;
313 | if (!m_options.outBest.empty())
314 | decodeInstResults.clear();
315 | metric_dev.reset();
316 | for (int idx = 0; idx < devExamples.size(); idx++) {
317 | string result_label;
318 | predict(devExamples[idx].m_densefeatures, result_label);
319 |
320 | devInsts[idx].Evaluate(result_label, metric_dev);
321 |
322 | if (!m_options.outBest.empty()) {
323 | curDecodeInst.copyValuesFrom(devInsts[idx]);
324 | curDecodeInst.assignLabel(result_label);
325 | decodeInstResults.push_back(curDecodeInst);
326 | }
327 | }
328 |
329 | std::cout << "dev:" << std::endl;
330 | metric_dev.print();
331 |
332 | if (!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS) {
333 | m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults);
334 | bCurIterBetter = true;
335 | }
336 |
337 | if (testNum > 0) {
338 | if (!m_options.outBest.empty())
339 | decodeInstResults.clear();
340 | metric_test.reset();
341 | for (int idx = 0; idx < testExamples.size(); idx++) {
342 | string result_label;
343 | predict(testExamples[idx].m_densefeatures, result_label);
344 |
345 | testInsts[idx].Evaluate(result_label, metric_test);
346 |
347 | if (bCurIterBetter && !m_options.outBest.empty()) {
348 | curDecodeInst.copyValuesFrom(testInsts[idx]);
349 | curDecodeInst.assignLabel(result_label);
350 | decodeInstResults.push_back(curDecodeInst);
351 | }
352 | }
353 | std::cout << "test:" << std::endl;
354 | metric_test.print();
355 |
356 | if (!m_options.outBest.empty() && bCurIterBetter) {
357 | m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults);
358 | }
359 | }
360 |
361 |
362 |
363 | if (m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS) {
364 | std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl;
365 | bestDIS = metric_dev.getAccuracy();
366 | writeModelFile(modelFile);
367 | }
368 |
369 | }
370 | // Clear gradients
371 | }
372 | }
373 |
374 | int Detector::predict(const vector<Feature>& features, string& output) {
375 | int labelIdx;
376 | m_driver.predict(features, labelIdx);
377 | output = m_driver._modelparams.labelAlpha.from_id(labelIdx, nullkey);
378 |
379 | if (output == nullkey)
380 | std::cout << "predict error" << std::endl;
381 | return 0;
382 | }
383 |
384 | void Detector::test(const string& testFile, const string& outputFile, const string& modelFile) {
385 | loadModelFile(modelFile);
386 | vector<Instance> testInsts;
387 | m_pipe.readInstances(testFile, testInsts);
388 |
389 | vector<Example> testExamples;
390 | initialExamples(testInsts, testExamples);
391 |
392 | int testNum = testExamples.size();
393 | vector<Instance> testInstResults;
394 | Metric metric_test;
395 | metric_test.reset();
396 | for (int idx = 0; idx < testExamples.size(); idx++) {
397 | string result_label;
398 | predict(testExamples[idx].m_densefeatures, result_label);
399 | testInsts[idx].Evaluate(result_label, metric_test);
400 | Instance curResultInst;
401 | curResultInst.copyValuesFrom(testInsts[idx]);
402 | curResultInst.assignLabel(result_label);
403 | testInstResults.push_back(curResultInst);
404 | }
405 | std::cout << "test:" << std::endl;
406 | metric_test.print();
407 |
408 | m_pipe.outputAllInstances(outputFile, testInstResults);
409 |
410 | }
411 |
412 |
413 | void Detector::loadModelFile(const string& inputModelFile) {
414 |
415 | }
416 |
417 | void Detector::writeModelFile(const string& outputModelFile) {
418 |
419 | }
420 |
421 | int main(int argc, char* argv[]) {
422 |
423 | std::string trainFile = "", devFile = "", testFile = "", modelFile = "", optionFile = "";
424 | std::string outputFile = "", wordEmbFile = "";
425 | bool bTrain = false;
426 | dsr::Argument_helper ah;
427 | int memsize = 1;
428 |
429 | ah.new_flag("l", "learn", "train or test", bTrain);
430 | ah.new_named_string("train", "trainCorpus", "named_string", "training corpus to train a model, must when training", trainFile);
431 | ah.new_named_string("dev", "devCorpus", "named_string", "development corpus to train a model, optional when training", devFile);
432 | ah.new_named_string("test", "testCorpus", "named_string",
433 | "testing corpus to train a model or input file to test a model, optional when training and must when testing", testFile);
434 | ah.new_named_string("option", "optionFile", "named_string", "option file to train a model, optional when training", optionFile);
435 | ah.new_named_string("model", "modelFile", "named_string", "model file, must when training and testing", modelFile);
436 | ah.new_named_string("output", "outputFile", "named_string", "output file to test, must when testing", outputFile);
437 | ah.new_named_int("mem", "memsize", "named_int", "memory allocated for tensor nodes", memsize);
438 |
439 | ah.process(argc, argv);
440 |
441 | Detector detector(memsize);
442 | detector.m_pipe.max_sentense_size = ComputionGraph::max_sentence_length;
443 | if (bTrain) {
444 | detector.train(trainFile, devFile, testFile, modelFile, optionFile);
445 | }
446 | else {
447 | detector.test(testFile, outputFile, modelFile);
448 | }
449 |
450 | //test(argv);
451 | //ah.write_values(std::cout);
452 | }
453 |
--------------------------------------------------------------------------------
/src/NNWordLocalContext/NNWordLocalContext.h:
--------------------------------------------------------------------------------
1 | /*
2 | * NNWordLocalContext.h
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #ifndef SRC_NNWordLocalContext_H_
9 | #define SRC_NNWordLocalContext_H_
10 |
11 |
12 | #include "N3L.h"
13 | #include "Driver.h"
14 | #include "Options.h"
15 | #include "Instance.h"
16 | #include "Example.h"
17 | #include "Pipe.h"
18 | #include "Utf.h"
19 |
20 | using namespace nr;
21 | using namespace std;
22 |
23 | class Detector {
24 |
25 |
26 | public:
27 | unordered_map<string, int> m_feat_stats;
28 | unordered_map<string, int> m_word_stats;
29 | int m_maxseq_size;
30 |
31 | public:
32 | Options m_options;
33 |
34 | Pipe m_pipe;
35 |
36 | Driver m_driver;
37 |
38 |
39 | public:
40 | Detector(size_t memsize);
41 | virtual ~Detector();
42 |
43 | public:
44 |
45 | int createAlphabet(const vector<Instance>& vecTrainInsts);
46 | void addTestAlphabet(const vector<Instance>& vecInsts);
47 |
48 | void extractDenseFeatures(vector<Feature>& features, const Instance* pInstance);
49 | void extractLinearFeatures(vector<string>& features, const Instance* pInstance);
50 |
51 | void convert2Example(const Instance* pInstance, Example& exam);
52 | void initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams);
53 |
54 | public:
55 | void train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile);
56 | int predict(const vector<Feature>& features, string& outputs);
57 | void test(const string& testFile, const string& outputFile, const string& modelFile);
58 |
59 | void writeModelFile(const string& outputModelFile);
60 | void loadModelFile(const string& inputModelFile);
61 |
62 | };
63 |
64 | #endif /* SRC_NNWordLocalContext_H_ */
65 |
--------------------------------------------------------------------------------
/src/NNWordLocalContext/model/ComputionGraph.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ComputionGraph_H_
2 | #define SRC_ComputionGraph_H_
3 |
4 | #include "ModelParams.h"
5 |
6 |
7 | // Each model consists of two parts: building the neural graph (BiLSTM + gated pooling over the local sentence, gated pooling over the context sentence, concatenated into a hidden layer) and defining the output losses.
8 | struct ComputionGraph : Graph {
9 | public:
10 | const static int max_sentence_length = 256;
11 |
12 | public:
13 | // node instances
14 | vector<vector<LookupNode> > word_inputs;
15 | WindowBuilder word_window;
16 |
17 | LSTM1Builder left_lstm;
18 | LSTM1Builder right_lstm;
19 |
20 | vector<ConcatNode> concat_bilstm;
21 | GatedPoolBuilder local_gated_pooling;
22 | GatedPoolBuilder context_gated_pooling;
23 |
24 | Node padding;
25 | ConcatNode concat_local_context;
26 | UniNode sent_hidden;
27 | LinearNode output;
28 |
29 | public:
30 | ComputionGraph() : Graph() {
31 | }
32 |
33 | ~ComputionGraph() {
34 | clear();
35 | }
36 |
37 | public:
38 | //allocate enough nodes
39 | inline void createNodes(int sent_length, int maxseq_size) {
40 |
41 | resizeVec(word_inputs, maxseq_size, sent_length);
42 | word_window.resize(sent_length);
43 | left_lstm.resize(sent_length);
44 | right_lstm.resize(sent_length);
45 |
46 | concat_bilstm.resize(sent_length);
47 | local_gated_pooling.resize(sent_length);
48 | context_gated_pooling.resize(sent_length);
49 |
50 | }
51 |
52 | inline void clear() {
53 | Graph::clear();
54 | clearVec(word_inputs);
55 | word_window.clear();
56 | left_lstm.clear();
57 | right_lstm.clear();
58 | concat_bilstm.clear();
59 | local_gated_pooling.clear();
60 | context_gated_pooling.clear();
61 | }
62 |
63 |
64 | public:
65 | inline void initial(ModelParams& model, HyperParams& opts, AlignedMemoryPool* mem = NULL) {
66 | int seq_size = word_inputs.size();
67 |
68 | for (int i = 0; i < seq_size; i++) {
69 | for (int idx = 0; idx < word_inputs[i].size(); idx++) {
70 | word_inputs[i][idx].init(model.words.nDim, opts.dropOut, mem);
71 | word_inputs[i][idx].setParam(&model.words);
72 | if (i == seq_size - 1)
73 | concat_bilstm[idx].init(opts.rnnhiddensize * 2, -1, mem);
74 | }
75 | }
76 | word_window.init(model.words.nDim, opts.wordcontext, mem);
77 | left_lstm.init(&model.left_lstm_project, opts.dropOut, true, mem);
78 | right_lstm.init(&model.right_lstm_project, opts.dropOut, false, mem);
79 |
80 | local_gated_pooling.init(&model.local_gatedpool_project, mem);
81 | context_gated_pooling.init(&model.context_gatedpool_project, mem);
82 |
83 | concat_local_context.init(opts.rnnhiddensize * 2 + model.words.nDim, -1, mem);
84 | sent_hidden.init(opts.hiddensize, opts.dropOut, mem);
85 | sent_hidden.setParam(&model.sent_tanh_project);
86 | output.init(opts.labelSize, -1, mem);
87 | output.setParam(&model.olayer_linear);
88 |
89 | padding.init(opts.wordDim, -1, mem);
90 | }
91 |
92 |
93 | public:
94 | // some nodes may behave differently during training and decoding, for example, dropout
95 | inline void forward(const vector<Feature>& features, bool bTrain = false) {
96 | //first step: clear value
97 | clearValue(bTrain); // clearing values is a required step before training, prediction, and cost computation
98 |
99 |
100 | // second step: build graph
101 | int seq_size = features.size();
102 | //forward
103 | // word-level neural networks
104 | for (int i = 0; i < seq_size; i++) {
105 |
106 | const Feature& feature = features[i];
107 | int wordnum = feature.words.size();
108 | if (wordnum > max_sentence_length)
109 | wordnum = max_sentence_length;
110 | for (int idx = 0; idx < wordnum; idx++) {
111 | //input
112 | word_inputs[i][idx].forward(this, feature.words[idx]);
113 | }
114 | if (i == seq_size - 1) {
115 | //windowlized
116 | word_window.forward(this, getPNodes(word_inputs[i], wordnum));
117 | left_lstm.forward(this, getPNodes(word_window._outputs, wordnum));
118 | right_lstm.forward(this, getPNodes(word_window._outputs, wordnum));
119 |
120 | for (int idx = 0; idx < wordnum; idx++) {
121 | //feed-forward
122 | concat_bilstm[idx].forward(this, &(left_lstm._hiddens[idx]), &(right_lstm._hiddens[idx]));
123 | }
124 | local_gated_pooling.forward(this, getPNodes(concat_bilstm, wordnum));
125 | }
126 |
127 | else {
128 | context_gated_pooling.forward(this, getPNodes(word_inputs[i], wordnum));
129 | }
130 | }
131 |
132 | if (seq_size == 1)
133 | concat_local_context.forward(this, &padding, &local_gated_pooling._output);
134 | else
135 | concat_local_context.forward(this, &context_gated_pooling._output, &local_gated_pooling._output);
136 | sent_hidden.forward(this, &concat_local_context);
137 | output.forward(this, &sent_hidden);
138 | }
139 |
140 | };
141 |
142 | #endif /* SRC_ComputionGraph_H_ */
--------------------------------------------------------------------------------
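Both pooling members above (`local_gated_pooling`, `context_gated_pooling`) are `GatedPoolBuilder`s, which squash a variable-length node sequence into a single vector by gating each position element-wise before pooling. The following self-contained sketch shows one common gated-pooling formulation in plain C++; the sigmoid gate, toy dimensions, and weight values are assumptions for illustration, not LibN3L-2.0's actual implementation:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Gated pooling sketch: gate each position element-wise with
    // sigmoid(W * x_i + b), then average the gated vectors.
    int main() {
        const int dim = 4, len = 3;
        std::vector<std::vector<double> > x(len, std::vector<double>(dim, 0.5)); // toy inputs
        std::vector<std::vector<double> > W(dim, std::vector<double>(dim, 0.1)); // toy gate weights
        std::vector<double> b(dim, 0.0), pooled(dim, 0.0);
        for (int i = 0; i < len; i++) {
            for (int d = 0; d < dim; d++) {
                double s = b[d];
                for (int k = 0; k < dim; k++) s += W[d][k] * x[i][k];
                double gate = 1.0 / (1.0 + std::exp(-s)); // element-wise sigmoid gate
                pooled[d] += gate * x[i][d] / len;        // gated average over positions
            }
        }
        for (int d = 0; d < dim; d++) std::printf("%g ", pooled[d]);
        std::printf("\n");
        return 0;
    }

Compared with plain average pooling, the learned gate can down-weight uninformative words, which is the usual motivation for this pooling choice.
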
/src/NNWordLocalContext/model/Driver.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Driver.h
3 | *
4 | * Created on: Mar 18, 2015
5 | * Author: mszhang
6 | */
7 |
8 | #ifndef SRC_Driver_H_
9 | #define SRC_Driver_H_
10 |
11 | #include <iostream>
12 | #include "ComputionGraph.h"
13 |
14 |
15 | //A neural network classifier
16 |
17 | class Driver{
18 | public:
19 | Driver(size_t memsize) : aligned_mem(memsize) {
20 | _pcg = NULL;
21 | }
22 |
23 | ~Driver() {
24 | if (_pcg != NULL)
25 | delete _pcg;
26 | _pcg = NULL;
27 | }
28 |
29 | public:
30 | ComputionGraph *_pcg; // build neural graphs
31 | ModelParams _modelparams; // model parameters
32 | HyperParams _hyperparams;
33 |
34 | Metric _eval;
35 | CheckGrad _checkgrad;
36 | ModelUpdate _ada; // model update
37 | AlignedMemoryPool aligned_mem;
38 |
39 |
40 | public:
41 | inline void initial(int maxseq_size) {
42 | if (!_hyperparams.bValid()){
43 | std::cout << "hyper parameter initialization Error, Please check!" << std::endl;
44 | return;
45 | }
46 | if (!_modelparams.initial(_hyperparams, &aligned_mem)){
47 | std::cout << "model parameter initialization Error, Please check!" << std::endl;
48 | return;
49 | }
50 | _modelparams.exportModelParams(_ada);
51 | _modelparams.exportCheckGradParams(_checkgrad);
52 |
53 | _hyperparams.print();
54 |
55 | _pcg = new ComputionGraph();
56 | _pcg->createNodes(ComputionGraph::max_sentence_length, maxseq_size);
57 | _pcg->initial(_modelparams, _hyperparams, &aligned_mem);
58 |
59 | setUpdateParameters(_hyperparams.nnRegular, _hyperparams.adaAlpha, _hyperparams.adaEps);
60 | }
61 |
62 |
63 | inline dtype train(const vector<Example>& examples, int iter) {
64 | _eval.reset();
65 |
66 | int example_num = examples.size();
67 | dtype cost = 0.0;
68 |
69 | for (int count = 0; count < example_num; count++) {
70 | const Example& example = examples[count];
71 | //forward
72 | _pcg->forward(example.m_densefeatures, true);
73 |
74 | //loss function
75 | int seq_size = example.m_densefeatures.size();
76 | int wordnum = example.m_densefeatures[seq_size - 1].words.size();
77 | cost += _modelparams.loss.loss(&_pcg->output, example.m_labels, _eval, example_num);
78 |
79 | // backward, which exists only for training
80 | _pcg->backward();
81 | }
82 |
83 | if (_eval.getAccuracy() < 0) {
84 | std::cout << "strange" << std::endl;
85 | }
86 |
87 | return cost;
88 | }
89 |
90 | inline void predict(const vector<Feature> densefeatures, int& results) {
91 | _pcg->forward(densefeatures);
92 | _modelparams.loss.predict(&_pcg->output, results);
93 | }
94 |
95 | inline dtype cost(const Example& example){
96 | _pcg->forward(example.m_densefeatures); //forward here
97 |
98 | int seq_size = example.m_densefeatures.size();
99 |
100 | dtype cost = 0.0;
101 |
102 | cost += _modelparams.loss.cost(&_pcg->output, example.m_labels, 1);
103 |
104 | return cost;
105 | }
106 |
107 | void checkgrad(const vector<Example>& examples, int iter){
108 | ostringstream out;
109 | out << "Iteration: " << iter;
110 | _checkgrad.check(this, examples, out.str());
111 | }
112 |
113 | void updateModel() {
114 | _ada.update();
115 | //_ada.update(5.0);
116 | }
117 |
118 | void writeModel();
119 |
120 | void loadModel();
121 |
122 |
123 |
124 | private:
125 | inline void resetEval() {
126 | _eval.reset();
127 | }
128 |
129 |
130 | inline void setUpdateParameters(dtype nnRegular, dtype adaAlpha, dtype adaEps){
131 | _ada._alpha = adaAlpha;
132 | _ada._eps = adaEps;
133 | _ada._reg = nnRegular;
134 | }
135 |
136 | };
137 |
138 | #endif /* SRC_Driver_H_ */
139 |
--------------------------------------------------------------------------------
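A note on the loss driving `train` above: `SoftMaxLoss` scores the output node with a softmax and cross-entropy against the one-hot `m_labels`, and the gradient on the output scores is what `backward()` then propagates. A self-contained sketch of that math on invented two-label scores (a mirror of the computation, not the library's API):

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<double> score = {1.0, -0.5}; // toy output-layer values (2 labels)
        std::vector<double> gold  = {1.0, 0.0};  // one-hot gold label, as in Example::m_labels
        double maxs = *std::max_element(score.begin(), score.end()), sum = 0.0;
        std::vector<double> prob(score.size());
        for (size_t i = 0; i < score.size(); i++) sum += std::exp(score[i] - maxs);
        double loss = 0.0;
        for (size_t i = 0; i < score.size(); i++) {
            prob[i] = std::exp(score[i] - maxs) / sum;
            if (gold[i] > 0) loss -= std::log(prob[i]); // cross-entropy on the gold label
            std::printf("grad[%zu] = %g\n", i, prob[i] - gold[i]); // gradient fed to backward()
        }
        std::printf("loss = %g\n", loss);
        return 0;
    }
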
/src/NNWordLocalContext/model/HyperParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_HyperParams_H_
2 | #define SRC_HyperParams_H_
3 |
4 | #include "N3L.h"
5 | #include "Example.h"
6 | #include "Options.h"
7 |
8 | using namespace nr;
9 | using namespace std;
10 |
11 | struct HyperParams{
12 |
13 | // must assign
14 | int wordcontext;
15 | int hiddensize;
16 | int rnnhiddensize;
17 | dtype dropOut;
18 |
19 | // must assign
20 | dtype nnRegular; // for optimization
21 | dtype adaAlpha; // for optimization
22 | dtype adaEps; // for optimization
23 |
24 | //auto generated
25 | int wordwindow;
26 | int wordDim;
27 | int inputsize;
28 | int labelSize;
29 |
30 | public:
31 | HyperParams(){
32 | bAssigned = false;
33 | }
34 |
35 | public:
36 | void setRequired(Options& opt){
37 | wordcontext = opt.wordcontext;
38 | hiddensize = opt.hiddenSize;
39 | rnnhiddensize = opt.rnnHiddenSize;
40 | dropOut = opt.dropProb;
41 |
42 | nnRegular = opt.regParameter;
43 | adaAlpha = opt.adaAlpha;
44 | adaEps = opt.adaEps;
45 |
46 | bAssigned = true;
47 | }
48 |
49 | void clear(){
50 | bAssigned = false;
51 | }
52 |
53 | bool bValid(){
54 | return bAssigned;
55 | }
56 |
57 |
58 | public:
59 |
60 | void print(){
61 |
62 | }
63 |
64 | private:
65 | bool bAssigned;
66 | };
67 |
68 |
69 | #endif /* SRC_HyperParams_H_ */
--------------------------------------------------------------------------------
/src/NNWordLocalContext/model/ModelParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ModelParams_H_
2 | #define SRC_ModelParams_H_
3 | #include "HyperParams.h"
4 |
5 | // Each model consists of two parts: building the neural graph and defining the output losses.
6 | class ModelParams{
7 |
8 | public:
9 | Alphabet wordAlpha; // should be initialized outside
10 | Alphabet featAlpha; //should be initialized outside
11 | Alphabet labelAlpha; // should be initialized outside
12 | public:
13 | LookupTable words; // should be initialized outside
14 | LSTM1Params left_lstm_project; //left lstm
15 | LSTM1Params right_lstm_project; //right lstm
16 | GatedPoolParam local_gatedpool_project;
17 | GatedPoolParam context_gatedpool_project;
18 | UniParams sent_tanh_project; // sentence hidden
19 | UniParams olayer_linear; // output
20 | public:
21 | SoftMaxLoss loss;
22 |
23 |
24 | public:
25 | bool initial(HyperParams& opts, AlignedMemoryPool* mem = NULL){
26 |
27 | // some model parameters should be initialized outside
28 | if (words.nVSize <= 0 || labelAlpha.size() <= 0){
29 | return false;
30 | }
31 | opts.wordDim = words.nDim;
32 | opts.wordwindow = 2 * opts.wordcontext + 1;
33 | opts.inputsize = opts.wordwindow * opts.wordDim;
34 | int senthiddensize = opts.rnnhiddensize * 2 + words.nDim;
35 |
36 | left_lstm_project.initial(opts.rnnhiddensize, opts.inputsize, mem);
37 | right_lstm_project.initial(opts.rnnhiddensize, opts.inputsize, mem);
38 | local_gatedpool_project.initial(opts.rnnhiddensize * 2, opts.rnnhiddensize * 2, mem);
39 | context_gatedpool_project.initial(opts.wordDim, opts.wordDim, mem);
40 | sent_tanh_project.initial(opts.hiddensize, senthiddensize, mem);
41 | opts.labelSize = labelAlpha.size();
42 | olayer_linear.initial(opts.labelSize, opts.hiddensize, false, mem);
43 |
44 | return true;
45 | }
46 |
47 |
48 | void exportModelParams(ModelUpdate& ada){
49 | words.exportAdaParams(ada);
50 | left_lstm_project.exportAdaParams(ada);
51 | right_lstm_project.exportAdaParams(ada);
52 | local_gatedpool_project.exportAdaParams(ada);
53 | context_gatedpool_project.exportAdaParams(ada);
54 | sent_tanh_project.exportAdaParams(ada);
55 | olayer_linear.exportAdaParams(ada);
56 | }
57 |
58 |
59 | void exportCheckGradParams(CheckGrad& checkgrad){
60 | checkgrad.add(&(words.E), "_words.E");
61 |
62 | checkgrad.add(&(context_gatedpool_project._uni_gate_param.W), "context_gatedpool_project.uni.W");
63 | checkgrad.add(&(context_gatedpool_project._uni_gate_param.b), "context_gatedpool_project.uni.b");
64 | checkgrad.add(&(local_gatedpool_project._uni_gate_param.W), "local_gatedpool_project.W");
65 | checkgrad.add(&(local_gatedpool_project._uni_gate_param.b), "local_gatedpool_project.b");
66 |
67 | checkgrad.add(&(sent_tanh_project.W), "sent_tanh_project.W");
68 | checkgrad.add(&(sent_tanh_project.b), "sent_tanh_project.b");
69 | }
70 |
71 | // will add it later
72 | void saveModel(){
73 |
74 | }
75 |
76 | void loadModel(const string& inFile){
77 |
78 | }
79 |
80 | };
81 |
82 | #endif /* SRC_ModelParams_H_ */
--------------------------------------------------------------------------------
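To make the dimension bookkeeping in `initial` above concrete: each LSTM reads window-concatenated embeddings (`inputsize`), and the sentence hidden layer reads the concatenation of the bi-LSTM pooling output with the context pooling output (`senthiddensize`). A tiny standalone check of that arithmetic; wordDim = 100, wordcontext = 2, and rnnhiddensize = 100 are made-up values for the example:

    #include <cstdio>

    int main() {
        int wordDim = 100, wordcontext = 2, rnnhiddensize = 100; // assumed values
        int wordwindow = 2 * wordcontext + 1;                    // 5
        int inputsize = wordwindow * wordDim;                    // 500: LSTM input per position
        int senthiddensize = rnnhiddensize * 2 + wordDim;        // 300: bi-LSTM pool + context pool
        std::printf("window=%d input=%d senthidden=%d\n", wordwindow, inputsize, senthiddensize);
        return 0;
    }
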
/src/NNWordLocalContextSeparate/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | include_directories(
3 | model
4 | )
5 |
6 | add_executable(NNWordLocalContextSeparate NNWordLocalContextSeparate.cpp)
7 |
8 |
--------------------------------------------------------------------------------
/src/NNWordLocalContextSeparate/NNWordLocalContextSeparate.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * SparseDetector.cpp
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #include "NNWordLocalContextSeparate.h"
9 |
10 | #include "Argument_helper.h"
11 |
12 | Detector::Detector(size_t memsize) : m_driver(memsize) {
13 | // TODO Auto-generated constructor stub
14 | srand(0);
15 | }
16 |
17 | Detector::~Detector() {
18 | // TODO Auto-generated destructor stub
19 | }
20 |
21 | int Detector::createAlphabet(const vector<Instance>& vecInsts) {
22 | if (vecInsts.size() == 0) {
23 | std::cout << "training set empty" << std::endl;
24 | return -1;
25 | }
26 | cout << "Creating Alphabet..." << endl;
27 |
28 | m_maxseq_size = -1;
29 | int numInstance;
30 |
31 | m_driver._modelparams.labelAlpha.clear();
32 | // label alphabet and word statistics
33 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
34 | const Instance *pInstance = &vecInsts[numInstance];
35 |
36 | const vector<vector<string> > &words = pInstance->words;
37 | const string &label = pInstance->label;
38 |
39 | int labelId = m_driver._modelparams.labelAlpha.from_string(label);
40 |
41 | int seq_size = pInstance->seqsize();
42 | if (seq_size > m_maxseq_size)
43 | m_maxseq_size = seq_size;
44 | for (int i = 0; i < seq_size; i++) {
45 | int wordLength = words[i].size();
46 | for (int j = 0; j < wordLength; ++j) {
47 | string curword = normalize_to_lowerwithdigit(words[i][j]);
48 | m_word_stats[curword]++;
49 | }
50 |
51 | }
52 |
53 | if ((numInstance + 1) % m_options.verboseIter == 0) {
54 | cout << numInstance + 1 << " ";
55 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
56 | cout << std::endl;
57 | cout.flush();
58 | }
59 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
60 | break;
61 | }
62 |
63 | cout << numInstance << " " << endl;
64 | cout << "Label num: " << m_driver._modelparams.labelAlpha.size() << endl;
65 |
66 |
67 | m_driver._modelparams.labelAlpha.set_fixed_flag(true);
68 |
69 | if (m_options.linearfeatCat > 0) {
70 | cout << "Extracting linear features..." << endl;
71 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
72 | const Instance *pInstance = &vecInsts[numInstance];
73 | vector<string> linearfeat;
74 | extractLinearFeatures(linearfeat, pInstance);
75 | for (int i = 0; i < linearfeat.size(); i++)
76 | m_feat_stats[linearfeat[i]] ++;
77 | }
78 | m_feat_stats[unknownkey] = m_options.featCutOff + 1;
79 | cout << "Total feature num: " << m_feat_stats.size() << endl;
80 | m_driver._modelparams.featAlpha.initial(m_feat_stats, m_options.featCutOff);
81 | cout << "Remina feature num:" << m_driver._modelparams.featAlpha.size() << endl;
82 | m_driver._modelparams.featAlpha.set_fixed_flag(true);
83 | }
84 | return 0;
85 | }
86 |
87 | void Detector::addTestAlphabet(const vector<Instance>& vecInsts)
88 | {
89 | cout << "Adding other word Alphabet..." << endl;
90 | int numInstance;
91 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
92 | const Instance *pInstance = &vecInsts[numInstance];
93 |
94 | const vector<vector<string> > &words = pInstance->words;
95 | int seq_size = pInstance->seqsize();
96 | for (int i = 0; i < seq_size; ++i) {
97 | for (int j = 0; j < words[i].size(); j++) {
98 | string curword = normalize_to_lowerwithdigit(words[i][j]);
99 | if (!m_options.wordEmbFineTune)
100 | m_word_stats[curword]++;
101 | }
102 | }
103 |
104 | if ((numInstance + 1) % m_options.verboseIter == 0) {
105 | cout << numInstance + 1 << " ";
106 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
107 | cout << std::endl;
108 | cout.flush();
109 | }
110 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
111 | break;
112 | }
113 |
114 | cout << numInstance << " " << endl;
115 | }
116 |
117 |
118 |
119 | void Detector::extractDenseFeatures(vector<Feature>& features, const Instance * pInstance)
120 | {
121 | features.clear();
122 | const vector<vector<string> >& words = pInstance->words;
123 | int seq_size = pInstance->seqsize();
124 | assert(seq_size < 3);
125 |
126 | //Local and Context sentence dense feature
127 | for (int sentidx = 0; sentidx < seq_size; sentidx++) {
128 | Feature feat;
129 | const vector<string>& curWords = words[sentidx];
130 | int wordnumber = curWords.size();
131 | for (int i = 0; i < wordnumber; i++)
132 | feat.words.push_back(normalize_to_lowerwithdigit(curWords[i]));
133 |
134 | features.push_back(feat);
135 | }
136 | }
137 |
138 | void Detector::extractLinearFeatures(vector<string>& feat, const Instance* pInstance) {
139 | feat.clear();
140 |
141 | const vector<vector<string> >& words = pInstance->words;
142 | int seq_size = pInstance->seqsize();
143 | assert(seq_size < 3);
144 | //Current sent linear feature
145 | const vector<string>& lastWords = words[seq_size - 1];
146 | int wordnumber = lastWords.size();
147 | string strfeat = "", curWord = "", preWord = "", pre2Word = "";
148 | for (int i = 0; i < wordnumber; i++) {
149 | curWord = normalize_to_lowerwithdigit(lastWords[i]);
150 | strfeat = "F1U=" + curWord;
151 | feat.push_back(strfeat);
152 | preWord = i - 1 >= 0 ? lastWords[i - 1] : nullkey;
153 | strfeat = "F2B=" + preWord + seperateKey + curWord;
154 | feat.push_back(strfeat);
155 | pre2Word = i - 2 >= 0 ? lastWords[i - 2] : nullkey;
156 | strfeat = "F3T=" + pre2Word + seperateKey + preWord + seperateKey + curWord;
157 | feat.push_back(strfeat);
158 | }
159 |
160 | //History feature
161 | if (m_options.linearfeatCat > 1 && seq_size == 2) {
162 | const vector<string>& historyWords = words[seq_size - 2];
163 | wordnumber = historyWords.size();
164 | for (int i = 0; i < wordnumber; i++) {
165 | strfeat = "F4U=" + historyWords[i];
166 | feat.push_back(strfeat);
167 | }
168 | }
169 | }
170 |
171 | void Detector::convert2Example(const Instance* pInstance, Example& exam) {
172 | exam.clear();
173 |
174 | const string &instlabel = pInstance->label;
175 | const Alphabet &labelAlpha = m_driver._modelparams.labelAlpha;
176 |
177 | int labelnum = labelAlpha.size();
178 | for (int i = 0; i < labelnum; i++) {
179 | string str = labelAlpha.from_id(i);
180 | if (instlabel.compare(str) == 0)
181 | exam.m_labels.push_back(1.0);
182 | else
183 | exam.m_labels.push_back(0.0);
184 | }
185 |
186 | //dense feature
187 | extractDenseFeatures(exam.m_densefeatures, pInstance);
188 |
189 | //linear feature
190 | if (m_options.linearfeatCat > 0)
191 | extractLinearFeatures(exam.m_linearfeatures, pInstance);
192 |
193 | }
194 |
195 | void Detector::initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams) {
196 | int numInstance;
197 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
198 | const Instance *pInstance = &vecInsts[numInstance];
199 | Example curExam;
200 | convert2Example(pInstance, curExam);
201 | vecExams.push_back(curExam);
202 |
203 | if ((numInstance + 1) % m_options.verboseIter == 0) {
204 | cout << numInstance + 1 << " ";
205 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
206 | cout << std::endl;
207 | cout.flush();
208 | }
209 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
210 | break;
211 | }
212 |
213 | cout << numInstance << " " << endl;
214 | }
215 |
216 | void Detector::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile) {
217 | if (optionFile != "")
218 | m_options.load(optionFile);
219 | m_options.showOptions();
220 | vector<Instance> trainInsts, devInsts, testInsts;
221 | static vector<Instance> decodeInstResults;
222 | static Instance curDecodeInst;
223 | bool bCurIterBetter = false;
224 |
225 | m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
226 | if (devFile != "")
227 | m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
228 | if (testFile != "")
229 | m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);
230 |
231 | std::cout << "Training example number: " << trainInsts.size() << std::endl;
232 | std::cout << "Dev example number: " << trainInsts.size() << std::endl;
233 | std::cout << "Test example number: " << trainInsts.size() << std::endl;
234 |
235 | createAlphabet(trainInsts);
236 | addTestAlphabet(devInsts);
237 | addTestAlphabet(testInsts);
238 |
239 | m_word_stats[unknownkey] = m_options.wordCutOff + 1;
240 | cout << "Total word num: " << m_word_stats.size() << endl;
241 | m_driver._modelparams.wordAlpha.initial(m_word_stats, m_options.wordCutOff);
242 | m_driver._modelparams.wordAlpha.set_fixed_flag(true);
243 | cout << "Remain word num:" << m_driver._modelparams.wordAlpha.size() << endl;
244 |
245 | vector<Example> trainExamples, devExamples, testExamples;
246 |
247 | std::cout << "Instance convert to example... " << std::endl;
248 | initialExamples(trainInsts, trainExamples);
249 | initialExamples(devInsts, devExamples);
250 | initialExamples(testInsts, testExamples);
251 |
252 | if (m_options.wordFile != "") {
253 | m_driver._modelparams.words.initial(&m_driver._modelparams.wordAlpha, m_options.wordFile, m_options.wordEmbFineTune);
254 | m_driver._modelparams.history_words.initial(&m_driver._modelparams.wordAlpha, m_options.wordFile, m_options.wordEmbFineTune);
255 | }
256 | else{
257 | m_driver._modelparams.words.initial(&m_driver._modelparams.wordAlpha, m_options.wordEmbSize, m_options.wordEmbFineTune);
258 | m_driver._modelparams.history_words.initial(&m_driver._modelparams.wordAlpha, m_options.wordEmbSize, m_options.wordEmbFineTune);
259 | }
260 |
261 | m_driver._hyperparams.setRequired(m_options);
262 | m_driver.initial(m_maxseq_size);
263 |
264 |
265 |
266 | dtype bestDIS = 0;
267 |
268 | int inputSize = trainExamples.size();
269 |
270 | int batchBlock = inputSize / m_options.batchSize;
271 | if (inputSize % m_options.batchSize != 0)
272 | batchBlock++;
273 |
274 | srand(0);
275 | std::vector<int> indexes;
276 | for (int i = 0; i < inputSize; ++i)
277 | indexes.push_back(i);
278 |
279 | static Metric eval, metric_dev, metric_test;
280 | static vector<Example> subExamples;
281 | int devNum = devExamples.size(), testNum = testExamples.size();
282 | for (int iter = 0; iter < m_options.maxIter; ++iter) {
283 | std::cout << "##### Iteration " << iter << std::endl;
284 |
285 | random_shuffle(indexes.begin(), indexes.end());
286 | eval.reset();
287 | for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
288 | subExamples.clear();
289 | int start_pos = updateIter * m_options.batchSize;
290 | int end_pos = (updateIter + 1) * m_options.batchSize;
291 | if (end_pos > inputSize)
292 | end_pos = inputSize;
293 |
294 | for (int idy = start_pos; idy < end_pos; idy++) {
295 | subExamples.push_back(trainExamples[indexes[idy]]);
296 | }
297 |
298 | int curUpdateIter = iter * batchBlock + updateIter;
299 | dtype cost = m_driver.train(subExamples, curUpdateIter);
300 |
301 | eval.overall_label_count += m_driver._eval.overall_label_count;
302 | eval.correct_label_count += m_driver._eval.correct_label_count;
303 |
304 | if ((curUpdateIter + 1) % m_options.verboseIter == 0) {
305 | //m_driver.checkgrad(subExamples, curUpdateIter + 1);
306 | std::cout << "current: " << updateIter + 1 << ", total block: " << batchBlock << std::endl;
307 | std::cout << "Cost = " << cost << ", Tag Correct(%) = " << eval.getAccuracy() << std::endl;
308 | }
309 | m_driver.updateModel();
310 |
311 | }
312 |
313 | if (devNum > 0) {
314 | bCurIterBetter = false;
315 | if (!m_options.outBest.empty())
316 | decodeInstResults.clear();
317 | metric_dev.reset();
318 | for (int idx = 0; idx < devExamples.size(); idx++) {
319 | string result_label;
320 | predict(devExamples[idx].m_densefeatures, result_label);
321 |
322 | devInsts[idx].Evaluate(result_label, metric_dev);
323 |
324 | if (!m_options.outBest.empty()) {
325 | curDecodeInst.copyValuesFrom(devInsts[idx]);
326 | curDecodeInst.assignLabel(result_label);
327 | decodeInstResults.push_back(curDecodeInst);
328 | }
329 | }
330 |
331 | std::cout << "dev:" << std::endl;
332 | metric_dev.print();
333 |
334 | if (!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS) {
335 | m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults);
336 | bCurIterBetter = true;
337 | }
338 |
339 | if (testNum > 0) {
340 | if (!m_options.outBest.empty())
341 | decodeInstResults.clear();
342 | metric_test.reset();
343 | for (int idx = 0; idx < testExamples.size(); idx++) {
344 | string result_label;
345 | predict(testExamples[idx].m_densefeatures, result_label);
346 |
347 | testInsts[idx].Evaluate(result_label, metric_test);
348 |
349 | if (bCurIterBetter && !m_options.outBest.empty()) {
350 | curDecodeInst.copyValuesFrom(testInsts[idx]);
351 | curDecodeInst.assignLabel(result_label);
352 | decodeInstResults.push_back(curDecodeInst);
353 | }
354 | }
355 | std::cout << "test:" << std::endl;
356 | metric_test.print();
357 |
358 | if (!m_options.outBest.empty() && bCurIterBetter) {
359 | m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults);
360 | }
361 | }
362 |
363 |
364 |
365 | if (m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS) {
366 | std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl;
367 | bestDIS = metric_dev.getAccuracy();
368 | writeModelFile(modelFile);
369 | }
370 |
371 | }
372 | // Clear gradients
373 | }
374 | }
375 |
376 | int Detector::predict(const vector<Feature>& features, string& output) {
377 | int labelIdx;
378 | m_driver.predict(features, labelIdx);
379 | output = m_driver._modelparams.labelAlpha.from_id(labelIdx, nullkey);
380 |
381 | if (output == nullkey)
382 | std::cout << "predict error" << std::endl;
383 | return 0;
384 | }
385 |
386 | void Detector::test(const string& testFile, const string& outputFile, const string& modelFile) {
387 | loadModelFile(modelFile);
388 | vector<Instance> testInsts;
389 | m_pipe.readInstances(testFile, testInsts);
390 |
391 | vector<Example> testExamples;
392 | initialExamples(testInsts, testExamples);
393 |
394 | int testNum = testExamples.size();
395 | vector<Instance> testInstResults;
396 | Metric metric_test;
397 | metric_test.reset();
398 | for (int idx = 0; idx < testExamples.size(); idx++) {
399 | string result_label;
400 | predict(testExamples[idx].m_densefeatures, result_label);
401 | testInsts[idx].Evaluate(result_label, metric_test);
402 | Instance curResultInst;
403 | curResultInst.copyValuesFrom(testInsts[idx]);
404 | curResultInst.assignLabel(result_label);
405 | testInstResults.push_back(curResultInst);
406 | }
407 | std::cout << "test:" << std::endl;
408 | metric_test.print();
409 |
410 | m_pipe.outputAllInstances(outputFile, testInstResults);
411 |
412 | }
413 |
414 |
415 | void Detector::loadModelFile(const string& inputModelFile) {
416 |
417 | }
418 |
419 | void Detector::writeModelFile(const string& outputModelFile) {
420 |
421 | }
422 |
423 | int main(int argc, char* argv[]) {
424 |
425 | std::string trainFile = "", devFile = "", testFile = "", modelFile = "", optionFile = "";
426 | std::string outputFile = "", wordEmbFile = "";
427 | bool bTrain = false;
428 | dsr::Argument_helper ah;
429 | int memsize = 1;
430 |
431 | ah.new_flag("l", "learn", "train or test", bTrain);
432 | ah.new_named_string("train", "trainCorpus", "named_string", "training corpus to train a model, must when training", trainFile);
433 | ah.new_named_string("dev", "devCorpus", "named_string", "development corpus to train a model, optional when training", devFile);
434 | ah.new_named_string("test", "testCorpus", "named_string",
435 | "testing corpus to train a model or input file to test a model, optional when training and must when testing", testFile);
436 | ah.new_named_string("option", "optionFile", "named_string", "option file to train a model, optional when training", optionFile);
437 | ah.new_named_string("model", "modelFile", "named_string", "model file, must when training and testing", modelFile);
438 | ah.new_named_string("output", "outputFile", "named_string", "output file to test, must when testing", outputFile);
439 | ah.new_named_int("mem", "memsize", "named_int", "memory allocated for tensor nodes", memsize);
440 |
441 | ah.process(argc, argv);
442 |
443 | Detector detector(memsize);
444 | detector.m_pipe.max_sentense_size = ComputionGraph::max_sentence_length;
445 | if (bTrain) {
446 | detector.train(trainFile, devFile, testFile, modelFile, optionFile);
447 | }
448 | else {
449 | detector.test(testFile, outputFile, modelFile);
450 | }
451 |
452 | //test(argv);
453 | //ah.write_values(std::cout);
454 | }
455 |
--------------------------------------------------------------------------------
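The epoch loop in `train` above shuffles example indexes and walks them in mini-batches of `batchSize`, with a shorter final batch when `inputSize` is not an exact multiple. The same slicing arithmetic as a minimal standalone sketch (the sizes are hypothetical):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
        int inputSize = 10, batchSize = 4;            // hypothetical sizes
        int batchBlock = inputSize / batchSize;
        if (inputSize % batchSize != 0) batchBlock++; // ceil(inputSize / batchSize)

        std::vector<int> indexes;
        for (int i = 0; i < inputSize; ++i) indexes.push_back(i);
        std::random_shuffle(indexes.begin(), indexes.end()); // new example order each epoch

        for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
            int start_pos = updateIter * batchSize;
            int end_pos = std::min((updateIter + 1) * batchSize, inputSize);
            std::printf("batch %d: examples [%d, %d)\n", updateIter, start_pos, end_pos);
        }
        return 0;
    }
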
/src/NNWordLocalContextSeparate/NNWordLocalContextSeparate.h:
--------------------------------------------------------------------------------
1 | /*
2 | * SparseDetector.h
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #ifndef SRC_SparseDetector_H_
9 | #define SRC_SparseDetector_H_
10 |
11 |
12 | #include "N3L.h"
13 | #include "Driver.h"
14 | #include "Options.h"
15 | #include "Instance.h"
16 | #include "Example.h"
17 | #include "Pipe.h"
18 | #include "Utf.h"
19 |
20 | using namespace nr;
21 | using namespace std;
22 |
23 | class Detector {
24 |
25 |
26 | public:
27 | unordered_map<string, int> m_feat_stats;
28 | unordered_map<string, int> m_word_stats;
29 | int m_maxseq_size;
30 |
31 | public:
32 | Options m_options;
33 |
34 | Pipe m_pipe;
35 |
36 | Driver m_driver;
37 |
38 |
39 | public:
40 | Detector(size_t memsize);
41 | virtual ~Detector();
42 |
43 | public:
44 |
45 | int createAlphabet(const vector<Instance>& vecTrainInsts);
46 | void addTestAlphabet(const vector<Instance>& vecInsts);
47 |
48 | void extractDenseFeatures(vector<Feature>& features, const Instance* pInstance);
49 | void extractLinearFeatures(vector<string>& features, const Instance* pInstance);
50 |
51 | void convert2Example(const Instance* pInstance, Example& exam);
52 | void initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams);
53 |
54 | public:
55 | void train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile);
56 | int predict(const vector<Feature>& features, string& outputs);
57 | void test(const string& testFile, const string& outputFile, const string& modelFile);
58 |
59 | void writeModelFile(const string& outputModelFile);
60 | void loadModelFile(const string& inputModelFile);
61 |
62 | };
63 |
64 | #endif /* SRC_SparseDetector_H_ */
65 |
--------------------------------------------------------------------------------
/src/NNWordLocalContextSeparate/model/ComputionGraph.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ComputionGraph_H_
2 | #define SRC_ComputionGraph_H_
3 |
4 | #include "ModelParams.h"
5 |
6 |
7 | // Each model consists of two parts: building the neural graph and defining the output losses.
8 | struct ComputionGraph : Graph {
9 | public:
10 | const static int max_sentence_length = 256;
11 |
12 | public:
13 | // node instances
14 | vector<vector<LookupNode> > word_inputs;
15 | WindowBuilder word_window;
16 |
17 | LSTM1Builder left_lstm;
18 | LSTM1Builder right_lstm;
19 |
20 | vector<ConcatNode> concat_bilstm;
21 | GatedPoolBuilder local_gated_pooling;
22 | GatedPoolBuilder context_gated_pooling;
23 |
24 | Node padding;
25 | ConcatNode concat_local_context;
26 | UniNode sent_hidden;
27 | LinearNode output;
28 |
29 | public:
30 | ComputionGraph() : Graph() {
31 | }
32 |
33 | ~ComputionGraph() {
34 | clear();
35 | }
36 |
37 | public:
38 | //allocate enough nodes
39 | inline void createNodes(int sent_length, int maxseq_size) {
40 |
41 | resizeVec(word_inputs, maxseq_size, sent_length);
42 | word_window.resize(sent_length);
43 | left_lstm.resize(sent_length);
44 | right_lstm.resize(sent_length);
45 |
46 | concat_bilstm.resize(sent_length);
47 | local_gated_pooling.resize(sent_length);
48 | context_gated_pooling.resize(sent_length);
49 | }
50 |
51 | inline void clear() {
52 | Graph::clear();
53 | clearVec(word_inputs);
54 | word_window.clear();
55 | left_lstm.clear();
56 | right_lstm.clear();
57 |
58 | concat_bilstm.clear();
59 | local_gated_pooling.clear();
60 | context_gated_pooling.clear();
61 |
62 | }
63 |
64 |
65 | public:
66 | inline void initial(ModelParams& model, HyperParams& opts, AlignedMemoryPool* mem) {
67 | int seq_size = word_inputs.size();
68 |
69 | for (int i = 0; i < seq_size; i++){
70 | for (int idx = 0; idx < word_inputs[i].size(); idx++) {
71 | if (i == seq_size - 1){
72 | word_inputs[i][idx].init(model.words.nDim, opts.dropOut, mem);
73 | word_inputs[i][idx].setParam(&model.words);
74 | concat_bilstm[idx].init(opts.rnnhiddensize * 2, -1, mem);
75 | }
76 | else{
77 | word_inputs[i][idx].init(model.words.nDim, opts.dropOut, mem);
78 | word_inputs[i][idx].setParam(&model.history_words);
79 | }
80 | }
81 | }
82 | word_window.init(model.words.nDim, opts.wordcontext, mem);
83 | left_lstm.init(&model.left_lstm_project, opts.dropOut, true, mem);
84 | right_lstm.init(&model.right_lstm_project, opts.dropOut, false, mem);
85 |
86 | local_gated_pooling.init(&model.local_gatedpool_project, mem);
87 | context_gated_pooling.init(&model.context_gatedpool_project, mem);
88 |
89 | concat_local_context.init(model.words.nDim + opts.rnnhiddensize * 2, -1, mem);
90 | sent_hidden.init(opts.hiddensize, opts.dropOut, mem);
91 | sent_hidden.setParam(&model.sent_tanh_project);
92 | output.init(opts.labelSize, -1, mem);
93 | output.setParam(&model.olayer_linear);
94 |
95 | padding.init(model.words.nDim, -1, mem);
96 | }
97 |
98 |
99 | public:
100 | // some nodes may behave differently during training and decoding, for example, dropout
101 | inline void forward(const vector<Feature>& features, bool bTrain = false) {
102 | //first step: clear value
103 | clearValue(bTrain); // compute is a must step for train, predict and cost computation
104 |
105 | // second step: build graph
106 | int seq_size = features.size();
107 | //forward
108 | // word-level neural networks
109 | for (int i = 0; i < seq_size; i++) {
110 |
111 | const Feature& feature = features[i];
112 | int wordnum = feature.words.size();
113 | if (wordnum > max_sentence_length)
114 | wordnum = max_sentence_length;
115 |
116 | if (i == seq_size - 1) {
117 | for (int idx = 0; idx < wordnum; idx++) {
118 | //input
119 | word_inputs[1][idx].forward(this, feature.words[idx]);
120 | }
121 | //windowlized
122 | word_window.forward(this, getPNodes(word_inputs[1], wordnum));
123 | left_lstm.forward(this, getPNodes(word_window._outputs, wordnum));
124 | right_lstm.forward(this, getPNodes(word_window._outputs, wordnum));
125 |
126 | for (int idx = 0; idx < wordnum; idx++) {
127 | //feed-forward
128 | concat_bilstm[idx].forward(this, &(left_lstm._hiddens[idx]), &(right_lstm._hiddens[idx]));
129 | }
130 | local_gated_pooling.forward(this, getPNodes(concat_bilstm, wordnum));
131 | }
132 |
133 | else {
134 | for (int idx = 0; idx < wordnum; idx++) {
135 | //input
136 | word_inputs[0][idx].forward(this, feature.words[idx]);
137 | }
138 | context_gated_pooling.forward(this, getPNodes(word_inputs[0], wordnum));
139 | }
140 | }
141 |
142 |
143 | if (seq_size == 1)
144 | concat_local_context.forward(this, &padding, &local_gated_pooling._output);
145 | else
146 | concat_local_context.forward(this, &context_gated_pooling._output, &local_gated_pooling._output);
147 | sent_hidden.forward(this, &concat_local_context);
148 | output.forward(this, &sent_hidden);
149 | }
150 |
151 | };
152 |
153 | #endif /* SRC_ComputionGraph_H_ */
--------------------------------------------------------------------------------
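The only difference between this graph and the `NNWordLocalContext` one is the lookup: words of the local (last) sentence are embedded through `model.words`, while context words go through `model.history_words`, i.e. two separately trained tables over the same vocabulary. A toy sketch of that split; the map-based table, dimension, and values are invented for illustration:

    #include <cstdio>
    #include <string>
    #include <unordered_map>
    #include <vector>

    int main() {
        // Two independent embedding tables keyed by the same vocabulary.
        std::unordered_map<std::string, std::vector<double> > words, history_words;
        words["great"]         = std::vector<double>(3, 0.1); // local (last) sentence lookups
        history_words["great"] = std::vector<double>(3, 0.9); // context sentence lookups
        bool isLocal = false; // pretend we are embedding a context word
        const std::vector<double>& emb = isLocal ? words["great"] : history_words["great"];
        std::printf("emb[0] = %g\n", emb[0]);
        return 0;
    }
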
/src/NNWordLocalContextSeparate/model/Driver.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Driver.h
3 | *
4 | * Created on: Mar 18, 2015
5 | * Author: mszhang
6 | */
7 |
8 | #ifndef SRC_Driver_H_
9 | #define SRC_Driver_H_
10 |
11 | #include <iostream>
12 | #include "ComputionGraph.h"
13 |
14 |
15 | //A neural network classifier
16 |
17 | class Driver{
18 | public:
19 | Driver(size_t memsize) : aligned_mem(memsize){
20 | _pcg = NULL;
21 | }
22 |
23 | ~Driver() {
24 | if (_pcg != NULL)
25 | delete _pcg;
26 | _pcg = NULL;
27 | }
28 |
29 | public:
30 | ComputionGraph *_pcg; // build neural graphs
31 | ModelParams _modelparams; // model parameters
32 | HyperParams _hyperparams;
33 |
34 | Metric _eval;
35 | CheckGrad _checkgrad;
36 | ModelUpdate _ada; // model update
37 | AlignedMemoryPool aligned_mem;
38 |
39 |
40 | public:
41 | inline void initial(int maxseq_size) {
42 | if (!_hyperparams.bValid()){
43 | std::cout << "hyper parameter initialization Error, Please check!" << std::endl;
44 | return;
45 | }
46 | if (!_modelparams.initial(_hyperparams, &aligned_mem)){
47 | std::cout << "model parameter initialization Error, Please check!" << std::endl;
48 | return;
49 | }
50 | _modelparams.exportModelParams(_ada);
51 | _modelparams.exportCheckGradParams(_checkgrad);
52 |
53 | _hyperparams.print();
54 |
55 | _pcg = new ComputionGraph();
56 | _pcg->createNodes(ComputionGraph::max_sentence_length, maxseq_size);
57 | _pcg->initial(_modelparams, _hyperparams, &aligned_mem);
58 |
59 | setUpdateParameters(_hyperparams.nnRegular, _hyperparams.adaAlpha, _hyperparams.adaEps);
60 | }
61 |
62 |
63 | inline dtype train(const vector<Example>& examples, int iter) {
64 | _eval.reset();
65 |
66 | int example_num = examples.size();
67 | dtype cost = 0.0;
68 |
69 | for (int count = 0; count < example_num; count++) {
70 | const Example& example = examples[count];
71 |
72 | //forward
73 | _pcg->forward(example.m_densefeatures, true);
74 |
75 | //loss function
76 | int seq_size = example.m_densefeatures.size();
77 | int wordnum = example.m_densefeatures[seq_size - 1].words.size();
78 | cost += _modelparams.loss.loss(&_pcg->output, example.m_labels, _eval, example_num);
79 |
80 | // backward, which exists only for training
81 | _pcg->backward();
82 | }
83 |
84 | if (_eval.getAccuracy() < 0) {
85 | std::cout << "strange" << std::endl;
86 | }
87 |
88 | return cost;
89 | }
90 |
91 | inline void predict(const vector<Feature> densefeatures, int& results) {
92 | _pcg->forward(densefeatures);
93 | _modelparams.loss.predict(&_pcg->output, results);
94 | }
95 |
96 | inline dtype cost(const Example& example){
97 | _pcg->forward(example.m_densefeatures); //forward here
98 |
99 | int seq_size = example.m_densefeatures.size();
100 |
101 | dtype cost = 0.0;
102 |
103 | cost += _modelparams.loss.cost(&_pcg->output, example.m_labels, 1);
104 |
105 | return cost;
106 | }
107 |
108 | void checkgrad(const vector<Example>& examples, int iter){
109 | ostringstream out;
110 | out << "Iteration: " << iter;
111 | _checkgrad.check(this, examples, out.str());
112 | }
113 |
114 | void updateModel() {
115 | _ada.update();
116 | //_ada.update(5.0);
117 | }
118 |
119 | void writeModel();
120 |
121 | void loadModel();
122 |
123 |
124 |
125 | private:
126 | inline void resetEval() {
127 | _eval.reset();
128 | }
129 |
130 |
131 | inline void setUpdateParameters(dtype nnRegular, dtype adaAlpha, dtype adaEps){
132 | _ada._alpha = adaAlpha;
133 | _ada._eps = adaEps;
134 | _ada._reg = nnRegular;
135 | }
136 |
137 | };
138 |
139 | #endif /* SRC_Driver_H_ */
140 |
--------------------------------------------------------------------------------
/src/NNWordLocalContextSeparate/model/HyperParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_HyperParams_H_
2 | #define SRC_HyperParams_H_
3 |
4 | #include "N3L.h"
5 | #include "Example.h"
6 | #include "Options.h"
7 |
8 | using namespace nr;
9 | using namespace std;
10 |
11 | struct HyperParams{
12 |
13 | // must assign
14 | int wordcontext;
15 | int hiddensize;
16 | int rnnhiddensize;
17 | dtype dropOut;
18 |
19 | // must assign
20 | dtype nnRegular; // for optimization
21 | dtype adaAlpha; // for optimization
22 | dtype adaEps; // for optimization
23 |
24 | //auto generated
25 | int wordwindow;
26 | int wordDim;
27 | int inputsize;
28 | int labelSize;
29 |
30 | public:
31 | HyperParams(){
32 | bAssigned = false;
33 | }
34 |
35 | public:
36 | void setRequired(Options& opt){
37 | wordcontext = opt.wordcontext;
38 | hiddensize = opt.hiddenSize;
39 | rnnhiddensize = opt.rnnHiddenSize;
40 | dropOut = opt.dropProb;
41 |
42 | nnRegular = opt.regParameter;
43 | adaAlpha = opt.adaAlpha;
44 | adaEps = opt.adaEps;
45 |
46 | bAssigned = true;
47 | }
48 |
49 | void clear(){
50 | bAssigned = false;
51 | }
52 |
53 | bool bValid(){
54 | return bAssigned;
55 | }
56 |
57 |
58 | public:
59 |
60 | void print(){
61 |
62 | }
63 |
64 | private:
65 | bool bAssigned;
66 | };
67 |
68 |
69 | #endif /* SRC_HyperParams_H_ */
--------------------------------------------------------------------------------
/src/NNWordLocalContextSeparate/model/ModelParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ModelParams_H_
2 | #define SRC_ModelParams_H_
3 | #include "HyperParams.h"
4 |
5 | // Each model consists of two parts: building the neural graph and defining the output losses.
6 | class ModelParams{
7 |
8 | public:
9 | Alphabet wordAlpha; // should be initialized outside
10 | Alphabet featAlpha; //should be initialized outside
11 | Alphabet labelAlpha; // should be initialized outside
12 | public:
13 | LookupTable words; // should be initialized outside
14 | LookupTable history_words; // should be initialized outside
15 | LSTM1Params left_lstm_project; //left lstm
16 | LSTM1Params right_lstm_project; //right lstm
17 | GatedPoolParam local_gatedpool_project; //local gated pooling
18 | GatedPoolParam context_gatedpool_project; //context gated pooling
19 | UniParams sent_tanh_project; // sentence hidden
20 | UniParams olayer_linear; // output
21 | public:
22 | SoftMaxLoss loss;
23 |
24 |
25 | public:
26 | bool initial(HyperParams& opts, AlignedMemoryPool* mem){
27 |
28 | // some model parameters should be initialized outside
29 | if (words.nVSize <= 0 || history_words.nVSize <= 0 || labelAlpha.size() <= 0) {
30 | return false;
31 | }
32 | opts.wordDim = words.nDim;
33 | opts.wordwindow = 2 * opts.wordcontext + 1;
34 | opts.inputsize = opts.wordwindow * opts.wordDim;
35 | int senthiddensize = opts.rnnhiddensize * 2 + words.nDim;
36 |
37 | left_lstm_project.initial(opts.rnnhiddensize, opts.inputsize, mem);
38 | right_lstm_project.initial(opts.rnnhiddensize, opts.inputsize, mem);
39 | local_gatedpool_project.initial(opts.rnnhiddensize * 2, opts.rnnhiddensize * 2, mem);
40 | context_gatedpool_project.initial(opts.wordDim, opts.wordDim, mem);
41 | sent_tanh_project.initial(opts.hiddensize, senthiddensize, mem);
42 | opts.labelSize = labelAlpha.size();
43 | olayer_linear.initial(opts.labelSize, opts.hiddensize, false, mem);
44 |
45 | return true;
46 | }
47 |
48 |
49 | void exportModelParams(ModelUpdate& ada){
50 | words.exportAdaParams(ada);
51 | history_words.exportAdaParams(ada);
52 | left_lstm_project.exportAdaParams(ada);
53 | right_lstm_project.exportAdaParams(ada);
54 | local_gatedpool_project.exportAdaParams(ada);
55 | context_gatedpool_project.exportAdaParams(ada);
56 | sent_tanh_project.exportAdaParams(ada);
57 | olayer_linear.exportAdaParams(ada);
58 | }
59 |
60 |
61 | void exportCheckGradParams(CheckGrad& checkgrad){
62 | checkgrad.add(&(sent_tanh_project.W), "sent_tanh_project.W");
63 | checkgrad.add(&(sent_tanh_project.b), "sent_tanh_project.b");
64 |
65 | checkgrad.add(&(context_gatedpool_project._uni_gate_param.W), "context_gatedpool_project.W");
66 | checkgrad.add(&(context_gatedpool_project._uni_gate_param.b), "context_gatedpool_project.b");
67 | checkgrad.add(&(local_gatedpool_project._uni_gate_param.W), "local_gatedpool_project.W");
68 | checkgrad.add(&(local_gatedpool_project._uni_gate_param.b), "local_gatedpool_project.b");
69 |
70 | checkgrad.add(&(right_lstm_project.cell.W1), "right_lstm_project.cell.W1");
71 |
72 | checkgrad.add(&(words.E), "_words.E");
73 | checkgrad.add(&(history_words.E), "_history_words.E");
74 |
75 | }
76 |
77 | // will add it later
78 | void saveModel(){
79 |
80 | }
81 |
82 | void loadModel(const string& inFile){
83 |
84 | }
85 |
86 | };
87 |
88 | #endif /* SRC_ModelParams_H_ */
--------------------------------------------------------------------------------
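`exportCheckGradParams` above registers tensors whose analytic gradients get compared against numerical ones. The underlying idea, shown on a one-parameter toy function with a central difference (an illustration, not LibN3L-2.0's `CheckGrad`):

    #include <cmath>
    #include <cstdio>

    // Toy objective f(w) = (w - 3)^2 with analytic gradient 2 * (w - 3).
    double f(double w) { return (w - 3.0) * (w - 3.0); }

    int main() {
        double w = 1.0, eps = 1e-6;
        double numeric  = (f(w + eps) - f(w - eps)) / (2.0 * eps); // central difference
        double analytic = 2.0 * (w - 3.0);
        std::printf("numeric=%g analytic=%g diff=%g\n",
                    numeric, analytic, std::fabs(numeric - analytic));
        return 0;
    }
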
/src/SparseLocalContext/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | include_directories(
3 | model
4 | )
5 |
6 | add_executable(SparseLocalContext SparseLocalContext.cpp)
7 |
8 |
--------------------------------------------------------------------------------
/src/SparseLocalContext/SparseLocalContext.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * SparseDetector.cpp
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #include "SparseLocalContext.h"
9 |
10 | #include "Argument_helper.h"
11 |
12 | Detector::Detector() {
13 | // TODO Auto-generated constructor stub
14 | srand(0);
15 | }
16 |
17 | Detector::~Detector() {
18 | // TODO Auto-generated destructor stub
19 | }
20 |
21 | int Detector::createAlphabet(const vector<Instance>& vecInsts) {
22 | if (vecInsts.size() == 0){
23 | std::cout << "training set empty" << std::endl;
24 | return -1;
25 | }
26 | cout << "Creating Alphabet..." << endl;
27 |
28 | int numInstance;
29 |
30 | m_driver._modelparams.labelAlpha.clear();
31 | // label alphabet and word statistics
32 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
33 | const Instance *pInstance = &vecInsts[numInstance];
34 |
35 | const vector<vector<string> > &words = pInstance->words;
36 | const string &label = pInstance->label;
37 |
38 | int labelId = m_driver._modelparams.labelAlpha.from_string(label);
39 |
40 |
41 | if ((numInstance + 1) % m_options.verboseIter == 0) {
42 | cout << numInstance + 1 << " ";
43 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
44 | cout << std::endl;
45 | cout.flush();
46 | }
47 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
48 | break;
49 | }
50 |
51 | cout << numInstance << " " << endl;
52 | cout << "Label num: " << m_driver._modelparams.labelAlpha.size() << endl;
53 |
54 | m_driver._modelparams.labelAlpha.set_fixed_flag(true);
55 |
56 | if (m_options.linearfeatCat > 0){
57 | cout << "Extracting linear features..." << endl;
58 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++){
59 | const Instance *pInstance = &vecInsts[numInstance];
60 | vector<string> linearfeat;
61 | extractLinearFeatures(linearfeat, pInstance);
62 | for (int i = 0; i < linearfeat.size(); i++)
63 | m_feat_stats[linearfeat[i]] ++;
64 | }
65 | m_feat_stats[unknownkey] = m_options.featCutOff + 1;
66 | cout << "Total feature num: " << m_feat_stats.size() << endl;
67 | m_driver._modelparams.featAlpha.initial(m_feat_stats, m_options.featCutOff);
68 | cout << "Remain feature num:" << m_driver._modelparams.featAlpha.size() << endl;
69 | m_driver._modelparams.featAlpha.set_fixed_flag(true);
70 | }
71 | return 0;
72 | }
73 |
74 |
75 |
76 |
77 | void Detector::extractLinearFeatures(vector<string>& feat, const Instance* pInstance) {
78 | feat.clear();
79 |
80 | const vector<vector<string> >& words = pInstance->words;
81 | int seq_size = pInstance->seqsize();
82 | assert(seq_size < 3);
83 | //Current sent linear feature
84 | const vector<string>& lastWords = words[seq_size - 1];
85 | int wordnumber = lastWords.size();
86 | string strfeat = "", curWord = "", preWord = "", pre2Word = "";
87 | for (int i = 0; i < wordnumber; i++){
88 | curWord = normalize_to_lowerwithdigit(lastWords[i]);
89 | strfeat = "F1U=" + curWord;
90 | feat.push_back(strfeat);
91 | preWord = i - 1 >= 0 ? lastWords[i - 1] : nullkey;
92 | strfeat = "F2B=" + preWord + seperateKey + curWord;
93 | feat.push_back(strfeat);
94 | pre2Word = i - 2 >= 0 ? lastWords[i - 2] : nullkey;
95 | strfeat = "F3T=" + pre2Word + seperateKey + preWord + seperateKey + curWord;
96 | feat.push_back(strfeat);
97 | }
98 |
99 | //History feature
100 | if (m_options.linearfeatCat > 1 && seq_size == 2){
101 | const vector<string>& historyWords = words[seq_size - 2];
102 | wordnumber = historyWords.size();
103 | for (int i = 0; i < wordnumber; i++){
104 | strfeat = "F4U=" + historyWords[i];
105 | feat.push_back(strfeat);
106 | }
107 | }
108 | }
109 |
110 | void Detector::convert2Example(const Instance* pInstance, Example& exam) {
111 | exam.clear();
112 |
113 | const string &instlabel = pInstance->label;
114 | const Alphabet &labelAlpha = m_driver._modelparams.labelAlpha;
115 |
116 | int labelnum = labelAlpha.size();
117 | for (int i = 0; i < labelnum; i++){
118 | string str = labelAlpha.from_id(i);
119 | if (instlabel.compare(str) == 0)
120 | exam.m_labels.push_back(1.0);
121 | else
122 | exam.m_labels.push_back(0.0);
123 | }
124 |
125 | //linear feature
126 | if (m_options.linearfeatCat > 0)
127 | extractLinearFeatures(exam.m_linearfeatures, pInstance);
128 |
129 | }
130 |
131 | void Detector::initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams) {
132 | int numInstance;
133 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
134 | const Instance *pInstance = &vecInsts[numInstance];
135 | Example curExam;
136 | convert2Example(pInstance, curExam);
137 | vecExams.push_back(curExam);
138 |
139 | if ((numInstance + 1) % m_options.verboseIter == 0) {
140 | cout << numInstance + 1 << " ";
141 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
142 | cout << std::endl;
143 | cout.flush();
144 | }
145 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
146 | break;
147 | }
148 |
149 | cout << numInstance << " " << endl;
150 | }
151 |
152 | void Detector::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile) {
153 | if (optionFile != "")
154 | m_options.load(optionFile);
155 | m_options.showOptions();
156 | vector<Instance> trainInsts, devInsts, testInsts;
157 | static vector<Instance> decodeInstResults;
158 | static Instance curDecodeInst;
159 | bool bCurIterBetter = false;
160 |
161 | m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
162 | if (devFile != "")
163 | m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
164 | if (testFile != "")
165 | m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);
166 |
167 | std::cout << "Training example number: " << trainInsts.size() << std::endl;
168 | std::cout << "Dev example number: " << trainInsts.size() << std::endl;
169 | std::cout << "Test example number: " << trainInsts.size() << std::endl;
170 |
171 | createAlphabet(trainInsts);
172 | vector<Example> trainExamples, devExamples, testExamples;
173 |
174 | std::cout << "Instance convert to example... " << std::endl;
175 | initialExamples(trainInsts, trainExamples);
176 | initialExamples(devInsts, devExamples);
177 | initialExamples(testInsts, testExamples);
178 |
179 | m_driver._hyperparams.setRequired(m_options);
180 | m_driver.initial();
181 |
182 | dtype bestDIS = 0;
183 |
184 | int inputSize = trainExamples.size();
185 |
186 | int batchBlock = inputSize / m_options.batchSize;
187 | if (inputSize % m_options.batchSize != 0)
188 | batchBlock++;
189 |
190 | srand(0);
191 | std::vector<int> indexes;
192 | for (int i = 0; i < inputSize; ++i)
193 | indexes.push_back(i);
194 |
195 | static Metric eval, metric_dev, metric_test;
196 | static vector<Example> subExamples;
197 | int devNum = devExamples.size(), testNum = testExamples.size();
198 | for (int iter = 0; iter < m_options.maxIter; ++iter) {
199 | std::cout << "##### Iteration " << iter << std::endl;
200 |
201 | random_shuffle(indexes.begin(), indexes.end());
202 | eval.reset();
203 | for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
204 | subExamples.clear();
205 | int start_pos = updateIter * m_options.batchSize;
206 | int end_pos = (updateIter + 1) * m_options.batchSize;
207 | if (end_pos > inputSize)
208 | end_pos = inputSize;
209 |
210 | for (int idy = start_pos; idy < end_pos; idy++) {
211 | subExamples.push_back(trainExamples[indexes[idy]]);
212 | }
213 |
214 | int curUpdateIter = iter * batchBlock + updateIter;
215 | dtype cost = m_driver.train(subExamples, curUpdateIter);
216 |
217 | eval.overall_label_count += m_driver._eval.overall_label_count;
218 | eval.correct_label_count += m_driver._eval.correct_label_count;
219 |
220 | if ((curUpdateIter + 1) % m_options.verboseIter == 0) {
221 | //m_driver.checkgrad(subExamples, curUpdateIter + 1);
222 | std::cout << "current: " << updateIter + 1 << ", total block: " << batchBlock << std::endl;
223 | std::cout << "Cost = " << cost << ", Tag Correct(%) = " << eval.getAccuracy() << std::endl;
224 | }
225 | m_driver.updateModel();
226 |
227 | }
228 |
229 | if (devNum > 0) {
230 | bCurIterBetter = false;
231 | if (!m_options.outBest.empty())
232 | decodeInstResults.clear();
233 | metric_dev.reset();
234 | for (int idx = 0; idx < devExamples.size(); idx++) {
235 | string result_label;
236 | predict(devExamples[idx].m_linearfeatures, result_label);
237 |
238 | devInsts[idx].Evaluate(result_label, metric_dev);
239 |
240 | if (!m_options.outBest.empty()) {
241 | curDecodeInst.copyValuesFrom(devInsts[idx]);
242 | curDecodeInst.assignLabel(result_label);
243 | decodeInstResults.push_back(curDecodeInst);
244 | }
245 | }
246 |
247 | std::cout << "dev:" << std::endl;
248 | metric_dev.print();
249 |
250 | if (!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS) {
251 | m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults);
252 | bCurIterBetter = true;
253 | }
254 |
255 | if (testNum > 0) {
256 | if (!m_options.outBest.empty())
257 | decodeInstResults.clear();
258 | metric_test.reset();
259 | for (int idx = 0; idx < testExamples.size(); idx++) {
260 | string result_label;
261 | predict(testExamples[idx].m_linearfeatures, result_label);
262 |
263 | testInsts[idx].Evaluate(result_label, metric_test);
264 |
265 | if (bCurIterBetter && !m_options.outBest.empty()) {
266 | curDecodeInst.copyValuesFrom(testInsts[idx]);
267 | curDecodeInst.assignLabel(result_label);
268 | decodeInstResults.push_back(curDecodeInst);
269 | }
270 | }
271 | std::cout << "test:" << std::endl;
272 | metric_test.print();
273 |
274 | if (!m_options.outBest.empty() && bCurIterBetter) {
275 | m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults);
276 | }
277 | }
278 |
279 |
280 |
281 | if (m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS) {
282 | std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl;
283 | bestDIS = metric_dev.getAccuracy();
284 | writeModelFile(modelFile);
285 | }
286 |
287 | }
288 | // Clear gradients
289 | }
290 | }
291 |
292 | int Detector::predict(const vector<string>& features, string& output) {
293 | int labelIdx;
294 | m_driver.predict(features, labelIdx);
295 | output = m_driver._modelparams.labelAlpha.from_id(labelIdx, nullkey);
296 |
297 | if (output == nullkey)
298 | std::cout << "predict error" << std::endl;
299 | return 0;
300 | }
301 |
302 | void Detector::test(const string& testFile, const string& outputFile, const string& modelFile) {
303 | loadModelFile(modelFile);
304 | vector<Instance> testInsts;
305 | m_pipe.readInstances(testFile, testInsts);
306 |
307 | vector<Example> testExamples;
308 | initialExamples(testInsts, testExamples);
309 |
310 | int testNum = testExamples.size();
311 | vector<Instance> testInstResults;
312 | Metric metric_test;
313 | metric_test.reset();
314 | for (int idx = 0; idx < testExamples.size(); idx++) {
315 | string result_label;
316 | predict(testExamples[idx].m_linearfeatures, result_label);
317 | testInsts[idx].Evaluate(result_label, metric_test);
318 | Instance curResultInst;
319 | curResultInst.copyValuesFrom(testInsts[idx]);
320 | curResultInst.assignLabel(result_label);
321 | testInstResults.push_back(curResultInst);
322 | }
323 | std::cout << "test:" << std::endl;
324 | metric_test.print();
325 |
326 | m_pipe.outputAllInstances(outputFile, testInstResults);
327 |
328 | }
329 |
330 |
331 | void Detector::loadModelFile(const string& inputModelFile) {
332 |
333 | }
334 |
335 | void Detector::writeModelFile(const string& outputModelFile) {
336 |
337 | }
338 |
339 | int main(int argc, char* argv[]) {
340 |
341 | std::string trainFile = "", devFile = "", testFile = "", modelFile = "", optionFile = "";
342 | std::string outputFile = "";
343 | bool bTrain = false;
344 | dsr::Argument_helper ah;
345 |
346 | ah.new_flag("l", "learn", "train or test", bTrain);
347 | ah.new_named_string("train", "trainCorpus", "named_string", "training corpus to train a model, must when training", trainFile);
348 | ah.new_named_string("dev", "devCorpus", "named_string", "development corpus to train a model, optional when training", devFile);
349 | ah.new_named_string("test", "testCorpus", "named_string",
350 | "testing corpus to train a model or input file to test a model, optional when training and must when testing", testFile);
351 | ah.new_named_string("model", "modelFile", "named_string", "model file, must when training and testing", modelFile);
352 | ah.new_named_string("option", "optionFile", "named_string", "option file to train a model, optional when training", optionFile);
353 | ah.new_named_string("output", "outputFile", "named_string", "output file to test, must when testing", outputFile);
354 |
355 | ah.process(argc, argv);
356 |
357 | Detector detector;
358 | detector.m_pipe.max_sentense_size = ComputionGraph::max_sentence_length;
359 | if (bTrain) {
360 | detector.train(trainFile, devFile, testFile, modelFile, optionFile);
361 | }
362 | else {
363 | detector.test(testFile, outputFile, modelFile);
364 | }
365 |
366 | //test(argv);
367 | //ah.write_values(std::cout);
368 | }
369 |
--------------------------------------------------------------------------------
/src/SparseLocalContext/SparseLocalContext.h:
--------------------------------------------------------------------------------
1 | /*
2 | * SparseDetector.h
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #ifndef SRC_SparseDetector_H_
9 | #define SRC_SparseDetector_H_
10 |
11 |
12 | #include "N3L.h"
13 | #include "Driver.h"
14 | #include "Options.h"
15 | #include "Instance.h"
16 | #include "Example.h"
17 | #include "Pipe.h"
18 | #include "Utf.h"
19 |
20 | using namespace nr;
21 | using namespace std;
22 |
23 | class Detector {
24 |
25 |
26 | public:
27 | unordered_map<string, int> m_feat_stats;
28 |
29 | public:
30 | Options m_options;
31 |
32 | Pipe m_pipe;
33 |
34 | Driver m_driver;
35 |
36 |
37 | public:
38 | Detector();
39 | virtual ~Detector();
40 |
41 | public:
42 |
43 | int createAlphabet(const vector<Instance>& vecTrainInsts);
44 |
45 | void extractLinearFeatures(vector<string>& features, const Instance* pInstance);
46 |
47 | void convert2Example(const Instance* pInstance, Example& exam);
48 | void initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams);
49 |
50 | public:
51 | void train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile);
52 | int predict(const vector<string>& features, string& outputs);
53 | void test(const string& testFile, const string& outputFile, const string& modelFile);
54 |
55 | void writeModelFile(const string& outputModelFile);
56 | void loadModelFile(const string& inputModelFile);
57 |
58 | };
59 |
60 | #endif /* SRC_SparseDetector_H_ */
61 |
--------------------------------------------------------------------------------
/src/SparseLocalContext/model/ComputionGraph.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ComputionGraph_H_
2 | #define SRC_ComputionGraph_H_
3 |
4 | #include "ModelParams.h"
5 |
6 |
7 | // Each model consists of two parts: building the neural graph and defining the output losses.
8 | struct ComputionGraph : Graph{
9 | public:
10 | const static int max_sentence_length = 256;
11 |
12 | public:
13 | // node instances
14 | SparseNode output;
15 | public:
16 | ComputionGraph() : Graph(){
17 | }
18 |
19 | ~ComputionGraph(){
20 | clear();
21 | }
22 |
23 | public:
24 | //allocate enough nodes
25 | inline void createNodes(int sent_length, int typeNum){
26 | // nothing to pre-allocate: the sparse model uses a single output node
27 | }
28 |
29 | inline void clear(){
30 | Graph::clear();
31 | }
32 |
33 | public:
34 | inline void initial(ModelParams& model, HyperParams& opts){
35 | output.setParam(&model.sparselayer);
36 | output.init(opts.labelSize,-1);
37 | }
38 |
39 |
40 | public:
41 | // some nodes behave differently during training and decoding, e.g., dropout
42 | inline void forward(const vector<string>& features, bool bTrain = false){
43 | //first step: clear value
44 | clearValue(bTrain); // clearing values is required before training, prediction, and cost computation
45 |
46 |
47 | // second step: build graph
48 | //forward
49 | output.forward(this, features);
50 | }
51 |
52 | };
53 |
54 | #endif /* SRC_ComputionGraph_H_ */
--------------------------------------------------------------------------------
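Note on the graph above: initial() binds the single output node to the model's sparse layer, and forward() scores a bag of feature strings; there are no per-word nodes in this sparse variant. A minimal decode-time sketch, assuming model and opts were initialized as in Driver.h below:

    ComputionGraph cg;
    cg.initial(model, opts);            // bind cg.output to model.sparselayer
    cg.forward(exam.m_linearfeatures);  // bTrain defaults to false: no dropout, no gradient bookkeeping

The per-label scores are then read off cg.output, e.g. by SoftMaxLoss::predict as in Driver::predict.
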
/src/SparseLocalContext/model/Driver.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Driver.h
3 | *
4 | * Created on: Mar 18, 2015
5 | * Author: mszhang
6 | */
7 |
8 | #ifndef SRC_Driver_H_
9 | #define SRC_Driver_H_
10 |
11 | #include <iostream>
12 | #include "ComputionGraph.h"
13 |
14 |
15 | //A naive neural network classifier using only linear features
16 |
17 | class Driver{
18 | public:
19 | Driver() {
20 | _pcg = NULL;
21 | }
22 |
23 | ~Driver() {
24 | if (_pcg != NULL)
25 | delete _pcg;
26 | _pcg = NULL;
27 | }
28 |
29 | public:
30 | ComputionGraph *_pcg; // build neural graphs
31 | ModelParams _modelparams; // model parameters
32 | HyperParams _hyperparams;
33 |
34 | Metric _eval;
35 | CheckGrad _checkgrad;
36 | ModelUpdate _ada; // model update
37 |
38 |
39 | public:
40 | inline void initial() {
41 | if (!_hyperparams.bValid()){
42 | std::cout << "hyperparameter initialization error, please check!" << std::endl;
43 | return;
44 | }
45 | if (!_modelparams.initial(_hyperparams)){
46 | std::cout << "model parameter initialization error, please check!" << std::endl;
47 | return;
48 | }
49 | _modelparams.exportModelParams(_ada);
50 | _modelparams.exportCheckGradParams(_checkgrad);
51 |
52 | _hyperparams.print();
53 |
54 | _pcg = new ComputionGraph();
55 | _pcg->initial(_modelparams, _hyperparams);
56 |
57 | setUpdateParameters(_hyperparams.nnRegular, _hyperparams.adaAlpha, _hyperparams.adaEps);
58 | }
59 |
60 |
61 | inline dtype train(const vector<Example>& examples, int iter) {
62 | _eval.reset();
63 |
64 | int example_num = examples.size();
65 | dtype cost = 0.0;
66 |
67 | for (int count = 0; count < example_num; count++) {
68 | const Example& example = examples[count];
69 |
70 | //forward
71 | _pcg->forward(example.m_linearfeatures, true);
72 |
73 | //loss function
74 | cost += _modelparams.loss.loss(&_pcg->output, example.m_labels, _eval, example_num);
75 |
76 | // backward, which exists only for training
77 | _pcg->backward();
78 | }
79 |
80 | if (_eval.getAccuracy() < 0) {
81 | std::cout << "warning: negative training accuracy, check the loss computation" << std::endl;
82 | }
83 |
84 | return cost;
85 | }
86 |
87 | inline void predict(const vector<string>& features, int& results) {
88 | _pcg->forward(features);
89 | _modelparams.loss.predict(&_pcg->output, results);
90 | }
91 |
92 | inline dtype cost(const Example& example) {
93 | _pcg->forward(example.m_linearfeatures, true);
94 |
95 | dtype cost = 0.0;
96 |
97 | cost += _modelparams.loss.cost(&_pcg->output, example.m_labels, 1);
98 |
99 | return cost;
100 | }
101 |
102 | void checkgrad(const vector<Example>& examples, int iter){
103 | ostringstream out;
104 | out << "Iteration: " << iter;
105 | _checkgrad.check(this, examples, out.str());
106 | }
107 |
108 |
109 | void updateModel() {
110 | _ada.update();
111 | //_ada.update(5.0);
112 | }
113 |
114 | void writeModel();
115 |
116 | void loadModel();
117 |
118 |
119 |
120 | private:
121 | inline void resetEval() {
122 | _eval.reset();
123 | }
124 |
125 |
126 | inline void setUpdateParameters(dtype nnRegular, dtype adaAlpha, dtype adaEps){
127 | _ada._alpha = adaAlpha;
128 | _ada._eps = adaEps;
129 | _ada._reg = nnRegular;
130 | }
131 |
132 | };
133 |
134 | #endif /* SRC_Driver_H_ */
135 |
--------------------------------------------------------------------------------
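Note: train() runs forward, loss, and backward over a batch of examples and accumulates gradients; the caller then applies the optimizer step with updateModel(). A minimal sketch of one training pass, assuming vecExams came from Detector::initialExamples() and maxIter from the options:

    driver.initial();
    for (int iter = 0; iter < maxIter; iter++) {
        dtype cost = driver.train(vecExams, iter);  // forward + backward, gradients accumulate
        driver.updateModel();                       // apply the accumulated update
        std::cout << "iter " << iter << " cost = " << cost << std::endl;
    }
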
/src/SparseLocalContext/model/HyperParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_HyperParams_H_
2 | #define SRC_HyperParams_H_
3 |
4 | #include "N3L.h"
5 | #include "Example.h"
6 | #include "Options.h"
7 |
8 | using namespace nr;
9 | using namespace std;
10 |
11 | struct HyperParams{
12 |
13 | // must assign
14 | dtype nnRegular; // for optimization
15 | dtype adaAlpha; // for optimization
16 | dtype adaEps; // for optimization
17 |
18 | //auto generated
19 | int labelSize;
20 |
21 | public:
22 | HyperParams(){
23 | bAssigned = false;
24 | }
25 |
26 | public:
27 | void setRequired(Options& opt){
28 | nnRegular = opt.regParameter;
29 | adaAlpha = opt.adaAlpha;
30 | adaEps = opt.adaEps;
31 |
32 | bAssigned = true;
33 | }
34 |
35 | void clear(){
36 | bAssigned = false;
37 | }
38 |
39 | bool bValid(){
40 | return bAssigned;
41 | }
42 |
43 |
44 | public:
45 |
46 | void print(){
47 | std::cout << "nnRegular = " << nnRegular << ", adaAlpha = " << adaAlpha << ", adaEps = " << adaEps << ", labelSize = " << labelSize << std::endl;
48 | }
49 |
50 | private:
51 | bool bAssigned;
52 | };
53 |
54 |
55 | #endif /* SRC_HyperParams_H_ */
--------------------------------------------------------------------------------
/src/SparseLocalContext/model/ModelParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ModelParams_H_
2 | #define SRC_ModelParams_H_
3 | #include "HyperParams.h"
4 |
5 | // Each model consists of two parts: building the neural graph and defining the output losses.
6 | class ModelParams{
7 |
8 | public:
9 | Alphabet featAlpha; // should be initialized outside
10 | Alphabet labelAlpha; // should be initialized outside
11 | public:
12 | SparseParams sparselayer;
13 | SoftMaxLoss loss;
14 |
15 |
16 | public:
17 | bool initial(HyperParams& opts){
18 |
19 | // some model parameters should be initialized outside
20 |
21 | opts.labelSize = labelAlpha.size();
22 | sparselayer.initial(&featAlpha, opts.labelSize);
23 |
24 | return true;
25 | }
26 |
27 |
28 | void exportModelParams(ModelUpdate& ada){
29 | sparselayer.exportAdaParams(ada);
30 | }
31 |
32 |
33 | void exportCheckGradParams(CheckGrad& checkgrad){
34 | checkgrad.add(&(sparselayer.W), "sparse.w");
35 | }
36 |
37 | // model serialization will be added later
38 | void saveModel(){
39 |
40 | }
41 |
42 | void loadModel(const string& inFile){
43 |
44 | }
45 |
46 | };
47 |
48 | #endif /* SRC_ModelParams_H_ */
--------------------------------------------------------------------------------
/src/basic/Example.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Example.h
3 | *
4 | * Created on: Mar 17, 2015
5 | * Author: mszhang
6 | */
7 |
8 | #ifndef SRC_EXAMPLE_H_
9 | #define SRC_EXAMPLE_H_
10 |
11 | #include "MyLib.h"
12 |
13 | using namespace std;
14 | struct Feature {
15 | public:
16 | vector<string> words;
17 | public:
18 | Feature() {
19 | }
20 |
21 | virtual ~Feature() {
22 |
23 | }
24 |
25 | void clear() {
26 | words.clear();
27 | }
28 | };
29 |
30 | class Example {
31 |
32 | public:
33 | vector<dtype> m_labels; // one-hot gold label vector
34 | vector<Feature> m_densefeatures; // word sequences for the neural models
35 | vector<string> m_linearfeatures; // sparse feature strings for the linear model
36 | public:
37 | Example(){
38 |
39 | }
40 | virtual ~Example(){
41 |
42 | }
43 |
44 | void clear(){
45 | m_labels.clear();
46 | m_densefeatures.clear();
47 | m_linearfeatures.clear();
48 | }
49 |
50 | };
51 |
52 |
53 | #endif /* SRC_EXAMPLE_H_ */
54 |
--------------------------------------------------------------------------------
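Note: for the sparse model only m_labels and m_linearfeatures are used; m_densefeatures serves the NNWord* variants. A minimal sketch of a binary example, assuming m_labels is a one-hot vector over the label alphabet and using hypothetical feature strings:

    Example exam;
    exam.m_labels.assign(2, 0);                       // two labels in the alphabet
    exam.m_labels[1] = 1;                             // gold label has index 1
    exam.m_linearfeatures.push_back("U=yeah");        // hypothetical unigram feature
    exam.m_linearfeatures.push_back("B=yeah_right");  // hypothetical bigram feature
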
/src/basic/Instance.h:
--------------------------------------------------------------------------------
1 | #ifndef _JST_INSTANCE_
2 | #define _JST_INSTANCE_
3 |
4 | #include <iostream>
5 | #include <vector>
6 | #include <string>
7 | #include <fstream>
8 | #include "N3L.h"
9 | #include "Metric.h"
10 |
11 | using namespace std;
12 |
13 | class Instance {
14 | public:
15 | Instance() {
16 | }
17 | ~Instance() {
18 | }
19 |
20 | int seqsize() const {
21 | return words.size();
22 | }
23 |
24 |
25 | int wordnum() const{
26 | return words[seqsize() - 1].size();
27 | }
28 |
29 | void clear() {
30 | label = "";
31 | for (int i = 0; i < seqsize(); i++) {
32 | words[i].clear();
33 | }
34 | words.clear();
35 | confidence = -1.0;
36 | }
37 |
38 | void allocate(int seq_size) {
39 | clear();
40 | label = "";
41 | words.resize(seq_size);
42 | confidence = -1.0;
43 | }
44 |
45 | void copyValuesFrom(const Instance& anInstance) {
46 | allocate(anInstance.seqsize());
47 | for (int i = 0; i < anInstance.seqsize(); i++) {
48 | for (int j = 0; j < anInstance.words[i].size(); j++)
49 | words[i].push_back(anInstance.words[i][j]);
50 | }
51 | label = anInstance.label;
52 | }
53 |
54 | void assignLabel(const string& resulted_label) {
55 | label = resulted_label;
56 | }
57 |
58 | void assignLabel(const string& resulted_label, dtype resulted_confidence){
59 | label = resulted_label;
60 | confidence = resulted_confidence;
61 | }
62 |
63 | void Evaluate(const string& resulted_label, Metric& eval) const {
64 | if (resulted_label.compare(label) == 0)
65 | eval.correct_label_count++;
66 | eval.overall_label_count++;
67 |
68 | }
69 |
70 |
71 | public:
72 | string label;
73 | vector<vector<string> > words;
74 | dtype confidence;
75 | };
76 |
77 | #endif
78 |
79 |
--------------------------------------------------------------------------------
/src/basic/InstanceReader.h:
--------------------------------------------------------------------------------
1 | #ifndef _CONLL_READER_
2 | #define _CONLL_READER_
3 |
4 | #include "Reader.h"
5 | #include "N3L.h"
6 | #include <sstream>
7 |
8 | using namespace std;
9 |
10 | class InstanceReader : public Reader {
11 | public:
12 | InstanceReader() {
13 | }
14 | ~InstanceReader() {
15 | }
16 |
17 | Instance *getNext() {
18 | m_instance.clear();
19 | vector<string> vecLine;
20 | while (1) {
21 | string strLine;
22 | if (!my_getline(m_inf, strLine)) {
23 | break;
24 | }
25 | if (strLine.empty())
26 | break;
27 | vecLine.push_back(strLine);
28 | }
29 |
30 | int seq_size = vecLine.size();
31 |
32 | if (seq_size == 1) {
33 | m_instance.allocate(1);
34 | vector<string> vecInfo;
35 | split_bychar(vecLine[0], vecInfo, ' ');
36 | int veclength = vecInfo.size();
37 | m_instance.label = vecInfo[0];
38 | for (int j = 1; j < veclength; j++)
39 | m_instance.words[0].push_back(vecInfo[j]);
40 | }
41 | else {
42 | m_instance.allocate(2);
43 | for (int i = 0; i < seq_size; ++i) {
44 | vector<string> vecInfo;
45 | split_bychar(vecLine[i], vecInfo, ' ');
46 | int veclength = vecInfo.size();
47 | if (i == seq_size - 1) {
48 | m_instance.label = vecInfo[0];
49 | for (int j = 1; j < veclength; j++)
50 | m_instance.words[1].push_back(vecInfo[j]);
51 | }
52 | else {
53 | for (int j = 1; j < veclength; j++)
54 | m_instance.words[0].push_back(vecInfo[j]);
55 | }
56 |
57 | }
58 | }
59 |
60 | return &m_instance;
61 | }
62 | };
63 |
64 | #endif
65 |
66 |
--------------------------------------------------------------------------------
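Note on the input format read by getNext(): instances are blank-line-separated blocks, one sentence per line, where each line's first token is a label slot and the remaining tokens are words; with several lines, the last line is the current tweet (its label is kept) and the earlier lines are history (their label slots are skipped). A hypothetical two-line instance:

    - this movie was so long
    1 yeah right i loved every minute

(only the trailing line's label, here "1", is stored on the instance)
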
/src/basic/InstanceWriter.h:
--------------------------------------------------------------------------------
1 | #ifndef _CONLL_WRITER_
2 | #define _CONLL_WRITER_
3 |
4 | #include "Writer.h"
5 | #include <sstream>
6 |
7 | using namespace std;
8 |
9 | class InstanceWriter : public Writer
10 | {
11 | public:
12 | InstanceWriter(){}
13 | ~InstanceWriter(){}
14 | int write(const Instance *pInstance)
15 | {
16 | if (!m_outf.is_open()) return -1;
17 |
18 | const vector<vector<string> > &words = pInstance->words;
19 | int seq_size = words.size();
20 | for (int i = 0; i < seq_size; i++){
21 | const string &label = pInstance->label;
22 | if (i < seq_size - 1)
23 | m_outf << "history " << endl;
24 | else if (pInstance->confidence < 0.0)
25 | m_outf << label << endl;
26 | else
27 | m_outf << label << " " << pInstance->confidence << endl;
28 |
29 | }
30 | m_outf << endl;
31 | return 0;
32 |
33 | }
34 | };
35 |
36 |
37 | #endif
38 |
39 |
--------------------------------------------------------------------------------
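Note: write() emits one line per sequence: the literal token "history" for every sequence before the last, then the predicted label, with the confidence appended when it is non-negative, followed by a blank line. For the hypothetical two-sequence instance above it would produce:

    history
    1 0.93
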
/src/basic/Options.h:
--------------------------------------------------------------------------------
1 | #ifndef _PARSER_OPTIONS_
2 | #define _PARSER_OPTIONS_
3 |
4 | #pragma once
5 |
6 | #include <string>
7 | #include <fstream>
8 | #include <vector>
9 | #include <iostream>
10 | #include "N3L.h"
11 |
12 | using namespace std;
13 |
14 | class Options {
15 | public:
16 |
17 | int wordCutOff;
18 | int featCutOff;
19 | int charCutOff;
20 | dtype initRange;
21 | int maxIter;
22 | int batchSize;
23 | dtype adaEps;
24 | dtype adaAlpha;
25 | dtype regParameter;
26 | dtype dropProb;
27 |
28 | int segHiddenSize;
29 | int hiddenSize;
30 | int rnnHiddenSize;
31 | int wordEmbSize;
32 | int wordcontext;
33 | bool wordEmbFineTune;
34 |
35 | int charEmbSize;
36 | int charcontext;
37 | bool charEmbFineTune;
38 | int charhiddenSize;
39 |
40 | int typeEmbSize;
41 | bool typeEmbFineTune;
42 |
43 | int maxsegLen;
44 |
45 | int verboseIter;
46 | bool saveIntermediate;
47 | bool train;
48 | int maxInstance;
49 | vector<string> testFiles;
50 | string outBest;
51 | bool seg;
52 | int relu;
53 | int atomLayers;
54 | int rnnLayers;
55 |
56 | //embedding files
57 | string wordFile;
58 | string charFile;
59 | string segFile;
60 | vector<string> typeFiles;
61 |
62 | //linear feature: 0:do not use, 1:current sent, 2:current+history sent
63 | int linearfeatCat;
64 |
65 | Options() {
66 | wordCutOff = 0;
67 | featCutOff = 0;
68 | charCutOff = 0;
69 | initRange = 0.01;
70 | maxIter = 1000;
71 | batchSize = 1;
72 | adaEps = 1e-6;
73 | adaAlpha = 0.01;
74 | regParameter = 1e-8;
75 | dropProb = 0.0;
76 |
77 | segHiddenSize = 100;
78 | hiddenSize = 100;
79 | rnnHiddenSize = 100;
80 | wordEmbSize = 50;
81 | wordcontext = 2;
82 | wordEmbFineTune = true;
83 | charEmbSize = 50;
84 | charcontext = 2;
85 | charEmbFineTune = true;
86 | charhiddenSize = 50;
87 |
88 | typeEmbSize = 50;
89 | typeEmbFineTune = true;
90 |
91 | verboseIter = 100;
92 | saveIntermediate = true;
93 | train = false;
94 | maxInstance = -1;
95 | testFiles.clear();
96 | outBest = "";
97 | relu = 0;
98 | seg = false;
99 | atomLayers = 1;
100 | rnnLayers = 1;
101 | maxsegLen = 5;
102 |
103 | wordFile = "";
104 | charFile = "";
105 | segFile = "";
106 | typeFiles.clear();
107 |
108 | linearfeatCat = 0;
109 | }
110 |
111 | virtual ~Options() {
112 |
113 | }
114 |
115 | void setOptions(const vector<string> &vecOption) {
116 | int i = 0;
117 | for (; i < vecOption.size(); ++i) {
118 | pair<string, string> pr;
119 | string2pair(vecOption[i], pr, '=');
120 | if (pr.first == "wordCutOff")
121 | wordCutOff = atoi(pr.second.c_str());
122 | if (pr.first == "featCutOff")
123 | featCutOff = atoi(pr.second.c_str());
124 | if (pr.first == "charCutOff")
125 | charCutOff = atoi(pr.second.c_str());
126 | if (pr.first == "initRange")
127 | initRange = atof(pr.second.c_str());
128 | if (pr.first == "maxIter")
129 | maxIter = atoi(pr.second.c_str());
130 | if (pr.first == "batchSize")
131 | batchSize = atoi(pr.second.c_str());
132 | if (pr.first == "adaEps")
133 | adaEps = atof(pr.second.c_str());
134 | if (pr.first == "adaAlpha")
135 | adaAlpha = atof(pr.second.c_str());
136 | if (pr.first == "regParameter")
137 | regParameter = atof(pr.second.c_str());
138 | if (pr.first == "dropProb")
139 | dropProb = atof(pr.second.c_str());
140 |
141 | if (pr.first == "segHiddenSize")
142 | segHiddenSize = atoi(pr.second.c_str());
143 | if (pr.first == "hiddenSize")
144 | hiddenSize = atoi(pr.second.c_str());
145 | if (pr.first == "rnnHiddenSize")
146 | rnnHiddenSize = atoi(pr.second.c_str());
147 | if (pr.first == "wordcontext")
148 | wordcontext = atoi(pr.second.c_str());
149 | if (pr.first == "wordEmbSize")
150 | wordEmbSize = atoi(pr.second.c_str());
151 | if (pr.first == "wordEmbFineTune")
152 | wordEmbFineTune = (pr.second == "true") ? true : false;
153 | if (pr.first == "charcontext")
154 | charcontext = atoi(pr.second.c_str());
155 | if (pr.first == "charEmbSize")
156 | charEmbSize = atoi(pr.second.c_str());
157 | if (pr.first == "charEmbFineTune")
158 | charEmbFineTune = (pr.second == "true") ? true : false;
159 | if (pr.first == "charhiddenSize")
160 | charhiddenSize = atoi(pr.second.c_str());
161 | if (pr.first == "typeEmbSize")
162 | typeEmbSize = atoi(pr.second.c_str());
163 | if (pr.first == "typeEmbFineTune")
164 | typeEmbFineTune = (pr.second == "true") ? true : false;
165 |
166 | if (pr.first == "verboseIter")
167 | verboseIter = atoi(pr.second.c_str());
168 | if (pr.first == "train")
169 | train = (pr.second == "true") ? true : false;
170 | if (pr.first == "saveIntermediate")
171 | saveIntermediate = (pr.second == "true") ? true : false;
172 | if (pr.first == "maxInstance")
173 | maxInstance = atoi(pr.second.c_str());
174 | if (pr.first == "testFile")
175 | testFiles.push_back(pr.second);
176 | if (pr.first == "outBest")
177 | outBest = pr.second;
178 | if (pr.first == "relu")
179 | relu = atoi(pr.second.c_str());
180 | if (pr.first == "seg")
181 | seg = (pr.second == "true") ? true : false;
182 | if (pr.first == "atomLayers")
183 | atomLayers = atoi(pr.second.c_str());
184 | if (pr.first == "rnnLayers")
185 | rnnLayers = atoi(pr.second.c_str());
186 | if (pr.first == "maxsegLen")
187 | maxsegLen = atoi(pr.second.c_str());
188 |
189 | if (pr.first == "wordFile")
190 | wordFile = pr.second;
191 | if (pr.first == "segFile")
192 | segFile = pr.second;
193 | if (pr.first == "charFile")
194 | charFile = pr.second;
195 | if (pr.first == "typeFile")
196 | typeFiles.push_back(pr.second);
197 |
198 | if (pr.first == "linearfeatCat")
199 | linearfeatCat = atoi(pr.second.c_str());
200 | }
201 | }
202 |
203 | void showOptions() {
204 | std::cout << "wordCutOff = " << wordCutOff << std::endl;
205 | std::cout << "featCutOff = " << featCutOff << std::endl;
206 | std::cout << "charCutOff = " << charCutOff << std::endl;
207 | std::cout << "initRange = " << initRange << std::endl;
208 | std::cout << "maxIter = " << maxIter << std::endl;
209 | std::cout << "batchSize = " << batchSize << std::endl;
210 | std::cout << "adaEps = " << adaEps << std::endl;
211 | std::cout << "adaAlpha = " << adaAlpha << std::endl;
212 | std::cout << "regParameter = " << regParameter << std::endl;
213 | std::cout << "dropProb = " << dropProb << std::endl;
214 |
215 | std::cout << "segHiddenSize = " << segHiddenSize << std::endl;
216 | std::cout << "hiddenSize = " << hiddenSize << std::endl;
217 | std::cout << "rnnHiddenSize = " << rnnHiddenSize << std::endl;
218 | std::cout << "wordEmbSize = " << wordEmbSize << std::endl;
219 | std::cout << "wordcontext = " << wordcontext << std::endl;
220 | std::cout << "wordEmbFineTune = " << wordEmbFineTune << std::endl;
221 | std::cout << "charEmbSize = " << charEmbSize << std::endl;
222 | std::cout << "charcontext = " << charcontext << std::endl;
223 | std::cout << "charEmbFineTune = " << charEmbFineTune << std::endl;
224 | std::cout << "charhiddenSize = " << charhiddenSize << std::endl;
225 | std::cout << "typeEmbSize = " << typeEmbSize << std::endl;
226 | std::cout << "typeEmbFineTune = " << typeEmbFineTune << std::endl;
227 |
228 | std::cout << "verboseIter = " << verboseIter << std::endl;
229 | std::cout << "saveIntermediate = " << saveIntermediate << std::endl;
230 | std::cout << "train = " << train << std::endl;
231 | std::cout << "maxInstance = " << maxInstance << std::endl;
232 | for (int idx = 0; idx < testFiles.size(); idx++) {
233 | std::cout << "testFile = " << testFiles[idx] << std::endl;
234 | }
235 | std::cout << "outBest = " << outBest << std::endl;
236 | std::cout << "relu = " << relu << std::endl;
237 | std::cout << "seg = " << seg << std::endl;
238 | std::cout << "atomLayers = " << atomLayers << std::endl;
239 | std::cout << "rnnLayers = " << rnnLayers << std::endl;
240 | std::cout << "maxsegLen = " << maxsegLen << std::endl;
241 |
242 | std::cout << "wordFile = " << wordFile << std::endl;
243 | std::cout << "charFile = " << charFile << std::endl;
244 | std::cout << "segFile = " << segFile << std::endl;
245 | for (int idx = 0; idx < typeFiles.size(); idx++) {
246 | std::cout << "typeFile = " << typeFiles[idx] << std::endl;
247 | }
248 |
249 | std::cout << "linearfeatCat = " << linearfeatCat << std::endl;
250 | }
251 |
252 | void load(const std::string& infile) {
253 | ifstream inf;
254 | inf.open(infile.c_str());
255 | vector<string> vecLine;
256 | while (1) {
257 | string strLine;
258 | if (!my_getline(inf, strLine)) {
259 | break;
260 | }
261 | if (strLine.empty())
262 | continue;
263 | vecLine.push_back(strLine);
264 | }
265 | inf.close();
266 | setOptions(vecLine);
267 | }
268 | };
269 |
270 | #endif
271 |
272 |
--------------------------------------------------------------------------------
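Note: load() collects non-empty lines and setOptions() splits each at '=' into a key/value pair, ignoring unknown keys, so an option file is simply one key=value per line. A minimal sketch with hypothetical values for the sparse model:

    maxIter=200
    featCutOff=1
    adaAlpha=0.005
    regParameter=1e-8
    linearfeatCat=2
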
/src/basic/Pipe.h:
--------------------------------------------------------------------------------
1 | #ifndef _JST_PIPE_
2 | #define _JST_PIPE_
3 |
4 | #include <iostream>
5 | #include <fstream>
6 | #include <sstream>
7 | #include <string>
8 | #include <vector>
9 | #include <cassert>