├── .gitignore
├── CMakeLists.txt
├── README.md
├── command.sh
├── example
│   ├── sarcasm1v1.dev1.nn
│   ├── sarcasm1v1.test1.nn
│   └── sarcasm1v1.train1.nn
└── src
    ├── CMakeLists.txt
    ├── NNWordLocal
    │   ├── CMakeLists.txt
    │   ├── NNWordLocal.cpp
    │   ├── NNWordLocal.h
    │   └── model
    │       ├── ComputionGraph.h
    │       ├── Driver.h
    │       ├── HyperParams.h
    │       └── ModelParams.h
    ├── NNWordLocalContext
    │   ├── CMakeLists.txt
    │   ├── NNWordLocalContext.cpp
    │   ├── NNWordLocalContext.h
    │   └── model
    │       ├── ComputionGraph.h
    │       ├── Driver.h
    │       ├── HyperParams.h
    │       └── ModelParams.h
    ├── NNWordLocalContextSeparate
    │   ├── CMakeLists.txt
    │   ├── NNWordLocalContextSeparate.cpp
    │   ├── NNWordLocalContextSeparate.h
    │   └── model
    │       ├── ComputionGraph.h
    │       ├── Driver.h
    │       ├── HyperParams.h
    │       └── ModelParams.h
    ├── SparseLocalContext
    │   ├── CMakeLists.txt
    │   ├── SparseLocalContext.cpp
    │   ├── SparseLocalContext.h
    │   └── model
    │       ├── ComputionGraph.h
    │       ├── Driver.h
    │       ├── HyperParams.h
    │       └── ModelParams.h
    └── basic
        ├── Example.h
        ├── Instance.h
        ├── InstanceReader.h
        ├── InstanceWriter.h
        ├── Options.h
        ├── Pipe.h
        ├── Reader.h
        ├── Utf.h
        └── Writer.h
/.gitignore:
--------------------------------------------------------------------------------
1 | bin
2 | build
3 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | project(SarcasmDetection)
2 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
3 |
4 | set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
5 |
6 | set (EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)
7 |
8 | if(CMAKE_BUILD_TYPE MATCHES Debug)
9 | SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -msse3 -funroll-loops -std=c++11 -O0 -pg" )
10 | else()
11 | SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops -w -std=c++11 -Ofast -march=native" )
12 | endif()
13 | add_definitions( -DUSE_FLOAT )
14 |
15 | include_directories(${EIGEN3_INCLUDE_DIR})
16 | include_directories(${N3L_INCLUDE_DIR})
17 |
18 | add_subdirectory(src)
19 |
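20 | # Note: EIGEN3_INCLUDE_DIR and N3L_INCLUDE_DIR are not located automatically; they must be
21 | # passed on the cmake command line, e.g. -DEIGEN3_INCLUDE_DIR=... -DN3L_INCLUDE_DIR=... (see README.md and command.sh).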
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SarcasmDetection
2 | ### This is the code for the paper:
3 | Meishan Zhang, Yue Zhang, Guohong Fu. [Tweet Sarcasm Detection Using Deep Neural Network.](http://zhangmeishan.github.io/coling2016-sarcasm.pdf) In Proceedings of COLING 2016. 2016.12.
4 | ## HOW TO COMPILE THIS PROJECT ON WINDOWS
5 | * Step 0: Open cmd and change to the project directory. Use this command: `cd /your/project/path/SarcasmDetection`.
6 | * Step 1: Create a new directory inside SarcasmDetection. For example, use this command: `mkdir build`.
7 | * Step 2: Change into that directory. Use this command: `cd build`.
8 | * Step 3: Generate the Visual Studio solution. Use this command: `cmake .. -DEIGEN3_INCLUDE_DIR=/your/eigen/path -DN3L_INCLUDE_DIR=/your/LibN3L-2.0/path`.
9 | * Step 4: Then you can double-click "SarcasmDetection.sln" to open the project.
10 | * Step 5: Now you can compile the project with Visual Studio.
11 | * Step 6: If you want to run the project, open the project properties and add these arguments:
12 | `-train /your/training/corpus -dev /your/development/corpus -test /your/test/corpus -option /your/option/file -l`
13 |
14 | ## NOTE
15 | Make sure you have Eigen, LibN3L-2.0, CMake, and Visual Studio 2013 (or newer).
16 | * Eigen: http://eigen.tuxfamily.org/index.php?title=Main_Page
17 | * LibN3L-2.0: https://github.com/zhangmeishan/LibN3L-2.0
18 | * CMake: https://cmake.org/
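19 |
20 | ## EXAMPLE (LINUX)
21 | A minimal end-to-end sketch, adapted from `command.sh`. The paths are placeholders: it assumes Eigen and LibN3L-2.0 live under `~/workspace/`, that the binary is run from `bin/` (the root CMakeLists.txt writes executables there), and that `option.word` is an option file like the one referenced in `command.sh`.
22 | ```
23 | mkdir build && cd build
24 | cmake .. -DEIGEN3_INCLUDE_DIR=~/workspace/eigen/ -DN3L_INCLUDE_DIR=~/workspace/LibN3L-2.0/
25 | make
26 | cd ../bin
27 | ./NNWordLocal -l -train ../example/sarcasm1v1.train1.nn -dev ../example/sarcasm1v1.dev1.nn -test ../example/sarcasm1v1.test1.nn -option option.word
28 | ```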
--------------------------------------------------------------------------------
/command.sh:
--------------------------------------------------------------------------------
1 | cmake .. -DEIGEN3_INCLUDE_DIR=/c/eigen/ -DN3L_INCLUDE_DIR=/d/workspace/LibN3L-2.0/
2 | cmake .. -DEIGEN3_INCLUDE_DIR=~/workspace/eigen/ -DN3L_INCLUDE_DIR=~/workspace/LibN3L-2.0/
3 |
4 | #sparse
5 | -l -train D:\data\sarcasm\sarcasm1v1.train1.nn -dev D:\data\sarcasm\sarcasm1v1.dev1.nn -test D:\data\sarcasm\sarcasm1v1.test1.nn -option D:\data\sarcasm\option.sparse
6 | #NNWord
7 | -l -train D:\data\sarcasm\sarcasm1v1.train1.nn -dev D:\data\sarcasm\sarcasm1v1.dev1.nn -test D:\data\sarcasm\sarcasm1v1.test1.nn -option D:\data\sarcasm\option.word
8 |
9 | #sparse
10 | ./SparseDetector -l -train ../newcorpus/1v1/sarcasm1v1.train1.nn -dev ../newcorpus/1v1/sarcasm1v1.dev1.nn -test ../newcorpus/1v1/sarcasm1v1.test1.nn -option option.sparse >sparse.log &
11 | ./NNWordLocal -l -train ../newcorpus/1v1/sarcasm1v1.train1.nn -dev ../newcorpus/1v1/sarcasm1v1.dev1.nn -test ../newcorpus/1v1/sarcasm1v1.test1.nn -option option.word >word.log &
--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | include_directories(
3 | basic
4 | )
5 |
6 |
7 | add_subdirectory(NNWordLocal)
8 | add_subdirectory(NNWordLocalContext)
9 | add_subdirectory(NNWordLocalContextSeparate)
10 | add_subdirectory(SparseLocalContext)
11 |
--------------------------------------------------------------------------------
/src/NNWordLocal/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | include_directories(
3 | model
4 | )
5 |
6 | add_executable(NNWordLocal NNWordLocal.cpp)
7 |
8 |
--------------------------------------------------------------------------------
/src/NNWordLocal/NNWordLocal.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * NNWordLocal.cpp
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #include "NNWordLocal.h"
9 |
10 | #include "Argument_helper.h"
11 |
12 | Detector::Detector(size_t memsize) : m_driver(memsize){
13 | // TODO Auto-generated constructor stub
14 | srand(0);
15 | }
16 |
17 | Detector::~Detector() {
18 | // TODO Auto-generated destructor stub
19 | }
20 |
21 | int Detector::createAlphabet(const vector<Instance>& vecInsts) {
22 | if (vecInsts.size() == 0) {
23 | std::cout << "training set empty" << std::endl;
24 | return -1;
25 | }
26 | cout << "Creating Alphabet..." << endl;
27 |
28 | int numInstance;
29 |
30 | m_driver._modelparams.labelAlpha.clear();
31 | // label alphabet and word statistics
32 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
33 | const Instance *pInstance = &vecInsts[numInstance];
34 |
35 | const vector<vector<string> > &words = pInstance->words;
36 | const string &label = pInstance->label;
37 |
38 | int labelId = m_driver._modelparams.labelAlpha.from_string(label);
39 |
40 | int seq_size = pInstance->seqsize();
41 | for (int i = 0; i < seq_size; i++) {
42 | int wordLength = words[i].size();
43 | for (int j = 0; j < wordLength; ++j) {
44 | string curword = normalize_to_lowerwithdigit(words[i][j]);
45 | m_word_stats[curword]++;
46 | }
47 |
48 | }
49 |
50 | if ((numInstance + 1) % m_options.verboseIter == 0) {
51 | cout << numInstance + 1 << " ";
52 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
53 | cout << std::endl;
54 | cout.flush();
55 | }
56 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
57 | break;
58 | }
59 |
60 | cout << numInstance << " " << endl;
61 | cout << "Label num: " << m_driver._modelparams.labelAlpha.size() << endl;
62 |
63 |
64 | m_driver._modelparams.labelAlpha.set_fixed_flag(true);
65 |
66 | if (m_options.linearfeatCat > 0) {
67 | cout << "Extracting linear features..." << endl;
68 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
69 | const Instance *pInstance = &vecInsts[numInstance];
70 | vector<string> linearfeat;
71 | extractLinearFeatures(linearfeat, pInstance);
72 | for (int i = 0; i < linearfeat.size(); i++)
73 | m_feat_stats[linearfeat[i]] ++;
74 | }
75 | m_feat_stats[unknownkey] = m_options.featCutOff + 1;
76 | cout << "Total feature num: " << m_feat_stats.size() << endl;
77 | m_driver._modelparams.featAlpha.initial(m_feat_stats, m_options.featCutOff);
78 | cout << "Remina feature num:" << m_driver._modelparams.featAlpha.size() << endl;
79 | m_driver._modelparams.featAlpha.set_fixed_flag(true);
80 | }
81 | return 0;
82 | }
83 |
84 | void Detector::addTestAlphabet(const vector<Instance>& vecInsts)
85 | {
86 | cout << "Adding other word Alphabet..." << endl;
87 | int numInstance;
88 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
89 | const Instance *pInstance = &vecInsts[numInstance];
90 |
91 | const vector<vector<string> > &words = pInstance->words;
92 | int seq_size = pInstance->seqsize();
93 | for (int i = 0; i < seq_size; ++i) {
94 | for (int j = 0; j < words[i].size(); j++) {
95 | string curword = normalize_to_lowerwithdigit(words[i][j]);
96 | if (!m_options.wordEmbFineTune)
97 | m_word_stats[curword]++;
98 | }
99 | }
100 |
101 | if ((numInstance + 1) % m_options.verboseIter == 0) {
102 | cout << numInstance + 1 << " ";
103 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
104 | cout << std::endl;
105 | cout.flush();
106 | }
107 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
108 | break;
109 | }
110 |
111 | cout << numInstance << " " << endl;
112 | }
113 |
114 |
115 |
116 | void Detector::extractDenseFeatures(vector<Feature>& features, const Instance* pInstance)
117 | {
118 | features.clear();
119 | const vector<vector<string> >& words = pInstance->words;
120 | int seq_size = pInstance->seqsize();
121 | assert(seq_size < 3);
122 |
123 | //Local and Context sentence dense feature
124 | for (int sentidx = 0; sentidx < seq_size; sentidx++) {
125 | Feature feat;
126 | const vector<string>& curWords = words[sentidx];
127 | int wordnumber = curWords.size();
128 | for (int i = 0; i < wordnumber; i++)
129 | feat.words.push_back(normalize_to_lowerwithdigit(curWords[i]));
130 |
131 | features.push_back(feat);
132 | }
133 | }
134 |
135 | void Detector::extractLinearFeatures(vector<string>& feat, const Instance* pInstance) {
136 | feat.clear();
137 |
138 | const vector<vector<string> >& words = pInstance->words;
139 | int seq_size = pInstance->seqsize();
140 | assert(seq_size < 3);
141 | //Current sent linear feature
142 | const vector<string>& lastWords = words[seq_size - 1];
143 | int wordnumber = lastWords.size();
144 | string strfeat = "", curWord = "", preWord = "", pre2Word = "";
145 | for (int i = 0; i < wordnumber; i++) {
146 | curWord = normalize_to_lowerwithdigit(lastWords[i]);
147 | strfeat = "F1U=" + curWord;
148 | feat.push_back(strfeat);
149 | preWord = i - 1 >= 0 ? lastWords[i - 1] : nullkey;
150 | strfeat = "F2B=" + preWord + seperateKey + curWord;
151 | feat.push_back(strfeat);
152 | pre2Word = i - 2 >= 0 ? lastWords[i - 2] : nullkey;
153 | strfeat = "F3T=" + pre2Word + seperateKey + preWord + seperateKey + curWord;
154 | feat.push_back(strfeat);
155 | }
156 |
157 | //History feature
158 | if (m_options.linearfeatCat > 1 && seq_size == 2) {
159 | const vector<string>& historyWords = words[seq_size - 2];
160 | wordnumber = historyWords.size();
161 | for (int i = 0; i < wordnumber; i++) {
162 | strfeat = "F4U=" + historyWords[i];
163 | feat.push_back(strfeat);
164 | }
165 | }
166 | }
167 |
168 | void Detector::convert2Example(const Instance* pInstance, Example& exam) {
169 | exam.clear();
170 |
171 | const string &instlabel = pInstance->label;
172 | const Alphabet &labelAlpha = m_driver._modelparams.labelAlpha;
173 |
174 | int labelnum = labelAlpha.size();
175 | for (int i = 0; i < labelnum; i++) {
176 | string str = labelAlpha.from_id(i);
177 | if (instlabel.compare(str) == 0)
178 | exam.m_labels.push_back(1.0);
179 | else
180 | exam.m_labels.push_back(0.0);
181 | }
182 |
183 | //dense feature
184 | extractDenseFeatures(exam.m_densefeatures, pInstance);
185 |
186 | //linear feature
187 | if (m_options.linearfeatCat > 0)
188 | extractLinearFeatures(exam.m_linearfeatures, pInstance);
189 |
190 | }
191 |
192 | void Detector::initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams) {
193 | int numInstance;
194 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
195 | const Instance *pInstance = &vecInsts[numInstance];
196 | Example curExam;
197 | convert2Example(pInstance, curExam);
198 | vecExams.push_back(curExam);
199 |
200 | if ((numInstance + 1) % m_options.verboseIter == 0) {
201 | cout << numInstance + 1 << " ";
202 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
203 | cout << std::endl;
204 | cout.flush();
205 | }
206 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
207 | break;
208 | }
209 |
210 | cout << numInstance << " " << endl;
211 | }
212 |
213 | void Detector::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile) {
214 | if (optionFile != "")
215 | m_options.load(optionFile);
216 | m_options.showOptions();
217 | vector<Instance> trainInsts, devInsts, testInsts;
218 | static vector<Instance> decodeInstResults;
219 | static Instance curDecodeInst;
220 | bool bCurIterBetter = false;
221 |
222 | m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
223 | if (devFile != "")
224 | m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
225 | if (testFile != "")
226 | m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);
227 |
228 | std::cout << "Training example number: " << trainInsts.size() << std::endl;
229 | std::cout << "Dev example number: " << trainInsts.size() << std::endl;
230 | std::cout << "Test example number: " << trainInsts.size() << std::endl;
231 |
232 | createAlphabet(trainInsts);
233 | addTestAlphabet(devInsts);
234 | addTestAlphabet(testInsts);
235 |
236 | m_word_stats[unknownkey] = m_options.wordCutOff + 1;
237 | cout << "Total word num: " << m_word_stats.size() << endl;
238 | m_driver._modelparams.wordAlpha.initial(m_word_stats, m_options.wordCutOff);
239 | m_driver._modelparams.wordAlpha.set_fixed_flag(true);
240 | cout << "Remain word num:" << m_driver._modelparams.wordAlpha.size() << endl;
241 |
242 | vector<Example> trainExamples, devExamples, testExamples;
243 |
244 | std::cout << "Instance convert to example... " << std::endl;
245 | initialExamples(trainInsts, trainExamples);
246 | initialExamples(devInsts, devExamples);
247 | initialExamples(testInsts, testExamples);
248 |
249 | if (m_options.wordFile != "") {
250 | m_driver._modelparams.words.initial(&m_driver._modelparams.wordAlpha, m_options.wordFile, m_options.wordEmbFineTune);
251 | }
252 | else{
253 | m_driver._modelparams.words.initial(&m_driver._modelparams.wordAlpha, m_options.wordEmbSize, m_options.wordEmbFineTune);
254 | }
255 |
256 | m_driver._hyperparams.setRequired(m_options);
257 | m_driver.initial();
258 |
259 |
260 |
261 | dtype bestDIS = 0;
262 |
263 | int inputSize = trainExamples.size();
264 |
265 | int batchBlock = inputSize / m_options.batchSize;
266 | if (inputSize % m_options.batchSize != 0)
267 | batchBlock++;
268 |
269 | srand(0);
270 | std::vector<int> indexes;
271 | for (int i = 0; i < inputSize; ++i)
272 | indexes.push_back(i);
273 |
274 | static Metric eval, metric_dev, metric_test;
275 | static vector<Example> subExamples;
276 | int devNum = devExamples.size(), testNum = testExamples.size();
277 | for (int iter = 0; iter < m_options.maxIter; ++iter) {
278 | std::cout << "##### Iteration " << iter << std::endl;
279 |
280 | random_shuffle(indexes.begin(), indexes.end());
281 | eval.reset();
282 | for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
283 | subExamples.clear();
284 | int start_pos = updateIter * m_options.batchSize;
285 | int end_pos = (updateIter + 1) * m_options.batchSize;
286 | if (end_pos > inputSize)
287 | end_pos = inputSize;
288 |
289 | for (int idy = start_pos; idy < end_pos; idy++) {
290 | subExamples.push_back(trainExamples[indexes[idy]]);
291 | }
292 |
293 | int curUpdateIter = iter * batchBlock + updateIter;
294 | dtype cost = m_driver.train(subExamples, curUpdateIter);
295 |
296 | eval.overall_label_count += m_driver._eval.overall_label_count;
297 | eval.correct_label_count += m_driver._eval.correct_label_count;
298 |
299 | if ((curUpdateIter + 1) % m_options.verboseIter == 0) {
300 | //m_driver.checkgrad(subExamples, curUpdateIter + 1);
301 | std::cout << "current: " << updateIter + 1 << ", total block: " << batchBlock << std::endl;
302 | std::cout << "Cost = " << cost << ", Tag Correct(%) = " << eval.getAccuracy() << std::endl;
303 | }
304 | m_driver.updateModel();
305 |
306 | }
307 |
308 | if (devNum > 0) {
309 | bCurIterBetter = false;
310 | if (!m_options.outBest.empty())
311 | decodeInstResults.clear();
312 | metric_dev.reset();
313 | for (int idx = 0; idx < devExamples.size(); idx++) {
314 | string result_label;
315 | predict(devExamples[idx].m_densefeatures, result_label);
316 |
317 | devInsts[idx].Evaluate(result_label, metric_dev);
318 |
319 | if (!m_options.outBest.empty()) {
320 | curDecodeInst.copyValuesFrom(devInsts[idx]);
321 | curDecodeInst.assignLabel(result_label);
322 | decodeInstResults.push_back(curDecodeInst);
323 | }
324 | }
325 |
326 | std::cout << "dev:" << std::endl;
327 | metric_dev.print();
328 |
329 | if (!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS) {
330 | m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults);
331 | bCurIterBetter = true;
332 | }
333 |
334 | if (testNum > 0) {
335 | if (!m_options.outBest.empty())
336 | decodeInstResults.clear();
337 | metric_test.reset();
338 | for (int idx = 0; idx < testExamples.size(); idx++) {
339 | string result_label;
340 | predict(testExamples[idx].m_densefeatures, result_label);
341 |
342 | testInsts[idx].Evaluate(result_label, metric_test);
343 |
344 | if (bCurIterBetter && !m_options.outBest.empty()) {
345 | curDecodeInst.copyValuesFrom(testInsts[idx]);
346 | curDecodeInst.assignLabel(result_label);
347 | decodeInstResults.push_back(curDecodeInst);
348 | }
349 | }
350 | std::cout << "test:" << std::endl;
351 | metric_test.print();
352 |
353 | if (!m_options.outBest.empty() && bCurIterBetter) {
354 | m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults);
355 | }
356 | }
357 |
358 |
359 |
360 | if (m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS) {
361 | std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl;
362 | bestDIS = metric_dev.getAccuracy();
363 | writeModelFile(modelFile);
364 | }
365 |
366 | }
367 | // Clear gradients
368 | }
369 | }
370 |
371 | int Detector::predict(const vector<Feature>& features, string& output) {
372 | int labelIdx;
373 | m_driver.predict(features, labelIdx);
374 | output = m_driver._modelparams.labelAlpha.from_id(labelIdx, nullkey);
375 |
376 | if (output == nullkey)
377 | std::cout << "predict error" << std::endl;
378 | return 0;
379 | }
380 |
381 | void Detector::test(const string& testFile, const string& outputFile, const string& modelFile) {
382 | loadModelFile(modelFile);
383 | vector<Instance> testInsts;
384 | m_pipe.readInstances(testFile, testInsts);
385 |
386 | vector<Example> testExamples;
387 | initialExamples(testInsts, testExamples);
388 |
389 | int testNum = testExamples.size();
390 | vector<Instance> testInstResults;
391 | Metric metric_test;
392 | metric_test.reset();
393 | for (int idx = 0; idx < testExamples.size(); idx++) {
394 | string result_label;
395 | predict(testExamples[idx].m_densefeatures, result_label);
396 | testInsts[idx].Evaluate(result_label, metric_test);
397 | Instance curResultInst;
398 | curResultInst.copyValuesFrom(testInsts[idx]);
399 | curResultInst.assignLabel(result_label);
400 | testInstResults.push_back(curResultInst);
401 | }
402 | std::cout << "test:" << std::endl;
403 | metric_test.print();
404 |
405 | m_pipe.outputAllInstances(outputFile, testInstResults);
406 |
407 | }
408 |
409 |
410 | void Detector::loadModelFile(const string& inputModelFile) {
411 |
412 | }
413 |
414 | void Detector::writeModelFile(const string& outputModelFile) {
415 |
416 | }
417 |
418 | int main(int argc, char* argv[]) {
419 |
420 | std::string trainFile = "", devFile = "", testFile = "", modelFile = "", optionFile = "";
421 | std::string outputFile = "", wordEmbFile = "";
422 | bool bTrain = false;
423 | dsr::Argument_helper ah;
424 | int memsize = 1;
425 |
426 | ah.new_flag("l", "learn", "train or test", bTrain);
427 | ah.new_named_string("train", "trainCorpus", "named_string", "training corpus to train a model, must when training", trainFile);
428 | ah.new_named_string("dev", "devCorpus", "named_string", "development corpus to train a model, optional when training", devFile);
429 | ah.new_named_string("test", "testCorpus", "named_string",
430 | "testing corpus to train a model or input file to test a model, optional when training and must when testing", testFile);
431 | ah.new_named_string("option", "optionFile", "named_string", "option file to train a model, optional when training", optionFile);
432 | ah.new_named_string("model", "modelFile", "named_string", "model file, must when training and testing", modelFile);
433 | ah.new_named_string("output", "outputFile", "named_string", "output file to test, must when testing", outputFile);
434 | ah.new_named_int("mem", "memsize", "named_int", "memory allocated for tensor nodes", memsize);
435 |
436 | ah.process(argc, argv);
437 |
438 | Detector detector(memsize);
439 | detector.m_pipe.max_sentense_size = ComputionGraph::max_sentence_length;
440 | if (bTrain) {
441 | detector.train(trainFile, devFile, testFile, modelFile, optionFile);
442 | }
443 | else {
444 | detector.test(testFile, outputFile, modelFile);
445 | }
446 |
447 | //test(argv);
448 | //ah.write_values(std::cout);
449 | }
450 |
--------------------------------------------------------------------------------
/src/NNWordLocal/NNWordLocal.h:
--------------------------------------------------------------------------------
1 | /*
2 | * NNWordLocal.h
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #ifndef SRC_NNWordLocal_H_
9 | #define SRC_NNWordLocal_H_
10 |
11 |
12 | #include "N3L.h"
13 | #include "Driver.h"
14 | #include "Options.h"
15 | #include "Instance.h"
16 | #include "Example.h"
17 | #include "Pipe.h"
18 | #include "Utf.h"
19 |
20 | using namespace nr;
21 | using namespace std;
22 |
23 | class Detector {
24 |
25 |
26 | public:
27 | unordered_map<string, int> m_feat_stats;
28 | unordered_map<string, int> m_word_stats;
29 |
30 | public:
31 | Options m_options;
32 |
33 | Pipe m_pipe;
34 |
35 | Driver m_driver;
36 |
37 |
38 | public:
39 | Detector(size_t memsize);
40 | virtual ~Detector();
41 |
42 | public:
43 |
44 | int createAlphabet(const vector<Instance>& vecTrainInsts);
45 | void addTestAlphabet(const vector<Instance>& vecInsts);
46 |
47 | void extractDenseFeatures(vector<Feature>& features, const Instance* pInstance);
48 | void extractLinearFeatures(vector<string>& features, const Instance* pInstance);
49 |
50 | void convert2Example(const Instance* pInstance, Example& exam);
51 | void initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams);
52 |
53 | public:
54 | void train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile);
55 | int predict(const vector<Feature>& features, string& outputs);
56 | void test(const string& testFile, const string& outputFile, const string& modelFile);
57 |
58 | void writeModelFile(const string& outputModelFile);
59 | void loadModelFile(const string& inputModelFile);
60 |
61 | };
62 |
63 | #endif /* SRC_NNWordLocal_H_ */
64 |
--------------------------------------------------------------------------------
/src/NNWordLocal/model/ComputionGraph.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ComputionGraph_H_
2 | #define SRC_ComputionGraph_H_
3 |
4 | #include "ModelParams.h"
5 |
6 |
7 | // Each model consists of two parts: building the neural graph (word lookup -> context window -> BiLSTM -> gated pooling -> hidden layer -> linear output) and defining the output losses.
8 | struct ComputionGraph : Graph {
9 | public:
10 | const static int max_sentence_length = 256;
11 |
12 | public:
13 | // node instances
14 | vector<LookupNode> word_inputs;
15 | WindowBuilder word_window;
16 |
17 | LSTM1Builder left_lstm;
18 | LSTM1Builder right_lstm;
19 |
20 | vector<ConcatNode> concat_bilstm;
21 | GatedPoolBuilder gated_pooling;
22 |
23 | UniNode sent_hidden;
24 | LinearNode output;
25 |
26 | public:
27 | ComputionGraph() : Graph() {
28 | }
29 |
30 | ~ComputionGraph() {
31 | clear();
32 | }
33 |
34 | public:
35 | //allocate enough nodes
36 | inline void createNodes(int sent_length) {
37 |
38 | word_inputs.resize(sent_length);
39 | word_window.resize(sent_length);
40 | left_lstm.resize(sent_length);
41 | right_lstm.resize(sent_length);
42 |
43 | concat_bilstm.resize(sent_length);
44 | gated_pooling.resize(sent_length);
45 | }
46 |
47 | inline void clear() {
48 | Graph::clear();
49 | word_inputs.clear();
50 | word_window.clear();
51 | left_lstm.clear();
52 | right_lstm.clear();
53 |
54 | concat_bilstm.clear();
55 | gated_pooling.clear();
56 |
57 | }
58 |
59 | public:
60 | inline void initial(ModelParams& model, HyperParams& opts, AlignedMemoryPool* mem = NULL) {
61 | for (int idx = 0; idx < word_inputs.size(); idx++) {
62 | word_inputs[idx].init(model.words.nDim, opts.dropOut, mem);
63 | word_inputs[idx].setParam(&model.words);
64 | concat_bilstm[idx].init(opts.rnnhiddensize * 2, -1, mem);
65 | }
66 | word_window.init(model.words.nDim, opts.wordcontext, mem);
67 | left_lstm.init(&model.left_lstm_project, opts.dropOut, true, mem);
68 | right_lstm.init(&model.right_lstm_project, opts.dropOut, false, mem);
69 | gated_pooling.init(&model.gatedpool_project, mem);
70 | sent_hidden.init(opts.hiddensize, opts.dropOut, mem);
71 | sent_hidden.setParam(&model.sent_hidden_project);
72 | output.init(opts.labelSize, -1, mem);
73 | output.setParam(&model.olayer_linear);
74 | }
75 |
76 |
77 | public:
78 | // some nodes may behave differently during training and decoding, for example, dropout
79 | inline void forward(const vector<Feature>& features, bool bTrain = false) {
80 | //first step: clear value
81 | clearValue(bTrain); // clearing values is a required step before training, prediction, and cost computation
82 |
83 |
84 | // second step: build graph
85 | int seqsize = features.size();
86 | //forward
87 | // word-level neural networks
88 | const Feature& feature = features[seqsize - 1];
89 | int wordnum = feature.words.size();
90 | if (wordnum > max_sentence_length)
91 | wordnum = max_sentence_length;
92 | for (int idx = 0; idx < wordnum; idx++) {
93 | //input
94 | word_inputs[idx].forward(this, feature.words[idx]);
95 | }
96 |
97 | //windowlized
98 | word_window.forward(this, getPNodes(word_inputs, wordnum));
99 |
100 | left_lstm.forward(this, getPNodes(word_window._outputs, wordnum));
101 | right_lstm.forward(this, getPNodes(word_window._outputs, wordnum));
102 |
103 | for (int idx = 0; idx < wordnum; idx++) {
104 | //feed-forward
105 | concat_bilstm[idx].forward(this, &(left_lstm._hiddens[idx]), &(right_lstm._hiddens[idx]));
106 | }
107 | gated_pooling.forward(this, getPNodes(concat_bilstm, wordnum));
108 | sent_hidden.forward(this, &gated_pooling._output);
109 | output.forward(this, &sent_hidden);
110 | }
111 |
112 | };
113 |
114 | #endif /* SRC_ComputionGraph_H_ */
--------------------------------------------------------------------------------
/src/NNWordLocal/model/Driver.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Driver.h
3 | *
4 | * Created on: Mar 18, 2015
5 | * Author: mszhang
6 | */
7 |
8 | #ifndef SRC_Driver_H_
9 | #define SRC_Driver_H_
10 |
11 | #include <iostream>
12 | #include "ComputionGraph.h"
13 |
14 | // A neural network classifier over dense word features
15 |
16 | class Driver{
17 | public:
18 | Driver(size_t memsize) : aligned_mem(memsize) {
19 | _pcg = NULL;
20 | }
21 |
22 | ~Driver() {
23 | if (_pcg != NULL)
24 | delete _pcg;
25 | _pcg = NULL;
26 | }
27 |
28 | public:
29 | ComputionGraph *_pcg; // build neural graphs
30 | ModelParams _modelparams; // model parameters
31 | HyperParams _hyperparams;
32 |
33 | Metric _eval;
34 | CheckGrad _checkgrad;
35 | ModelUpdate _ada; // model update
36 |
37 | AlignedMemoryPool aligned_mem;
38 |
39 | public:
40 | inline void initial() {
41 | if (!_hyperparams.bValid()){
42 | std::cout << "hyper parameter initialization Error, Please check!" << std::endl;
43 | return;
44 | }
45 | if (!_modelparams.initial(_hyperparams, &aligned_mem)){
46 | std::cout << "model parameter initialization Error, Please check!" << std::endl;
47 | return;
48 | }
49 | _modelparams.exportModelParams(_ada);
50 | _modelparams.exportCheckGradParams(_checkgrad);
51 |
52 | _hyperparams.print();
53 |
54 | _pcg = new ComputionGraph();
55 | _pcg->createNodes(ComputionGraph::max_sentence_length);
56 | _pcg->initial(_modelparams, _hyperparams, &aligned_mem);
57 |
58 | setUpdateParameters(_hyperparams.nnRegular, _hyperparams.adaAlpha, _hyperparams.adaEps);
59 | }
60 |
61 |
62 | inline dtype train(const vector<Example>& examples, int iter) {
63 | _eval.reset();
64 |
65 | int example_num = examples.size();
66 | dtype cost = 0.0;
67 |
68 | for (int count = 0; count < example_num; count++) {
69 | const Example& example = examples[count];
70 |
71 | //forward
72 | _pcg->forward(example.m_densefeatures, true);
73 |
74 | //loss function
75 | int seq_size = example.m_densefeatures.size();
76 | int wordnum = example.m_densefeatures[seq_size - 1].words.size();
77 | cost += _modelparams.loss.loss(&_pcg->output, example.m_labels, _eval, example_num);
78 |
79 | // backward, which exists only for training
80 | _pcg->backward();
81 | }
82 |
83 | if (_eval.getAccuracy() < 0) {
84 | std::cout << "strange" << std::endl;
85 | }
86 |
87 | return cost;
88 | }
89 |
90 | inline void predict(const vector<Feature>& densefeatures, int& results) {
91 | _pcg->forward(densefeatures);
92 | _modelparams.loss.predict(&_pcg->output, results);
93 | }
94 |
95 | inline dtype cost(const Example& example){
96 | _pcg->forward(example.m_densefeatures); //forward here
97 |
98 | int seq_size = example.m_densefeatures.size();
99 |
100 | dtype cost = 0.0;
101 |
102 | cost += _modelparams.loss.cost(&_pcg->output, example.m_labels, 1);
103 |
104 | return cost;
105 | }
106 |
107 | void checkgrad(const vector<Example>& examples, int iter){
108 | ostringstream out;
109 | out << "Iteration: " << iter;
110 | _checkgrad.check(this, examples, out.str());
111 | }
112 |
113 | void updateModel() {
114 | _ada.update();
115 | //_ada.update(5.0);
116 | }
117 |
118 | void writeModel();
119 |
120 | void loadModel();
121 |
122 |
123 |
124 | private:
125 | inline void resetEval() {
126 | _eval.reset();
127 | }
128 |
129 |
130 | inline void setUpdateParameters(dtype nnRegular, dtype adaAlpha, dtype adaEps){
131 | _ada._alpha = adaAlpha;
132 | _ada._eps = adaEps;
133 | _ada._reg = nnRegular;
134 | }
135 |
136 | };
137 |
138 | #endif /* SRC_Driver_H_ */
139 |
--------------------------------------------------------------------------------
/src/NNWordLocal/model/HyperParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_HyperParams_H_
2 | #define SRC_HyperParams_H_
3 |
4 | #include "N3L.h"
5 | #include "Example.h"
6 | #include "Options.h"
7 |
8 | using namespace nr;
9 | using namespace std;
10 |
11 | struct HyperParams{
12 |
13 | // must assign
14 | int wordcontext;
15 | int hiddensize;
16 | int rnnhiddensize;
17 | dtype dropOut;
18 |
19 | // must assign
20 | dtype nnRegular; // for optimization
21 | dtype adaAlpha; // for optimization
22 | dtype adaEps; // for optimization
23 |
24 | //auto generated
25 | int wordwindow;
26 | int wordDim;
27 | int inputsize;
28 | int labelSize;
29 |
30 | public:
31 | HyperParams(){
32 | bAssigned = false;
33 | }
34 |
35 | public:
36 | void setRequired(Options& opt){
37 | wordcontext = opt.wordcontext;
38 | hiddensize = opt.hiddenSize;
39 | rnnhiddensize = opt.rnnHiddenSize;
40 | dropOut = opt.dropProb;
41 |
42 | nnRegular = opt.regParameter;
43 | adaAlpha = opt.adaAlpha;
44 | adaEps = opt.adaEps;
45 |
46 | bAssigned = true;
47 | }
48 |
49 | void clear(){
50 | bAssigned = false;
51 | }
52 |
53 | bool bValid(){
54 | return bAssigned;
55 | }
56 |
57 |
58 | public:
59 |
60 | void print(){
61 |
62 | }
63 |
64 | private:
65 | bool bAssigned;
66 | };
67 |
68 |
69 | #endif /* SRC_HyperParams_H_ */
--------------------------------------------------------------------------------
/src/NNWordLocal/model/ModelParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ModelParams_H_
2 | #define SRC_ModelParams_H_
3 | #include "HyperParams.h"
4 |
5 | // Each model consists of two parts: building the neural graph and defining the output losses.
6 | class ModelParams{
7 |
8 | public:
9 | Alphabet wordAlpha; // should be initialized outside
10 | Alphabet featAlpha; // should be initialized outside
11 | Alphabet labelAlpha; // should be initialized outside
12 | public:
13 | LookupTable words; // should be initialized outside
14 | LSTM1Params left_lstm_project; //left lstm
15 | LSTM1Params right_lstm_project; //right lstm
16 | GatedPoolParam gatedpool_project;
17 | UniParams sent_hidden_project;
18 | UniParams olayer_linear; // output
19 | public:
20 | SoftMaxLoss loss;
21 |
22 |
23 | public:
24 | bool initial(HyperParams& opts , AlignedMemoryPool *mem = NULL){
25 |
26 | // some model parameters should be initialized outside
27 | if (words.nVSize <= 0 || labelAlpha.size() <= 0){
28 | return false;
29 | }
30 | opts.wordDim = words.nDim;
31 | opts.wordwindow = 2 * opts.wordcontext + 1;
32 | opts.inputsize = opts.wordwindow * opts.wordDim;
33 |
34 | left_lstm_project.initial(opts.rnnhiddensize, opts.inputsize, mem);
35 | right_lstm_project.initial(opts.rnnhiddensize, opts.inputsize, mem);
36 | gatedpool_project.initial(opts.rnnhiddensize * 2, opts.rnnhiddensize * 2, mem);
37 | sent_hidden_project.initial(opts.hiddensize, opts.rnnhiddensize * 2, mem);
38 |
39 | opts.labelSize = labelAlpha.size();
40 | olayer_linear.initial(opts.labelSize, opts.hiddensize, false, mem);
41 |
42 | return true;
43 | }
44 |
45 |
46 | void exportModelParams(ModelUpdate& ada){
47 | words.exportAdaParams(ada);
48 | left_lstm_project.exportAdaParams(ada);
49 | right_lstm_project.exportAdaParams(ada);
50 | gatedpool_project.exportAdaParams(ada);
51 | sent_hidden_project.exportAdaParams(ada);
52 | olayer_linear.exportAdaParams(ada);
53 | }
54 |
55 |
56 | void exportCheckGradParams(CheckGrad& checkgrad){
57 | checkgrad.add(&(words.E), "_words.E");
58 | checkgrad.add(&(left_lstm_project.output.W1), "left_lstm_project.output.W1");
59 | checkgrad.add(&(gatedpool_project._uni_gate_param.W), "gatedpool_project._uni_gate_param.W");
60 | checkgrad.add(&(gatedpool_project._uni_gate_param.b), "gatedpool_project._uni_gate_param.b");
61 | checkgrad.add(&(sent_hidden_project.W), "sent_hidden_project.W");
62 | checkgrad.add(&(sent_hidden_project.b), "sent_hidden_project.b");
63 | checkgrad.add(&(olayer_linear.W), "olayer_linear.W");
64 | }
65 |
66 | // will add it later
67 | void saveModel(){
68 |
69 | }
70 |
71 | void loadModel(const string& inFile){
72 |
73 | }
74 |
75 | };
76 |
77 | #endif /* SRC_ModelParams_H_ */
--------------------------------------------------------------------------------
/src/NNWordLocalContext/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | include_directories(
3 | model
4 | )
5 |
6 | add_executable(NNWordLocalContext NNWordLocalContext.cpp)
7 |
8 |
--------------------------------------------------------------------------------
/src/NNWordLocalContext/NNWordLocalContext.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * NNWordLocalContext.cpp
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #include "NNWordLocalContext.h"
9 |
10 | #include "Argument_helper.h"
11 |
12 | Detector::Detector(size_t memsize) : m_driver(memsize){
13 | // TODO Auto-generated constructor stub
14 | srand(0);
15 | }
16 |
17 | Detector::~Detector() {
18 | // TODO Auto-generated destructor stub
19 | }
20 |
21 | int Detector::createAlphabet(const vector<Instance>& vecInsts) {
22 | if (vecInsts.size() == 0) {
23 | std::cout << "training set empty" << std::endl;
24 | return -1;
25 | }
26 | cout << "Creating Alphabet..." << endl;
27 |
28 | m_maxseq_size = -1;
29 | int numInstance;
30 |
31 | m_driver._modelparams.labelAlpha.clear();
32 | // label alphabet and word statistics
33 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
34 | const Instance *pInstance = &vecInsts[numInstance];
35 |
36 | const vector<vector<string> > &words = pInstance->words;
37 | const string &label = pInstance->label;
38 |
39 | int labelId = m_driver._modelparams.labelAlpha.from_string(label);
40 |
41 | int seq_size = pInstance->seqsize();
42 | if (seq_size > m_maxseq_size)
43 | m_maxseq_size = seq_size;
44 | for (int i = 0; i < seq_size; i++) {
45 | int wordLength = words[i].size();
46 | for (int j = 0; j < wordLength; ++j) {
47 | string curword = normalize_to_lowerwithdigit(words[i][j]);
48 | m_word_stats[curword]++;
49 | }
50 |
51 | }
52 |
53 | if ((numInstance + 1) % m_options.verboseIter == 0) {
54 | cout << numInstance + 1 << " ";
55 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
56 | cout << std::endl;
57 | cout.flush();
58 | }
59 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
60 | break;
61 | }
62 |
63 | cout << numInstance << " " << endl;
64 | cout << "Label num: " << m_driver._modelparams.labelAlpha.size() << endl;
65 |
66 |
67 | m_driver._modelparams.labelAlpha.set_fixed_flag(true);
68 |
69 | if (m_options.linearfeatCat > 0) {
70 | cout << "Extracting linear features..." << endl;
71 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
72 | const Instance *pInstance = &vecInsts[numInstance];
73 | vector<string> linearfeat;
74 | extractLinearFeatures(linearfeat, pInstance);
75 | for (int i = 0; i < linearfeat.size(); i++)
76 | m_feat_stats[linearfeat[i]] ++;
77 | }
78 | m_feat_stats[unknownkey] = m_options.featCutOff + 1;
79 | cout << "Total feature num: " << m_feat_stats.size() << endl;
80 | m_driver._modelparams.featAlpha.initial(m_feat_stats, m_options.featCutOff);
81 | cout << "Remina feature num:" << m_driver._modelparams.featAlpha.size() << endl;
82 | m_driver._modelparams.featAlpha.set_fixed_flag(true);
83 | }
84 | return 0;
85 | }
86 |
87 | void Detector::addTestAlphabet(const vector<Instance>& vecInsts)
88 | {
89 | cout << "Adding other word Alphabet..." << endl;
90 | int numInstance;
91 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
92 | const Instance *pInstance = &vecInsts[numInstance];
93 |
94 | const vector<vector<string> > &words = pInstance->words;
95 | int seq_size = pInstance->seqsize();
96 | for (int i = 0; i < seq_size; ++i) {
97 | for (int j = 0; j < words[i].size(); j++) {
98 | string curword = normalize_to_lowerwithdigit(words[i][j]);
99 | if (!m_options.wordEmbFineTune)
100 | m_word_stats[curword]++;
101 | }
102 | }
103 |
104 | if ((numInstance + 1) % m_options.verboseIter == 0) {
105 | cout << numInstance + 1 << " ";
106 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
107 | cout << std::endl;
108 | cout.flush();
109 | }
110 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
111 | break;
112 | }
113 |
114 | cout << numInstance << " " << endl;
115 | }
116 |
117 |
118 |
119 | void Detector::extractDenseFeatures(vector<Feature>& features, const Instance* pInstance)
120 | {
121 | features.clear();
122 | const vector<vector<string> >& words = pInstance->words;
123 | int seq_size = pInstance->seqsize();
124 | assert(seq_size < 3);
125 |
126 | //Local and Context sentence dense feature
127 | for (int sentidx = 0; sentidx < seq_size; sentidx++) {
128 | Feature feat;
129 | const vector<string>& curWords = words[sentidx];
130 | int wordnumber = curWords.size();
131 | for (int i = 0; i < wordnumber; i++)
132 | feat.words.push_back(normalize_to_lowerwithdigit(curWords[i]));
133 |
134 | features.push_back(feat);
135 | }
136 | }
137 |
138 | void Detector::extractLinearFeatures(vector<string>& feat, const Instance* pInstance) {
139 | feat.clear();
140 |
141 | const vector<vector<string> >& words = pInstance->words;
142 | int seq_size = pInstance->seqsize();
143 | assert(seq_size < 3);
144 | //Current sent linear feature
145 | const vector<string>& lastWords = words[seq_size - 1];
146 | int wordnumber = lastWords.size();
147 | string strfeat = "", curWord = "", preWord = "", pre2Word = "";
148 | for (int i = 0; i < wordnumber; i++) {
149 | curWord = normalize_to_lowerwithdigit(lastWords[i]);
150 | strfeat = "F1U=" + curWord;
151 | feat.push_back(strfeat);
152 | preWord = i - 1 >= 0 ? lastWords[i - 1] : nullkey;
153 | strfeat = "F2B=" + preWord + seperateKey + curWord;
154 | feat.push_back(strfeat);
155 | pre2Word = i - 2 >= 0 ? lastWords[i - 2] : nullkey;
156 | strfeat = "F3T=" + pre2Word + seperateKey + preWord + seperateKey + curWord;
157 | feat.push_back(strfeat);
158 | }
159 |
160 | //History feature
161 | if (m_options.linearfeatCat > 1 && seq_size == 2) {
162 | const vector<string>& historyWords = words[seq_size - 2];
163 | wordnumber = historyWords.size();
164 | for (int i = 0; i < wordnumber; i++) {
165 | strfeat = "F4U=" + historyWords[i];
166 | feat.push_back(strfeat);
167 | }
168 | }
169 | }
170 |
171 | void Detector::convert2Example(const Instance* pInstance, Example& exam) {
172 | exam.clear();
173 |
174 | const string &instlabel = pInstance->label;
175 | const Alphabet &labelAlpha = m_driver._modelparams.labelAlpha;
176 |
177 | int labelnum = labelAlpha.size();
178 | for (int i = 0; i < labelnum; i++) {
179 | string str = labelAlpha.from_id(i);
180 | if (instlabel.compare(str) == 0)
181 | exam.m_labels.push_back(1.0);
182 | else
183 | exam.m_labels.push_back(0.0);
184 | }
185 |
186 | //dense feature
187 | extractDenseFeatures(exam.m_densefeatures, pInstance);
188 |
189 | //linear feature
190 | if (m_options.linearfeatCat > 0)
191 | extractLinearFeatures(exam.m_linearfeatures, pInstance);
192 |
193 | }
194 |
195 | void Detector::initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams) {
196 | int numInstance;
197 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
198 | const Instance *pInstance = &vecInsts[numInstance];
199 | Example curExam;
200 | convert2Example(pInstance, curExam);
201 | vecExams.push_back(curExam);
202 |
203 | if ((numInstance + 1) % m_options.verboseIter == 0) {
204 | cout << numInstance + 1 << " ";
205 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
206 | cout << std::endl;
207 | cout.flush();
208 | }
209 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
210 | break;
211 | }
212 |
213 | cout << numInstance << " " << endl;
214 | }
215 |
216 | void Detector::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile) {
217 | if (optionFile != "")
218 | m_options.load(optionFile);
219 | m_options.showOptions();
220 | vector<Instance> trainInsts, devInsts, testInsts;
221 | static vector<Instance> decodeInstResults;
222 | static Instance curDecodeInst;
223 | bool bCurIterBetter = false;
224 |
225 | m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
226 | if (devFile != "")
227 | m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
228 | if (testFile != "")
229 | m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);
230 |
231 | std::cout << "Training example number: " << trainInsts.size() << std::endl;
232 | std::cout << "Dev example number: " << trainInsts.size() << std::endl;
233 | std::cout << "Test example number: " << trainInsts.size() << std::endl;
234 |
235 | createAlphabet(trainInsts);
236 | addTestAlphabet(devInsts);
237 | addTestAlphabet(testInsts);
238 |
239 | m_word_stats[unknownkey] = m_options.wordCutOff + 1;
240 | cout << "Total word num: " << m_word_stats.size() << endl;
241 | m_driver._modelparams.wordAlpha.initial(m_word_stats, m_options.wordCutOff);
242 | m_driver._modelparams.wordAlpha.set_fixed_flag(true);
243 | cout << "Remain word num:" << m_driver._modelparams.wordAlpha.size() << endl;
244 |
245 | vector<Example> trainExamples, devExamples, testExamples;
246 |
247 | std::cout << "Instance convert to example... " << std::endl;
248 | initialExamples(trainInsts, trainExamples);
249 | initialExamples(devInsts, devExamples);
250 | initialExamples(testInsts, testExamples);
251 |
252 | if (m_options.wordFile != "") {
253 | m_driver._modelparams.words.initial(&m_driver._modelparams.wordAlpha, m_options.wordFile, m_options.wordEmbFineTune);
254 | }
255 | else{
256 | m_driver._modelparams.words.initial(&m_driver._modelparams.wordAlpha, m_options.wordEmbSize, m_options.wordEmbFineTune);
257 | }
258 |
259 | m_driver._hyperparams.setRequired(m_options);
260 | m_driver.initial(m_maxseq_size);
261 |
262 |
263 |
264 | dtype bestDIS = 0;
265 |
266 | int inputSize = trainExamples.size();
267 |
268 | int batchBlock = inputSize / m_options.batchSize;
269 | if (inputSize % m_options.batchSize != 0)
270 | batchBlock++;
271 |
272 | srand(0);
273 | std::vector<int> indexes;
274 | for (int i = 0; i < inputSize; ++i)
275 | indexes.push_back(i);
276 |
277 | static Metric eval, metric_dev, metric_test;
278 | static vector<Example> subExamples;
279 | int devNum = devExamples.size(), testNum = testExamples.size();
280 | for (int iter = 0; iter < m_options.maxIter; ++iter) {
281 | std::cout << "##### Iteration " << iter << std::endl;
282 |
283 | random_shuffle(indexes.begin(), indexes.end());
284 | eval.reset();
285 | for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
286 | subExamples.clear();
287 | int start_pos = updateIter * m_options.batchSize;
288 | int end_pos = (updateIter + 1) * m_options.batchSize;
289 | if (end_pos > inputSize)
290 | end_pos = inputSize;
291 |
292 | for (int idy = start_pos; idy < end_pos; idy++) {
293 | subExamples.push_back(trainExamples[indexes[idy]]);
294 | }
295 |
296 | int curUpdateIter = iter * batchBlock + updateIter;
297 | dtype cost = m_driver.train(subExamples, curUpdateIter);
298 |
299 | eval.overall_label_count += m_driver._eval.overall_label_count;
300 | eval.correct_label_count += m_driver._eval.correct_label_count;
301 |
302 | if ((curUpdateIter + 1) % m_options.verboseIter == 0) {
303 | //m_driver.checkgrad(subExamples, curUpdateIter + 1);
304 | std::cout << "current: " << updateIter + 1 << ", total block: " << batchBlock << std::endl;
305 | std::cout << "Cost = " << cost << ", Tag Correct(%) = " << eval.getAccuracy() << std::endl;
306 | }
307 | m_driver.updateModel();
308 |
309 | }
310 |
311 | if (devNum > 0) {
312 | bCurIterBetter = false;
313 | if (!m_options.outBest.empty())
314 | decodeInstResults.clear();
315 | metric_dev.reset();
316 | for (int idx = 0; idx < devExamples.size(); idx++) {
317 | string result_label;
318 | predict(devExamples[idx].m_densefeatures, result_label);
319 |
320 | devInsts[idx].Evaluate(result_label, metric_dev);
321 |
322 | if (!m_options.outBest.empty()) {
323 | curDecodeInst.copyValuesFrom(devInsts[idx]);
324 | curDecodeInst.assignLabel(result_label);
325 | decodeInstResults.push_back(curDecodeInst);
326 | }
327 | }
328 |
329 | std::cout << "dev:" << std::endl;
330 | metric_dev.print();
331 |
332 | if (!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS) {
333 | m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults);
334 | bCurIterBetter = true;
335 | }
336 |
337 | if (testNum > 0) {
338 | if (!m_options.outBest.empty())
339 | decodeInstResults.clear();
340 | metric_test.reset();
341 | for (int idx = 0; idx < testExamples.size(); idx++) {
342 | string result_label;
343 | predict(testExamples[idx].m_densefeatures, result_label);
344 |
345 | testInsts[idx].Evaluate(result_label, metric_test);
346 |
347 | if (bCurIterBetter && !m_options.outBest.empty()) {
348 | curDecodeInst.copyValuesFrom(testInsts[idx]);
349 | curDecodeInst.assignLabel(result_label);
350 | decodeInstResults.push_back(curDecodeInst);
351 | }
352 | }
353 | std::cout << "test:" << std::endl;
354 | metric_test.print();
355 |
356 | if (!m_options.outBest.empty() && bCurIterBetter) {
357 | m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults);
358 | }
359 | }
360 |
361 |
362 |
363 | if (m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS) {
364 | std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl;
365 | bestDIS = metric_dev.getAccuracy();
366 | writeModelFile(modelFile);
367 | }
368 |
369 | }
370 | // Clear gradients
371 | }
372 | }
373 |
374 | int Detector::predict(const vector<Feature>& features, string& output) {
375 | int labelIdx;
376 | m_driver.predict(features, labelIdx);
377 | output = m_driver._modelparams.labelAlpha.from_id(labelIdx, nullkey);
378 |
379 | if (output == nullkey)
380 | std::cout << "predict error" << std::endl;
381 | return 0;
382 | }
383 |
384 | void Detector::test(const string& testFile, const string& outputFile, const string& modelFile) {
385 | loadModelFile(modelFile);
386 | vector<Instance> testInsts;
387 | m_pipe.readInstances(testFile, testInsts);
388 |
389 | vector<Example> testExamples;
390 | initialExamples(testInsts, testExamples);
391 |
392 | int testNum = testExamples.size();
393 | vector<Instance> testInstResults;
394 | Metric metric_test;
395 | metric_test.reset();
396 | for (int idx = 0; idx < testExamples.size(); idx++) {
397 | string result_label;
398 | predict(testExamples[idx].m_densefeatures, result_label);
399 | testInsts[idx].Evaluate(result_label, metric_test);
400 | Instance curResultInst;
401 | curResultInst.copyValuesFrom(testInsts[idx]);
402 | curResultInst.assignLabel(result_label);
403 | testInstResults.push_back(curResultInst);
404 | }
405 | std::cout << "test:" << std::endl;
406 | metric_test.print();
407 |
408 | m_pipe.outputAllInstances(outputFile, testInstResults);
409 |
410 | }
411 |
412 |
413 | void Detector::loadModelFile(const string& inputModelFile) {
414 |
415 | }
416 |
417 | void Detector::writeModelFile(const string& outputModelFile) {
418 |
419 | }
420 |
421 | int main(int argc, char* argv[]) {
422 |
423 | std::string trainFile = "", devFile = "", testFile = "", modelFile = "", optionFile = "";
424 | std::string outputFile = "", wordEmbFile = "";
425 | bool bTrain = false;
426 | dsr::Argument_helper ah;
427 | int memsize = 1;
428 |
429 | ah.new_flag("l", "learn", "train or test", bTrain);
430 | ah.new_named_string("train", "trainCorpus", "named_string", "training corpus to train a model, must when training", trainFile);
431 | ah.new_named_string("dev", "devCorpus", "named_string", "development corpus to train a model, optional when training", devFile);
432 | ah.new_named_string("test", "testCorpus", "named_string",
433 | "testing corpus to train a model or input file to test a model, optional when training and must when testing", testFile);
434 | ah.new_named_string("option", "optionFile", "named_string", "option file to train a model, optional when training", optionFile);
435 | ah.new_named_string("model", "modelFile", "named_string", "model file, must when training and testing", modelFile);
436 | ah.new_named_string("output", "outputFile", "named_string", "output file to test, must when testing", outputFile);
437 | ah.new_named_int("mem", "memsize", "named_int", "memory allocated for tensor nodes", memsize);
438 |
439 | ah.process(argc, argv);
440 |
441 | Detector detector(memsize);
442 | detector.m_pipe.max_sentense_size = ComputionGraph::max_sentence_length;
443 | if (bTrain) {
444 | detector.train(trainFile, devFile, testFile, modelFile, optionFile);
445 | }
446 | else {
447 | detector.test(testFile, outputFile, modelFile);
448 | }
449 |
450 | //test(argv);
451 | //ah.write_values(std::cout);
452 | }
453 |
--------------------------------------------------------------------------------
/src/NNWordLocalContext/NNWordLocalContext.h:
--------------------------------------------------------------------------------
1 | /*
2 | * NNWordLocalContext.h
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #ifndef SRC_NNWordLocalContext_H_
9 | #define SRC_NNWordLocalContext_H_
10 |
11 |
12 | #include "N3L.h"
13 | #include "Driver.h"
14 | #include "Options.h"
15 | #include "Instance.h"
16 | #include "Example.h"
17 | #include "Pipe.h"
18 | #include "Utf.h"
19 |
20 | using namespace nr;
21 | using namespace std;
22 |
23 | class Detector {
24 |
25 |
26 | public:
27 | unordered_map<string, int> m_feat_stats;
28 | unordered_map<string, int> m_word_stats;
29 | int m_maxseq_size;
30 |
31 | public:
32 | Options m_options;
33 |
34 | Pipe m_pipe;
35 |
36 | Driver m_driver;
37 |
38 |
39 | public:
40 | Detector(size_t memsize);
41 | virtual ~Detector();
42 |
43 | public:
44 |
45 | int createAlphabet(const vector<Instance>& vecTrainInsts);
46 | void addTestAlphabet(const vector<Instance>& vecInsts);
47 |
48 | void extractDenseFeatures(vector<Feature>& features, const Instance* pInstance);
49 | void extractLinearFeatures(vector<string>& features, const Instance* pInstance);
50 |
51 | void convert2Example(const Instance* pInstance, Example& exam);
52 | void initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams);
53 |
54 | public:
55 | void train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile);
56 | int predict(const vector<Feature>& features, string& outputs);
57 | void test(const string& testFile, const string& outputFile, const string& modelFile);
58 |
59 | void writeModelFile(const string& outputModelFile);
60 | void loadModelFile(const string& inputModelFile);
61 |
62 | };
63 |
64 | #endif /* SRC_NNWordLocalContext_H_ */
65 |
--------------------------------------------------------------------------------
/src/NNWordLocalContext/model/ComputionGraph.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ComputionGraph_H_
2 | #define SRC_ComputionGraph_H_
3 |
4 | #include "ModelParams.h"
5 |
6 |
7 | // Each model consists of two parts: building the neural graph (BiLSTM + gated pooling over the local sentence, gated pooling over the context sentence, concatenated into a hidden layer) and defining the output losses.
8 | struct ComputionGraph : Graph {
9 | public:
10 | const static int max_sentence_length = 256;
11 |
12 | public:
13 | // node instances
14 | vector<vector<LookupNode> > word_inputs;
15 | WindowBuilder word_window;
16 |
17 | LSTM1Builder left_lstm;
18 | LSTM1Builder right_lstm;
19 |
20 | vector<ConcatNode> concat_bilstm;
21 | GatedPoolBuilder local_gated_pooling;
22 | GatedPoolBuilder context_gated_pooling;
23 |
24 | Node padding;
25 | ConcatNode concat_local_context;
26 | UniNode sent_hidden;
27 | LinearNode output;
28 |
29 | public:
30 | ComputionGraph() : Graph() {
31 | }
32 |
33 | ~ComputionGraph() {
34 | clear();
35 | }
36 |
37 | public:
38 | //allocate enough nodes
39 | inline void createNodes(int sent_length, int maxseq_size) {
40 |
41 | resizeVec(word_inputs, maxseq_size, sent_length);
42 | word_window.resize(sent_length);
43 | left_lstm.resize(sent_length);
44 | right_lstm.resize(sent_length);
45 |
46 | concat_bilstm.resize(sent_length);
47 | local_gated_pooling.resize(sent_length);
48 | context_gated_pooling.resize(sent_length);
49 |
50 | }
51 |
52 | inline void clear() {
53 | Graph::clear();
54 | clearVec(word_inputs);
55 | word_window.clear();
56 | left_lstm.clear();
57 | right_lstm.clear();
58 | concat_bilstm.clear();
59 | local_gated_pooling.clear();
60 | context_gated_pooling.clear();
61 | }
62 |
63 |
64 | public:
65 | inline void initial(ModelParams& model, HyperParams& opts, AlignedMemoryPool* mem = NULL) {
66 | int seq_size = word_inputs.size();
67 |
68 | for (int i = 0; i < seq_size; i++) {
69 | for (int idx = 0; idx < word_inputs[i].size(); idx++) {
70 | word_inputs[i][idx].init(model.words.nDim, opts.dropOut, mem);
71 | word_inputs[i][idx].setParam(&model.words);
72 | if (i == seq_size - 1)
73 | concat_bilstm[idx].init(opts.rnnhiddensize * 2, -1, mem);
74 | }
75 | }
76 | word_window.init(model.words.nDim, opts.wordcontext, mem);
77 | left_lstm.init(&model.left_lstm_project, opts.dropOut, true, mem);
78 | right_lstm.init(&model.right_lstm_project, opts.dropOut, false, mem);
79 |
80 | local_gated_pooling.init(&model.local_gatedpool_project, mem);
81 | context_gated_pooling.init(&model.context_gatedpool_project, mem);
82 |
83 | concat_local_context.init(opts.rnnhiddensize * 2 + model.words.nDim, -1, mem);
84 | sent_hidden.init(opts.hiddensize, opts.dropOut, mem);
85 | sent_hidden.setParam(&model.sent_tanh_project);
86 | output.init(opts.labelSize, -1, mem);
87 | output.setParam(&model.olayer_linear);
88 |
89 | padding.init(opts.wordDim, -1, mem);
90 | }
91 |
92 |
93 | public:
94 | // some nodes may behave differently during training and decoding, for example, dropout
95 | inline void forward(const vector<Feature>& features, bool bTrain = false) {
96 | //first step: clear value
97 | clearValue(bTrain); // clearing values is a required step before training, prediction, and cost computation
98 |
99 |
100 | // second step: build graph
101 | int seq_size = features.size();
102 | //forward
103 | // word-level neural networks
104 | for (int i = 0; i < seq_size; i++) {
105 |
106 | const Feature& feature = features[i];
107 | int wordnum = feature.words.size();
108 | if (wordnum > max_sentence_length)
109 | wordnum = max_sentence_length;
110 | for (int idx = 0; idx < wordnum; idx++) {
111 | //input
112 | word_inputs[i][idx].forward(this, feature.words[idx]);
113 | }
114 | if (i == seq_size - 1) {
115 | //windowlized
116 | word_window.forward(this, getPNodes(word_inputs[i], wordnum));
117 | left_lstm.forward(this, getPNodes(word_window._outputs, wordnum));
118 | right_lstm.forward(this, getPNodes(word_window._outputs, wordnum));
119 |
120 | for (int idx = 0; idx < wordnum; idx++) {
121 | //feed-forward
122 | concat_bilstm[idx].forward(this, &(left_lstm._hiddens[idx]), &(right_lstm._hiddens[idx]));
123 | }
124 | local_gated_pooling.forward(this, getPNodes(concat_bilstm, wordnum));
125 | }
126 |
127 | else {
128 | context_gated_pooling.forward(this, getPNodes(word_inputs[i], wordnum));
129 | }
130 | }
131 |
132 | if (seq_size == 1)
133 | concat_local_context.forward(this, &padding, &local_gated_pooling._output);
134 | else
135 | concat_local_context.forward(this, &context_gated_pooling._output, &local_gated_pooling._output);
136 | sent_hidden.forward(this, &concat_local_context);
137 | output.forward(this, &sent_hidden);
138 | }
139 |
140 | };
141 |
142 | #endif /* SRC_ComputionGraph_H_ */
--------------------------------------------------------------------------------
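Both pooling members above (`local_gated_pooling`, `context_gated_pooling`) are `GatedPoolBuilder`s, which squash a variable-length node sequence into a single vector by gating each position element-wise before pooling. The following self-contained sketch shows one common gated-pooling formulation in plain C++; the sigmoid gate, toy dimensions, and weight values are assumptions for illustration, not LibN3L-2.0's actual implementation:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Gated pooling sketch: gate each position element-wise with
    // sigmoid(W * x_i + b), then average the gated vectors.
    int main() {
        const int dim = 4, len = 3;
        std::vector<std::vector<double> > x(len, std::vector<double>(dim, 0.5)); // toy inputs
        std::vector<std::vector<double> > W(dim, std::vector<double>(dim, 0.1)); // toy gate weights
        std::vector<double> b(dim, 0.0), pooled(dim, 0.0);
        for (int i = 0; i < len; i++) {
            for (int d = 0; d < dim; d++) {
                double s = b[d];
                for (int k = 0; k < dim; k++) s += W[d][k] * x[i][k];
                double gate = 1.0 / (1.0 + std::exp(-s)); // element-wise sigmoid gate
                pooled[d] += gate * x[i][d] / len;        // gated average over positions
            }
        }
        for (int d = 0; d < dim; d++) std::printf("%g ", pooled[d]);
        std::printf("\n");
        return 0;
    }

Compared with plain average pooling, the learned gate can down-weight uninformative words, which is the usual motivation for this pooling choice.
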
/src/NNWordLocalContext/model/Driver.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Driver.h
3 | *
4 | * Created on: Mar 18, 2015
5 | * Author: mszhang
6 | */
7 |
8 | #ifndef SRC_Driver_H_
9 | #define SRC_Driver_H_
10 |
11 | #include <iostream>
12 | #include "ComputionGraph.h"
13 |
14 |
15 | //A neural network classifier
16 |
17 | class Driver{
18 | public:
19 | Driver(size_t memsize) : aligned_mem(memsize) {
20 | _pcg = NULL;
21 | }
22 |
23 | ~Driver() {
24 | if (_pcg != NULL)
25 | delete _pcg;
26 | _pcg = NULL;
27 | }
28 |
29 | public:
30 | ComputionGraph *_pcg; // build neural graphs
31 | ModelParams _modelparams; // model parameters
32 | HyperParams _hyperparams;
33 |
34 | Metric _eval;
35 | CheckGrad _checkgrad;
36 | ModelUpdate _ada; // model update
37 | AlignedMemoryPool aligned_mem;
38 |
39 |
40 | public:
41 | inline void initial(int maxseq_size) {
42 | if (!_hyperparams.bValid()){
43 | std::cout << "hyper parameter initialization Error, Please check!" << std::endl;
44 | return;
45 | }
46 | if (!_modelparams.initial(_hyperparams, &aligned_mem)){
47 | std::cout << "model parameter initialization Error, Please check!" << std::endl;
48 | return;
49 | }
50 | _modelparams.exportModelParams(_ada);
51 | _modelparams.exportCheckGradParams(_checkgrad);
52 |
53 | _hyperparams.print();
54 |
55 | _pcg = new ComputionGraph();
56 | _pcg->createNodes(ComputionGraph::max_sentence_length, maxseq_size);
57 | _pcg->initial(_modelparams, _hyperparams, &aligned_mem);
58 |
59 | setUpdateParameters(_hyperparams.nnRegular, _hyperparams.adaAlpha, _hyperparams.adaEps);
60 | }
61 |
62 |
63 | inline dtype train(const vector<Example>& examples, int iter) {
64 | _eval.reset();
65 |
66 | int example_num = examples.size();
67 | dtype cost = 0.0;
68 |
69 | for (int count = 0; count < example_num; count++) {
70 | const Example& example = examples[count];
71 | //forward
72 | _pcg->forward(example.m_densefeatures, true);
73 |
74 | //loss function
75 | int seq_size = example.m_densefeatures.size();
76 | int wordnum = example.m_densefeatures[seq_size - 1].words.size();
77 | cost += _modelparams.loss.loss(&_pcg->output, example.m_labels, _eval, example_num);
78 |
79 | // backward, which exists only for training
80 | _pcg->backward();
81 | }
82 |
83 | if (_eval.getAccuracy() < 0) {
84 | std::cout << "strange" << std::endl;
85 | }
86 |
87 | return cost;
88 | }
89 |
90 | inline void predict(const vector<Feature> densefeatures, int& results) {
91 | _pcg->forward(densefeatures);
92 | _modelparams.loss.predict(&_pcg->output, results);
93 | }
94 |
95 | inline dtype cost(const Example& example){
96 | _pcg->forward(example.m_densefeatures); //forward here
97 |
98 | int seq_size = example.m_densefeatures.size();
99 |
100 | dtype cost = 0.0;
101 |
102 | cost += _modelparams.loss.cost(&_pcg->output, example.m_labels, 1);
103 |
104 | return cost;
105 | }
106 |
107 | void checkgrad(const vector<Example>& examples, int iter){
108 | ostringstream out;
109 | out << "Iteration: " << iter;
110 | _checkgrad.check(this, examples, out.str());
111 | }
112 |
113 | void updateModel() {
114 | _ada.update();
115 | //_ada.update(5.0);
116 | }
117 |
118 | void writeModel();
119 |
120 | void loadModel();
121 |
122 |
123 |
124 | private:
125 | inline void resetEval() {
126 | _eval.reset();
127 | }
128 |
129 |
130 | inline void setUpdateParameters(dtype nnRegular, dtype adaAlpha, dtype adaEps){
131 | _ada._alpha = adaAlpha;
132 | _ada._eps = adaEps;
133 | _ada._reg = nnRegular;
134 | }
135 |
136 | };
137 |
138 | #endif /* SRC_Driver_H_ */
139 |
--------------------------------------------------------------------------------
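A note on the loss driving `train` above: `SoftMaxLoss` scores the output node with a softmax and cross-entropy against the one-hot `m_labels`, and the gradient on the output scores is what `backward()` then propagates. A self-contained sketch of that math on invented two-label scores (a mirror of the computation, not the library's API):

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<double> score = {1.0, -0.5}; // toy output-layer values (2 labels)
        std::vector<double> gold  = {1.0, 0.0};  // one-hot gold label, as in Example::m_labels
        double maxs = *std::max_element(score.begin(), score.end()), sum = 0.0;
        std::vector<double> prob(score.size());
        for (size_t i = 0; i < score.size(); i++) sum += std::exp(score[i] - maxs);
        double loss = 0.0;
        for (size_t i = 0; i < score.size(); i++) {
            prob[i] = std::exp(score[i] - maxs) / sum;
            if (gold[i] > 0) loss -= std::log(prob[i]); // cross-entropy on the gold label
            std::printf("grad[%zu] = %g\n", i, prob[i] - gold[i]); // gradient fed to backward()
        }
        std::printf("loss = %g\n", loss);
        return 0;
    }
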
/src/NNWordLocalContext/model/HyperParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_HyperParams_H_
2 | #define SRC_HyperParams_H_
3 |
4 | #include "N3L.h"
5 | #include "Example.h"
6 | #include "Options.h"
7 |
8 | using namespace nr;
9 | using namespace std;
10 |
11 | struct HyperParams{
12 |
13 | // must assign
14 | int wordcontext;
15 | int hiddensize;
16 | int rnnhiddensize;
17 | dtype dropOut;
18 |
19 | // must assign
20 | dtype nnRegular; // for optimization
21 | dtype adaAlpha; // for optimization
22 | dtype adaEps; // for optimization
23 |
24 | //auto generated
25 | int wordwindow;
26 | int wordDim;
27 | int inputsize;
28 | int labelSize;
29 |
30 | public:
31 | HyperParams(){
32 | bAssigned = false;
33 | }
34 |
35 | public:
36 | void setRequired(Options& opt){
37 | wordcontext = opt.wordcontext;
38 | hiddensize = opt.hiddenSize;
39 | rnnhiddensize = opt.rnnHiddenSize;
40 | dropOut = opt.dropProb;
41 |
42 | nnRegular = opt.regParameter;
43 | adaAlpha = opt.adaAlpha;
44 | adaEps = opt.adaEps;
45 |
46 | bAssigned = true;
47 | }
48 |
49 | void clear(){
50 | bAssigned = false;
51 | }
52 |
53 | bool bValid(){
54 | return bAssigned;
55 | }
56 |
57 |
58 | public:
59 |
60 | void print(){
61 |
62 | }
63 |
64 | private:
65 | bool bAssigned;
66 | };
67 |
68 |
69 | #endif /* SRC_HyperParams_H_ */
--------------------------------------------------------------------------------
/src/NNWordLocalContext/model/ModelParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ModelParams_H_
2 | #define SRC_ModelParams_H_
3 | #include "HyperParams.h"
4 |
5 | // Each model consists of two parts: building the neural graph and defining the output losses.
6 | class ModelParams{
7 |
8 | public:
9 | Alphabet wordAlpha; // should be initialized outside
10 | Alphabet featAlpha; //should be initialized outside
11 | Alphabet labelAlpha; // should be initialized outside
12 | public:
13 | LookupTable words; // should be initialized outside
14 | LSTM1Params left_lstm_project; //left lstm
15 | LSTM1Params right_lstm_project; //right lstm
16 | GatedPoolParam local_gatedpool_project;
17 | GatedPoolParam context_gatedpool_project;
18 | UniParams sent_tanh_project; // sentence hidden
19 | UniParams olayer_linear; // output
20 | public:
21 | SoftMaxLoss loss;
22 |
23 |
24 | public:
25 | bool initial(HyperParams& opts, AlignedMemoryPool* mem = NULL){
26 |
27 | // some model parameters should be initialized outside
28 | if (words.nVSize <= 0 || labelAlpha.size() <= 0){
29 | return false;
30 | }
31 | opts.wordDim = words.nDim;
32 | opts.wordwindow = 2 * opts.wordcontext + 1;
33 | opts.inputsize = opts.wordwindow * opts.wordDim;
34 | int senthiddensize = opts.rnnhiddensize * 2 + words.nDim;
35 |
36 | left_lstm_project.initial(opts.rnnhiddensize, opts.inputsize, mem);
37 | right_lstm_project.initial(opts.rnnhiddensize, opts.inputsize, mem);
38 | local_gatedpool_project.initial(opts.rnnhiddensize * 2, opts.rnnhiddensize * 2, mem);
39 | context_gatedpool_project.initial(opts.wordDim, opts.wordDim, mem);
40 | sent_tanh_project.initial(opts.hiddensize, senthiddensize, mem);
41 | opts.labelSize = labelAlpha.size();
42 | olayer_linear.initial(opts.labelSize, opts.hiddensize, false, mem);
43 |
44 | return true;
45 | }
46 |
47 |
48 | void exportModelParams(ModelUpdate& ada){
49 | words.exportAdaParams(ada);
50 | left_lstm_project.exportAdaParams(ada);
51 | right_lstm_project.exportAdaParams(ada);
52 | local_gatedpool_project.exportAdaParams(ada);
53 | context_gatedpool_project.exportAdaParams(ada);
54 | sent_tanh_project.exportAdaParams(ada);
55 | olayer_linear.exportAdaParams(ada);
56 | }
57 |
58 |
59 | void exportCheckGradParams(CheckGrad& checkgrad){
60 | checkgrad.add(&(words.E), "_words.E");
61 |
62 | checkgrad.add(&(context_gatedpool_project._uni_gate_param.W), "context_gatedpool_project.uni.W");
63 | checkgrad.add(&(context_gatedpool_project._uni_gate_param.b), "context_gatedpool_project.uni.b");
64 | checkgrad.add(&(local_gatedpool_project._uni_gate_param.W), "local_gatedpool_project.W");
65 | checkgrad.add(&(local_gatedpool_project._uni_gate_param.b), "local_gatedpool_project.b");
66 |
67 | checkgrad.add(&(sent_tanh_project.W), "sent_tanh_project.W");
68 | checkgrad.add(&(sent_tanh_project.b), "sent_tanh_project.b");
69 | }
70 |
71 | // will add it later
72 | void saveModel(){
73 |
74 | }
75 |
76 | void loadModel(const string& inFile){
77 |
78 | }
79 |
80 | };
81 |
82 | #endif /* SRC_ModelParams_H_ */
--------------------------------------------------------------------------------
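To make the dimension bookkeeping in `initial` above concrete: each LSTM reads window-concatenated embeddings (`inputsize`), and the sentence hidden layer reads the concatenation of the bi-LSTM pooling output with the context pooling output (`senthiddensize`). A tiny standalone check of that arithmetic; wordDim = 100, wordcontext = 2, and rnnhiddensize = 100 are made-up values for the example:

    #include <cstdio>

    int main() {
        int wordDim = 100, wordcontext = 2, rnnhiddensize = 100; // assumed values
        int wordwindow = 2 * wordcontext + 1;                    // 5
        int inputsize = wordwindow * wordDim;                    // 500: LSTM input per position
        int senthiddensize = rnnhiddensize * 2 + wordDim;        // 300: bi-LSTM pool + context pool
        std::printf("window=%d input=%d senthidden=%d\n", wordwindow, inputsize, senthiddensize);
        return 0;
    }
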
/src/NNWordLocalContextSeparate/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | include_directories(
3 | model
4 | )
5 |
6 | add_executable(NNWordLocalContextSeparate NNWordLocalContextSeparate.cpp)
7 |
8 |
--------------------------------------------------------------------------------
/src/NNWordLocalContextSeparate/NNWordLocalContextSeparate.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * SparseDetector.cpp
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #include "NNWordLocalContextSeparate.h"
9 |
10 | #include "Argument_helper.h"
11 |
12 | Detector::Detector(size_t memsize) : m_driver(memsize) {
13 | // TODO Auto-generated constructor stub
14 | srand(0);
15 | }
16 |
17 | Detector::~Detector() {
18 | // TODO Auto-generated destructor stub
19 | }
20 |
21 | int Detector::createAlphabet(const vector<Instance>& vecInsts) {
22 | if (vecInsts.size() == 0) {
23 | std::cout << "training set empty" << std::endl;
24 | return -1;
25 | }
26 | cout << "Creating Alphabet..." << endl;
27 |
28 | m_maxseq_size = -1;
29 | int numInstance;
30 |
31 | m_driver._modelparams.labelAlpha.clear();
32 | // label alphabet and word statistics
33 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
34 | const Instance *pInstance = &vecInsts[numInstance];
35 |
36 | const vector<vector<string> > &words = pInstance->words;
37 | const string &label = pInstance->label;
38 |
39 | int labelId = m_driver._modelparams.labelAlpha.from_string(label);
40 |
41 | int seq_size = pInstance->seqsize();
42 | if (seq_size > m_maxseq_size)
43 | m_maxseq_size = seq_size;
44 | for (int i = 0; i < seq_size; i++) {
45 | int wordLength = words[i].size();
46 | for (int j = 0; j < wordLength; ++j) {
47 | string curword = normalize_to_lowerwithdigit(words[i][j]);
48 | m_word_stats[curword]++;
49 | }
50 |
51 | }
52 |
53 | if ((numInstance + 1) % m_options.verboseIter == 0) {
54 | cout << numInstance + 1 << " ";
55 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
56 | cout << std::endl;
57 | cout.flush();
58 | }
59 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
60 | break;
61 | }
62 |
63 | cout << numInstance << " " << endl;
64 | cout << "Label num: " << m_driver._modelparams.labelAlpha.size() << endl;
65 |
66 |
67 | m_driver._modelparams.labelAlpha.set_fixed_flag(true);
68 |
69 | if (m_options.linearfeatCat > 0) {
70 | cout << "Extracting linear features..." << endl;
71 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
72 | const Instance *pInstance = &vecInsts[numInstance];
73 | vector<string> linearfeat;
74 | extractLinearFeatures(linearfeat, pInstance);
75 | for (int i = 0; i < linearfeat.size(); i++)
76 | m_feat_stats[linearfeat[i]] ++;
77 | }
78 | m_feat_stats[unknownkey] = m_options.featCutOff + 1;
79 | cout << "Total feature num: " << m_feat_stats.size() << endl;
80 | m_driver._modelparams.featAlpha.initial(m_feat_stats, m_options.featCutOff);
81 | cout << "Remina feature num:" << m_driver._modelparams.featAlpha.size() << endl;
82 | m_driver._modelparams.featAlpha.set_fixed_flag(true);
83 | }
84 | return 0;
85 | }
86 |
87 | void Detector::addTestAlphabet(const vector<Instance>& vecInsts)
88 | {
89 | cout << "Adding other word Alphabet..." << endl;
90 | int numInstance;
91 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
92 | const Instance *pInstance = &vecInsts[numInstance];
93 |
94 | const vector<vector<string> > &words = pInstance->words;
95 | int seq_size = pInstance->seqsize();
96 | for (int i = 0; i < seq_size; ++i) {
97 | for (int j = 0; j < words[i].size(); j++) {
98 | string curword = normalize_to_lowerwithdigit(words[i][j]);
99 | if (!m_options.wordEmbFineTune)
100 | m_word_stats[curword]++;
101 | }
102 | }
103 |
104 | if ((numInstance + 1) % m_options.verboseIter == 0) {
105 | cout << numInstance + 1 << " ";
106 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
107 | cout << std::endl;
108 | cout.flush();
109 | }
110 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
111 | break;
112 | }
113 |
114 | cout << numInstance << " " << endl;
115 | }
116 |
117 |
118 |
119 | void Detector::extractDenseFeatures(vector<Feature>& features, const Instance * pInstance)
120 | {
121 | features.clear();
122 | const vector<vector<string> >& words = pInstance->words;
123 | int seq_size = pInstance->seqsize();
124 | assert(seq_size < 3);
125 |
126 | //Local and Context sentence dense feature
127 | for (int sentidx = 0; sentidx < seq_size; sentidx++) {
128 | Feature feat;
129 | const vector<string>& curWords = words[sentidx];
130 | int wordnumber = curWords.size();
131 | for (int i = 0; i < wordnumber; i++)
132 | feat.words.push_back(normalize_to_lowerwithdigit(curWords[i]));
133 |
134 | features.push_back(feat);
135 | }
136 | }
137 |
138 | void Detector::extractLinearFeatures(vector<string>& feat, const Instance* pInstance) {
139 | feat.clear();
140 |
141 | const vector<vector<string> >& words = pInstance->words;
142 | int seq_size = pInstance->seqsize();
143 | assert(seq_size < 3);
144 | //Current sent linear feature
145 | const vector<string>& lastWords = words[seq_size - 1];
146 | int wordnumber = lastWords.size();
147 | string strfeat = "", curWord = "", preWord = "", pre2Word = "";
148 | for (int i = 0; i < wordnumber; i++) {
149 | curWord = normalize_to_lowerwithdigit(lastWords[i]);
150 | strfeat = "F1U=" + curWord;
151 | feat.push_back(strfeat);
152 | preWord = i - 1 >= 0 ? lastWords[i - 1] : nullkey;
153 | strfeat = "F2B=" + preWord + seperateKey + curWord;
154 | feat.push_back(strfeat);
155 | pre2Word = i - 2 >= 0 ? lastWords[i - 2] : nullkey;
156 | strfeat = "F3T=" + pre2Word + seperateKey + preWord + seperateKey + curWord;
157 | feat.push_back(strfeat);
158 | }
159 |
160 | //History feature
161 | if (m_options.linearfeatCat > 1 && seq_size == 2) {
162 | const vector<string>& historyWords = words[seq_size - 2];
163 | wordnumber = historyWords.size();
164 | for (int i = 0; i < wordnumber; i++) {
165 | strfeat = "F4U=" + historyWords[i];
166 | feat.push_back(strfeat);
167 | }
168 | }
169 | }
170 |
171 | void Detector::convert2Example(const Instance* pInstance, Example& exam) {
172 | exam.clear();
173 |
174 | const string &instlabel = pInstance->label;
175 | const Alphabet &labelAlpha = m_driver._modelparams.labelAlpha;
176 |
177 | int labelnum = labelAlpha.size();
178 | for (int i = 0; i < labelnum; i++) {
179 | string str = labelAlpha.from_id(i);
180 | if (instlabel.compare(str) == 0)
181 | exam.m_labels.push_back(1.0);
182 | else
183 | exam.m_labels.push_back(0.0);
184 | }
185 |
186 | //dense feature
187 | extractDenseFeatures(exam.m_densefeatures, pInstance);
188 |
189 | //linear feature
190 | if (m_options.linearfeatCat > 0)
191 | extractLinearFeatures(exam.m_linearfeatures, pInstance);
192 |
193 | }
194 |
195 | void Detector::initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams) {
196 | int numInstance;
197 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
198 | const Instance *pInstance = &vecInsts[numInstance];
199 | Example curExam;
200 | convert2Example(pInstance, curExam);
201 | vecExams.push_back(curExam);
202 |
203 | if ((numInstance + 1) % m_options.verboseIter == 0) {
204 | cout << numInstance + 1 << " ";
205 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
206 | cout << std::endl;
207 | cout.flush();
208 | }
209 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
210 | break;
211 | }
212 |
213 | cout << numInstance << " " << endl;
214 | }
215 |
216 | void Detector::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile) {
217 | if (optionFile != "")
218 | m_options.load(optionFile);
219 | m_options.showOptions();
220 | vector<Instance> trainInsts, devInsts, testInsts;
221 | static vector<Instance> decodeInstResults;
222 | static Instance curDecodeInst;
223 | bool bCurIterBetter = false;
224 |
225 | m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
226 | if (devFile != "")
227 | m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
228 | if (testFile != "")
229 | m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);
230 |
231 | std::cout << "Training example number: " << trainInsts.size() << std::endl;
232 | std::cout << "Dev example number: " << trainInsts.size() << std::endl;
233 | std::cout << "Test example number: " << trainInsts.size() << std::endl;
234 |
235 | createAlphabet(trainInsts);
236 | addTestAlphabet(devInsts);
237 | addTestAlphabet(testInsts);
238 |
239 | m_word_stats[unknownkey] = m_options.wordCutOff + 1;
240 | cout << "Total word num: " << m_word_stats.size() << endl;
241 | m_driver._modelparams.wordAlpha.initial(m_word_stats, m_options.wordCutOff);
242 | m_driver._modelparams.wordAlpha.set_fixed_flag(true);
243 | cout << "Remain word num:" << m_driver._modelparams.wordAlpha.size() << endl;
244 |
245 | vector<Example> trainExamples, devExamples, testExamples;
246 |
247 | std::cout << "Instance convert to example... " << std::endl;
248 | initialExamples(trainInsts, trainExamples);
249 | initialExamples(devInsts, devExamples);
250 | initialExamples(testInsts, testExamples);
251 |
252 | if (m_options.wordFile != "") {
253 | m_driver._modelparams.words.initial(&m_driver._modelparams.wordAlpha, m_options.wordFile, m_options.wordEmbFineTune);
254 | m_driver._modelparams.history_words.initial(&m_driver._modelparams.wordAlpha, m_options.wordFile, m_options.wordEmbFineTune);
255 | }
256 | else{
257 | m_driver._modelparams.words.initial(&m_driver._modelparams.wordAlpha, m_options.wordEmbSize, m_options.wordEmbFineTune);
258 | m_driver._modelparams.history_words.initial(&m_driver._modelparams.wordAlpha, m_options.wordEmbSize, m_options.wordEmbFineTune);
259 | }
260 |
261 | m_driver._hyperparams.setRequired(m_options);
262 | m_driver.initial(m_maxseq_size);
263 |
264 |
265 |
266 | dtype bestDIS = 0;
267 |
268 | int inputSize = trainExamples.size();
269 |
270 | int batchBlock = inputSize / m_options.batchSize;
271 | if (inputSize % m_options.batchSize != 0)
272 | batchBlock++;
273 |
274 | srand(0);
275 | std::vector<int> indexes;
276 | for (int i = 0; i < inputSize; ++i)
277 | indexes.push_back(i);
278 |
279 | static Metric eval, metric_dev, metric_test;
280 | static vector<Example> subExamples;
281 | int devNum = devExamples.size(), testNum = testExamples.size();
282 | for (int iter = 0; iter < m_options.maxIter; ++iter) {
283 | std::cout << "##### Iteration " << iter << std::endl;
284 |
285 | random_shuffle(indexes.begin(), indexes.end());
286 | eval.reset();
287 | for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
288 | subExamples.clear();
289 | int start_pos = updateIter * m_options.batchSize;
290 | int end_pos = (updateIter + 1) * m_options.batchSize;
291 | if (end_pos > inputSize)
292 | end_pos = inputSize;
293 |
294 | for (int idy = start_pos; idy < end_pos; idy++) {
295 | subExamples.push_back(trainExamples[indexes[idy]]);
296 | }
297 |
298 | int curUpdateIter = iter * batchBlock + updateIter;
299 | dtype cost = m_driver.train(subExamples, curUpdateIter);
300 |
301 | eval.overall_label_count += m_driver._eval.overall_label_count;
302 | eval.correct_label_count += m_driver._eval.correct_label_count;
303 |
304 | if ((curUpdateIter + 1) % m_options.verboseIter == 0) {
305 | //m_driver.checkgrad(subExamples, curUpdateIter + 1);
306 | std::cout << "current: " << updateIter + 1 << ", total block: " << batchBlock << std::endl;
307 | std::cout << "Cost = " << cost << ", Tag Correct(%) = " << eval.getAccuracy() << std::endl;
308 | }
309 | m_driver.updateModel();
310 |
311 | }
312 |
313 | if (devNum > 0) {
314 | bCurIterBetter = false;
315 | if (!m_options.outBest.empty())
316 | decodeInstResults.clear();
317 | metric_dev.reset();
318 | for (int idx = 0; idx < devExamples.size(); idx++) {
319 | string result_label;
320 | predict(devExamples[idx].m_densefeatures, result_label);
321 |
322 | devInsts[idx].Evaluate(result_label, metric_dev);
323 |
324 | if (!m_options.outBest.empty()) {
325 | curDecodeInst.copyValuesFrom(devInsts[idx]);
326 | curDecodeInst.assignLabel(result_label);
327 | decodeInstResults.push_back(curDecodeInst);
328 | }
329 | }
330 |
331 | std::cout << "dev:" << std::endl;
332 | metric_dev.print();
333 |
334 | if (!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS) {
335 | m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults);
336 | bCurIterBetter = true;
337 | }
338 |
339 | if (testNum > 0) {
340 | if (!m_options.outBest.empty())
341 | decodeInstResults.clear();
342 | metric_test.reset();
343 | for (int idx = 0; idx < testExamples.size(); idx++) {
344 | string result_label;
345 | predict(testExamples[idx].m_densefeatures, result_label);
346 |
347 | testInsts[idx].Evaluate(result_label, metric_test);
348 |
349 | if (bCurIterBetter && !m_options.outBest.empty()) {
350 | curDecodeInst.copyValuesFrom(testInsts[idx]);
351 | curDecodeInst.assignLabel(result_label);
352 | decodeInstResults.push_back(curDecodeInst);
353 | }
354 | }
355 | std::cout << "test:" << std::endl;
356 | metric_test.print();
357 |
358 | if (!m_options.outBest.empty() && bCurIterBetter) {
359 | m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults);
360 | }
361 | }
362 |
363 |
364 |
365 | if (m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS) {
366 | std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl;
367 | bestDIS = metric_dev.getAccuracy();
368 | writeModelFile(modelFile);
369 | }
370 |
371 | }
372 | // Clear gradients
373 | }
374 | }
375 |
376 | int Detector::predict(const vector<Feature>& features, string& output) {
377 | int labelIdx;
378 | m_driver.predict(features, labelIdx);
379 | output = m_driver._modelparams.labelAlpha.from_id(labelIdx, nullkey);
380 |
381 | if (output == nullkey)
382 | std::cout << "predict error" << std::endl;
383 | return 0;
384 | }
385 |
386 | void Detector::test(const string& testFile, const string& outputFile, const string& modelFile) {
387 | loadModelFile(modelFile);
388 | vector<Instance> testInsts;
389 | m_pipe.readInstances(testFile, testInsts);
390 |
391 | vector<Example> testExamples;
392 | initialExamples(testInsts, testExamples);
393 |
394 | int testNum = testExamples.size();
395 | vector<Instance> testInstResults;
396 | Metric metric_test;
397 | metric_test.reset();
398 | for (int idx = 0; idx < testExamples.size(); idx++) {
399 | string result_label;
400 | predict(testExamples[idx].m_densefeatures, result_label);
401 | testInsts[idx].Evaluate(result_label, metric_test);
402 | Instance curResultInst;
403 | curResultInst.copyValuesFrom(testInsts[idx]);
404 | curResultInst.assignLabel(result_label);
405 | testInstResults.push_back(curResultInst);
406 | }
407 | std::cout << "test:" << std::endl;
408 | metric_test.print();
409 |
410 | m_pipe.outputAllInstances(outputFile, testInstResults);
411 |
412 | }
413 |
414 |
415 | void Detector::loadModelFile(const string& inputModelFile) {
416 |
417 | }
418 |
419 | void Detector::writeModelFile(const string& outputModelFile) {
420 |
421 | }
422 |
423 | int main(int argc, char* argv[]) {
424 |
425 | std::string trainFile = "", devFile = "", testFile = "", modelFile = "", optionFile = "";
426 | std::string outputFile = "", wordEmbFile = "";
427 | bool bTrain = false;
428 | dsr::Argument_helper ah;
429 | int memsize = 1;
430 |
431 | ah.new_flag("l", "learn", "train or test", bTrain);
432 | ah.new_named_string("train", "trainCorpus", "named_string", "training corpus to train a model, must when training", trainFile);
433 | ah.new_named_string("dev", "devCorpus", "named_string", "development corpus to train a model, optional when training", devFile);
434 | ah.new_named_string("test", "testCorpus", "named_string",
435 | "testing corpus to train a model or input file to test a model, optional when training and must when testing", testFile);
436 | ah.new_named_string("option", "optionFile", "named_string", "option file to train a model, optional when training", optionFile);
437 | ah.new_named_string("model", "modelFile", "named_string", "model file, must when training and testing", modelFile);
438 | ah.new_named_string("output", "outputFile", "named_string", "output file to test, must when testing", outputFile);
439 | ah.new_named_int("mem", "memsize", "named_int", "memory allocated for tensor nodes", memsize);
440 |
441 | ah.process(argc, argv);
442 |
443 | Detector detector(memsize);
444 | detector.m_pipe.max_sentense_size = ComputionGraph::max_sentence_length;
445 | if (bTrain) {
446 | detector.train(trainFile, devFile, testFile, modelFile, optionFile);
447 | }
448 | else {
449 | detector.test(testFile, outputFile, modelFile);
450 | }
451 |
452 | //test(argv);
453 | //ah.write_values(std::cout);
454 | }
455 |
--------------------------------------------------------------------------------
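The epoch loop in `train` above shuffles example indexes and walks them in mini-batches of `batchSize`, with a shorter final batch when `inputSize` is not an exact multiple. The same slicing arithmetic as a minimal standalone sketch (the sizes are hypothetical):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
        int inputSize = 10, batchSize = 4;            // hypothetical sizes
        int batchBlock = inputSize / batchSize;
        if (inputSize % batchSize != 0) batchBlock++; // ceil(inputSize / batchSize)

        std::vector<int> indexes;
        for (int i = 0; i < inputSize; ++i) indexes.push_back(i);
        std::random_shuffle(indexes.begin(), indexes.end()); // new example order each epoch

        for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
            int start_pos = updateIter * batchSize;
            int end_pos = std::min((updateIter + 1) * batchSize, inputSize);
            std::printf("batch %d: examples [%d, %d)\n", updateIter, start_pos, end_pos);
        }
        return 0;
    }
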
/src/NNWordLocalContextSeparate/NNWordLocalContextSeparate.h:
--------------------------------------------------------------------------------
1 | /*
2 | * SparseDetector.h
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #ifndef SRC_SparseDetector_H_
9 | #define SRC_SparseDetector_H_
10 |
11 |
12 | #include "N3L.h"
13 | #include "Driver.h"
14 | #include "Options.h"
15 | #include "Instance.h"
16 | #include "Example.h"
17 | #include "Pipe.h"
18 | #include "Utf.h"
19 |
20 | using namespace nr;
21 | using namespace std;
22 |
23 | class Detector {
24 |
25 |
26 | public:
27 | unordered_map<string, int> m_feat_stats;
28 | unordered_map<string, int> m_word_stats;
29 | int m_maxseq_size;
30 |
31 | public:
32 | Options m_options;
33 |
34 | Pipe m_pipe;
35 |
36 | Driver m_driver;
37 |
38 |
39 | public:
40 | Detector(size_t memsize);
41 | virtual ~Detector();
42 |
43 | public:
44 |
45 | int createAlphabet(const vector<Instance>& vecTrainInsts);
46 | void addTestAlphabet(const vector<Instance>& vecInsts);
47 |
48 | void extractDenseFeatures(vector<Feature>& features, const Instance* pInstance);
49 | void extractLinearFeatures(vector<string>& features, const Instance* pInstance);
50 |
51 | void convert2Example(const Instance* pInstance, Example& exam);
52 | void initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams);
53 |
54 | public:
55 | void train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile);
56 | int predict(const vector<Feature>& features, string& outputs);
57 | void test(const string& testFile, const string& outputFile, const string& modelFile);
58 |
59 | void writeModelFile(const string& outputModelFile);
60 | void loadModelFile(const string& inputModelFile);
61 |
62 | };
63 |
64 | #endif /* SRC_SparseDetector_H_ */
65 |
--------------------------------------------------------------------------------
/src/NNWordLocalContextSeparate/model/ComputionGraph.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ComputionGraph_H_
2 | #define SRC_ComputionGraph_H_
3 |
4 | #include "ModelParams.h"
5 |
6 |
7 | // Each model consists of two parts: building the neural graph and defining the output losses.
8 | struct ComputionGraph : Graph {
9 | public:
10 | const static int max_sentence_length = 256;
11 |
12 | public:
13 | // node instances
14 | vector<vector<LookupNode> > word_inputs;
15 | WindowBuilder word_window;
16 |
17 | LSTM1Builder left_lstm;
18 | LSTM1Builder right_lstm;
19 |
20 | vector<ConcatNode> concat_bilstm;
21 | GatedPoolBuilder local_gated_pooling;
22 | GatedPoolBuilder context_gated_pooling;
23 |
24 | Node padding;
25 | ConcatNode concat_local_context;
26 | UniNode sent_hidden;
27 | LinearNode output;
28 |
29 | public:
30 | ComputionGraph() : Graph() {
31 | }
32 |
33 | ~ComputionGraph() {
34 | clear();
35 | }
36 |
37 | public:
38 | //allocate enough nodes
39 | inline void createNodes(int sent_length, int maxseq_size) {
40 |
41 | resizeVec(word_inputs, maxseq_size, sent_length);
42 | word_window.resize(sent_length);
43 | left_lstm.resize(sent_length);
44 | right_lstm.resize(sent_length);
45 |
46 | concat_bilstm.resize(sent_length);
47 | local_gated_pooling.resize(sent_length);
48 | context_gated_pooling.resize(sent_length);
49 | }
50 |
51 | inline void clear() {
52 | Graph::clear();
53 | clearVec(word_inputs);
54 | word_window.clear();
55 | left_lstm.clear();
56 | right_lstm.clear();
57 |
58 | concat_bilstm.clear();
59 | local_gated_pooling.clear();
60 | context_gated_pooling.clear();
61 |
62 | }
63 |
64 |
65 | public:
66 | inline void initial(ModelParams& model, HyperParams& opts, AlignedMemoryPool* mem) {
67 | int seq_size = word_inputs.size();
68 |
69 | for (int i = 0; i < seq_size; i++){
70 | for (int idx = 0; idx < word_inputs[i].size(); idx++) {
71 | if (i == seq_size - 1){
72 | word_inputs[i][idx].init(model.words.nDim, opts.dropOut, mem);
73 | word_inputs[i][idx].setParam(&model.words);
74 | concat_bilstm[idx].init(opts.rnnhiddensize * 2, -1, mem);
75 | }
76 | else{
77 | word_inputs[i][idx].init(model.words.nDim, opts.dropOut, mem);
78 | word_inputs[i][idx].setParam(&model.history_words);
79 | }
80 | }
81 | }
82 | word_window.init(model.words.nDim, opts.wordcontext, mem);
83 | left_lstm.init(&model.left_lstm_project, opts.dropOut, true, mem);
84 | right_lstm.init(&model.right_lstm_project, opts.dropOut, false, mem);
85 |
86 | local_gated_pooling.init(&model.local_gatedpool_project, mem);
87 | context_gated_pooling.init(&model.context_gatedpool_project, mem);
88 |
89 | concat_local_context.init(model.words.nDim + opts.rnnhiddensize * 2, -1, mem);
90 | sent_hidden.init(opts.hiddensize, opts.dropOut, mem);
91 | sent_hidden.setParam(&model.sent_tanh_project);
92 | output.init(opts.labelSize, -1, mem);
93 | output.setParam(&model.olayer_linear);
94 |
95 | padding.init(model.words.nDim, -1, mem);
96 | }
97 |
98 |
99 | public:
100 | // some nodes may behave differently during training and decoding, for example, dropout
101 | inline void forward(const vector<Feature>& features, bool bTrain = false) {
102 | //first step: clear value
103 | clearValue(bTrain); // compute is a must step for train, predict and cost computation
104 |
105 | // second step: build graph
106 | int seq_size = features.size();
107 | //forward
108 | // word-level neural networks
109 | for (int i = 0; i < seq_size; i++) {
110 |
111 | const Feature& feature = features[i];
112 | int wordnum = feature.words.size();
113 | if (wordnum > max_sentence_length)
114 | wordnum = max_sentence_length;
115 |
116 | if (i == seq_size - 1) {
117 | for (int idx = 0; idx < wordnum; idx++) {
118 | //input
119 | word_inputs[1][idx].forward(this, feature.words[idx]);
120 | }
121 | //windowlized
122 | word_window.forward(this, getPNodes(word_inputs[1], wordnum));
123 | left_lstm.forward(this, getPNodes(word_window._outputs, wordnum));
124 | right_lstm.forward(this, getPNodes(word_window._outputs, wordnum));
125 |
126 | for (int idx = 0; idx < wordnum; idx++) {
127 | //feed-forward
128 | concat_bilstm[idx].forward(this, &(left_lstm._hiddens[idx]), &(right_lstm._hiddens[idx]));
129 | }
130 | local_gated_pooling.forward(this, getPNodes(concat_bilstm, wordnum));
131 | }
132 |
133 | else {
134 | for (int idx = 0; idx < wordnum; idx++) {
135 | //input
136 | word_inputs[0][idx].forward(this, feature.words[idx]);
137 | }
138 | context_gated_pooling.forward(this, getPNodes(word_inputs[0], wordnum));
139 | }
140 | }
141 |
142 |
143 | if (seq_size == 1)
144 | concat_local_context.forward(this, &padding, &local_gated_pooling._output);
145 | else
146 | concat_local_context.forward(this, &context_gated_pooling._output, &local_gated_pooling._output);
147 | sent_hidden.forward(this, &concat_local_context);
148 | output.forward(this, &sent_hidden);
149 | }
150 |
151 | };
152 |
153 | #endif /* SRC_ComputionGraph_H_ */
--------------------------------------------------------------------------------
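The only difference between this graph and the `NNWordLocalContext` one is the lookup: words of the local (last) sentence are embedded through `model.words`, while context words go through `model.history_words`, i.e. two separately trained tables over the same vocabulary. A toy sketch of that split; the map-based table, dimension, and values are invented for illustration:

    #include <cstdio>
    #include <string>
    #include <unordered_map>
    #include <vector>

    int main() {
        // Two independent embedding tables keyed by the same vocabulary.
        std::unordered_map<std::string, std::vector<double> > words, history_words;
        words["great"]         = std::vector<double>(3, 0.1); // local (last) sentence lookups
        history_words["great"] = std::vector<double>(3, 0.9); // context sentence lookups
        bool isLocal = false; // pretend we are embedding a context word
        const std::vector<double>& emb = isLocal ? words["great"] : history_words["great"];
        std::printf("emb[0] = %g\n", emb[0]);
        return 0;
    }
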
/src/NNWordLocalContextSeparate/model/Driver.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Driver.h
3 | *
4 | * Created on: Mar 18, 2015
5 | * Author: mszhang
6 | */
7 |
8 | #ifndef SRC_Driver_H_
9 | #define SRC_Driver_H_
10 |
11 | #include <iostream>
12 | #include "ComputionGraph.h"
13 |
14 |
15 | //A neural network classifier
16 |
17 | class Driver{
18 | public:
19 | Driver(size_t memsize) : aligned_mem(memsize){
20 | _pcg = NULL;
21 | }
22 |
23 | ~Driver() {
24 | if (_pcg != NULL)
25 | delete _pcg;
26 | _pcg = NULL;
27 | }
28 |
29 | public:
30 | ComputionGraph *_pcg; // build neural graphs
31 | ModelParams _modelparams; // model parameters
32 | HyperParams _hyperparams;
33 |
34 | Metric _eval;
35 | CheckGrad _checkgrad;
36 | ModelUpdate _ada; // model update
37 | AlignedMemoryPool aligned_mem;
38 |
39 |
40 | public:
41 | inline void initial(int maxseq_size) {
42 | if (!_hyperparams.bValid()){
43 | std::cout << "hyper parameter initialization Error, Please check!" << std::endl;
44 | return;
45 | }
46 | if (!_modelparams.initial(_hyperparams, &aligned_mem)){
47 | std::cout << "model parameter initialization Error, Please check!" << std::endl;
48 | return;
49 | }
50 | _modelparams.exportModelParams(_ada);
51 | _modelparams.exportCheckGradParams(_checkgrad);
52 |
53 | _hyperparams.print();
54 |
55 | _pcg = new ComputionGraph();
56 | _pcg->createNodes(ComputionGraph::max_sentence_length, maxseq_size);
57 | _pcg->initial(_modelparams, _hyperparams, &aligned_mem);
58 |
59 | setUpdateParameters(_hyperparams.nnRegular, _hyperparams.adaAlpha, _hyperparams.adaEps);
60 | }
61 |
62 |
63 | inline dtype train(const vector<Example>& examples, int iter) {
64 | _eval.reset();
65 |
66 | int example_num = examples.size();
67 | dtype cost = 0.0;
68 |
69 | for (int count = 0; count < example_num; count++) {
70 | const Example& example = examples[count];
71 |
72 | //forward
73 | _pcg->forward(example.m_densefeatures, true);
74 |
75 | //loss function
76 | int seq_size = example.m_densefeatures.size();
77 | int wordnum = example.m_densefeatures[seq_size - 1].words.size();
78 | cost += _modelparams.loss.loss(&_pcg->output, example.m_labels, _eval, example_num);
79 |
80 | // backward, which exists only for training
81 | _pcg->backward();
82 | }
83 |
84 | if (_eval.getAccuracy() < 0) {
85 | std::cout << "strange" << std::endl;
86 | }
87 |
88 | return cost;
89 | }
90 |
91 | inline void predict(const vector<Feature> densefeatures, int& results) {
92 | _pcg->forward(densefeatures);
93 | _modelparams.loss.predict(&_pcg->output, results);
94 | }
95 |
96 | inline dtype cost(const Example& example){
97 | _pcg->forward(example.m_densefeatures); //forward here
98 |
99 | int seq_size = example.m_densefeatures.size();
100 |
101 | dtype cost = 0.0;
102 |
103 | cost += _modelparams.loss.cost(&_pcg->output, example.m_labels, 1);
104 |
105 | return cost;
106 | }
107 |
108 | void checkgrad(const vector<Example>& examples, int iter){
109 | ostringstream out;
110 | out << "Iteration: " << iter;
111 | _checkgrad.check(this, examples, out.str());
112 | }
113 |
114 | void updateModel() {
115 | _ada.update();
116 | //_ada.update(5.0);
117 | }
118 |
119 | void writeModel();
120 |
121 | void loadModel();
122 |
123 |
124 |
125 | private:
126 | inline void resetEval() {
127 | _eval.reset();
128 | }
129 |
130 |
131 | inline void setUpdateParameters(dtype nnRegular, dtype adaAlpha, dtype adaEps){
132 | _ada._alpha = adaAlpha;
133 | _ada._eps = adaEps;
134 | _ada._reg = nnRegular;
135 | }
136 |
137 | };
138 |
139 | #endif /* SRC_Driver_H_ */
140 |
--------------------------------------------------------------------------------
/src/NNWordLocalContextSeparate/model/HyperParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_HyperParams_H_
2 | #define SRC_HyperParams_H_
3 |
4 | #include "N3L.h"
5 | #include "Example.h"
6 | #include "Options.h"
7 |
8 | using namespace nr;
9 | using namespace std;
10 |
11 | struct HyperParams{
12 |
13 | // must assign
14 | int wordcontext;
15 | int hiddensize;
16 | int rnnhiddensize;
17 | dtype dropOut;
18 |
19 | // must assign
20 | dtype nnRegular; // for optimization
21 | dtype adaAlpha; // for optimization
22 | dtype adaEps; // for optimization
23 |
24 | //auto generated
25 | int wordwindow;
26 | int wordDim;
27 | int inputsize;
28 | int labelSize;
29 |
30 | public:
31 | HyperParams(){
32 | bAssigned = false;
33 | }
34 |
35 | public:
36 | void setRequired(Options& opt){
37 | wordcontext = opt.wordcontext;
38 | hiddensize = opt.hiddenSize;
39 | rnnhiddensize = opt.rnnHiddenSize;
40 | dropOut = opt.dropProb;
41 |
42 | nnRegular = opt.regParameter;
43 | adaAlpha = opt.adaAlpha;
44 | adaEps = opt.adaEps;
45 |
46 | bAssigned = true;
47 | }
48 |
49 | void clear(){
50 | bAssigned = false;
51 | }
52 |
53 | bool bValid(){
54 | return bAssigned;
55 | }
56 |
57 |
58 | public:
59 |
60 | void print(){
61 |
62 | }
63 |
64 | private:
65 | bool bAssigned;
66 | };
67 |
68 |
69 | #endif /* SRC_HyperParams_H_ */
--------------------------------------------------------------------------------
/src/NNWordLocalContextSeparate/model/ModelParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ModelParams_H_
2 | #define SRC_ModelParams_H_
3 | #include "HyperParams.h"
4 |
5 | // Each model consists of two parts: building the neural graph and defining the output losses.
6 | class ModelParams{
7 |
8 | public:
9 | Alphabet wordAlpha; // should be initialized outside
10 | Alphabet featAlpha; //should be initialized outside
11 | Alphabet labelAlpha; // should be initialized outside
12 | public:
13 | LookupTable words; // should be initialized outside
14 | LookupTable history_words; // should be initialized outside
15 | LSTM1Params left_lstm_project; //left lstm
16 | LSTM1Params right_lstm_project; //right lstm
17 | GatedPoolParam local_gatedpool_project; //local gated pooling
18 | GatedPoolParam context_gatedpool_project; //context gated pooling
19 | UniParams sent_tanh_project; // sentence hidden
20 | UniParams olayer_linear; // output
21 | public:
22 | SoftMaxLoss loss;
23 |
24 |
25 | public:
26 | bool initial(HyperParams& opts, AlignedMemoryPool* mem){
27 |
28 | // some model parameters should be initialized outside
29 | if (words.nVSize <= 0 || history_words.nVSize <= 0 || labelAlpha.size() <= 0) {
30 | return false;
31 | }
32 | opts.wordDim = words.nDim;
33 | opts.wordwindow = 2 * opts.wordcontext + 1;
34 | opts.inputsize = opts.wordwindow * opts.wordDim;
35 | int senthiddensize = opts.rnnhiddensize * 2 + words.nDim;
36 |
37 | left_lstm_project.initial(opts.rnnhiddensize, opts.inputsize, mem);
38 | right_lstm_project.initial(opts.rnnhiddensize, opts.inputsize, mem);
39 | local_gatedpool_project.initial(opts.rnnhiddensize * 2, opts.rnnhiddensize * 2, mem);
40 | context_gatedpool_project.initial(opts.wordDim, opts.wordDim, mem);
41 | sent_tanh_project.initial(opts.hiddensize, senthiddensize, mem);
42 | opts.labelSize = labelAlpha.size();
43 | olayer_linear.initial(opts.labelSize, opts.hiddensize, false, mem);
44 |
45 | return true;
46 | }
47 |
48 |
49 | void exportModelParams(ModelUpdate& ada){
50 | words.exportAdaParams(ada);
51 | history_words.exportAdaParams(ada);
52 | left_lstm_project.exportAdaParams(ada);
53 | right_lstm_project.exportAdaParams(ada);
54 | local_gatedpool_project.exportAdaParams(ada);
55 | context_gatedpool_project.exportAdaParams(ada);
56 | sent_tanh_project.exportAdaParams(ada);
57 | olayer_linear.exportAdaParams(ada);
58 | }
59 |
60 |
61 | void exportCheckGradParams(CheckGrad& checkgrad){
62 | checkgrad.add(&(sent_tanh_project.W), "sent_tanh_project.W");
63 | checkgrad.add(&(sent_tanh_project.b), "sent_tanh_project.b");
64 |
65 | checkgrad.add(&(context_gatedpool_project._uni_gate_param.W), "context_gatedpool_project.W");
66 | checkgrad.add(&(context_gatedpool_project._uni_gate_param.b), "context_gatedpool_project.b");
67 | checkgrad.add(&(local_gatedpool_project._uni_gate_param.W), "local_gatedpool_project.W");
68 | checkgrad.add(&(local_gatedpool_project._uni_gate_param.b), "local_gatedpool_project.b");
69 |
70 | checkgrad.add(&(right_lstm_project.cell.W1), "right_lstm_project.cell.W1");
71 |
72 | checkgrad.add(&(words.E), "_words.E");
73 | checkgrad.add(&(history_words.E), "_history_words.E");
74 |
75 | }
76 |
77 | // will add it later
78 | void saveModel(){
79 |
80 | }
81 |
82 | void loadModel(const string& inFile){
83 |
84 | }
85 |
86 | };
87 |
88 | #endif /* SRC_ModelParams_H_ */
--------------------------------------------------------------------------------
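`exportCheckGradParams` above registers tensors whose analytic gradients get compared against numerical ones. The underlying idea, shown on a one-parameter toy function with a central difference (an illustration, not LibN3L-2.0's `CheckGrad`):

    #include <cmath>
    #include <cstdio>

    // Toy objective f(w) = (w - 3)^2 with analytic gradient 2 * (w - 3).
    double f(double w) { return (w - 3.0) * (w - 3.0); }

    int main() {
        double w = 1.0, eps = 1e-6;
        double numeric  = (f(w + eps) - f(w - eps)) / (2.0 * eps); // central difference
        double analytic = 2.0 * (w - 3.0);
        std::printf("numeric=%g analytic=%g diff=%g\n",
                    numeric, analytic, std::fabs(numeric - analytic));
        return 0;
    }
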
/src/SparseLocalContext/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | include_directories(
3 | model
4 | )
5 |
6 | add_executable(SparseLocalContext SparseLocalContext.cpp)
7 |
8 |
--------------------------------------------------------------------------------
/src/SparseLocalContext/SparseLocalContext.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * SparseDetector.cpp
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #include "SparseLocalContext.h"
9 |
10 | #include "Argument_helper.h"
11 |
12 | Detector::Detector() {
13 | // TODO Auto-generated constructor stub
14 | srand(0);
15 | }
16 |
17 | Detector::~Detector() {
18 | // TODO Auto-generated destructor stub
19 | }
20 |
21 | int Detector::createAlphabet(const vector<Instance>& vecInsts) {
22 | if (vecInsts.size() == 0){
23 | std::cout << "training set empty" << std::endl;
24 | return -1;
25 | }
26 | cout << "Creating Alphabet..." << endl;
27 |
28 | int numInstance;
29 |
30 | m_driver._modelparams.labelAlpha.clear();
31 | // label alphabet and word statistics
32 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
33 | const Instance *pInstance = &vecInsts[numInstance];
34 |
35 | const vector<vector<string> > &words = pInstance->words;
36 | const string &label = pInstance->label;
37 |
38 | int labelId = m_driver._modelparams.labelAlpha.from_string(label);
39 |
40 |
41 | if ((numInstance + 1) % m_options.verboseIter == 0) {
42 | cout << numInstance + 1 << " ";
43 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
44 | cout << std::endl;
45 | cout.flush();
46 | }
47 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
48 | break;
49 | }
50 |
51 | cout << numInstance << " " << endl;
52 | cout << "Label num: " << m_driver._modelparams.labelAlpha.size() << endl;
53 |
54 | m_driver._modelparams.labelAlpha.set_fixed_flag(true);
55 |
56 | if (m_options.linearfeatCat > 0){
57 | cout << "Extracting linear features..." << endl;
58 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++){
59 | const Instance *pInstance = &vecInsts[numInstance];
60 | vector<string> linearfeat;
61 | extractLinearFeatures(linearfeat, pInstance);
62 | for (int i = 0; i < linearfeat.size(); i++)
63 | m_feat_stats[linearfeat[i]] ++;
64 | }
65 | m_feat_stats[unknownkey] = m_options.featCutOff + 1;
66 | cout << "Total feature num: " << m_feat_stats.size() << endl;
67 | m_driver._modelparams.featAlpha.initial(m_feat_stats, m_options.featCutOff);
68 | cout << "Remain feature num:" << m_driver._modelparams.featAlpha.size() << endl;
69 | m_driver._modelparams.featAlpha.set_fixed_flag(true);
70 | }
71 | return 0;
72 | }
73 |
74 |
75 |
76 |
77 | void Detector::extractLinearFeatures(vector<string>& feat, const Instance* pInstance) {
78 | feat.clear();
79 |
80 | const vector<vector<string> >& words = pInstance->words;
81 | int seq_size = pInstance->seqsize();
82 | assert(seq_size < 3);
83 | //Current sent linear feature
84 | const vector<string>& lastWords = words[seq_size - 1];
85 | int wordnumber = lastWords.size();
86 | string strfeat = "", curWord = "", preWord = "", pre2Word = "";
87 | for (int i = 0; i < wordnumber; i++){
88 | curWord = normalize_to_lowerwithdigit(lastWords[i]);
89 | strfeat = "F1U=" + curWord;
90 | feat.push_back(strfeat);
91 | preWord = i - 1 >= 0 ? lastWords[i - 1] : nullkey;
92 | strfeat = "F2B=" + preWord + seperateKey + curWord;
93 | feat.push_back(strfeat);
94 | pre2Word = i - 2 >= 0 ? lastWords[i - 2] : nullkey;
95 | strfeat = "F3T=" + pre2Word + seperateKey + preWord + seperateKey + curWord;
96 | feat.push_back(strfeat);
97 | }
98 |
99 | //History feature
100 | if (m_options.linearfeatCat > 1 && seq_size == 2){
101 | const vector<string>& historyWords = words[seq_size - 2];
102 | wordnumber = historyWords.size();
103 | for (int i = 0; i < wordnumber; i++){
104 | strfeat = "F4U=" + historyWords[i];
105 | feat.push_back(strfeat);
106 | }
107 | }
108 | }
109 |
110 | void Detector::convert2Example(const Instance* pInstance, Example& exam) {
111 | exam.clear();
112 |
113 | const string &instlabel = pInstance->label;
114 | const Alphabet &labelAlpha = m_driver._modelparams.labelAlpha;
115 |
116 | int labelnum = labelAlpha.size();
117 | for (int i = 0; i < labelnum; i++){
118 | string str = labelAlpha.from_id(i);
119 | if (instlabel.compare(str) == 0)
120 | exam.m_labels.push_back(1.0);
121 | else
122 | exam.m_labels.push_back(0.0);
123 | }
124 |
125 | //linear feature
126 | if (m_options.linearfeatCat > 0)
127 | extractLinearFeatures(exam.m_linearfeatures, pInstance);
128 |
129 | }
130 |
131 | void Detector::initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams) {
132 | int numInstance;
133 | for (numInstance = 0; numInstance < vecInsts.size(); numInstance++) {
134 | const Instance *pInstance = &vecInsts[numInstance];
135 | Example curExam;
136 | convert2Example(pInstance, curExam);
137 | vecExams.push_back(curExam);
138 |
139 | if ((numInstance + 1) % m_options.verboseIter == 0) {
140 | cout << numInstance + 1 << " ";
141 | if ((numInstance + 1) % (40 * m_options.verboseIter) == 0)
142 | cout << std::endl;
143 | cout.flush();
144 | }
145 | if (m_options.maxInstance > 0 && numInstance == m_options.maxInstance)
146 | break;
147 | }
148 |
149 | cout << numInstance << " " << endl;
150 | }
151 |
152 | void Detector::train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile) {
153 | if (optionFile != "")
154 | m_options.load(optionFile);
155 | m_options.showOptions();
156 | vector<Instance> trainInsts, devInsts, testInsts;
157 | static vector<Instance> decodeInstResults;
158 | static Instance curDecodeInst;
159 | bool bCurIterBetter = false;
160 |
161 | m_pipe.readInstances(trainFile, trainInsts, m_options.maxInstance);
162 | if (devFile != "")
163 | m_pipe.readInstances(devFile, devInsts, m_options.maxInstance);
164 | if (testFile != "")
165 | m_pipe.readInstances(testFile, testInsts, m_options.maxInstance);
166 |
167 | std::cout << "Training example number: " << trainInsts.size() << std::endl;
168 | std::cout << "Dev example number: " << trainInsts.size() << std::endl;
169 | std::cout << "Test example number: " << trainInsts.size() << std::endl;
170 |
171 | createAlphabet(trainInsts);
172 | vector<Example> trainExamples, devExamples, testExamples;
173 |
174 | std::cout << "Instance convert to example... " << std::endl;
175 | initialExamples(trainInsts, trainExamples);
176 | initialExamples(devInsts, devExamples);
177 | initialExamples(testInsts, testExamples);
178 |
179 | m_driver._hyperparams.setRequired(m_options);
180 | m_driver.initial();
181 |
182 | dtype bestDIS = 0;
183 |
184 | int inputSize = trainExamples.size();
185 |
186 | int batchBlock = inputSize / m_options.batchSize;
187 | if (inputSize % m_options.batchSize != 0)
188 | batchBlock++;
189 |
190 | srand(0);
191 | std::vector<int> indexes;
192 | for (int i = 0; i < inputSize; ++i)
193 | indexes.push_back(i);
194 |
195 | static Metric eval, metric_dev, metric_test;
196 | static vector<Example> subExamples;
197 | int devNum = devExamples.size(), testNum = testExamples.size();
198 | for (int iter = 0; iter < m_options.maxIter; ++iter) {
199 | std::cout << "##### Iteration " << iter << std::endl;
200 |
201 | random_shuffle(indexes.begin(), indexes.end());
202 | eval.reset();
203 | for (int updateIter = 0; updateIter < batchBlock; updateIter++) {
204 | subExamples.clear();
205 | int start_pos = updateIter * m_options.batchSize;
206 | int end_pos = (updateIter + 1) * m_options.batchSize;
207 | if (end_pos > inputSize)
208 | end_pos = inputSize;
209 |
210 | for (int idy = start_pos; idy < end_pos; idy++) {
211 | subExamples.push_back(trainExamples[indexes[idy]]);
212 | }
213 |
214 | int curUpdateIter = iter * batchBlock + updateIter;
215 | dtype cost = m_driver.train(subExamples, curUpdateIter);
216 |
217 | eval.overall_label_count += m_driver._eval.overall_label_count;
218 | eval.correct_label_count += m_driver._eval.correct_label_count;
219 |
220 | if ((curUpdateIter + 1) % m_options.verboseIter == 0) {
221 | //m_driver.checkgrad(subExamples, curUpdateIter + 1);
222 | std::cout << "current: " << updateIter + 1 << ", total block: " << batchBlock << std::endl;
223 | std::cout << "Cost = " << cost << ", Tag Correct(%) = " << eval.getAccuracy() << std::endl;
224 | }
225 | m_driver.updateModel();
226 |
227 | }
228 |
229 | if (devNum > 0) {
230 | bCurIterBetter = false;
231 | if (!m_options.outBest.empty())
232 | decodeInstResults.clear();
233 | metric_dev.reset();
234 | for (int idx = 0; idx < devExamples.size(); idx++) {
235 | string result_label;
236 | predict(devExamples[idx].m_linearfeatures, result_label);
237 |
238 | devInsts[idx].Evaluate(result_label, metric_dev);
239 |
240 | if (!m_options.outBest.empty()) {
241 | curDecodeInst.copyValuesFrom(devInsts[idx]);
242 | curDecodeInst.assignLabel(result_label);
243 | decodeInstResults.push_back(curDecodeInst);
244 | }
245 | }
246 |
247 | std::cout << "dev:" << std::endl;
248 | metric_dev.print();
249 |
250 | if (!m_options.outBest.empty() && metric_dev.getAccuracy() > bestDIS) {
251 | m_pipe.outputAllInstances(devFile + m_options.outBest, decodeInstResults);
252 | bCurIterBetter = true;
253 | }
254 |
255 | if (testNum > 0) {
256 | if (!m_options.outBest.empty())
257 | decodeInstResults.clear();
258 | metric_test.reset();
259 | for (int idx = 0; idx < testExamples.size(); idx++) {
260 | string result_label;
261 | predict(testExamples[idx].m_linearfeatures, result_label);
262 |
263 | testInsts[idx].Evaluate(result_label, metric_test);
264 |
265 | if (bCurIterBetter && !m_options.outBest.empty()) {
266 | curDecodeInst.copyValuesFrom(testInsts[idx]);
267 | curDecodeInst.assignLabel(result_label);
268 | decodeInstResults.push_back(curDecodeInst);
269 | }
270 | }
271 | std::cout << "test:" << std::endl;
272 | metric_test.print();
273 |
274 | if (!m_options.outBest.empty() && bCurIterBetter) {
275 | m_pipe.outputAllInstances(testFile + m_options.outBest, decodeInstResults);
276 | }
277 | }
278 |
279 |
280 |
281 | if (m_options.saveIntermediate && metric_dev.getAccuracy() > bestDIS) {
282 | std::cout << "Exceeds best previous performance of " << bestDIS << ". Saving model file.." << std::endl;
283 | bestDIS = metric_dev.getAccuracy();
284 | writeModelFile(modelFile);
285 | }
286 |
287 | }
288 | // Clear gradients
289 | }
290 | }
291 |
292 | int Detector::predict(const vector<string>& features, string& output) {
293 | int labelIdx;
294 | m_driver.predict(features, labelIdx);
295 | output = m_driver._modelparams.labelAlpha.from_id(labelIdx, nullkey);
296 |
297 | if (output == nullkey)
298 | std::cout << "predict error" << std::endl;
299 | return 0;
300 | }
301 |
302 | void Detector::test(const string& testFile, const string& outputFile, const string& modelFile) {
303 | loadModelFile(modelFile);
304 | vector<Instance> testInsts;
305 | m_pipe.readInstances(testFile, testInsts);
306 |
307 | vector<Example> testExamples;
308 | initialExamples(testInsts, testExamples);
309 |
310 | int testNum = testExamples.size();
311 | vector<Instance> testInstResults;
312 | Metric metric_test;
313 | metric_test.reset();
314 | for (int idx = 0; idx < testExamples.size(); idx++) {
315 | string result_label;
316 | predict(testExamples[idx].m_linearfeatures, result_label);
317 | testInsts[idx].Evaluate(result_label, metric_test);
318 | Instance curResultInst;
319 | curResultInst.copyValuesFrom(testInsts[idx]);
320 | curResultInst.assignLabel(result_label);
321 | testInstResults.push_back(curResultInst);
322 | }
323 | std::cout << "test:" << std::endl;
324 | metric_test.print();
325 |
326 | m_pipe.outputAllInstances(outputFile, testInstResults);
327 |
328 | }
329 |
330 |
331 | void Detector::loadModelFile(const string& inputModelFile) {
332 |
333 | }
334 |
335 | void Detector::writeModelFile(const string& outputModelFile) {
336 |
337 | }
338 |
339 | int main(int argc, char* argv[]) {
340 |
341 | std::string trainFile = "", devFile = "", testFile = "", modelFile = "", optionFile = "";
342 | std::string outputFile = "";
343 | bool bTrain = false;
344 | dsr::Argument_helper ah;
345 |
346 | ah.new_flag("l", "learn", "train or test", bTrain);
347 | ah.new_named_string("train", "trainCorpus", "named_string", "training corpus to train a model, must when training", trainFile);
348 | ah.new_named_string("dev", "devCorpus", "named_string", "development corpus to train a model, optional when training", devFile);
349 | ah.new_named_string("test", "testCorpus", "named_string",
350 | "testing corpus to train a model or input file to test a model, optional when training and must when testing", testFile);
351 | ah.new_named_string("model", "modelFile", "named_string", "model file, must when training and testing", modelFile);
352 | ah.new_named_string("option", "optionFile", "named_string", "option file to train a model, optional when training", optionFile);
353 | ah.new_named_string("output", "outputFile", "named_string", "output file to test, must when testing", outputFile);
354 |
355 | ah.process(argc, argv);
356 |
357 | Detector detector;
358 | detector.m_pipe.max_sentense_size = ComputionGraph::max_sentence_length;
359 | if (bTrain) {
360 | detector.train(trainFile, devFile, testFile, modelFile, optionFile);
361 | }
362 | else {
363 | detector.test(testFile, outputFile, modelFile);
364 | }
365 |
366 | //test(argv);
367 | //ah.write_values(std::cout);
368 | }
369 |
--------------------------------------------------------------------------------
/src/SparseLocalContext/SparseLocalContext.h:
--------------------------------------------------------------------------------
1 | /*
2 | * SparseDetector.h
3 | *
4 | * Created on: Oct 23, 2016
5 | * Author: DaPan
6 | */
7 |
8 | #ifndef SRC_SparseDetector_H_
9 | #define SRC_SparseDetector_H_
10 |
11 |
12 | #include "N3L.h"
13 | #include "Driver.h"
14 | #include "Options.h"
15 | #include "Instance.h"
16 | #include "Example.h"
17 | #include "Pipe.h"
18 | #include "Utf.h"
19 |
20 | using namespace nr;
21 | using namespace std;
22 |
23 | class Detector {
24 |
25 |
26 | public:
27 | unordered_map<string, int> m_feat_stats;
28 |
29 | public:
30 | Options m_options;
31 |
32 | Pipe m_pipe;
33 |
34 | Driver m_driver;
35 |
36 |
37 | public:
38 | Detector();
39 | virtual ~Detector();
40 |
41 | public:
42 |
43 | int createAlphabet(const vector<Instance>& vecTrainInsts);
44 |
45 | void extractLinearFeatures(vector<string>& features, const Instance* pInstance);
46 |
47 | void convert2Example(const Instance* pInstance, Example& exam);
48 | void initialExamples(const vector<Instance>& vecInsts, vector<Example>& vecExams);
49 |
50 | public:
51 | void train(const string& trainFile, const string& devFile, const string& testFile, const string& modelFile, const string& optionFile);
52 | int predict(const vector<string>& features, string& outputs);
53 | void test(const string& testFile, const string& outputFile, const string& modelFile);
54 |
55 | void writeModelFile(const string& outputModelFile);
56 | void loadModelFile(const string& inputModelFile);
57 |
58 | };
59 |
60 | #endif /* SRC_SparseDetector_H_ */
61 |
--------------------------------------------------------------------------------
/src/SparseLocalContext/model/ComputionGraph.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ComputionGraph_H_
2 | #define SRC_ComputionGraph_H_
3 |
4 | #include "ModelParams.h"
5 |
6 |
7 | // Each model consists of two parts: building the neural graph and defining the output losses.
8 | struct ComputionGraph : Graph{
9 | public:
10 | const static int max_sentence_length = 256;
11 |
12 | public:
13 | // node instances
14 | SparseNode output;
15 | public:
16 | ComputionGraph() : Graph(){
17 | }
18 |
19 | ~ComputionGraph(){
20 | clear();
21 | }
22 |
23 | public:
24 | //allocate enough nodes
25 | inline void createNodes(int sent_length, int typeNum){
26 | // nothing to pre-allocate: the sparse model uses a single output node
27 | }
28 |
29 | inline void clear(){
30 | Graph::clear();
31 | }
32 |
33 | public:
34 | inline void initial(ModelParams& model, HyperParams& opts){
35 | output.setParam(&model.sparselayer);
36 | output.init(opts.labelSize,-1);
37 | }
38 |
39 |
40 | public:
41 | // some nodes behave differently during training and decoding, e.g., dropout
42 | inline void forward(const vector<string>& features, bool bTrain = false){
43 | //first step: clear value
44 | clearValue(bTrain); // clearing values is required before training, prediction, and cost computation
45 |
46 |
47 | // second step: build graph
48 | //forward
49 | output.forward(this, features);
50 | }
51 |
52 | };
53 |
54 | #endif /* SRC_ComputionGraph_H_ */
--------------------------------------------------------------------------------
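Note on the graph above: initial() binds the single output node to the model's sparse layer, and forward() scores a bag of feature strings; there are no per-word nodes in this sparse variant. A minimal decode-time sketch, assuming model and opts were initialized as in Driver.h below:

    ComputionGraph cg;
    cg.initial(model, opts);            // bind cg.output to model.sparselayer
    cg.forward(exam.m_linearfeatures);  // bTrain defaults to false: no dropout, no gradient bookkeeping

The per-label scores are then read off cg.output, e.g. by SoftMaxLoss::predict as in Driver::predict.
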
/src/SparseLocalContext/model/Driver.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Driver.h
3 | *
4 | * Created on: Mar 18, 2015
5 | * Author: mszhang
6 | */
7 |
8 | #ifndef SRC_Driver_H_
9 | #define SRC_Driver_H_
10 |
11 | #include <iostream>
12 | #include "ComputionGraph.h"
13 |
14 |
15 | //A naive neural network classifier using only linear features
16 |
17 | class Driver{
18 | public:
19 | Driver() {
20 | _pcg = NULL;
21 | }
22 |
23 | ~Driver() {
24 | if (_pcg != NULL)
25 | delete _pcg;
26 | _pcg = NULL;
27 | }
28 |
29 | public:
30 | ComputionGraph *_pcg; // build neural graphs
31 | ModelParams _modelparams; // model parameters
32 | HyperParams _hyperparams;
33 |
34 | Metric _eval;
35 | CheckGrad _checkgrad;
36 | ModelUpdate _ada; // model update
37 |
38 |
39 | public:
40 | inline void initial() {
41 | if (!_hyperparams.bValid()){
42 | std::cout << "hyperparameter initialization error, please check!" << std::endl;
43 | return;
44 | }
45 | if (!_modelparams.initial(_hyperparams)){
46 | std::cout << "model parameter initialization error, please check!" << std::endl;
47 | return;
48 | }
49 | _modelparams.exportModelParams(_ada);
50 | _modelparams.exportCheckGradParams(_checkgrad);
51 |
52 | _hyperparams.print();
53 |
54 | _pcg = new ComputionGraph();
55 | _pcg->initial(_modelparams, _hyperparams);
56 |
57 | setUpdateParameters(_hyperparams.nnRegular, _hyperparams.adaAlpha, _hyperparams.adaEps);
58 | }
59 |
60 |
61 | inline dtype train(const vector<Example>& examples, int iter) {
62 | _eval.reset();
63 |
64 | int example_num = examples.size();
65 | dtype cost = 0.0;
66 |
67 | for (int count = 0; count < example_num; count++) {
68 | const Example& example = examples[count];
69 |
70 | //forward
71 | _pcg->forward(example.m_linearfeatures, true);
72 |
73 | //loss function
74 | cost += _modelparams.loss.loss(&_pcg->output, example.m_labels, _eval, example_num);
75 |
76 | // backward, which exists only for training
77 | _pcg->backward();
78 | }
79 |
80 | if (_eval.getAccuracy() < 0) {
81 | std::cout << "warning: negative training accuracy, check the loss computation" << std::endl;
82 | }
83 |
84 | return cost;
85 | }
86 |
87 | inline void predict(const vector<string>& features, int& results) {
88 | _pcg->forward(features);
89 | _modelparams.loss.predict(&_pcg->output, results);
90 | }
91 |
92 | inline dtype cost(const Example& example) {
93 | _pcg->forward(example.m_linearfeatures, true);
94 |
95 | dtype cost = 0.0;
96 |
97 | cost += _modelparams.loss.cost(&_pcg->output, example.m_labels, 1);
98 |
99 | return cost;
100 | }
101 |
102 | void checkgrad(const vector<Example>& examples, int iter){
103 | ostringstream out;
104 | out << "Iteration: " << iter;
105 | _checkgrad.check(this, examples, out.str());
106 | }
107 |
108 |
109 | void updateModel() {
110 | _ada.update();
111 | //_ada.update(5.0);
112 | }
113 |
114 | void writeModel();
115 |
116 | void loadModel();
117 |
118 |
119 |
120 | private:
121 | inline void resetEval() {
122 | _eval.reset();
123 | }
124 |
125 |
126 | inline void setUpdateParameters(dtype nnRegular, dtype adaAlpha, dtype adaEps){
127 | _ada._alpha = adaAlpha;
128 | _ada._eps = adaEps;
129 | _ada._reg = nnRegular;
130 | }
131 |
132 | };
133 |
134 | #endif /* SRC_Driver_H_ */
135 |
--------------------------------------------------------------------------------
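Note: train() runs forward, loss, and backward over a batch of examples and accumulates gradients; the caller then applies the optimizer step with updateModel(). A minimal sketch of one training pass, assuming vecExams came from Detector::initialExamples() and maxIter from the options:

    driver.initial();
    for (int iter = 0; iter < maxIter; iter++) {
        dtype cost = driver.train(vecExams, iter);  // forward + backward, gradients accumulate
        driver.updateModel();                       // apply the accumulated update
        std::cout << "iter " << iter << " cost = " << cost << std::endl;
    }
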
/src/SparseLocalContext/model/HyperParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_HyperParams_H_
2 | #define SRC_HyperParams_H_
3 |
4 | #include "N3L.h"
5 | #include "Example.h"
6 | #include "Options.h"
7 |
8 | using namespace nr;
9 | using namespace std;
10 |
11 | struct HyperParams{
12 |
13 | // must assign
14 | dtype nnRegular; // for optimization
15 | dtype adaAlpha; // for optimization
16 | dtype adaEps; // for optimization
17 |
18 | //auto generated
19 | int labelSize;
20 |
21 | public:
22 | HyperParams(){
23 | bAssigned = false;
24 | }
25 |
26 | public:
27 | void setRequired(Options& opt){
28 | nnRegular = opt.regParameter;
29 | adaAlpha = opt.adaAlpha;
30 | adaEps = opt.adaEps;
31 |
32 | bAssigned = true;
33 | }
34 |
35 | void clear(){
36 | bAssigned = false;
37 | }
38 |
39 | bool bValid(){
40 | return bAssigned;
41 | }
42 |
43 |
44 | public:
45 |
46 | void print(){
47 | std::cout << "nnRegular = " << nnRegular << ", adaAlpha = " << adaAlpha << ", adaEps = " << adaEps << ", labelSize = " << labelSize << std::endl;
48 | }
49 |
50 | private:
51 | bool bAssigned;
52 | };
53 |
54 |
55 | #endif /* SRC_HyperParams_H_ */
--------------------------------------------------------------------------------
/src/SparseLocalContext/model/ModelParams.h:
--------------------------------------------------------------------------------
1 | #ifndef SRC_ModelParams_H_
2 | #define SRC_ModelParams_H_
3 | #include "HyperParams.h"
4 |
5 | // Each model consists of two parts: building the neural graph and defining the output losses.
6 | class ModelParams{
7 |
8 | public:
9 | Alphabet featAlpha; // should be initialized outside
10 | Alphabet labelAlpha; // should be initialized outside
11 | public:
12 | SparseParams sparselayer;
13 | SoftMaxLoss loss;
14 |
15 |
16 | public:
17 | bool initial(HyperParams& opts){
18 |
19 | // some model parameters should be initialized outside
20 |
21 | opts.labelSize = labelAlpha.size();
22 | sparselayer.initial(&featAlpha, opts.labelSize);
23 |
24 | return true;
25 | }
26 |
27 |
28 | void exportModelParams(ModelUpdate& ada){
29 | sparselayer.exportAdaParams(ada);
30 | }
31 |
32 |
33 | void exportCheckGradParams(CheckGrad& checkgrad){
34 | checkgrad.add(&(sparselayer.W), "sparse.w");
35 | }
36 |
37 | // model serialization will be added later
38 | void saveModel(){
39 |
40 | }
41 |
42 | void loadModel(const string& inFile){
43 |
44 | }
45 |
46 | };
47 |
48 | #endif /* SRC_ModelParams_H_ */
--------------------------------------------------------------------------------
/src/basic/Example.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Example.h
3 | *
4 | * Created on: Mar 17, 2015
5 | * Author: mszhang
6 | */
7 |
8 | #ifndef SRC_EXAMPLE_H_
9 | #define SRC_EXAMPLE_H_
10 |
11 | #include "MyLib.h"
12 |
13 | using namespace std;
14 | struct Feature {
15 | public:
16 | vector<string> words;
17 | public:
18 | Feature() {
19 | }
20 |
21 | virtual ~Feature() {
22 |
23 | }
24 |
25 | void clear() {
26 | words.clear();
27 | }
28 | };
29 |
30 | class Example {
31 |
32 | public:
33 | vector<dtype> m_labels; // one-hot gold label vector
34 | vector<Feature> m_densefeatures; // word sequences for the neural models
35 | vector<string> m_linearfeatures; // sparse feature strings for the linear model
36 | public:
37 | Example(){
38 |
39 | }
40 | virtual ~Example(){
41 |
42 | }
43 |
44 | void clear(){
45 | m_labels.clear();
46 | m_densefeatures.clear();
47 | m_linearfeatures.clear();
48 | }
49 |
50 | };
51 |
52 |
53 | #endif /* SRC_EXAMPLE_H_ */
54 |
--------------------------------------------------------------------------------
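Note: for the sparse model only m_labels and m_linearfeatures are used; m_densefeatures serves the NNWord* variants. A minimal sketch of a binary example, assuming m_labels is a one-hot vector over the label alphabet and using hypothetical feature strings:

    Example exam;
    exam.m_labels.assign(2, 0);                       // two labels in the alphabet
    exam.m_labels[1] = 1;                             // gold label has index 1
    exam.m_linearfeatures.push_back("U=yeah");        // hypothetical unigram feature
    exam.m_linearfeatures.push_back("B=yeah_right");  // hypothetical bigram feature
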
/src/basic/Instance.h:
--------------------------------------------------------------------------------
1 | #ifndef _JST_INSTANCE_
2 | #define _JST_INSTANCE_
3 |
4 | #include <iostream>
5 | #include <vector>
6 | #include <string>
7 | #include <fstream>
8 | #include "N3L.h"
9 | #include "Metric.h"
10 |
11 | using namespace std;
12 |
13 | class Instance {
14 | public:
15 | Instance() {
16 | }
17 | ~Instance() {
18 | }
19 |
20 | int seqsize() const {
21 | return words.size();
22 | }
23 |
24 |
25 | int wordnum() const{
26 | return words[seqsize() - 1].size();
27 | }
28 |
29 | void clear() {
30 | label = "";
31 | for (int i = 0; i < seqsize(); i++) {
32 | words[i].clear();
33 | }
34 | words.clear();
35 | confidence = -1.0;
36 | }
37 |
38 | void allocate(int seq_size) {
39 | clear();
40 | label = "";
41 | words.resize(seq_size);
42 | confidence = -1.0;
43 | }
44 |
45 | void copyValuesFrom(const Instance& anInstance) {
46 | allocate(anInstance.seqsize());
47 | for (int i = 0; i < anInstance.seqsize(); i++) {
48 | for (int j = 0; j < anInstance.words[i].size(); j++)
49 | words[i].push_back(anInstance.words[i][j]);
50 | }
51 | label = anInstance.label;
52 | }
53 |
54 | void assignLabel(const string& resulted_label) {
55 | label = resulted_label;
56 | }
57 |
58 | void assignLabel(const string& resulted_label, dtype resulted_confidence){
59 | label = resulted_label;
60 | confidence = resulted_confidence;
61 | }
62 |
63 | void Evaluate(const string& resulted_label, Metric& eval) const {
64 | if (resulted_label.compare(label) == 0)
65 | eval.correct_label_count++;
66 | eval.overall_label_count++;
67 |
68 | }
69 |
70 |
71 | public:
72 | string label;
73 | vector<vector<string> > words;
74 | dtype confidence;
75 | };
76 |
77 | #endif
78 |
79 |
--------------------------------------------------------------------------------
/src/basic/InstanceReader.h:
--------------------------------------------------------------------------------
1 | #ifndef _CONLL_READER_
2 | #define _CONLL_READER_
3 |
4 | #include "Reader.h"
5 | #include "N3L.h"
6 | #include <sstream>
7 |
8 | using namespace std;
9 |
10 | class InstanceReader : public Reader {
11 | public:
12 | InstanceReader() {
13 | }
14 | ~InstanceReader() {
15 | }
16 |
17 | Instance *getNext() {
18 | m_instance.clear();
19 | vector<string> vecLine;
20 | while (1) {
21 | string strLine;
22 | if (!my_getline(m_inf, strLine)) {
23 | break;
24 | }
25 | if (strLine.empty())
26 | break;
27 | vecLine.push_back(strLine);
28 | }
29 |
30 | int seq_size = vecLine.size();
31 |
32 | if (seq_size == 1) {
33 | m_instance.allocate(1);
34 | vector<string> vecInfo;
35 | split_bychar(vecLine[0], vecInfo, ' ');
36 | int veclength = vecInfo.size();
37 | m_instance.label = vecInfo[0];
38 | for (int j = 1; j < veclength; j++)
39 | m_instance.words[0].push_back(vecInfo[j]);
40 | }
41 | else {
42 | m_instance.allocate(2);
43 | for (int i = 0; i < seq_size; ++i) {
44 | vector<string> vecInfo;
45 | split_bychar(vecLine[i], vecInfo, ' ');
46 | int veclength = vecInfo.size();
47 | if (i == seq_size - 1) {
48 | m_instance.label = vecInfo[0];
49 | for (int j = 1; j < veclength; j++)
50 | m_instance.words[1].push_back(vecInfo[j]);
51 | }
52 | else {
53 | for (int j = 1; j < veclength; j++)
54 | m_instance.words[0].push_back(vecInfo[j]);
55 | }
56 |
57 | }
58 | }
59 |
60 | return &m_instance;
61 | }
62 | };
63 |
64 | #endif
65 |
66 |
--------------------------------------------------------------------------------
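Note on the input format read by getNext(): instances are blank-line-separated blocks, one sentence per line, where each line's first token is a label slot and the remaining tokens are words; with several lines, the last line is the current tweet (its label is kept) and the earlier lines are history (their label slots are skipped). A hypothetical two-line instance:

    - this movie was so long
    1 yeah right i loved every minute

(only the trailing line's label, here "1", is stored on the instance)
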
/src/basic/InstanceWriter.h:
--------------------------------------------------------------------------------
1 | #ifndef _CONLL_WRITER_
2 | #define _CONLL_WRITER_
3 |
4 | #include "Writer.h"
5 | #include <sstream>
6 |
7 | using namespace std;
8 |
9 | class InstanceWriter : public Writer
10 | {
11 | public:
12 | InstanceWriter(){}
13 | ~InstanceWriter(){}
14 | int write(const Instance *pInstance)
15 | {
16 | if (!m_outf.is_open()) return -1;
17 |
18 | const vector<vector<string> > &words = pInstance->words;
19 | int seq_size = words.size();
20 | for (int i = 0; i < seq_size; i++){
21 | const string &label = pInstance->label;
22 | if (i < seq_size - 1)
23 | m_outf << "history " << endl;
24 | else if (pInstance->confidence < 0.0)
25 | m_outf << label << endl;
26 | else
27 | m_outf << label << " " << pInstance->confidence << endl;
28 |
29 | }
30 | m_outf << endl;
31 | return 0;
32 |
33 | }
34 | };
35 |
36 |
37 | #endif
38 |
39 |
--------------------------------------------------------------------------------
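Note: write() emits one line per sequence: the literal token "history" for every sequence before the last, then the predicted label, with the confidence appended when it is non-negative, followed by a blank line. For the hypothetical two-sequence instance above it would produce:

    history
    1 0.93
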
/src/basic/Options.h:
--------------------------------------------------------------------------------
1 | #ifndef _PARSER_OPTIONS_
2 | #define _PARSER_OPTIONS_
3 |
4 | #pragma once
5 |
6 | #include <string>
7 | #include <fstream>
8 | #include <vector>
9 | #include <iostream>
10 | #include "N3L.h"
11 |
12 | using namespace std;
13 |
14 | class Options {
15 | public:
16 |
17 | int wordCutOff;
18 | int featCutOff;
19 | int charCutOff;
20 | dtype initRange;
21 | int maxIter;
22 | int batchSize;
23 | dtype adaEps;
24 | dtype adaAlpha;
25 | dtype regParameter;
26 | dtype dropProb;
27 |
28 | int segHiddenSize;
29 | int hiddenSize;
30 | int rnnHiddenSize;
31 | int wordEmbSize;
32 | int wordcontext;
33 | bool wordEmbFineTune;
34 |
35 | int charEmbSize;
36 | int charcontext;
37 | bool charEmbFineTune;
38 | int charhiddenSize;
39 |
40 | int typeEmbSize;
41 | bool typeEmbFineTune;
42 |
43 | int maxsegLen;
44 |
45 | int verboseIter;
46 | bool saveIntermediate;
47 | bool train;
48 | int maxInstance;
49 | vector<string> testFiles;
50 | string outBest;
51 | bool seg;
52 | int relu;
53 | int atomLayers;
54 | int rnnLayers;
55 |
56 | //embedding files
57 | string wordFile;
58 | string charFile;
59 | string segFile;
60 | vector<string> typeFiles;
61 |
62 | //linear feature: 0:do not use, 1:current sent, 2:current+history sent
63 | int linearfeatCat;
64 |
65 | Options() {
66 | wordCutOff = 0;
67 | featCutOff = 0;
68 | charCutOff = 0;
69 | initRange = 0.01;
70 | maxIter = 1000;
71 | batchSize = 1;
72 | adaEps = 1e-6;
73 | adaAlpha = 0.01;
74 | regParameter = 1e-8;
75 | dropProb = 0.0;
76 |
77 | segHiddenSize = 100;
78 | hiddenSize = 100;
79 | rnnHiddenSize = 100;
80 | wordEmbSize = 50;
81 | wordcontext = 2;
82 | wordEmbFineTune = true;
83 | charEmbSize = 50;
84 | charcontext = 2;
85 | charEmbFineTune = true;
86 | charhiddenSize = 50;
87 |
88 | typeEmbSize = 50;
89 | typeEmbFineTune = true;
90 |
91 | verboseIter = 100;
92 | saveIntermediate = true;
93 | train = false;
94 | maxInstance = -1;
95 | testFiles.clear();
96 | outBest = "";
97 | relu = 0;
98 | seg = false;
99 | atomLayers = 1;
100 | rnnLayers = 1;
101 | maxsegLen = 5;
102 |
103 | wordFile = "";
104 | charFile = "";
105 | segFile = "";
106 | typeFiles.clear();
107 |
108 | linearfeatCat = 0;
109 | }
110 |
111 | virtual ~Options() {
112 |
113 | }
114 |
115 | void setOptions(const vector<string> &vecOption) {
116 | int i = 0;
117 | for (; i < vecOption.size(); ++i) {
118 | pair<string, string> pr;
119 | string2pair(vecOption[i], pr, '=');
120 | if (pr.first == "wordCutOff")
121 | wordCutOff = atoi(pr.second.c_str());
122 | if (pr.first == "featCutOff")
123 | featCutOff = atoi(pr.second.c_str());
124 | if (pr.first == "charCutOff")
125 | charCutOff = atoi(pr.second.c_str());
126 | if (pr.first == "initRange")
127 | initRange = atof(pr.second.c_str());
128 | if (pr.first == "maxIter")
129 | maxIter = atoi(pr.second.c_str());
130 | if (pr.first == "batchSize")
131 | batchSize = atoi(pr.second.c_str());
132 | if (pr.first == "adaEps")
133 | adaEps = atof(pr.second.c_str());
134 | if (pr.first == "adaAlpha")
135 | adaAlpha = atof(pr.second.c_str());
136 | if (pr.first == "regParameter")
137 | regParameter = atof(pr.second.c_str());
138 | if (pr.first == "dropProb")
139 | dropProb = atof(pr.second.c_str());
140 |
141 | if (pr.first == "segHiddenSize")
142 | segHiddenSize = atoi(pr.second.c_str());
143 | if (pr.first == "hiddenSize")
144 | hiddenSize = atoi(pr.second.c_str());
145 | if (pr.first == "rnnHiddenSize")
146 | rnnHiddenSize = atoi(pr.second.c_str());
147 | if (pr.first == "wordcontext")
148 | wordcontext = atoi(pr.second.c_str());
149 | if (pr.first == "wordEmbSize")
150 | wordEmbSize = atoi(pr.second.c_str());
151 | if (pr.first == "wordEmbFineTune")
152 | wordEmbFineTune = (pr.second == "true") ? true : false;
153 | if (pr.first == "charcontext")
154 | charcontext = atoi(pr.second.c_str());
155 | if (pr.first == "charEmbSize")
156 | charEmbSize = atoi(pr.second.c_str());
157 | if (pr.first == "charEmbFineTune")
158 | charEmbFineTune = (pr.second == "true") ? true : false;
159 | if (pr.first == "charhiddenSize")
160 | charhiddenSize = atoi(pr.second.c_str());
161 | if (pr.first == "typeEmbSize")
162 | typeEmbSize = atoi(pr.second.c_str());
163 | if (pr.first == "typeEmbFineTune")
164 | typeEmbFineTune = (pr.second == "true") ? true : false;
165 |
166 | if (pr.first == "verboseIter")
167 | verboseIter = atoi(pr.second.c_str());
168 | if (pr.first == "train")
169 | train = (pr.second == "true") ? true : false;
170 | if (pr.first == "saveIntermediate")
171 | saveIntermediate = (pr.second == "true") ? true : false;
172 | if (pr.first == "maxInstance")
173 | maxInstance = atoi(pr.second.c_str());
174 | if (pr.first == "testFile")
175 | testFiles.push_back(pr.second);
176 | if (pr.first == "outBest")
177 | outBest = pr.second;
178 | if (pr.first == "relu")
179 | relu = atoi(pr.second.c_str());
180 | if (pr.first == "seg")
181 | seg = (pr.second == "true") ? true : false;
182 | if (pr.first == "atomLayers")
183 | atomLayers = atoi(pr.second.c_str());
184 | if (pr.first == "rnnLayers")
185 | rnnLayers = atoi(pr.second.c_str());
186 | if (pr.first == "maxsegLen")
187 | maxsegLen = atoi(pr.second.c_str());
188 |
189 | if (pr.first == "wordFile")
190 | wordFile = pr.second;
191 | if (pr.first == "segFile")
192 | segFile = pr.second;
193 | if (pr.first == "charFile")
194 | charFile = pr.second;
195 | if (pr.first == "typeFile")
196 | typeFiles.push_back(pr.second);
197 |
198 | if (pr.first == "linearfeatCat")
199 | linearfeatCat = atoi(pr.second.c_str());
200 | }
201 | }
202 |
203 | void showOptions() {
204 | std::cout << "wordCutOff = " << wordCutOff << std::endl;
205 | std::cout << "featCutOff = " << featCutOff << std::endl;
206 | std::cout << "charCutOff = " << charCutOff << std::endl;
207 | std::cout << "initRange = " << initRange << std::endl;
208 | std::cout << "maxIter = " << maxIter << std::endl;
209 | std::cout << "batchSize = " << batchSize << std::endl;
210 | std::cout << "adaEps = " << adaEps << std::endl;
211 | std::cout << "adaAlpha = " << adaAlpha << std::endl;
212 | std::cout << "regParameter = " << regParameter << std::endl;
213 | std::cout << "dropProb = " << dropProb << std::endl;
214 |
215 | std::cout << "segHiddenSize = " << segHiddenSize << std::endl;
216 | std::cout << "hiddenSize = " << hiddenSize << std::endl;
217 | std::cout << "rnnHiddenSize = " << rnnHiddenSize << std::endl;
218 | std::cout << "wordEmbSize = " << wordEmbSize << std::endl;
219 | std::cout << "wordcontext = " << wordcontext << std::endl;
220 | std::cout << "wordEmbFineTune = " << wordEmbFineTune << std::endl;
221 | std::cout << "charEmbSize = " << charEmbSize << std::endl;
222 | std::cout << "charcontext = " << charcontext << std::endl;
223 | std::cout << "charEmbFineTune = " << charEmbFineTune << std::endl;
224 | std::cout << "charhiddenSize = " << charhiddenSize << std::endl;
225 | std::cout << "typeEmbSize = " << typeEmbSize << std::endl;
226 | std::cout << "typeEmbFineTune = " << typeEmbFineTune << std::endl;
227 |
228 | std::cout << "verboseIter = " << verboseIter << std::endl;
229 | std::cout << "saveIntermediate = " << saveIntermediate << std::endl;
230 | std::cout << "train = " << train << std::endl;
231 | std::cout << "maxInstance = " << maxInstance << std::endl;
232 | for (int idx = 0; idx < testFiles.size(); idx++) {
233 | std::cout << "testFile = " << testFiles[idx] << std::endl;
234 | }
235 | std::cout << "outBest = " << outBest << std::endl;
236 | std::cout << "relu = " << relu << std::endl;
237 | std::cout << "seg = " << seg << std::endl;
238 | std::cout << "atomLayers = " << atomLayers << std::endl;
239 | std::cout << "rnnLayers = " << rnnLayers << std::endl;
240 | std::cout << "maxsegLen = " << maxsegLen << std::endl;
241 |
242 | std::cout << "wordFile = " << wordFile << std::endl;
243 | std::cout << "charFile = " << charFile << std::endl;
244 | std::cout << "segFile = " << segFile << std::endl;
245 | for (int idx = 0; idx < typeFiles.size(); idx++) {
246 | std::cout << "typeFile = " << typeFiles[idx] << std::endl;
247 | }
248 |
249 | std::cout << "linearfeatCat = " << linearfeatCat << std::endl;
250 | }
251 |
252 | void load(const std::string& infile) {
253 | ifstream inf;
254 | inf.open(infile.c_str());
255 | vector<string> vecLine;
256 | while (1) {
257 | string strLine;
258 | if (!my_getline(inf, strLine)) {
259 | break;
260 | }
261 | if (strLine.empty())
262 | continue;
263 | vecLine.push_back(strLine);
264 | }
265 | inf.close();
266 | setOptions(vecLine);
267 | }
268 | };
269 |
270 | #endif
271 |
272 |
--------------------------------------------------------------------------------
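Note: load() collects non-empty lines and setOptions() splits each at '=' into a key/value pair, ignoring unknown keys, so an option file is simply one key=value per line. A minimal sketch with hypothetical values for the sparse model:

    maxIter=200
    featCutOff=1
    adaAlpha=0.005
    regParameter=1e-8
    linearfeatCat=2
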
/src/basic/Pipe.h:
--------------------------------------------------------------------------------
1 | #ifndef _JST_PIPE_
2 | #define _JST_PIPE_
3 |
4 | #include <iostream>
5 | #include <fstream>
6 | #include <sstream>
7 | #include <string>
8 | #include <vector>
9 | #include <cassert>