├── .gitattributes
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── README.md
├── UML
    └── slnn_visio.vsdx
├── cmake
    └── FindEigen3.cmake
├── run_flow_example
    ├── README.md
    ├── buildflow.sh
    ├── runflow.sh
    └── sampledata
    │   ├── cws
    │       ├── PKU_input.50.cws
    │       └── PKU_train.50.cws
    │   ├── ner
    │       ├── PKU_input.50.ner
    │       └── PKU_train.50.ner
    │   └── postag
    │       ├── PTB_input.50.pos
    │       └── PTB_train.50.pos
└── src
    ├── CMakeLists.txt
    ├── modelmodule
        ├── CMakeLists.txt
        ├── context_feature.cpp
        ├── context_feature.h
        ├── context_feature_layer.cpp
        ├── context_feature_layer.h
        ├── hyper_input_layers.cpp
        ├── hyper_input_layers.h
        ├── hyper_layers.cpp
        ├── hyper_layers.h
        ├── hyper_output_layers.cpp
        ├── hyper_output_layers.h
        ├── layers.cpp
        └── layers.h
    ├── ner
        ├── CMakeLists.txt
        ├── base_model
        │   ├── input2D_model.cpp
        │   └── input2D_model.h
        ├── model_handler
        │   ├── input2D_modelhandler.cpp
        │   └── input2D_modelhandler.h
        ├── ner
        │   ├── CMakeLists.txt
        │   ├── bilstmmodel4ner.hpp
        │   └── ner.cpp
        ├── ner_crf
        │   ├── CMakeLists.txt
        │   ├── ner_crf.cpp
        │   ├── ner_crf_model.cpp
        │   ├── ner_crf_model.h
        │   ├── ner_crf_modelhandler.cpp
        │   └── ner_crf_modelhandler.h
        ├── ner_crf_dc
        │   ├── CMakeLists.txt
        │   ├── ner_crf_dc.cpp
        │   ├── ner_crf_dc_model.cpp
        │   ├── ner_crf_dc_model.h
        │   ├── ner_crf_dc_modelhandler.cpp
        │   └── ner_crf_dc_modelhandler.h
        ├── ner_doublechannel
        │   ├── CMakeLists.txt
        │   ├── ner_dc_model.cpp
        │   ├── ner_dc_model.h
        │   ├── ner_dc_modelhandler.cpp
        │   ├── ner_dc_modelhandler.h
        │   └── ner_doublechannel.cpp
        └── ner_single_classification
        │   ├── CMakeLists.txt
        │   ├── ner_single_classification.cpp
        │   ├── ner_single_classification_model.cpp
        │   └── ner_single_classification_model.h
    ├── postagger
        ├── CMakeLists.txt
        ├── base_model
        │   ├── bareinput1_f2i_no_merge_model.hpp
        │   ├── bareinput1_f2o_no_merge_model.hpp
        │   ├── bareinput1_f2o_nonlinear_model.hpp
        │   ├── input1_feature2input_layer_model.hpp
        │   ├── input1_feature2output_layer_model.hpp
        │   ├── input1_mlp_model.cpp
        │   ├── input1_mlp_model.h
        │   ├── input1_mlp_model_no_feature.cpp
        │   ├── input1_mlp_model_no_feature.h
        │   ├── input2_feature2input_layer_model.hpp
        │   ├── input2_feature2output_layer_model.hpp
        │   ├── input2_with_feature_model.hpp
        │   ├── single_input_model.cpp
        │   ├── single_input_model.h
        │   └── single_input_with_feature_model.hpp
        ├── model_handler
        │   ├── input1_mlp_modelhandler.cpp
        │   ├── input1_mlp_modelhandler.h
        │   ├── input1_mlp_modelhandler_no_feature.h
        │   ├── input2_with_feature_modelhandler.hpp
        │   ├── single_input_modelhandler.cpp
        │   ├── single_input_modelhandler.h
        │   └── single_input_with_feature_modelhandler.hpp
        ├── pos_bareinput1_classification_with_feature_no_merge
        │   ├── CMakeLists.txt
        │   ├── pos_bareinput1_classification_f2i_no_merge.cpp
        │   ├── pos_bareinput1_classification_f2i_no_merge_model.cpp
        │   ├── pos_bareinput1_classification_f2i_no_merge_model.h
        │   ├── pos_bareinput1_classification_f2o_no_merge.cpp
        │   ├── pos_bareinput1_classification_f2o_no_merge_model.cpp
        │   ├── pos_bareinput1_classification_f2o_no_merge_model.h
        │   ├── pos_bareinput1_f2o_nonlinear.cpp
        │   ├── pos_bareinput1_f2o_nonlinear_model.cpp
        │   └── pos_bareinput1_f2o_nonlinear_model.h
        ├── pos_input1_mlp_with_tag
        │   ├── CMakeLists.txt
        │   ├── pos_input1_mlp_with_tag.cpp
        │   ├── pos_input1_mlp_with_tag_model.cpp
        │   └── pos_input1_mlp_with_tag_model.h
        ├── pos_input1_mlp_without_tag
        │   ├── CMakeLists.txt
        │   ├── pos_input1_mlp_without_tag.cpp
        │   ├── pos_input1_mlp_without_tag_model.cpp
        │   └── pos_input1_mlp_without_tag_model.h
        ├── pos_input1_mlp_without_tag_NO_FEATURE
        │   ├── CMakeLists.txt
        │   ├── pos_input1_mlp_without_tag_no_feature.cpp
        │   ├── pos_input1_mlp_without_tag_no_feature_model.cpp
        │   └── pos_input1_mlp_without_tag_no_feature_model.h
        ├── pos_input2_classification_with_feature
        │   ├── CMakeLists.txt
        │   ├── pos_input2_classification_feature2input_layer.cpp
        │   ├── pos_input2_classification_feature2input_layer_model.cpp
        │   ├── pos_input2_classification_feature2input_layer_model.h
        │   ├── pos_input2_classification_feature2output_layer.cpp
        │   ├── pos_input2_classification_feature2output_layer_model.cpp
        │   └── pos_input2_classification_feature2output_layer_model.h
        ├── pos_input2_crf_with_feature
        │   ├── CMakeLists.txt
        │   ├── pos_input2_crf_feature2input_layer.cpp
        │   ├── pos_input2_crf_feature2input_layer_model.cpp
        │   ├── pos_input2_crf_feature2input_layer_model.h
        │   ├── pos_input2_crf_feature2output_layer.cpp
        │   ├── pos_input2_crf_feature2output_layer_model.cpp
        │   └── pos_input2_crf_feature2output_layer_model.h
        ├── pos_input2_pretag_with_feature
        │   ├── CMakeLists.txt
        │   ├── pos_input2_pretag_feature2input_layer.cpp
        │   ├── pos_input2_pretag_feature2input_layer_model.cpp
        │   ├── pos_input2_pretag_feature2input_layer_model.h
        │   ├── pos_input2_pretag_feature2output_layer.cpp
        │   ├── pos_input2_pretag_feature2output_layer_model.cpp
        │   └── pos_input2_pretag_feature2output_layer_model.h
        ├── pos_single_classification
        │   ├── CMakeLists.txt
        │   ├── pos_single_classification.cpp
        │   ├── pos_single_classification_model.cpp
        │   └── pos_single_classification_model.h
        ├── pos_single_classification_with_feature
        │   ├── CMakeLists.txt
        │   ├── pos_input1_classification_feature2input_layer.cpp
        │   ├── pos_input1_classification_feature2input_layer_model.cpp
        │   ├── pos_input1_classification_feature2input_layer_model.h
        │   ├── pos_input1_classification_feature2output_layer.cpp
        │   ├── pos_input1_classification_feature2output_layer_model.cpp
        │   └── pos_input1_classification_feature2output_layer_model.h
        ├── pos_single_crf_with_feature
        │   ├── CMakeLists.txt
        │   ├── pos_input1_crf_feature2input_layer.cpp
        │   ├── pos_input1_crf_feature2input_layer_model.cpp
        │   ├── pos_input1_crf_feature2input_layer_model.h
        │   ├── pos_input1_crf_feature2output_layer.cpp
        │   ├── pos_input1_crf_feature2output_layer_model.cpp
        │   └── pos_input1_crf_feature2output_layer_model.h
        ├── pos_single_pretag_with_feature
        │   ├── CMakeLists.txt
        │   ├── pos_input1_pretag_feature2input_layer.cpp
        │   ├── pos_input1_pretag_feature2input_layer_model.cpp
        │   ├── pos_input1_pretag_feature2input_layer_model.h
        │   ├── pos_input1_pretag_feature2output_layer.cpp
        │   ├── pos_input1_pretag_feature2output_layer_model.cpp
        │   └── pos_input1_pretag_feature2output_layer_model.h
        ├── postagger_crf
        │   ├── CMakeLists.txt
        │   ├── bilstmcrf.cpp
        │   ├── bilstmcrf.h
        │   ├── bilstmcrf_modelhandler.cpp
        │   ├── bilstmcrf_modelhandler.h
        │   └── postagger_bilstmcrf.cpp
        ├── postagger_crfdc
        │   ├── CMakeLists.txt
        │   ├── bilstmcrf_dc.cpp
        │   ├── bilstmcrf_dc.h
        │   ├── bilstmcrf_dc_modelhandler.cpp
        │   ├── bilstmcrf_dc_modelhandler.h
        │   └── postagger_bilstmcrf_dc.cpp
        ├── postagger_dc
        │   ├── CMakeLists.txt
        │   ├── bilstmmodel4tagging_doublechannel.cpp
        │   ├── bilstmmodel4tagging_doublechannel.h
        │   ├── doublechannel_modelhandler.cpp
        │   ├── doublechannel_modelhandler.h
        │   └── postagger_doublechannel.cpp
        ├── postagger_module
        │   ├── pos_feature.cpp
        │   ├── pos_feature.h
        │   ├── pos_feature_extractor.cpp
        │   ├── pos_feature_extractor.h
        │   ├── pos_feature_layer.cpp
        │   ├── pos_feature_layer.h
        │   ├── pos_reader.cpp
        │   └── pos_reader.h
        └── postagger_pretag
        │   ├── CMakeLists.txt
        │   ├── bilstmmodel4tagging.hpp
        │   └── postagger.cpp
    ├── segmenter
        ├── CMakeLists.txt
        ├── base_model
        │   ├── bareinput1_f2i_model.hpp
        │   ├── bareinput1_f2o_model.hpp
        │   ├── cws_mlp_base_model.h
        │   ├── input1_f2i_model_0628.hpp
        │   ├── input1_f2o_model_0628.hpp
        │   ├── input1_with_feature_model_0628.hpp
        │   ├── input2_model.cpp
        │   ├── input2_model.h
        │   ├── single_input_model.cpp
        │   └── single_input_model.h
        ├── cws_bareinput1_cl_with_feature
        │   ├── CMakeLists.txt
        │   ├── cws_bareinput1_cl_f2i.cpp
        │   ├── cws_bareinput1_cl_f2i_model.cpp
        │   ├── cws_bareinput1_cl_f2i_model.h
        │   ├── cws_bareinput1_cl_f2o.cpp
        │   ├── cws_bareinput1_cl_f2o_model.cpp
        │   └── cws_bareinput1_cl_f2o_model.h
        ├── cws_double_bigram_classification
        │   ├── CMakeLists.txt
        │   └── cws_double_bigram_classification.cpp
        ├── cws_double_bigram_crf
        │   ├── CMakeLists.txt
        │   └── cws_double_bigram_crf.cpp
        ├── cws_double_bigram_pretag
        │   ├── CMakeLists.txt
        │   └── cws_double_bigram_pretag.cpp
        ├── cws_double_classification
        │   ├── CMakeLists.txt
        │   ├── cws_double_classification.cpp
        │   ├── cws_double_classification_model.cpp
        │   └── cws_double_classification_model.h
        ├── cws_double_crf
        │   ├── CMakeLists.txt
        │   ├── cws_double_crf.cpp
        │   ├── cws_double_crf_model.cpp
        │   └── cws_double_crf_model.h
        ├── cws_double_pretag
        │   ├── CMakeLists.txt
        │   ├── cws_double_pretag.cpp
        │   ├── cws_double_pretag_model.cpp
        │   └── cws_double_pretag_model.h
        ├── cws_input1_cl_with_feature
        │   ├── CMakeLists.txt
        │   ├── cws_input1_cl_f2i.cpp
        │   ├── cws_input1_cl_f2i_model.cpp
        │   ├── cws_input1_cl_f2i_model.h
        │   ├── cws_input1_cl_f2o.cpp
        │   ├── cws_input1_cl_f2o_model.cpp
        │   └── cws_input1_cl_f2o_model.h
        ├── cws_mlp_input1
        │   ├── CMakeLists.txt
        │   ├── cws_mlp_input1_all.cpp
        │   ├── cws_mlp_input1_bigram.cpp
        │   ├── cws_mlp_input1_instance.cpp
        │   ├── cws_mlp_input1_instance.h
        │   ├── cws_mlp_input1_template.h
        │   └── cws_mlp_input1_unigram.cpp
        ├── cws_module
        │   ├── cws_eval.cpp
        │   ├── cws_eval.h
        │   ├── cws_feature.cpp
        │   ├── cws_feature.h
        │   ├── cws_feature_layer.cpp
        │   ├── cws_feature_layer.h
        │   ├── cws_general_modelhandler.cpp
        │   ├── cws_general_modelhandler.h
        │   ├── cws_output_layer.cpp
        │   ├── cws_output_layer.h
        │   ├── cws_reader.cpp
        │   ├── cws_reader.h
        │   ├── cws_reader_unicode.cpp
        │   ├── cws_reader_unicode.h
        │   ├── cws_stat.h
        │   ├── cws_tagging_system.cpp
        │   ├── cws_tagging_system.h
        │   ├── cws_writer.h
        │   ├── lexicon_feature.cpp
        │   ├── lexicon_feature.h
        │   ├── lexicon_feature_layer.cpp
        │   ├── lexicon_feature_layer.h
        │   ├── nn_module
        │   │   ├── experiment_layer
        │   │   │   ├── nn_cws_specific_output_layer.cpp
        │   │   │   ├── nn_cws_specific_output_layer.h
        │   │   │   ├── nn_window_expr_processing_layer.cpp
        │   │   │   └── nn_window_expr_processing_layer.h
        │   │   ├── mlp_input1
        │   │   │   ├── nn_cws_mlp_input1_abstract.cpp
        │   │   │   ├── nn_cws_mlp_input1_abstract.h
        │   │   │   ├── nn_cws_mlp_input1_all.cpp
        │   │   │   └── nn_cws_mlp_input1_all.h
        │   │   ├── nn_common_interface.h
        │   │   ├── nn_common_interface_dynet_impl.cpp
        │   │   ├── nn_common_interface_dynet_impl.h
        │   │   └── rnn_input1
        │   │   │   ├── nn_cws_rnn_input1_abstract.cpp
        │   │   │   └── nn_cws_rnn_input1_abstract.h
        │   ├── structure_param_module
        │   │   ├── basic_mlp_param.cpp
        │   │   ├── basic_mlp_param.h
        │   │   ├── param_mlp_input1_all.cpp
        │   │   ├── param_mlp_input1_all.h
        │   │   ├── rnn_input1_param.cpp
        │   │   └── rnn_input1_param.h
        │   ├── token_module
        │   │   ├── cws_tag_definition.h
        │   │   ├── cws_tag_utility.h
        │   │   ├── input1
        │   │   │   ├── token_input1_all.cpp
        │   │   │   ├── token_input1_all.h
        │   │   │   ├── token_input1_bigram.cpp
        │   │   │   ├── token_input1_bigram.h
        │   │   │   ├── token_input1_unigram.cpp
        │   │   │   └── token_input1_unigram.h
        │   │   ├── token_chartype.cpp
        │   │   ├── token_chartype.h
        │   │   ├── token_lexicon.cpp
        │   │   └── token_lexicon.h
        │   ├── type_feature.cpp
        │   └── type_feature.h
        ├── cws_rnn_input1
        │   ├── CMakeLists.txt
        │   ├── cws_rnn_input1_bigram.cpp
        │   ├── cws_rnn_input1_instance.cpp
        │   ├── cws_rnn_input1_instance.h
        │   ├── cws_rnn_input1_template.h
        │   └── cws_rnn_input1_unigram.cpp
        ├── cws_single_bigram_classification
        │   ├── CMakeLists.txt
        │   └── cws_single_bigram_classification.cpp
        ├── cws_single_bigram_crf
        │   ├── CMakeLists.txt
        │   └── cws_single_bigram_crf.cpp
        ├── cws_single_bigram_pretag
        │   ├── CMakeLists.txt
        │   └── cws_single_bigram_pretag.cpp
        ├── cws_single_classification
        │   ├── CMakeLists.txt
        │   ├── cws_single_classification.cpp
        │   ├── cws_single_classification_model.cpp
        │   └── cws_single_classification_model.h
        ├── cws_single_crf
        │   ├── CMakeLists.txt
        │   ├── cws_single_crf.cpp
        │   ├── cws_single_crf_model.cpp
        │   └── cws_single_crf_model.h
        ├── cws_single_pretag
        │   ├── CMakeLists.txt
        │   ├── cws_single_pretag.cpp
        │   ├── cws_single_pretag_model.cpp
        │   └── cws_single_pretag_model.h
        ├── model_handler
        │   ├── input1_with_feature_modelhandler_0628.hpp
        │   ├── input2_bigram_modelhandler.h
        │   ├── input2_modelhandler.cpp
        │   ├── input2_modelhandler.h
        │   ├── single_input_bigram_modelhandler.h
        │   ├── single_input_modelhandler.cpp
        │   └── single_input_modelhandler.h
        └── unit_test
        │   ├── CMakeLists.txt
        │   └── cws_evaluation.cpp
    ├── trivial
        ├── CMakeLists.txt
        ├── charcode
        │   ├── charcode_base.hpp
        │   ├── charcode_convertor.h
        │   ├── charcode_detector.h
        │   ├── naive_unicode.cpp
        │   ├── naive_unicode.h
        │   ├── naive_utf8.cpp
        │   └── naive_utf8.h
        ├── logger
        │   └── naive_logger.h
        └── lookup_table
        │   ├── lookup_table.cpp
        │   └── lookup_table.h
    ├── unittest
        ├── CMakeLists.txt
        ├── test_charcode
        │   ├── CMakeLists.txt
        │   └── test_charcode.cpp
        ├── test_cwstag
        │   ├── CMakeLists.txt
        │   └── test_cwstag.cpp
        └── test_lookup_table
        │   ├── CMakeLists.txt
        │   └── test_lookup_table.cpp
    └── utils
        ├── dict_wrapper.hpp
        ├── general.hpp
        ├── nn_utility.cpp
        ├── nn_utility.h
        ├── reader.hpp
        ├── stash_model.hpp
        ├── stat.hpp
        ├── typedeclaration.h
        ├── utf8processing.hpp
        ├── word2vec_embedding_helper.cpp
        ├── word2vec_embedding_helper.h
        └── writer.h


/.gitattributes:
--------------------------------------------------------------------------------
 1 | ###############################################################################
 2 | # Set default behavior to automatically normalize line endings.
 3 | ###############################################################################
 4 | * text=auto
 5 | 
 6 | ###############################################################################
 7 | # Set default behavior for command prompt diff.
 8 | #
 9 | # This is need for earlier builds of msysgit that does not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs     diff=csharp
14 | 
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following 
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln       merge=binary
26 | #*.csproj    merge=binary
27 | #*.vbproj    merge=binary
28 | #*.vcxproj   merge=binary
29 | #*.vcproj    merge=binary
30 | #*.dbproj    merge=binary
31 | #*.fsproj    merge=binary
32 | #*.lsproj    merge=binary
33 | #*.wixproj   merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj   merge=binary
36 | #*.wwaproj   merge=binary
37 | 
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg   binary
44 | #*.png   binary
45 | #*.gif   binary
46 | 
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | # 
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the 
52 | # entries below.
53 | ###############################################################################
54 | #*.doc   diff=astextplain
55 | #*.DOC   diff=astextplain
56 | #*.docx  diff=astextplain
57 | #*.DOCX  diff=astextplain
58 | #*.dot   diff=astextplain
59 | #*.DOT   diff=astextplain
60 | #*.pdf   diff=astextplain
61 | #*.PDF   diff=astextplain
62 | #*.rtf   diff=astextplain
63 | #*.RTF   diff=astextplain
64 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | bin
3 | run
4 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "3rdparty/catch"]
2 | 	path = 3rdparty/catch
3 | 	url = https://github.com/philsquared/Catch.git
4 | [submodule "3rdparty/dynet"]
5 | 	path = 3rdparty/dynet
6 | 	url = https://github.com/memeda/dynet.git
7 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Sequence Labeling by Neural Network
 2 | 
 3 | The repository is for sequence labeling through neural network (deep learning) methods .
 4 | 
 5 | ## RUN FLOW SAMPLE
 6 | 
 7 | The directory [run\_flow\_example](run_flow_example) contains a simple example that builds the project and runs the samples under Linux.
 8 | 
 9 | ## Build
10 | 
11 | ### dependency
12 | 
13 | We use [DyNet-self](https://github.com/memeda/dynet), a fork of the [DyNet library](https://github.com/clab/dynet) with some trivial modifications, as the underlying neural network framework. After cloning the repository, run
14 | 
15 | ```shell
16 | git submodule init
17 | git submodule update
18 | ```
19 | 
20 | to clone down the `dynet` module.
21 | 
22 | Dynet needs `boost` and `eigen3`. `cmake` is also needed.
23 | 
24 | ### Under MSVC
25 | 
26 | **boost-1.57.0, boost-1.58.0** are supported, and **boost-1.60.0** leads to some compiling errors.
27 | 
28 | 1. get [eigen3](https://bitbucket.org/eigen/eigen/)
29 | 2. open `git bash` or `cmd`, change directory to the repository root
30 | 3. `git submodule init && git submodule update`
31 | 4. make a directory to build, `mkdir build`
32 | 5. `cd build`
33 | 6. using the command to make : `cmake .. -DEIGEN3_INCLUDE_DIR=/eigen/path -DBOOST_ROOT=/boost/path -DBoost_USE_STATIC_LIBS=On` , **Boost_USE_STATIC_LIBS=On** is needed for Windows.
34 | 7. open the VS solution under *build* folder
35 | 
36 | ### Under Linux
37 | 
38 | you can just use `run_flow_example`
39 | 
40 | ## Plan 
41 | 
42 | 
43 | it is now based on [DyNet library](https://github.com/clab/dynet)
44 | 
45 | steps :
46 | 
47 | 1. postagging based on example `tag-bilstm.cc` of DyNet [done]
48 | 
49 | 2. chinese segmentation(using sequence labeling method) , ner [done]
50 | 
51 | 3. more various structures based on DyNet [doing]
52 | 
53 | 4. (almost)from scratch ?? -> NO , need more time to think about it !
54 | 
55 | ## WIKI
56 | 
57 | See the [wiki](https://github.com/memeda/sequence-labeling-by-nn/wiki) pages for more detailed information.


--------------------------------------------------------------------------------
/UML/slnn_visio.vsdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/UML/slnn_visio.vsdx


--------------------------------------------------------------------------------
/run_flow_example/README.md:
--------------------------------------------------------------------------------
 1 | # running flow example
 2 | 
 3 | ```shell
 4 | sh buildflow.sh # build program 
 5 | sh runflow.sh # run sample , only postagger input1_feature2output model is demonstrated
 6 | ```
 7 | 
 8 | To ensure the build flow works, **your machine must have the `boost` library installed**. Versions `1.57` and `1.58` have been validated; `1.60` does not work because some libraries have become header-only (hpp) instead of compiled libs.
 9 | 
10 | ## current models
11 | 
12 | The code has been restructured about 2 times, and there is still a lot of duplicated code across the different models and tasks.
13 | 
14 | For now, almost every task has the same model structure, including 2 input types and 3 output types. They are described as follows:
15 | 
16 | 1. input : single input(input1 , input2d) , double channel 
17 | 
18 |     single input means only the training data tokens are used as input. For CWS and POSTAG there is only the word (the character for CWS), so it may also be named input1. For NER, both word and postag are the input, so it is also called input2d;
19 | 
20 |     double channel means using not only the training data tokens but also word embeddings from unlabelled data. The word embeddings are trained using `Word2vec` with the `skip-gram` option.
21 | 
22 | 2. output : classification , pretag , crf
23 |     
24 |     classification means using only the current input sequence information to predict the tag,
25 |     pretag means adding "previous tag information",
26 |     crf means doing Viterbi decoding.
27 | 
28 | So there may be at least 6 models (some tasks may not have a classification output model).
29 | 
30 | What's more, for POSTAG input1+classification we have added handcrafted features, RNN and GRU as replacements for LSTM, MLP and so on. The number of models keeps growing, and it has become difficult to demonstrate all of them.
31 | 
32 | And for compile speed, we have commented out the building of some models. If you need to build one, just uncomment the `add_subdirectory(XXX)` line in the CMakeLists.txt under the task's root directory, such as `postagger/CMakeLists.txt`.
33 | 


--------------------------------------------------------------------------------
/run_flow_example/buildflow.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | #
 3 | # Build flow: fetch the git submodules, clone eigen into build/ and compile
 4 | # the project with cmake + make.
 5 | 
 6 | # set wd to the repository root, independent of the caller's directory
 7 | cd "$(dirname "$0")/.." || exit 1
 8 | 
 9 | # init the submodules listed in .gitmodules
10 | echo "init cnn submodule" >&2
11 | git submodule init || exit 1
12 | git submodule update || exit 1
13 | 
14 | # build
15 | mkdir -p build
16 | cd build || exit 1
17 | # get eigen; skip the clone when a previous run already fetched it
18 | echo "clone eigen3" >&2
19 | [ -d eigen ] || git clone https://github.com/RLovelett/eigen.git eigen || exit 1
20 | 
21 | # cmake: we are already inside build/, so hand over eigen as an absolute
22 | # path instead of the relative "build/eigen/"
23 | cmake -DEIGEN3_INCLUDE_DIR="$(pwd)/eigen/" .. || exit 1
24 | 
25 | #make
26 | make -j2
27 | 


--------------------------------------------------------------------------------
/run_flow_example/runflow.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | #
 3 | # Run flow: train, evaluate (devel) and predict with the POS-tagger
 4 | # input1 feature2output model on the bundled 50-sentence sample data.
 5 | # NOTE(review): the binary is presumably produced by the postagger
 6 | # sub-project, which is commented out in src/CMakeLists.txt - enable it first.
 7 | 
 8 | # change work directory to root, independent of the caller's directory
 9 | cd "$(dirname "$0")/.." || exit 1
10 | pwd
11 | F2O_BIN="./bin/pos_input1_classification_feature2output_layer"
12 | RNN_TYPE="lstm"
13 | # the sample data lives under run_flow_example/ (there is no example/ directory)
14 | EXAMPLE_DIR="run_flow_example"
15 | ANNOTATED_FILE="$EXAMPLE_DIR/sampledata/postag/PTB_train.50.pos"
16 | INPUT_FILE="$EXAMPLE_DIR/sampledata/postag/PTB_input.50.pos"
17 | MODEL_PATH="$EXAMPLE_DIR/f2o.model"
18 | PREDICT_OUTPUT="$EXAMPLE_DIR/predict.out"
19 | [ -e "$MODEL_PATH" ] && /bin/rm "$MODEL_PATH" # remove it if exists
20 | 
21 | # train (the tiny training set doubles as the devel set)
22 | echo "-- training process --" >&2
23 | "$F2O_BIN" train "$RNN_TYPE" --cnn-mem 256 --training_data "$ANNOTATED_FILE" --devel_data "$ANNOTATED_FILE" --max_epoch 5 --dropout_rate 0 --model "$MODEL_PATH"
24 | 
25 | # devel
26 | echo "-- devel process --" >&2
27 | "$F2O_BIN" devel "$RNN_TYPE" --cnn-mem 256 --devel_data "$ANNOTATED_FILE" --model "$MODEL_PATH"
28 | 
29 | # predict 
30 | echo "-- predict process --" >&2
31 | "$F2O_BIN" predict "$RNN_TYPE" --cnn-mem 256 --raw_data "$INPUT_FILE" --model "$MODEL_PATH" --output "$PREDICT_OUTPUT"
32 | 
33 | 


--------------------------------------------------------------------------------
/run_flow_example/sampledata/cws/PKU_input.50.cws:
--------------------------------------------------------------------------------
 1 | 节目歌颂改革开放，歌颂农民新生活，充满了浓郁的民族风格，受到市民的热烈欢迎。
 2 | 中国国际男子网球希望赛5月举行
 3 | 河南省泌阳县位于伏牛山和桐柏山的交界处，经济比较落后。
 4 | 杨鹏飞福建省莆田市涵江区委书记
 5 | 我一看，这张票的原价比下午那张高，为350法郎，而他的出价比下午还低200法郎，就更不信了：“假的！”
 6 | 各级公安机关要明确肩负的职责，增强工作主动性，坚定不移地贯彻『严打』方针，狠狠打击那些影响治安稳定的突出犯罪活动。
 7 | 茶汤虎年吉祥
 8 | 3.音像制品制作、出版、发行，电子出版物（中方控股或占主导地位）
 9 | 目前，丰田在中国已成立了28家汽车零部件合资公司。
10 | 四、积极开展议会外交，加强同外国议会的交往与合作
11 | 突尼斯外汇主要来源之一的旅游业，去年共接待外国游客达426.3万人，比1996年增长9.7％，旅游收入达15.5亿美元，增长9.9％。
12 | 如何解决这一难题？
13 | 浏阳市国税局官渡征收分局开展了一次“千卷问形象”活动，通过对收回1181份问卷测定，服务态度满意率为98.4％，办事效率满意率为98.4％，工作作风满意率为99.21％，执行政策满意率为99.14％，社会形象满意率为99.14％。
14 | 读者也许不解：为什么短短时间内会有这般奇迹出现？
15 | 只有让校长年复一年地“参差”下去了。
16 | 他们通过扩股、配股等产权交易形式和手段，开展资本运作，使企业产权更明晰，资本得以更快积累，资产实现快速增长。
17 | 同时要尽快改变目前国内润滑油企业散、乱、小的局面，发展有实力、高水平的企业集团。
18 | 我们将努力改进工作，不断提高服务水平。
19 | 没有水源的地方，发动群众群策群力，保墒增墒，确保春播任务的完成。
20 | 历史昭示我们，没有先进科技和教育的国家和民族，就没有参与世界竞争的资格。
21 | 令人不解的是，房门未被破坏。
22 | 但是，由于基础薄弱，西藏发展教育的任务艰巨，全区义务教育目标仍以普及六年制教育为主。
23 | 1991年，其作品《白山黑水》获纪念建党七十周年全国美展银奖，并获首届“关山月中国画创作奖”学术奖。
24 | 各地要进一步加强对扶贫开发工作的领导，增加投入，着力改善贫困地区的生产条件，支持有助于直接解决温饱的种植业、养殖业、林果业，以及农副产品加工业的发展。
25 | 我们空中乘务员从发型、化妆到着装和配带的物品都要依照严格的规范进行。
26 | 预计28日晚上到3月1日，新疆西部、青藏高原东部有小到中雪或雨夹雪，西南地区东部、华南大部、江南大部有小到中雨，部分地区有大雨；
27 | 近年来/l/%，伊利集团公司在促进公司整体实力不断壮大的前提下，兼并、租赁、托管了一些处于困境的企业，解决了这些企业中已经下岗或濒于下岗的职工就业问题。
28 | 该县劳动、保险等部门常年性开展择业咨询服务，已免费举办再就业岗前培训12期，计3000多人次。
29 | 蒸汽机的发明和应用带动了工业发展，在相当长一段时间内，英国是世界科学中心和产业革命中心，是19世纪的最强工业国。
30 | 追在最前面的工商干部张宪清一个箭步冲上去，紧紧抓住了歹徒的手臂，在周围群众的配合下将歹徒擒获，并扭送到当地派出所。
31 | 觉得《骆驼祥子》结局过于悲惨，我有些受不了。
32 | 还是那个秋天，我辗转于北京团结湖林立的楼厦之间，苦苦打听，终于寻找到了当年曾为政协一次会议缝制第一面国旗样旗的老人赵文瑞，采写了一篇通讯：《情笃意深绣国旗———记第一面五星红旗的缝制者赵文瑞》。
33 | 近年来/l/%儿童文学作品中类似的创新之作不胜枚举。
34 | 去年，大化县委、县政府在七百乡发起了告别茅草屋活动。
35 | 他将飞速发展的生物技术带来的变化称为“第三次产业革命”，认为它将会改变人类自身的基因和体质。
36 | 这个时机把握得相当好，当时半导体技术正从实验室走向产业化，对于所有企业来说，学习这些新技术都必须从零开始。
37 | 但记者在街上却看到了另一番景象。
38 | 李岚清表示希望双方共同努力，使两国的经济、贸易和科技合作提高到一个新的水平。
39 | 对弄虚作假者，先罢了他的官，并追究上级领导责任。
40 | 福州1.2—1.4—0.5
41 | 虎年春节前夕，记者在吉林省吉林市采访，看到一家公司七八位工人午休时颇为激烈的争论：有的慷慨激昂地对一位法官的判决公正与否直抒胸臆，有的对一件民告官案件的结局颇为自信地作出预测……
42 | 戏剧艺术风格的形成是由艺术家的创作个性决定的，艺术家与众不同的创作风格是其独特的生活阅历、情感体验乃至思维方式、性格特征的体现。
43 | 他料想前不久寄给温源宁先生的稿子不会立即刊登。
44 | 人民币不贬值有利于维护亚洲乃至世界金融形势的稳定，中国付出的代价是值得的。
45 | 站在门口环顾房间，右侧墙上有扇窗，室内一张单人床、两把椅子、一个大衣柜、一张桌子。
46 | 公司在1997年11月11日向大连市中级人民法院递交诉状，诉刘某某“采取故意捏造并散布虚假事实的手段，诋毁原告产品声誉”。
47 | 有关科索沃问题，会议采纳了俄罗斯关于不应只对贝尔格莱德施加压力，对科索沃地区有关领导人也应施压的主张；
48 | 琼斯能为中国田坛带来什么？
49 | 根据宪法和有关法律的规定，一个代表团或有30名以上的代表联名，可以向全国人大提出属于全国人大职权范围内的议案。
50 | 从中，人们可以看到新一届政府分离政企的强大决心，人们也期待着中国电信管理科学化的到来。
51 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | INCLUDE_DIRECTORIES(${source_directory})
 2 | # Path and file-list variables, all defined before the add_subdirectory() calls at the bottom of this file.
 3 | set(util_directory
 4 |     ${source_directory}/utils
 5 | )
 6 | 
 7 | set(module_directory
 8 |     ${source_directory}/modelmodule
 9 | )
10 | set(common_headers
11 |     ${util_directory}/typedeclaration.h
12 |     ${util_directory}/utf8processing.hpp
13 |     ${util_directory}/stat.hpp
14 |     ${util_directory}/dict_wrapper.hpp
15 |     ${util_directory}/stash_model.hpp
16 |     ${util_directory}/reader.hpp
17 |     ${util_directory}/general.hpp
18 |     ${module_directory}/layers.h
19 |     ${module_directory}/hyper_layers.h
20 |     ${module_directory}/hyper_input_layers.h
21 |     ${module_directory}/hyper_output_layers.h
22 | )
23 | set(common_libs
24 |     ${module_directory}/layers.cpp
25 |     ${module_directory}/hyper_layers.cpp
26 |     ${module_directory}/hyper_input_layers.cpp
27 |     ${module_directory}/hyper_output_layers.cpp
28 | )
29 | 
30 | set(additional_base_modules
31 |    # word2vec embedding helper 
32 |    ${util_directory}/word2vec_embedding_helper.h
33 |    ${util_directory}/word2vec_embedding_helper.cpp
34 | )
35 | 
36 | # context feature module (feature extraction + its layer, header and source)
37 | 
38 | set(context_module
39 |     ${module_directory}/context_feature.h
40 |     ${module_directory}/context_feature.cpp
41 |     ${module_directory}/context_feature_layer.h
42 |     ${module_directory}/context_feature_layer.cpp
43 | )
44 | 
45 | # base reader (header-only, reader.hpp is also part of common_headers)
46 | 
47 | set(base_reader_module
48 |     ${util_directory}/reader.hpp
49 | )
50 | 
51 | ## unit test: Catch single header; thirdparty_dir is not set in this file
52 | set(unittest_framework_include
53 |     ${thirdparty_dir}/catch/include/catch.hpp
54 | )
55 | 
56 | ####################    Layer Modules (modelmodule)  ###########
57 | # NOTE(review): from here on ${source_dir} is used while the variables above use ${source_directory}; neither is set in this file - confirm both are defined by the parent CMakeLists.txt
58 | set(layer_dir "${source_dir}/modelmodule")
59 | 
60 | FILE(GLOB layer_headers "${layer_dir}/*.h*")
61 | 
62 | 
63 | ####################    Trivial Modules    #####################
64 | 
65 | SET(trivial_dir "${source_dir}/trivial")
66 | 
67 | FILE(GLOB lookup_table_headers "${trivial_dir}/lookup_table/*.h*")
68 | FILE(GLOB charcode_headers "${trivial_dir}/charcode/*.h*")
69 | 
70 | ###################    utils (new)         ######################
71 | SET(utils_dir "${source_dir}/utils")
72 | 
73 | ADD_SUBDIRECTORY(modelmodule)
74 | add_subdirectory(trivial)
75 | 
76 | add_subdirectory(unittest)
77 | # Task sub-projects: only ner is enabled here; postagger and segmenter are commented out.
78 | #add_subdirectory(postagger)
79 | add_subdirectory(ner)
80 | #add_subdirectory(segmenter)
81 | 


--------------------------------------------------------------------------------
/src/modelmodule/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Build the static `layers` library from the headers and sources of this directory.
2 | # The former bare "*" pattern also matched CMakeLists.txt and any stray file, so the patterns are explicit.
3 | file(GLOB layer_all_files "*.h*" "*.cpp")
4 | add_library(layers STATIC ${layer_all_files})
5 | set_property(TARGET layers PROPERTY FOLDER "libraries")


--------------------------------------------------------------------------------
/src/modelmodule/context_feature.cpp:
--------------------------------------------------------------------------------
 1 | #include "context_feature.h"
 2 | 
 3 | namespace slnn{
 4 | 
 5 | const Index ContextFeature::WordSOSId; // storage for the static id that extract() writes for positions before the sentence start
 6 | const Index ContextFeature::WordEOSId; // storage for the static id that extract() writes for positions past the sentence end
 7 | 
 8 | ContextFeature::ContextFeature(DictWrapper &dict_wrapper, unsigned context_left_size,
 9 |                                unsigned context_right_size, unsigned word_dim)
10 |     // Inside the initializers the bare names are the constructor arguments, not the members.
11 |     : context_size(context_left_size + context_right_size),
12 |       context_left_size(context_left_size), context_right_size(context_right_size),
13 |       rwrapper(dict_wrapper), word_dim(word_dim)
14 | {}
15 | 
16 | void ContextFeature::set_parameters(unsigned context_left_size, unsigned context_right_size, unsigned word_dim)
17 | {   // Overwrites the window sizes and the word dimension; the arguments shadow the members, hence this->.
18 |     this->context_left_size = context_left_size;
19 |     this->context_right_size = context_right_size;
20 |     this->context_size = context_left_size + context_right_size;
21 |     this->word_dim = word_dim;
22 | }
23 | 
24 | std::string ContextFeature::get_feature_info() const // one-line textual summary of the window configuration
25 | {
26 |     std::ostringstream summary;
27 |     summary << "context size : " << context_size << ", left size : " << context_left_size;
28 |     summary << ", right size: " << context_right_size << ", totally context feature dim : " << get_feature_dim();
29 |     return summary.str();
30 | }
31 | 
32 | void ContextFeature::extract(const IndexSeq &seq, ContextFeatureDataSeq &context_feature_data_seq)
33 | {
34 |     // Per token: ids of the left neighbours (nearest first), then of the right neighbours; slots outside the sentence get WordSOSId / WordEOSId.
35 |     const int nr_tokens = seq.size();
36 |     ContextFeatureDataSeq extracted(nr_tokens, ContextFeatureData(context_size));
37 |     for( Index center = 0; center < nr_tokens; ++center )
38 |     {
39 |         ContextFeatureData &slots = extracted.at(center);
40 |         unsigned slot = 0 ;
41 |         for( Index dist = 1 ; dist <= context_left_size ; ++dist, ++slot )
42 |         {
43 |             const int pos = center - dist;
44 |             if( pos < 0 ){ slots.at(slot) = WordSOSId; }
45 |             else { slots.at(slot) = seq.at(pos); }
46 |         }
47 |         for( Index dist = 1 ; dist <= context_right_size; ++dist, ++slot )
48 |         {
49 |             const int pos = center + dist;
50 |             if( pos >= nr_tokens ){ slots.at(slot) = WordEOSId; }
51 |             else { slots.at(slot) = seq.at(pos); }
52 |         }
53 |     }
54 |     using std::swap; // the caller's sequence is replaced only after the whole extraction went through
55 |     swap(context_feature_data_seq, extracted);
56 | }
57 | 
58 | void ContextFeature::debug_context_feature_seq(const ContextFeatureDataSeq &context_feature_data_seq)
59 | {   // Writes one stderr line per token: its context ids separated by single spaces.
60 |     std::cerr << "context feature output for DEBUG.\n";
61 |     for( const ContextFeatureData &fdata : context_feature_data_seq )
62 |     {
63 |         assert(fdata.size() > 0);
64 |         for( size_t idx = 0; idx < fdata.size(); ++idx )
65 |         {
66 |             if( idx != 0 ){ std::cerr << " "; }
67 |             std::cerr << fdata[idx];
68 |         }
69 |         std::cerr << "\n";
70 |     }
71 | }
72 | 
73 | } // end of namespace slnn


--------------------------------------------------------------------------------
/src/modelmodule/context_feature_layer.cpp:
--------------------------------------------------------------------------------
 1 | #include "context_feature_layer.h"
 2 | 
 3 | namespace slnn{
 4 | 
 5 | ContextFeatureLayer::ContextFeatureLayer(dynet::Model *m, const dynet::LookupParameter &word_lookup_param)
 6 |     // The two extra parameters are registered with the same dimension as the word lookup table.
 7 |     : word_lookup_param(word_lookup_param), pcg(nullptr),
 8 |       word_sos_param(m->add_parameters(word_lookup_param.dim())),
 9 |       word_eos_param(m->add_parameters(word_lookup_param.dim()))
10 | {}
11 | 
12 | } // end of namespace slnn


--------------------------------------------------------------------------------
/src/modelmodule/layers.cpp:
--------------------------------------------------------------------------------
 1 | #include "layers.h" 
 2 | 
 3 | using namespace dynet;
 4 | using namespace std;
 5 | namespace slnn {
 6 | 
 7 | // DenseLayer: registers a weight of shape {output_dim, input_dim} and a bias of shape {output_dim}.
 8 | 
 9 | DenseLayer::DenseLayer(Model *m, unsigned input_dim, unsigned output_dim)
10 |     : w(m->add_parameters({ output_dim, input_dim })),
11 |       b(m->add_parameters({ output_dim }))
12 | {}
13 | 
14 | DenseLayer::~DenseLayer() = default;
15 | 
16 | // Merge2Layer: one weight per input ({output_dim, inputN_dim}) plus a single bias ({output_dim}).
17 | 
18 | Merge2Layer::Merge2Layer(Model *m, unsigned input1_dim, unsigned input2_dim, unsigned output_dim)
19 |     : w1(m->add_parameters({ output_dim, input1_dim })),
20 |       w2(m->add_parameters({ output_dim, input2_dim })),
21 |       b(m->add_parameters({ output_dim }))
22 | {}
23 | 
24 | Merge2Layer::~Merge2Layer() = default;
25 | 
26 | 
27 | // Merge3Layer: one weight per input ({output_dim, inputN_dim}) plus a single bias ({output_dim}).
28 | 
29 | Merge3Layer::Merge3Layer(Model *m, unsigned input1_dim, unsigned input2_dim, unsigned input3_dim, unsigned output_dim)
30 |     : w1(m->add_parameters({ output_dim, input1_dim })),
31 |       w2(m->add_parameters({ output_dim, input2_dim })),
32 |       w3(m->add_parameters({ output_dim, input3_dim })),
33 |       b(m->add_parameters({ output_dim }))
34 | {}
35 | 
36 | Merge3Layer::~Merge3Layer() = default;
37 | 
38 | 
39 | // Merge4Layer: one weight per input ({output_dim, inputN_dim}) plus a single bias ({output_dim}).
40 | Merge4Layer::Merge4Layer(Model *m, unsigned input1_dim, unsigned input2_dim, unsigned input3_dim,
41 |                          unsigned input4_dim, unsigned output_dim)
42 |     : w1(m->add_parameters({ output_dim, input1_dim })),
43 |       w2(m->add_parameters({ output_dim, input2_dim })),
44 |       w3(m->add_parameters({ output_dim, input3_dim })),
45 |       w4(m->add_parameters({ output_dim, input4_dim })),
46 |       b(m->add_parameters({ output_dim }))
47 | {}
48 | 
49 | Merge4Layer::~Merge4Layer() = default;
50 | 
51 | // MLPHiddenLayer: registers one weight and one bias per entry of layers_dim; layer i takes the width
52 | // of layer i-1 as input (input_dim for the first). Dropout starts enabled with the given rate.
53 | MLPHiddenLayer::MLPHiddenLayer(Model *m, unsigned input_dim, const vector<unsigned> &layers_dim,
54 |     dynet::real dropout_rate,
55 |     NonLinearFunc *nonlinear_func)
56 |     : nr_hidden_layer(layers_dim.size()),
57 |     output_dim(nr_hidden_layer > 0 ? layers_dim.back() : input_dim),
58 |     w_list(nr_hidden_layer),
59 |     b_list(nr_hidden_layer),
60 |     w_expr_list(nr_hidden_layer),
61 |     b_expr_list(nr_hidden_layer),
62 |     dropout_rate(dropout_rate),
63 |     is_enable_dropout(true),
64 |     nonlinear_func(nonlinear_func)
65 | {
66 |     assert(nr_hidden_layer > 0);
67 |     // One bounds-checked loop, no special-cased w_list[0]: an empty layers_dim cannot index out of range when asserts are compiled out.
68 |     for( unsigned i = 0 ; i < nr_hidden_layer ; ++i )
69 |     {
70 |         const unsigned in_dim = (i == 0 ? input_dim : layers_dim.at(i - 1));
71 |         w_list.at(i) = m->add_parameters({ layers_dim.at(i), in_dim });
72 |         b_list.at(i) = m->add_parameters({ layers_dim.at(i) });
73 |     }
74 | }
75 | 
76 | 
77 | 
78 | } // end namespace slnn


--------------------------------------------------------------------------------
/src/ner/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | include_directories(${source_directory})
 2 | 
 3 | # directory layout of the NER sub-project
 4 | set(ner_base_dir ${source_directory}/ner)
 5 | set(ner_base_model_dir ${ner_base_dir}/base_model)
 6 | set(ner_modelhandler_dir ${ner_base_dir}/model_handler)
 7 | 
 8 | # single input model: header and implementation
 9 | set(single_input_model_headers ${ner_base_model_dir}/input2D_model.h)
10 | set(single_input_model_libs ${ner_base_model_dir}/input2D_model.cpp)
11 | 
12 | # single input model handler
13 | set(single_input_modelhandler_headers ${ner_modelhandler_dir}/input2D_modelhandler.h)
14 | 
15 | # NER-local names for the project-wide header and source lists
16 | set(ner_common_headers ${common_headers})
17 | set(ner_common_libs ${common_libs})
18 | 
19 | # one sub-directory per executable
20 | add_subdirectory(ner)
21 | add_subdirectory(ner_doublechannel)
22 | add_subdirectory(ner_crf_dc)
23 | add_subdirectory(ner_crf)
24 | 
25 | add_subdirectory(ner_single_classification)


--------------------------------------------------------------------------------
/src/ner/base_model/input2D_model.cpp:
--------------------------------------------------------------------------------
 1 | #include "input2D_model.h"
 2 | 
 3 | namespace slnn{
 4 | 
 5 | const std::string Input2DModel::UNK_STR = "UNK_STR" ; // definition of the static member declared in the class; presumably the unknown-word token — confirm at its users
 6 | 
 7 | Input2DModel::Input2DModel() // no dynet model and no layers yet: all owning pointers start out null
 8 |     : m(nullptr),
 9 |       word_dict_wrapper(word_dict),
10 |       input_layer(nullptr),
11 |       bilstm_layer(nullptr),
12 |       output_layer(nullptr)
13 | {}
14 | 
15 | Input2DModel::~Input2DModel() // frees everything held through the raw pointers; deleting a null pointer is a no-op
16 | {
17 |     delete input_layer ;
18 |     delete bilstm_layer ;
19 |     delete output_layer ;
20 |     delete m ; // the dynet model is released last, after the three layers
21 | }
22 | 
23 | dynet::expr::Expression Input2DModel::build_loss(dynet::ComputationGraph &cg,
24 |     const IndexSeq &words_seq, const IndexSeq &postag_seq, const IndexSeq &gold_ner_seq)
25 | {
26 |     // Training pass: input layer -> BiLSTM with dropout set -> loss of the output layer against gold_ner_seq.
27 |     typedef std::vector<dynet::expr::Expression> ExprSeq;
28 | 
29 |     // hand the computation graph of this call to every layer
30 |     input_layer->new_graph(cg);
31 |     bilstm_layer->new_graph(cg);
32 |     output_layer->new_graph(cg);
33 |     bilstm_layer->set_dropout();
34 |     bilstm_layer->start_new_sequence();
35 | 
36 |     ExprSeq token_exprs;
37 |     input_layer->build_inputs(words_seq, postag_seq, token_exprs);
38 |     ExprSeq left_to_right;
39 |     ExprSeq right_to_left;
40 |     bilstm_layer->build_graph(token_exprs, left_to_right, right_to_left);
41 |     return output_layer->build_output_loss(left_to_right, right_to_left, gold_ner_seq);
42 | }
43 | 
44 | void Input2DModel::predict(dynet::ComputationGraph &cg,
45 |     const IndexSeq &words_seq, const IndexSeq &postag_seq, IndexSeq &pred_ner_seq)
46 | {
47 |     // Inference pass: input layer -> BiLSTM with dropout disabled -> output layer, which receives pred_ner_seq.
48 |     typedef std::vector<dynet::expr::Expression> ExprSeq;
49 | 
50 |     input_layer->new_graph(cg);
51 |     bilstm_layer->new_graph(cg);
52 |     output_layer->new_graph(cg);
53 |     bilstm_layer->disable_dropout();
54 |     bilstm_layer->start_new_sequence();
55 | 
56 |     ExprSeq token_exprs;
57 |     input_layer->build_inputs(words_seq, postag_seq, token_exprs);
58 |     ExprSeq left_to_right;
59 |     ExprSeq right_to_left;
60 |     bilstm_layer->build_graph(token_exprs, left_to_right, right_to_left);
61 |     output_layer->build_output(left_to_right, right_to_left, pred_ner_seq);
62 | }
63 | 
64 | } // end of namespace slnn
65 | 


--------------------------------------------------------------------------------
/src/ner/base_model/input2D_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_NER_BASEMODEL_INPUT2D_MODEL_H_
 2 | #define SLNN_NER_BASEMODEL_INPUT2D_MODEL_H_
 3 | 
 4 | #include <iostream>
 5 | 
 6 | #include <boost/archive/text_iarchive.hpp>
 7 | #include <boost/program_options.hpp>
 8 | 
 9 | #include "dynet/dynet.h"
10 | #include "dynet/dict.h"
11 | #include "utils/typedeclaration.h"
12 | #include "utils/dict_wrapper.hpp"
13 | #include "modelmodule/hyper_layers.h"
14 | 
15 | namespace slnn{
16 | 
17 | // Abstract base of the NER models that are fed a word sequence together with a POS-tag sequence.
18 | // NOTE(review): holds raw owning pointers (deleted in the destructor) while copying is not disabled.
19 | class Input2DModel
20 | {
21 | public:
22 |     Input2DModel();
23 |     virtual ~Input2DModel();
24 | 
25 |     virtual void set_model_param(const boost::program_options::variables_map &var_map) = 0;
26 |     virtual void build_model_structure() = 0;
27 |     virtual void print_model_info() = 0;
28 | 
29 |     virtual dynet::expr::Expression build_loss(dynet::ComputationGraph &cg, const IndexSeq &words_seq,
30 |                                                const IndexSeq &postag_seq, const IndexSeq &gold_ner_seq);
31 |     virtual void predict(dynet::ComputationGraph &cg, const IndexSeq &words_seq,
32 |                          const IndexSeq &postag_seq, IndexSeq &pred_ner_seq);
33 | 
34 |     dynet::Dict &get_word_dict() { return word_dict; }
35 |     dynet::Dict &get_postag_dict() { return postag_dict; }
36 |     dynet::Dict &get_ner_dict() { return ner_dict; }
37 |     DictWrapper &get_word_dict_wrapper() { return word_dict_wrapper; }
38 |     dynet::Model *get_dynet_model() { return m; }
39 | 
40 |     // Reads the dynet model from a Boost text archive on `mis`.
41 |     // `m` is dereferenced here, so it has to point to a model before this is called.
42 |     void set_dynet_model(std::istream &mis) { boost::archive::text_iarchive ti(mis); ti >> *m; }
43 | 
44 |     virtual void save_model(std::ostream &os) = 0;
45 |     virtual void load_model(std::istream &is) = 0;
46 | 
47 |     static const std::string UNK_STR;
48 | 
49 | protected:
50 |     dynet::Model *m;
51 |     dynet::Dict word_dict;
52 |     dynet::Dict postag_dict;
53 |     dynet::Dict ner_dict;
54 |     DictWrapper word_dict_wrapper; // constructed from word_dict, so it stays declared after it
55 | 
56 |     Input2D *input_layer;
57 |     BILSTMLayer *bilstm_layer;
58 |     OutputBase *output_layer;
59 | };
60 | 
61 | 
62 | } // end of namespace slnn 
63 | 
64 | 
65 | #endif
66 | 


--------------------------------------------------------------------------------
/src/ner/model_handler/input2D_modelhandler.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/ner/model_handler/input2D_modelhandler.cpp


--------------------------------------------------------------------------------
/src/ner/ner/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | include_directories(${source_directory})
 2 | 
 3 | set(ner_headers bilstmmodel4ner.hpp)    # model header of this executable
 4 | 
 5 | # `ner` compiles the shared layer sources itself and links dynet plus Boost.
 6 | add_executable(ner
 7 |     ner.cpp
 8 |     ${common_headers} ${common_libs}
 9 |     ${ner_headers})
10 | target_link_libraries(ner dynet ${Boost_LIBRARIES})
11 | 


--------------------------------------------------------------------------------
/src/ner/ner_crf/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | include_directories(${source_directory})
 2 | 
 3 | # crf variant: model and model handler
 4 | set(ner_crf_headers
 5 |     ner_crf_model.h
 6 |     ner_crf_modelhandler.h)
 7 | set(ner_crf_libs
 8 |     ner_crf_model.cpp
 9 |     ner_crf_modelhandler.cpp)
10 | 
11 | # the shared layer sources are compiled into the executable as well
12 | add_executable(ner_crf
13 |     ner_crf.cpp
14 |     ${ner_crf_headers} ${common_headers}
15 |     ${ner_crf_libs} ${common_libs})
16 | 
17 | target_link_libraries(ner_crf dynet ${Boost_LIBRARIES})


--------------------------------------------------------------------------------
/src/ner/ner_crf/ner_crf_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef NER_CRF_H_INCLUDED_
 2 | #define NER_CRF_H_INCLUDED_
 3 | 
 4 | #include <string>
 5 | #include <sstream>
 6 | 
 7 | #include "dynet/nodes.h"
 8 | #include "dynet/dynet.h"
 9 | #include "dynet/training.h"
10 | #include "dynet/rnn.h"
11 | #include "dynet/lstm.h"
12 | #include "dynet/dict.h"
13 | #include "dynet/expr.h"
14 | 
15 | #include <boost/program_options.hpp>
16 | 
17 | #include "utils/typedeclaration.h"
18 | #include "modelmodule/layers.h"
19 | #include "utils/dict_wrapper.hpp"
20 | #include "utils/stat.hpp"
21 | 
22 | namespace slnn
23 | {
24 | struct NERCRFModelHandler;
25 | 
26 | // Plain aggregate with the hyper-parameters, dynet parameters, layers and dictionaries of the CRF NER model.
27 | struct NERCRFModel
28 | {
29 |     friend struct NERCRFModelHandler;
30 | 
31 |     // embedding dimensions
32 |     unsigned word_embedding_dim;
33 |     unsigned postag_embedding_dim;
34 |     unsigned ner_embedding_dim;
35 |     // dictionary sizes behind the embeddings
36 |     unsigned word_embedding_dict_size;
37 |     unsigned postag_embedding_dict_size;
38 |     unsigned ner_embedding_dict_size;
39 |     // LSTM configuration and width of the emission hidden layer
40 |     unsigned nr_lstm_stacked_layer;
41 |     unsigned lstm_x_dim;
42 |     unsigned lstm_h_dim;
43 |     unsigned emit_hidden_layer_dim;
44 | 
45 |     dynet::Model *m; // dynet model holding the parameters
46 | 
47 |     // network layers, held through raw pointers
48 |     Merge2Layer *merge_input_layer;
49 |     BILSTMLayer *bilstm_layer;
50 |     Merge3Layer *emit_hidden_layer;
51 |     DenseLayer *emit_output_layer;
52 | 
53 |     dynet::LookupParameter words_lookup_param;
54 |     dynet::LookupParameter postag_lookup_param;
55 |     dynet::LookupParameter ner_lookup_param;
56 |     dynet::LookupParameter init_score_lookup_param;
57 |     dynet::LookupParameter trans_score_lookup_param;
58 | 
59 |     // dictionaries; the wrapper stays declared after word_dict (presumably it wraps it — confirm in the .cpp)
60 |     dynet::Dict word_dict;
61 |     dynet::Dict postag_dict;
62 |     dynet::Dict ner_dict;
63 |     DictWrapper word_dict_wrapper;
64 | 
65 |     static const std::string UNK_STR;
66 | 
67 |     NERCRFModel();
68 |     ~NERCRFModel();
69 | 
70 |     void build_model_structure();
71 |     void print_model_info();
72 | 
73 |     // Training entry point; the returned expression is presumably the loss (body not visible in this header).
74 |     dynet::expr::Expression viterbi_train(dynet::ComputationGraph *p_cg,
75 |                                           const IndexSeq *p_sent, const IndexSeq *p_postag_seq,
76 |                                           const IndexSeq *p_ner_seq,
77 |                                           float dropout_rate,
78 |                                           Stat *p_stat = nullptr);
79 |     // Prediction entry point; p_predict_ner_seq is the output argument (assumed from its non-const type).
80 |     void viterbi_predict(dynet::ComputationGraph *p_cg,
81 |                          const IndexSeq *p_sent, const IndexSeq *p_postag_seq,
82 |                          IndexSeq *p_predict_ner_seq);
83 | };
84 | 
85 | 
86 | } // end of namespace
87 | 
88 | 
89 | #endif
90 | 


--------------------------------------------------------------------------------
/src/ner/ner_crf_dc/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | include_directories(${source_directory})
 2 | 
 3 | # crf dc variant: model and model handler
 4 | set(ner_crf_dc_headers
 5 |     ner_crf_dc_model.h
 6 |     ner_crf_dc_modelhandler.h)
 7 | set(ner_crf_dc_libs
 8 |     ner_crf_dc_model.cpp
 9 |     ner_crf_dc_modelhandler.cpp)
10 | 
11 | # the shared layer sources are compiled into the executable as well
12 | add_executable(ner_crf_dc
13 |     ner_crf_dc.cpp
14 |     ${ner_crf_dc_headers} ${common_headers}
15 |     ${ner_crf_dc_libs} ${common_libs})
16 | 
17 | target_link_libraries(ner_crf_dc dynet ${Boost_LIBRARIES})


--------------------------------------------------------------------------------
/src/ner/ner_crf_dc/ner_crf_dc_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef NER_CRF_DC_H_INCLUDED_
 2 | #define NER_CRF_DC_H_INCLUDED_
 3 | 
 4 | #include <string>
 5 | #include <sstream>
 6 | 
 7 | #include "dynet/nodes.h"
 8 | #include "dynet/dynet.h"
 9 | #include "dynet/training.h"
10 | #include "dynet/rnn.h"
11 | #include "dynet/lstm.h"
12 | #include "dynet/dict.h"
13 | #include "dynet/expr.h"
14 | 
15 | #include <boost/program_options.hpp>
16 | 
17 | #include "utils/typedeclaration.h"
18 | #include "modelmodule/layers.h"
19 | #include "utils/utf8processing.hpp" 
20 | #include "utils/dict_wrapper.hpp"
21 | #include "utils/stat.hpp"
22 | 
23 | namespace slnn
24 | {
25 | struct NERCRFDCModelHandler;
26 | 
27 | // Plain aggregate with the hyper-parameters, dynet parameters, layers and dictionaries of the CRF double-channel NER model.
28 | struct NERCRFDCModel
29 | {
30 |     friend struct NERCRFDCModelHandler;
31 | 
32 |     // embedding dimensions
33 |     unsigned dynamic_embedding_dim;
34 |     unsigned fixed_embedding_dim;
35 |     unsigned postag_embedding_dim;
36 |     unsigned ner_embedding_dim;
37 |     // dictionary sizes behind the embeddings
38 |     unsigned dynamic_embedding_dict_size;
39 |     unsigned fixed_embedding_dict_size;
40 |     unsigned postag_embedding_dict_size;
41 |     unsigned ner_embedding_dict_size;
42 |     // LSTM configuration and width of the emission hidden layer
43 |     unsigned nr_lstm_stacked_layer;
44 |     unsigned lstm_x_dim;
45 |     unsigned lstm_h_dim;
46 |     unsigned emit_hidden_layer_dim;
47 | 
48 |     dynet::Model *m; // dynet model holding the parameters
49 | 
50 |     // network layers, held through raw pointers
51 |     Merge3Layer *merge_doublechannel_layer;
52 |     BILSTMLayer *bilstm_layer;
53 |     Merge3Layer *emit_hidden_layer;
54 |     DenseLayer *emit_output_layer;
55 | 
56 |     dynet::LookupParameter dynamic_words_lookup_param;
57 |     dynet::LookupParameter fixed_words_lookup_param;
58 |     dynet::LookupParameter postag_lookup_param;
59 |     dynet::LookupParameter ner_lookup_param;
60 |     dynet::LookupParameter init_score_lookup_param;
61 |     dynet::LookupParameter trans_score_lookup_param;
62 | 
63 |     // dictionaries; the wrapper stays declared after dynamic_dict (presumably it wraps it — confirm in the .cpp)
64 |     dynet::Dict dynamic_dict;
65 |     dynet::Dict fixed_dict;
66 |     dynet::Dict postag_dict;
67 |     dynet::Dict ner_dict;
68 |     DictWrapper dynamic_dict_wrapper;
69 | 
70 |     static const std::string UNK_STR;
71 | 
72 |     NERCRFDCModel();
73 |     ~NERCRFDCModel();
74 | 
75 |     void build_model_structure();
76 |     void print_model_info();
77 | 
78 |     // Training entry point; the returned expression is presumably the loss (body not visible in this header).
79 |     dynet::expr::Expression viterbi_train(dynet::ComputationGraph *p_cg,
80 |                                           const IndexSeq *p_dynamic_sent, const IndexSeq *p_fixed_sent,
81 |                                           const IndexSeq *p_postag_seq, const IndexSeq *p_ner_seq,
82 |                                           float dropout_rate,
83 |                                           Stat *p_stat = nullptr);
84 |     // Prediction entry point; p_predict_ner_seq is the output argument (assumed from its non-const type).
85 |     void viterbi_predict(dynet::ComputationGraph *p_cg,
86 |                          const IndexSeq *p_dynamic_sent, const IndexSeq *p_fixed_sent, const IndexSeq *p_postag_seq,
87 |                          IndexSeq *p_predict_ner_seq);
88 | };
89 | 
90 | 
91 | } // end of namespace
92 | 
93 | 
94 | #endif
95 | 


--------------------------------------------------------------------------------
/src/ner/ner_doublechannel/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | include_directories(${source_directory})
 2 | 
 3 | # double-channel variant: model and model handler
 4 | set(ner_dc_headers
 5 |     ner_dc_model.h
 6 |     ner_dc_modelhandler.h)
 7 | set(ner_dc_libs
 8 |     ner_dc_model.cpp
 9 |     ner_dc_modelhandler.cpp)
10 | 
11 | # the shared layer sources are compiled into the executable as well
12 | add_executable(ner_dc
13 |     ner_doublechannel.cpp
14 |     ${ner_dc_headers} ${common_headers}
15 |     ${ner_dc_libs} ${common_libs})
16 | 
17 | target_link_libraries(ner_dc dynet ${Boost_LIBRARIES})


--------------------------------------------------------------------------------
/src/ner/ner_doublechannel/ner_dc_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef NER_DC_H_INCLUDED_
 2 | #define NER_DC_H_INCLUDED_
 3 | 
 4 | #include <string>
 5 | #include <sstream>
 6 | 
 7 | #include "dynet/nodes.h"
 8 | #include "dynet/dynet.h"
 9 | #include "dynet/training.h"
10 | #include "dynet/rnn.h"
11 | #include "dynet/lstm.h"
12 | #include "dynet/dict.h"
13 | #include "dynet/expr.h"
14 | 
15 | #include <boost/program_options.hpp>
16 | 
17 | #include "utils/typedeclaration.h"
18 | #include "modelmodule/layers.h"
19 | #include "utils/utf8processing.hpp" 
20 | #include "utils/dict_wrapper.hpp"
21 | #include "utils/stat.hpp"
22 | 
23 | namespace slnn
24 | {
25 | struct NERDCModelHandler;
26 | 
27 | // Plain aggregate with the hyper-parameters, dynet parameters, layers and dictionaries of the double-channel NER model.
28 | struct NERDCModel
29 | {
30 |     friend struct NERDCModelHandler;
31 | 
32 |     // embedding dimensions
33 |     unsigned dynamic_embedding_dim;
34 |     unsigned fixed_embedding_dim;
35 |     unsigned postag_embedding_dim;
36 |     unsigned ner_embedding_dim;
37 |     // dictionary sizes behind the embeddings
38 |     unsigned dynamic_embedding_dict_size;
39 |     unsigned fixed_embedding_dict_size;
40 |     unsigned postag_embedding_dict_size;
41 |     unsigned ner_embedding_dict_size;
42 |     // LSTM configuration and width of the tag hidden layer
43 |     unsigned nr_lstm_stacked_layer;
44 |     unsigned lstm_x_dim;
45 |     unsigned lstm_h_dim;
46 |     unsigned tag_layer_hidden_dim;
47 | 
48 |     dynet::Model *m; // dynet model holding the parameters
49 | 
50 |     // network layers, held through raw pointers
51 |     Merge3Layer *merge_doublechannel_layer;
52 |     BILSTMLayer *bilstm_layer;
53 |     Merge3Layer *merge_bilstm_and_pretag_layer;
54 |     DenseLayer *tag_output_linear_layer;
55 | 
56 |     dynet::LookupParameter dynamic_words_lookup_param;
57 |     dynet::LookupParameter fixed_words_lookup_param;
58 |     dynet::LookupParameter postag_lookup_param;
59 |     dynet::LookupParameter ner_lookup_param;
60 | 
61 |     dynet::Parameter TAG_SOS_param; // pre-tag input of the tag hidden layer
62 | 
63 |     // dictionaries; the wrapper stays declared after dynamic_dict (presumably it wraps it — confirm in the .cpp)
64 |     dynet::Dict dynamic_dict;
65 |     dynet::Dict fixed_dict;
66 |     dynet::Dict postag_dict;
67 |     dynet::Dict ner_dict;
68 |     DictWrapper dynamic_dict_wrapper;
69 | 
70 |     static const std::string UNK_STR;
71 | 
72 |     NERDCModel();
73 |     ~NERDCModel();
74 |     void build_model_structure();
75 |     void print_model_info();
76 | 
77 |     // Training entry point; by its name the returned expression is the negative log-likelihood (body not visible here).
78 |     dynet::expr::Expression negative_loglikelihood(dynet::ComputationGraph *p_cg,
79 |         const IndexSeq *p_dynamic_sent, const IndexSeq *p_fixed_sent, const IndexSeq *p_postag_seq,
80 |         const IndexSeq *p_ner_seq,
81 |         Stat *p_stat = nullptr);
82 |     // Prediction entry point; p_predict_ner_seq is the output argument (assumed from its non-const type).
83 |     void do_predict(dynet::ComputationGraph *p_cg,
84 |                     const IndexSeq *p_dynamic_sent, const IndexSeq *p_fixed_sent, const IndexSeq *p_postag_seq,
85 |                     IndexSeq *p_predict_ner_seq);
86 | };
87 | 
88 | 
89 | } // end of namespace
90 | 
91 | 
92 | #endif
93 | 


--------------------------------------------------------------------------------
/src/ner/ner_single_classification/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Name shared by the target, its main source and its model files.
 2 | set(exe_name ner_single_classification)
 3 | 
 4 | # model of this executable
 5 | set(specific_headers ${exe_name}_model.h)
 6 | set(specific_libs ${exe_name}_model.cpp)
 7 | 
 8 | # main source first, then the headers, then the implementation files
 9 | add_executable(${exe_name}
10 |     ${exe_name}.cpp
11 |     # headers
12 |     ${single_input_modelhandler_headers}   # model handler
13 |     ${specific_headers}                    # model
14 |     ${single_input_model_headers}          # base model
15 |     ${ner_common_headers}                  # common header
16 |     # sources
17 |     ${specific_libs}
18 |     ${single_input_model_libs}
19 |     ${ner_common_libs}
20 | )
21 | 
22 | target_link_libraries(${exe_name}
23 |     dynet
24 |     ${Boost_LIBRARIES}
25 | )
26 | 


--------------------------------------------------------------------------------
/src/ner/ner_single_classification/ner_single_classification_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_NER_NER_SINGLE_INPUT_CLASSIFICATION_H_
 2 | #define SLNN_NER_NER_SINGLE_INPUT_CLASSIFICATION_H_
 3 | 
 4 | #include <boost/log/trivial.hpp>
 5 | #include <boost/archive/text_iarchive.hpp>
 6 | #include <boost/archive/text_oarchive.hpp>
 7 | 
 8 | #include "dynet/dynet.h"
 9 | 
10 | #include "ner/base_model/input2D_model.h"
11 | namespace slnn{
12 | 
13 | // Concrete Input2DModel; supplies the configuration, construction and (de)serialisation hooks of the base.
14 | class NERSingleClassificationModel : public Input2DModel
15 | {
16 | public:
17 |     unsigned word_embedding_dim;
18 |     unsigned word_dict_size;
19 |     unsigned postag_embedding_dim;
20 |     unsigned postag_dict_size;
21 |     unsigned lstm_nr_stacked_layer;
22 |     unsigned lstm_x_dim;
23 |     unsigned lstm_h_dim;
24 |     unsigned hidden_dim;
25 |     unsigned output_dim;
26 |     dynet::real dropout_rate; // only for bilstm (output doesn't enable dropout)
27 | 
28 |     NERSingleClassificationModel();
29 |     ~NERSingleClassificationModel() override;
30 | 
31 |     // `override` makes the compiler reject any signature that drifts away from the Input2DModel virtuals.
32 |     void set_model_param(const boost::program_options::variables_map &var_map) override;
33 |     void build_model_structure() override;
34 |     void print_model_info() override;
35 |     void save_model(std::ostream &os) override;
36 |     void load_model(std::istream &is) override;
37 | };
38 | 
39 | 
40 | } // end of namespace slnn 
41 | #endif 
42 | 


--------------------------------------------------------------------------------
/src/postagger/base_model/single_input_model.cpp:
--------------------------------------------------------------------------------
 1 | #include "single_input_model.h"
 2 | 
 3 | namespace slnn{
 4 | 
 5 | const std::string SingleInputModel::UNK_STR = "UNK_STR" ; // definition of the static member declared in the class; presumably the unknown-word token — confirm at its users
 6 | 
 7 | SingleInputModel::SingleInputModel() // no dynet model and no layers yet: all owning pointers start out null
 8 |     : m(nullptr),
 9 |       input_dict_wrapper(input_dict),
10 |       input_layer(nullptr),
11 |       bilstm_layer(nullptr),
12 |       output_layer(nullptr)
13 | {}
14 | 
15 | SingleInputModel::~SingleInputModel() // frees everything held through the raw pointers; deleting a null pointer is a no-op
16 | {
17 |     delete input_layer ;
18 |     delete bilstm_layer ;
19 |     delete output_layer ;
20 |     delete m ; // the dynet model is released last, after the three layers
21 | }
22 | 
23 | dynet::expr::Expression SingleInputModel::build_loss(dynet::ComputationGraph &cg,
24 |     const IndexSeq &input_seq, const IndexSeq &gold_seq)
25 | {
26 |     // Training pass: input layer -> BiLSTM with dropout set -> loss of the output layer against gold_seq.
27 |     typedef std::vector<dynet::expr::Expression> ExprSeq;
28 | 
29 |     input_layer->new_graph(cg);
30 |     bilstm_layer->new_graph(cg);
31 |     output_layer->new_graph(cg);
32 |     bilstm_layer->set_dropout();
33 |     bilstm_layer->start_new_sequence();
34 | 
35 |     ExprSeq token_exprs;
36 |     input_layer->build_inputs(input_seq, token_exprs);
37 |     ExprSeq left_to_right;
38 |     ExprSeq right_to_left;
39 |     bilstm_layer->build_graph(token_exprs, left_to_right, right_to_left);
40 |     return output_layer->build_output_loss(left_to_right, right_to_left, gold_seq);
41 | }
42 | 
43 | void SingleInputModel::predict(dynet::ComputationGraph &cg,
44 |     const IndexSeq &input_seq, IndexSeq &pred_seq)
45 | {
46 |     // Inference pass: input layer -> BiLSTM with dropout disabled -> output layer, which receives pred_seq.
47 |     typedef std::vector<dynet::expr::Expression> ExprSeq;
48 |     input_layer->new_graph(cg);
49 |     bilstm_layer->new_graph(cg);
50 |     output_layer->new_graph(cg);
51 |     bilstm_layer->disable_dropout();
52 |     bilstm_layer->start_new_sequence();
53 | 
54 |     ExprSeq token_exprs;
55 |     input_layer->build_inputs(input_seq, token_exprs);
56 |     ExprSeq left_to_right;
57 |     ExprSeq right_to_left;
58 |     bilstm_layer->build_graph(token_exprs, left_to_right, right_to_left);
59 |     output_layer->build_output(left_to_right, right_to_left, pred_seq);
60 | }
61 | 
62 | } // end of namespace slnn
63 | 


--------------------------------------------------------------------------------
/src/postagger/base_model/single_input_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_POSTAGGER_BASEMODEL_SINGLE_INPUT_MODEL_H_
 2 | #define SLNN_POSTAGGER_BASEMODEL_SINGLE_INPUT_MODEL_H_
 3 | 
 4 | #include <iostream>
 5 | 
 6 | #include <boost/archive/text_iarchive.hpp>
 7 | #include <boost/program_options.hpp>
 8 | 
 9 | #include "dynet/dynet.h"
10 | #include "dynet/dict.h"
11 | #include "utils/typedeclaration.h"
12 | #include "utils/dict_wrapper.hpp"
13 | #include "modelmodule/hyper_layers.h"
14 | 
15 | namespace slnn{
16 | 
17 | // Abstract base of the models that are fed a single index sequence and produce one output sequence.
18 | // NOTE(review): holds raw owning pointers (deleted in the destructor) while copying is not disabled.
19 | class SingleInputModel
20 | {
21 | public:
22 |     SingleInputModel();
23 |     virtual ~SingleInputModel();
24 | 
25 |     virtual void set_model_param(const boost::program_options::variables_map &var_map) = 0;
26 |     virtual void build_model_structure() = 0;
27 |     virtual void print_model_info() = 0;
28 | 
29 |     virtual dynet::expr::Expression build_loss(dynet::ComputationGraph &cg,
30 |                                                const IndexSeq &input_seq, const IndexSeq &gold_seq);
31 |     virtual void predict(dynet::ComputationGraph &cg,
32 |                          const IndexSeq &input_seq, IndexSeq &pred_seq);
33 | 
34 |     dynet::Dict &get_input_dict() { return input_dict; }
35 |     dynet::Dict &get_output_dict() { return output_dict; }
36 |     DictWrapper &get_input_dict_wrapper() { return input_dict_wrapper; }
37 |     dynet::Model *get_dynet_model() { return m; }
38 | 
39 |     // Reads the dynet model from a Boost text archive on `mis`; `m` is dereferenced, so it must already be set.
40 |     void set_dynet_model(std::istream &mis) { boost::archive::text_iarchive ti(mis); ti >> *m; }
41 | 
42 |     virtual void save_model(std::ostream &os) = 0;
43 |     virtual void load_model(std::istream &is) = 0;
44 | 
45 |     static const std::string UNK_STR;
46 | 
47 | protected:
48 |     dynet::Model *m;
49 |     dynet::Dict input_dict;
50 |     dynet::Dict output_dict;
51 |     DictWrapper input_dict_wrapper; // constructed from input_dict, so it stays declared after it
52 |     Input1 *input_layer;
53 |     BILSTMLayer *bilstm_layer;
54 |     OutputBase *output_layer;
55 | };
56 | 
57 | 
58 | } // end of namespace slnn 
59 | 
60 | 
61 | #endif
62 | 


--------------------------------------------------------------------------------
/src/postagger/model_handler/input1_mlp_modelhandler.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/postagger/model_handler/input1_mlp_modelhandler.cpp


--------------------------------------------------------------------------------
/src/postagger/model_handler/single_input_modelhandler.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/postagger/model_handler/single_input_modelhandler.cpp


--------------------------------------------------------------------------------
/src/postagger/pos_bareinput1_classification_with_feature_no_merge/pos_bareinput1_classification_f2i_no_merge_model.cpp:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/postagger/pos_bareinput1_classification_with_feature_no_merge/pos_bareinput1_classification_f2o_no_merge_model.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/postagger/pos_bareinput1_classification_with_feature_no_merge/pos_bareinput1_classification_f2o_no_merge_model.cpp


--------------------------------------------------------------------------------
/src/postagger/pos_bareinput1_classification_with_feature_no_merge/pos_bareinput1_f2o_nonlinear_model.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/postagger/pos_bareinput1_classification_with_feature_no_merge/pos_bareinput1_f2o_nonlinear_model.cpp


--------------------------------------------------------------------------------
/src/postagger/pos_input1_mlp_with_tag/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name # target name; also the stem of the driver and model file names used below
 2 |     pos_input1_mlp_with_tag
 3 | )
 4 | 
 5 | set(specific_headers # model header belonging only to this target
 6 |     ${exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation belonging only to this target
 9 |     ${exe_name}_model.cpp
10 | )
11 | 
12 | add_executable(${exe_name} # the list variables below are not set in this file; presumably provided by a parent CMakeLists - verify
13 |                ${exe_name}.cpp # driver source for this executable
14 |                ${pos_feature_module_headers} # POS feature module headers
15 |                ${input1_mlp_headers} # base model and model handler headers
16 |                ${specific_headers}   # this target's model header
17 |                ${pos_common_headers}                # headers common to the POS targets
18 |                ${context_module}
19 |                ${pos_feature_module_libs}
20 |                ${input1_mlp_libs}
21 |                ${specific_libs} # this target's model source
22 |                ${pos_common_libs}
23 |                ${pos_reader_module}
24 |                )
25 |                
26 | target_link_libraries(${exe_name} # link the executable against dynet and Boost
27 |                       dynet
28 |                       ${Boost_LIBRARIES})
29 | 


--------------------------------------------------------------------------------
/src/postagger/pos_input1_mlp_with_tag/pos_input1_mlp_with_tag_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef POSTAGGER_POS_INPUT1_MLP_WITH_TAG_POSTAG_INPUT1_MLP_WITH_TAG_MODEL_H_
 2 | #define POSTAGGER_POS_INPUT1_MLP_WITH_TAG_POSTAG_INPUT1_MLP_WITH_TAG_MODEL_H_
 3 | #include <boost/log/trivial.hpp>
 4 | #include <boost/archive/text_iarchive.hpp>
 5 | #include <boost/archive/text_oarchive.hpp>
 6 | #include "postagger/base_model/input1_mlp_model.h"
 7 | #include "modelmodule/hyper_layers.h"
 8 | namespace slnn{
 9 | 
10 | class POSInput1MLPWithTagModel : public Input1MLPModel // Concrete model derived from Input1MLPModel; adds a tag-embedding layer and a non-linear function setting.
11 | {
12 |     friend class boost::serialization::access; // grants Boost.Serialization access to this class
13 | public :
14 |     POSInput1MLPWithTagModel();
15 |     ~POSInput1MLPWithTagModel();
16 |     POSInput1MLPWithTagModel(const POSInput1MLPWithTagModel &) = delete ; // non-copyable: the class holds raw layer pointers
17 |     POSInput1MLPWithTagModel &operator=(const POSInput1MLPWithTagModel &) = delete ;
18 | 
19 |     void set_model_param_from_outer(const boost::program_options::variables_map &var_map) override; // configure from parsed program options
20 |     void build_model_structure() override; // presumably allocates the layer pointers below - body defined elsewhere
21 |     void print_model_info() override;
22 | 
23 |     dynet::expr::Expression  build_loss(dynet::ComputationGraph &cg, // returns an expression on `cg` (presumably the training loss - body defined elsewhere)
24 |         const IndexSeq &input_seq, 
25 |         const ContextFeatureDataSeq &context_feature_gp_seq,
26 |         const POSFeature::POSFeatureIndexGroupSeq &features_gp_seq,
27 |         const IndexSeq &gold_seq)  override ; // gold output indices for input_seq
28 |     void predict(dynet::ComputationGraph &cg , // same inputs as build_loss minus the gold sequence
29 |         const IndexSeq &input_seq, 
30 |         const ContextFeatureDataSeq &context_feature_gp_seq,
31 |         const POSFeature::POSFeatureIndexGroupSeq &features_gp_seq,
32 |         IndexSeq &pred_seq) override ; // output parameter receiving the predictions
33 | 
34 |     template <typename Archive>
35 |     void serialize(Archive &ar, unsigned version); // defined below the class in this header
36 | 
37 | private :
38 |     Index2ExprLayer *word_expr_layer; // layer pointers; allocation and release are not visible in this header
39 |     ShiftedIndex2ExprLayer *tag_expr_layer;
40 |     POSFeatureLayer *pos_feature_layer;
41 |     ContextFeatureLayer *pos_context_feature_layer;
42 |     MLPHiddenLayer *mlp_hidden_layer;
43 |     SoftmaxLayer *output_layer;
44 | 
45 |     unsigned tag_embedding_dim; // archived by serialize()
46 |     NonLinearFunc *nonlinear_func; // not archived by serialize(); presumably derived from nonlinear_func_indicate - verify
47 |     std::string nonlinear_func_indicate; // archived by serialize()
48 | };
49 | 
50 | template <typename Archive> // Boost.Serialization hook: one routine for every Archive type it is instantiated with
51 | void POSInput1MLPWithTagModel::serialize(Archive &ar, unsigned version) // `version` is not used in this body
52 | {
53 |     ar & tag_embedding_dim; // field order defines the archive layout; keep it stable for existing model files
54 |     ar & nonlinear_func_indicate;
55 |     ar & boost::serialization::base_object<Input1MLPModel>(*this); // base-class part is archived after this class's own fields
56 | }
57 | 
58 | }
59 | 
60 | 
61 | #endif


--------------------------------------------------------------------------------
/src/postagger/pos_input1_mlp_without_tag/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name # target name; also the stem of the driver and model file names used below
 2 |     pos_input1_mlp_without_tag
 3 | )
 4 | 
 5 | set(specific_headers # model header belonging only to this target
 6 |     ${exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation belonging only to this target
 9 |     ${exe_name}_model.cpp
10 | )
11 | 
12 | add_executable(${exe_name} # the list variables below are not set in this file; presumably provided by a parent CMakeLists - verify
13 |                ${exe_name}.cpp # driver source for this executable
14 |                ${pos_feature_module_headers} # POS feature module headers
15 |                ${input1_mlp_headers} # base model and model handler headers
16 |                ${specific_headers}   # this target's model header
17 |                ${pos_common_headers}                # headers common to the POS targets
18 |                ${context_module}
19 |                ${pos_feature_module_libs}
20 |                ${input1_mlp_libs}
21 |                ${specific_libs} # this target's model source
22 |                ${pos_common_libs}
23 |                ${pos_reader_module}
24 |                )
25 |                
26 | target_link_libraries(${exe_name} # link the executable against dynet and Boost
27 |                       dynet
28 |                       ${Boost_LIBRARIES})
29 | 


--------------------------------------------------------------------------------
/src/postagger/pos_input1_mlp_without_tag/pos_input1_mlp_without_tag_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef POSTAGGER_POS_INPUT1_MLP_WITHOUT_TAG_POSTAG_INPUT1_MLP_WITHOUT_TAG_MODEL_H_
 2 | #define POSTAGGER_POS_INPUT1_MLP_WITHOUT_TAG_POSTAG_INPUT1_MLP_WITHOUT_TAG_MODEL_H_
 3 | #include <boost/log/trivial.hpp>
 4 | #include <boost/archive/text_iarchive.hpp>
 5 | #include <boost/archive/text_oarchive.hpp>
 6 | #include "postagger/base_model/input1_mlp_model.h"
 7 | #include "modelmodule/hyper_layers.h"
 8 | namespace slnn{
 9 | 
10 | class Input1MLPWithoutTagModel : public Input1MLPModel // Concrete model derived from Input1MLPModel; declares no tag-embedding layer.
11 | {
12 |     friend class boost::serialization::access; // grants Boost.Serialization access to this class
13 | public :
14 |     Input1MLPWithoutTagModel();
15 |     ~Input1MLPWithoutTagModel();
16 |     Input1MLPWithoutTagModel(const Input1MLPWithoutTagModel &) = delete ; // non-copyable: the class holds raw layer pointers
17 |     Input1MLPWithoutTagModel &operator=(const Input1MLPWithoutTagModel &) = delete ;
18 | 
19 |     void set_model_param_from_outer(const boost::program_options::variables_map &var_map) override; // configure from parsed program options
20 |     void build_model_structure() override; // presumably allocates the layer pointers below - body defined elsewhere
21 |     void print_model_info() override;
22 | 
23 |     dynet::expr::Expression  build_loss(dynet::ComputationGraph &cg, // returns an expression on `cg` (presumably the training loss - body defined elsewhere)
24 |         const IndexSeq &input_seq, 
25 |         const ContextFeatureDataSeq &context_feature_gp_seq,
26 |         const POSFeature::POSFeatureIndexGroupSeq &features_gp_seq,
27 |         const IndexSeq &gold_seq)  override ; // gold output indices for input_seq
28 |     void predict(dynet::ComputationGraph &cg , // same inputs as build_loss minus the gold sequence
29 |         const IndexSeq &input_seq, 
30 |         const ContextFeatureDataSeq &context_feature_gp_seq,
31 |         const POSFeature::POSFeatureIndexGroupSeq &features_gp_seq,
32 |         IndexSeq &pred_seq) override ; // output parameter receiving the predictions
33 | 
34 |     template <typename Archive>
35 |     void serialize(Archive &ar, unsigned version); // defined below the class in this header
36 | 
37 | private :
38 |     BareInput1 *input_layer; // layer pointers; allocation and release are not visible in this header
39 |     MLPHiddenLayer *mlp_hidden_layer;
40 |     SoftmaxLayer *output_layer;
41 |     POSFeatureLayer *pos_feature_layer;
42 |     ContextFeatureLayer *pos_context_feature_layer;
43 | };
44 | 
45 | template <typename Archive> // Boost.Serialization hook: one routine for every Archive type it is instantiated with
46 | void Input1MLPWithoutTagModel::serialize(Archive &ar, unsigned version) // `version` is not used in this body
47 | {
48 |     ar & boost::serialization::base_object<Input1MLPModel>(*this); // this class archives no fields of its own, only the base-class part
49 | }
50 | 
51 | }
52 | 
53 | 
54 | #endif


--------------------------------------------------------------------------------
/src/postagger/pos_input1_mlp_without_tag_NO_FEATURE/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name # target name; also the stem of the driver and model file names used below
 2 |     pos_input1_mlp_without_tag_no_feature
 3 | )
 4 | 
 5 | set(specific_headers # model header belonging only to this target
 6 |     ${exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation belonging only to this target
 9 |     ${exe_name}_model.cpp
10 | )
11 | 
12 | add_executable(${exe_name} # the list variables below are not set in this file; presumably provided by a parent CMakeLists - verify
13 |                ${exe_name}.cpp # driver source for this executable
14 |                ${input1_mlp_no_feature_headers} # base model and model handler headers
15 |                ${specific_headers}   # this target's model header
16 |                ${pos_common_headers}                # headers common to the POS targets
17 |                ${context_module}
18 |                ${input1_mlp_no_feature_libs}
19 |                ${specific_libs} # this target's model source
20 |                ${pos_common_libs}
21 |                ${pos_reader_module}
22 |                )
23 |                
24 | target_link_libraries(${exe_name} # link the executable against dynet and Boost
25 |                       dynet
26 |                       ${Boost_LIBRARIES})
27 | 


--------------------------------------------------------------------------------
/src/postagger/pos_input1_mlp_without_tag_NO_FEATURE/pos_input1_mlp_without_tag_no_feature_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef POSTAGGER_POS_INPUT1_MLP_WITHOUT_TAG_POSTAG_INPUT1_MLP_WITHOUT_TAG_NO_FEATURE_MODEL_H_
 2 | #define POSTAGGER_POS_INPUT1_MLP_WITHOUT_TAG_POSTAG_INPUT1_MLP_WITHOUT_TAG_NO_FEATURE_MODEL_H_
 3 | #include <boost/log/trivial.hpp>
 4 | #include <boost/archive/text_iarchive.hpp>
 5 | #include <boost/archive/text_oarchive.hpp>
 6 | #include "postagger/base_model/input1_mlp_model_no_feature.h"
 7 | #include "modelmodule/hyper_layers.h"
 8 | namespace slnn{
 9 | 
10 | class Input1MLPWithoutTagNoFeatureModel : public Input1MLPModelNoFeature // Concrete model derived from Input1MLPModelNoFeature; its interface takes no POS feature sequence.
11 | {
12 |     friend class boost::serialization::access; // grants Boost.Serialization access to this class
13 | public :
14 |     Input1MLPWithoutTagNoFeatureModel();
15 |     ~Input1MLPWithoutTagNoFeatureModel();
16 |     Input1MLPWithoutTagNoFeatureModel(const Input1MLPWithoutTagNoFeatureModel &) = delete ; // non-copyable: the class holds raw layer pointers
17 |     Input1MLPWithoutTagNoFeatureModel &operator=(const Input1MLPWithoutTagNoFeatureModel &) = delete ;
18 | 
19 |     void build_model_structure() override; // presumably allocates the layer pointers below - body defined elsewhere
20 |     void print_model_info() override;
21 | 
22 |     dynet::expr::Expression  build_loss(dynet::ComputationGraph &cg, // returns an expression on `cg` (presumably the training loss - body defined elsewhere)
23 |         const IndexSeq &input_seq, 
24 |         const ContextFeatureDataSeq &context_feature_gp_seq,
25 |         const IndexSeq &gold_seq)  override ; // gold output indices for input_seq
26 |     void predict(dynet::ComputationGraph &cg , // same inputs as build_loss minus the gold sequence
27 |         const IndexSeq &input_seq, 
28 |         const ContextFeatureDataSeq &context_feature_gp_seq,
29 |         IndexSeq &pred_seq) override ; // output parameter receiving the predictions
30 | 
31 |     template <typename Archive>
32 |     void serialize(Archive &ar, unsigned version); // defined below the class in this header
33 | 
34 | private :
35 |     BareInput1 *input_layer; // layer pointers; allocation and release are not visible in this header
36 |     MLPHiddenLayer *mlp_hidden_layer;
37 |     SoftmaxLayer *output_layer;
38 |     ContextFeatureLayer *pos_context_feature_layer;
39 | };
40 | 
41 | template <typename Archive> // Boost.Serialization hook: one routine for every Archive type it is instantiated with
42 | void Input1MLPWithoutTagNoFeatureModel::serialize(Archive &ar, unsigned version) // `version` is not used in this body
43 | {
44 |     ar & boost::serialization::base_object<Input1MLPModelNoFeature>(*this); // this class archives no fields of its own, only the base-class part
45 | }
46 | 
47 | } // end of namespace slnn
48 | 
49 | 
50 | #endif


--------------------------------------------------------------------------------
/src/postagger/pos_input2_classification_with_feature/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(feature2input_exe_name # first target name; also the stem of its driver and model file names
 2 |     pos_input2_classification_feature2input_layer
 3 | )
 4 | 
 5 | set(specific_headers # model header for the first target
 6 |     ${feature2input_exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation for the first target
 9 |     ${feature2input_exe_name}_model.cpp
10 | )
11 | 
12 | add_executable(${feature2input_exe_name} # the list variables below are not set in this file; presumably provided by a parent CMakeLists - verify
13 |                ${feature2input_exe_name}.cpp # driver source for this executable
14 |                ${pos_feature_module_headers}
15 |                ${input2_with_feature_modelhandler_hpp} # model handler
16 |                ${specific_headers} # this target's model header
17 |                ${input2_feature2input_layer_model_hpp}        # feature-to-input base model
18 |                ${pos_common_headers}                # headers common to the POS targets
19 |                ${specific_libs} # this target's model source
20 |                ${pos_common_libs}
21 |                ${pos_feature_module_libs}
22 |                ${pos_reader_module}
23 |                ${additional_base_modules}
24 |                )
25 |                
26 | target_link_libraries(${feature2input_exe_name} # link the executable against dynet and Boost
27 |                       dynet
28 |                       ${Boost_LIBRARIES})
29 | 
30 | ############# feature2output target ###########
31 |                       
32 | set(feature2output_exe_name # second target name; also the stem of its driver and model file names
33 |     pos_input2_classification_feature2output_layer
34 | )
35 | 
36 | set(specific_headers # reassigned: replaces the value used by the first target above
37 |     ${feature2output_exe_name}_model.h
38 | )
39 | set(specific_libs # reassigned: replaces the value used by the first target above
40 |     ${feature2output_exe_name}_model.cpp
41 | )
42 | 
43 | add_executable(${feature2output_exe_name} # same source layout as the first target, with the feature-to-output base model
44 |                ${feature2output_exe_name}.cpp # driver source for this executable
45 |                ${pos_feature_module_headers}
46 |                ${input2_with_feature_modelhandler_hpp} # model handler
47 |                ${specific_headers} # this target's model header
48 |                ${input2_feature2output_layer_model_hpp}        # feature-to-output base model
49 |                ${pos_common_headers}                # headers common to the POS targets
50 |                ${specific_libs} # this target's model source
51 |                ${pos_common_libs}
52 |                ${pos_feature_module_libs}
53 |                ${pos_reader_module}
54 |                ${additional_base_modules}
55 |                )
56 |                
57 | target_link_libraries(${feature2output_exe_name} # link the executable against dynet and Boost
58 |                       dynet
59 |                       ${Boost_LIBRARIES})


--------------------------------------------------------------------------------
/src/postagger/pos_input2_classification_with_feature/pos_input2_classification_feature2input_layer_model.cpp:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/postagger/pos_input2_classification_with_feature/pos_input2_classification_feature2output_layer_model.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/postagger/pos_input2_classification_with_feature/pos_input2_classification_feature2output_layer_model.cpp


--------------------------------------------------------------------------------
/src/postagger/pos_input2_crf_with_feature/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(feature2input_exe_name # first target name; also the stem of its driver and model file names
 2 |     pos_input2_crf_feature2input_layer
 3 | )
 4 | 
 5 | set(specific_headers # model header for the first target
 6 |     ${feature2input_exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation for the first target
 9 |     ${feature2input_exe_name}_model.cpp
10 | )
11 | 
12 | add_executable(${feature2input_exe_name} # the list variables below are not set in this file; presumably provided by a parent CMakeLists - verify
13 |                ${feature2input_exe_name}.cpp # driver source for this executable
14 |                ${pos_feature_module_headers}
15 |                ${input2_with_feature_modelhandler_hpp} # model handler
16 |                ${specific_headers} # this target's model header
17 |                ${input2_feature2input_layer_model_hpp}        # feature-to-input base model
18 |                ${pos_common_headers}                # headers common to the POS targets
19 |                ${specific_libs} # this target's model source
20 |                ${pos_common_libs}
21 |                ${pos_feature_module_libs}
22 |                ${pos_reader_module}
23 |                ${additional_base_modules}
24 |                )
25 |                
26 | target_link_libraries(${feature2input_exe_name} # link the executable against dynet and Boost
27 |                       dynet
28 |                       ${Boost_LIBRARIES})
29 | 
30 | ############# feature2output target ###########
31 |                       
32 | set(feature2output_exe_name # second target name; also the stem of its driver and model file names
33 |     pos_input2_crf_feature2output_layer
34 | )
35 | 
36 | set(specific_headers # reassigned: replaces the value used by the first target above
37 |     ${feature2output_exe_name}_model.h
38 | )
39 | set(specific_libs # reassigned: replaces the value used by the first target above
40 |     ${feature2output_exe_name}_model.cpp
41 | )
42 | 
43 | add_executable(${feature2output_exe_name} # same source layout as the first target, with the feature-to-output base model
44 |                ${feature2output_exe_name}.cpp # driver source for this executable
45 |                ${pos_feature_module_headers}
46 |                ${input2_with_feature_modelhandler_hpp} # model handler
47 |                ${specific_headers} # this target's model header
48 |                ${input2_feature2output_layer_model_hpp}        # feature-to-output base model
49 |                ${pos_common_headers}                # headers common to the POS targets
50 |                ${specific_libs} # this target's model source
51 |                ${pos_common_libs}
52 |                ${pos_feature_module_libs}
53 |                ${pos_reader_module}
54 |                ${additional_base_modules}
55 |                )
56 |                
57 | target_link_libraries(${feature2output_exe_name} # link the executable against dynet and Boost
58 |                       dynet
59 |                       ${Boost_LIBRARIES})


--------------------------------------------------------------------------------
/src/postagger/pos_input2_crf_with_feature/pos_input2_crf_feature2input_layer_model.cpp:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/postagger/pos_input2_crf_with_feature/pos_input2_crf_feature2output_layer_model.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/postagger/pos_input2_crf_with_feature/pos_input2_crf_feature2output_layer_model.cpp


--------------------------------------------------------------------------------
/src/postagger/pos_input2_pretag_with_feature/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(feature2input_exe_name # first target name; also the stem of its driver and model file names
 2 |     pos_input2_pretag_feature2input_layer
 3 | )
 4 | 
 5 | set(specific_headers # model header for the first target
 6 |     ${feature2input_exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation for the first target
 9 |     ${feature2input_exe_name}_model.cpp
10 | )
11 | 
12 | add_executable(${feature2input_exe_name} # the list variables below are not set in this file; presumably provided by a parent CMakeLists - verify
13 |                ${feature2input_exe_name}.cpp # driver source for this executable
14 |                ${pos_feature_module_headers}
15 |                ${input2_with_feature_modelhandler_hpp} # model handler
16 |                ${specific_headers} # this target's model header
17 |                ${input2_feature2input_layer_model_hpp}        # feature-to-input base model
18 |                ${pos_common_headers}                # headers common to the POS targets
19 |                ${specific_libs} # this target's model source
20 |                ${pos_common_libs}
21 |                ${pos_feature_module_libs}
22 |                ${pos_reader_module}
23 |                ${additional_base_modules}
24 |                )
25 |                
26 | target_link_libraries(${feature2input_exe_name} # link the executable against dynet and Boost
27 |                       dynet
28 |                       ${Boost_LIBRARIES})
29 | 
30 | ############# feature2output target ###########
31 |                       
32 | set(feature2output_exe_name # second target name; also the stem of its driver and model file names
33 |     pos_input2_pretag_feature2output_layer
34 | )
35 | 
36 | set(specific_headers # reassigned: replaces the value used by the first target above
37 |     ${feature2output_exe_name}_model.h
38 | )
39 | set(specific_libs # reassigned: replaces the value used by the first target above
40 |     ${feature2output_exe_name}_model.cpp
41 | )
42 | 
43 | add_executable(${feature2output_exe_name} # same source layout as the first target, with the feature-to-output base model
44 |                ${feature2output_exe_name}.cpp # driver source for this executable
45 |                ${pos_feature_module_headers}
46 |                ${input2_with_feature_modelhandler_hpp} # model handler
47 |                ${specific_headers} # this target's model header
48 |                ${input2_feature2output_layer_model_hpp}        # feature-to-output base model
49 |                ${pos_common_headers}                # headers common to the POS targets
50 |                ${specific_libs} # this target's model source
51 |                ${pos_common_libs}
52 |                ${pos_feature_module_libs}
53 |                ${pos_reader_module}
54 |                ${additional_base_modules}
55 |                )
56 |                
57 | target_link_libraries(${feature2output_exe_name} # link the executable against dynet and Boost
58 |                       dynet
59 |                       ${Boost_LIBRARIES})


--------------------------------------------------------------------------------
/src/postagger/pos_input2_pretag_with_feature/pos_input2_pretag_feature2input_layer_model.cpp:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/postagger/pos_input2_pretag_with_feature/pos_input2_pretag_feature2output_layer_model.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/postagger/pos_input2_pretag_with_feature/pos_input2_pretag_feature2output_layer_model.cpp


--------------------------------------------------------------------------------
/src/postagger/pos_single_classification/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name # target name; also the stem of the driver and model file names used below
 2 |     pos_single_classification
 3 | )
 4 | 
 5 | set(specific_headers # model header belonging only to this target
 6 |     ${exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation belonging only to this target
 9 |     ${exe_name}_model.cpp
10 | )
11 | 
12 | add_executable(${exe_name} # the list variables below are not set in this file; presumably provided by a parent CMakeLists - verify
13 |                ${exe_name}.cpp # driver source for this executable
14 |                ${single_input_modelhandler_headers} # model handler
15 |                ${specific_headers} # this target's model header
16 |                ${single_input_model_headers}        # base model
17 |                ${pos_common_headers}                # headers common to the POS targets
18 |                ${specific_libs} # this target's model source
19 |                ${single_input_model_libs} 
20 |                ${pos_common_libs}
21 |                )
22 |                
23 | target_link_libraries(${exe_name} # link the executable against dynet and Boost
24 |                       dynet
25 |                       ${Boost_LIBRARIES})
26 | 


--------------------------------------------------------------------------------
/src/postagger/pos_single_classification/pos_single_classification_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_POSTAGGER_POS_SINGLE_INPUT_CLASSIFICATION_H_
 2 | #define SLNN_POSTAGGER_POS_SINGLE_INPUT_CLASSIFICATION_H_
 3 | 
 4 | #include <boost/log/trivial.hpp>
 5 | #include <boost/archive/text_iarchive.hpp>
 6 | #include <boost/archive/text_oarchive.hpp>
 7 | 
 8 | #include "dynet/dynet.h"
 9 | 
10 | #include "postagger/base_model/single_input_model.h"
11 | namespace slnn{
12 | 
13 | class POSSingleClassificationModel : public SingleInputModel // Concrete SingleInputModel exposing its dimensions and dictionaries as public members.
14 | {
15 | public:
16 |     unsigned word_embedding_dim, // model dimensions; where they are assigned is not visible in this header
17 |         word_dict_size,
18 |         lstm_nr_stacked_layer,
19 |         lstm_h_dim,
20 |         hidden_dim,
21 |         output_dim ;
22 | 
23 |     dynet::real dropout_rate ; // only for bilstm (output doesn't enable dropout)
24 | 
25 |     dynet::Dict &word_dict ; // reference member: must be bound in the constructor's initializer list (defined elsewhere)
26 |     dynet::Dict &tag_dict ; // presumably aliases of the base-class input/output dictionaries - verify in the constructor
27 | 
28 |     POSSingleClassificationModel() ;
29 |     ~POSSingleClassificationModel() ;
30 | 
31 |     void set_model_param(const boost::program_options::variables_map &var_map) override ; // `override`: compiler verifies these match the SingleInputModel pure virtuals
32 |     void build_model_structure() override ;
33 |     void print_model_info() override ;
34 | 
35 |     void save_model(std::ostream &os) override ; // on-disk format is defined in the out-of-line implementation
36 |     void load_model(std::istream &is) override ; // counterpart of save_model
37 | };
38 | 
39 | 
40 | } // end of namespace slnn 
41 | #endif 
42 | 


--------------------------------------------------------------------------------
/src/postagger/pos_single_classification_with_feature/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(feature2input_exe_name # first target name; also the stem of its driver and model file names
 2 |     pos_input1_classification_feature2input_layer
 3 | )
 4 | 
 5 | set(specific_headers # model header for the first target
 6 |     ${feature2input_exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation for the first target
 9 |     ${feature2input_exe_name}_model.cpp
10 | )
11 | 
12 | add_executable(${feature2input_exe_name} # the list variables below are not set in this file; presumably provided by a parent CMakeLists - verify
13 |                ${feature2input_exe_name}.cpp # driver source for this executable
14 |                ${pos_feature_module_headers}
15 |                ${single_input_with_feature_modelhandler_hpp} # model handler
16 |                ${specific_headers} # this target's model header
17 |                ${input1_feature2input_layer_model_hpp}        # feature-to-input base model
18 |                ${pos_common_headers}                # headers common to the POS targets
19 |                ${specific_libs} # this target's model source
20 |                ${pos_common_libs}
21 |                ${pos_feature_module_libs}
22 |                ${pos_reader_module}
23 |                )
24 |                
25 | target_link_libraries(${feature2input_exe_name} # link the executable against dynet and Boost
26 |                       dynet
27 |                       ${Boost_LIBRARIES})
28 | 
29 | ############# feature2output target ###########
30 |                       
31 | set(feature2output_exe_name # second target name; also the stem of its driver and model file names
32 |     pos_input1_classification_feature2output_layer
33 | )
34 | 
35 | set(specific_headers # reassigned: replaces the value used by the first target above
36 |     ${feature2output_exe_name}_model.h
37 | )
38 | set(specific_libs # reassigned: replaces the value used by the first target above
39 |     ${feature2output_exe_name}_model.cpp
40 | )
41 | 
42 | add_executable(${feature2output_exe_name} # same source layout as the first target, with the feature-to-output base model
43 |                ${feature2output_exe_name}.cpp # driver source for this executable
44 |                ${pos_feature_module_headers}
45 |                ${single_input_with_feature_modelhandler_hpp} # model handler
46 |                ${specific_headers} # this target's model header
47 |                ${input1_feature2output_layer_model_hpp}        # feature-to-output base model
48 |                ${pos_common_headers}                # headers common to the POS targets
49 |                ${specific_libs} # this target's model source
50 |                ${pos_common_libs}
51 |                ${pos_feature_module_libs}
52 |                ${pos_reader_module}
53 |                )
54 |                
55 | target_link_libraries(${feature2output_exe_name} # link the executable against dynet and Boost
56 |                       dynet
57 |                       ${Boost_LIBRARIES})


--------------------------------------------------------------------------------
/src/postagger/pos_single_classification_with_feature/pos_input1_classification_feature2input_layer_model.cpp:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/postagger/pos_single_classification_with_feature/pos_input1_classification_feature2output_layer_model.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/postagger/pos_single_classification_with_feature/pos_input1_classification_feature2output_layer_model.cpp


--------------------------------------------------------------------------------
/src/postagger/pos_single_crf_with_feature/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(feature2input_exe_name # first target name; also the stem of its driver and model file names
 2 |     pos_input1_crf_feature2input_layer
 3 | )
 4 | 
 5 | set(specific_headers # model header for the first target
 6 |     ${feature2input_exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation for the first target
 9 |     ${feature2input_exe_name}_model.cpp
10 | )
11 | 
12 | add_executable(${feature2input_exe_name} # the list variables below are not set in this file; presumably provided by a parent CMakeLists - verify
13 |                ${feature2input_exe_name}.cpp # driver source for this executable
14 |                ${pos_feature_module_headers}
15 |                ${single_input_with_feature_modelhandler_hpp} # model handler
16 |                ${specific_headers} # this target's model header
17 |                ${input1_feature2input_layer_model_hpp}        # feature-to-input base model
18 |                ${pos_common_headers}                # headers common to the POS targets
19 |                ${specific_libs} # this target's model source
20 |                ${pos_common_libs}
21 |                ${pos_feature_module_libs}
22 |                ${pos_reader_module}
23 |                )
24 |                
25 | target_link_libraries(${feature2input_exe_name} # link the executable against dynet and Boost
26 |                       dynet
27 |                       ${Boost_LIBRARIES})
28 | 
29 | ############# feature2output target ###########
30 |                       
31 | set(feature2output_exe_name # second target name; also the stem of its driver and model file names
32 |     pos_input1_crf_feature2output_layer
33 | )
34 | 
35 | set(specific_headers # reassigned: replaces the value used by the first target above
36 |     ${feature2output_exe_name}_model.h
37 | )
38 | set(specific_libs # reassigned: replaces the value used by the first target above
39 |     ${feature2output_exe_name}_model.cpp
40 | )
41 | 
42 | add_executable(${feature2output_exe_name} # same source layout as the first target, with the feature-to-output base model
43 |                ${feature2output_exe_name}.cpp # driver source for this executable
44 |                ${pos_feature_module_headers}
45 |                ${single_input_with_feature_modelhandler_hpp} # model handler
46 |                ${specific_headers} # this target's model header
47 |                ${input1_feature2output_layer_model_hpp}        # feature-to-output base model
48 |                ${pos_common_headers}                # headers common to the POS targets
49 |                ${specific_libs} # this target's model source
50 |                ${pos_common_libs}
51 |                ${pos_feature_module_libs}
52 |                ${pos_reader_module}
53 |                )
54 |                
55 | target_link_libraries(${feature2output_exe_name} # link the executable against dynet and Boost
56 |                       dynet
57 |                       ${Boost_LIBRARIES})


--------------------------------------------------------------------------------
/src/postagger/pos_single_crf_with_feature/pos_input1_crf_feature2input_layer_model.cpp:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/postagger/pos_single_crf_with_feature/pos_input1_crf_feature2output_layer_model.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/postagger/pos_single_crf_with_feature/pos_input1_crf_feature2output_layer_model.cpp


--------------------------------------------------------------------------------
/src/postagger/pos_single_pretag_with_feature/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(feature2input_exe_name # first target name; also the stem of its driver and model file names
 2 |     pos_input1_pretag_feature2input_layer
 3 | )
 4 | 
 5 | set(specific_headers # model header for the first target
 6 |     ${feature2input_exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation for the first target
 9 |     ${feature2input_exe_name}_model.cpp
10 | )
11 | 
12 | add_executable(${feature2input_exe_name} # the list variables below are not set in this file; presumably provided by a parent CMakeLists - verify
13 |                ${feature2input_exe_name}.cpp # driver source for this executable
14 |                ${pos_feature_module_headers}
15 |                ${single_input_with_feature_modelhandler_hpp} # model handler
16 |                ${specific_headers} # this target's model header
17 |                ${input1_feature2input_layer_model_hpp}        # feature-to-input base model
18 |                ${pos_common_headers}                # headers common to the POS targets
19 |                ${specific_libs} # this target's model source
20 |                ${pos_common_libs}
21 |                ${pos_feature_module_libs}
22 |                ${pos_reader_module}
23 |                )
24 |                
25 | target_link_libraries(${feature2input_exe_name} # link the executable against dynet and Boost
26 |                       dynet
27 |                       ${Boost_LIBRARIES})
28 | 
29 | ############# feature2output target ###########
30 |                       
31 | set(feature2output_exe_name # second target name; also the stem of its driver and model file names
32 |     pos_input1_pretag_feature2output_layer
33 | )
34 | 
35 | set(specific_headers # reassigned: replaces the value used by the first target above
36 |     ${feature2output_exe_name}_model.h
37 | )
38 | set(specific_libs # reassigned: replaces the value used by the first target above
39 |     ${feature2output_exe_name}_model.cpp
40 | )
41 | 
42 | add_executable(${feature2output_exe_name} # same source layout as the first target, with the feature-to-output base model
43 |                ${feature2output_exe_name}.cpp # driver source for this executable
44 |                ${pos_feature_module_headers}
45 |                ${single_input_with_feature_modelhandler_hpp} # model handler
46 |                ${specific_headers} # this target's model header
47 |                ${input1_feature2output_layer_model_hpp}        # feature-to-output base model
48 |                ${pos_common_headers}                # headers common to the POS targets
49 |                ${specific_libs} # this target's model source
50 |                ${pos_common_libs}
51 |                ${pos_feature_module_libs}
52 |                ${pos_reader_module}
53 |                )
54 |                
55 | target_link_libraries(${feature2output_exe_name} # link the executable against dynet and Boost
56 |                       dynet
57 |                       ${Boost_LIBRARIES})


--------------------------------------------------------------------------------
/src/postagger/pos_single_pretag_with_feature/pos_input1_pretag_feature2input_layer_model.cpp:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/postagger/pos_single_pretag_with_feature/pos_input1_pretag_feature2output_layer_model.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/postagger/pos_single_pretag_with_feature/pos_input1_pretag_feature2output_layer_model.cpp


--------------------------------------------------------------------------------
/src/postagger/postagger_crf/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | INCLUDE_DIRECTORIES(${source_directory})
 2 | 
 3 | # crf
 4 | set(bilstmcrf_headers
 5 |     bilstmcrf.h
 6 |     bilstmcrf_modelhandler.h
 7 | )
 8 | set(bilstmcrf_libs
 9 |     bilstmcrf.cpp
10 |     bilstmcrf_modelhandler.cpp
11 | )
12 | 
13 | ADD_EXECUTABLE(postagger_crf postagger_bilstmcrf.cpp ${common_headers} 
14 | ${bilstmcrf_headers} ${common_libs} ${bilstmcrf_libs})
15 | 
16 | target_link_libraries(postagger_crf dynet ${Boost_LIBRARIES})
17 | 


--------------------------------------------------------------------------------
/src/postagger/postagger_crf/bilstmcrf.h:
--------------------------------------------------------------------------------
 1 | #ifndef BILSTMCRF_H_INCLUDED_
 2 | #define BILSTMCRF_H_INCLUDED_
 3 | 
 4 | #include <string>
 5 | #include <sstream>
 6 | 
 7 | #include "dynet/nodes.h"
 8 | #include "dynet/dynet.h"
 9 | #include "dynet/training.h"
10 | #include "dynet/rnn.h"
11 | #include "dynet/lstm.h"
12 | #include "dynet/dict.h"
13 | #include "dynet/expr.h"
14 | 
15 | #include <boost/program_options.hpp>
16 | 
17 | #include "utils/typedeclaration.h"
18 | #include "modelmodule/layers.h"
19 | #include "utils/utf8processing.hpp" 
20 | #include "utils/dict_wrapper.hpp"
21 | #include "utils/stat.hpp"
22 | 
23 | namespace slnn
24 | {
25 | struct BILSTMCRFModelHandler;
26 | 
27 | struct BILSTMCRFModel4POSTAG
28 | {
29 |     friend struct BILSTMCRFModelHandler;
30 |     // Model structure param 
31 |     // - set by outer
32 |     unsigned word_embedding_dim,
33 |         postag_embedding_dim,
34 |         nr_lstm_stacked_layer ,
35 |         lstm_h_dim,
36 |         merge_hidden_dim;
37 |     // - set from inner (dict)
38 |     unsigned word_dict_size ,
39 |         postag_dict_size;
40 | 
41 |     // Model param
42 |     dynet::Model *m;
43 | 
44 |     BILSTMLayer *bilstm_layer;
45 |     Merge3Layer *merge_hidden_layer;
46 |     DenseLayer *emit_layer;
47 | 
48 |     dynet::LookupParameter words_lookup_param;
49 |     dynet::LookupParameter postags_lookup_param;
50 |     
51 |     dynet::LookupParameter trans_score_lookup_param; // trans score , that is , TAG_A -> TAG_B 's score
52 |     dynet::LookupParameter init_score_lookup_param; // init score , that is , the init TAG score
53 | 
54 | 
55 |     // Dict
56 |     dynet::Dict word_dict;
57 |     dynet::Dict postag_dict;
58 |     DictWrapper word_dict_wrapper;
59 |     static const std::string UNK_STR ; 
60 | 
61 |     BILSTMCRFModel4POSTAG();
62 |     ~BILSTMCRFModel4POSTAG();
63 | 
64 |     void build_model_structure();
65 |     void print_model_info();
66 | 
67 | 
68 |     dynet::expr::Expression viterbi_train(dynet::ComputationGraph *p_cg, 
69 |         const IndexSeq *p_sent, const IndexSeq *p_tag_seq,
70 |         Stat *p_stat = nullptr);
71 |     void viterbi_predict(dynet::ComputationGraph *p_cg, 
72 |         const IndexSeq *p_sent, IndexSeq *p_predict_tag_seq);
73 | 
74 | };
75 | 
76 | 
77 | } // end of namespace
78 | 
79 | 
80 | #endif
81 | 


--------------------------------------------------------------------------------
/src/postagger/postagger_crfdc/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | INCLUDE_DIRECTORIES(${source_directory})
 2 | 
 3 | # crf dc
 4 | set(bilstmcrf_dc_headers
 5 |     bilstmcrf_dc.h
 6 |     bilstmcrf_dc_modelhandler.h
 7 | )
 8 | set(bilstmcrf_dc_libs
 9 |     bilstmcrf_dc.cpp
10 |     bilstmcrf_dc_modelhandler.cpp
11 | )
12 | 
13 | ADD_EXECUTABLE(postagger_crf_dc postagger_bilstmcrf_dc.cpp ${common_headers} 
14 | ${bilstmcrf_dc_headers} ${common_libs} ${bilstmcrf_dc_libs})
15 | 
16 | target_link_libraries(postagger_crf_dc dynet ${Boost_LIBRARIES})
17 | 


--------------------------------------------------------------------------------
/src/postagger/postagger_crfdc/bilstmcrf_dc.h:
--------------------------------------------------------------------------------
 1 | #ifndef BILSTMCRF_DC_H_INCLUDED_
 2 | #define BILSTMCRF_DC_H_INCLUDED_
 3 | 
 4 | #include <string>
 5 | #include <sstream>
 6 | 
 7 | #include "dynet/nodes.h"
 8 | #include "dynet/dynet.h"
 9 | #include "dynet/training.h"
10 | #include "dynet/rnn.h"
11 | #include "dynet/lstm.h"
12 | #include "dynet/dict.h"
13 | #include "dynet/expr.h"
14 | 
15 | #include <boost/program_options.hpp>
16 | 
17 | #include "utils/typedeclaration.h"
18 | #include "modelmodule/layers.h"
19 | #include "utils/utf8processing.hpp" 
20 | #include "utils/dict_wrapper.hpp"
21 | #include "utils/stat.hpp"
22 | 
23 | namespace slnn
24 | {
25 | struct BILSTMCRFDCModelHandler;
26 | 
27 | struct BILSTMCRFDCModel4POSTAG
28 | {
29 |     friend struct BILSTMCRFDCModelHandler;
30 |     // Model structure param 
31 |     // - set by outer
32 |     unsigned dynamic_embedding_dim,
33 |         postag_embedding_dim,
34 |         nr_lstm_stacked_layer ,
35 |         lstm_x_dim,
36 |         lstm_h_dim,
37 |         merge_hidden_dim,
38 |         fixed_embedding_dim,
39 |         fixed_embedding_dict_size;
40 |     // - set from inner (dict)
41 |     unsigned dynamic_embedding_dict_size,
42 |         postag_dict_size;
43 | 
44 |     // Model param
45 |     dynet::Model *m;
46 | 
47 |     Merge2Layer *merge_doublechannel_layer;
48 |     BILSTMLayer *bilstm_layer;
49 |     Merge3Layer *merge_hidden_layer;
50 |     DenseLayer *emit_layer;
51 | 
52 |     dynet::LookupParameter dynamic_words_lookup_param;
53 |     dynet::LookupParameter fixed_words_lookup_param;
54 |     dynet::LookupParameter postags_lookup_param;
55 |     
56 |     dynet::LookupParameter trans_score_lookup_param; // trans score , that is , TAG_A -> TAG_B 's score
57 |     dynet::LookupParameter init_score_lookup_param; // init score , that is , the init TAG score
58 | 
59 | 
60 |     // Dict
61 |     dynet::Dict dynamic_dict;
62 |     dynet::Dict fixed_dict;
63 |     dynet::Dict postag_dict;
64 |     DictWrapper dynamic_dict_wrapper;
65 |     static const std::string UNK_STR ; 
66 | 
67 |     BILSTMCRFDCModel4POSTAG();
68 |     ~BILSTMCRFDCModel4POSTAG();
69 | 
70 |     void build_model_structure();
71 |     void print_model_info();
72 | 
73 | 
74 |     dynet::expr::Expression viterbi_train(dynet::ComputationGraph *p_cg, 
75 |         const IndexSeq *p_dynamic_sent, const IndexSeq *p_fixed_sent, const IndexSeq *p_tag_seq,
76 |         Stat *p_stat = nullptr);
77 |     void viterbi_predict(dynet::ComputationGraph *p_cg, 
78 |         const IndexSeq *p_dynamic_sent, const IndexSeq *p_fixed_sent, IndexSeq *p_predict_tag_seq);
79 | 
80 | };
81 | 
82 | 
83 | } // end of namespace
84 | 
85 | 
86 | #endif
87 | 


--------------------------------------------------------------------------------
/src/postagger/postagger_dc/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | INCLUDE_DIRECTORIES(${source_directory})
 2 | 
 3 | #  dc
 4 | set (doublechannel_postagger_headers
 5 |     bilstmmodel4tagging_doublechannel.h
 6 |     doublechannel_modelhandler.h
 7 | )
 8 | 
 9 | set (doublechannel_postagger_libs
10 |     bilstmmodel4tagging_doublechannel.cpp
11 |     doublechannel_modelhandler.cpp    
12 | )
13 | 
14 | ADD_EXECUTABLE(postagger_dc postagger_doublechannel.cpp ${common_headers} 
15 | ${doublechannel_postagger_headers} ${common_libs} ${doublechannel_postagger_libs})
16 | 
17 | target_link_libraries(postagger_dc dynet ${Boost_LIBRARIES})
18 | 


--------------------------------------------------------------------------------
/src/postagger/postagger_dc/bilstmmodel4tagging_doublechannel.h:
--------------------------------------------------------------------------------
 1 | #ifndef BILSTMMODEL_4_TAGGING_DOUBLECHANNEL_H_INCLUDED_
 2 | #define BILSTMMODEL_4_TAGGING_DOUBLECHANNEL_H_INCLUDED_
 3 | 
 4 | #include <string>
 5 | #include <sstream>
 6 | 
 7 | #include "dynet/nodes.h"
 8 | #include "dynet/dynet.h"
 9 | #include "dynet/training.h"
10 | #include "dynet/rnn.h"
11 | #include "dynet/lstm.h"
12 | #include "dynet/dict.h"
13 | #include "dynet/expr.h"
14 | 
15 | #include <boost/program_options.hpp>
16 | 
17 | #include "utils/typedeclaration.h"
18 | #include "modelmodule/layers.h"
19 | #include "utils/utf8processing.hpp" 
20 | #include "utils/dict_wrapper.hpp"
21 | #include "utils/stat.hpp"
22 | 
23 | namespace slnn
24 | {
25 | struct DoubleChannelModelHandler;
26 | 
27 | struct DoubleChannelModel4POSTAG
28 | {
29 |     friend struct DoubleChannelModelHandler;
30 |     // Model structure param 
31 |     // - set by outer
32 |     unsigned dynamic_embedding_dim,
33 |         postag_embedding_dim,
34 |         nr_lstm_stacked_layer ,
35 |         lstm_x_dim,
36 |         lstm_h_dim,
37 |         tag_layer_hidden_dim ,
38 |         fixed_embedding_dim,
39 |         fixed_embedding_dict_size;
40 |     // - set from inner (dict)
41 |     unsigned dynamic_embedding_dict_size,
42 |         tag_layer_output_dim;
43 | 
44 |     // Model param
45 |     dynet::Model *m;
46 | 
47 |     Merge2Layer *merge_doublechannel_layer;
48 |     BILSTMLayer *bilstm_layer;
49 |     Merge3Layer *merge_bilstm_and_pretag_layer;
50 |     DenseLayer *tag_output_linear_layer;
51 | 
52 |     dynet::LookupParameter dynamic_words_lookup_param;
53 |     dynet::LookupParameter fixed_words_lookup_param;
54 |     dynet::LookupParameter postags_lookup_param;
55 |     
56 |     dynet::Parameter TAG_SOS_param;
57 | 
58 | 
59 |     // Dict
60 |     dynet::Dict dynamic_dict;
61 |     dynet::Dict fixed_dict;
62 |     dynet::Dict postag_dict;
63 |     DictWrapper dynamic_dict_wrapper;
64 |     static const std::string UNK_STR ; 
65 | 
66 |     DoubleChannelModel4POSTAG();
67 |     ~DoubleChannelModel4POSTAG();
68 | 
69 |     void build_model_structure();
70 |     void print_model_info();
71 | 
72 | 
73 |     dynet::expr::Expression negative_loglikelihood(dynet::ComputationGraph *p_cg, 
74 |         const IndexSeq *p_dynamic_sent, const IndexSeq *p_fixed_sent, const IndexSeq *p_tag_seq,
75 |         Stat *p_stat = nullptr);
76 |     void do_predict(dynet::ComputationGraph *p_cg, 
77 |         const IndexSeq *p_dynamic_sent, const IndexSeq *p_fixed_sent, IndexSeq *p_predict_tag_seq);
78 | 
79 | };
80 | 
81 | 
82 | } // end of namespace
83 | 
84 | 
85 | #endif
86 | 


--------------------------------------------------------------------------------
/src/postagger/postagger_module/pos_feature.cpp:
--------------------------------------------------------------------------------
 1 | #include "pos_feature.h"
 2 | 
 3 | namespace slnn{
 4 | 
 5 | const size_t POSFeature::NrFeature;
 6 | const size_t POSFeature::FeatureCharLengthLimit;
 7 | const size_t POSFeature::PrefixSuffixMaxLen;
 8 | 
 9 | const std::string POSFeature::FeatureEmptyStrPlaceholder = "";
10 | const Index POSFeature::FeatureEmptyIndexPlaceholder = -1;
11 | const std::string POSFeature::FeatureUnkStr = "feature_unk_str";
12 | 
13 | POSFeature::POSFeature()
14 |     : prefix_suffix_len1_dict_wrapper(prefix_suffix_len1_dict),
15 |     prefix_suffix_len2_dict_wrapper(prefix_suffix_len2_dict),
16 |     prefix_suffix_len3_dict_wrapper(prefix_suffix_len3_dict)
17 | {}
18 | 
19 | 
20 | std::string POSFeature::get_feature_info()
21 | {
22 |     std::ostringstream oss;
23 | 
24 |     oss << "prefix and suffix dict size : [ " << prefix_suffix_len1_dict.size() << ", " << prefix_suffix_len2_dict.size() << ", "
25 |         << prefix_suffix_len3_dict.size() << " ]\n"
26 |         << "prefix and suffix embedding dim : [ " << prefix_suffix_len1_embedding_dim << ", " << prefix_suffix_len2_embedding_dim << ", "
27 |         << prefix_suffix_len3_embedding_dim << " ]\n"
28 |         << "character length feature dict size : " << get_char_length_dict_size() << " , dimension : " << char_length_embedding_dim << "\n"
29 |         << "total pos feature dimension : " << get_pos_feature_dim() ;
30 |     return oss.str();
31 | }
32 | }
33 | 


--------------------------------------------------------------------------------
/src/postagger/postagger_module/pos_feature_extractor.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/postagger/postagger_module/pos_feature_extractor.cpp


--------------------------------------------------------------------------------
/src/postagger/postagger_module/pos_feature_extractor.h:
--------------------------------------------------------------------------------
 1 | #ifndef POSTAGGER_POSTAGGER_MODULE_POS_FEATURE_EXTRACTOR_H_
 2 | #define POSTAGGER_POSTAGGER_MODULE_POS_FEATURE_EXTRACTOR_H_
 3 | #include <string>
 4 | 
 5 | #include "pos_feature.h"
 6 | #include "utils/utf8processing.hpp"
 7 | 
 8 | namespace slnn{
 9 | 
10 | struct POSFeatureExtractor
11 | {
12 |     static void extract(const Seq &raw_inputs, POSFeature::POSFeatureGroupSeq &feature_seq);
13 | };
14 | 
15 | /* *
16 | *  extract features sequence from raw inputs .
17 | *  For postagger , there are NR_FEATURES for every words of raw inputs , 
18 | *  they are storing in the array , ordering like following :
19 | *          0 -> PREFIX_SUFFIX_MAX_LEN - 1                        : prefix feature index
20 | *          PREFIX_SUFFIX_MAX_LEN -> PREFIX_SUFFIX_MAX_LEN * 2 -1 : suffix feature index
21 | *          PREFIX_SUFFIX_MAX_LEN * 2                             : length feature index
22 | */
23 | inline
24 | void POSFeatureExtractor::extract(const Seq &raw_inputs, POSFeature::POSFeatureGroupSeq &features_seq)
25 | {
26 |     using std::swap;
27 |     size_t nr_tokens = raw_inputs.size();
28 |     POSFeature::POSFeatureGroupSeq tmp_features_seq(nr_tokens);
29 |     for( size_t i = 0; i < nr_tokens; ++i )
30 |     {
31 |         POSFeature::POSFeatureGroup &cur_f = tmp_features_seq[i];
32 |         const std::string &word = raw_inputs[i] ;
33 |         Seq utf8_chars ;
34 |         UTF8Processing::utf8_str2char_seq(word, utf8_chars);
35 |         size_t utf8_chars_len = utf8_chars.size() ;
36 |         // prefix , suffix
37 |         std::string prefix_chars, suffix_chars ;
38 |         size_t min_len = std::min(POSFeature::PrefixSuffixMaxLen, utf8_chars_len);
39 |         for( size_t len = 1 ; len <= min_len ; ++len )
40 |         {
41 |             prefix_chars += utf8_chars[len - 1] ;
42 |             suffix_chars = utf8_chars[utf8_chars_len - len] + suffix_chars;
43 |             cur_f[len - 1] = "P-" + prefix_chars;
44 |             cur_f[len - 1 + POSFeature::PrefixSuffixMaxLen] = "S-" + suffix_chars;
45 |         }
46 |         // padding
47 |         for( size_t len = min_len + 1 ; len <= POSFeature::PrefixSuffixMaxLen; ++len )
48 |         {
49 |             cur_f[len - 1] = POSFeature::FeatureEmptyStrPlaceholder;
50 |             cur_f[len - 1 + POSFeature::PrefixSuffixMaxLen] = POSFeature::FeatureEmptyStrPlaceholder;
51 |         }
52 |         cur_f[POSFeature::NrFeature - 1] = std::to_string(utf8_chars_len);
53 |     }
54 |     swap(features_seq, tmp_features_seq);
55 | }
56 | 
57 | } // end of namespace slnn
58 | 
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/src/postagger/postagger_module/pos_feature_layer.cpp:
--------------------------------------------------------------------------------
 1 | #include "pos_feature_layer.h"
 2 | 
 3 | namespace slnn{
 4 | 
 5 | POSFeatureLayer::POSFeatureLayer(dynet::Model *m, 
 6 |                                  size_t prefix_suffix_len1_dict_size, unsigned prefix_suffix_len1_embedding_dim,
 7 |                                  size_t prefix_suffix_len2_dict_size, unsigned prefix_suffix_len2_embedding_dim,
 8 |                                  size_t prefix_suffix_len3_dict_size, unsigned prefix_suffix_len3_embedding_dim,
 9 |                                  size_t char_length_dict_size, unsigned char_length_embedding_dim)
10 |     :prefix_suffix_len1_lookup_param(m->add_lookup_parameters(prefix_suffix_len1_dict_size, {prefix_suffix_len1_embedding_dim})),
11 |     prefix_suffix_len2_lookup_param(m->add_lookup_parameters(prefix_suffix_len2_dict_size, {prefix_suffix_len2_embedding_dim})),
12 |     prefix_suffix_len3_lookup_param(m->add_lookup_parameters(prefix_suffix_len3_dict_size, {prefix_suffix_len3_embedding_dim})),
13 |     char_length_lookup_param(m->add_lookup_parameters(char_length_dict_size, {char_length_embedding_dim}))
14 | {}
15 | 
16 | POSFeatureLayer::POSFeatureLayer(dynet::Model *m, POSFeature &pos_feature)
17 |     :POSFeatureLayer(m, 
18 |                      pos_feature.prefix_suffix_len1_dict.size(), pos_feature.prefix_suffix_len1_embedding_dim,
19 |                      pos_feature.prefix_suffix_len2_dict.size(), pos_feature.prefix_suffix_len2_embedding_dim,
20 |                      pos_feature.prefix_suffix_len3_dict.size(), pos_feature.prefix_suffix_len3_embedding_dim,
21 |                      pos_feature.get_char_length_dict_size(), pos_feature.char_length_embedding_dim)
22 | {}
23 | 
24 | } // end of namespace slnn
25 | 


--------------------------------------------------------------------------------
/src/postagger/postagger_module/pos_reader.cpp:
--------------------------------------------------------------------------------
1 | #include "pos_reader.h"
2 | namespace slnn{
3 | 
4 | const char* POSReader::PosDataDelimiter = "\t";
5 | const char* POSReader::WordPosDelimiter = "_";
6 | 
7 | } // end of namespace slnn


--------------------------------------------------------------------------------
/src/postagger/postagger_module/pos_reader.h:
--------------------------------------------------------------------------------
 1 | #ifndef POS_POS_MODULE_POS_READER_HPP_
 2 | #define POS_POS_MODULE_POS_READER_HPP_
 3 | 
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp>

#include "utils/typedeclaration.h"
#include "utils/reader.hpp"
10 | 
11 | namespace slnn{
12 | 
13 | class POSReader : public Reader
14 | {
15 | public:
16 |     static const char* PosDataDelimiter ;
17 |     static const char* WordPosDelimiter;
18 | public:
19 |     
20 |     POSReader(std::istream &is);
21 |     bool readline(Seq &sent, Seq &postag); // training data
22 |     bool readline(Seq &sent); // devel data
23 | };
24 | 
25 | 
26 | inline
27 | POSReader::POSReader(std::istream &is)
28 |     :Reader(is)
29 | {}
30 | 
31 | inline
32 | bool POSReader::readline(Seq &sent, Seq &postag_seq)
33 | {
34 |     using std::swap;
35 |     Seq tmp_sent,
36 |         tmp_tag_seq;
37 |     std::string line;
38 |     if( !getline(is, line) ){ return false; } 
39 |     std::vector<std::string> strpair_cont;
40 |     boost::algorithm::split(strpair_cont, line, boost::is_any_of(PosDataDelimiter));
41 |     size_t pair_len = strpair_cont.size();
42 |     tmp_sent.resize(pair_len);
43 |     tmp_tag_seq.resize(pair_len);
44 |     for( size_t i = 0; i < pair_len; ++i )
45 |     {
46 |         const std::string &str_pair = strpair_cont[i];
47 |         std::string::size_type  delim_pos = str_pair.rfind(WordPosDelimiter);
48 |         assert(delim_pos != std::string::npos);
49 |         tmp_sent[i] = str_pair.substr(0, delim_pos);
50 |         tmp_tag_seq[i] = str_pair.substr(delim_pos + 1);
51 |     }
52 |     swap(sent, tmp_sent);
53 |     swap(postag_seq, tmp_tag_seq);
54 |     return true ;
55 | }
56 | 
57 | inline
58 | bool POSReader::readline(Seq &sent)
59 | {
60 |     using std::swap;
61 |     Seq word_cont;
62 |     std::string line;
63 |     if( !getline(is, line) ){ return false;  } ;
64 |     boost::algorithm::split(word_cont, line, boost::is_any_of(PosDataDelimiter));
65 |     swap(sent, word_cont);
66 |     return true;
67 | }
68 | 
69 | 
70 | } // end of namespace slnn
71 | #endif
72 | 


--------------------------------------------------------------------------------
/src/postagger/postagger_pretag/CMakeLists.txt:
--------------------------------------------------------------------------------
include_directories(${source_directory})

# Pretag POS tagger: the model is a single header.
set(postagger_headers bilstmmodel4tagging.hpp)

add_executable(postagger_pretag
    postagger.cpp
    ${common_headers}
    ${common_libs}
    ${postagger_headers}
)
target_link_libraries(postagger_pretag dynet ${Boost_LIBRARIES})
9 | 


--------------------------------------------------------------------------------
/src/segmenter/base_model/cws_mlp_base_model.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/segmenter/base_model/cws_mlp_base_model.h


--------------------------------------------------------------------------------
/src/segmenter/base_model/single_input_model.cpp:
--------------------------------------------------------------------------------
 1 | #include "single_input_model.h"
 2 | 
 3 | namespace slnn{
 4 | 
 5 | const std::string SingleInputModel::UNK_STR = "UNK_STR" ;
 6 | 
 7 | SingleInputModel::SingleInputModel()
 8 |     :m(nullptr) ,
 9 |     input_dict_wrapper(input_dict) ,
10 |     input_layer(nullptr) ,
11 |     bilstm_layer(nullptr) ,
12 |     output_layer(nullptr) 
13 | {}
14 | 
15 | SingleInputModel::~SingleInputModel()
16 | {
17 |     delete input_layer ;
18 |     delete bilstm_layer ;
19 |     delete output_layer ;
20 |     delete m ;
21 | }
22 | 
23 | void SingleInputModel::set_model_param(const boost::program_options::variables_map &var_map)
24 | {
25 |     assert(input_dict.is_frozen() && output_dict.is_frozen()) ;
26 | 
27 |     word_embedding_dim = var_map["word_embedding_dim"].as<unsigned>() ;
28 |     lstm_nr_stacked_layer = var_map["nr_lstm_stacked_layer"].as<unsigned>() ;
29 |     lstm_h_dim = var_map["lstm_h_dim"].as<unsigned>() ;
30 |     hidden_dim = var_map["tag_layer_hidden_dim"].as<unsigned>() ;
31 | 
32 |     dropout_rate = var_map["dropout_rate"].as<dynet::real>() ;
33 |     word_dict_size = input_dict.size() ;
34 |     output_dim = output_dict.size() ;
35 | }
36 | 
37 | dynet::expr::Expression
38 | SingleInputModel::build_loss(dynet::ComputationGraph &cg ,
39 |                              const IndexSeq &input_seq, const IndexSeq &gold_seq)
40 | {
41 |     input_layer->new_graph(cg) ;
42 |     bilstm_layer->new_graph(cg) ;
43 |     output_layer->new_graph(cg) ;
44 | 
45 |     bilstm_layer->set_dropout() ;
46 |     bilstm_layer->start_new_sequence() ;
47 | 
48 |     std::vector<dynet::expr::Expression> inputs_exprs ;
49 |     input_layer->build_inputs(input_seq, inputs_exprs) ;
50 | 
51 |     std::vector<dynet::expr::Expression> l2r_exprs,
52 |                                        r2l_exprs ;
53 |     bilstm_layer->build_graph(inputs_exprs, l2r_exprs, r2l_exprs) ;
54 |     return output_layer->build_output_loss(l2r_exprs, r2l_exprs, gold_seq) ;
55 | }
56 | 
57 | void 
58 | SingleInputModel::predict(dynet::ComputationGraph &cg,
59 |                           const IndexSeq &input_seq, IndexSeq &pred_seq)
60 | {
61 |     input_layer->new_graph(cg) ;
62 |     bilstm_layer->new_graph(cg) ;
63 |     output_layer->new_graph(cg) ;
64 | 
65 |     bilstm_layer->disable_dropout() ;
66 |     bilstm_layer->start_new_sequence();
67 | 
68 |     std::vector<dynet::expr::Expression> inputs_exprs ;
69 |     input_layer->build_inputs(input_seq, inputs_exprs) ;
70 |     std::vector<dynet::expr::Expression> l2r_exprs,
71 |                                        r2l_exprs ;
72 |     bilstm_layer->build_graph(inputs_exprs, l2r_exprs, r2l_exprs) ;
73 |     output_layer->build_output(l2r_exprs, r2l_exprs , pred_seq) ;
74 | }
75 | 
76 | } // end of namespace slnn
77 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_bareinput1_cl_with_feature/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | #F2I
 2 | set(exe_name
 3 |     cws_bareinput1_cl_f2i
 4 | )
 5 | 
 6 | set(specific_dependencies
 7 |     ${exe_name}_model.h
 8 |     ${exe_name}_model.cpp
 9 | )
10 | 
11 | add_executable(${exe_name}
12 |                ${exe_name}.cpp
13 |                ${input1_with_feature_modelhandler_0628_dependencies} # model handler
14 |                ${specific_dependencies} # model
15 |                ${bareinput1_f2i_model_dependencies}        # base model
16 |                ${context_module}
17 |                ${set_cws_feature_dependencies}
18 |                ${cws_reader_module}
19 |                ${cws_common_headers}                # common header
20 |                ${cws_common_libs}
21 |                )
22 | 
23 | if (WITH_CUDA_BACKEND)
24 |     target_link_libraries(${exe_name} gdynet ${Boost_LIBRARIES})
25 |     add_dependencies(${exe_name} dynetcuda)
26 |     target_link_libraries(${exe_name} dynetcuda)
27 |     CUDA_ADD_CUBLAS_TO_TARGET(${exe_name})
28 |   else()
29 |     target_link_libraries(${exe_name} dynet ${Boost_LIBRARIES})
30 | endif (WITH_CUDA_BACKEND)
31 | 
32 | 
33 | # F2O                
34 | set(exe_name
35 |     cws_bareinput1_cl_f2o
36 | )
37 | 
38 | set(specific_dependencies
39 |     ${exe_name}_model.h
40 |     ${exe_name}_model.cpp
41 | )
42 | 
43 | add_executable(${exe_name}
44 |                ${exe_name}.cpp
45 |                ${input1_with_feature_modelhandler_0628_dependencies} # model handler
46 |                ${specific_dependencies} # model
47 |                ${bareinput1_f2o_model_dependencies}        # base model
48 |                ${context_module}
49 |                ${set_cws_feature_dependencies}
50 |                ${cws_reader_module}
51 |                ${cws_common_headers}                # common header
52 |                ${cws_common_libs}
53 |                )
54 |                
55 | if (WITH_CUDA_BACKEND)
56 |     target_link_libraries(${exe_name} gdynet ${Boost_LIBRARIES})
57 |     add_dependencies(${exe_name} dynetcuda)
58 |     target_link_libraries(${exe_name} dynetcuda)
59 |     CUDA_ADD_CUBLAS_TO_TARGET(${exe_name})
60 |   else()
61 |     target_link_libraries(${exe_name} dynet ${Boost_LIBRARIES})
62 | endif (WITH_CUDA_BACKEND)
63 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_bareinput1_cl_with_feature/cws_bareinput1_cl_f2i_model.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/segmenter/cws_bareinput1_cl_with_feature/cws_bareinput1_cl_f2i_model.cpp


--------------------------------------------------------------------------------
/src/segmenter/cws_bareinput1_cl_with_feature/cws_bareinput1_cl_f2i_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_BAREINPUT1_CL_F2I_MODEL_H_
 2 | #define SLNN_SEGMENTER_CWS_BAREINPUT1_CL_F2I_MODEL_H_
 3 | 
 4 | #include <boost/log/trivial.hpp>
 5 | #include <boost/archive/text_iarchive.hpp>
 6 | #include <boost/archive/text_oarchive.hpp>
 7 | #include <boost/serialization/base_object.hpp>
 8 | #include <boost/serialization/utility.hpp>
 9 | #include <boost/serialization/split_member.hpp>
10 | 
11 | #include "dynet/dynet.h"
12 | 
13 | #include "segmenter/base_model/bareinput1_f2i_model.hpp"
14 | #include "segmenter/cws_module/nn_module/experiment_layer/nn_cws_specific_output_layer.h"
15 | namespace slnn{
16 | 
17 | template<typename RNNDerived>
18 | class CWSBareInput1CLF2IModel : public CWSBareInput1F2IModel<RNNDerived>
19 | {
20 | public:
21 |     
22 |     CWSBareInput1CLF2IModel() ;
23 |     ~CWSBareInput1CLF2IModel() ;
24 | 
25 |     void build_model_structure() override;
26 |     void print_model_info() override;
27 | };
28 | 
29 | template <typename RNNDerived>
30 | CWSBareInput1CLF2IModel<RNNDerived>::CWSBareInput1CLF2IModel()
31 |     :CWSBareInput1F2IModel<RNNDerived>()
32 | {}
33 | 
34 | template <typename RNNDerived>
35 | CWSBareInput1CLF2IModel<RNNDerived>::~CWSBareInput1CLF2IModel(){}
36 | 
37 | template <typename RNNDerived>
38 | void CWSBareInput1CLF2IModel<RNNDerived>::build_model_structure()
39 | {
40 |     this->m = new dynet::Model() ;
41 |     this->word_expr_layer = new Index2ExprLayer(this->m, this->word_dict_size, this->word_embedding_dim);
42 |     this->cws_feature_layer = new CWSFeatureLayer(this->m, this->cws_feature, this->word_expr_layer->get_lookup_param());
43 |     this->birnn_layer = new BIRNNLayer<RNNDerived>(this->m, this->nr_rnn_stacked_layer, this->rnn_x_dim, this->rnn_h_dim, 
44 |         this->dropout_rate) ;
45 |     this->output_layer = new segmenter::nn_module::experiment::SegmenterClassificationBareOutput(this->m, 
46 |             this->softmax_layer_input_dim, this->output_dim) ;
47 | }
48 | 
49 | template <typename RNNDerived>
50 | void CWSBareInput1CLF2IModel<RNNDerived>::print_model_info() // Logs the configured dimensions and the feature description at info level, in one streamed statement.
51 | {
52 |     BOOST_LOG_TRIVIAL(info) << "---------------- CWS Bare Input1 Classification F2I Model -----------------\n"
53 |         << "vocabulary size : " << this->word_dict_size << " with dimension : " << this->word_embedding_dim << "\n"
54 |         << "birnn x dim : " << this->rnn_x_dim << " , h dim : " << this->rnn_h_dim
55 |         << " , stacked layer num : " << this->nr_rnn_stacked_layer << "\n"
56 |         << "softmax layer input dim : " << this->softmax_layer_input_dim << "\n"
57 |         << "output dim : " << this->output_dim << "\n"
58 |         << "feature info : \n"
59 |         << this->cws_feature.get_feature_info() ; // whatever text the feature object reports about itself
60 | }
61 | } // end of namespace slnn 
62 | #endif 
63 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_bareinput1_cl_with_feature/cws_bareinput1_cl_f2o_model.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/segmenter/cws_bareinput1_cl_with_feature/cws_bareinput1_cl_f2o_model.cpp


--------------------------------------------------------------------------------
/src/segmenter/cws_bareinput1_cl_with_feature/cws_bareinput1_cl_f2o_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_BAREINPUT1_CL_F2O_MODEL_HPP_
 2 | #define SLNN_SEGMENTER_CWS_BAREINPUT1_CL_F2O_MODEL_HPP_
 3 | 
 4 | #include <boost/log/trivial.hpp>
 5 | #include <boost/archive/text_iarchive.hpp>
 6 | #include <boost/archive/text_oarchive.hpp>
 7 | #include <boost/serialization/base_object.hpp>
 8 | #include <boost/serialization/utility.hpp>
 9 | #include <boost/serialization/split_member.hpp>
10 | 
11 | #include "dynet/dynet.h"
12 | 
13 | #include "segmenter/base_model/bareinput1_f2o_model.hpp"
14 | #include "segmenter/cws_module/nn_module/experiment_layer/nn_cws_specific_output_layer.h"
15 | namespace slnn{
16 | 
17 | template<typename RNNDerived>
18 | class CWSBareInput1CLF2OModel : public CWSBareInput1F2OModel<RNNDerived> // Classification variant of the bare-input1 F2O model; RNNDerived is forwarded to BIRNNLayer.
19 | {
20 | public:
21 | 
22 |     CWSBareInput1CLF2OModel();
23 |     ~CWSBareInput1CLF2OModel() ;
24 | 
25 |     void build_model_structure() override; // allocates the dynet model and all layers
26 |     void print_model_info() override; // logs the configured dimensions and feature info
27 | };
28 | 
29 | template <typename RNNDerived>
30 | CWSBareInput1CLF2OModel<RNNDerived>::CWSBareInput1CLF2OModel() // Default constructor: only default-constructs the base class.
31 |     :CWSBareInput1F2OModel<RNNDerived>()
32 | {}
33 | 
34 | template <typename RNNDerived>
35 | CWSBareInput1CLF2OModel<RNNDerived>::~CWSBareInput1CLF2OModel(){} // Empty destructor. NOTE(review): presumably the base class frees the layers - confirm.
36 | 
37 | 
38 | template <typename RNNDerived>
39 | void CWSBareInput1CLF2OModel<RNNDerived>::build_model_structure() // Allocates the dynet model and the four layers, storing them in inherited members.
40 | {
41 |     this->m = new dynet::Model() ; // every layer below is constructed with this model pointer
42 |     this->word_expr_layer = new Index2ExprLayer(this->m, this->word_dict_size, this->word_embedding_dim) ;
43 |     this->cws_feature_layer = new CWSFeatureLayer(this->m, this->cws_feature, this->word_expr_layer->get_lookup_param()); // receives the lookup parameter obtained from the word layer
44 |     this->birnn_layer = new BIRNNLayer<RNNDerived>(this->m, this->nr_rnn_stacked_layer, this->word_embedding_dim, this->rnn_h_dim, // RNN input width here is the word embedding dimension
45 |         this->dropout_rate) ;
46 |     this->output_layer = new segmenter::nn_module::experiment::SegmenterClassificationBareOutput(this->m, // output layer sized by softmax_layer_input_dim and output_dim
47 |             this->softmax_layer_input_dim , this->output_dim) ;
48 | }
49 | 
50 | template <typename RNNDerived>
51 | void CWSBareInput1CLF2OModel<RNNDerived>::print_model_info() // Logs the configured dimensions and the feature description at info level, in one streamed statement.
52 | {
53 |     BOOST_LOG_TRIVIAL(info) << "---------------- CWS BareInput1 Classification F2O Model -----------------\n"
54 |         << "vocabulary size : " << this->word_dict_size << " with dimension : " << this->word_embedding_dim << "\n"
55 |         << "birnn x dim : " << this->word_embedding_dim << " , h dim : " << this->rnn_h_dim // x dim is reported as word_embedding_dim, matching the BIRNNLayer construction above
56 |         << " , stacked layer num : " << this->nr_rnn_stacked_layer << "\n"
57 |         << "softmax layer input dim: " << this->softmax_layer_input_dim << "\n"
58 |         << "output dim : " << this->output_dim << "\n"
59 |         << "feature info : \n"
60 |         << this->cws_feature.get_feature_info() ;
61 | }
62 | } // end of namespace slnn 
63 | #endif 
64 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_double_bigram_classification/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name # executable target name; also the stem of its main .cpp listed below
 2 |     cws_double_bigram_classification
 3 | )
 4 | 
 5 | set(base_name # directory and file stem of the model sources this target reuses
 6 |     cws_double_classification
 7 | )
 8 | 
 9 | set(specific_headers # model header taken from the ${base_name} directory
10 |     ${segmenter_base_dir}/${base_name}/${base_name}_model.h
11 | )
12 | set(specific_libs # model implementation taken from the ${base_name} directory
13 |     ${segmenter_base_dir}/${base_name}/${base_name}_model.cpp
14 | )
15 | 
16 | add_executable(${exe_name} # the ${...} lists not set above are defined outside this file
17 |                ${exe_name}.cpp
18 |                ${input2_bigram_modelhandler_headers} # model handler
19 |                ${specific_headers} # model
20 |                ${input2_model_headers}        # base model
21 |                ${cws_common_headers}                # common header
22 |                ${specific_libs}
23 |                ${input2_model_libs} 
24 |                ${cws_common_libs}
25 |                )
26 |                
27 | target_link_libraries(${exe_name} # links against dynet and the Boost libraries
28 |                       dynet
29 |                       ${Boost_LIBRARIES})
30 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_double_bigram_crf/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name # executable target name; also the stem of its main .cpp listed below
 2 |     cws_double_bigram_crf
 3 | )
 4 | set(base_name # directory and file stem of the model sources this target reuses
 5 |     cws_double_crf
 6 | )
 7 | 
 8 | set(specific_headers # model header taken from the ${base_name} directory
 9 |     ${segmenter_base_dir}/${base_name}/${base_name}_model.h
10 | )
11 | set(specific_libs # model implementation taken from the ${base_name} directory
12 |     ${segmenter_base_dir}/${base_name}/${base_name}_model.cpp
13 | )
14 | 
15 | add_executable(${exe_name} # the ${...} lists not set above are defined outside this file
16 |                ${exe_name}.cpp
17 |                ${input2_bigram_modelhandler_headers} # model handler
18 |                ${specific_headers} # model
19 |                ${input2_model_headers}        # base model
20 |                ${cws_common_headers}                # common header
21 |                ${specific_libs}
22 |                ${input2_model_libs} 
23 |                ${cws_common_libs}
24 |                )
25 |                
26 | target_link_libraries(${exe_name} # links against dynet and the Boost libraries
27 |                       dynet
28 |                       ${Boost_LIBRARIES})
29 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_double_bigram_pretag/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name # executable target name; also the stem of its main .cpp listed below
 2 |     cws_double_bigram_pretag
 3 | )
 4 | 
 5 | set(base_name # directory and file stem of the model sources this target reuses
 6 |     cws_double_pretag
 7 | )
 8 | 
 9 | set(specific_headers # model header taken from the ${base_name} directory
10 |     ${segmenter_base_dir}/${base_name}/${base_name}_model.h
11 | )
12 | set(specific_libs # model implementation taken from the ${base_name} directory
13 |     ${segmenter_base_dir}/${base_name}/${base_name}_model.cpp
14 | )
15 | 
16 | add_executable(${exe_name} # the ${...} lists not set above are defined outside this file
17 |                ${exe_name}.cpp
18 |                ${input2_bigram_modelhandler_headers} # model handler
19 |                ${specific_headers} # model
20 |                ${input2_model_headers}        # base model
21 |                ${cws_common_headers}                # common header
22 |                ${specific_libs}
23 |                ${input2_model_libs} 
24 |                ${cws_common_libs}
25 |                )
26 |                
27 | target_link_libraries(${exe_name} # links against dynet and the Boost libraries
28 |                       dynet
29 |                       ${Boost_LIBRARIES})
30 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_double_classification/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name # executable target name; also the stem of the main .cpp and of the model files below
 2 |      cws_double_classification
 3 | )
 4 | 
 5 | set(specific_headers # model header, given relative to this directory
 6 |     ${exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation, given relative to this directory
 9 |     ${exe_name}_model.cpp
10 | )
11 | add_executable(${exe_name} # the ${...} lists not set above are defined outside this file
12 |                ${exe_name}.cpp
13 |                ${input2_modelhandler_headers} # model handler
14 |                ${specific_headers} # model
15 |                ${input2_model_headers}        # base model
16 |                ${cws_common_headers}                # common header
17 |                ${specific_libs}
18 |                ${input2_model_libs} 
19 |                ${cws_common_libs}
20 |                )
21 |                
22 | target_link_libraries(${exe_name} # links against dynet and the Boost libraries
23 |                       dynet
24 |                       ${Boost_LIBRARIES})
25 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_double_classification/cws_double_classification_model.cpp:
--------------------------------------------------------------------------------
 1 | #include "cws_double_classification_model.h"
 2 | #include "segmenter/cws_module/cws_output_layer.h"
 3 | 
 4 | namespace slnn{
 5 | 
 6 | CWSDoubleClassificationModel::CWSDoubleClassificationModel() // Default constructor: only default-constructs the Input2Model base.
 7 |     :Input2Model()
 8 | {}
 9 | 
10 | CWSDoubleClassificationModel::~CWSDoubleClassificationModel(){} // Empty destructor. NOTE(review): presumably Input2Model frees the layers - confirm.
11 | 
12 | void CWSDoubleClassificationModel::set_model_param(const boost::program_options::variables_map &var_map) // Forwards the parsed options to Input2Model unchanged; this model reads no option of its own.
13 | {
14 |     CWSDoubleClassificationModel::Input2Model::set_model_param(var_map);
15 | }
16 | void CWSDoubleClassificationModel::build_model_structure() // Builds the tag system, then allocates the dynet model and the input, Bi-LSTM and output layers.
17 | {
18 |     tag_sys.build(tag_dict) ; // init B_ID , M_ID and so on 
19 |     m = new dynet::Model() ; // every layer below is constructed with this model pointer
20 |     input_layer = new Input2(m, dynamic_dict_size, dynamic_word_dim , fixed_dict_size , fixed_word_dim , lstm_x_dim) ; // takes both vocabularies' sizes/dims and lstm_x_dim
21 |     bilstm_layer = new BILSTMLayer(m, lstm_nr_stacked_layer, lstm_x_dim, lstm_h_dim, dropout_rate) ;
22 |     output_layer = new CWSSimpleOutput(m, lstm_h_dim, lstm_h_dim, hidden_dim, output_dim , tag_sys) ; // lstm_h_dim is passed twice - presumably one per LSTM direction; confirm against CWSSimpleOutput
23 | }
24 | 
25 | void CWSDoubleClassificationModel::print_model_info() // Logs the configured dimensions at info level in one streamed statement.
26 | {
27 |     BOOST_LOG_TRIVIAL(info) << "---------------- Input2 Classification Model -----------------\n"
28 |         << "dynamic vocabulary size : " << dynamic_dict_size << " , dimension : " << dynamic_word_dim << "\n"
29 |         << "fixed vocabulary size : " << fixed_dict_size << " , dimension : " << fixed_word_dim << "\n"
30 |         << "bi-lstm x dim : " << lstm_x_dim << " , h dim : " << lstm_h_dim
31 |         << " , stacked layer num : " << lstm_nr_stacked_layer << "\n"
32 |         << "tag hidden layer dim : " << hidden_dim << "\n"
33 |         << "dropout rate : " << dropout_rate << "\n"
34 |         << "output dim : " << output_dim ;
35 | }
36 | 
37 | }
38 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_double_classification/cws_double_classification_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_DOUBLE_CLASSIFICATION_MODEL_H_
 2 | #define SLNN_SEGMENTER_CWS_DOUBLE_CLASSIFICATION_MODEL_H_
 3 | 
 4 | #include <boost/serialization/base_object.hpp>
 5 | #include <boost/serialization/utility.hpp>
 6 | #include <boost/serialization/split_member.hpp>
 7 | 
 8 | #include <boost/log/trivial.hpp>
 9 | #include <boost/archive/text_iarchive.hpp>
10 | #include <boost/archive/text_oarchive.hpp>
11 | 
12 | #include "dynet/dynet.h"
13 | 
14 | #include "segmenter/base_model/input2_model.h"
15 | #include "segmenter/cws_module/cws_tagging_system.h"
16 | namespace slnn{
17 | 
18 | class CWSDoubleClassificationModel : public Input2Model // Input2Model specialisation declared with no data members of its own.
19 | {
20 |     friend class boost::serialization::access; // grants Boost.Serialization access to this class
21 | public:
22 | 
23 |     CWSDoubleClassificationModel() ;
24 |     ~CWSDoubleClassificationModel() ;
25 | 
26 |     void set_model_param(const boost::program_options::variables_map &var_map) ; // reads model parameters from parsed program options
27 |     void build_model_structure() ; // allocates the dynet model and the layers
28 |     void print_model_info() ; // logs the configured dimensions
29 |     
30 |     template <typename Archive>
31 |     void save(Archive &ar, const unsigned version) const ; // writes the Input2Model base sub-object to the archive
32 |     template <typename Archive>
33 |     void load(Archive &ar, const unsigned version) ; // reads the Input2Model base sub-object (parameter name spelling fixed: was "verison")
34 |     BOOST_SERIALIZATION_SPLIT_MEMBER() // Boost macro for classes that split serialization into save()/load()
35 | };
36 | 
37 | /*************  Template Implementation *****************/
38 | 
39 | template <typename Archive>
40 | void CWSDoubleClassificationModel::save(Archive &ar, const unsigned version) const // version is not used
41 | {
42 |     ar & boost::serialization::base_object<Input2Model>(*this) ; // this class adds no state of its own to serialize
43 | }
44 | 
45 | template <typename Archive>
46 | void CWSDoubleClassificationModel::load(Archive &ar, const unsigned version) // version is not used
47 | {
48 |     ar & boost::serialization::base_object<Input2Model>(*this) ; // mirrors save(): only the base sub-object is read
49 | }
50 | } // end of namespace slnn 
51 | #endif 
52 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_double_crf/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name # executable target name; also the stem of the main .cpp and of the model files below
 2 |      cws_double_crf
 3 | )
 4 | 
 5 | set(specific_headers # model header, given relative to this directory
 6 |     ${exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation, given relative to this directory
 9 |     ${exe_name}_model.cpp
10 | )
11 | add_executable(${exe_name} # the ${...} lists not set above are defined outside this file
12 |                ${exe_name}.cpp
13 |                ${input2_modelhandler_headers} # model handler
14 |                ${specific_headers} # model
15 |                ${input2_model_headers}        # base model
16 |                ${cws_common_headers}                # common header
17 |                ${specific_libs}
18 |                ${input2_model_libs} 
19 |                ${cws_common_libs}
20 |                )
21 |                
22 | target_link_libraries(${exe_name} # links against dynet and the Boost libraries
23 |                       dynet
24 |                       ${Boost_LIBRARIES})
25 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_double_crf/cws_double_crf_model.cpp:
--------------------------------------------------------------------------------
 1 | #include "cws_double_crf_model.h"
 2 | #include "segmenter/cws_module/cws_output_layer.h"
 3 | 
 4 | namespace slnn{
 5 | 
 6 | CWSDoubleCRFModel::CWSDoubleCRFModel() // Default constructor: only default-constructs the Input2Model base.
 7 |     :Input2Model()
 8 | {}
 9 | 
10 | CWSDoubleCRFModel::~CWSDoubleCRFModel(){} // Empty destructor. NOTE(review): presumably Input2Model frees the layers - confirm.
11 | 
12 | void CWSDoubleCRFModel::set_model_param(const boost::program_options::variables_map &var_map) // Reads the "tag_dim" option, then lets Input2Model read the remaining options.
13 | {
14 |     tag_dim = var_map["tag_dim"].as<unsigned>();
15 |     CWSDoubleCRFModel::Input2Model::set_model_param(var_map);
16 | }
17 | void CWSDoubleCRFModel::build_model_structure() // Builds the tag system, then allocates the dynet model and the input, Bi-LSTM and CRF output layers.
18 | {
19 |     tag_sys.build(tag_dict) ; // init B_ID , M_ID and so on 
20 |     m = new dynet::Model() ; // every layer below is constructed with this model pointer
21 |     input_layer = new Input2(m, dynamic_dict_size, dynamic_word_dim , fixed_dict_size , fixed_word_dim , lstm_x_dim) ;
22 |     bilstm_layer = new BILSTMLayer(m, lstm_nr_stacked_layer, lstm_x_dim, lstm_h_dim, dropout_rate) ;
23 |     output_layer = new CWSCRFOutput(m, tag_dim, lstm_h_dim, lstm_h_dim, hidden_dim, output_dim , dropout_rate, tag_sys) ; // lstm_h_dim is passed twice - presumably one per LSTM direction; confirm against CWSCRFOutput
24 | }
25 | 
26 | void CWSDoubleCRFModel::print_model_info() // Logs the configured dimensions at info level in one streamed statement.
27 | {
28 |     BOOST_LOG_TRIVIAL(info) << "---------------- Input2 CRF Model -----------------\n"
29 |         << "dynamic vocabulary size : " << dynamic_dict_size << " , dimension : " << dynamic_word_dim << "\n"
30 |         << "fixed vocabulary size : " << fixed_dict_size << " , dimension : " << fixed_word_dim << "\n"
31 |         << "tag dict size : " << output_dim << " , dimension : " << tag_dim << "\n"
32 |         // the dropout rate used to be logged here as well; it is now reported once, after the hidden layer dim
33 |         << "bi-lstm x dim : " << lstm_x_dim << " , h dim : " << lstm_h_dim
34 |         << " , stacked layer num : " << lstm_nr_stacked_layer << "\n"
35 |         << "tag hidden layer dim : " << hidden_dim << "\n"
36 |         << "dropout rate : " << dropout_rate << "\n"
37 |         << "output dim : " << output_dim ;
38 | }
39 | 
40 | }
41 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_double_crf/cws_double_crf_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_DOUBLE_CRF_MODEL_H_
 2 | #define SLNN_SEGMENTER_CWS_DOUBLE_CRF_MODEL_H_
 3 | 
 4 | #include <boost/serialization/base_object.hpp>
 5 | #include <boost/serialization/utility.hpp>
 6 | #include <boost/serialization/split_member.hpp>
 7 | 
 8 | #include <boost/log/trivial.hpp>
 9 | #include <boost/archive/text_iarchive.hpp>
10 | #include <boost/archive/text_oarchive.hpp>
11 | 
12 | #include "dynet/dynet.h"
13 | 
14 | #include "segmenter/base_model/input2_model.h"
15 | #include "segmenter/cws_module/cws_tagging_system.h"
16 | namespace slnn{
17 | 
18 | class CWSDoubleCRFModel : public Input2Model // Input2Model specialisation that adds a tag_dim setting.
19 | {
20 |     friend class boost::serialization::access; // grants Boost.Serialization access to this class
21 | public:
22 | 
23 |     CWSDoubleCRFModel() ;
24 |     ~CWSDoubleCRFModel() ;
25 | 
26 |     void set_model_param(const boost::program_options::variables_map &var_map) ; // reads model parameters from parsed program options
27 |     void build_model_structure() ; // allocates the dynet model and the layers
28 |     void print_model_info() ; // logs the configured dimensions
29 |     
30 |     template <typename Archive>
31 |     void save(Archive &ar, const unsigned version) const ; // writes tag_dim, then the Input2Model base sub-object
32 |     template <typename Archive>
33 |     void load(Archive &ar, const unsigned version) ; // reads in the same order save() writes (parameter name spelling fixed: was "verison")
34 |     BOOST_SERIALIZATION_SPLIT_MEMBER() // Boost macro for classes that split serialization into save()/load()
35 | 
36 | public:
37 |     unsigned tag_dim; // read from the "tag_dim" program option; no in-class initializer
38 | };
39 | 
40 | /*************  Template Implementation *****************/
41 | 
42 | template <typename Archive>
43 | void CWSDoubleCRFModel::save(Archive &ar, const unsigned version) const // version is not used
44 | {
45 |     ar & tag_dim; // own state first ...
46 |     ar & boost::serialization::base_object<Input2Model>(*this) ; // ... then the base sub-object
47 | }
48 | 
49 | template <typename Archive>
50 | void CWSDoubleCRFModel::load(Archive &ar, const unsigned version) // version is not used
51 | {
52 |     ar & tag_dim ; // must stay in the same order as save()
53 |     ar & boost::serialization::base_object<Input2Model>(*this) ;
54 | }
55 | } // end of namespace slnn 
56 | #endif 
57 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_double_pretag/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name # executable target name; also the stem of the main .cpp and of the model files below
 2 |      cws_double_pretag
 3 | )
 4 | 
 5 | set(specific_headers # model header, given relative to this directory
 6 |     ${exe_name}_model.h
 7 | )
 8 | set(specific_libs # model implementation, given relative to this directory
 9 |     ${exe_name}_model.cpp
10 | )
11 | add_executable(${exe_name} # the ${...} lists not set above are defined outside this file
12 |                ${exe_name}.cpp
13 |                ${input2_modelhandler_headers} # model handler
14 |                ${specific_headers} # model
15 |                ${input2_model_headers}        # base model
16 |                ${cws_common_headers}                # common header
17 |                ${specific_libs}
18 |                ${input2_model_libs} 
19 |                ${cws_common_libs}
20 |                )
21 |                
22 | target_link_libraries(${exe_name} # links against dynet and the Boost libraries
23 |                       dynet
24 |                       ${Boost_LIBRARIES})
25 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_double_pretag/cws_double_pretag_model.cpp:
--------------------------------------------------------------------------------
 1 | #include "cws_double_pretag_model.h"
 2 | #include "segmenter/cws_module/cws_output_layer.h"
 3 | 
 4 | namespace slnn{
 5 | 
 6 | CWSDoublePretagModel::CWSDoublePretagModel() // Default constructor: only default-constructs the Input2Model base.
 7 |     :Input2Model()
 8 | {}
 9 | 
10 | CWSDoublePretagModel::~CWSDoublePretagModel(){} // Empty destructor. NOTE(review): presumably Input2Model frees the layers - confirm.
11 | 
12 | void CWSDoublePretagModel::set_model_param(const boost::program_options::variables_map &var_map) // Reads the "tag_dim" option, then lets Input2Model read the remaining options.
13 | {
14 |     tag_dim = var_map["tag_dim"].as<unsigned>();
15 |     CWSDoublePretagModel::Input2Model::set_model_param(var_map);
16 | }
17 | void CWSDoublePretagModel::build_model_structure() // Builds the tag system, then allocates the dynet model and the input, Bi-LSTM and pretag output layers.
18 | {
19 |     tag_sys.build(tag_dict) ; // init B_ID , M_ID and so on 
20 |     m = new dynet::Model() ; // every layer below is constructed with this model pointer
21 |     input_layer = new Input2(m, dynamic_dict_size, dynamic_word_dim , fixed_dict_size , fixed_word_dim , lstm_x_dim) ;
22 |     bilstm_layer = new BILSTMLayer(m, lstm_nr_stacked_layer, lstm_x_dim, lstm_h_dim, dropout_rate) ;
23 |     output_layer = new CWSPretagOutput(m, tag_dim, lstm_h_dim, lstm_h_dim, hidden_dim, output_dim , tag_sys) ; // lstm_h_dim is passed twice - presumably one per LSTM direction; confirm against CWSPretagOutput
24 | }
25 | 
26 | void CWSDoublePretagModel::print_model_info() // Logs the configured dimensions at info level in one streamed statement.
27 | {
28 |     BOOST_LOG_TRIVIAL(info) << "---------------- Input2 Pretag Model -----------------\n"
29 |         << "dynamic vocabulary size : " << dynamic_dict_size << " , dimension : " << dynamic_word_dim << "\n"
30 |         << "fixed vocabulary size : " << fixed_dict_size << " , dimension : " << fixed_word_dim << "\n"
31 |         << "tag dict size : " << output_dim << " , dimension : " << tag_dim << "\n"
32 |         << "bi-lstm x dim : " << lstm_x_dim << " , h dim : " << lstm_h_dim
33 |         << " , stacked layer num : " << lstm_nr_stacked_layer << "\n"
34 |         << "tag hidden layer dim : " << hidden_dim << "\n"
35 |         << "dropout rate : " << dropout_rate << "\n" // label fixed: previously misspelled "dropout tate"
36 |         << "output dim : " << output_dim ;
37 | }
38 | 
39 | }
40 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_double_pretag/cws_double_pretag_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_DOUBLE_PRETAG_MODEL_H_
 2 | #define SLNN_SEGMENTER_CWS_DOUBLE_PRETAG_MODEL_H_
 3 | 
 4 | #include <boost/serialization/base_object.hpp>
 5 | #include <boost/serialization/utility.hpp>
 6 | #include <boost/serialization/split_member.hpp>
 7 | 
 8 | #include <boost/log/trivial.hpp>
 9 | #include <boost/archive/text_iarchive.hpp>
10 | #include <boost/archive/text_oarchive.hpp>
11 | 
12 | #include "dynet/dynet.h"
13 | 
14 | #include "segmenter/base_model/input2_model.h"
15 | #include "segmenter/cws_module/cws_tagging_system.h"
16 | namespace slnn{
17 | 
18 | class CWSDoublePretagModel : public Input2Model // Input2Model specialisation that adds a tag_dim setting.
19 | {
20 |     friend class boost::serialization::access; // grants Boost.Serialization access to this class
21 | public:
22 | 
23 |     CWSDoublePretagModel() ;
24 |     ~CWSDoublePretagModel() ;
25 | 
26 |     void set_model_param(const boost::program_options::variables_map &var_map) ; // reads model parameters from parsed program options
27 |     void build_model_structure() ; // allocates the dynet model and the layers
28 |     void print_model_info() ; // logs the configured dimensions
29 |     
30 |     template <typename Archive>
31 |     void save(Archive &ar, const unsigned version) const ; // writes tag_dim, then the Input2Model base sub-object
32 |     template <typename Archive>
33 |     void load(Archive &ar, const unsigned version) ; // reads in the same order save() writes (parameter name spelling fixed: was "verison")
34 |     BOOST_SERIALIZATION_SPLIT_MEMBER() // Boost macro for classes that split serialization into save()/load()
35 | 
36 | public:
37 |     unsigned tag_dim; // read from the "tag_dim" program option; no in-class initializer
38 | };
39 | 
40 | /*************  Template Implementation *****************/
41 | 
42 | template <typename Archive>
43 | void CWSDoublePretagModel::save(Archive &ar, const unsigned version) const // version is not used
44 | {
45 |     ar & tag_dim; // own state first ...
46 |     ar & boost::serialization::base_object<Input2Model>(*this) ; // ... then the base sub-object
47 | }
48 | 
49 | template <typename Archive>
50 | void CWSDoublePretagModel::load(Archive &ar, const unsigned version) // version is not used
51 | {
52 |     ar & tag_dim ; // must stay in the same order as save()
53 |     ar & boost::serialization::base_object<Input2Model>(*this) ;
54 | }
55 | } // end of namespace slnn 
56 | #endif 
57 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_input1_cl_with_feature/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # F2I executable
 2 | set(exe_name # executable target name; also the stem of the main .cpp and of the model files below
 3 |     cws_input1_cl_f2i
 4 | )
 5 | 
 6 | set(specific_dependencies # model header and source, given relative to this directory
 7 |     ${exe_name}_model.h
 8 |     ${exe_name}_model.cpp
 9 | )
10 | 
11 | add_executable(${exe_name} # the ${...} lists not set above are defined outside this file
12 |                ${exe_name}.cpp
13 |                ${input1_with_feature_modelhandler_0628_dependencies} # model handler
14 |                ${specific_dependencies} # model
15 |                ${input1_f2i_model_0628_dependencies}        # base model
16 |                ${set_cws_feature_dependencies}
17 |                ${context_module}
18 |                ${cws_reader_module}
19 |                ${cws_common_headers}                # common header
20 |                ${cws_common_libs}
21 |                )
22 |                
23 | target_link_libraries(${exe_name} # links against dynet and the Boost libraries
24 |                       dynet
25 |                       ${Boost_LIBRARIES})
26 | 
27 | 
28 | # F2O executable; exe_name and specific_dependencies are re-set for the second target
29 | set(exe_name
30 |     cws_input1_cl_f2o
31 | )
32 | 
33 | set(specific_dependencies # model header and source, given relative to this directory
34 |     ${exe_name}_model.h
35 |     ${exe_name}_model.cpp
36 | )
37 | 
38 | add_executable(${exe_name} # same layout as the F2I target, with the F2O base model list
39 |                ${exe_name}.cpp
40 |                ${input1_with_feature_modelhandler_0628_dependencies} # model handler
41 |                ${specific_dependencies} # model
42 |                ${input1_f2o_model_0628_dependencies}        # base model
43 |                ${context_module}
44 |                ${set_cws_feature_dependencies}
45 |                ${cws_reader_module}
46 |                ${cws_common_headers}                # common header
47 |                ${cws_common_libs}
48 |                )
49 |                
50 | target_link_libraries(${exe_name} # links against dynet and the Boost libraries
51 |                       dynet
52 |                       ${Boost_LIBRARIES})


--------------------------------------------------------------------------------
/src/segmenter/cws_input1_cl_with_feature/cws_input1_cl_f2i_model.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/segmenter/cws_input1_cl_with_feature/cws_input1_cl_f2i_model.cpp


--------------------------------------------------------------------------------
/src/segmenter/cws_input1_cl_with_feature/cws_input1_cl_f2i_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_INPUT1_CL_F2I_MODEL_H_
 2 | #define SLNN_SEGMENTER_CWS_INPUT1_CL_F2I_MODEL_H_
 3 | 
 4 | #include <boost/log/trivial.hpp>
 5 | #include <boost/archive/text_iarchive.hpp>
 6 | #include <boost/archive/text_oarchive.hpp>
 7 | #include <boost/serialization/base_object.hpp>
 8 | #include <boost/serialization/utility.hpp>
 9 | #include <boost/serialization/split_member.hpp>
10 | 
11 | #include "dynet/dynet.h"
12 | 
13 | #include "segmenter/base_model/input1_f2i_model_0628.hpp"
14 | #include "segmenter/cws_module/cws_output_layer.h"
15 | namespace slnn{
16 | 
17 | template<typename RNNDerived>
18 | class CWSInput1CLF2IModel : public CWSInput1F2IModel<RNNDerived> // Classification variant of the input1 F2I model; RNNDerived is forwarded to BIRNNLayer.
19 | {
20 | public:
21 |     
22 |     CWSInput1CLF2IModel() ;
23 |     ~CWSInput1CLF2IModel() ;
24 | 
25 |     void build_model_structure() override; // allocates the dynet model and all layers
26 |     void print_model_info() override; // logs the configured dimensions and feature info
27 | };
28 | 
29 | template <typename RNNDerived>
30 | CWSInput1CLF2IModel<RNNDerived>::CWSInput1CLF2IModel() // Default constructor: only default-constructs the base class.
31 |     :CWSInput1F2IModel<RNNDerived>()
32 | {}
33 | 
34 | template <typename RNNDerived>
35 | CWSInput1CLF2IModel<RNNDerived>::~CWSInput1CLF2IModel(){} // Empty destructor. NOTE(review): presumably the base class frees the layers - confirm.
36 | 
37 | template <typename RNNDerived>
38 | void CWSInput1CLF2IModel<RNNDerived>::build_model_structure() // Allocates the dynet model and the four layers, storing them in inherited members.
39 | {
40 |     this->m = new dynet::Model() ; // every layer below is constructed with this model pointer
41 |     this->input_layer = new Input1WithFeature(this->m, this->word_dict_size, this->word_embedding_dim, // input layer is given the feature dimension and rnn_x_dim as well
42 |         this->cws_feature.get_feature_dim(), this->rnn_x_dim) ;
43 |     this->cws_feature_layer = new CWSFeatureLayer(this->m, this->cws_feature,this->input_layer->get_lookup_param()); // receives the lookup parameter obtained from the input layer
44 |     this->birnn_layer = new BIRNNLayer<RNNDerived>(this->m, this->nr_rnn_stacked_layer, this->rnn_x_dim, this->rnn_h_dim, // RNN input width here is rnn_x_dim
45 |         this->dropout_rate) ;
46 |     this->output_layer = new CWSSimpleOutputNew(this->m, this->rnn_h_dim, this->rnn_h_dim, // rnn_h_dim is passed twice - presumably one per RNN direction; confirm against CWSSimpleOutputNew
47 |         this->hidden_dim, this->output_dim, this->dropout_rate) ;
48 | }
49 | 
50 | template <typename RNNDerived>
51 | void CWSInput1CLF2IModel<RNNDerived>::print_model_info() // Logs the configured dimensions and the feature description at info level, in one streamed statement.
52 | {
53 |     BOOST_LOG_TRIVIAL(info) << "---------------- CWS Input1 Classification F2I Model -----------------\n"
54 |         << "vocabulary size : " << this->word_dict_size << " with dimension : " << this->word_embedding_dim << "\n"
55 |         << "birnn x dim : " << this->rnn_x_dim << " , h dim : " << this->rnn_h_dim
56 |         << " , stacked layer num : " << this->nr_rnn_stacked_layer << "\n"
57 |         << "tag hidden layer dim : " << this->hidden_dim << "\n"
58 |         << "output dim : " << this->output_dim << "\n"
59 |         << "feature info : \n"
60 |         << this->cws_feature.get_feature_info() ;
61 | }
62 | } // end of namespace slnn 
63 | #endif 
64 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_input1_cl_with_feature/cws_input1_cl_f2o_model.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/segmenter/cws_input1_cl_with_feature/cws_input1_cl_f2o_model.cpp


--------------------------------------------------------------------------------
/src/segmenter/cws_input1_cl_with_feature/cws_input1_cl_f2o_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_INPUT1_CL_F2O_MODEL_HPP_
 2 | #define SLNN_SEGMENTER_CWS_INPUT1_CL_F2O_MODEL_HPP_
 3 | 
 4 | #include <boost/log/trivial.hpp>
 5 | #include <boost/archive/text_iarchive.hpp>
 6 | #include <boost/archive/text_oarchive.hpp>
 7 | #include <boost/serialization/base_object.hpp>
 8 | #include <boost/serialization/utility.hpp>
 9 | #include <boost/serialization/split_member.hpp>
10 | 
11 | #include "dynet/dynet.h"
12 | 
13 | #include "segmenter/base_model/input1_f2o_model_0628.hpp"
14 | #include "segmenter/cws_module/cws_output_layer.h"
15 | namespace slnn{
16 | 
17 | template<typename RNNDerived>
18 | class CWSInput1CLF2OModel : public CWSInput1F2OModel<RNNDerived> // Classification variant of the input1 F2O model; RNNDerived is forwarded to BIRNNLayer.
19 | {
20 | public:
21 | 
22 |     CWSInput1CLF2OModel();
23 |     ~CWSInput1CLF2OModel() ;
24 | 
25 |     void build_model_structure() override; // allocates the dynet model and all layers
26 |     void print_model_info() override; // logs the configured dimensions and feature info
27 | };
28 | 
29 | template <typename RNNDerived>
30 | CWSInput1CLF2OModel<RNNDerived>::CWSInput1CLF2OModel() // Default constructor: only default-constructs the base class.
31 |     :CWSInput1F2OModel<RNNDerived>()
32 | {}
33 | 
34 | template <typename RNNDerived>
35 | CWSInput1CLF2OModel<RNNDerived>::~CWSInput1CLF2OModel(){} // Empty destructor. NOTE(review): presumably the base class frees the layers - confirm.
36 | 
37 | template <typename RNNDerived>
38 | void CWSInput1CLF2OModel<RNNDerived>::build_model_structure() // Allocates the dynet model and the four layers, storing them in inherited members.
39 | {
40 |     this->m = new dynet::Model() ; // every layer below is constructed with this model pointer
41 |     this->input_layer = new Input1(this->m, this->word_dict_size, this->word_embedding_dim) ;
42 |     this->cws_feature_layer = new CWSFeatureLayer(this->m, this->cws_feature, this->input_layer->get_lookup_param()); // receives the lookup parameter obtained from the input layer
43 |     this->birnn_layer = new BIRNNLayer<RNNDerived>(this->m, this->nr_rnn_stacked_layer, this->word_embedding_dim, this->rnn_h_dim, // RNN input width here is the word embedding dimension
44 |         this->dropout_rate) ;
45 |     this->output_layer = new CWSSimpleOutputWithFeature(this->m, this->rnn_h_dim, this->rnn_h_dim, this->cws_feature.get_feature_dim(), // the feature dimension is handed to the output layer in this variant
46 |         this->hidden_dim, this->output_dim, this->dropout_rate) ;
47 | }
48 | 
49 | template <typename RNNDerived>
50 | void CWSInput1CLF2OModel<RNNDerived>::print_model_info() // Logs the configured dimensions and the feature description at info level, in one streamed statement.
51 | {
52 |     BOOST_LOG_TRIVIAL(info) << "---------------- CWS Input1 Classification F2O Model -----------------\n"
53 |         << "vocabulary size : " << this->word_dict_size << " with dimension : " << this->word_embedding_dim << "\n"
54 |         << "birnn x dim : " << this->word_embedding_dim << " , h dim : " << this->rnn_h_dim // x dim is reported as word_embedding_dim, matching the BIRNNLayer construction above
55 |         << " , stacked layer num : " << this->nr_rnn_stacked_layer << "\n"
56 |         << "tag hidden layer dim : " << this->hidden_dim << "\n"
57 |         << "output dim : " << this->output_dim << "\n"
58 |         << "feature info : \n"
59 |         << this->cws_feature.get_feature_info() ;
60 | }
61 | } // end of namespace slnn 
62 | #endif 
63 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_mlp_input1/cws_mlp_input1_instance.cpp:
--------------------------------------------------------------------------------
 1 | #include "cws_mlp_input1_instance.h"
 2 | namespace slnn{
 3 | namespace segmenter{
 4 | namespace mlp_input1{
 5 | 
// Explicit instantiation definitions. Each one matches an `extern template`
// declaration in cws_mlp_input1_instance.h.

// Unigram token module with the basic MLP parameters and the abstract MLP network.
template class SegmenterMlpInput1Template<
    token_module::TokenSegmenterInput1Unigram,
    structure_param_module::SegmenterBasicMlpParam,
    nn_module::NnSegmenterInput1MlpAbstract>;

// Bigram token module with the basic MLP parameters and the abstract MLP network.
template class SegmenterMlpInput1Template<
    token_module::TokenSegmenterInput1Bigram,
    structure_param_module::SegmenterBasicMlpParam,
    nn_module::NnSegmenterInput1MlpAbstract>;

// "All" token module with its dedicated parameter and network types.
template class SegmenterMlpInput1Template<
    token_module::TokenSegmenterInput1All,
    structure_param_module::ParamSegmenterMlpInput1All,
    nn_module::NnSegmenterMlpInput1All>;
20 | 
21 | } // end of namespace mlp_input1
22 | } // end of namespace segmenter
23 | } // end of namespace slnn


--------------------------------------------------------------------------------
/src/segmenter/cws_mlp_input1/cws_mlp_input1_instance.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MLP_INPUT1_CWS_MLP_INPUT1_INSTANCE_H_
 2 | #define SLNN_SEGMENTER_CWS_MLP_INPUT1_CWS_MLP_INPUT1_INSTANCE_H_
 3 | #include "segmenter/cws_mlp_input1/cws_mlp_input1_template.h"
 4 | #include "segmenter/cws_module/token_module/input1/token_input1_unigram.h"
 5 | #include "segmenter/cws_module/token_module/input1/token_input1_bigram.h"
 6 | #include "segmenter/cws_module/token_module/input1/token_input1_all.h"
 7 | #include "segmenter/cws_module/structure_param_module/basic_mlp_param.h"
 8 | #include "segmenter/cws_module/structure_param_module/param_mlp_input1_all.h"
 9 | #include "segmenter/cws_module/nn_module/mlp_input1/nn_cws_mlp_input1_abstract.h"
10 | #include "segmenter/cws_module/nn_module/mlp_input1/nn_cws_mlp_input1_all.h"
11 | namespace slnn{
12 | namespace segmenter{
13 | namespace mlp_input1{
14 | 
// Each variant below is declared `extern template` so that translation units
// including this header do not instantiate it implicitly; the matching explicit
// instantiation definitions are in cws_mlp_input1_instance.cpp.

// Unigram variant.
extern template class SegmenterMlpInput1Template<
    token_module::TokenSegmenterInput1Unigram,
    structure_param_module::SegmenterBasicMlpParam,
    nn_module::NnSegmenterInput1MlpAbstract>;

// Short alias for the unigram variant.
using MlpInput1Unigram = SegmenterMlpInput1Template<
    token_module::TokenSegmenterInput1Unigram,
    structure_param_module::SegmenterBasicMlpParam,
    nn_module::NnSegmenterInput1MlpAbstract>;

// Bigram variant.
extern template class SegmenterMlpInput1Template<
    token_module::TokenSegmenterInput1Bigram,
    structure_param_module::SegmenterBasicMlpParam,
    nn_module::NnSegmenterInput1MlpAbstract>;

// Short alias for the bigram variant.
using MlpInput1Bigram = SegmenterMlpInput1Template<
    token_module::TokenSegmenterInput1Bigram,
    structure_param_module::SegmenterBasicMlpParam,
    nn_module::NnSegmenterInput1MlpAbstract > ;


// "All" variant, which uses its own parameter and network types.
extern template class SegmenterMlpInput1Template<
    token_module::TokenSegmenterInput1All,
    structure_param_module::ParamSegmenterMlpInput1All,
    nn_module::NnSegmenterMlpInput1All>;

// Short alias for the "all" variant.
using MlpInput1All = SegmenterMlpInput1Template<
    token_module::TokenSegmenterInput1All,
    structure_param_module::ParamSegmenterMlpInput1All,
    nn_module::NnSegmenterMlpInput1All>;
45 | 
46 | } // end of namespace mlp_input1
47 | } // end of namespace segmenter
48 | } // end of namespace slnn
49 | 
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/cws_eval.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MODULE_CWS_EVAL_H_
 2 | #define SLNN_SEGMENTER_CWS_MODULE_CWS_EVAL_H_
 3 | #include "token_module/cws_tag_definition.h"
 4 | 
 5 | namespace slnn{
 6 | namespace segmenter{
 7 | namespace eval{
 8 | 
 9 | namespace eval_inner{
10 | 
/**
 * Raw evaluation counters.
 * All five counters start at zero, can be reset with clear(), and can be
 * accumulated field by field with operator+=.
 */
struct EvalTempResultT
{
    unsigned nr_token_predict_right;
    unsigned nr_token_predict;
    unsigned nr_token_gold;
    unsigned nr_tag_predict_right;
    unsigned nr_tag;

    /** Start with every counter at zero. */
    EvalTempResultT()
    {
        clear();
    }

    /** Reset every counter to zero. */
    void clear()
    {
        nr_token_predict_right = 0;
        nr_token_predict = 0;
        nr_token_gold = 0;
        nr_tag_predict_right = 0;
        nr_tag = 0;
    }

    /** Add each counter of `rhs` to the matching counter of this object. */
    EvalTempResultT& operator+=(const EvalTempResultT &rhs)
    {
        nr_token_predict_right += rhs.nr_token_predict_right;
        nr_token_predict       += rhs.nr_token_predict;
        nr_token_gold          += rhs.nr_token_gold;
        nr_tag_predict_right   += rhs.nr_tag_predict_right;
        nr_tag                 += rhs.nr_tag;
        return *this;
    }
};
32 | 
33 | } // end of namespace eval_inner
34 | 
/**
 * Final evaluation result: four float scores plus the raw counters.
 * Every field defaults to zero.
 */
struct EvalResultT
{
    // scores (presumably precision, recall, F1 and tag accuracy -- confirm against the evaluator)
    float p{ 0.f };
    float r{ 0.f };
    float f1{ 0.f };
    float acc{ 0.f };
    // raw counters, named like the fields of eval_inner::EvalTempResultT
    unsigned nr_token_predict_right{ 0 };
    unsigned nr_token_predict{ 0 };
    unsigned nr_token_gold{ 0 };
    unsigned nr_tag_predict_right{ 0 };
    unsigned nr_tag{ 0 };
};
47 | 
48 | /**
49 |  * do segmenter eval.
50 |  * not thread safe.
51 |  */
52 | class SegmenterEval
53 | {
54 | public:
55 |     SegmenterEval();
56 |     SegmenterEval(const SegmenterEval &) = delete;
57 |     SegmenterEval(const SegmenterEval &&) = delete;
58 |     SegmenterEval& operator=(const SegmenterEval&) = delete;
59 | public:
60 |     // iteratively
61 |     void start_eval();
62 |     void eval_iteratively(const std::vector<Index> &gold_tagseq, const std::vector<Index> &pred_tagseq);
63 |     EvalResultT end_eval();
64 |     // batch
65 |     EvalResultT eval(const std::vector<std::vector<Index>> &gold_tagseq_set, const std::vector<std::vector<Index>> &pred_tagseq);
66 | private:
67 |     eval_inner::EvalTempResultT eval_one(const std::vector<Index> &gold_tagseq, const std::vector<Index> &pred_tagseq);
68 | private:
69 |     eval_inner::EvalTempResultT tmp_result4iter;
70 | };
71 | 
72 | 
73 | /******************************************
74 |  * Inline Implementation
75 |  ******************************************/
76 | 
/** Begin an iterative evaluation by resetting the running counters to zero. */
inline
void SegmenterEval::start_eval()
{
    tmp_result4iter.clear();
}
82 | 
83 | 
84 | } // end of namespace eval
85 | } // end of namespace segmenter
86 | } // end of namespace slnn
87 | 
88 | 
89 | #endif


--------------------------------------------------------------------------------
/src/segmenter/cws_module/cws_feature.cpp:
--------------------------------------------------------------------------------
 1 | #include "cws_feature.h"
 2 | 
 3 | namespace slnn{
 4 | 
/**
 * Construct the feature bundle.
 * @param word_dict_wrapper dictionary wrapper handed to the context feature;
 *        the other feature members are default-constructed.
 */
CWSFeature::CWSFeature(DictWrapper &word_dict_wrapper)
    :context_feature(word_dict_wrapper)
{}
 8 | 
 9 | void CWSFeature::set_feature_parameters(unsigned lexicon_start_here_dim, unsigned lexicon_pass_here_dim, unsigned lexicon_end_here_dim,
10 |     unsigned context_left_size, unsigned context_right_size, unsigned word_embedding_dim,
11 |     unsigned chartype_dim)
12 | {
13 |     lexicon_feature.set_dim(lexicon_start_here_dim, lexicon_pass_here_dim, lexicon_end_here_dim);
14 |     context_feature.set_parameters(context_left_size, context_right_size, word_embedding_dim);
15 |     chartype_feature.set_dim(chartype_dim);
16 | }
17 | 
18 | 
19 | void CWSFeature::extract(const Seq &char_seq, const IndexSeq &index_char_seq, CWSFeatureDataSeq &cws_feature_seq)
20 | {
21 |     lexicon_feature.extract(char_seq, cws_feature_seq.get_lexicon_feature_data_seq());
22 |     context_feature.extract(index_char_seq, cws_feature_seq.get_context_feature_data_seq());
23 |     chartype_feature.extract(char_seq, cws_feature_seq.get_chartype_feature_data_seq());
24 | }
25 | 
26 | std::string CWSFeature::get_feature_info() const
27 | {
28 |     std::ostringstream oss;
29 |     oss << "lexicon feature info : \n" << lexicon_feature.get_feature_info() << "\n"
30 |         << "context feature info : \n" << context_feature.get_feature_info() << "\n"
31 |         << "chartype feature info : \n" << chartype_feature.get_feature_info() ;
32 |     return oss.str();
33 | }
34 | 
35 | } // end of namespace slnn


--------------------------------------------------------------------------------
/src/segmenter/cws_module/cws_feature_layer.cpp:
--------------------------------------------------------------------------------
 1 | #include "cws_feature_layer.h"
 2 | 
 3 | namespace slnn{
/**
 * Construct the layer from explicit sizes.
 * The lexicon sub-layer receives the three (dict size, dim) pairs, the context
 * sub-layer receives `word_lookup_param`, and the chartype sub-layer receives
 * the category count and dimension.
 */
CWSFeatureLayer::CWSFeatureLayer(dynet::Model *dynet_m, unsigned start_here_dict_size, unsigned start_here_dim,
    unsigned pass_here_dict_size, unsigned pass_here_dim,
    unsigned end_here_dict_size, unsigned end_here_dim,
    dynet::LookupParameter word_lookup_param,
    unsigned chartype_category_num, unsigned chartype_dim)
    :lexicon_feature_layer(dynet_m, start_here_dict_size, start_here_dim, pass_here_dict_size, pass_here_dim,
        end_here_dict_size, end_here_dim),
    context_feature_layer(dynet_m, word_lookup_param),
    chartype_feature_layer(dynet_m, chartype_category_num, chartype_dim)
{}
/**
 * Construct the layer from an already configured CWSFeature.
 * The lexicon sub-layer is built from `cws_feature.lexicon_feature`, and the
 * chartype sub-layer takes its dictionary size and dimension from
 * `cws_feature.chartype_feature`.
 */
CWSFeatureLayer::CWSFeatureLayer(dynet::Model *dynet_m, const CWSFeature &cws_feature, dynet::LookupParameter word_lookup_param)
    :lexicon_feature_layer(dynet_m, cws_feature.lexicon_feature),
    context_feature_layer(dynet_m, word_lookup_param),
    chartype_feature_layer(dynet_m, cws_feature.chartype_feature.FeatureDictSize(), 
                                  cws_feature.chartype_feature.get_feature_dim())
{}
20 | }
21 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/cws_feature_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MODULE_CWS_FEATURE_LAYER_H_
 2 | #define SLNN_SEGMENTER_CWS_MODULE_CWS_FEATURE_LAYER_H_
 3 | 
 4 | #include "cws_feature.h"
 5 | #include "lexicon_feature_layer.h"
 6 | #include "modelmodule/context_feature_layer.h"
 7 | #include "modelmodule/hyper_layers.h"
 8 | 
 9 | namespace slnn{
10 | 
/**
 * Network layer that turns CWS feature data into dynet expressions.
 * It combines three sub-layers (lexicon, context, chartype) and concatenates
 * their outputs position by position.
 */
class CWSFeatureLayer
{
public:
    // Build from explicit dictionary sizes and dimensions.
    CWSFeatureLayer(dynet::Model *dynet_m, unsigned start_here_dict_size, unsigned start_here_dim,
        unsigned pass_here_dict_size, unsigned pass_here_dim,
        unsigned end_here_dict_size, unsigned end_here_dim,
        dynet::LookupParameter word_lookup_param,
        unsigned chartype_category_num, unsigned chartype_dim);
    // Build from a configured CWSFeature.
    CWSFeatureLayer(dynet::Model *dynet_m, const CWSFeature &cws_feature, dynet::LookupParameter word_lookup_param);
    // Pass a new computation graph to every sub-layer.
    void new_graph(dynet::ComputationGraph &cg);
    // Produce one concatenated expression per position of the feature data sequence.
    void build_cws_feature(const CWSFeatureDataSeq &cws_feature_data_seq, std::vector<dynet::expr::Expression> &cws_feature_exprs);

private:
    LexiconFeatureLayer lexicon_feature_layer;
    ContextFeatureLayer context_feature_layer;
    Index2ExprLayer chartype_feature_layer;
};
28 | 
/** Forward the computation graph `cg` to the lexicon, context and chartype sub-layers. */
inline
void CWSFeatureLayer::new_graph(dynet::ComputationGraph &cg)
{
    lexicon_feature_layer.new_graph(cg);
    context_feature_layer.new_graph(cg);
    chartype_feature_layer.new_graph(cg);
}
36 | 
37 | inline
38 | void CWSFeatureLayer::build_cws_feature(const CWSFeatureDataSeq &cws_feature_data_seq, std::vector<dynet::expr::Expression> &cws_feature_exprs)
39 | {
40 |     using std::swap;
41 |     size_t seq_len = cws_feature_data_seq.size();
42 |     std::vector<dynet::expr::Expression> tmp_cws_exprs(seq_len);
43 |     const LexiconFeatureDataSeq &lexicon_data_seq = cws_feature_data_seq.get_lexicon_feature_data_seq();
44 |     const ContextFeatureDataSeq &context_data_seq = cws_feature_data_seq.get_context_feature_data_seq();
45 |     const CharTypeFeatureDataSeq &chartype_data_seq = cws_feature_data_seq.get_chartype_feature_data_seq();
46 |     for( size_t i = 0; i < seq_len; ++i )
47 |     {
48 |         tmp_cws_exprs[i] = dynet::expr::concatenate({
49 |             lexicon_feature_layer.build_lexicon_feature(lexicon_data_seq[i]),
50 |             context_feature_layer.build_feature_expr(context_data_seq[i]),
51 |             chartype_feature_layer.index2expr(chartype_data_seq[i])
52 |         });
53 |     }
54 |     swap(cws_feature_exprs, tmp_cws_exprs);
55 | }
56 | 
57 | 
58 | } // end of namespace slnn
59 | #endif


--------------------------------------------------------------------------------
/src/segmenter/cws_module/cws_general_modelhandler.cpp:
--------------------------------------------------------------------------------
 1 | #include "cws_general_modelhandler.h"
 2 | 
 3 | namespace slnn{
 4 | namespace segmenter{
 5 | namespace modelhandler{
 6 | 
 7 | namespace modelhandler_inner{
 8 | 
/**
 * Create a recorder with no best result yet.
 * The best score and both "when best" counters start at zero, the recorder
 * starts in the "good" state, and `error_threshold` is stored as the training
 * error threshold.
 */
TrainingUpdateRecorder::TrainingUpdateRecorder(float error_threshold)
    :best_score(0.f),
    nr_epoch_when_best(0),
    nr_devel_order_when_best(0),
    train_error_threshold(error_threshold),
    is_good(true)
{}
16 | 
17 | 
18 | 
19 | } // end of namespace modelhandler_inner
20 | 
21 | } // end of namespace modelhandler
22 | } // end of namespace segmenter
23 | } // end of namespace slnn
24 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/cws_reader.cpp:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MODULE_CWS_READER_H_
 2 | #define SLNN_SEGMENTER_CWS_MODULE_CWS_READER_H_
 3 | #include "cws_reader.h"
 4 | #include "utils/utf8processing.hpp"
 5 | namespace slnn{
// Characters treated as word delimiters (space and tab) when splitting a segmented line.
const char* CWSReader::CWSWordSeperator = " \t";
 7 | 
/** Construct a reader on `is`; the stream is handed to the Reader base class. */
CWSReader::CWSReader(std::istream &is)
    :Reader(is)
{}
11 | 
12 | bool CWSReader::read_segmented_line(Seq &word_seq)
13 | {
14 |     using std::swap;
15 |     Seq tmp_sent;
16 |     std::string line;
17 |     if( !getline(is, line) ){ return false;  } ; // static_cast<ifstream> == !fail() && !bad() , not equal to good() , especially on EOF bit 
18 |     std::vector<std::string> tmp_word_seq;
19 |     boost::algorithm::split(tmp_word_seq, line, boost::is_any_of(CWSWordSeperator));
20 |     swap(word_seq, tmp_word_seq);
21 |     return true ;
22 | }
23 | 
24 | bool CWSReader::readline(Seq &char_seq)
25 | {
26 |     using std::swap;
27 |     Seq tmp_sent;
28 |     std::string line;
29 |     if( !getline(is, line) ){ return false;  } ;
30 |     UTF8Processing::utf8_str2char_seq(line, char_seq);
31 |     return true;
32 | }
33 | 
34 | } // end of namespace slnn
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/cws_reader.h:
--------------------------------------------------------------------------------
 1 | #ifndef SEGMENTER_CWS_MODULE_CWS_READER_H_
 2 | #define SEGMENTER_CWS_MODULE_CWS_READER_H_
 3 | #include <boost/algorithm/string/split.hpp>
 4 | #include <boost/algorithm/string/classification.hpp>
 5 | #include "utils/reader.hpp"
 6 | #include "utils/typedeclaration.h"
 7 | namespace slnn{
 8 | 
/**************
 * The implementation below is slated to be abandoned in the future.
 **************/
/**
 * Line-oriented reader for segmentation data, built on Reader.
 */
class CWSReader : public Reader
{
public:
    // Delimiter characters used by read_segmented_line.
    static const char *CWSWordSeperator;

public:
    CWSReader(std::istream &is);
    // Read one line and split it into words; returns false if no line could be read.
    bool read_segmented_line(Seq &word_seq);
    // Read one line and convert it into a character sequence; returns false if no line could be read.
    bool readline(Seq &char_seq);
};
22 | 
23 | } // end of namespace slnn
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/cws_reader_unicode.cpp:
--------------------------------------------------------------------------------
 1 | #include "cws_reader_unicode.h"
 2 | namespace slnn{
 3 | namespace segmenter{
 4 | namespace reader{
 5 | 
/**
 * Construct a reader on `is`.
 * @param is         input stream, handed to the Reader base class
 * @param f_encoding encoding used to create the charcode convertor
 * @param pred_func  delimiter predicate stored for read_segmented_line
 */
SegmentorUnicodeReader::SegmentorUnicodeReader(std::istream &is, charcode::EncodingType f_encoding, predicateT pred_func)
    :Reader(is),
    conv(charcode::CharcodeConvertor::create_convertor(f_encoding)),
    // the parameter shadows the member: this initializes the member from the argument
    pred_func(pred_func)
{}
11 | 
12 | bool SegmentorUnicodeReader::read_segmented_line(std::vector<std::u32string> &out_wordseq)
13 | {
14 |     using std::swap;
15 |     std::string line;
16 |     if( !getline(is, line) ){ return false; }
17 |     std::u32string uline = conv->decode(line);
18 |     std::vector<std::u32string> wordseq;
19 |     boost::split(wordseq, uline, pred_func);
20 |     swap(out_wordseq, wordseq);
21 |     return true;
22 | }
23 | 
24 | bool SegmentorUnicodeReader::readline(std::u32string &out_charseq)
25 | {
26 |     using std::swap;
27 |     std::string line;
28 |     if( !getline(is, line) ){ return false; }
29 |     std::u32string uline = conv->decode(line);
30 |     swap(out_charseq, uline);
31 |     return true;
32 | }
33 | 
34 | } // end of namespace reader
35 | } // end of namespace segmenter
36 | } // end of namespace slnn
37 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/cws_reader_unicode.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MODULE_CWS_READER_UNICODE_H_
 2 | #define SLNN_SEGMENTER_CWS_MODULE_CWS_READER_UNICODE_H_
 3 | #include <functional>
 4 | #include <memory>
 5 | #include <vector>
 6 | #include <boost/algorithm/string/split.hpp>
 7 | #include <boost/algorithm/string/classification.hpp>
 8 | #include "utils/reader.hpp"
 9 | #include "trivial/charcode/charcode_base.hpp"
10 | #include "trivial/charcode/charcode_convertor.h"
11 | namespace slnn{
12 | namespace segmenter{
13 | namespace reader{
14 | 
15 | namespace reader_inner{
16 | 
/**
 * Default word-delimiter predicate.
 * @param uc a UTF-32 code unit
 * @return true for a tab or a space, false for anything else
 */
inline
bool is_seg_delimiter(char32_t uc)
{
    switch( uc )
    {
    case U'\t':
    case U' ':
        return true;
    default:
        return false;
    }
}
22 | 
23 | }
24 | 
/**
 * Line-oriented reader that decodes each line to UTF-32.
 * By default the input is treated as UTF-8 and words are split at tab or space.
 */
class SegmentorUnicodeReader : public Reader
{
public:
    // Predicate deciding whether a code unit separates two words.
    using predicateT = std::function<bool(char32_t)>;
public:
    SegmentorUnicodeReader(std::istream &is, 
        charcode::EncodingType file_encoding=charcode::EncodingType::UTF8, 
        predicateT pred_func=reader_inner::is_seg_delimiter);
    // Read one line and split it into words; returns false if no line could be read.
    bool read_segmented_line(std::vector<std::u32string> &out_wordseq);
    // Read one line as a single UTF-32 string; returns false if no line could be read.
    bool readline(std::u32string &out_charseq);
private:
    std::shared_ptr<charcode::CharcodeConvertor> conv; // decodes raw lines to UTF-32
    predicateT pred_func;                               // delimiter predicate
};
39 | 
40 | } // end of namespace reader
41 | } // end of namespace segmenter
42 | } // end of namespace slnn
43 | #endif
44 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/cws_stat.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MODULE_CWS_STAT_H_
 2 | #define SLNN_SEGMENTER_CWS_MODULE_CWS_STAT_H_
 3 | #include "utils/stat.hpp"
 4 | namespace slnn{
 5 | namespace segmenter{
 6 | namespace stat{
 7 | 
/**
 * Statistics record for the segmenter.
 * It extends BasicStat with the number of predicted tokens, which is used to
 * report a token-based speed.
 */
struct SegmentorStat : public BasicStat
{
    // data
    unsigned nr_token_predict; // number of predicted tokens; starts at 0
    // interface
    SegmentorStat(bool is_predict = false);
    // Format the statistics as a multi-line string that starts with `info_header`.
    std::string get_stat_str(const std::string &info_header);
};
16 | 
17 | /*********************************************
18 |  * inline interface
19 |  *********************************************/
/** Forward `is_predict` to BasicStat and start the token counter at zero. */
inline
SegmentorStat::SegmentorStat(bool is_predict)
    :BasicStat(is_predict),
    nr_token_predict(0)
{}
25 | 
26 | inline
27 | std::string SegmentorStat::get_stat_str(const std::string &info_header)
28 | {
29 |     std::ostringstream str_os;
30 |     str_os << info_header << "\n" ;
31 |     if( !is_predict ){ str_os << "| Sum E = " << get_sum_E() << "\n" ; }
32 |     str_os << "| Time cost = " << get_time_cost_in_seconds() << " s\n"
33 |         << "| Speed(tag) = " << get_speed_as_kilo_tokens_per_sencond() << " K Tags/s\n"
34 |         << "| Speed(token) = " << nr_token_predict / 1000.f / get_time_cost_in_seconds() << " K Tokens/s\n"
35 |         << "= - - - - -";
36 |     return str_os.str();
37 | }
38 | 
39 | 
40 | } // end of namespace stat
41 | } // end of namespace segmenter
42 | } // end of namespace slnn
43 | 
44 | #endif


--------------------------------------------------------------------------------
/src/segmenter/cws_module/cws_tagging_system.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_UTILS_CWS_UTILS_HPP_
 2 | #define SLNN_SEGMENTER_CWS_UTILS_CWS_UTILS_HPP_
 3 | 
 4 | #include <string>
 5 | 
 6 | #include <boost/log/trivial.hpp>
 7 | #include <boost/log/core.hpp>
 8 | 
 9 | #include "dynet/dict.h"
10 | #include "dynet/dynet.h"
11 | #include "utils/utf8processing.hpp"
12 | #include "utils/typedeclaration.h"
13 | 
14 | namespace slnn{
15 | 
/**
 * Tag set and helper routines for CWS tagging.
 *
 * The struct offers two interfaces:
 *  - a static one, where the tag ids are compile-time constants;
 *  - an older instance one, where the tag ids are filled in by build().
 * The tag names are B, M, E and S (presumably Begin / Middle / End / Single --
 * confirm against the definitions of the *_TAG strings).
 */
struct CWSTaggingSystem
{
    // static member
    static const std::string B_TAG ;
    static const std::string M_TAG ;
    static const std::string E_TAG ;
    static const std::string S_TAG ;
    
    // fixed tag ids used by the static interface
    static constexpr Index STATIC_B_ID = 0;
    static constexpr Index STATIC_M_ID = 1;
    static constexpr Index STATIC_E_ID = 2;
    static constexpr Index STATIC_S_ID = 3;
    // sentinel; also the default `pre_tag_id` of static_select_tag_constrained
    static constexpr Index STATIC_NONE_ID = -1;

    // number of tags in the tag set
    static constexpr size_t get_tag_num(){ return 4;  }

    static void static_parse_word2chars_indextag(const std::string &word, Seq &word_cont, IndexSeq &tag_cont);
    static void static_parse_chars_indextag2word_seq(const Seq &char_seq, const IndexSeq &static_tag_indices, Seq &word_seq);
    static bool static_can_emit(size_t cur_pos, Index cur_static_tag_id);
    static bool static_can_trans(Index pre_static_tag_id, Index cur_static_tag_id);
    static Index static_select_tag_constrained(std::vector<dynet::real> &dist, size_t time, Index pre_tag_id=STATIC_NONE_ID);


    // Below is the previous implementation. It is not good (there is no need for dynamic tag ids in the CWS task).
    // It is kept for compatibility only and should not be used any more.
    static void parse_words2word_tag(const std::string &words, Seq &word_cont, Seq &tag_cont) ;
    static void split_word(const std::string &utf8_str, Seq &utf8_seq) ;
    static void parse_word_tag2words(const Seq &raw_words, const Seq &tags, Seq &words) ;

    // class member
    Index B_ID,
        M_ID,
        E_ID,
        S_ID ;

    void build(dynet::Dict &tag_dict) ;
    bool can_emit(size_t cur_pos , Index cur_tag_id) ;
    bool can_trans(Index pre_tag_id, Index cur_tag_id) ;
    void parse_word_tag2words(const Seq &raw_words, const IndexSeq &tag_ids, Seq &o_words) ; // overload of the static version, taking tag ids instead of tag strings

};
57 | 
58 | }// end of namespace slnn
59 | 
60 | #endif 
61 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/cws_writer.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MODULE_CWS_WRITER_H_
 2 | #define SLNN_SEGMENTER_CWS_MODULE_CWS_WRITER_H_
 3 | #include <string>
 4 | #include <vector>
 5 | #include <ostream>
 6 | #include <sstream>
 7 | #include <memory>
 8 | #include "utils/writer.h"
 9 | #include "token_module/cws_tag_definition.h"
10 | #include "trivial/charcode/charcode_base.hpp"
11 | #include "trivial/charcode/charcode_convertor.h"
12 | namespace slnn{
13 | namespace segmenter{
14 | namespace writer{
15 | 
/**
 * Writes segmentation results as delimiter-separated words.
 * utils::Writer is inherited privately; only writeline is re-exported.
 */
class SegmentorWriter : private utils::Writer
{
public:
    // constructor; the delimiter defaults to a tab
    SegmentorWriter(std::ostream &os, charcode::EncodingType encoding_type, const std::u32string &uni_delimiter=U"\t");
    using Writer::writeline;
    // Write the words derived from `uni_str` and `tagseq` on one line.
    void write(const std::u32string &uni_str, const std::vector<Index> &tagseq);
private:
    std::shared_ptr<charcode::CharcodeConvertor> conv; // encodes UTF-32 text for output
    std::string out_delimiter;                          // the delimiter, already encoded
};
27 | 
28 | /*****************************************
29 |  * Inline Implementation
30 |  *****************************************/
31 | 
/**
 * Construct a writer on `os`.
 * A convertor is created for `encoding_type` and the delimiter is encoded once here.
 */
inline 
SegmentorWriter::SegmentorWriter(std::ostream &os, charcode::EncodingType encoding_type, const std::u32string &uni_delimiter)
    :utils::Writer(os),
    conv(charcode::CharcodeConvertor::create_convertor(encoding_type)),
    // uses `conv`, which is declared before `out_delimiter` and therefore initialized first
    out_delimiter(conv->encode(uni_delimiter))
{}
38 | 
39 | inline
40 | void SegmentorWriter::write(const std::u32string &charseq, const std::vector<Index> &tagseq)
41 | {
42 |     if( charseq.size() == 0 ){ writeline(""); return; }
43 |     std::vector<std::u32string> wordseq = token_module::generate_wordseq_from_not_valid_chartagseq(charseq, tagseq);
44 |     std::ostringstream oss;
45 |     oss << conv->encode(wordseq[0]);
46 |     for( std::size_t i = 1; i < wordseq.size(); ++i )
47 |     {
48 |         oss << out_delimiter << conv->encode(wordseq[i]);
49 |     }
50 |     writeline(oss.str());
51 | }
52 | 
53 | 
54 | } // end of namespace writer
55 | } // end of namespace segmenter
56 | } // end of namespace slnn
57 | 
58 | 
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/lexicon_feature_layer.cpp:
--------------------------------------------------------------------------------
 1 | #include "lexicon_feature_layer.h"
 2 | 
 3 | namespace slnn{
 4 | 
/**
 * Construct the layer from explicit sizes.
 * Three lookup tables ("start here", "pass here", "end here") are added to
 * `dynet_m`, each with its own dictionary size and dimension.
 * The computation graph pointer is not set here; new_graph() assigns it.
 */
LexiconFeatureLayer::LexiconFeatureLayer(dynet::Model * dynet_m, unsigned start_here_dict_size, unsigned start_here_dim,
    unsigned pass_here_dict_size, unsigned pass_here_dim,
    unsigned end_here_dict_size, unsigned end_here_dim)
    : start_here_lookup_param(dynet_m->add_lookup_parameters(start_here_dict_size, { start_here_dim })),
    pass_here_lookup_param(dynet_m->add_lookup_parameters(pass_here_dict_size, { pass_here_dim })),
    end_here_lookup_param(dynet_m->add_lookup_parameters(end_here_dict_size, {end_here_dim}))
{}
12 | 
/**
 * Construct the layer from a LexiconFeature.
 * This delegates to the size-based constructor, using the dictionary sizes and
 * feature dimensions reported by `lexicon_feature`.
 */
LexiconFeatureLayer::LexiconFeatureLayer(dynet::Model *dynet_m, const LexiconFeature &lexicon_feature)
    :LexiconFeatureLayer(dynet_m, lexicon_feature.get_start_here_dict_size(), lexicon_feature.get_start_here_feature_dim(),
        lexicon_feature.get_pass_here_dict_size(), lexicon_feature.get_pass_here_feature_dim(),
        lexicon_feature.get_end_here_dict_size(), lexicon_feature.get_end_here_feature_dim())
{}
18 | 
19 | }


--------------------------------------------------------------------------------
/src/segmenter/cws_module/lexicon_feature_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MODULE_LEXICON_FEATURE_LAYER_H_
 2 | #define SLNN_SEGMENTER_CWS_MODULE_LEXICON_FEATURE_LAYER_H_
 3 | #include "lexicon_feature.h"
 4 | #include "dynet/dynet.h"
 5 | #include "dynet/expr.h"
 6 | namespace slnn{
 7 | 
 8 | class LexiconFeatureLayer
 9 | {
10 | public :
11 |     LexiconFeatureLayer(dynet::Model *dynet_m, unsigned start_here_dict_size, unsigned start_here_dim,
12 |         unsigned pass_here_dict_size, unsigned pass_here_dim,
13 |         unsigned end_here_dict_size, unsigned end_here_dim);
14 |     LexiconFeatureLayer(dynet::Model *dynet_m, const LexiconFeature &lexicon_feature);
15 |     void new_graph(dynet::ComputationGraph &cg);
16 |     dynet::expr::Expression build_lexicon_feature(const LexiconFeatureData &lexicon_feature);
17 |     void build_lexicon_feature(const LexiconFeatureDataSeq &lexicon_feature_seq,
18 |         std::vector<dynet::expr::Expression> &lexicon_feature_exprs);
19 | 
20 | private:
21 |     dynet::LookupParameter start_here_lookup_param;
22 |     dynet::LookupParameter pass_here_lookup_param;
23 |     dynet::LookupParameter end_here_lookup_param;
24 |     dynet::ComputationGraph *pcg;
25 | };
26 | 
/** Store the address of `cg`; later lookups are added to this graph. */
inline
void LexiconFeatureLayer::new_graph(dynet::ComputationGraph &cg)
{
    pcg = &cg;
}
32 | 
33 | inline 
34 | dynet::expr::Expression LexiconFeatureLayer::build_lexicon_feature(const LexiconFeatureData &lexicon_feature_data)
35 | {
36 |     return dynet::expr::concatenate({
37 |         dynet::expr::lookup(*pcg, start_here_lookup_param, lexicon_feature_data.get_start_here_feature_index()),
38 |         dynet::expr::lookup(*pcg, pass_here_lookup_param, lexicon_feature_data.get_pass_here_feature_index()),
39 |         dynet::expr::lookup(*pcg, end_here_lookup_param, lexicon_feature_data.get_end_here_feature_index())
40 |     });
41 | }
42 | 
43 | inline
44 | void LexiconFeatureLayer::build_lexicon_feature(const LexiconFeatureDataSeq &lexicon_feature_seq,
45 |     std::vector<dynet::expr::Expression> &lexicon_feature_exprs)
46 | {
47 |     using std::swap;
48 |     size_t seq_len = lexicon_feature_seq.size();
49 |     std::vector<dynet::expr::Expression> tmp_lexicon_feature_exprs(seq_len);
50 |     for( size_t i = 0; i < seq_len; ++i )
51 |     {
52 |         tmp_lexicon_feature_exprs[i] = build_lexicon_feature(lexicon_feature_seq[i]);
53 |     }
54 |     swap(lexicon_feature_exprs, tmp_lexicon_feature_exprs);
55 | }
56 | 
57 | } // end of namespace slnn
58 | 
59 | #endif


--------------------------------------------------------------------------------
/src/segmenter/cws_module/nn_module/experiment_layer/nn_cws_specific_output_layer.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MODULE_EXP_CWS_SPECIFIC_OUTPUT_LAYER_H_
 2 | #define SLNN_SEGMENTER_CWS_MODULE_EXP_CWS_SPECIFIC_OUTPUT_LAYER_H_
 3 | #include <memory>
 4 | #include <string>
 5 | #include "modelmodule/hyper_output_layers.h"
 6 | namespace slnn{
 7 | namespace segmenter{
 8 | namespace nn_module{
 9 | namespace experiment{
10 | 
/**
 * The layers in this header share BareOutputBase as their top-level abstract parent.
 */

/**
 * Segmenter-specific classification output layer.
 * It derives from SimpleBareOutput and overrides build_output.
 */
class SegmenterClassificationBareOutput : public SimpleBareOutput
{
public:
    SegmenterClassificationBareOutput(dynet::Model *m, unsigned input_dim, unsigned output_dim);
    // Fill `out_pred_seq` with the predicted tag indices for the input expressions.
    void build_output(const std::vector<dynet::expr::Expression>& input_expr_seq, std::vector<Index>& out_pred_seq) override;
};
21 | 
/**
 * Factory for segmenter output layers.
 * @param layer_type  name selecting the concrete layer (the accepted values are
 *                    defined with the implementation, which is not in this header)
 * @param dynet_model model that the layer is created for
 * @param input_dim   input dimension of the layer
 * @param output_dim  output dimension of the layer
 * @return the created layer, held through its BareOutputBase interface
 */
std::shared_ptr<BareOutputBase>
create_segmenter_output_layer(const std::string& layer_type, dynet::Model* dynet_model, unsigned input_dim, unsigned output_dim);
24 | 
25 | } // end of namespace experiment
26 | } // end of namespace nn-module
27 | } // end of namespace segmenter
28 | } // end of namespace slnn
29 | 
30 | 
31 | 
32 | 
33 | #endif


--------------------------------------------------------------------------------
/src/segmenter/cws_module/nn_module/experiment_layer/nn_window_expr_processing_layer.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/segmenter/cws_module/nn_module/experiment_layer/nn_window_expr_processing_layer.h


--------------------------------------------------------------------------------
/src/segmenter/cws_module/nn_module/mlp_input1/nn_cws_mlp_input1_abstract.cpp:
--------------------------------------------------------------------------------
 1 | #include "nn_cws_mlp_input1_abstract.h"
 2 | namespace slnn{
 3 | namespace segmenter{
 4 | namespace nn_module{
 5 | 
 6 | 
 7 | 
/** Forward the command-line arguments and the seed to the dynet-based base class. */
NnSegmenterInput1MlpAbstract::NnSegmenterInput1MlpAbstract(int argc, char **argv, unsigned seed) : 
    NeuralNetworkCommonInterfaceDynetImpl(argc, argv, seed)
{}
11 | 
12 | dynet::expr::Expression 
13 | NnSegmenterInput1MlpAbstract::build_training_graph_impl(const std::vector<Index> &charseq, 
14 |     const std::vector<Index> &tagseq)
15 | {
16 |     //clear_cg(); // !! ATTENTION !!
17 |     reset_cg();
18 |     word_expr_layer->new_graph(*get_cg());
19 |     window_expr_generate_layer->new_graph(*get_cg());
20 |     window_expr_processing_layer->new_graph(*get_cg());
21 |     mlp_hidden_layer->new_graph(*get_cg());
22 |     output_layer->new_graph(*get_cg()) ;
23 |     
24 |     mlp_hidden_layer->enable_dropout();
25 |     
26 |     unsigned sent_len = charseq.size();
27 | 
28 |     std::vector<dynet::expr::Expression> word_exprs(sent_len);
29 |     word_expr_layer->index_seq2expr_seq(charseq, word_exprs);
30 |     // generate window expr
31 |     std::vector<std::vector<dynet::expr::Expression>> input_window_expr_list =
32 |         window_expr_generate_layer->generate_window_expr_list(word_exprs);
33 |     // processing window expr
34 |     std::vector<dynet::expr::Expression> input_exprs = window_expr_processing_layer->process(input_window_expr_list);
35 | 
36 |     std::vector<dynet::expr::Expression> output_exprs;
37 |     mlp_hidden_layer->build_graph(input_exprs, output_exprs);
38 |     return output_layer->build_output_loss(output_exprs, tagseq);
39 | }
40 | 
41 | std::vector<Index> 
42 | NnSegmenterInput1MlpAbstract::predict_impl(const std::vector<Index> &charseq)
43 | {
44 | 
45 |     reset_cg();
46 |     //clear_cg(); // !!!!
47 |     word_expr_layer->new_graph(*get_cg());
48 |     window_expr_generate_layer->new_graph(*get_cg());
49 |     window_expr_processing_layer->new_graph(*get_cg());
50 |     mlp_hidden_layer->new_graph(*get_cg());
51 |     output_layer->new_graph(*get_cg()) ;
52 |     mlp_hidden_layer->disable_dropout();
53 | 
54 |     unsigned sent_len = charseq.size();
55 | 
56 |     std::vector<dynet::expr::Expression> word_exprs(sent_len);
57 |     word_expr_layer->index_seq2expr_seq(charseq, word_exprs);
58 |     // generate window expr
59 |     std::vector<std::vector<dynet::expr::Expression>> input_window_expr_list =
60 |         window_expr_generate_layer->generate_window_expr_list(word_exprs);
61 |     // prpcessing window expr
62 |     std::vector<dynet::expr::Expression> input_exprs = 
63 |         window_expr_processing_layer->process(input_window_expr_list);
64 | 
65 |     std::vector<dynet::expr::Expression> hidden_output_exprs;
66 |     mlp_hidden_layer->build_graph(input_exprs, hidden_output_exprs);
67 |     std::vector<Index> pred_tagseq;
68 |     output_layer->build_output(hidden_output_exprs, pred_tagseq);
69 |     return pred_tagseq;
70 | }
71 | 
72 | } // end of namespace nn_module
73 | } // end of namespace segmenter
74 | } // end of namespace slnn
75 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/nn_module/nn_common_interface.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MODULE_NN_MODULE_NN_INTERFACE_H_
 2 | #define SLNN_SEGMENTER_CWS_MODULE_NN_MODULE_NN_INTERFACE_H_
 3 | #include <vector>
 4 | #include "utils/typedeclaration.h"
 5 | namespace slnn{
 6 | namespace segmenter{
 7 | namespace nn_module{
 8 | 
 9 | namespace nn_framework{
10 |     // using namespace instead of Enum for extend by user(Although no other users , 2333);
11 |     using NnFrameworkTagT = int;
12 |     constexpr NnFrameworkTagT NN_DyNet = 0; 
13 | 
14 | } // end of namespace nn-framework
15 | 
16 | /**
17 | * neural network common interface for training and predict. 
18 | * not for polymorphism but for build interface for common NN operation and different NN framework.
19 | */
20 | template <nn_framework::NnFrameworkTagT nn_tag, typename NnExprType, typename NnValueType>
21 | class NeuralNetworkCommonInterface
22 | {
23 | public:
24 |     // Type
25 |     using NnExprT = NnExprType;
26 |     using NnValueT = NnValueType;
27 |     // training
28 |     void set_update_method(const std::string &optmization_name);
29 |     void update(slnn::type::real scale);
30 |     void update_epoch();
31 |     const NnValueT& forward(const NnExprType&);
32 |     slnn::type::real as_scalar(const NnValueT&);
33 |     std::vector<slnn::type::real> as_vector(const NnValueT&);
34 |     void backward(const NnExprT&);
35 |     // stash model
36 |     void stash_model();
37 |     bool stash_model_when_best(slnn::type::real current_score);
38 |     bool reset2stashed_model();
39 | };
40 | 
41 | } // end of namespace nn-module
42 | } // end of namespace segmenter
43 | } // end of namespace slnn
44 | 
45 | #endif


--------------------------------------------------------------------------------
/src/segmenter/cws_module/nn_module/nn_common_interface_dynet_impl.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include "nn_common_interface_dynet_impl.h"
 3 | 
 4 | namespace slnn{
 5 | namespace segmenter{
 6 | namespace nn_module{
 7 | 
 8 | void 
 9 | NeuralNetworkCommonInterface<nn_framework::NN_DyNet, dynet::expr::Expression, dynet::Tensor>::
10 | set_update_method(const std::string &optmization_name)
11 | {
12 |     std::string opt_norm_name(optmization_name);
13 |     for( char &c : opt_norm_name ){ c = ::tolower(c); }
14 |     if( opt_norm_name == "sgd" )
15 |     {
16 |         trainer = new dynet::SimpleSGDTrainer(dynet_model);
17 |     }
18 |     else if( opt_norm_name == "adagrad" )
19 |     {
20 |         trainer = new dynet::AdagradTrainer(dynet_model);
21 |     }
22 |     else if( opt_norm_name == "momentum" )
23 |     {
24 |         trainer = new dynet::MomentumSGDTrainer(dynet_model);
25 |     }
26 |     else if( opt_norm_name == "adadelta" )
27 |     {
28 |         trainer = new dynet::AdadeltaTrainer(dynet_model);
29 |     }
30 |     else if( opt_norm_name == "rmsprop" )
31 |     {
32 |         trainer = new dynet::RmsPropTrainer(dynet_model);
33 |     }
34 |     else if(opt_norm_name == "adam" )
35 |     {
36 |         trainer = new dynet::AdamTrainer(dynet_model);
37 |     }
38 |     else
39 |     {
40 |         throw std::invalid_argument(std::string("un-supported optimization method : '") + optmization_name + std::string("'"));
41 |     }
42 | }
43 | 
44 | } // end of namespace nn-module
45 | } // end of namespace segmenter
46 | } // end of namespace slnn


--------------------------------------------------------------------------------
/src/segmenter/cws_module/nn_module/rnn_input1/nn_cws_rnn_input1_abstract.cpp:
--------------------------------------------------------------------------------
 1 | #include "nn_cws_rnn_input1_abstract.h"
 2 | namespace slnn{
 3 | namespace segmenter{
 4 | namespace nn_module{
 5 | 
 6 | 
 7 | 
 8 | NnSegmenterRnnInput1Abstract::NnSegmenterRnnInput1Abstract(int argc, char **argv, unsigned seed) : 
 9 |     NeuralNetworkCommonInterfaceDynetImpl(argc, argv, seed)
10 | {}
11 | 
12 | dynet::expr::Expression 
13 | NnSegmenterRnnInput1Abstract::build_training_graph_impl(const std::vector<Index> &charseq, 
14 |     const std::vector<Index> &tagseq)
15 | {
16 |     //clear_cg(); // !! ATTENTION !! dynet's implementation is not successful. so abandon it.
17 |     reset_cg();
18 |     word_expr_layer->new_graph(*get_cg());
19 |     birnn_layer->new_graph(*get_cg());
20 |     output_layer->new_graph(*get_cg()) ;
21 |     
22 |     birnn_layer->set_dropout();
23 |     birnn_layer->start_new_sequence();
24 | 
25 |     unsigned sent_len = charseq.size();
26 | 
27 |     std::vector<dynet::expr::Expression> word_exprs(sent_len);
28 |     word_expr_layer->index_seq2expr_seq(charseq, word_exprs);
29 |     // to bi-rnn
30 |     std::vector<dynet::expr::Expression> l2r_output_exprs, r2l_output_exprs;
31 |     birnn_layer->build_graph(word_exprs, l2r_output_exprs, r2l_output_exprs);
32 |     // concatenate & build loss
33 |     std::vector<dynet::expr::Expression> concated_exprs(sent_len);
34 |     for( unsigned i = 0; i < sent_len; ++i )
35 |     { 
36 |         concated_exprs[i] = dynet::expr::concatenate({ l2r_output_exprs[i], r2l_output_exprs[i] }); 
37 |     }
38 |     return output_layer->build_output_loss(concated_exprs, tagseq);
39 | }
40 | 
41 | std::vector<Index> 
42 | NnSegmenterRnnInput1Abstract::predict_impl(const std::vector<Index> &charseq)
43 | {
44 |     //clear_cg(); // !!!!
45 |     reset_cg();
46 |     word_expr_layer->new_graph(*get_cg());
47 |     birnn_layer->new_graph(*get_cg());
48 |     output_layer->new_graph(*get_cg()) ;
49 |     
50 |     birnn_layer->disable_dropout();
51 |     birnn_layer->start_new_sequence();
52 | 
53 |     unsigned sent_len = charseq.size();
54 | 
55 |     std::vector<dynet::expr::Expression> word_exprs(sent_len);
56 |     word_expr_layer->index_seq2expr_seq(charseq, word_exprs);
57 |     // to bi-rnn
58 |     std::vector<dynet::expr::Expression> l2r_output_exprs, r2l_output_exprs;
59 |     birnn_layer->build_graph(word_exprs, l2r_output_exprs, r2l_output_exprs);
60 |     // concatenate & build loss
61 |     std::vector<dynet::expr::Expression> concated_exprs(sent_len);
62 |     for( unsigned i = 0; i < sent_len; ++i )
63 |     { 
64 |         concated_exprs[i] = dynet::expr::concatenate({ l2r_output_exprs[i], r2l_output_exprs[i] }); 
65 |     }
66 |     std::vector<Index> pred_tagseq;
67 |     output_layer->build_output(concated_exprs, pred_tagseq);
68 |     return pred_tagseq;
69 | }
70 | 
71 | } // end of namespace nn_module
72 | } // end of namespace segmenter
73 | } // end of namespace slnn
74 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/structure_param_module/basic_mlp_param.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MODULE_STRUCTURE_PARAM_MODULE_BASIC_MLP_PARAM_H_
 2 | #define SLNN_SEGMENTER_CWS_MODULE_STRUCTURE_PARAM_MODULE_BASIC_MLP_PARAM_H_
 3 | #include <vector>
 4 | #include <string>
 5 | #include <boost/serialization/access.hpp>
 6 | #include <boost/serialization/unordered_map.hpp>
 7 | #include <boost/serialization/vector.hpp>
 8 | #include <boost/program_options/variables_map.hpp>
 9 | #include "utils/typedeclaration.h"
10 | namespace slnn{
11 | namespace segmenter{
12 | namespace structure_param_module{
13 | 
14 | 
15 | /**
16 |  * Basic segmenter MLP structure param module.
17 |  * for segmenter with sigle word token and no feature.
18 |  * using this to decouple the (frontend-param, token-module) and the nn module
19 |  */
20 | 
21 | struct SegmenterBasicMlpParam
22 | {
23 |     friend class boost::serialization::access;
24 |     // Data
25 |     //   - Input
26 |     unsigned corpus_token_embedding_dim;
27 |     unsigned corpus_token_dict_size;
28 |     unsigned window_size;
29 |     //   - Mlp
30 |     std::string window_process_method;
31 |     std::vector<unsigned> mlp_hidden_dim_list;
32 |     slnn::type::real mlp_dropout_rate;
33 |     std::string mlp_nonlinear_function_str;
34 |     //   - Ouptut
35 |     std::string output_layer_type;
36 |     unsigned output_dim;
37 |     //   - Others
38 |     unsigned replace_freq_threshold;
39 |     float replace_prob_threshold;
40 |     // Interface
41 |     void set_param_from_user_defined(const boost::program_options::variables_map &args);
42 |     template<typename TokenModuleT>
43 |     void set_param_from_token_module(const TokenModuleT &token_module);
44 |     std::string get_structure_info();
45 |     // Serialization
46 |     template<class Archive>
47 |     void serialize(Archive& ar, const unsigned int);
48 | };
49 | 
50 | /**********************************************
51 |  *  Inline Implementation
52 |  **********************************************/
53 | 
54 | template<typename TokenModuleT>
55 | void SegmenterBasicMlpParam::set_param_from_token_module(const TokenModuleT &token_module)
56 | {
57 |     // Input - dict size
58 |     corpus_token_dict_size = token_module.get_charset_size();
59 |     // Output
60 |     output_dim = token_module.get_tagset_size();
61 | }
62 | 
63 | 
64 | template <class Archive>
65 | void SegmenterBasicMlpParam::serialize(Archive &ar, const unsigned int)
66 | {
67 |     ar &corpus_token_embedding_dim &corpus_token_dict_size &window_size
68 |         &window_process_method &mlp_hidden_dim_list &mlp_dropout_rate &mlp_nonlinear_function_str
69 |         &output_layer_type  &output_dim
70 |         &replace_freq_threshold &replace_prob_threshold;
71 | }
72 | 
73 | } // end of namespace structure_param_module
74 | } // end of namespace segmenter
75 | } // end of namespace slnn
76 | 
77 | #endif


--------------------------------------------------------------------------------
/src/segmenter/cws_module/structure_param_module/rnn_input1_param.cpp:
--------------------------------------------------------------------------------
 1 | #include <boost/algorithm/string/trim.hpp>
 2 | #include <boost/algorithm/string/split.hpp>
 3 | #include <boost/algorithm/string/classification.hpp>
 4 | #include "rnn_input1_param.h"
 5 | namespace slnn{
 6 | namespace segmenter{
 7 | namespace structure_param_module{
 8 | 
 9 | void SegmenterRnnInput1Param::set_param_from_user_defined(const boost::program_options::variables_map &args)
10 | {
11 |     // Input
12 |     corpus_token_embedding_dim = args["word_embedding_dim"].as<unsigned>();
13 |     // Rnn
14 |     rnn_nr_stack_layer = args["nr_stack_layer"].as<unsigned>();
15 |     rnn_h_dim = args["h_dim"].as<unsigned>();
16 |     rnn_dropout_rate = args["dropout_rate"].as<slnn::type::real>() ;
17 |     // Output
18 |     output_layer_type = args["output_layer_type"].as<std::string>();
19 |     // Others
20 |     replace_freq_threshold = args["replace_freq_threshold"].as<unsigned>();
21 |     replace_prob_threshold = args["replace_prob_threshold"].as<float>();
22 | }
23 | 
24 | std::string SegmenterRnnInput1Param::get_structure_info()
25 | {
26 |     std::ostringstream oss;
27 |     oss << "+ Model info: \n"
28 |         << "| input: " << "charset-size(" << corpus_token_dict_size << ") embedding-dim(" << corpus_token_embedding_dim
29 |         << ") \n"
30 |         << "| rnn: h dim(" << rnn_h_dim << ") stacked layer num(" << rnn_nr_stack_layer << ") rnn-dropout-rate("
31 |         << rnn_dropout_rate << ")" << "\n"
32 |         << "| output: " << "output-dim(" << output_dim << ") type(" << output_layer_type << ")\n"
33 |         << "| others: " << "replace-frequent-threshold(" << replace_freq_threshold << ") "
34 |         << "replace-probability-threshold(" << replace_prob_threshold << ")\n"
35 |         << "= - - - - -";
36 |     return oss.str();
37 | }
38 | 
39 | 
40 | } // end of namespace structure_param_module
41 | } // end of namespace segmenter
42 | } // end of namespace slnn
43 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/structure_param_module/rnn_input1_param.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MODULE_STRUCTURE_PARAM_MODULE_RNN_INPUT1_PARAM_H_
 2 | #define SLNN_SEGMENTER_CWS_MODULE_STRUCTURE_PARAM_MODULE_RNN_INPUT1_PARAM_H_
 3 | #include <vector>
 4 | #include <string>
 5 | #include <boost/serialization/access.hpp>
 6 | #include <boost/serialization/unordered_map.hpp>
 7 | #include <boost/serialization/vector.hpp>
 8 | #include <boost/program_options/variables_map.hpp>
 9 | #include "utils/typedeclaration.h"
10 | namespace slnn{
11 | namespace segmenter{
12 | namespace structure_param_module{
13 | 
14 | 
15 | /**
16 |  * Basic segmenter MLP structure param module.
17 |  * for segmenter with sigle word token and no feature.
18 |  * using this to decouple the (frontend-param, token-module) and the nn module
19 |  */
20 | 
21 | struct SegmenterRnnInput1Param
22 | {
23 |     friend class boost::serialization::access;
24 |     // Data
25 |     //   - Input
26 |     unsigned corpus_token_embedding_dim;
27 |     unsigned corpus_token_dict_size;
28 |     //   - Rnn
29 |     unsigned rnn_nr_stack_layer;
30 |     unsigned rnn_h_dim;
31 |     slnn::type::real rnn_dropout_rate;
32 |     //   - Ouptut
33 |     std::string output_layer_type;
34 |     unsigned output_dim;
35 |     //   - Others
36 |     unsigned replace_freq_threshold;
37 |     float replace_prob_threshold;
38 |     // Interface
39 |     void set_param_from_user_defined(const boost::program_options::variables_map &args);
40 |     template<typename TokenModuleT>
41 |     void set_param_from_token_module(const TokenModuleT &token_module);
42 |     std::string get_structure_info();
43 |     // Serialization
44 |     template<class Archive>
45 |     void serialize(Archive& ar, const unsigned int);
46 | };
47 | 
48 | /**********************************************
49 |  *  Inline Implementation
50 |  **********************************************/
51 | 
52 | template<typename TokenModuleT>
53 | void SegmenterRnnInput1Param::set_param_from_token_module(const TokenModuleT &token_module)
54 | {
55 |     // Input - dict size
56 |     corpus_token_dict_size = token_module.get_charset_size();
57 |     // Output
58 |     output_dim = token_module.get_tagset_size();
59 | }
60 | 
61 | 
62 | template <class Archive>
63 | void SegmenterRnnInput1Param::serialize(Archive &ar, const unsigned int)
64 | {
65 |     ar &corpus_token_embedding_dim &corpus_token_dict_size
66 |         &rnn_nr_stack_layer &rnn_h_dim &rnn_dropout_rate
67 |         &output_layer_type  &output_dim
68 |         &replace_freq_threshold &replace_prob_threshold;
69 | }
70 | 
71 | } // end of namespace structure_param_module
72 | } // end of namespace segmenter
73 | } // end of namespace slnn
74 | 
75 | #endif


--------------------------------------------------------------------------------
/src/segmenter/cws_module/token_module/cws_tag_utility.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * tag_utility.h, including the utilities for tag system.
 3 |  */
 4 | 
 5 | #ifndef SLNN_SEGMENTER_CWS_MODULE_TOKEN_MODULE_TAG_UTILITY_H_
 6 | #define SLNN_SEGMENTER_CWS_MODULE_TOKEN_MODULE_TAG_UTILITY_H_
 7 | #include "cws_tag_definition.h"
 8 | 
 9 | namespace slnn{
10 | namespace segmenter{
11 | namespace token_module{
12 | 
13 | 
14 | 
15 | /****************************************************
16 |  * Inline Implementation
17 |  ****************************************************/
18 | inline
19 | Index select_best_tag_constrained(const std::vector<dynet::real> &dist, size_t time, Index pre_time_tag_id)
20 | {
21 |     dynet::real max_prob = std::numeric_limits<dynet::real>::lowest();
22 |     Index tag_with_max_prob = Tag::TAG_NONE_ID;
23 |     constexpr Index max_tag_id = TAG_SIZE - 1;
24 |     for( Index tag_id = 0; tag_id <= max_tag_id; ++tag_id )
25 |     {
26 |         if( !can_emit(time, tag_id) ){ continue; }
27 |         if( time > 0 && !can_trans_unsafe(pre_time_tag_id, tag_id) ){ continue; }
28 |         if( dist[tag_id] >= max_prob )
29 |         {
30 |             tag_with_max_prob = tag_id;
31 |             max_prob = dist[tag_id];
32 |         }
33 |     }
34 |     // assert(tag_with_max_prob != Tag::TAG_NONE_ID);
35 |     return tag_with_max_prob;
36 | }
37 | 
38 | }
39 | }
40 | } // end of namespace slnn
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/token_module/input1/token_input1_all.cpp:
--------------------------------------------------------------------------------
 1 | #include "token_input1_all.h"
 2 | 
 3 | namespace slnn{
 4 | namespace segmenter{
 5 | namespace token_module{
 6 | 
 7 | std::u32string TokenSegmenterInput1All::EOS_REPR = U"<EOS>";
 8 | 
 9 | TokenSegmenterInput1All::TokenSegmenterInput1All(unsigned seed) noexcept
10 |     :unigram_dict(seed),
11 |     bigram_dict(seed),
12 |     lexicon_feat(),
13 |     state()
14 | {}
15 | 
16 | } // end of namespace token_module
17 | } // end of namespace segmenter
18 | } // end of namespace slnn


--------------------------------------------------------------------------------
/src/segmenter/cws_module/token_module/input1/token_input1_bigram.cpp:
--------------------------------------------------------------------------------
 1 | #include "token_input1_bigram.h"
 2 | 
 3 | namespace slnn{
 4 | namespace segmenter{
 5 | namespace token_module{
 6 | 
 7 | std::u32string TokenSegmenterInput1Bigram::EOS_REPR = U"<EOS>";
 8 | 
 9 | TokenSegmenterInput1Bigram::TokenSegmenterInput1Bigram(unsigned seed) noexcept
10 |     :token_dict(seed, 1, 0.2F, [](const std::u32string &token) -> std::string{
11 |     return input1_bigram_token_module_inner::token2str(token); })
12 | {}
13 | 
14 | } // end of namespace token_module
15 | } // end of namespace segmenter
16 | } // end of namespace slnn


--------------------------------------------------------------------------------
/src/segmenter/cws_module/token_module/input1/token_input1_unigram.cpp:
--------------------------------------------------------------------------------
 1 | #include "token_input1_unigram.h"
 2 | namespace slnn{
 3 | namespace segmenter{
 4 | namespace token_module{
 5 | 
 6 | /**
 7 | * constructor, with an seed to init the replace randomization.
 8 | * @param seed unsigned, to init the inner LookupTableWithReplace.
 9 | */
10 | TokenSegmenterInput1Unigram::TokenSegmenterInput1Unigram(unsigned seed) noexcept
11 |     :token_dict(seed, 1, 0.2F, [](const char32_t &token){ return input1_unigram_token_module_inner::token2str(token); })
12 | {}
13 | 
14 | } // end of namespace token_module
15 | } // end of namespace segmenter
16 | } // end of namespace slnn
17 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/token_module/token_chartype.cpp:
--------------------------------------------------------------------------------
 1 | #include "token_chartype.h"
 2 | 
 3 | namespace slnn{
 4 | namespace segmenter{
 5 | namespace token_module{
 6 | 
 7 | const std::unordered_set<char32_t> TokenChartype::DigitTypeCharDict = {
 8 |     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 
 9 |     0xff10, 0xff11, 0xff12, 0xff13, 0xff14, 0xff15, 0xff16, 0xff17, 0xff18, 
10 |     0xff19, 0xff10, 0x58f9, 0x8d30, 0x53c1, 0x8086, 0x4f0d, 0x9646, 0x67d2, 0x634c, 
11 |     0x7396, 0x62fe, 0x4f70, 0x4edf
12 | };
13 | 
14 | const std::unordered_set<char32_t> TokenChartype::LetterTypeCharDict = {
15 | 
16 |     0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 
17 |     0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
18 |     0x79, 0x7a, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a,
19 |     0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
20 |     0x57, 0x58, 0x59, 0x5a, 0xff41, 0xff42, 0xff43, 0xff44, 0xff45, 0xff46,
21 |     0xff47, 0xff48, 0xff49, 0xff4a, 0xff4b, 0xff4c, 0xff4d, 0xff4e, 0xff4f,
22 |     0xff50, 0xff51, 0xff52, 0xff53, 0xff54, 0xff55, 0xff56, 0xff57, 0xff58, 
23 |     0xff59, 0xff5a, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27,
24 |     0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e, 0xff2f, 0xff30,
25 |     0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37, 0xff38, 0xff39, 
26 |     0xff3a
27 | };
28 | 
29 | const std::unordered_set<char32_t> TokenChartype::PuncTypeCharDict = {
30 |     0xff0c, 0x3002, 0xff1f, 0xff01, 0xff1a, 0xff1b, 0x3001, 0x2026, 0x2026, 
31 |     0xff5e, 0xff06, 0xff20, 0xff03, 0x2c, 0x2e, 0x3f, 0x21, 0x3a, 0x3b, 0xb7, 
32 |     0x2026, 0x7e, 0x26, 0x40, 0x23, 0x201c, 0x201d, 0x2018, 0x2019, 0x301d, 0x301e, 
33 |     0x20, 0x27, 0x22, 0xff02, 0xff07, 0xb4, 0xff07, 0xff08, 0xff09, 0x3010, 0x3011, 
34 |     0x300a, 0x300b, 0xff1c, 0xff1e, 0xfe5d, 0xfe5e, 0x3c, 0x3e, 0x28, 0x29, 0x5b, 0x5d, 
35 |     0xab, 0xbb, 0x2039, 0x203a, 0x3014, 0x3015, 0x3008, 0x3009, 0x7b, 0x7d, 0xff3b, 0xff3d, 
36 |     0x300c, 0x300d, 0xff5b, 0xff5d, 0x3016, 0x3017, 0x300e, 0x300f, 0xfe35, 0xfe37, 0xfe39,
37 |     0xfe3f, 0xfe3d, 0xfe41, 0xfe43, 0xfe3b, 0xfe17, 0x2f, 0x7c, 0x5c, 0xfe36, 0xfe38, 0xfe3a, 
38 |     0xfe40, 0xfe3e, 0xfe42, 0xfe44, 0xfe3c, 0xfe18, 0xff0f, 0xff5c, 0xff3c, 0x2ca, 0xa8, 
39 |     0xad, 0x5e, 0xa1, 0xa6, 0x60, 0xfe4e, 0xfe4d, 0xfe4f, 0xff3f, 0x5f, 0xaf, 0xffe3, 
40 |     0xfe4b, 0xfe49, 0xfe4a, 0x2cb, 0xfe34, 0xbf, 0x2c7, 0x3000
41 | };
42 | 
43 | } // end of namespace token_module
44 | } // end of namespace segmenter
45 | } // end of namespace slnn
46 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_module/token_module/token_chartype.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MODULE_TOKEN_MODULE_TOKEN_CHARTYPE_H_
 2 | #define SLNN_SEGMNETER_CWS_MODULE_TOKEN_MODULE_TOKEN_CHARTYPE_H_
 3 | #include <unordered_set>
 4 | #include <vector>
 5 | #include <memory>
 6 | #include "utils/typedeclaration.h"
 7 | namespace slnn{
 8 | namespace segmenter{
 9 | namespace token_module{
10 | 
11 | class TokenChartype
12 | {
13 | public:
14 |     static constexpr unsigned size() { return 4U; }
15 |     static constexpr Index DefaultType = 0;
16 |     static constexpr Index DigitType = 1;
17 |     static constexpr Index PuncType = 2;
18 |     static constexpr Index LetterType = 3;
19 | public:
20 |     static std::shared_ptr<std::vector<Index>> extract(const std::u32string& charseq);
21 | private:
22 |     static const std::unordered_set<char32_t> DigitTypeCharDict;
23 |     static const std::unordered_set<char32_t> PuncTypeCharDict;
24 |     static const std::unordered_set<char32_t> LetterTypeCharDict;
25 | public:
26 |     static bool isDigit(char32_t uc) { return DigitTypeCharDict.count(uc) > 0; }
27 |     static bool isPunc(char32_t uc) { return PuncTypeCharDict.count(uc) > 0; }
28 |     static bool isLetter(char32_t uc) { return LetterTypeCharDict.count(uc) > 0; }
29 | };
30 | 
31 | inline 
32 | std::shared_ptr<std::vector<Index>> TokenChartype::extract(const std::u32string& charseq)
33 | {
34 |     std::shared_ptr<std::vector<Index>> type_feat(new std::vector<Index>(charseq.size()));
35 |     
36 |     for(unsigned i = 0; i < charseq.size(); ++i )
37 |     {
38 |         auto uc = charseq[i];
39 |         if( isPunc(uc) ){ type_feat->at(i) = PuncType; }
40 |         else if( isDigit(uc) ){ type_feat->at(i) = DigitType; }
41 |         else if( isLetter(uc) ){ type_feat->at(i) = LetterType; }
42 |         else{ type_feat->at(i) = DefaultType; }
43 |     }
44 |     return type_feat;
45 | }
46 | 
47 | } // end of namespace token_module
48 | } // end of namespace segmenter
49 | } // end of namespace slnn
50 | 
51 | #endif


--------------------------------------------------------------------------------
/src/segmenter/cws_rnn_input1/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | FILE(GLOB dep_token_module "${cws_module_token_dir}/input1/*"
 2 |                            "${cws_module_token_dir}/cws_tag_definition.h"
 3 |                            "${cws_module_token_dir}/cws_tag_utility.h")
 4 | FILE(GLOB dep_structure_param_module "${cws_module_structure_param_dir}/rnn_input1_param.*")
 5 | FILE(GLOB dep_nn_general_module "${cws_module_nn_dir}/nn_common_interface.*"
 6 |                                 "${cws_module_nn_dir}/nn_common_interface_dynet_impl.*"
 7 |                                 "${cws_module_nn_dir}/rnn_input1/nn_cws_rnn_input1_abstract.*")
 8 | FILE(GLOB dep_nn_experiment_module "${cws_module_nn_dir}/experiment_layer/*")
 9 |      
10 | FILE(GLOB dep_modelhandler "${cws_module_dir}/cws_general_modelhandler.*")
11 | FILE(GLOB dep_eval "${cws_module_dir}/cws_eval.*")
12 | FILE(GLOB dep_stat "${cws_module_dir}/cws_stat.*")
13 | FILE(GLOB dep_io "${cws_module_dir}/cws_reader_unicode.*" "${cws_module_dir}/cws_writer.*")
14 | FILE(GLOB dep_others "${utils_dir}/nn_utility.*")
15 | 
16 | FILE(GLOB dep_nn_4compatibility "${cws_module_dir}/cws_tagging_system.*")
17 | 
18 | FILE(GLOB model_template "cws_rnn_input1_template.*")
19 | 
20 | FILE(GLOB model_instance_files "cws_rnn_input1_instance.*")
21 | 
22 | set(lib_name "rnn_input1_model")
23 | 
24 | ADD_LIBRARY(${lib_name}
25 |            STATIC
26 |            ${dep_token_module}
27 |            ${dep_structure_param_module}
28 |            ${dep_nn_general_module}
29 |            ${dep_nn_experiment_module}
30 |            ${dep_eval}
31 |            ${dep_stat}
32 |            ${dep_io}
33 |            ${dep_others}
34 |            ${dep_nn_4compatibility}
35 |            ${model_template}
36 |            ${model_instance_files}
37 | )
38 | TARGET_LINK_LIBRARIES(${lib_name} layers) 
39 | 
40 | foreach(exe_name "cws_rnn_input1_unigram" "cws_rnn_input1_bigram")
41 |     add_executable(${exe_name}
42 |                    "${exe_name}.cpp"
43 |                    ${dep_modelhandler}
44 |                   )
45 | 
46 | 
47 |     if (WITH_CUDA_BACKEND)
48 |         target_link_libraries(${exe_name} gdynet ${Boost_LIBRARIES} trivial layers ${lib_name})
49 |         add_dependencies(${exe_name} dynetcuda)
50 |         target_link_libraries(${exe_name} dynetcuda)
51 |         CUDA_ADD_CUBLAS_TO_TARGET(${exe_name})
52 |       else()
53 |         target_link_libraries(${exe_name} dynet ${Boost_LIBRARIES} trivial layers ${lib_name})
54 |     endif (WITH_CUDA_BACKEND)
55 | 
56 |                           
57 |     SET_PROPERTY(TARGET ${lib_name} PROPERTY FOLDER "segmenter/rnn_input1/lib")
58 |     SET_PROPERTY(TARGET ${exe_name} PROPERTY FOLDER "segmenter/rnn_input1")
59 | endforeach()


--------------------------------------------------------------------------------
/src/segmenter/cws_rnn_input1/cws_rnn_input1_instance.cpp:
--------------------------------------------------------------------------------
 1 | #include "cws_rnn_input1_instance.h"
 2 | namespace slnn{
 3 | namespace segmenter{
 4 | namespace rnn_input1{
 5 | 
 6 | template class SegmenterRnnInput1Template<
 7 |     token_module::TokenSegmenterInput1Unigram,
 8 |     structure_param_module::SegmenterRnnInput1Param,
 9 |     nn_module::NnSegmenterRnnInput1Abstract>;
10 | 
11 | template class SegmenterRnnInput1Template<
12 |     token_module::TokenSegmenterInput1Bigram,
13 |     structure_param_module::SegmenterRnnInput1Param,
14 |     nn_module::NnSegmenterRnnInput1Abstract>;
15 | 
16 | } // enf of namespace rnn-input1
17 | } // enf of namespace segmenter
18 | } // end 


--------------------------------------------------------------------------------
/src/segmenter/cws_rnn_input1/cws_rnn_input1_instance.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_MLP_INPUT1_CWS_RNN_INPUT1_INSTANCE_H_
 2 | #define SLNN_SEGMENTER_CWS_MLP_INPUT1_CWS_RNN_INPUT1_INSTANCE_H_
 3 | #include "segmenter/cws_rnn_input1/cws_rnn_input1_template.h"
 4 | #include "segmenter/cws_module/token_module/input1/token_input1_unigram.h"
 5 | #include "segmenter/cws_module/token_module/input1/token_input1_bigram.h"
 6 | #include "segmenter/cws_module/structure_param_module/rnn_input1_param.h"
 7 | #include "segmenter/cws_module/nn_module/rnn_input1/nn_cws_rnn_input1_abstract.h"
 8 | namespace slnn{
 9 | namespace segmenter{
10 | namespace rnn_input1{
11 | 
12 | extern template class SegmenterRnnInput1Template<
13 |     token_module::TokenSegmenterInput1Unigram,
14 |     structure_param_module::SegmenterRnnInput1Param,
15 |     nn_module::NnSegmenterRnnInput1Abstract>;
16 | 
17 | using RnnInput1Unigram = SegmenterRnnInput1Template<
18 |     token_module::TokenSegmenterInput1Unigram,
19 |     structure_param_module::SegmenterRnnInput1Param,
20 |     nn_module::NnSegmenterRnnInput1Abstract>;
21 | 
22 | extern template class SegmenterRnnInput1Template<
23 |     token_module::TokenSegmenterInput1Bigram,
24 |     structure_param_module::SegmenterRnnInput1Param,
25 |     nn_module::NnSegmenterRnnInput1Abstract>;
26 | 
27 | using RnnInput1Bigram = SegmenterRnnInput1Template<
28 |     token_module::TokenSegmenterInput1Bigram,
29 |     structure_param_module::SegmenterRnnInput1Param,
30 |     nn_module::NnSegmenterRnnInput1Abstract > ;
31 | 
32 | } // enf of namespace rnn-input1
33 | } // enf of namespace segmenter
34 | } // end of namespace slnn
35 | 
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_single_bigram_classification/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name
 2 |     cws_single_bigram_classification
 3 | )
 4 | 
 5 | set(base_name
 6 |     cws_single_classification
 7 | )
 8 | 
 9 | set(specific_headers
10 |     ${segmenter_base_dir}/${base_name}/${base_name}_model.h
11 | )
12 | set(specific_libs
13 |     ${segmenter_base_dir}/${base_name}/${base_name}_model.cpp
14 | )
15 | 
16 | add_executable(${exe_name}
17 |                ${exe_name}.cpp
18 |                ${single_input_bigram_modelhandler_headers} # model handler
19 |                ${specific_headers} # model
20 |                ${single_input_model_headers}        # base model
21 |                ${cws_common_headers}                # common header
22 |                ${specific_libs}
23 |                ${single_input_model_libs} 
24 |                ${cws_common_libs}
25 |                )
26 |                
27 | target_link_libraries(${exe_name}
28 |                       dynet
29 |                       ${Boost_LIBRARIES})
30 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_single_bigram_crf/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name
 2 |     cws_single_bigram_crf
 3 | )
 4 | 
 5 | set(base_name
 6 |      cws_single_crf
 7 | )
 8 | 
 9 | set(specific_headers
10 |     ${segmenter_base_dir}/${base_name}/${base_name}_model.h
11 | )
12 | set(specific_libs
13 |     ${segmenter_base_dir}/${base_name}/${base_name}_model.cpp
14 | )
15 | 
16 | add_executable(${exe_name}
17 |                ${exe_name}.cpp
18 |                ${single_input_bigram_modelhandler_headers} # model handler
19 |                ${specific_headers} # model
20 |                ${single_input_model_headers}        # base model
21 |                ${cws_common_headers}                # common header
22 |                ${specific_libs}
23 |                ${single_input_model_libs} 
24 |                ${cws_common_libs}
25 |                )
26 |                
27 | target_link_libraries(${exe_name}
28 |                       dynet
29 |                       ${Boost_LIBRARIES})
30 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_single_bigram_pretag/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name
 2 |     cws_single_bigram_pretag
 3 | )
 4 | 
 5 | set(base_name
 6 |     cws_single_pretag
 7 | )
 8 | set(specific_headers
 9 |     ${segmenter_base_dir}/${base_name}/${base_name}_model.h
10 | )
11 | set(specific_libs
12 |     ${segmenter_base_dir}/${base_name}/${base_name}_model.cpp
13 | )
14 | 
15 | 
16 | add_executable(${exe_name}
17 |                ${exe_name}.cpp
18 |                ${single_input_bigram_modelhandler_headers} # model handler
19 |                ${specific_headers} # model
20 |                ${single_input_model_headers}        # base model
21 |                ${cws_common_headers}                # common header
22 |                ${specific_libs}
23 |                ${single_input_model_libs} 
24 |                ${cws_common_libs}
25 |                )
26 |                
27 | target_link_libraries(${exe_name}
28 |                       dynet
29 |                       ${Boost_LIBRARIES})
30 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_single_classification/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(cws_single_classification_headers
 2 |     cws_single_classification_model.h
 3 | )
 4 | set(cws_single_classification_libs
 5 |     cws_single_classification_model.cpp
 6 | )
 7 | 
 8 | add_executable(cws_single_classification 
 9 |                cws_single_classification.cpp
10 |                ${single_input_modelhandler_headers} # model handler
11 |                ${cws_single_classification_headers} # model
12 |                ${single_input_model_headers}        # base model
13 |                ${cws_common_headers}                # common header
14 |                ${cws_single_classification_libs}
15 |                ${single_input_model_libs} 
16 |                ${cws_common_libs}
17 |                )
18 |                
19 | target_link_libraries(cws_single_classification 
20 |                       dynet
21 |                       ${Boost_LIBRARIES})
22 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_single_classification/cws_single_classification_model.cpp:
--------------------------------------------------------------------------------
 1 | #include "cws_single_classification_model.h"
 2 | #include "segmenter/cws_module/cws_output_layer.h"
 3 | 
 4 | namespace slnn{
 5 | 
 6 | CWSSingleClassificationModel::CWSSingleClassificationModel()
 7 |     :SingleInputModel() ,
 8 |     word_dict(input_dict) ,
 9 |     tag_dict(output_dict)
10 | {}
11 | 
12 | CWSSingleClassificationModel::~CWSSingleClassificationModel(){}
13 | 
14 | void CWSSingleClassificationModel::set_model_param(const boost::program_options::variables_map &var_map)
15 | {
16 |     CWSSingleClassificationModel::SingleInputModel::set_model_param(var_map);
17 | }
18 | void CWSSingleClassificationModel::build_model_structure()
19 | {
20 |     tag_sys.build(tag_dict) ; // init B_ID , M_ID and so on 
21 |     m = new dynet::Model() ;
22 |     input_layer = new Input1(m, word_dict_size, word_embedding_dim) ;
23 |     bilstm_layer = new BILSTMLayer(m, lstm_nr_stacked_layer, word_embedding_dim, lstm_h_dim, dropout_rate) ;
24 |     output_layer = new CWSSimpleOutput(m, lstm_h_dim, lstm_h_dim, hidden_dim, output_dim , tag_sys) ;
25 | }
26 | 
27 | void CWSSingleClassificationModel::print_model_info()
28 | {
29 |     BOOST_LOG_TRIVIAL(info) << "---------------- Single Input Classification Model -----------------\n"
30 |         << "vocabulary size : " << word_dict_size << " with dimension : " << word_embedding_dim << "\n"
31 |         << "bi-lstm x dim : " << word_embedding_dim << " , h dim : " << lstm_h_dim
32 |         << " , stacked layer num : " << lstm_nr_stacked_layer << "\n"
33 |         << "tag hidden layer dim : " << hidden_dim << "\n"
34 |         << "dropout rate : " << dropout_rate << "\n"
35 |         << "output dim : " << output_dim ;
36 | }
37 | 
38 | }
39 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_single_classification/cws_single_classification_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_SINGLE_INPUT_CLASSIFICATION_HPP_
 2 | #define SLNN_SEGMENTER_CWS_SINGLE_INPUT_CLASSIFICATION_HPP_
 3 | 
 4 | #include <boost/log/trivial.hpp>
 5 | #include <boost/archive/text_iarchive.hpp>
 6 | #include <boost/archive/text_oarchive.hpp>
 7 | #include <boost/serialization/base_object.hpp>
 8 | #include <boost/serialization/utility.hpp>
 9 | #include <boost/serialization/split_member.hpp>
10 | 
11 | #include "dynet/dynet.h"
12 | 
13 | #include "segmenter/base_model/single_input_model.h"
14 | #include "segmenter/cws_module/cws_tagging_system.h"
15 | namespace slnn{
16 | 
17 | class CWSSingleClassificationModel : public SingleInputModel
18 | {
19 |     friend class boost::serialization::access;
20 | public:
21 |     
22 |     dynet::Dict &word_dict ;
23 |     dynet::Dict &tag_dict ;
24 | 
25 |     CWSSingleClassificationModel() ;
26 |     ~CWSSingleClassificationModel() ;
27 | 
28 |     void set_model_param(const boost::program_options::variables_map &var_map) ;
29 |     void build_model_structure() ;
30 |     void print_model_info() ;
31 | 
32 |     void save_model(std::ostream &os) ;
33 |     void load_model(std::istream &is) ;
34 | 
35 |     template<typename Archive>
36 |     void serialize(Archive &ar, const unsigned version);
37 | };
38 | 
39 | template<typename Archive>
40 | void CWSSingleClassificationModel::serialize(Archive &ar, const unsigned version)
41 | {
42 |     ar & boost::serialization::base_object<SingleInputModel>(*this);
43 | }
44 | 
45 | } // end of namespace slnn 
46 | #endif 
47 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_single_crf/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name
 2 |      cws_single_crf
 3 | )
 4 | 
 5 | set(specific_headers
 6 |     ${exe_name}_model.h
 7 | )
 8 | set(specific_libs
 9 |     ${exe_name}_model.cpp
10 | )
11 | 
12 | 
13 | 
14 | add_executable(${exe_name}
15 |                ${exe_name}.cpp
16 |                ${single_input_modelhandler_headers} # model handler
17 |                ${specific_headers} # model
18 |                ${single_input_model_headers}        # base model
19 |                ${cws_common_headers}                # common header
20 |                ${specific_libs}
21 |                ${single_input_model_libs} 
22 |                ${cws_common_libs}
23 |                )
24 |                
25 | target_link_libraries(${exe_name}
26 |                       dynet
27 |                       ${Boost_LIBRARIES})
28 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_single_crf/cws_single_crf_model.cpp:
--------------------------------------------------------------------------------
 1 | #include "cws_single_crf_model.h"
 2 | #include "segmenter/cws_module/cws_output_layer.h"
 3 | 
 4 | namespace slnn{
 5 | 
 6 | CWSSingleCRFModel::CWSSingleCRFModel()
 7 |     :SingleInputModel() ,
 8 |     word_dict(input_dict) ,
 9 |     tag_dict(output_dict)
10 | {}
11 | 
12 | CWSSingleCRFModel::~CWSSingleCRFModel(){}
13 | 
14 | void CWSSingleCRFModel::set_model_param(const boost::program_options::variables_map &var_map)
15 | {
16 |     tag_embedding_dim = var_map["tag_embedding_dim"].as<unsigned>() ;
17 |     CWSSingleCRFModel::SingleInputModel::set_model_param(var_map);
18 | }
19 | void CWSSingleCRFModel::build_model_structure()
20 | {
21 |     tag_sys.build(tag_dict) ; // init B_ID , M_ID and so on 
22 |     m = new dynet::Model() ;
23 |     input_layer = new Input1(m, word_dict_size, word_embedding_dim) ;
24 |     bilstm_layer = new BILSTMLayer(m, lstm_nr_stacked_layer, word_embedding_dim, lstm_h_dim, dropout_rate) ;
25 |     output_layer = new CWSCRFOutput(m, tag_embedding_dim, lstm_h_dim, lstm_h_dim, hidden_dim, output_dim, dropout_rate, tag_sys) ; 
26 | }
27 | 
28 | void CWSSingleCRFModel::print_model_info()
29 | {
30 |     BOOST_LOG_TRIVIAL(info) << "---------------- Single Input CRF Model -----------------\n"
31 |         << "vocabulary size : " << word_dict_size << " with dimension : " << word_embedding_dim << "\n"
32 |         << "tag dict size : " << output_dim << " with dimension : " << tag_embedding_dim << "\n"
33 |         << "bi-lstm x dim : " << word_embedding_dim << " , h dim : " << lstm_h_dim
34 |         << " , stacked layer num : " << lstm_nr_stacked_layer << "\n"
35 |         << "dropout rate : " << dropout_rate << "\n"
36 |         << "tag hidden layer dim : " << hidden_dim << "\n"
37 |         << "output dim : " << output_dim ;
38 | }
39 | 
40 | }
41 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_single_crf/cws_single_crf_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_SINGLE_CRF_MODEL_H_
 2 | #define SLNN_SEGMENTER_CWS_SINGLE_CRF_MODEL_H_
 3 | 
 4 | #include <boost/log/trivial.hpp>
 5 | #include <boost/archive/text_iarchive.hpp>
 6 | #include <boost/archive/text_oarchive.hpp>
 7 | 
 8 | #include "dynet/dynet.h"
 9 | 
10 | #include "segmenter/base_model/single_input_model.h"
11 | #include "segmenter/cws_module/cws_tagging_system.h"
12 | namespace slnn{
13 | 
14 | class CWSSingleCRFModel : public SingleInputModel
15 | {
16 |     friend class boost::serialization::access;
17 | public:
18 |     unsigned tag_embedding_dim ;
19 | 
20 |     dynet::Dict &word_dict ;
21 |     dynet::Dict &tag_dict ;
22 | 
23 |     CWSSingleCRFModel() ;
24 |     ~CWSSingleCRFModel() ;
25 | 
26 |     void set_model_param(const boost::program_options::variables_map &var_map) ;
27 |     void build_model_structure() ;
28 |     void print_model_info() ;
29 | 
30 |     template<typename Archive>
31 |     void serialize(Archive &ar, const unsigned version);
32 | };
33 | 
34 | template<typename Archive>
35 | void CWSSingleCRFModel::serialize(Archive &ar, const unsigned version)
36 | {
37 |     ar & tag_embedding_dim;
38 |     ar & boost::serialization::base_object<SingleInputModel>(*this);
39 | }
40 | 
41 | } // end of namespace slnn 
42 | #endif 
43 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_single_pretag/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(exe_name
 2 |     cws_single_pretag
 3 | )
 4 | set(specific_headers
 5 |     ${exe_name}_model.h
 6 | )
 7 | set(specific_libs
 8 |     ${exe_name}_model.cpp
 9 | )
10 | 
11 | 
12 | add_executable(${exe_name}
13 |                ${exe_name}.cpp
14 |                ${single_input_modelhandler_headers} # model handler
15 |                ${specific_headers} # model
16 |                ${single_input_model_headers}        # base model
17 |                ${cws_common_headers}                # common header
18 |                ${specific_libs}
19 |                ${single_input_model_libs} 
20 |                ${cws_common_libs}
21 |                )
22 |                
23 | target_link_libraries(${exe_name}
24 |                       dynet
25 |                       ${Boost_LIBRARIES})
26 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_single_pretag/cws_single_pretag_model.cpp:
--------------------------------------------------------------------------------
 1 | #include "cws_single_pretag_model.h"
 2 | #include "segmenter/cws_module/cws_output_layer.h"
 3 | 
 4 | namespace slnn{
 5 | 
 6 | CWSSinglePretagModel::CWSSinglePretagModel()
 7 |     :SingleInputModel() ,
 8 |     word_dict(input_dict) ,
 9 |     tag_dict(output_dict)
10 | {}
11 | 
12 | CWSSinglePretagModel::~CWSSinglePretagModel(){}
13 | 
14 | void CWSSinglePretagModel::set_model_param(const boost::program_options::variables_map &var_map)
15 | {
16 |     tag_embedding_dim = var_map["tag_embedding_dim"].as<unsigned>() ;
17 |     CWSSinglePretagModel::SingleInputModel::set_model_param(var_map);
18 | }
19 | void CWSSinglePretagModel::build_model_structure()
20 | {
21 |     tag_sys.build(tag_dict) ; // init B_ID , M_ID and so on 
22 |     m = new dynet::Model() ;
23 |     input_layer = new Input1(m, word_dict_size, word_embedding_dim) ;
24 |     bilstm_layer = new BILSTMLayer(m, lstm_nr_stacked_layer, word_embedding_dim, lstm_h_dim, dropout_rate) ;
25 |     output_layer = new CWSPretagOutput(m, tag_embedding_dim, lstm_h_dim, lstm_h_dim, hidden_dim, output_dim, tag_sys) ; 
26 | }
27 | 
28 | void CWSSinglePretagModel::print_model_info()
29 | {
30 |     BOOST_LOG_TRIVIAL(info) << "---------------- Single Input Pretag Model -----------------\n"
31 |         << "vocabulary size : " << word_dict_size << " with dimension : " << word_embedding_dim << "\n"
32 |         << "tag dict size : " << output_dim << " with dimension : " << tag_embedding_dim << "\n"
33 |         << "bi-lstm x dim : " << word_embedding_dim << " , h dim : " << lstm_h_dim
34 |         << " , stacked layer num : " << lstm_nr_stacked_layer << "\n"
35 |         << "dropout rate : " << dropout_rate << "\n"
36 |         << "tag hidden layer dim : " << hidden_dim << "\n"
37 |         << "output dim : " << output_dim ;
38 | }
39 | 
40 | }
41 | 


--------------------------------------------------------------------------------
/src/segmenter/cws_single_pretag/cws_single_pretag_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_SEGMENTER_CWS_SINGLE_INPUT_PRETAG_H_
 2 | #define SLNN_SEGMENTER_CWS_SINGLE_INPUT_PRETAG_H_
 3 | 
 4 | #include <boost/log/trivial.hpp>
 5 | #include <boost/archive/text_iarchive.hpp>
 6 | #include <boost/archive/text_oarchive.hpp>
 7 | 
 8 | #include "dynet/dynet.h"
 9 | 
10 | #include "segmenter/base_model/single_input_model.h"
11 | #include "segmenter/cws_module/cws_tagging_system.h"
12 | namespace slnn{
13 | 
14 | class CWSSinglePretagModel : public SingleInputModel
15 | {
16 |     friend class boost::serialization::access;
17 | public:
18 |     unsigned tag_embedding_dim ;
19 |     
20 |     dynet::Dict &word_dict ;
21 |     dynet::Dict &tag_dict ;
22 | 
23 |     CWSSinglePretagModel() ;
24 |     ~CWSSinglePretagModel() ;
25 | 
26 |     void set_model_param(const boost::program_options::variables_map &var_map) ;
27 |     void build_model_structure() ;
28 |     void print_model_info() ;
29 | 
30 |     template<typename Archive>
31 |     void serialize(Archive &ar, const unsigned version);
32 | };
33 | 
34 | template<typename Archive>
35 | void CWSSinglePretagModel::serialize(Archive &ar, const unsigned version)
36 | {
37 |     ar & tag_embedding_dim;
38 |     ar & boost::serialization::base_object<SingleInputModel>(*this);
39 | }
40 | 
41 | } // end of namespace slnn 
42 | #endif 
43 | 


--------------------------------------------------------------------------------
/src/segmenter/model_handler/input2_modelhandler.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/segmenter/model_handler/input2_modelhandler.cpp


--------------------------------------------------------------------------------
/src/segmenter/model_handler/single_input_modelhandler.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/segmenter/model_handler/single_input_modelhandler.cpp


--------------------------------------------------------------------------------
/src/segmenter/unit_test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 
# Stand-alone test executable for the CWS evaluation code.
add_executable(cws_evaluation_test
    cws_evaluation.cpp
    ${cws_common_headers}
    ${cws_common_libs}
)

target_link_libraries(cws_evaluation_test dynet ${Boost_LIBRARIES})


--------------------------------------------------------------------------------
/src/trivial/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | FILE(GLOB lookup_table_headers "lookup_table/*.h*")
 2 | FILE(GLOB lookup_table_srcs "lookup_table/*.cpp")
 3 | 
 4 | FILE(GLOB charcode_headers "charcode/*.h*")
 5 | FILE(GLOB charcode_srcs "charcode/*.cpp")
 6 | 
 7 | SET(trivial_headers ${lookup_table_headers}
 8 |                     ${charcode_headers})
 9 | SET(trivial_srcs ${lookup_table_srcs}
10 |                  ${charcode_srcs})                    
11 | 
12 |              
13 | ADD_LIBRARY(trivial STATIC ${trivial_headers}
14 |                            ${trivial_srcs})
15 | 
16 | 
17 | # GROUP files (For MSVS)
18 | SOURCE_GROUP("lookup_table" FILES ${lookup_table_headers}
19 |                                   ${lookup_table_srcs})
20 | 
21 | SOURCE_GROUP("charcode" FILES ${charcode_headers}
22 |                               ${charcode_srcs}) 
23 | 
24 | SET_PROPERTY(TARGET trivial PROPERTY FOLDER "libraries")                                


--------------------------------------------------------------------------------
/src/trivial/charcode/charcode_base.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_TRIVIAL_CHARCODE_CHARCODE_BASE_HPP_
 2 | #define SLNN_TRIVIAL_CHARCODE_CHARCODE_BASE_HPP_
 3 | #include <string>
 4 | 
 5 | namespace slnn{
 6 | namespace charcode{
 7 | namespace base{
 8 | 
 9 | enum class EncodingType
10 | {
11 |     UTF8 = 1,
12 |     GB18030 = 2,
13 |     UNSUPPORT = 9999
14 | };
15 | 
16 | using uint8_t = unsigned char;
17 | constexpr int UTF8MaxByteSize = 4;
18 | // octec stands for 8 bits (a byte)
19 | 
20 | template <typename octet_type>
21 | inline  
22 | uint8_t mask8(octet_type c)
23 | {
24 |     return static_cast<uint8_t>(c); 
25 | }
26 | 
27 | /*****************************
28 |  * utilities 
29 |  *****************************/
30 | 
31 | inline
32 | std::string encoding_type2str(EncodingType encoding_type)
33 | {
34 |     switch( encoding_type )
35 |     {
36 |     case EncodingType::UTF8 :
37 |         return "UTF8";
38 |     case EncodingType::GB18030:
39 |         return "GB18030";
40 |     default:
41 |         return "unsupport(value=" + std::to_string(static_cast<int>(encoding_type)) + ")";
42 |     }
43 | }
44 | 
45 | inline
46 | EncodingType str2encoding_type(const std::string &str)
47 | {
48 |     std::string upper_name(str);
49 |     for( char &c : upper_name ){ c = ::toupper(c); }
50 |     if( upper_name == "UTF8" ){ return EncodingType::UTF8; }
51 |     else if( upper_name == "GB18030" || upper_name == "GB2312" ){ return EncodingType::GB18030; }
52 |     else{ return EncodingType::UNSUPPORT; }
53 | }
54 | 
55 | 
56 | } // end of namespace base
57 | using base::EncodingType;
58 | } // end of namespace charcode
59 | } // end of namespace slnn
60 | 
61 | 
62 | 
63 | #endif


--------------------------------------------------------------------------------
/src/trivial/charcode/naive_unicode.cpp:
--------------------------------------------------------------------------------
 1 | #include "naive_unicode.h"
 2 | #include <iostream>
 3 | namespace slnn{
 4 | namespace charcode{
 5 | namespace NUnicode{
 6 | 
 7 | std::u32string decode_from_u8_bytes_unsafe(const std::string &u8_bytes) noexcept
 8 | {
 9 |     int length = u8_bytes.length();
10 |     char32_t *unicode_buf = new char32_t[length+1];
11 |     int u8_offset = 0,
12 |         unicode_offset = 0;
13 |     while( u8_offset < length )
14 |     {
15 |         char32_t code_point = next_unicode_from_u8_bytes_unsafe(u8_bytes, u8_offset, length);// auto incease u8_offset
16 |         if( code_point != UnicodeErrorValue )
17 |         {
18 |             unicode_buf[unicode_offset++] = code_point;
19 |         }
20 |         else
21 |         {
22 |             std::cerr << "At UTF8 string : \n"
23 |                 << u8_bytes << "\n"
24 |                 << "utf8 posistion: " << u8_offset - 1 << ", unicode position: " << unicode_offset << "\n";
25 |             // don't inceasing unicode offset
26 |         }
27 |     }
28 |     unicode_buf[unicode_offset] = '\0';
29 |     std::u32string ret_str(unicode_buf);
30 |     delete[] unicode_buf;
31 |     return ret_str;
32 | }
33 | 
34 | std::string encode2u8_bytes_unsafe(const std::u32string &unicode_str) noexcept
35 | {
36 |     int length = unicode_str.length();
37 |     char *u8buf = new char[length * UTF8MaxByteSize + 1];
38 |     int u8_offset = 0;
39 |     for( auto uchar : unicode_str )
40 |     {
41 |         next_u8_unit_from_unicode_unsafe(uchar, u8buf, u8_offset);
42 |     }
43 |     u8buf[u8_offset] = '\0';
44 |     std::string ret_str(u8buf);
45 |     delete[] u8buf;
46 |     return ret_str;
47 | }
48 | 
49 | } // end of namespace NUnicode
50 | } // end of namespace charcode
51 | } // end of namespace slnn


--------------------------------------------------------------------------------
/src/trivial/charcode/naive_utf8.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/trivial/charcode/naive_utf8.cpp


--------------------------------------------------------------------------------
/src/trivial/charcode/naive_utf8.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fseasy/sequence-labeling-by-nn/6436f3b93dda8f0a97631408f4a8db8920bd3cfc/src/trivial/charcode/naive_utf8.h


--------------------------------------------------------------------------------
/src/trivial/logger/naive_logger.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_TRIVIAL_LOGGER_NAIVE_LOGGER_H_
 2 | #define SLNN_TRIVIAL_LOGGER_NAIVE_LOGGER_H_
 3 | 
 4 | namespace slnn{
 5 | namespace trivial{
 6 | namespace logger{
 7 | 
 8 | 
 9 | 
10 | 
11 | } // end of namespace logger
12 | } // end of namespace trivial
13 | } // end of namespace slnn
14 | 
15 | 
16 | 
17 | #endif


--------------------------------------------------------------------------------
/src/trivial/lookup_table/lookup_table.cpp:
--------------------------------------------------------------------------------
 1 | #include "lookup_table.h"
 2 | using namespace std;
 3 | 
 4 | namespace slnn{
 5 | namespace trivial{
 6 | namespace lookup_table{
 7 | 
 8 | // do template instantiation.
 9 | template class LookupTable<char32_t>;
10 | template class LookupTable<u32string>;
11 | template class LookupTable<string>;
12 | 
13 | template class LookupTableWithCnt<char32_t>;
14 | template class LookupTableWithCnt<u32string>;
15 | template class LookupTableWithCnt<string>;
16 | 
17 | template class LookupTableWithReplace<char32_t>;
18 | template class LookupTableWithReplace<u32string>;
19 | template class LookupTableWithReplace<string>;
20 | 
21 | 
22 | } // end of namespace lookup_table
23 | } // end of namespace trivial
24 | } // end of namespace slnn
25 | 


--------------------------------------------------------------------------------
/src/unittest/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ADD_SUBDIRECTORY(test_charcode)
 2 | ADD_SUBDIRECTORY(test_lookup_table)
 3 | ADD_SUBDIRECTORY(test_cwstag)
 4 | 
 5 | FILE(GLOB test_lookup_table_srcs "test_lookup_table/*.cpp")   
 6 | FILE(GLOB test_charcode_srcs "test_charcode/*.cpp")                  
 7 | FILE(GLOB test_cwstag_srcs "test_cwstag/*.cpp")
 8 | 
 9 | 
10 | SOURCE_GROUP("unittest\\test_lookup_table" FILES ${test_lookup_table_srcs})
11 | 
12 | SOURCE_GROUP("unittest\\test_charcode" FILES ${test_charcode_srcs})
13 | 
14 | SOURCE_GROUP("unittest\\test_charcode" FILES ${test_cwstag_srcs})


--------------------------------------------------------------------------------
/src/unittest/test_charcode/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ADD_EXECUTABLE(test_charcode
 2 |                test_charcode.cpp
 3 |                ${charcode_headers}
 4 |                ${unittest_framework_include})
 5 |                
 6 | TARGET_LINK_LIBRARIES(test_charcode
 7 |                       trivial
 8 |                       ${Boost_LIRARIES})
 9 | 
10 | SET_PROPERTY(TARGET test_charcode PROPERTY FOLDER "unittest")                 
11 | 


--------------------------------------------------------------------------------
/src/unittest/test_cwstag/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | SET(cws_module_dir "${source_dir}/segmenter/cws_module")
 3 | 
 4 | FILE(GLOB dep_files 
 5 |           ${cws_module_dir}/cws_eval.*
 6 |           ${cws_module_dir}/token_module/cws_tag_definition.*)
 7 | 
 8 | ADD_EXECUTABLE( test_cwstag
 9 |                 test_cwstag.cpp
10 |                 ${dep_files}
11 |                 ${unittest_framework_include})
12 |                 
13 | SET_PROPERTY(TARGET test_cwstag PROPERTY FOLDER "unittest")


--------------------------------------------------------------------------------
/src/unittest/test_lookup_table/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ADD_EXECUTABLE(test_lookup_table
 2 |                test_lookup_table.cpp 
 3 |                ${unittest_framework_include}
 4 |                ${lookup_table_headers}
 5 |                )
 6 | 
 7 | TARGET_LINK_LIBRARIES(test_lookup_table 
 8 |                       trivial
 9 |                       ${Boost_LIBRARIES})
10 | 
11 | SET_PROPERTY(TARGET test_lookup_table PROPERTY FOLDER "unittest")                          
12 | 


--------------------------------------------------------------------------------
/src/utils/dict_wrapper.hpp:
--------------------------------------------------------------------------------
 1 | ﻿#ifndef DICT_WRAPPER_HPP_INCLUDED_
 2 | #define DICT_WRAPPER_HPP_INCLUDED_
 3 | 
#include "typedeclaration.h"
#include "dynet/dict.h"

#include <algorithm>
#include <cassert>
#include <functional>
#include <random>
#include <string>
#include <vector>
10 | 
11 | /**************************
12 |  * DictWrapper
13 |  * for record words frequency . 
14 |  */
15 | namespace slnn {
16 |     struct DictWrapper
17 |     {
18 |         DictWrapper(dynet::Dict &d) : rd(d), freq_threshold(1), prob_threshold(0.2f), prob_rand(std::bind(std::uniform_real_distribution<float>(0, 1), *(dynet::rndeng) ))
19 |         {
20 |             freq_records.reserve(0xFFFF); // 60K space
21 |         }
22 | 
23 |         inline int convert(const std::string& word)
24 |         {
25 |             Index word_idx = rd.convert(word);
26 |             if (!rd.is_frozen())
27 |             {
28 |                 if (static_cast<unsigned>(word_idx) + 1U > freq_records.size())
29 |                 {
30 |                     // new words has been pushed to the dict !
31 |                     freq_records.push_back(1); // add word frequency record
32 |                 }
33 |                 else ++freq_records[word_idx]; // update word_frequency record
34 |             }
35 |             return word_idx;
36 |         }
37 |         void set_unk(const std::string& word)
38 |         {
39 |             rd.set_unk(word);
40 |             UNK = rd.convert(word);
41 |         }
42 |         void freeze() { rd.freeze(); }
43 |         bool is_frozen() { return rd.is_frozen(); }
44 |         int unk_replace_probability(Index word_idx)
45 |         {
46 |             if (word_idx == UNK) return UNK; // UNK is not in freq_records
47 |             assert(static_cast<unsigned>(word_idx) < freq_records.size());
48 |             if (freq_records[word_idx] <= freq_threshold && prob_rand() <= prob_threshold) return UNK;
49 |             return word_idx;
50 |         }
51 |         void set_threshold(int freq_threshold, float prob_threshold)
52 |         {
53 |             this->freq_threshold = freq_threshold;
54 |             this->prob_threshold = prob_threshold;
55 |         }
56 |         dynet::Dict &rd;
57 |         std::vector<int> freq_records;
58 |         Index UNK;
59 |         int freq_threshold;
60 |         float prob_threshold;
61 |         std::function<float()> prob_rand;
62 |     };
63 | } // End of namespace slnn
64 | 
65 | #endif


--------------------------------------------------------------------------------
/src/utils/nn_utility.cpp:
--------------------------------------------------------------------------------
 1 | #include <algorithm>
 2 | #include <boost/algorithm/string/trim.hpp>
 3 | #include <boost/algorithm/string/split.hpp>
 4 | #include <boost/algorithm/string/classification.hpp>
 5 | #include "nn_utility.h"
 6 | 
 7 | namespace slnn{
 8 | namespace utils{
 9 | 
10 | auto get_nonlinear_function_from_name(const std::string &name) -> dynet::expr::Expression(*)(const dynet::expr::Expression &)
11 | {
12 |     std::string lower_name(name);
13 |     for( char &c : lower_name ){ c = ::tolower(c); }
14 |     if( lower_name == "relu" || lower_name == "rectify" ){ return &dynet::expr::rectify; }
15 |     else if( lower_name == "sigmoid" || lower_name == "softmax" ){ return &dynet::expr::softmax; } // a bit strange...
16 |     else if( lower_name == "tanh" ){ return &dynet::expr::tanh; }
17 |     else
18 |     {
19 |         std::ostringstream oss;
20 |         oss << "not supported non-linear funtion: " << name << "\n"  
21 |             <<"Exit!\n";
22 |         throw std::invalid_argument(oss.str());
23 |     }
24 | }
25 | 
// Parses a list of hidden-layer dimensions such as "100,200", "100 200" or
// "\"100, 200\"" into a vector of unsigned values.
//
// Surrounding quotes, commas and spaces are ignored. Entries are separated by
// any run of commas and/or spaces, so "100, 200" yields two entries. The
// previous version produced an empty token between ',' and ' ' and threw.
//
// Throws std::invalid_argument when the list is empty or an entry is not a
// number, and std::out_of_range when an entry does not fit. In both cases the
// offending argument is written to std::cerr before the exception is rethrown
// with its original type.
std::vector<unsigned> parse_mlp_hidden_dim_list(const std::string& hidden_dim_list_str)
{
    static const char * const trim_chars = "\", ";
    static const char * const separators = ", ";

    std::vector<unsigned> dim_list;
    try
    {
        const std::string::size_type first = hidden_dim_list_str.find_first_not_of(trim_chars);
        if( first == std::string::npos )
        {
            throw std::invalid_argument("empty hidden dim list");
        }
        const std::string::size_type last = hidden_dim_list_str.find_last_not_of(trim_chars);

        std::string::size_type token_begin = first;
        while( token_begin <= last )
        {
            std::string::size_type token_end = hidden_dim_list_str.find_first_of(separators, token_begin);
            if( token_end == std::string::npos || token_end > last ){ token_end = last + 1; }
            if( token_end > token_begin ) // skip the empty gap between adjacent separators
            {
                const std::string num_str = hidden_dim_list_str.substr(token_begin, token_end - token_begin);
                dim_list.push_back(static_cast<unsigned>(std::stoul(num_str)));
            }
            token_begin = token_end + 1;
        }
    }
    catch( const std::logic_error & ) // covers invalid_argument and out_of_range
    {
        std::cerr << "bad argument for 'mlp_hidden_dim_list' : " << hidden_dim_list_str << "\n";
        throw; // rethrow the original exception object
    }
    return dim_list;
}
45 | 
46 | } // end of namespace utils
47 | } // end of namespace slnn
48 | 


--------------------------------------------------------------------------------
/src/utils/nn_utility.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_UTILS_NN_UTILITY_H_
 2 | #define SLNN_UTILS_NN_UTILITY_H_
 3 | #include <vector>
 4 | #include <string>
 5 | #include "dynet/expr.h"
 6 | namespace slnn{
 7 | namespace utils{
 8 | 
/**
 * Look up a dynet activation function by name (letter case is ignored).
 * "relu"/"rectify" give rectify, "sigmoid"/"softmax" give softmax, "tanh" gives tanh.
 * @param name  activation function name
 * @return pointer to a function mapping an Expression to an Expression
 * @throws std::invalid_argument for any other name
 */
auto get_nonlinear_function_from_name(const std::string &name) -> dynet::expr::Expression(*)(const dynet::expr::Expression &);

/**
 * Parse a list of MLP hidden-layer dimensions written as integers separated by
 * ',' or ' '. Surrounding '"', ',' and ' ' characters are stripped first.
 * @param hidden_dim_list_str  the raw list text
 * @return the parsed dimensions, in input order
 * @throws std::invalid_argument if an entry is not a number
 */
std::vector<unsigned> parse_mlp_hidden_dim_list(const std::string& hidden_dim_list_str);
12 | 
13 | } // end of namespace utils
14 | } // end of namespace slnn
15 | 
16 | 
17 | #endif


--------------------------------------------------------------------------------
/src/utils/reader.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef UTILS_READER_HPP_
 2 | #define UTILS_READER_HPP_
 3 | 
 4 | #include <iostream>
 5 | #include <algorithm>
 6 | namespace slnn{
 7 | 
 8 | struct Reader
 9 | {
10 |     Reader(std::istream &is);
11 |     bool good();
12 |     size_t count_line();
13 | 
14 |     std::istream &is;
15 | };
16 | 
17 | inline
18 | Reader::Reader(std::istream &is)
19 |     :is(is)
20 | {}
21 | 
22 | inline
23 | bool Reader::good()
24 | {
25 |     return is.good();
26 | }
27 | 
28 | inline
29 | size_t Reader::count_line()
30 | {
31 |     // skip when bad
32 |     if( is.bad() ) return 0; 
33 |     // save state
34 |     std::istream::iostate state_backup = is.rdstate();
35 |     // clear state
36 |     is.clear();
37 |     std::istream::streampos pos_backup = is.tellg();
38 | 
39 |     is.seekg(0);
40 |     size_t line_cnt;
41 |     size_t lf_cnt = std::count(std::istreambuf_iterator<char>(is), std::istreambuf_iterator<char>(), '\n');
42 |     line_cnt = lf_cnt;
43 |     // if the file is not end with '\n' , then line_cnt should plus 1  
44 |     is.unget();
45 |     if( is.get() != '\n' ) { ++line_cnt ; }
46 | 
47 |     // recover state
48 |     is.clear() ; // previous reading may set eofbit
49 |     is.seekg(pos_backup);
50 |     is.setstate(state_backup);
51 | 
52 |     return line_cnt;
53 | }
54 | 
55 | } // end of namespace slnn
56 | #endif


--------------------------------------------------------------------------------
/src/utils/typedeclaration.h:
--------------------------------------------------------------------------------
 1 | #ifndef TYPEDECLARATION_H_INCLUDED_
 2 | #define TYPEDECLARATION_H_INCLUDED_
 3 | 
 4 | #include <vector>
 5 | #include <array>
 6 | #include <utility>
 7 | #include <functional>
 8 | #include "dynet/dynet.h"
 9 | 
10 | namespace slnn{
    // Index of a dictionary entry.
    using Index = int; // dynet::Dict return `int` as index 
    // A sequence of indices.
    using IndexSeq = std::vector<Index>;
    // Two index sequences kept together (presumably input and label sequence — confirm against callers).
    using InstancePair = std::pair<IndexSeq, IndexSeq>;
    // A sequence of strings.
    using Seq = std::vector<std::string>;
    // Fixed-size group of `sz` feature strings.
    template <int sz>
    using FeatureGroup = std::array<std::string, sz>;
    // One FeatureGroup per sequence position.
    template <int sz>
    using FeatureGroupSeq = std::vector<FeatureGroup<sz>>;
    // Fixed-size group of `sz` feature indices.
    template <int sz>
    using FeatureIndexGroup = std::array<Index, sz>;
    // One FeatureIndexGroup per sequence position.
    template <int sz>
    using FeatureIndexGroupSeq = std::vector<FeatureIndexGroup<sz>>;

    // Same underlying type as FeatureIndexGroup<sz>.
    template <int sz>
    using FeaturesIndex = std::array<Index, sz>;
    // Same underlying type as FeatureIndexGroupSeq<sz>.
    template <int sz>
    using FeaturesIndexSeq = std::vector<std::array<Index,sz>>;

    // Function type (not a pointer type): takes an Expression by const reference, returns an Expression.
    using NonLinearFunc = dynet::expr::Expression(const dynet::expr::Expression &); // an function : [input] -> expression , [output]-> expression 
30 | 
// Scalar type aliases.
namespace type{

// Floating-point type alias; currently single precision.
using real = float;

} // end of namespace type
36 | } // end of namespace slnn
37 | #endif


--------------------------------------------------------------------------------
/src/utils/word2vec_embedding_helper.h:
--------------------------------------------------------------------------------
 1 | #ifndef UTILS_WORD2VEC_EMBEDDING_HELPER_H_
 2 | #define UTILS_WORD2VEC_EMBEDDING_HELPER_H_
 3 | 
 4 | #include <fstream>
 5 | 
 6 | #include "dynet/dynet.h"
 7 | #include "dynet/dict.h"
 8 | 
 9 | namespace slnn{
// Static helper functions for word-embedding files; only declarations are given here.
struct Word2vecEmbeddingHelper
{
    /* build_fixed_dict
    *
    * PARAMS
    * ------
    * is : [in] , ifstream .
    *      reference to the word embedding file stream
    * fixed_dict : [in], dynet::Dict
    *              reference to the fixed dict
    *              (NOTE(review): taken by non-const reference and named as the thing
    *               being built, so presumably filled by this call — confirm)
    * unk_str : [in] , string
    *           used to build UNK
    * p_dict_size : [out] , pointer to unsigned [optional, default nullptr]
    *               after building the dict , receives the dict size (if given)
    * p_embedding_dim : [out] , pointer to unsigned [optional, default nullptr]
    *                   after building the dict , receives the embedding dim (if given)
    * RETURN
    * ------
    * void
    */
    static
        void build_fixed_dict(std::ifstream &is, dynet::Dict &fixed_dict, const std::string &unk_str,
            unsigned *p_dict_size = nullptr, unsigned *p_embedding_dim = nullptr);

    /* load_fixed_embedding
    * PARAMS
    * ------
    * is : [in] , ifstream
    *      word embedding file stream
    * fixed_dict : [in], dynet::Dict&
    *      dict mapping word to index
    * fixed_word_dim : [in], unsigned
    *      used as a check when loading the word embedding
    * fixed_lookup_param : [in], dynet::LookupParameter 
    *      where the word embedding is stored
    * RETURN
    * ------
    * void
    */
    static
        void load_fixed_embedding(std::ifstream &is, dynet::Dict &fixed_dict, unsigned fixed_word_dim, dynet::LookupParameter fixed_lookup_param);

    /* calc_hit_rate
    * NOTE(review): undocumented in the original. Presumably the fraction of
    * dynamic_dict entries that are also present in fixed_dict, with
    * fixed_dict_unk_str naming the fixed dict's UNK entry — confirm against
    * the implementation.
    */
    static float calc_hit_rate(dynet::Dict &fixed_dict, dynet::Dict &dynamic_dict, const std::string &fixed_dict_unk_str);
};
54 | 
55 | } // end of namespace slnn
56 | #endif 


--------------------------------------------------------------------------------
/src/utils/writer.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLNN_UTILS_WRITER_H_
 2 | #define SLNN_UTILS_WRITER_H_
 3 | #include <iostream>
 4 | #include <string>
 5 | namespace slnn{
 6 | namespace utils{
 7 | /**
 8 |  * writer.
 9 |  * not thread safe.
10 |  */
11 | struct Writer
12 | {
13 |     // constructor
14 |     Writer(std::ostream &os) :os(os){}
15 |     // interface
16 |     void writeline(const std::string &line){ os << line << std::endl; }
17 |     // data
18 |     std::ostream &os;
19 | };
20 | 
21 | } // end of namespace utils
22 | } // end of namespace slnn
23 | 
24 | #endif


--------------------------------------------------------------------------------