├── .gitignore ├── AUTHORS ├── COPYING ├── ChangeLog ├── INSTALL ├── Makefile.am ├── Makefile.in ├── NEWS ├── README.md ├── aclocal.m4 ├── autom4te.cache ├── output.0 ├── requests └── traces.0 ├── config.guess ├── config.h.in ├── config.sub ├── configure ├── configure.in ├── depcomp ├── doxygen └── html │ ├── _c_09_09.html │ ├── _dictionary_test_8h_source.html │ ├── _f_extor_8h_source.html │ ├── _f_extor_test_8h_source.html │ ├── _n_e_r_suite_test_8h_source.html │ ├── _sentence_tagger_test_8h_source.html │ ├── _test_util_8h_source.html │ ├── annotated.html │ ├── bc_s.png │ ├── cdbpp-1_81_2include_2cdbpp_8h_source.html │ ├── cdbpp_8h_source.html │ ├── class_c_r_f_suite_1_1_trainer2-members.html │ ├── class_c_r_f_suite_1_1_trainer2.html │ ├── class_m_e___model-members.html │ ├── class_m_e___model.html │ ├── class_m_p___c_o_r_r_e_c_t_o_r-members.html │ ├── class_m_p___c_o_r_r_e_c_t_o_r.html │ ├── class_n_e_r_1_1_dictionary-members.html │ ├── class_n_e_r_1_1_dictionary.html │ ├── class_n_e_r_1_1_feature_extractor-members.html │ ├── class_n_e_r_1_1_feature_extractor.html │ ├── class_n_e_r_1_1_o_p_t_i_o_n___p_a_r_s_e_r-members.html │ ├── class_n_e_r_1_1_o_p_t_i_o_n___p_a_r_s_e_r.html │ ├── class_n_e_r_1_1_sentence_tagger-members.html │ ├── class_n_e_r_1_1_sentence_tagger.html │ ├── class_n_e_r_1_1_suite-members.html │ ├── class_n_e_r_1_1_suite.html │ ├── class_n_e_r_1_1_tokenizer-members.html │ ├── class_n_e_r_1_1_tokenizer.html │ ├── class_n_e_r_1_1nersuite__exception-members.html │ ├── class_n_e_r_1_1nersuite__exception.html │ ├── class_o_p_t_i_o_n___p_a_r_s_e_r-members.html │ ├── class_o_p_t_i_o_n___p_a_r_s_e_r.html │ ├── class_paren_converter-members.html │ ├── class_paren_converter.html │ ├── class_s_e_g___r_e_p___c_h_a_n_g_e_r-members.html │ ├── class_s_e_g___r_e_p___c_h_a_n_g_e_r.html │ ├── class_test_exception-members.html │ ├── class_test_exception.html │ ├── classcdbpp_1_1builder__base-members.html │ ├── classcdbpp_1_1builder__base.html │ ├── 
classcdbpp_1_1builder__exception-members.html │ ├── classcdbpp_1_1builder__exception.html │ ├── classcdbpp_1_1cdbpp__base-members.html │ ├── classcdbpp_1_1cdbpp__base.html │ ├── classcdbpp_1_1cdbpp__exception-members.html │ ├── classcdbpp_1_1cdbpp__exception.html │ ├── classcdbpp_1_1murmurhash2-members.html │ ├── classcdbpp_1_1murmurhash2.html │ ├── classes.html │ ├── closed.png │ ├── common_8h_source.html │ ├── correct__mp_2string__utils_2string__utils_8h_source.html │ ├── correct__mp_2text__loader_2text__loader_8h_source.html │ ├── crfsuite2_8h_source.html │ ├── dictionary_8h_source.html │ ├── dictionary__tagger_2cdbpp-1_81_2include_2cdbpp_8h_source.html │ ├── doxygen.css │ ├── doxygen.png │ ├── fext__helper_8h_source.html │ ├── files.html │ ├── functions.html │ ├── functions_func.html │ ├── functions_vars.html │ ├── group___n_e_rsuite.html │ ├── group__cdbpp__api.html │ ├── gtagger_2option__parser_2option__parser_8h_source.html │ ├── index.html │ ├── installdox │ ├── jquery.js │ ├── maxent_8h_source.html │ ├── modules.html │ ├── mp__corrector_8h_source.html │ ├── nav_f.png │ ├── nav_h.png │ ├── ne_8h_source.html │ ├── nersuite.png │ ├── nersuite_8h_source.html │ ├── nersuite__common_2option__parser_8h_source.html │ ├── nersuite__common_2string__utils_8h_source.html │ ├── nersuite__common_2text__loader_8h_source.html │ ├── nersuite__common_2tokenizer_8h_source.html │ ├── nersuite__exception_8h_source.html │ ├── open.png │ ├── opt__parser_8h_source.html │ ├── pages.html │ ├── search │ ├── all_62.html │ ├── all_63.html │ ├── all_64.html │ ├── all_65.html │ ├── all_66.html │ ├── all_67.html │ ├── all_68.html │ ├── all_69.html │ ├── all_6c.html │ ├── all_6d.html │ ├── all_6e.html │ ├── all_6f.html │ ├── all_70.html │ ├── all_72.html │ ├── all_73.html │ ├── all_74.html │ ├── all_77.html │ ├── all_7e.html │ ├── classes_62.html │ ├── classes_63.html │ ├── classes_64.html │ ├── classes_66.html │ ├── classes_68.html │ ├── classes_6d.html │ ├── classes_6e.html │ ├── 
classes_6f.html │ ├── classes_70.html │ ├── classes_73.html │ ├── classes_74.html │ ├── close.png │ ├── functions_62.html │ ├── functions_63.html │ ├── functions_64.html │ ├── functions_65.html │ ├── functions_66.html │ ├── functions_67.html │ ├── functions_69.html │ ├── functions_6c.html │ ├── functions_6e.html │ ├── functions_6f.html │ ├── functions_70.html │ ├── functions_72.html │ ├── functions_73.html │ ├── functions_74.html │ ├── functions_77.html │ ├── functions_7e.html │ ├── mag_sel.png │ ├── nomatches.html │ ├── search.css │ ├── search.js │ ├── search_l.png │ ├── search_m.png │ ├── search_r.png │ ├── variables_62.html │ ├── variables_63.html │ ├── variables_64.html │ ├── variables_65.html │ ├── variables_6c.html │ ├── variables_70.html │ ├── variables_73.html │ └── variables_77.html │ ├── seg__rep__changer_8h_source.html │ ├── sentence__tagger_8h_source.html │ ├── struct_hypothesis-members.html │ ├── struct_hypothesis.html │ ├── struct_m_e___model___data-members.html │ ├── struct_m_e___model___data.html │ ├── struct_m_e___sample-members.html │ ├── struct_m_e___sample.html │ ├── struct_morph_dic-members.html │ ├── struct_morph_dic.html │ ├── struct_n_e_r_1_1_c_o_l_u_m_n___i_n_f_o-members.html │ ├── struct_n_e_r_1_1_c_o_l_u_m_n___i_n_f_o.html │ ├── struct_n_e_r_1_1_n_e-members.html │ ├── struct_n_e_r_1_1_n_e.html │ ├── struct_n_e_r_1_1_p_a_r_a_m-members.html │ ├── struct_n_e_r_1_1_p_a_r_a_m.html │ ├── struct_p_a_r_a_m-members.html │ ├── struct_p_a_r_a_m.html │ ├── struct_token-members.html │ ├── struct_token.html │ ├── structcdbpp_1_1builder__base_1_1bucket-members.html │ ├── structcdbpp_1_1builder__base_1_1bucket.html │ ├── structcdbpp_1_1cdbpp__base_1_1bucket__t-members.html │ ├── structcdbpp_1_1cdbpp__base_1_1bucket__t.html │ ├── structcdbpp_1_1cdbpp__base_1_1hashtable__t-members.html │ ├── structcdbpp_1_1cdbpp__base_1_1hashtable__t.html │ ├── structcdbpp_1_1tableref__t-members.html │ ├── structcdbpp_1_1tableref__t.html │ ├── tab_a.png │ ├── tab_b.png │ 
├── tab_h.png │ ├── tab_s.png │ ├── tabs.css │ ├── tokenizer_2tokenizer_8h_source.html │ ├── tokenizer_8h_source.html │ └── typedefs_8h_source.html ├── help └── nersuite │ ├── .nojekyll │ ├── CNAME │ ├── advanced_usage.html │ ├── basic_usage.html │ ├── command_reference.html │ ├── css │ ├── default.css │ └── index.css │ ├── download.html │ ├── faqs.html │ ├── geniatagger_license.html │ ├── img │ └── fig_01.png │ ├── index.html │ └── installation_guide.html ├── install-sh ├── missing ├── nersuite.sln ├── nersuite.suo ├── nersuite.vcxproj ├── nersuite.vcxproj.user ├── sample ├── test.gtagged.txt └── test.txt ├── scripts └── fixbio.py └── src ├── cdbpp-1.1 └── include │ └── cdbpp.h ├── correct_mp ├── Makefile.am ├── Makefile.in ├── correct_mp.vcxproj ├── correct_mp.vcxproj.user ├── main.cpp ├── mp_corrector.cpp ├── mp_corrector.h ├── seg_rep_changer.cpp └── seg_rep_changer.h ├── dictionary_compiler ├── Makefile.am ├── Makefile.in ├── dictionary_compiler.vcxproj ├── dictionary_compiler.vcxproj.user └── main.cpp ├── dictionary_tagger ├── Makefile.am ├── Makefile.in ├── dictionary_tagger.vcxproj ├── dictionary_tagger.vcxproj.user ├── main.cpp ├── sentence_tagger.cpp └── sentence_tagger.h ├── gtagger ├── Makefile.am ├── Makefile.in ├── geniatagger-3.0.1 │ ├── bidir.cpp │ ├── chunking.cpp │ ├── common.h │ ├── maxent.cpp │ ├── maxent.h │ ├── morph.cpp │ ├── postag.cpp │ └── tokenize.cpp ├── gtagger.vcxproj ├── gtagger.vcxproj.user ├── option_parser │ └── option_parser.h └── run.gtagger.cpp ├── nersuite ├── FExtor.cpp ├── FExtor.h ├── Makefile.am ├── Makefile.in ├── crfsuite-0.12 │ ├── Debug │ │ ├── cqdb.lib │ │ └── crf.lib │ ├── Release │ │ ├── cqdb.lib │ │ └── crf.lib │ └── include │ │ ├── crfsuite.h │ │ ├── crfsuite.hpp │ │ ├── crfsuite_api.hpp │ │ └── os.h ├── crfsuite2.cpp ├── crfsuite2.h ├── main.cpp ├── nersuite.cpp ├── nersuite.h └── typedefs.h ├── nersuite_common ├── Makefile.am ├── Makefile.in ├── dictionary.cpp ├── dictionary.h ├── ne.h ├── nersuite_common.vcxproj 
├── nersuite_common.vcxproj.user ├── nersuite_exception.h ├── option_parser.h ├── string_utils.cpp ├── string_utils.h ├── text_loader.cpp ├── text_loader.h ├── tokenizer.cpp └── tokenizer.h ├── nersuitetest ├── DictionaryTest.h ├── FExtorTest.h ├── NERSuiteTest.h ├── SentenceTaggerTest.h ├── TestUtil.h ├── crfsuite_dummy.cpp ├── fext_helper.cpp ├── fext_helper.h ├── main.cpp ├── nersuitetest.vcxproj └── nersuitetest.vcxproj.user └── tokenizer ├── Makefile.am ├── Makefile.in ├── run.tokenizer.cpp ├── tokenizer.vcxproj └── tokenizer.vcxproj.user /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | .deps 3 | Makefile 4 | config.h 5 | config.log 6 | config.status 7 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Han-Cheol Cho (hccho at is.s.u-tokyo.ac.jp) 2 | Naoaki Okazaki (okazaki at is.s.u-tokyo.ac.jp) 3 | 4 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | NERsuite version 1.2 2 | 3 | 4 | * About 5 | NERsuite is a named entity recognition toolkit. 6 | 7 | 8 | * Copyright and license 9 | This toolkit includes and uses a modified version of the GENIA 3.0.1 tagger, and 10 | the GENIA tagger uses the information extracted from the WORDNET. The NERsuite also 11 | requires the installation of libLBFGS (http://www.chokkan.org/software/liblbfgs/) 12 | and CRFsuite (http://www.chokkan.org/software/crfsuite/). Dictionary compiler and 13 | tagger include CDBPP library (http://www.chokkan.org/software/cdbpp/). Therefore, 14 | the use of this toolkit need to follows the copyrights and the license terms of 15 | these softwares. For other parts of this toolkit, it follows the BSD license. 
16 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2010-07-16 Han-Cheol Cho (hccho at is.s.u-tokyo.ac.jp) 2 | * NERsuite ver. 1.0 3 | - The first release of the NERsuite 4 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | # $Id: Makefile.am,v 1.1.1.1 2010/12/17 07:27:39 hccho Exp $ 2 | # Edited 2010-09-07 hancheol $ 3 | 4 | SUBDIRS = \ 5 | src/nersuite_common \ 6 | src/tokenizer \ 7 | src/gtagger \ 8 | src/nersuite \ 9 | src/dictionary_compiler \ 10 | src/dictionary_tagger \ 11 | src/correct_mp 12 | 13 | docdir = $(prefix)/share/doc/@PACKAGE@ 14 | doc_DATA = README.md INSTALL COPYING AUTHORS ChangeLog 15 | 16 | #models_gtaggerdir = $(prefix)/models_gtagger 17 | #models_gtagger_DATA = \ 18 | #pkgdata_DATA = \ 19 | # models_gtagger/WORDNETLICENSE \ 20 | # .... 21 | 22 | #models_nersuitedir = $(prefix)/models_nersuite 23 | #models_nersuite_DATA = 24 | 25 | EXTRA_DIST = \ 26 | src/cdbpp-1.1/include/cdbpp.h 27 | sample/test.txt 28 | # ${models_gtagger_DATA} \ 29 | # ${models_nersuite_DATA} \ 30 | # sample/test.sh 31 | 32 | AUTOMAKE_OPTIONS = foreign 33 | ACLOCAL_AMFLAGS = -I m4 34 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/NEWS -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | NERsuite version 1.2.1 2 | 3 | 4 | ## About ## 5 | NERsuite is a named entity recognition toolkit. 
6 | 7 | 8 | ## Copyright and license ## 9 | This toolkit includes and uses a modified version of the GENIA 3.0.1 tagger, and 10 | the GENIA tagger uses the information extracted from the WORDNET. The NERsuite also 11 | requires the installation of libLBFGS (http://www.chokkan.org/software/liblbfgs/) 12 | and CRFsuite (http://www.chokkan.org/software/crfsuite/). Dictionary compiler and 13 | tagger include CDBPP library (http://www.chokkan.org/software/cdbpp/). Therefore, 14 | the use of this toolkit needs to follow the copyrights and the license terms of 15 | these software packages. For other parts of this toolkit, it follows the BSD license. 16 | 17 | 18 | ## Thanks to ## 19 | Naoaki Okazaki for his invaluable help in developing and distributing this toolkit 20 | Yoshimasa Tsuruoka for the GENIA tagger 21 | And other colleagues who willingly tested NERsuite and gave comments and advice 22 | 23 | 24 | ## Compatibility ## 25 | * NERsuite has been built and tested on CentOS. 26 | * NERsuite needs a modified version of CRFsuite (https://github.com/priancho/crfsuite). 27 | The original version is missing label bias functionality and will not work with this version of NERsuite. 28 | * NERsuite may not work on OSX (see also [#23](https://github.com/nlplab/nersuite/issues/23) reported by Florian Leitner). 29 | 30 | 31 | ## Citing ## 32 | 33 | If you do make use of NERsuite or its components please cite as follows: 34 | 35 | @misc{, 36 | author = {Cho, Han-Cheol and Okazaki, Naoaki and Miwa, Makoto 37 | and Tsujii, Jun'ichi}, 38 | title = {NERsuite: a named entity recognition toolkit}, 39 | howpublished = {https://github.com/nlplab/nersuite}, 40 | } 41 | 42 | 43 | ## History ## 44 | 45 | Minor updates (2015.01.20) 46 | - New function: Added support for the CRFsuite label bias arg (by @spyssalo) by 47 | using a customized CRFsuite (https://github.com/priancho/crfsuite). 48 | Use "-b [Label name]:[Bias score],[Label name]:[Bias score],... ". 
49 | 50 | 51 | Minor updates (2012.07.06) 52 | - Bug fix: nersuite_gtagger crashed with a segmentation fault when it ran without 53 | the -multidoc argument. 54 | 55 | 56 | Version 1.2.1 (2012.07.02) 57 | - Change: -multidoc [SEP] option for all programs now regards lines "beginning 58 | with" [SEP] as comment lines and prints them without any modification, 59 | whereas it previously dealt only with lines "exactly the same" as [SEP]. 60 | In addition, multiple comment lines can be used from this version. 61 | 62 | - Change: nersuite_tokenizer computes token offsets at document level, whereas 63 | it previously used sentence-level token offsets. When it encounters a document 64 | separator, which is given by the -multidoc [SEP] option, the tokenizer 65 | resets the offset to 0. 66 | 67 | - New function: correct_mp program which removes named entities having mismatched 68 | parentheses is added. Documentation will be added to the homepage 69 | later. 70 | 71 | 72 | Minor updates (2012.06.28) 73 | - Change: the standoff and brat output options now use unique IDs for entities 74 | regardless of their semantic types. The IDs are also counted at 75 | document level, whereas the conll option still counts them at sentence 76 | level. 77 | 78 | - Bug fix: a bug in the string normalization part of the dictionary compiler and 79 | tagger is fixed. 80 | 81 | - New function: brat format for output is added (by Sampo Pyysalo) 82 | 83 | 84 | Version 1.2 85 | - All programs are refactored 86 | 87 | 88 | Version 1.1 89 | - External dictionaries can now be used. Dictionary compiler and tagger are 90 | included. (These two programs use the CDB++ library) 91 | 92 | 93 | Version 1.0 94 | - Initial release of NERsuite. 95 | - Statistical model only. 96 | 97 | -------------------------------------------------------------------------------- /autom4te.cache/requests: -------------------------------------------------------------------------------- 1 | # This file was generated. 
2 | # It contains the lists of macros which have been traced. 3 | # It can be safely removed. 4 | 5 | @request = ( 6 | bless( [ 7 | '0', 8 | 1, 9 | [ 10 | '/usr/share/autoconf' 11 | ], 12 | [ 13 | '/usr/share/autoconf/autoconf/autoconf.m4f', 14 | 'aclocal.m4', 15 | 'configure.in' 16 | ], 17 | { 18 | '_LT_AC_TAGCONFIG' => 1, 19 | 'AM_PROG_F77_C_O' => 1, 20 | 'AC_INIT' => 1, 21 | 'm4_pattern_forbid' => 1, 22 | 'AC_CANONICAL_TARGET' => 1, 23 | '_AM_COND_IF' => 1, 24 | 'AC_CONFIG_LIBOBJ_DIR' => 1, 25 | 'AC_SUBST' => 1, 26 | 'AC_CANONICAL_HOST' => 1, 27 | 'AC_FC_SRCEXT' => 1, 28 | 'AC_PROG_LIBTOOL' => 1, 29 | 'AM_INIT_AUTOMAKE' => 1, 30 | 'AC_CONFIG_SUBDIRS' => 1, 31 | 'AM_AUTOMAKE_VERSION' => 1, 32 | 'LT_CONFIG_LTDL_DIR' => 1, 33 | 'AC_CONFIG_LINKS' => 1, 34 | 'AC_REQUIRE_AUX_FILE' => 1, 35 | 'm4_sinclude' => 1, 36 | 'LT_SUPPORTED_TAG' => 1, 37 | 'AM_MAINTAINER_MODE' => 1, 38 | 'AM_GNU_GETTEXT_INTL_SUBDIR' => 1, 39 | '_m4_warn' => 1, 40 | 'AM_PROG_CXX_C_O' => 1, 41 | '_AM_COND_ENDIF' => 1, 42 | 'AM_ENABLE_MULTILIB' => 1, 43 | 'AM_SILENT_RULES' => 1, 44 | 'AC_CONFIG_FILES' => 1, 45 | 'LT_INIT' => 1, 46 | 'include' => 1, 47 | 'AM_GNU_GETTEXT' => 1, 48 | 'AC_LIBSOURCE' => 1, 49 | 'AC_CANONICAL_BUILD' => 1, 50 | 'AM_PROG_FC_C_O' => 1, 51 | 'AC_FC_FREEFORM' => 1, 52 | 'AH_OUTPUT' => 1, 53 | 'AC_CONFIG_AUX_DIR' => 1, 54 | '_AM_SUBST_NOTMAKE' => 1, 55 | 'AM_PROG_CC_C_O' => 1, 56 | 'sinclude' => 1, 57 | 'm4_pattern_allow' => 1, 58 | 'AM_CONDITIONAL' => 1, 59 | 'AC_CANONICAL_SYSTEM' => 1, 60 | 'AC_CONFIG_HEADERS' => 1, 61 | 'AC_DEFINE_TRACE_LITERAL' => 1, 62 | 'm4_include' => 1, 63 | '_AM_COND_ELSE' => 1, 64 | 'AC_SUBST_TRACE' => 1 65 | } 66 | ], 'Autom4te::Request' ) 67 | ); 68 | 69 | -------------------------------------------------------------------------------- /doxygen/html/bc_s.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/bc_s.png -------------------------------------------------------------------------------- /doxygen/html/closed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/closed.png -------------------------------------------------------------------------------- /doxygen/html/doxygen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/doxygen.png -------------------------------------------------------------------------------- /doxygen/html/installdox: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | %subst = ( ); 4 | $quiet = 0; 5 | 6 | while ( @ARGV ) { 7 | $_ = shift @ARGV; 8 | if ( s/^-// ) { 9 | if ( /^l(.*)/ ) { 10 | $v = ($1 eq "") ? shift @ARGV : $1; 11 | ($v =~ /\/$/) || ($v .= "/"); 12 | $_ = $v; 13 | if ( /(.+)\@(.+)/ ) { 14 | if ( exists $subst{$1} ) { 15 | $subst{$1} = $2; 16 | } else { 17 | print STDERR "Unknown tag file $1 given with option -l\n"; 18 | &usage(); 19 | } 20 | } else { 21 | print STDERR "Argument $_ is invalid for option -l\n"; 22 | &usage(); 23 | } 24 | } 25 | elsif ( /^q/ ) { 26 | $quiet = 1; 27 | } 28 | elsif ( /^\?|^h/ ) { 29 | &usage(); 30 | } 31 | else { 32 | print STDERR "Illegal option -$_\n"; 33 | &usage(); 34 | } 35 | } 36 | else { 37 | push (@files, $_ ); 38 | } 39 | } 40 | 41 | foreach $sub (keys %subst) 42 | { 43 | if ( $subst{$sub} eq "" ) 44 | { 45 | print STDERR "No substitute given for tag file `$sub'\n"; 46 | &usage(); 47 | } 48 | elsif ( ! 
$quiet && $sub ne "_doc" && $sub ne "_cgi" ) 49 | { 50 | print "Substituting $subst{$sub} for each occurrence of tag file $sub\n"; 51 | } 52 | } 53 | 54 | if ( ! @files ) { 55 | if (opendir(D,".")) { 56 | foreach $file ( readdir(D) ) { 57 | $match = ".html"; 58 | next if ( $file =~ /^\.\.?$/ ); 59 | ($file =~ /$match/) && (push @files, $file); 60 | ($file =~ /\.svg/) && (push @files, $file); 61 | ($file =~ "navtree.js") && (push @files, $file); 62 | } 63 | closedir(D); 64 | } 65 | } 66 | 67 | if ( ! @files ) { 68 | print STDERR "Warning: No input files given and none found!\n"; 69 | } 70 | 71 | foreach $f (@files) 72 | { 73 | if ( ! $quiet ) { 74 | print "Editing: $f...\n"; 75 | } 76 | $oldf = $f; 77 | $f .= ".bak"; 78 | unless (rename $oldf,$f) { 79 | print STDERR "Error: cannot rename file $oldf\n"; 80 | exit 1; 81 | } 82 | if (open(F,"<$f")) { 83 | unless (open(G,">$oldf")) { 84 | print STDERR "Error: opening file $oldf for writing\n"; 85 | exit 1; 86 | } 87 | if ($oldf ne "tree.js") { 88 | while (<F>) { 89 | s/doxygen\=\"([^ \"\:\t\>\<]*)\:([^ \"\t\>\<]*)\" (xlink:href|href|src)=\"\2/doxygen\=\"$1:$subst{$1}\" \3=\"$subst{$1}/g; 90 | print G "$_"; 91 | } 92 | } 93 | else { 94 | while (<F>) { 95 | s/\"([^ \"\:\t\>\<]*)\:([^ \"\t\>\<]*)\", \"\2/\"$1:$subst{$1}\" ,\"$subst{$1}/g; 96 | print G "$_"; 97 | } 98 | } 99 | } 100 | else { 101 | print STDERR "Warning file $f does not exist\n"; 102 | } 103 | unlink $f; 104 | } 105 | 106 | sub usage { 107 | print STDERR "Usage: installdox [options] [html-file [html-file ...]]\n"; 108 | print STDERR "Options:\n"; 109 | print STDERR " -l tagfile\@linkName tag file + URL or directory \n"; 110 | print STDERR " -q Quiet mode\n\n"; 111 | exit 1; 112 | } 113 | -------------------------------------------------------------------------------- /doxygen/html/nav_f.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/nav_f.png -------------------------------------------------------------------------------- /doxygen/html/nav_h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/nav_h.png -------------------------------------------------------------------------------- /doxygen/html/nersuite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/nersuite.png -------------------------------------------------------------------------------- /doxygen/html/open.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/open.png -------------------------------------------------------------------------------- /doxygen/html/pages.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | NERsuite: Related Pages 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 16 | 17 | 18 | 19 |
20 | 21 | 22 |
23 | 24 | 25 | 26 | 27 | 28 | 34 | 35 | 36 | 37 | 38 | 39 |
29 |
NERsuite 30 |  1.1.1 31 |
32 | 33 |
40 |
41 | 42 | 43 | 46 | 71 |
72 |
73 |
74 |
Related Pages
75 |
76 |
77 |
Here is a list of all related documentation pages:
81 |
82 | 83 | 88 | 89 | 90 |
91 | 94 |
95 | 96 | 97 | 98 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /doxygen/html/search/all_64.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | db_path 13 | NER::Dictionary 14 |
15 |
16 |
17 |
18 | DIC 19 | NER::COLUMN_INFO 20 |
21 |
22 |
23 |
24 | Dictionary 25 | NER::Dictionary 26 |
27 |
28 |
29 |
30 | Dictionary 31 | NER 32 |
33 |
34 |
Searching...
35 |
No Matches
36 | 42 |
43 | 44 | 45 | -------------------------------------------------------------------------------- /doxygen/html/search/all_66.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | FeatureExtractor 13 | NER 14 |
15 |
16 |
17 |
18 | FeatureExtractor 19 | NER::FeatureExtractor 20 |
21 |
22 |
Searching...
23 |
No Matches
24 | 30 |
31 | 32 | 33 | -------------------------------------------------------------------------------- /doxygen/html/search/all_67.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | get 13 | cdbpp::cdbpp_base 14 |
15 |
16 |
17 |
18 | get_args 19 | NER::OPTION_PARSER 20 |
21 |
22 |
23 |
24 | get_class_count 25 | NER::Dictionary 26 |
27 |
28 |
29 |
30 | get_class_name 31 | NER::Dictionary 32 |
33 |
34 |
35 |
36 | get_classes 37 | NER::Dictionary 38 |
39 |
40 |
41 |
42 | get_value 43 | NER::OPTION_PARSER 44 |
45 |
46 |
Searching...
47 |
No Matches
48 | 54 |
55 | 56 | 57 | -------------------------------------------------------------------------------- /doxygen/html/search/all_68.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | hashtable_t 13 | cdbpp::cdbpp_base 14 |
15 |
16 |
17 |
18 | Hypothesis 19 |
20 |
21 |
Searching...
22 |
No Matches
23 | 29 |
30 | 31 | 32 | -------------------------------------------------------------------------------- /doxygen/html/search/all_69.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | is_open 13 | cdbpp::cdbpp_base 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/all_6c.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | learn 13 | NER::Suite 14 |
15 |
16 |
17 |
18 | LEMMA 19 | NER::COLUMN_INFO 20 |
21 |
22 |
Searching...
23 |
No Matches
24 | 30 |
31 | 32 | 33 | -------------------------------------------------------------------------------- /doxygen/html/search/all_6d.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | ME_Model 13 |
14 |
15 |
16 |
17 | ME_Model_Data 18 |
19 |
20 |
21 |
22 | ME_Sample 23 |
24 |
25 |
26 |
27 | MorphDic 28 |
29 |
30 |
31 |
32 | MP_CORRECTOR 33 |
34 |
35 |
36 |
37 | murmurhash2 38 | cdbpp 39 |
40 |
41 |
Searching...
42 |
No Matches
43 | 49 |
50 | 51 | 52 | -------------------------------------------------------------------------------- /doxygen/html/search/all_6e.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | NE 13 | NER 14 |
15 |
16 | 25 |
26 |
27 | nersuite_exception 28 | NER 29 |
30 |
31 |
Searching...
32 |
No Matches
33 | 39 |
40 | 41 | 42 | -------------------------------------------------------------------------------- /doxygen/html/search/all_6f.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 | 20 |
21 |
22 | operator[] 23 | NER::SentenceTagger 24 |
25 |
26 |
27 |
28 | OPTION_PARSER 29 |
30 |
31 |
32 |
33 | OPTION_PARSER 34 | NER 35 |
36 |
37 |
38 |
39 | output_params 40 | NER::OPTION_PARSER 41 |
42 |
43 |
Searching...
44 |
No Matches
45 | 51 |
52 | 53 | 54 | -------------------------------------------------------------------------------- /doxygen/html/search/all_70.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | PARAM 13 |
14 |
15 |
16 |
17 | PARAM 18 | NER 19 |
20 |
21 |
22 |
23 | ParenConverter 24 |
25 |
26 |
27 |
28 | parse 29 | NER::OPTION_PARSER 30 |
31 |
32 |
33 |
34 | POS 35 | NER::COLUMN_INFO 36 |
37 |
38 |
39 |
40 | put 41 | cdbpp::builder_base 42 |
43 |
44 |
Searching...
45 |
No Matches
46 | 52 |
53 | 54 | 55 | -------------------------------------------------------------------------------- /doxygen/html/search/all_72.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | read 13 | NER::SentenceTagger 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/all_74.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | tableref_t 13 | cdbpp 14 |
15 |
16 |
17 |
18 | tag 19 | NER::Suite 20 |
21 |
22 |
23 |
24 | tag_nes 25 | NER::SentenceTagger 26 |
27 |
28 |
29 |
30 | TestException 31 |
32 |
33 |
34 |
35 | Token 36 |
37 |
38 |
39 |
40 | tokenize 41 | NER::Tokenizer 42 |
43 |
44 |
45 |
46 | Tokenizer 47 | NER 48 |
49 |
50 |
51 |
52 | Trainer2 53 | CRFSuite 54 |
55 |
56 |
Searching...
57 |
No Matches
58 | 64 |
65 | 66 | 67 | -------------------------------------------------------------------------------- /doxygen/html/search/all_77.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | what 13 | NER::nersuite_exception 14 |
15 |
16 |
17 |
18 | WORD 19 | NER::COLUMN_INFO 20 |
21 |
22 |
Searching...
23 |
No Matches
24 | 30 |
31 | 32 | 33 | -------------------------------------------------------------------------------- /doxygen/html/search/all_7e.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | ~builder_base 13 | cdbpp::builder_base 14 |
15 |
16 |
17 |
18 | ~cdbpp_base 19 | cdbpp::cdbpp_base 20 |
21 |
22 |
23 |
24 | ~Dictionary 25 | NER::Dictionary 26 |
27 |
28 |
29 |
30 | ~nersuite_exception 31 | NER::nersuite_exception 32 |
33 |
34 |
35 |
36 | ~SentenceTagger 37 | NER::SentenceTagger 38 |
39 |
40 |
Searching...
41 |
No Matches
42 | 48 |
49 | 50 | 51 | -------------------------------------------------------------------------------- /doxygen/html/search/classes_62.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | bucket 13 | cdbpp::builder_base 14 |
15 |
16 |
17 |
18 | bucket_t 19 | cdbpp::cdbpp_base 20 |
21 |
22 |
23 |
24 | builder_base 25 | cdbpp 26 |
27 |
28 |
29 |
30 | builder_exception 31 | cdbpp 32 |
33 |
34 |
Searching...
35 |
No Matches
36 | 42 |
43 | 44 | 45 | -------------------------------------------------------------------------------- /doxygen/html/search/classes_63.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | cdbpp_base 13 | cdbpp 14 |
15 |
16 |
17 |
18 | cdbpp_exception 19 | cdbpp 20 |
21 |
22 |
23 |
24 | COLUMN_INFO 25 | NER 26 |
27 |
28 |
Searching...
29 |
No Matches
30 | 36 |
37 | 38 | 39 | -------------------------------------------------------------------------------- /doxygen/html/search/classes_64.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | Dictionary 13 | NER 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/classes_66.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | FeatureExtractor 13 | NER 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/classes_68.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | hashtable_t 13 | cdbpp::cdbpp_base 14 |
15 |
16 |
17 |
18 | Hypothesis 19 |
20 |
21 |
Searching...
22 |
No Matches
23 | 29 |
30 | 31 | 32 | -------------------------------------------------------------------------------- /doxygen/html/search/classes_6d.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | ME_Model 13 |
14 |
15 |
16 |
17 | ME_Model_Data 18 |
19 |
20 |
21 |
22 | ME_Sample 23 |
24 |
25 |
26 |
27 | MorphDic 28 |
29 |
30 |
31 |
32 | MP_CORRECTOR 33 |
34 |
35 |
36 |
37 | murmurhash2 38 | cdbpp 39 |
40 |
41 |
Searching...
42 |
No Matches
43 | 49 |
50 | 51 | 52 | -------------------------------------------------------------------------------- /doxygen/html/search/classes_6e.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | NE 13 | NER 14 |
15 |
16 |
17 |
18 | nersuite_exception 19 | NER 20 |
21 |
22 |
Searching...
23 |
No Matches
24 | 30 |
31 | 32 | 33 | -------------------------------------------------------------------------------- /doxygen/html/search/classes_6f.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | OPTION_PARSER 13 | NER 14 |
15 |
16 |
17 |
18 | OPTION_PARSER 19 |
20 |
21 |
Searching...
22 |
No Matches
23 | 29 |
30 | 31 | 32 | -------------------------------------------------------------------------------- /doxygen/html/search/classes_70.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | PARAM 13 |
14 |
15 |
16 |
17 | PARAM 18 | NER 19 |
20 |
21 |
22 |
23 | ParenConverter 24 |
25 |
26 |
Searching...
27 |
No Matches
28 | 34 |
35 | 36 | 37 | -------------------------------------------------------------------------------- /doxygen/html/search/classes_73.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | SEG_REP_CHANGER 13 |
14 |
15 |
16 |
17 | SentenceTagger 18 | NER 19 |
20 |
21 |
22 |
23 | Suite 24 | NER 25 |
26 |
27 |
Searching...
28 |
No Matches
29 | 35 |
36 | 37 | 38 | -------------------------------------------------------------------------------- /doxygen/html/search/classes_74.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | tableref_t 13 | cdbpp 14 |
15 |
16 |
17 |
18 | TestException 19 |
20 |
21 |
22 |
23 | Token 24 |
25 |
26 |
27 |
28 | Tokenizer 29 | NER 30 |
31 |
32 |
33 |
34 | Trainer2 35 | CRFSuite 36 |
37 |
38 |
Searching...
39 |
No Matches
40 | 46 |
47 | 48 | 49 | -------------------------------------------------------------------------------- /doxygen/html/search/close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/search/close.png -------------------------------------------------------------------------------- /doxygen/html/search/functions_62.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | begin 13 | NER::SentenceTagger 14 |
15 |
16 |
17 |
18 | build 19 | NER::Dictionary 20 |
21 |
22 |
23 |
24 | builder_base 25 | cdbpp::builder_base 26 |
27 |
28 |
Searching...
29 |
No Matches
30 | 36 |
37 | 38 | 39 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_63.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 | 20 |
21 |
22 | close 23 | cdbpp::cdbpp_base 24 |
25 |
26 |
Searching...
27 |
No Matches
28 | 34 |
35 | 36 | 37 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_64.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | Dictionary 13 | NER::Dictionary 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_66.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | FeatureExtractor 13 | NER::FeatureExtractor 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_67.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | get 13 | cdbpp::cdbpp_base 14 |
15 |
16 |
17 |
18 | get_args 19 | NER::OPTION_PARSER 20 |
21 |
22 |
23 |
24 | get_class_count 25 | NER::Dictionary 26 |
27 |
28 |
29 |
30 | get_class_name 31 | NER::Dictionary 32 |
33 |
34 |
35 |
36 | get_classes 37 | NER::Dictionary 38 |
39 |
40 |
41 |
42 | get_value 43 | NER::OPTION_PARSER 44 |
45 |
46 |
Searching...
47 |
No Matches
48 | 54 |
55 | 56 | 57 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_69.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | is_open 13 | cdbpp::cdbpp_base 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_6c.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | learn 13 | NER::Suite 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_6e.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 | 19 |
Searching...
20 |
No Matches
21 | 27 |
28 | 29 | 30 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_6f.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 | 20 |
21 |
22 | operator[] 23 | NER::SentenceTagger 24 |
25 |
26 |
27 |
28 | output_params 29 | NER::OPTION_PARSER 30 |
31 |
32 |
Searching...
33 |
No Matches
34 | 40 |
41 | 42 | 43 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_70.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | parse 13 | NER::OPTION_PARSER 14 |
15 |
16 |
17 |
18 | put 19 | cdbpp::builder_base 20 |
21 |
22 |
Searching...
23 |
No Matches
24 | 30 |
31 | 32 | 33 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_72.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | read 13 | NER::SentenceTagger 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_73.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | SentenceTagger 13 | NER::SentenceTagger 14 |
15 |
16 |
17 |
18 | set_normalize_type 19 | NER::SentenceTagger 20 |
21 |
22 |
23 | 30 |
31 |
32 |
33 | Suite 34 | NER::Suite 35 |
36 |
37 |
Searching...
38 |
No Matches
39 | 45 |
46 | 47 | 48 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_74.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | tag 13 | NER::Suite 14 |
15 |
16 |
17 |
18 | tag_nes 19 | NER::SentenceTagger 20 |
21 |
22 |
23 |
24 | tokenize 25 | NER::Tokenizer 26 |
27 |
28 |
Searching...
29 |
No Matches
30 | 36 |
37 | 38 | 39 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_77.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | what 13 | NER::nersuite_exception 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/functions_7e.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | ~builder_base 13 | cdbpp::builder_base 14 |
15 |
16 |
17 |
18 | ~cdbpp_base 19 | cdbpp::cdbpp_base 20 |
21 |
22 |
23 |
24 | ~Dictionary 25 | NER::Dictionary 26 |
27 |
28 |
29 |
30 | ~nersuite_exception 31 | NER::nersuite_exception 32 |
33 |
34 |
35 |
36 | ~SentenceTagger 37 | NER::SentenceTagger 38 |
39 |
40 |
Searching...
41 |
No Matches
42 | 48 |
49 | 50 | 51 | -------------------------------------------------------------------------------- /doxygen/html/search/mag_sel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/search/mag_sel.png -------------------------------------------------------------------------------- /doxygen/html/search/nomatches.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
No Matches
10 |
11 | 12 | 13 | -------------------------------------------------------------------------------- /doxygen/html/search/search_l.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/search/search_l.png -------------------------------------------------------------------------------- /doxygen/html/search/search_m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/search/search_m.png -------------------------------------------------------------------------------- /doxygen/html/search/search_r.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/search/search_r.png -------------------------------------------------------------------------------- /doxygen/html/search/variables_62.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | BEG 13 | NER::COLUMN_INFO 14 |
15 |
16 |
17 |
18 | begin 19 | NER::NE 20 |
21 |
22 |
Searching...
23 |
No Matches
24 | 30 |
31 | 32 | 33 | -------------------------------------------------------------------------------- /doxygen/html/search/variables_63.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | CHUNK 13 | NER::COLUMN_INFO 14 |
15 |
16 |
17 |
18 | classes 19 | NER::NE 20 |
21 |
22 |
Searching...
23 |
No Matches
24 | 30 |
31 | 32 | 33 | -------------------------------------------------------------------------------- /doxygen/html/search/variables_64.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | db_path 13 | NER::Dictionary 14 |
15 |
16 |
17 |
18 | DIC 19 | NER::COLUMN_INFO 20 |
21 |
22 |
Searching...
23 |
No Matches
24 | 30 |
31 | 32 | 33 | -------------------------------------------------------------------------------- /doxygen/html/search/variables_65.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | END 13 | 17 |
18 |
19 |
Searching...
20 |
No Matches
21 | 27 |
28 | 29 | 30 | -------------------------------------------------------------------------------- /doxygen/html/search/variables_6c.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | LEMMA 13 | NER::COLUMN_INFO 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/variables_70.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | POS 13 | NER::COLUMN_INFO 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/variables_73.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | sim 13 | NER::NE 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/search/variables_77.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
Loading...
10 |
11 |
12 | WORD 13 | NER::COLUMN_INFO 14 |
15 |
16 |
Searching...
17 |
No Matches
18 | 24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /doxygen/html/tab_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/tab_a.png -------------------------------------------------------------------------------- /doxygen/html/tab_b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/tab_b.png -------------------------------------------------------------------------------- /doxygen/html/tab_h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/tab_h.png -------------------------------------------------------------------------------- /doxygen/html/tab_s.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/doxygen/html/tab_s.png -------------------------------------------------------------------------------- /doxygen/html/tabs.css: -------------------------------------------------------------------------------- 1 | .tabs, .tabs2, .tabs3 { 2 | background-image: url('tab_b.png'); 3 | width: 100%; 4 | z-index: 101; 5 | font-size: 13px; 6 | } 7 | 8 | .tabs2 { 9 | font-size: 10px; 10 | } 11 | .tabs3 { 12 | font-size: 9px; 13 | } 14 | 15 | .tablist { 16 | margin: 0; 17 | padding: 0; 18 | display: table; 19 | } 20 | 21 | .tablist li { 22 | float: left; 23 | display: table-cell; 24 | background-image: url('tab_b.png'); 25 | line-height: 36px; 26 | list-style: none; 27 | } 28 | 29 | .tablist a { 30 | display: block; 31 | padding: 0 20px; 32 | font-weight: bold; 33 | background-image:url('tab_s.png'); 34 
| background-repeat:no-repeat; 35 | background-position:right; 36 | color: #283A5D; 37 | text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9); 38 | text-decoration: none; 39 | outline: none; 40 | } 41 | 42 | .tabs3 .tablist a { 43 | padding: 0 10px; 44 | } 45 | 46 | .tablist a:hover { 47 | background-image: url('tab_h.png'); 48 | background-repeat:repeat-x; 49 | color: #fff; 50 | text-shadow: 0px 1px 1px rgba(0, 0, 0, 1.0); 51 | text-decoration: none; 52 | } 53 | 54 | .tablist li.current a { 55 | background-image: url('tab_a.png'); 56 | background-repeat:repeat-x; 57 | color: #fff; 58 | text-shadow: 0px 1px 1px rgba(0, 0, 0, 1.0); 59 | } 60 | -------------------------------------------------------------------------------- /help/nersuite/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/help/nersuite/.nojekyll -------------------------------------------------------------------------------- /help/nersuite/CNAME: -------------------------------------------------------------------------------- 1 | nersuite.nlplab.org 2 | -------------------------------------------------------------------------------- /help/nersuite/css/index.css: -------------------------------------------------------------------------------- 1 | /* CSS Document */ 2 | 3 | 4 | #maindiv { 5 | border-top: 1px dotted #999999; 6 | } 7 | 8 | 9 | #maindiv h2 10 | #welcome { 11 | color: #333333; 12 | font-family: Geneva, Arial, Helvetica, sans-serif; 13 | font-size: 150%; 14 | line-height: 1.5em; 15 | letter-spacing: 0.01em; 16 | margin: 5px 0 15px 5px; 17 | } 18 | 19 | 20 | #index1 { 21 | padding: 0 10px 20px 0; 22 | width: 440px; 23 | float: left; 24 | border-right: dotted 1px #999999; 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /help/nersuite/faqs.html: 
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | NERsuite - a Named Entity Recognition toolkit 16 | 17 | 18 | 19 |
20 |
21 |
22 |
23 |
24 |

25 | NERsuite

26 |
27 |
28 |
29 |
30 |

31 | A Named Entity Recognition toolkit 32 |

33 |
34 |
35 |
36 |
37 |

38 | FAQs

39 |

40 |

41 |
42 |
43 |
44 |
45 | 69 |
70 |
71 | 78 |
79 | 80 | 81 | -------------------------------------------------------------------------------- /help/nersuite/geniatagger_license.html: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | NERsuite - a Named Entity Recognition toolkit 16 | 17 | 18 | 19 |
20 | This software uses a dictionary in WordNet for morphological analysis.
21 | See also ./morphdic/WORNETLICENSE
22 | 
23 | ------------------------------------------------------------------------
24 | GENIA Tagger License Information 
25 | 
26 | Copyright (c) 2005, Tsujii Laboratory, The University of Tokyo
27 | All rights reserved.
28 | 
29 | Redistribution and use in source and binary forms, with or without
30 | modification, are permitted for non-commercial purposes provided
31 | that the following conditions are met:
32 | 
33 | - Redistributions of source code must retain the above copyright
34 |   notice, this list of conditions and the following disclaimer.
35 | 
36 | - Redistributions in binary form must reproduce the above copyright
37 |   notice, this list of conditions and the following disclaimer in the
38 |   documentation and/or other materials provided with the distribution.
39 | 
40 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
50 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51 | 
52 | 53 | 54 | -------------------------------------------------------------------------------- /help/nersuite/img/fig_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/help/nersuite/img/fig_01.png -------------------------------------------------------------------------------- /nersuite.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/nersuite.suo -------------------------------------------------------------------------------- /nersuite.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | -------------------------------------------------------------------------------- /sample/test.gtagged.txt: -------------------------------------------------------------------------------- 1 | 0 1 5 5 CD B-NP 2 | 1 2 - - HYPH I-NP 3 | 2 5 LOX LOX NN I-NP 4 | 6 9 and and CC I-NP 5 | 10 11 5 5 CD I-NP 6 | 11 12 - - HYPH I-NP 7 | 12 15 LOX LOX NN I-NP 8 | 16 26 activating activate VBG B-VP 9 | 27 34 protein protein NN B-NP 10 | 35 36 ( ( ( O 11 | 36 40 FLAP FLAP NN B-NP 12 | 40 41 ) ) ) O 13 | 42 45 are be VBP B-VP 14 | -------------------------------------------------------------------------------- /sample/test.txt: -------------------------------------------------------------------------------- 1 | ZAP-70 tyrosine kinase, CD45, and T cell receptor involvement in UV- and H2O2-induced T cell signal transduction. 
2 | -------------------------------------------------------------------------------- /src/correct_mp/Makefile.am: -------------------------------------------------------------------------------- 1 | # $Id: Makefile.am,v 1.1.1.1 2010/12/17 07:27:40 hccho Exp $ 2 | 3 | bin_PROGRAMS = nersuite_correct_mp 4 | 5 | #man_MANS = nersuite.correct.mp.1 6 | 7 | #EXTRA_DIST = ${man_MANS} 8 | 9 | #EXTRA_DIST = 10 | 11 | nersuite_correct_mp_SOURCES = \ 12 | main.cpp \ 13 | ../nersuite_common/string_utils.h \ 14 | ../nersuite_common/string_utils.cpp \ 15 | ../nersuite_common/text_loader.h \ 16 | ../nersuite_common/text_loader.cpp \ 17 | ../nersuite_common/option_parser.h \ 18 | seg_rep_changer.h \ 19 | seg_rep_changer.cpp \ 20 | mp_corrector.h \ 21 | mp_corrector.cpp 22 | 23 | nersuite_correct_mp_CPPFLAGS = @CFLAGS@ 24 | INCLUDES = -I../nersuite_common @INCLUDES@ 25 | 26 | #AM_CFLAGS = @CFLAGS@ 27 | #INCLUDES = @INCLUDES@ 28 | #AM_LDFLAGS = @LDFLAGS@ 29 | #nersuite_CFLAGS = -I$(top_builddir)/lib/crf/include 30 | #nersuite_LDADD = $(top_builddir)/lib/crf/libcrf.la 31 | 32 | -------------------------------------------------------------------------------- /src/correct_mp/correct_mp.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | {61C88B76-A77A-414D-BAC0-64F1C078A04D} 15 | correct_mp 16 | 17 | 18 | 19 | Application 20 | true 21 | MultiByte 22 | 23 | 24 | Application 25 | false 26 | true 27 | MultiByte 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | nersuite_correct_mp 41 | 42 | 43 | nersuite_correct_mp 44 | 45 | 46 | 47 | Level3 48 | Disabled 49 | 50 | 51 | true 52 | 53 | 54 | 55 | 56 | Level3 57 | MaxSpeed 58 | true 59 | true 60 | 61 | 62 | true 63 | true 64 | true 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 
-------------------------------------------------------------------------------- /src/correct_mp/correct_mp.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | -------------------------------------------------------------------------------- /src/correct_mp/main.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // 3 | // 4 | 5 | 6 | using namespace std; 7 | 8 | #include 9 | 10 | #include "../nersuite_common/string_utils.h" 11 | #include "../nersuite_common/text_loader.h" 12 | #include "../nersuite_common/option_parser.h" 13 | #include "seg_rep_changer.h" 14 | #include "mp_corrector.h" 15 | 16 | 17 | typedef vector< string > V1_STR; 18 | typedef vector< V1_STR > V2_STR; 19 | 20 | 21 | void print_data( const V2_STR &one_sent ); 22 | void print_usage( char* argv[] ); 23 | 24 | 25 | 26 | int main( int argc, char* argv[] ) 27 | { 28 | 29 | // 1. Parse command line arguments 30 | NER::OPTION_PARSER opt_parser; 31 | opt_parser.parse( argc-1, &argv[1] ); 32 | 33 | // 1.1. Get mandatory arguments 34 | string str_tokCol = "", str_neCol = ""; 35 | if( (opt_parser.get_value( "-TOK_COL", str_tokCol ) == false) || 36 | (opt_parser.get_value( "-NE_COL", str_neCol ) == false) ) 37 | { 38 | print_usage( argv ); 39 | return -1; 40 | } 41 | int tok_col = atoi( str_tokCol.c_str() ), ne_col = atoi( str_neCol.c_str() ); 42 | 43 | // 1.2. Get optional arguments 44 | string multidoc_separator = ""; 45 | bool multidoc_mode = opt_parser.get_value("-multidoc", multidoc_separator); 46 | 47 | 48 | // 2. Handle NEs having mismatched parentheses 49 | int removed_NEs = 0; 50 | bool separator_read = false; 51 | 52 | V2_STR one_sent; 53 | MP_CORRECTOR mp_corrector; 54 | SEG_REP_CHANGER sp_changer; 55 | 56 | while( !cin.eof() ) { 57 | // 0. Read a sentence 58 | int sent_len = NER::get_sent( cin, one_sent, multidoc_separator, separator_read ); 59 | 60 | // 1. 
Skip blank lines 61 | if( sent_len == 0 ) { 62 | continue; 63 | } 64 | 65 | // 2. Pass it through the output stream if it is comment 66 | if( separator_read == true ) { 67 | print_data( one_sent ); 68 | continue; 69 | } 70 | 71 | // 3. Remove entities having mismatched parenthesis, and print them otherwise 72 | // 3.1. Convert the data from the IOB2 rep. to the IOBES rep. 73 | sp_changer.IOB2_to_IOBES(one_sent, ne_col); 74 | // 3.2. Remove NEs having mismatched parentheses 75 | removed_NEs += mp_corrector.remove_mismatches(one_sent, tok_col, ne_col); 76 | // 3.3. Recover to the IOB2 rep. 77 | sp_changer.IOBES_to_IOB2(one_sent, ne_col); 78 | // 3.4. Print it 79 | print_data( one_sent ); 80 | } 81 | 82 | cerr << endl; 83 | cerr << "The number of removed NEs: " << removed_NEs << endl; 84 | 85 | return 0; 86 | } 87 | 88 | 89 | void print_data( const V2_STR &one_sent ) 90 | { 91 | for( V2_STR::const_iterator i_row = one_sent.begin(); i_row != one_sent.end(); ++i_row ) { 92 | for( V1_STR::const_iterator i_col = i_row->begin(); i_col != i_row->end(); ++i_col ) { 93 | cout << *(i_col); 94 | if( (i_col + 1) != i_row->end() ) 95 | cout << "\t"; 96 | } 97 | cout << endl; 98 | } 99 | cout << endl; 100 | } 101 | 102 | 103 | 104 | void print_usage( char* argv[] ) 105 | { 106 | cerr << "Usage: " << argv[0] << " -TOK_COL [#column] -NE_COL [#column] -multidoc [SEP] < input file" << endl; 107 | cerr << " Mandatory arguments. 
" << endl; 108 | cerr << " -TOK_COL #column : the column index in which tokens appear (begins at 0)" <at(NE_COL); 22 | 23 | switch( ne_label[0] ) { 24 | case 'O': 25 | break; 26 | case 'B': 27 | beg = i_row; 28 | break; 29 | case 'I': 30 | break; 31 | case 'E': 32 | end = i_row + 1; 33 | removed_NEs += validate_to_remove( beg, end, TOK_COL, NE_COL ); // erase a current NE if it has a mismatched parenthesis 34 | break; 35 | case 'S': 36 | beg = i_row; 37 | end = i_row + 1; 38 | removed_NEs += validate_to_remove( beg, end, TOK_COL, NE_COL ); 39 | break; 40 | } 41 | } 42 | 43 | return removed_NEs; 44 | } 45 | 46 | int MP_CORRECTOR::validate_to_remove(const V2_STR::iterator beg, const V2_STR::iterator end, int TOK_COL, int NE_COL ) 47 | { 48 | int val = 0; 49 | 50 | for( V2_STR::iterator i_row = beg; i_row != end; ++i_row ) { 51 | string tok = i_row->at(TOK_COL); 52 | 53 | if( (tok == "(") || (tok == "[") || (tok == "{") ) { 54 | ++val; 55 | }else if( (tok == ")") || (tok == "]") || (tok == "}") ) { 56 | --val; 57 | } 58 | 59 | if( val < 0 ) // absolute error 60 | break; 61 | } 62 | 63 | if( val != 0 ) { 64 | for( V2_STR::iterator i_row = beg; i_row != end; ++i_row ) 65 | (i_row->at(NE_COL)) = "O"; 66 | 67 | return 1; 68 | }else { 69 | return 0; 70 | } 71 | } 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /src/correct_mp/mp_corrector.h: -------------------------------------------------------------------------------- 1 | // 2 | // 3 | // 4 | 5 | 6 | #ifndef _H_MISMATCHED_PARENTHESES_CORRECTOR_ 7 | #define _H_MISMATCHED_PARENTHESES_CORRECTOR_ 8 | 9 | 10 | using namespace std; 11 | 12 | #include 13 | #include 14 | 15 | 16 | // Correct NEs having mismatched parentheses in a given sentence 17 | class MP_CORRECTOR { 18 | private: 19 | typedef vector< string > V1_STR; 20 | typedef vector< V1_STR > V2_STR; 21 | 22 | int validate_to_remove( const V2_STR::iterator beg, const V2_STR::iterator end, int TOK_COL, int 
NE_COL ); 23 | 24 | public: 25 | int remove_mismatches(V2_STR &sent, int TOK_COL, int NE_COL); 26 | 27 | //TODO: implement different types of handling functions 28 | // extend_mismatch(); 29 | // split_mismatch(); 30 | }; 31 | 32 | 33 | #endif 34 | 35 | 36 | -------------------------------------------------------------------------------- /src/correct_mp/seg_rep_changer.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // 3 | // 4 | 5 | 6 | using namespace std; 7 | 8 | 9 | #include "seg_rep_changer.h" 10 | 11 | 12 | bool SEG_REP_CHANGER::IOB2_to_IOBES( V2_STR &sent, int NE_COL) 13 | { 14 | V2_STR::iterator new_end = sent.end() - 1; 15 | string cur_ne_label, next_ne_label; 16 | 17 | // 1. Handle NE labels before the last one 18 | for( V2_STR::iterator i_row = sent.begin(); i_row != new_end; ++i_row ) 19 | { 20 | cur_ne_label = i_row->at(NE_COL), next_ne_label = (i_row + 1)->at(NE_COL); 21 | 22 | if( cur_ne_label[0] == 'B' ) { 23 | if( (next_ne_label[0] == 'B') || ( next_ne_label[0] == 'O' ) ) { 24 | i_row->at(NE_COL) = 'S'; 25 | if( cur_ne_label.length() > 1 ) 26 | i_row->at(NE_COL) += cur_ne_label.substr(1, string::npos); 27 | } 28 | }else if( cur_ne_label[0] == 'I' ) { 29 | if( (next_ne_label[0] == 'B') || ( next_ne_label[0] == 'O' ) ) { 30 | i_row->at(NE_COL) = 'E'; 31 | if( cur_ne_label.length() > 1 ) 32 | i_row->at(NE_COL) += cur_ne_label.substr(1, string::npos); 33 | } 34 | }else if( cur_ne_label[0] == 'O' ) { 35 | // nothing 36 | }else { 37 | return false; // Invalid segment label; the conversion process failed! 38 | } 39 | } 40 | 41 | // 2. 
Handle the last NE label of a sentence 42 | cur_ne_label = sent.back().at(NE_COL); 43 | 44 | if( cur_ne_label[0] == 'B' ) { 45 | sent.back().at(NE_COL) = 'S'; 46 | if( cur_ne_label.length() > 1 ) 47 | sent.back().at(NE_COL) += cur_ne_label.substr(1, string::npos); 48 | }else if( cur_ne_label[0] == 'I' ) { 49 | sent.back().at(NE_COL) = 'E'; 50 | if( cur_ne_label.length() > 1 ) 51 | sent.back().at(NE_COL) += cur_ne_label.substr(1, string::npos); 52 | }else if( cur_ne_label[0] == 'O' ) { 53 | 54 | }else { 55 | return false; 56 | } 57 | 58 | return true; 59 | } 60 | 61 | 62 | 63 | bool SEG_REP_CHANGER::IOBES_to_IOB2( V2_STR &sent, int NE_COL) 64 | { 65 | string cur_ne_label; 66 | 67 | for( V2_STR::iterator i_row = sent.begin(); i_row != sent.end(); ++i_row ) 68 | { 69 | cur_ne_label = i_row->at(NE_COL); 70 | 71 | if( cur_ne_label[0] == 'S' ) { 72 | i_row->at(NE_COL) = 'B'; 73 | if( cur_ne_label.length() > 1 ) 74 | i_row->at(NE_COL) += cur_ne_label.substr(1, string::npos); 75 | }else if( cur_ne_label[0] == 'E' ) { 76 | i_row->at(NE_COL) = 'I'; 77 | if( cur_ne_label.length() > 1 ) 78 | i_row->at(NE_COL) += cur_ne_label.substr(1, string::npos); 79 | }else if ( (cur_ne_label[0] == 'B' ) || (cur_ne_label[0] == 'I' ) || (cur_ne_label[0] == 'O' ) ) { 80 | // same segment label 81 | }else { 82 | return false; 83 | } 84 | } 85 | 86 | return true; 87 | } 88 | 89 | -------------------------------------------------------------------------------- /src/correct_mp/seg_rep_changer.h: -------------------------------------------------------------------------------- 1 | // 2 | // 3 | // 4 | 5 | 6 | #ifndef _H_IOB2_TO_IOBES_ 7 | #define _H_IOB2_TO_IOBES_ 8 | 9 | 10 | using namespace std; 11 | 12 | #include 13 | #include 14 | 15 | 16 | class SEG_REP_CHANGER { 17 | private: 18 | typedef vector< string > V1_STR; 19 | typedef vector< V1_STR > V2_STR; 20 | 21 | public: 22 | // To a finer-grained rep. 
23 | bool IOB2_to_IOBES( V2_STR &sent, int NE_COL ); // Change the IOB2 representation to the IOBES 24 | 25 | // To a coarser-grained rep. 26 | bool IOBES_to_IOB2( V2_STR &sent, int NE_COL ); // Change the IOBES representation to the IOB2 27 | 28 | }; 29 | 30 | 31 | #endif 32 | 33 | 34 | -------------------------------------------------------------------------------- /src/dictionary_compiler/Makefile.am: -------------------------------------------------------------------------------- 1 | # $Id: Makefile.am,v 1.1.1.1 2010/12/17 07:27:40 hccho Exp $ 2 | 3 | bin_PROGRAMS = nersuite_dic_compiler 4 | 5 | #man_MANS = nersuite.dic.compiler.1 6 | 7 | #EXTRA_DIST = ${man_MANS} 8 | 9 | #EXTRA_DIST = 10 | 11 | nersuite_dic_compiler_SOURCES = \ 12 | main.cpp 13 | 14 | nersuite_dic_compiler_CPPFLAGS = @CFLAGS@ 15 | INCLUDES = -Icdbpp-1.1/include @INCLUDES@ 16 | 17 | #AM_CFLAGS = @CFLAGS@ 18 | #INCLUDES = @INCLUDES@ 19 | #AM_LDFLAGS = @LDFLAGS@ 20 | nersuite_dic_compiler_LDADD = ../nersuite_common/libnersuite_common.a 21 | 22 | -------------------------------------------------------------------------------- /src/dictionary_compiler/dictionary_compiler.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | {A807E77C-3E9F-4478-9584-C90D95E2B944} 15 | dictionary_compiler 16 | 17 | 18 | 19 | Application 20 | true 21 | MultiByte 22 | 23 | 24 | Application 25 | false 26 | true 27 | MultiByte 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | nersuite_dic_compiler 41 | 42 | 43 | nersuite_dic_compiler 44 | 45 | 46 | 47 | Level3 48 | Disabled 49 | $(ProjectDir)cdbpp-1.1\include;%(AdditionalIncludeDirectories) 50 | 51 | 52 | true 53 | $(OutDir)\nersuite_common.lib 54 | 55 | 56 | 57 | 58 | Level3 59 | MaxSpeed 60 | true 61 | true 62 | $(ProjectDir)cdbpp-1.1\include;%(AdditionalIncludeDirectories) 63 | 64 | 65 | true 66 | true 67 | true 68 | 
$(OutDir)\nersuite_common.lib 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /src/dictionary_compiler/dictionary_compiler.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | -------------------------------------------------------------------------------- /src/dictionary_compiler/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * dictionary_compiler main 3 | * 4 | * Copyright (c) 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * * Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * * Redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * * Neither the names of the authors nor the names of its contributors 15 | * may be used to endorse or promote products derived from this 16 | * software without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER 22 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 26 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | #include 31 | #include 32 | #include 33 | #include "../nersuite_common/dictionary.h" 34 | #include "../nersuite_common/option_parser.h" 35 | 36 | using namespace std; 37 | 38 | void print_usage() 39 | { 40 | cerr << 41 | "usage: nersuite_dic_compiler [options] \n" 42 | " Text dictionary should be a text file containing dictionary entries,\n" 43 | " one per line with the following tab-separated format:\n" 44 | " Named_Entity Class1 Class2 Class3 ...\n" 45 | " Options:\n" 46 | " -n : Normalization type for dictionary matching\n" 47 | " can be \"none\" or any combination of \"c\", \"n\", \"s\", \"t\"\n" 48 | " none (default): No normalization\n" 49 | " c: Case insensitive (convert all letters to lowercase)\n" 50 | " cn: Case AND Number insensitive (convert all numbers to \'0\')\n" 51 | " cns: Case AND Number AND Symbol insensitive (convert all symbols to \'_\')\n" 52 | " t: Use token-base matching\n"; 53 | } 54 | 55 | int main(int argc, char *argv[]) 56 | { 57 | NER::OPTION_PARSER opt_parser; 58 | opt_parser.parse(argc, argv); 59 | const vector& args = opt_parser.get_args(); 60 | if (args.size() != 3) 61 | { 62 | print_usage(); 63 | exit(1); 64 | } 65 | int normalize_type = NER::NormalizeNone; 66 | string normalize_option; 67 | if (opt_parser.get_value("-n", normalize_option)) 68 | { 69 | if (normalize_option == "none") 70 | { 71 | // do nothing 72 | } 73 | else 74 | { 75 | if 
(normalize_option.find('c') != string::npos) 76 | { 77 | normalize_type |= NER::NormalizeCase; 78 | } 79 | if (normalize_option.find('n') != string::npos) 80 | { 81 | normalize_type |= NER::NormalizeNumber; 82 | } 83 | if (normalize_option.find('s') != string::npos) 84 | { 85 | normalize_type |= NER::NormalizeSymbol; 86 | } 87 | if (normalize_option.find('t') != string::npos) 88 | { 89 | normalize_type |= NER::NormalizeToken; 90 | } 91 | } 92 | } 93 | 94 | try 95 | { 96 | NER::Dictionary dc(args[2]); 97 | dc.build(args[1], normalize_type); 98 | } 99 | catch (const exception& ex) 100 | { 101 | cerr << ex.what(); 102 | } 103 | 104 | return 0; 105 | } 106 | -------------------------------------------------------------------------------- /src/dictionary_tagger/Makefile.am: -------------------------------------------------------------------------------- 1 | # $Id: Makefile.am,v 1.1.1.1 2010/12/17 07:27:40 hccho Exp $ 2 | 3 | bin_PROGRAMS = nersuite_dic_tagger 4 | 5 | #man_MANS = nersuite.dic.tagger.1 6 | 7 | #EXTRA_DIST = ${man_MANS} 8 | 9 | #EXTRA_DIST = 10 | 11 | nersuite_dic_tagger_SOURCES = \ 12 | sentence_tagger.cpp \ 13 | sentence_tagger.h \ 14 | main.cpp 15 | 16 | nersuite_dic_tagger_CPPFLAGS = @CFLAGS@ 17 | INCLUDES = -Icdbpp-1.1/include @INCLUDES@ 18 | 19 | #AM_CFLAGS = @CFLAGS@ 20 | #INCLUDES = @INCLUDES@ 21 | #AM_LDFLAGS = @LDFLAGS@ 22 | nersuite_dic_tagger_LDADD = ../nersuite_common/libnersuite_common.a 23 | -------------------------------------------------------------------------------- /src/dictionary_tagger/dictionary_tagger.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | {34826CF1-7ABC-40BD-907E-92B03E166D63} 15 | dictionary_tagger 16 | 17 | 18 | 19 | Application 20 | true 21 | MultiByte 22 | 23 | 24 | Application 25 | false 26 | true 27 | MultiByte 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 
nersuite_dic_tagger 41 | 42 | 43 | nersuite_dic_tagger 44 | 45 | 46 | 47 | Level3 48 | Disabled 49 | $(ProjectDir)cdbpp-1.1\include\ 50 | 51 | 52 | true 53 | $(OutDir)nersuite_common.lib 54 | 55 | 56 | 57 | 58 | Level3 59 | MaxSpeed 60 | true 61 | true 62 | $(ProjectDir)cdbpp-1.1\include\ 63 | 64 | 65 | true 66 | true 67 | true 68 | $(OutDir)nersuite_common.lib 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /src/dictionary_tagger/dictionary_tagger.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | -------------------------------------------------------------------------------- /src/gtagger/Makefile.am: -------------------------------------------------------------------------------- 1 | # $Id: Makefile.am,v 1.2 2011/02/25 15:21:30 hccho Exp $ 2 | 3 | bin_PROGRAMS = nersuite_gtagger 4 | 5 | #man_MANS = nersuite.gtagger.1 6 | 7 | #EXTRA_DIST = ${man_MANS} 8 | 9 | #EXTRA_DIST = 10 | 11 | nersuite_gtagger_SOURCES = \ 12 | run.gtagger.cpp \ 13 | option_parser/option_parser.h \ 14 | ../nersuite_common/text_loader.h \ 15 | ../nersuite_common/text_loader.cpp \ 16 | geniatagger-3.0.1/bidir.cpp \ 17 | geniatagger-3.0.1/chunking.cpp \ 18 | geniatagger-3.0.1/maxent.cpp \ 19 | geniatagger-3.0.1/maxent.h \ 20 | geniatagger-3.0.1/morph.cpp \ 21 | geniatagger-3.0.1/tokenize.cpp \ 22 | geniatagger-3.0.1/postag.cpp \ 23 | geniatagger-3.0.1/common.h 24 | 25 | nersuite_gtagger_CPPFLAGS = -Wno-deprecated @CFLAGS@ 26 | INCLUDES = -I./geniatagger-3.0.1 -I./option_parser -I../nersuite_common 27 | 28 | #AM_CFLAGS = @CFLAGS@ 29 | #INCLUDES = @INCLUDES@ 30 | #AM_LDFLAGS = @LDFLAGS@ 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /src/gtagger/geniatagger-3.0.1/common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * $Id: common.h,v 1.1.1.1 
#include <cstddef>
#include <list>
#include <string>
#include <sstream>

using namespace std;

static void
replace(string & s, const string & s1, const string & s2, const char skip = 0);

// One textual rewrite rule: every occurrence of `from` becomes `to`.
struct TokenizeRule
{
    const char * from;
    const char * to;
};

// Apply `count` rules to `s`, strictly in array order (later rules see the
// output of earlier ones, so the order is part of the behavior).
static void
apply_rules(string & s, const TokenizeRule * rules, size_t count)
{
    for (size_t i = 0; i < count; i++) {
        replace(s, rules[i].from, rules[i].to);
    }
}

// Opening double quotes, the ellipsis and punctuation that is always split off.
static const TokenizeRule kPunctuationRules[] = {
    { " \"", " `` " }, { "(\"", "( `` " }, { "[\"", "[ `` " },
    { "{\"", "{ `` " }, { "<\"", "< `` " },
    { "...", " ... " },
    { ",", " , " }, { ";", " ; " }, { ":", " : " }, { "@", " @ " },
    { "#", " # " }, { "$", " $ " }, { "%", " % " }, { "&", " & " },
};

// Sentence punctuation, brackets and the double dash.
static const TokenizeRule kBracketRules[] = {
    { "?", " ? " }, { "!", " ! " },
    { "[", " [ " }, { "]", " ] " }, { "(", " ( " }, { ")", " ) " },
    { "{", " { " }, { "}", " } " }, { "<", " < " }, { ">", " > " },
    { "--", " -- " },
};

// Clitics and fused words that are split into two tokens.
static const TokenizeRule kContractionRules[] = {
    { "'s ", " 's " }, { "'S ", " 'S " }, { "'m ", " 'm " }, { "'M ", " 'M " },
    { "'d ", " 'd " }, { "'D ", " 'D " },
    { "'ll ", " 'll " }, { "'re ", " 're " }, { "'ve ", " 've " }, { "n't ", " n't " },
    { "'LL ", " 'LL " }, { "'RE ", " 'RE " }, { "'VE ", " 'VE " }, { "N'T ", " N'T " },
    { " Cannot ", " Can not " }, { " cannot ", " can not " },
    { " D'ye ", " D' ye " }, { " d'ye ", " d' ye " },
    { " Gimme ", " Gim me " }, { " gimme ", " gim me " },
    { " Gonna ", " Gon na " }, { " gonna ", " gon na " },
    { " Gotta ", " Got ta " }, { " gotta ", " got ta " },
    { " Lemme ", " Lem me " }, { " lemme ", " lem me " },
    { " More'n ", " More 'n " }, { " more'n ", " more 'n " },
    { "'Tis ", " 'T is " }, { "'tis ", " 't is " },
    { "'Twas ", " 'T was " }, { "'twas ", " 't was " },
    // The lower-case rule used to map " wanna " onto itself (a no-op);
    // it now splits like its capitalized sibling.
    { " Wanna ", " Wan na " }, { " wanna ", " wan na " },
};

// True for characters that may follow a sentence-final period
// (closing brackets and quotes).
static bool
is_trailing_closer(char c)
{
    return c == '[' || c == ']' || c == ')' || c == '}' || c == '>' ||
           c == '"' || c == '\'';
}

// Split the sentence `s1` into tokens and store them in `lt`.
//
//   s1  one sentence of raw text.
//   lt  output list; cleared first, then filled with the tokens in order.
//
// The sentence is rewritten with a fixed sequence of string substitutions
// that put spaces around punctuation, quotes, brackets and clitics, and is
// finally split on whitespace. An empty sentence yields an empty list (it
// used to index the string at position -1).
void
tokenize(const string & s1, list<string> & lt)
{
    lt.clear();

    string s(s1);

    if (!s.empty() && s[0] == '"') s.replace(0, 1, "`` ");

    apply_rules(s, kPunctuationRules,
                sizeof(kPunctuationRules) / sizeof(kPunctuationRules[0]));

    // Detach a sentence-final period: skip trailing blanks, then any closing
    // brackets/quotes, and split the '.' found there unless it is the tail
    // of a run of periods.
    if (!s.empty()) {
        string::size_type pos = s.size() - 1;
        while (pos > 0 && s[pos] == ' ') pos--;
        while (pos > 0 && is_trailing_closer(s[pos])) pos--;
        if (s[pos] == '.' && !(pos > 0 && s[pos-1] == '.')) s.replace(pos, 1, " .");
    }

    apply_rules(s, kBracketRules,
                sizeof(kBracketRules) / sizeof(kBracketRules[0]));

    // Pad both ends so that the space-anchored rules below also match at the
    // very beginning and end of the sentence.
    s.insert(0, " ");
    s.append(" ");

    // Any double quote still present is a closing quote.
    replace(s, "\"", " '' ");

    // A quote followed by a blank becomes its own token, unless it directly
    // follows another quote (that is what the skip character is for).
    replace(s, "' ", " ' ", '\'');

    apply_rules(s, kContractionRules,
                sizeof(kContractionRules) / sizeof(kContractionRules[0]));

    istringstream is(s);
    string t;
    while (is >> t) {
        lt.push_back(t);
    }
}

// Replace every occurrence of `s1` in `s` by `s2`, scanning left to right.
// Text produced by a replacement is never rescanned. An occurrence that is
// immediately preceded by the character `skip` is left untouched. An empty
// pattern is ignored (it would otherwise match forever).
static void
replace(string & s, const string & s1, const string & s2, const char skip)
{
    if (s1.empty()) return;

    string::size_type pos = 0;
    while (1) {
        string::size_type i = s.find(s1, pos);
        if (i == string::npos) break;
        if (i > 0 && s[i-1] == skip) {
            pos = i + 1;
            continue;
        }
        s.replace(i, s1.size(), s2);
        pos = i + s2.size();
    }
}
#ifndef __OPTION_PARSER__
#define __OPTION_PARSER__

#include <iostream>
#include <string>
#include <vector>

using namespace std;

// One command line parameter: its name (including the leading '-') and the
// value that followed it, or an empty value for a bare flag.
struct PARAM {
    string name;
    string value;
};
typedef vector<PARAM> V1_PARAM;


// Collects leading "-name [value]" pairs from an array of C strings and
// answers look-ups by parameter name.
class OPTION_PARSER {
private:
    V1_PARAM params;    // Parsed parameters, in the order they were given.

public:
    // Consume parameters from the front of items[0..n).
    //
    // Scanning stops at the first string that is not introduced by '-'.
    // A parameter takes the following string as its value unless that string
    // starts with '-' itself or the array ends, in which case the value is
    // empty. Returns the number of strings consumed (names plus values).
    int parse(int n, char* items[]) {
        int used = 0;
        int idx = 0;

        while (idx < n && items[idx][0] == '-') {
            PARAM entry;
            entry.name = items[idx];
            ++used;

            const bool has_value = (idx + 1 < n) && (items[idx + 1][0] != '-');
            if (has_value) {
                entry.value = items[idx + 1];
                ++idx;
                ++used;
            }

            params.push_back(entry);
            ++idx;
        }
        return used;
    }

    // Look up a parameter by name. On success `value` receives the value of
    // the LAST parameter stored under that name and true is returned;
    // otherwise `value` is left untouched and false is returned.
    bool get_value(const string &name, string &value) {
        for (V1_PARAM::const_reverse_iterator it = params.rbegin(); it != params.rend(); ++it) {
            if (it->name == name) {
                value = it->value;
                return true;
            }
        }
        return false;
    }

    // Print every stored parameter to stdout, one per line: "name" for a
    // bare flag, "name : value" otherwise.
    void output_params(void) {
        for (V1_PARAM::const_iterator it = params.begin(); it != params.end(); ++it) {
            cout << it->name;
            if (!(it->value == ""))
                cout << " : " << it->value;
            cout << endl;
        }
    }

};


#endif
/src/nersuite/crfsuite-0.12/Release/cqdb.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/src/nersuite/crfsuite-0.12/Release/cqdb.lib -------------------------------------------------------------------------------- /src/nersuite/crfsuite-0.12/Release/crf.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nlplab/nersuite/9feb189969dddb7b60ad632cf3486a47ad150dd6/src/nersuite/crfsuite-0.12/Release/crf.lib -------------------------------------------------------------------------------- /src/nersuite/crfsuite-0.12/include/os.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Compatibility stuff among operating systems and compilers. 3 | * 4 | * Copyright (c) 2007-2010, Naoaki Okazaki 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * * Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * * Redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * * Neither the names of the authors nor the names of its contributors 15 | * may be used to endorse or promote products derived from this 16 | * software without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER 22 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 26 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | /* $Id$ */ 32 | 33 | #ifndef __OS_H__ 34 | #define __OS_H__ 35 | 36 | 37 | //#define __SSE__ 1 38 | #define LBFGS_FLOAT 64 39 | 40 | #ifdef _MSC_VER 41 | /* Microsoft Visual C/C++ specific */ 42 | 43 | #define _CRT_SECURE_NO_WARNINGS 1 44 | #pragma warning(disable : 4996) 45 | 46 | #define alloca _alloca 47 | #define strdup _strdup 48 | #define open _open 49 | #define isfinite _finite 50 | #define snprintf _snprintf 51 | 52 | #ifndef __cplusplus 53 | /* Microsoft Visual C specific */ 54 | 55 | #define inline __forceinline 56 | 57 | #endif/*__cplusplus*/ 58 | 59 | #endif/*_MSC_VER*/ 60 | 61 | #endif/*__OS_H__*/ 62 | -------------------------------------------------------------------------------- /src/nersuite/crfsuite2.cpp: -------------------------------------------------------------------------------- 1 | #include "crfsuite2.h" 2 | #include "crfsuite.hpp" 3 | 4 | using namespace CRFSuite; 5 | 6 | size_t Trainer2::get_attribute_num() const 7 | { 8 | if (data != NULL) 9 | { 10 | return data->attrs->num(data->attrs); 11 | } 12 | return 0; 13 | } 14 | 15 | size_t Trainer2::get_label_num() const 16 | { 17 | if (data != NULL) 18 | { 19 | return data->labels->num(data->labels); 20 | } 21 | return 0; 22 | } 23 | 24 | size_t Trainer2::get_instance_num() const 25 | { 26 | if (data != NULL) 27 | { 28 | return data->num_instances; 29 | } 30 | return 0; 31 | } 32 | 33 | size_t Trainer2::get_item_num() const 
34 | { 35 | if (data != NULL) 36 | { 37 | return static_cast(crfsuite_data_totalitems(data)); 38 | } 39 | return 0; 40 | } 41 | 42 | void Trainer2::message(const std::string& msg) 43 | { 44 | std::cerr << msg; 45 | } 46 | -------------------------------------------------------------------------------- /src/nersuite/crfsuite2.h: -------------------------------------------------------------------------------- 1 | #ifndef __CRFSUITE2_H 2 | #define __CRFSUITE2_H 3 | 4 | #include "crfsuite.h" 5 | #include "crfsuite_api.hpp" 6 | 7 | namespace CRFSuite 8 | { 9 | 10 | class Trainer2 : public Trainer 11 | { 12 | public: 13 | size_t get_attribute_num() const; 14 | 15 | size_t get_label_num() const; 16 | 17 | size_t get_instance_num() const; 18 | 19 | size_t get_item_num() const; 20 | 21 | virtual void message(const std::string& msg); 22 | }; 23 | 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /src/nersuite/typedefs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * NERSuite 3 | * Typedefs for Feature Extractor 4 | * 5 | * Copyright (c) 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * * Neither the names of the authors nor the names of its contributors 16 | * may be used to endorse or promote products derived from this 17 | * software without specific prior written permission. 
#ifndef _TYPEDEFS_H
#define _TYPEDEFS_H

#include <vector>
#include <string>

// A table of strings: a vector of rows, each row a vector of column values.
typedef std::vector< std::vector<std::string> > V2_STR;
// Read-only iterator over the rows of a V2_STR.
typedef V2_STR::const_iterator V2_STR_citr;

namespace NER
{
    /**
     * @ingroup NERsuite
     * Column indices of the features in the input/output file.
     *
     * The indices depend on the mode NERsuite runs in
     * (BEG=1 in Learning mode, otherwise, BEG=0).
     */
    struct COLUMN_INFO
    {
        /** Column of the beginning position feature */
        int BEG;

        /** Column of the end position feature */
        int END;

        /** Column of the word (surface form) feature */
        int WORD;

        /** Column of the word (lemma form) feature */
        int LEMMA;

        /** Column of the part-of-speech feature */
        int POS;

        /** Column of the chunk feature */
        int CHUNK;

        /**
         * First column of the extra dictionary feature(s);
         * any further dictionary columns follow it.
         */
        int DIC;
    };
}
#endif
7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * * Neither the names of the authors nor the names of its contributors 16 | * may be used to endorse or promote products derived from this 17 | * software without specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 23 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef _NE_H
#define _NE_H

#include <list>
#include <string>

namespace NER
{
    /**
     * @ingroup NERsuite
     */
    /** Representation of one NE chunk: a span plus the classes assigned to it
     */
    struct NE
    {
        /** Start position of the chunk
         */
        int begin;

        /** Position one past the end of the chunk (end+1)
         */
        int end;

        /** Classes this chunk is labelled with
         */
        std::list<std::string> classes;

        /** Similarity value (not used and set to 1.0)
         */
        double sim;
    };
}
#endif
6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * * Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * * Redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * * Neither the names of the authors nor the names of its contributors 15 | * may be used to endorse or promote products derived from this 16 | * software without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 22 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 26 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef _NERSUITE_EXCEPTION_H
#define _NERSUITE_EXCEPTION_H

#include <exception>
#include <string>

namespace NER
{
    /**
     * @ingroup NERsuite
     * NERsuite General Exception
     *
     * Carries a human-readable message that is returned by what().
     */
    class nersuite_exception : public std::exception
    {
        std::string message;

    public:
        /** Construct an NERsuite exception object with a message
         * @param[in] _what A message describing the exception content.
         *                  A null pointer is treated as an empty message
         *                  (building a std::string from a null pointer is
         *                  undefined behaviour).
         */
        nersuite_exception(const char* _what)
            : message(_what != 0 ? _what : "")
        {
        }

        /** Construct an NERsuite exception object with a message
         * @param[in] _what A message describing the exception content
         */
        nersuite_exception(const std::string& _what)
            : message(_what)
        {
        }

        /** Destroy an NERsuite exception object
         *
         * Declared throw() so the exception specification matches the one
         * on std::exception's destructor.
         */
        virtual ~nersuite_exception() throw ()
        {
        }

        /** Retrieve the exception message
         * @returns The message as a C string; the pointer stays valid for
         *          as long as this exception object is alive.
         */
        virtual const char* what() const throw()
        {
            return message.c_str();
        }
    };
}

#endif
#ifndef _STRING_UTILS_H
#define _STRING_UTILS_H

// NOTE(review): the four original include targets were lost in this copy of
// the file; the set below is a best guess that covers what this header uses.
// Verify against the repository before relying on transitive includes.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

namespace NER
{
    /**
     * @ingroup NERsuite
     */
    /** Tokenize a string
     *
     * Splits one_line at every occurrence of del and appends the pieces to
     * V_STR in order. Empty fields between two adjacent delimiters (and
     * before a leading delimiter) are kept; a delimiter at the very end of
     * the line does NOT produce a trailing empty field.
     *
     * @param[out] V_STR    Container of strings where output is pushed
     *                      (cleared before processing). Must provide
     *                      clear() and push_back(std::string).
     * @param[in]  one_line Input string representing a line of text
     * @param[in]  del      The delimiter; may be longer than one character.
     *                      An empty delimiter splits nothing: the whole line
     *                      is returned as a single token.
     * @returns The number of tokens pushed to V_STR
     */
    template <typename T1>
    int tokenize(T1 &V_STR, std::string &one_line, std::string del)
    {
        V_STR.clear();

        // find("") matches at every position, so an empty delimiter would
        // never advance the cursor below; handle it explicitly.
        if (del.empty())
        {
            if (one_line.empty())
                return 0;

            V_STR.push_back(one_line);
            return 1;
        }

        const std::string::size_type line_len = one_line.length();
        const std::string::size_type del_len = del.length();

        int total_elem = 0;
        std::string::size_type beg = 0;

        while (beg < line_len)
        {
            std::string::size_type end = one_line.find(del, beg);
            if (end == std::string::npos)
                end = line_len;    // last field runs to the end of the line

            V_STR.push_back(one_line.substr(beg, end - beg));
            ++total_elem;

            // Skip the whole delimiter, not just its first character.
            beg = end + del_len;
        }

        return total_elem;
    }

    extern void trim_ws(std::string &str);
    extern std::string int2str(int i);
    extern std::string int2strIDX(int idx);
    extern bool check_alphanum(const char ch);

    extern std::string make_lowercase(const std::string& str);
    extern std::string squeeze_nums(const std::string& str);
    extern std::string squeeze_syms(const std::string& str);
    extern std::string squeeze_ws(const std::string& str);

}
#endif
15 | * * Neither the names of the authors nor the names of its contributors 16 | * may be used to endorse or promote products derived from this 17 | * software without specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 23 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | 32 | #ifndef _TEXT_LOADER_H 33 | #define _TEXT_LOADER_H 34 | 35 | #include 36 | #include 37 | #include 38 | 39 | #include 40 | #include 41 | 42 | typedef std::vector< std::vector > V2_STR; 43 | 44 | namespace NER 45 | { 46 | /** 47 | * @ingroup NERsuite 48 | */ 49 | /** 50 | * Read a sentence from a stream and construct Token List for the sentence. 51 | * 52 | * Each line consists of tab-separated columns (label + feature list). 53 | * First column is the label assigned for the feature list. 54 | * The following columns are the feature list. 55 | * 56 | * @param[in] cin Input stream (Each sentence must be separated with an empty line.) 57 | * @param[in,out] one_sent Token List for a sentence. 58 | * Any existing contents are cleared before processing. 
59 | * @returns count of tokens in the sentence 60 | * 61 | */ 62 | extern int get_sent(std::istream &cin, V2_STR &one_sent); 63 | 64 | /** 65 | * @ingroup NERsuite 66 | */ 67 | /** 68 | * Read a sentence from a stream and construct Token List for the sentence. 69 | * 70 | * Each line consists of tab-separated columns (label + feature list). 71 | * First column is the label assigned for the feature list. 72 | * The following columns are the feature list. 73 | * Accepts also lines containing only the given document break marker. 74 | * 75 | * @param[in] cin Input stream (Each sentence must be separated with an empty line.) 76 | * @param[in,out] one_sent Token List for a sentence. 77 | * Any existing contents are cleared before processing. 78 | * @param[in] multidoc_separator String marking document break. 79 | * @param[out] separator_read Flag marking whether a document break was read. 80 | * @returns count of tokens in the sentence 81 | * 82 | */ 83 | extern int get_sent(std::istream &cin, V2_STR &one_sent, const std::string &multidoc_separator, bool &separator_read); 84 | 85 | } 86 | 87 | #endif 88 | 89 | 90 | -------------------------------------------------------------------------------- /src/nersuite_common/tokenizer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * A sentence tokenizer class with a user-defined delimiters 3 | * 4 | * Copyright (c) 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * * Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * * Redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 
14 | * * Neither the names of the authors nor the names of its contributors 15 | * may be used to endorse or promote products derived from this 16 | * software without specific prior written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 22 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 26 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | #ifndef _NERSUITE_TOKENIZER_H 31 | #define _NERSUITE_TOKENIZER_H 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | namespace NER 40 | { 41 | typedef std::vector< std::string > V1_STR; 42 | typedef std::vector< V1_STR > V2_STR; 43 | 44 | /** 45 | * @ingroup NERsuite 46 | * Tokenizer Utility 47 | */ 48 | class Tokenizer 49 | { 50 | private: 51 | int splitter( const std::string &trimmed_sent, V2_STR &data ); 52 | void mark_pos( const std::string &raw_sent, V2_STR &data, size_t init_offset = 0 ); 53 | 54 | std::string trim_ws( const std::string &raw_sent ); 55 | size_t find_token_end( const std::string &trimmed_sent, const size_t beg ); 56 | 57 | public: 58 | /** Tokenize input text and generate the (begin_pos, end_pos, token) triplet for each token. 
59 | * @param[in] raw_sent Input sentence 60 | * @param[in,out] data A vector to store the result 61 | * @param[in] init_offset The offset value added to all begin_pos 62 | * @returns the count of tokens generated. 63 | */ 64 | int tokenize( const std::string &raw_sent, V2_STR &data, size_t init_offset = 0 ); 65 | 66 | }; 67 | } 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /src/nersuitetest/NERSuiteTest.h: -------------------------------------------------------------------------------- 1 | #ifndef _NERSUITE_TEST_H 2 | #define _NERSUITE_TEST_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "TestUtil.h" 8 | #include "../nersuite/nersuite.h" 9 | #include "fext_helper.h" 10 | 11 | using namespace std; 12 | using namespace NER; 13 | 14 | void TestNERSuite_LearnArgs() 15 | { 16 | char* args[] = { "-m", "model.m" }; 17 | Suite nersuite(2, args); 18 | } 19 | 20 | void TestNERSuite_TagArgs() 21 | { 22 | char* args[] = { "-m", "model.m" }; 23 | Suite nersuite(2, args); 24 | } 25 | 26 | void TestNERSuite_Learn() 27 | { 28 | FILE* fp = fopen("tmp_learn.txt", "wt"); 29 | fputs("B-TestTag 0 2 ABC ABC NN O O O O O O O O O\n", fp); 30 | fputs("B-TestTag 3 5 DEF DEF NN O O O O O O O O O\n", fp); 31 | fputs("B-TestTag 6 8 GHI GHI NN O O O O O O O O O\n", fp); 32 | fclose(fp); 33 | 34 | extern string CRFSuiteDummyLog; 35 | CRFSuiteDummyLog.clear(); 36 | 37 | char* args[] = { "-m", "model.m", "-f", "tmp_learn.txt" }; 38 | Suite nersuite(4, args); 39 | if (nersuite.learn() != 0) 40 | { 41 | throw new TestException("assert failed", __FILE__, __LINE__); 42 | } 43 | 44 | extern string CRFSuiteDummyLog; 45 | if (CRFSuiteDummyLog != 46 | "Trainer::Trainer;" 47 | "Trainer::select(lbfgs,crf1d);" 48 | "Trainer::params;" 49 | "Trainer::set(c2,1.000000);" 50 | "Trainer::append;" 51 | "Trainer2::get_instance_num;" 52 | "Trainer2::get_item_num;" 53 | "Trainer2::get_label_num;" 54 | "Trainer2::get_attribute_num;" 55 | 
#ifndef _TEST_UTIL_H
#define _TEST_UTIL_H

#include <exception>
#include <sstream>
#include <string>

using namespace std;

/**
 * Exception describing a failed test assertion.
 *
 * The message returned by what() has the form "<file>(<line>) : <msg>".
 */
class TestException : public exception
{
private:
    string m_What;

public:
    /** Build the failure message.
     * @param[in] msg  Description of the failure
     * @param[in] file Source file reporting the failure (callers in this
     *                 project pass __FILE__)
     * @param[in] line Source line reporting the failure (callers pass __LINE__)
     */
    TestException(const char* msg, const char* file, int line)
    {
        ostringstream str;
        str << file << "(" << line << ") : " << msg;
        m_What = str.str();
    }

    /** Destroy the exception.
     *
     * Declared throw() explicitly: under C++98 the implicitly generated
     * destructor would get a looser exception specification than
     * std::exception's destructor, which conforming compilers reject.
     */
    virtual ~TestException() throw()
    {
    }

    /** Retrieve the failure message.
     *
     * Carries throw() so that it is a valid override of
     * std::exception::what(); without it the override has a looser
     * exception specification than the base function.
     *
     * @returns The formatted message; valid while this object is alive.
     */
    virtual const char* what() const throw()
    {
        return m_What.c_str();
    }
};

#endif
CRFSuiteDummyLog; 8 | 9 | Trainer::Trainer() 10 | { 11 | CRFSuiteDummyLog.append("Trainer::Trainer;"); 12 | } 13 | 14 | Trainer::~Trainer() 15 | { 16 | CRFSuiteDummyLog.append("Trainer::~Trainer;"); 17 | } 18 | 19 | void Trainer::set(const std::string& name, const std::string& value) 20 | { 21 | CRFSuiteDummyLog.append("Trainer::set(" + name + "," + value + ");"); 22 | } 23 | 24 | StringList Trainer::params() 25 | { 26 | CRFSuiteDummyLog.append("Trainer::params;"); 27 | static StringList result; 28 | return result; 29 | } 30 | 31 | int Trainer::train(const std::string& model, int holdout) 32 | { 33 | static char msg[1024]; 34 | sprintf(msg, "Trainer::train(%s,%d);", model.c_str(), holdout); 35 | CRFSuiteDummyLog.append(msg); 36 | return 0; 37 | } 38 | 39 | bool Trainer::select(const std::string& algorithm, const std::string& type) 40 | { 41 | CRFSuiteDummyLog.append("Trainer::select(" + algorithm + "," + type + ");"); 42 | return true; 43 | } 44 | 45 | void Trainer::append(const ItemSequence& xseq, const StringList& yseq, int group) 46 | { 47 | CRFSuiteDummyLog.append("Trainer::append;"); 48 | } 49 | 50 | void Trainer::message(const std::string& msg) 51 | { 52 | CRFSuiteDummyLog.append("Trainer::message(" + msg + ");"); 53 | } 54 | 55 | size_t Trainer2::get_attribute_num() const 56 | { 57 | CRFSuiteDummyLog.append("Trainer2::get_attribute_num;"); 58 | return 0; 59 | } 60 | 61 | size_t Trainer2::get_label_num() const 62 | { 63 | CRFSuiteDummyLog.append("Trainer2::get_label_num;"); 64 | return 0; 65 | } 66 | 67 | size_t Trainer2::get_instance_num() const 68 | { 69 | CRFSuiteDummyLog.append("Trainer2::get_instance_num;"); 70 | return 0; 71 | } 72 | 73 | size_t Trainer2::get_item_num() const 74 | { 75 | CRFSuiteDummyLog.append("Trainer2::get_item_num;"); 76 | return 0; 77 | } 78 | 79 | void Trainer2::message(const std::string& msg) 80 | { 81 | CRFSuiteDummyLog.append("Trainer2::message(" + msg + ");"); 82 | } 83 | 84 | Tagger::Tagger() 85 | { 86 | 
CRFSuiteDummyLog.append("Tagger::Tagger;"); 87 | } 88 | 89 | Tagger::~Tagger() 90 | { 91 | CRFSuiteDummyLog.append("Tagger::~Tagger;"); 92 | } 93 | 94 | bool Tagger::open(const std::string& name) 95 | { 96 | CRFSuiteDummyLog.append("Tagger::open(" + name + ");"); 97 | return true; 98 | } 99 | 100 | StringList Tagger::labels() 101 | { 102 | CRFSuiteDummyLog.append("Tagger::labels;"); 103 | static StringList dummy; 104 | return dummy; 105 | } 106 | 107 | StringList Tagger::tag(const ItemSequence& xseq) 108 | { 109 | CRFSuiteDummyLog.append("Tagger::tag;"); 110 | static StringList dummy; 111 | return dummy; 112 | } 113 | 114 | -------------------------------------------------------------------------------- /src/nersuitetest/fext_helper.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "fext_helper.h" 4 | 5 | using namespace std; 6 | 7 | namespace NER 8 | { 9 | void SetupColumnInfoHelper(COLUMN_INFO& cinfo) 10 | { 11 | cinfo.BEG = 0; 12 | cinfo.END = 1; 13 | cinfo.WORD = 2; 14 | cinfo.LEMMA = 3; 15 | cinfo.POS = 4; 16 | cinfo.CHUNK = 5; 17 | cinfo.DIC = 6; 18 | } 19 | 20 | vector CreateWordHelper(const char* beg, const char* end, const char* surface, const char* lemma, const char* pos, const char* chunk, const char* dic) 21 | { 22 | vector word; 23 | word.push_back(beg); 24 | word.push_back(end); 25 | word.push_back(surface); 26 | word.push_back(lemma); 27 | word.push_back(pos); 28 | word.push_back(chunk); 29 | word.push_back(dic); 30 | return word; 31 | } 32 | 33 | void FeatureToStringHelper(const V2_STR& feats, vector& feats_str) 34 | { 35 | for (V2_STR_citr i = feats.begin(); i != feats.end(); ++i) 36 | { 37 | ostringstream str; 38 | for (vector::const_iterator j = (*i).begin(); j != (*i).end(); ++j) 39 | { 40 | str << *j; 41 | str << ","; 42 | } 43 | feats_str.push_back(str.str()); 44 | } 45 | } 46 | } -------------------------------------------------------------------------------- 
/src/nersuitetest/fext_helper.h: -------------------------------------------------------------------------------- 1 | #ifndef _FEXT_HELPER_H 2 | #define _FEXT_HELPER_H 3 | 4 | #include 5 | #include 6 | #include "../nersuite/FExtor.h" 7 | 8 | namespace NER 9 | { 10 | void FeatureToStringHelper(const V2_STR& feats, std::vector& feats_str); 11 | void SetupColumnInfoHelper(COLUMN_INFO& cinfo); 12 | std::vector CreateWordHelper(const char* beg, const char* end, const char* word, const char* lemma, const char* pos, const char* chunk, const char* dic); 13 | } 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /src/nersuitetest/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "NERSuiteTest.h" 6 | #include "FExtorTest.h" 7 | #include "DictionaryTest.h" 8 | #include "SentenceTaggerTest.h" 9 | 10 | using namespace std; 11 | 12 | typedef void (*TestFunc)(); 13 | #define REGISTER_TESTFUNC(table, funcname) table[#funcname] = funcname 14 | 15 | int main() 16 | { 17 | // Register Tests 18 | map TestFuncTable; 19 | 20 | REGISTER_TESTFUNC(TestFuncTable, TestNERSuite_LearnArgs); 21 | REGISTER_TESTFUNC(TestFuncTable, TestNERSuite_TagArgs); 22 | REGISTER_TESTFUNC(TestFuncTable, TestNERSuite_Tag); 23 | REGISTER_TESTFUNC(TestFuncTable, TestNERSuite_Learn); 24 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorWordFeature); 25 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorLemmaFeature); 26 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature); 27 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature_Greek); 28 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature_Comma); 29 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature_Period); 30 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature_Hyphon); 31 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature_Slash); 32 | REGISTER_TESTFUNC(TestFuncTable, 
TestFExtorOrthoFeature_OpenSB); 33 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature_CloseSB); 34 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature_OpenP); 35 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature_CloseP); 36 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature_Colon); 37 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature_Semicolon); 38 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature_Percentage); 39 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorOrthoFeature_Apostrophe); 40 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorPOSFeature); 41 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorLemmaPOSFeature); 42 | REGISTER_TESTFUNC(TestFuncTable, TestFExtorChunkFeature); 43 | REGISTER_TESTFUNC(TestFuncTable, TestDictionaryBuild_NoNormalization); 44 | REGISTER_TESTFUNC(TestFuncTable, TestDictionaryBuild_CaseNormalization); 45 | REGISTER_TESTFUNC(TestFuncTable, TestDictionaryBuild_CaseAndNumberNormalization); 46 | REGISTER_TESTFUNC(TestFuncTable, TestDictionaryBuild_CaseAndNumberAndSymbolNormalization); 47 | REGISTER_TESTFUNC(TestFuncTable, TestDictionaryBuild_TokenizerNormalization); 48 | REGISTER_TESTFUNC(TestFuncTable, TestSentenceTagger_NormalizeNone); 49 | REGISTER_TESTFUNC(TestFuncTable, TestSentenceTagger_NormalizeCase); 50 | REGISTER_TESTFUNC(TestFuncTable, TestSentenceTagger_NormalizeNumber); 51 | REGISTER_TESTFUNC(TestFuncTable, TestSentenceTagger_NormalizeSymbol); 52 | REGISTER_TESTFUNC(TestFuncTable, TestSentenceTagger_TokenBase); 53 | REGISTER_TESTFUNC(TestFuncTable, TestSentenceTagger_TokenBase); 54 | 55 | // Run the Test List 56 | size_t ntests = TestFuncTable.size(); 57 | size_t n = 0; 58 | size_t success_count = 0; 59 | for (map::iterator i = TestFuncTable.begin(); i != TestFuncTable.end(); ++i) 60 | { 61 | n ++; 62 | try 63 | { 64 | (*(i->second))(); 65 | } 66 | catch (exception* ex) 67 | { 68 | cerr << "[" << n << "/" << ntests << "] Failed in test: " << (i->first) << endl; 69 | const char* p = ex->what(); 
70 | cerr << ex->what() << endl; 71 | delete ex; 72 | continue; 73 | } 74 | cerr << "[" << n << "/" << ntests << "] Passed : " << (i->first) << endl; 75 | success_count ++; 76 | } 77 | 78 | cerr << endl << "Test Completed. Total: " << n << ", Success: " << success_count << endl; 79 | return 0; 80 | } 81 | -------------------------------------------------------------------------------- /src/nersuitetest/nersuitetest.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | -------------------------------------------------------------------------------- /src/tokenizer/Makefile.am: -------------------------------------------------------------------------------- 1 | # $Id: Makefile.am,v 1.1.1.1 2010/12/17 07:27:40 hccho Exp $ 2 | 3 | bin_PROGRAMS = nersuite_tokenizer 4 | 5 | #man_MANS = nersuite.tokenizer.1 6 | 7 | #EXTRA_DIST = ${man_MANS} 8 | 9 | #EXTRA_DIST = 10 | 11 | nersuite_tokenizer_SOURCES = \ 12 | run.tokenizer.cpp 13 | 14 | nersuite_tokenizer_CPPFLAGS = @CFLAGS@ 15 | INCLUDES = @INCLUDES@ 16 | 17 | #AM_CFLAGS = @CFLAGS@ 18 | #INCLUDES = @INCLUDES@ 19 | #AM_LDFLAGS = @LDFLAGS@ 20 | nersuite_tokenizer_LDADD = ../nersuite_common/libnersuite_common.a 21 | 22 | 23 | -------------------------------------------------------------------------------- /src/tokenizer/run.tokenizer.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // A tester for a sentence_tokenizer class 3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "../nersuite_common/tokenizer.h" 10 | 11 | 12 | using namespace std; 13 | using namespace NER; 14 | 15 | 16 | int main(int argc, char* argv[]) 17 | { 18 | if( argc >= 2 ) { 19 | for( int i=1; i= 3 ) { 31 | for( int j=1; jbegin(); i_col != i_row->end(); ++i_col) { 87 | cout << *i_col; 88 | if( (i_col + 1) != i_row->end() ) 89 | cout << "\t"; 90 | } 91 | cout << endl; 92 | } 93 | cout << endl; 94 | } 95 | 96 | return 0; 97 | } 98 | 
-------------------------------------------------------------------------------- /src/tokenizer/tokenizer.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | {D97F9C67-1DC4-42B0-8AC6-24B6262FC7FC} 15 | tokenizer 16 | 17 | 18 | 19 | Application 20 | true 21 | MultiByte 22 | 23 | 24 | Application 25 | false 26 | true 27 | MultiByte 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | nersuite_tokenizer 41 | 42 | 43 | nersuite_tokenizer 44 | 45 | 46 | 47 | Level3 48 | Disabled 49 | 50 | 51 | true 52 | $(OutDir)nersuite_common.lib 53 | 54 | 55 | 56 | 57 | Level3 58 | MaxSpeed 59 | true 60 | true 61 | 62 | 63 | true 64 | true 65 | true 66 | $(OutDir)nersuite_common.lib 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /src/tokenizer/tokenizer.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | --------------------------------------------------------------------------------