├── README
├── README.md
├── crfsuite
    ├── AUTHORS
    ├── COPYING
    ├── ChangeLog
    ├── INSTALL
    ├── Makefile.am
    ├── README
    ├── autogen.sh
    ├── bench
    │   ├── accuracy.py
    │   ├── bench.py
    │   ├── bench_crfpp.py
    │   ├── bench_crfsgd.py
    │   ├── bench_crfsuite-0.11.py
    │   ├── bench_crfsuite.py
    │   ├── bench_mallet.py
    │   ├── bench_wapiti.py
    │   ├── collect.py
    │   ├── crfsuite_to_mallet.py
    │   └── plot_performance.py
    ├── configure.in
    ├── crfsuite.sln
    ├── doc
    │   ├── Doxyfile
    │   ├── footer.html
    │   └── header.html
    ├── example
    │   ├── chunking.py
    │   ├── crfutils.py
    │   ├── ner.py
    │   ├── pos.py
    │   └── template.py
    ├── frontend
    │   ├── Makefile.am
    │   ├── dump.c
    │   ├── frontend.vcxproj
    │   ├── frontend.vcxproj.user
    │   ├── iwa.c
    │   ├── iwa.h
    │   ├── learn.c
    │   ├── main.c
    │   ├── option.c
    │   ├── option.h
    │   ├── readdata.h
    │   ├── reader.c
    │   ├── readme.txt
    │   └── tag.c
    ├── genbinary.sh.in
    ├── include
    │   ├── Makefile.am
    │   ├── crfsuite.h
    │   ├── crfsuite.hpp
    │   ├── crfsuite_api.hpp
    │   └── os.h
    ├── lib
    │   ├── cqdb
    │   │   ├── COPYING
    │   │   ├── Makefile.am
    │   │   ├── Release
    │   │   │   ├── CL.read.1.tlog
    │   │   │   ├── CL.write.1.tlog
    │   │   │   ├── Lib-link.read.1.tlog
    │   │   │   ├── Lib-link.write.1.tlog
    │   │   │   ├── cl.command.1.tlog
    │   │   │   ├── cqdb.lastbuildstate
    │   │   │   ├── cqdb.log
    │   │   │   ├── cqdb.obj
    │   │   │   ├── lib.command.1.tlog
    │   │   │   ├── lookup3.obj
    │   │   │   └── vc100.pdb
    │   │   ├── cqdb.vcxproj
    │   │   ├── cqdb.vcxproj.user
    │   │   ├── doc
    │   │   │   ├── doxyfile
    │   │   │   └── footer.html
    │   │   ├── include
    │   │   │   └── cqdb.h
    │   │   ├── makedist.sh
    │   │   └── src
    │   │   │   ├── cqdb.c
    │   │   │   ├── lookup3.c
    │   │   │   └── main.c
    │   └── crf
    │   │   ├── Makefile.am
    │   │   ├── Release
    │   │       ├── CL.read.1.tlog
    │   │       ├── CL.write.1.tlog
    │   │       ├── Lib-link.read.1.tlog
    │   │       ├── Lib-link.write.1.tlog
    │   │       ├── cl.command.1.tlog
    │   │       ├── crf.lastbuildstate
    │   │       ├── crf.log
    │   │       ├── crf1d_context.obj
    │   │       ├── crf1d_encode.obj
    │   │       ├── crf1d_feature.obj
    │   │       ├── crf1d_model.obj
    │   │       ├── crf1d_tag.obj
    │   │       ├── crfsuite.obj
    │   │       ├── crfsuite_train.obj
    │   │       ├── dataset.obj
    │   │       ├── dictionary.obj
    │   │       ├── holdout.obj
    │   │       ├── lib.command.1.tlog
    │   │       ├── logging.obj
    │   │       ├── params.obj
    │   │       ├── quark.obj
    │   │       ├── rumavl.obj
    │   │       ├── train_arow.obj
    │   │       ├── train_averaged_perceptron.obj
    │   │       ├── train_l2sgd.obj
    │   │       ├── train_lbfgs.obj
    │   │       ├── train_passive_aggressive.obj
    │   │       └── vc100.pdb
    │   │   ├── crf.suo
    │   │   ├── crf.vcxproj
    │   │   ├── crf.vcxproj.user
    │   │   └── src
    │   │       ├── crf1d.h
    │   │       ├── crf1d_context.c
    │   │       ├── crf1d_encode.c
    │   │       ├── crf1d_feature.c
    │   │       ├── crf1d_model.c
    │   │       ├── crf1d_tag.c
    │   │       ├── crfsuite.c
    │   │       ├── crfsuite_internal.h
    │   │       ├── crfsuite_train.c
    │   │       ├── dataset.c
    │   │       ├── dictionary.c
    │   │       ├── holdout.c
    │   │       ├── logging.c
    │   │       ├── logging.h
    │   │       ├── params.c
    │   │       ├── params.h
    │   │       ├── quark.c
    │   │       ├── quark.h
    │   │       ├── rumavl.c
    │   │       ├── rumavl.h
    │   │       ├── train_arow.c
    │   │       ├── train_averaged_perceptron.c
    │   │       ├── train_l2sgd.c
    │   │       ├── train_lbfgs.c
    │   │       ├── train_passive_aggressive.c
    │   │       └── vecmath.h
    ├── modification.txt
    ├── swig
    │   ├── Makefile.am
    │   ├── crfsuite.cpp
    │   ├── export.i
    │   ├── perl
    │   │   ├── Makefile.PL.in
    │   │   ├── prepare.sh
    │   │   ├── sample_tag.pl
    │   │   └── sample_train.pl
    │   └── python
    │   │   ├── README
    │   │   ├── crfsuite.py
    │   │   ├── export_wrap.cpp
    │   │   ├── export_wrap.h
    │   │   ├── prepare.sh
    │   │   ├── sample_tag.py
    │   │   ├── sample_train.py
    │   │   └── setup.py
    └── win32
    │   ├── liblbfgs
    │       ├── lbfgs.h
    │       ├── lbfgs.lib
    │       └── lbfgs_debug.lib
    │   └── stdint.h
├── data
    ├── README
    ├── test_laptop
    ├── test_restaurant
    ├── train_laptop
    ├── train_restaurant
    └── train_restaurant_updated
├── mod_pycrfsuite
    ├── _pycrfsuite.pyx
    └── crfsuite_api.pxd
├── rnn
    ├── adagrad.py
    ├── adagrad_crf.py
    ├── crf_propagation.py
    └── propagation.py
├── train_RNCRF.py
├── train_depnn.py
└── util
    ├── 10depParse.py
    ├── 20dtreeLabel.py
    ├── 30word_embedding.py
    ├── __init__.py
    ├── data_semEval
        ├── aspectTerm_sample
        ├── opinion_sample
        └── sample.txt
    ├── dtree_util.py
    ├── gen_util.py
    ├── lexparser.sh
    └── math_util.py


/README:
--------------------------------------------------------------------------------
 1 | ********************README**************************************************************************
 2 | 
 3 | This is an instruction file for successfully running the RNCRF model of the paper
 4 | published in EMNLP 2016:
 5 | https://www.aclweb.org/anthology/D/D16/D16-1059.pdf
 6 | 
 7 | ****************************************************************************************************
 8 | 
 9 | This code makes use of "python-crfsuite", which wraps CRFsuite C++ API using Cython.
10 | Contributing:
11 | "python-crfsuite": https://python-crfsuite.readthedocs.io/en/latest/         licensed under MIT license
12 | "CRFsuite": http://www.chokkan.org/software/crfsuite/                        licensed under BSD license
13 | 
14 | The recursive neural network (RNN) is implemented using python based on QANTA for question answering:
15 | https://cs.umd.edu/~miyyer/qblearn/
16 | 
17 | ****************************************************************************************************
18 | 
19 | Please follow these steps to install CRF software and make corresponding modifications
20 | in order to be executable with RNN implementation in python.
21 | 
22 | 1. Download python-crfsuite package (this should also include original CRFsuite package)
23 | 
24 | 2. Replace original 'crfsuite' folder with the provided modified folder 'crfsuite'
25 | 
26 | 3. Make some modifications to the files in folder 'pycrfsuite':
27 |    - Replace the file '_pycrfsuite.pyx' with provided one with the same name in the folder 'mod_pycrfsuite'
28 |    - Replace the file 'crfsuite_api.pxd' with provided one with the same name in the folder 'mod_pycrfsuite'
29 | 
30 | 4. Run the following command to generate the cpp file from the '_pycrfsuite.pyx' file:
31 |    $ ./update_cpp.sh
32 | 
33 | 5. Run the following command to install the modified version of python-crfsuite:
34 |    $ python setup.py install
35 | 
36 | ***************************************************************************************************
37 | 
38 | Now we are ready to build the RNCRF model for training and evaluation:
39 | 
40 | 1. Locate to folder 'util':
41 |    - Download Stanford dependency tree parser (stanford-corenlp-3.5.1)
42 |    - Run '10depParse.py' to generate dependency trees for each sentence using stanford parser
43 |    - Run '20dtreeLabel.py' to build tree object for each sentence 
44 |    - Run '30word_embedding.py' to generate pre-trained word embedding dictionary. In order to run this file, please make sure to obtain the pre-trained word vectors from word2vec first
45 | 
46 | 2. Run 'train_depnn.py' to pre-train recursive neural network without CRF first
47 | 
48 | 3. Run 'train_RNCRF.py' to train RNCRF and make evaluations.
49 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Recursive-Neural-Conditional-Random-Field
2 | Implementation and data for the published paper "Recursive Neural Conditional Random Fields for Aspect-based Sentiment Analysis".
3 | 


--------------------------------------------------------------------------------
/crfsuite/AUTHORS:
--------------------------------------------------------------------------------
1 | Naoaki Okazaki <okazaki at chokkan org>
2 | 


--------------------------------------------------------------------------------
/crfsuite/COPYING:
--------------------------------------------------------------------------------
 1 | The BSD license.
 2 | 
 3 | Copyright (c) 2007-2010, Naoaki Okazaki
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 |     * Redistributions of source code must retain the above copyright
 9 |       notice, this list of conditions and the following disclaimer.
10 |     * Redistributions in binary form must reproduce the above copyright
11 |       notice, this list of conditions and the following disclaimer in the
12 |       documentation and/or other materials provided with the distribution.
13 |     * Neither the names of the authors nor the names of its contributors
14 |       may be used to endorse or promote products derived from this
15 |       software without specific prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/crfsuite/ChangeLog:
--------------------------------------------------------------------------------
  1 | 2011-08-11  Naoaki Okazaki  <okazaki at chokkan org>
  2 | 	* CRFsuite 0.12
  3 | 	- [CORE] Optimized the implementation for faster training; approximately 1.4-1.5 x speed up.
  4 | 	- [CORE] Faster routine for computing exp(x) using SSE2.
  5 | 	- [CORE] Restructured the source code to separate routines for CRF graphical models and training algorithms; this is an initial attempt for implementing CRFs with different feature types (e.g., 2nd-order CRF, 1st-order transition features conditioned on observations) and different training algorithms.
  6 | 	- [CORE] Implemented new training algorithms: Averaged Perceptron, Passive Aggressive, and Adaptive Regularization of Weights (AROW).
  7 | 	- [CORE] Removed automatic generation of BOS/EOS features; one can use these features by inserting attributes to the first/last items (e.g., "__BOS__" at the first item and "__EOS__" at the last item).
  8 | 	- [CORE] Fixed some memory-leak problems.
  9 | 	- [CORE] Reduced memory usage in training.
 10 | 	- [CORE] Fixed a crash problem when the model file does not exist in tagging.
 11 | 	- [FRONTEND:LEARN] Training and test sets are maintained by group numbers; specify the group number for hold-out evaluation with "-e" option.
 12 | 	- [FRONTEND:LEARN] Training algorithm is now specified by "-a" option instead of "-p algorithm=".
 13 | 	- [FRONTEND:LEARN] Renamed some training parameters; for example, an L2 regularization coefficient is specified by "c2" instead of "regularization.sigma" (c2 = 0.5 / (sigma * sigma); c1 = 1.0 / sigma).
 14 | 	- [FRONTEND:LEARN] Show the list of parameters, default values, and descriptions with "-H" option.
 15 | 	- [FRONTEND:LEARN] Removed the support of comment lines for simplicity; one may forget to escape '#' characters in a data set. CRFsuite now does not handle '#' as a special character.
 16 | 	- [FRONTEND:TAGGER] Output probabilities of predicted sequences with "-p" option.
 17 | 	- [FRONTEND:TAGGER] Output marginal probabilities of predicted items with "-i" option.
 18 | 	- [API] Numerous changes in API for the enhancements.
 19 | 	- [API] Renamed the library name "libcrf" to "libcrfsuite".
 20 | 	- [API] Renamed the prefix "crf_" to "crfsuite_" in structure and function names.
 21 | 	- [API] Implemented a high-level and easy-to-use API for C++/SWIG (crfsuite.hpp and crfsuite_api.hpp).
 22 | 	- [API] Implemented the Python SWIG module and sample programs; writing a tagger is very easy with this module.
 23 | 	- [SAMPLE] Rewritten samples.
 24 | 	- [SAMPLE] A sample program (template.py) for using feature templates that are compatible with CRF++.
 25 | 	- [SAMPLE] New samples in example directory: Named Entity Recognition (ner.py) using the CoNLL2003 data set, and part-of-speech tagging (pos.py).
 26 | 	- [OTHER] Updated the MSVC solution file to MSVC 2010.
 27 | 
 28 | 	
 29 | 2010-07-16  Naoaki Okazaki  <okazaki at chokkan org>
 30 | 	* CRFsuite 0.11
 31 | 	- Renamed crf.h into crfsuite.h to avoid possible conflicts in include directories
 32 | 	- Install crfsuite.h to the include directory (suggested by Ingo Glöckner)
 33 | 
 34 | 	
 35 | 2010-01-29  Naoaki Okazaki  <okazaki at chokkan org>
 36 | 	* CRFsuite 0.10
 37 | 	- A patch submitted by Hiroshi Manabe (at Kodensha Co., Ltd.) to fix memory leak problems in the tagger.
 38 | 	- Added a new option -r (--reference) for the tagger to output reference labels in parallel with predicted labels.
 39 | 
 40 | 	
 41 | 2009-09-24  Naoaki Okazaki  <okazaki at chokkan org>
 42 | 	* CRFsuite 0.9
 43 | 	- Fixed a build problem with liblbfgs 1.8
 44 | 
 45 | 	
 46 | 2009-03-17  Naoaki Okazaki  <okazaki at chokkan org>
 47 | 	* CRFsuite 0.8
 48 | 	- Improved the portability of model files across different machine architectures with different byte order; this fixes a crash problem in tagging on some machine architectures.
 49 | 
 50 | 	
 51 | 2009-03-10  Naoaki Okazaki  <okazaki at chokkan org>
 52 | 	* CRFsuite 0.7
 53 | 	- Updated RumAVL library to 4.0.0; this fixes a crash problem occurring in feature generation on some machine architectures.
 54 | 
 55 | 	
 56 | 2009-03-07  Naoaki Okazaki  <okazaki at chokkan org>
 57 | 	* CRFsuite 0.6
 58 | 	- A new training algorithm, Stochastic Gradient Descent (SGD).
 59 | 	- Updated the L-BFGS routine to liblbfgs 1.7.
 60 | 	- Reduced memory usage in training.
 61 | 	- Supported escape sequences in training/test data.
 62 | 	- Restructured the source code.
 63 | 	- Added a parameter to configure the number of trials for line search.
 64 | 
 65 | 	
 66 | 2008-11-19  Naoaki Okazaki  <okazaki at chokkan org>
 67 | 	* CRFsuite 0.5
 68 | 	- Updated the L-BFGS routine to liblbfgs 1.6.
 69 | 	- New parameters lbfgs.stop, lbfgs.delta, lbfgs.linesearch were added.
 70 | 	- Fixed a bug in which the frontend tools could not parse "item:value" format correctly.
 71 | 	- Fixed a bug in computing the accuracy.
 72 | 	- Fixed a bug when the tagger receives an item with no feature.
 73 | 
 74 | 
 75 | 2008-03-05  Naoaki Okazaki  <okazaki at chokkan org>
 76 | 
 77 | 	* CRFsuite 0.4 (the first public release):
 78 | 	- Website and documentation for CRFsuite.
 79 | 	- Tutorial on the CoNLL 2000 chunking shared task.
 80 | 	- Performance comparison on the CoNLL 2000 chunking shared task.
 81 | 	- Bug fix in L2 regularization.
 82 | 	- A number of small improvements for the public release.
 83 | 
 84 | 
 85 | 2007-12-12  Naoaki Okazaki  <okazaki at chokkan org>
 86 | 
 87 | 	* CRFsuite 0.3 (internal release):
 88 | 	- Implemented scaling method for forward/backward algorithm.
 89 | 	- Removed the code for computing the forward/backward algorithm in logarithm domain.
 90 | 	
 91 | 
 92 | 2007-11-30  Naoaki Okazaki  <okazaki at chokkan org>
 93 | 
 94 | 	* CRFsuite 0.2 (internal release):
 95 | 	- Orthant-Wise Limited-memory Quasi-Newton (OW-LQN) method for L1 regularization.
 96 | 	- Configurable L-BFGS parameters (number of limited memories, epsilon).
 97 | 	
 98 | 	
 99 | 2007-10-29  Naoaki Okazaki  <okazaki at chokkan org>
100 | 
101 | 	* CRFsuite 0.1 (internal release):
102 | 	- Initial release.
103 | 
104 | 


--------------------------------------------------------------------------------
/crfsuite/Makefile.am:
--------------------------------------------------------------------------------
# $Id$

# Top-level automake input for CRFsuite.

# Subdirectories that automake recurses into, in the listed order.
SUBDIRS = include lib/cqdb lib/crf frontend swig

# Documentation files installed under $(prefix)/share/doc/<package name>.
docdir = $(prefix)/share/doc/@PACKAGE@
doc_DATA = README INSTALL COPYING AUTHORS ChangeLog

# Additional files shipped in the distribution tarball that are not covered
# by any build rule (MSVC solution, bootstrap script, win32 shim, examples).
EXTRA_DIST = \
	crfsuite.sln \
	autogen.sh \
	win32/stdint.h \
	example/crfutils.py \
	example/template.py \
	example/pos.py \
	example/ner.py \
	example/chunking.py

# "foreign" relaxes automake's checks for GNU-standard files.
AUTOMAKE_OPTIONS = foreign
# Extra search path passed to aclocal for local macro files.
ACLOCAL_AMFLAGS = -I m4
20 | 


--------------------------------------------------------------------------------
/crfsuite/autogen.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # $Id:$
 3 | 
 4 | if [ "$1" = "--force" ];
 5 | then
 6 |     FORCE=--force
 7 |     NOFORCE=
 8 |     FORCE_MISSING=--force-missing
 9 | else
10 |     FORCE=
11 |     NOFORCE=--no-force
12 |     FORCE_MISSING=
13 | fi
14 | 
15 | libtoolize --copy $FORCE 2>&1 | sed '/^You should/d' || {
16 |     echo "libtoolize failed!"
17 |     exit 1
18 | }
19 | 
20 | aclocal $FORCE || {
21 |     echo "aclocal failed!"
22 |     exit 1
23 | }
24 | 
25 | autoheader $FORCE || {
26 |     echo "autoheader failed!"
27 |     exit 1
28 | }
29 | 
30 | automake -a -c $NOFORCE || {
31 |     echo "automake failed!"
32 |     exit 1
33 | }
34 | 
35 | autoconf $FORCE || {
36 |     echo "autoconf failed!"
37 |     exit 1
38 | }
39 | 


--------------------------------------------------------------------------------
/crfsuite/bench/accuracy.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | 
 5 | if __name__ == '__main__':
 6 |     fi = sys.stdin
 7 |     fo = sys.stdout
 8 |     n = 0
 9 |     m = 0
10 | 
11 |     for line in fi:
12 |         line = line.strip()
13 |         if line:
14 |             fields = line.split()
15 |             if len(fields) >= 2:
16 |                 if fields[-1] == fields[-2]:
17 |                     m += 1
18 |                 n += 1
19 | 
20 |     print 'Item accuracy: %f' % (m / float(n))
21 | 


--------------------------------------------------------------------------------
/crfsuite/bench/bench.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import collections
 3 | 
# Directory prefix for the combined "command + training log" transcripts that
# the bench_* scripts write (they build paths as LOGDIR + '<tool>-' + name + '.txt').
LOGDIR='log/'
 5 | 
 6 | def seconds(s):
 7 |     p = s.find(':')
 8 |     q = s.find(':', p+1)
 9 |     return int(s[:p]) * 3600 + int(s[p+1:q]) * 60 + int(s[q+1:])
10 | 
def last(X):
    """Return the final element of ``X``, or ``None`` when ``X`` is empty."""
    return X[-1] if len(X) >= 1 else None
16 | 
def diffmin(X):
    """Return the smallest difference between consecutive elements of ``X``.

    Args:
        X: Iterable of numbers, e.g. cumulative timestamps.

    Returns:
        ``min(X[i + 1] - X[i])`` over all adjacent pairs, or ``None`` when
        ``X`` has fewer than two elements. Returning ``None`` mirrors
        ``last()`` on empty input; the previous version raised ValueError
        from ``min([])`` in that case.
    """
    values = list(X)
    diffs = [cur - prev for prev, cur in zip(values, values[1:])]
    if not diffs:
        return None
    return min(diffs)
25 | 
def analyze_log(fi, patterns):
    """Extract and aggregate values from a log using regex patterns.

    Args:
        fi: Iterable of log lines (e.g. an open text file).
        patterns: Iterable of ``(name, pattern, index, cast, func)`` tuples:
            ``pattern`` is a regex searched in every line; ``index`` is a
            group number, or a tuple of group numbers, to extract on a
            match; ``cast`` converts each extracted string; ``func``
            reduces the list of converted values collected for ``name``.

    Returns:
        A dict mapping each ``name`` to ``func(values)``. ``func`` is called
        with an empty list when its pattern never matched.
    """
    # A later entry with the same name replaces an earlier one.
    compiled = {}
    for name, pattern, index, cast, func in patterns:
        compiled[name] = (re.compile(pattern), index, cast, func)

    collected = collections.defaultdict(list)
    for line in fi:
        line = line.strip('\n')
        # .items() instead of .iteritems(): works on Python 2 and Python 3.
        for name, (regex, index, cast, _func) in compiled.items():
            m = regex.search(line)
            if m is None:
                continue
            if isinstance(index, tuple):
                for i in index:
                    collected[name].append(cast(m.group(i)))
            elif isinstance(index, int):
                collected[name].append(cast(m.group(index)))

    results = {}
    for name, (_regex, _index, _cast, func) in compiled.items():
        results[name] = func(collected[name])
    return results
48 | 


--------------------------------------------------------------------------------
/crfsuite/bench/bench_crfpp.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import os
 5 | import string
 6 | from bench import *
 7 | 
# Absolute paths of the CRF++ trainer and tagger executables that are
# substituted into the benchmark command lines.
CRFPP_LEARN='/home/okazaki/local/bin/crf_learn'
CRFPP_TEST='/home/okazaki/local/bin/crf_test'
# Directory prefix for the per-configuration model and log files.
OUTDIR='crfpp/'

# Patterns passed to analyze_log() for the training log. Each entry is
# (result name, regex, group index, cast, aggregate function).
training_patterns = (
    ('num_features', r'^Number of features:[ ]*(\d+)', 1, int, last),
    ('time', r'^Done!([\d.]+)', 1, float, last),
    ('iterations', r'^iter=(\d+)', 1, int, last),
    ('update', r'time=([\d.]+)', 1, float, min),
    ('loss', r'obj=([\d.]+)', 1, float, last),
)

# Patterns passed to analyze_log() for the tagging log (same entry layout).
tagging_patterns = (
    ('accuracy', r'^Item accuracy: ([\d.]+)', 1, float, last),
)

# Benchmark configurations: name -> extra command-line arguments for the
# training command.
params = {
    'lbfgs': '-a CRF-L2',
    'mira': '-a MIRA',
}
28 | 
# NOTE(review): this guard compares against '_main__' (one leading
# underscore), so the branch can never run. It looks like a deliberately
# disabled debugging entry point; it is kept as-is because correcting the
# name would make the script parse stdin before the benchmark loop below.
if __name__ == '_main__':
    print(analyze_log(sys.stdin, training_patterns))

# For every configuration in ``params``: build the training and tagging
# command lines, echo them to stderr, copy the training log into a transcript
# under LOGDIR, and collect statistics parsed from the logs.
if __name__ == '__main__':
    fe = sys.stderr

    R = {}
    # .items() behaves the same as .iteritems() here and also exists on Python 3.
    for name, param in params.items():
        model = OUTDIR + name + '.model'
        trlog = OUTDIR + name + '.tr.log'
        trtxt = LOGDIR + 'crfpp-' + name + '.txt'
        tglog = OUTDIR + name + '.tg.log'

        s = string.Template(
            '$crfpp_learn $param template.crfpp train.txt $model > $trlog'
            )
        cmd = s.substitute(
            crfpp_learn=CRFPP_LEARN,
            param=param,
            model=model,
            trlog=trlog
            )

        fe.write(cmd)
        fe.write('\n')
        # Execution is commented out, so the logs read below must already exist.
        #os.system(cmd)

        # Context managers close (and flush) the files, which the original
        # bare open() calls never did explicitly.
        with open(trtxt, 'w') as fo:
            fo.write('$ %s\n' % cmd)
            with open(trlog, 'r') as fi:
                fo.write(fi.read())

        s = string.Template(
            '$crfpp_test -m $model test.txt | ./accuracy.py > $tglog'
            )
        cmd = s.substitute(
            crfpp_test=CRFPP_TEST,
            model=model,
            tglog=tglog
            )

        fe.write(cmd)
        fe.write('\n')
        #os.system(cmd)

        with open(trlog) as fi:
            D = analyze_log(fi, training_patterns)
        with open(tglog) as fi:
            D.update(analyze_log(fi, tagging_patterns))
        D['logfile'] = trtxt
        R[name] = D

    print(repr(R))
79 | 


--------------------------------------------------------------------------------
/crfsuite/bench/bench_crfsgd.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import os
 5 | import string
 6 | from bench import *
 7 | 
# Absolute path of the crfsgd executable substituted into the benchmark
# command lines.
CRFSGD='/home/okazaki/install/sgd-1.3/crf/crfsgd'
# Directory prefix for the per-configuration model and log files.
OUTDIR='crfsgd/'

# Patterns passed to analyze_log() for the training log. Each entry is
# (result name, regex, group index, cast, aggregate function).
# 'update' captures the "total time" figure on each epoch line and reduces
# the series with diffmin (smallest gap between consecutive values).
training_patterns = (
    ('num_features', r'features: (\d+)', 1, int, last),
    ('time', r'^Done!  ([\d.]+)', 1, float, last),
    ('iterations', r'^\[Epoch (\d+)\]', 1, int, last),
    ('update', r'^\[Epoch \d+\][^a-z]+wnorm:[^a-z]+total time: ([\d.]+) seconds$', 1, float, diffmin),
    ('loss', r'loss: ([\d.]+)', 1, float, last),
)

# Patterns passed to analyze_log() for the tagging log (same entry layout).
tagging_patterns = (
    ('accuracy', r'^Item accuracy: ([\d.]+)', 1, float, last),
)

# Benchmark configurations: name -> extra command-line arguments for the
# training command.
params = {
    'default': "-f 1 -r 100 -e ''",
}
26 | 
# For every configuration in ``params``: build the training and tagging
# command lines, echo them to stderr, copy the training log into a transcript
# under LOGDIR, and collect statistics parsed from the logs.
if __name__ == '__main__':
    fe = sys.stderr

    R = {}
    # .items() behaves the same as .iteritems() here and also exists on Python 3.
    for name, param in params.items():
        model = OUTDIR + name + '.model'
        trlog = OUTDIR + name + '.tr.log'
        trtxt = LOGDIR + 'crfsgd-' + name + '.txt'
        tglog = OUTDIR + name + '.tg.log'

        s = string.Template(
            '$crfsgd $param $model template.crfpp train.txt > $trlog'
            )
        cmd = s.substitute(
            crfsgd=CRFSGD,
            param=param,
            model=model,
            trlog=trlog
            )

        fe.write(cmd)
        fe.write('\n')
        # Execution is commented out, so the logs read below must already exist.
        #os.system(cmd)

        # Context managers close (and flush) the files, which the original
        # bare open() calls never did explicitly.
        with open(trtxt, 'w') as fo:
            fo.write('$ %s\n' % cmd)
            with open(trlog, 'r') as fi:
                fo.write(fi.read())

        s = string.Template(
            '$crfsgd -t $model test.txt | ./accuracy.py > $tglog'
            )
        cmd = s.substitute(
            crfsgd=CRFSGD,
            model=model,
            tglog=tglog
            )

        fe.write(cmd)
        fe.write('\n')
        #os.system(cmd)

        with open(trlog) as fi:
            D = analyze_log(fi, training_patterns)
        with open(tglog) as fi:
            D.update(analyze_log(fi, tagging_patterns))
        D['logfile'] = trtxt
        R[name] = D

    print(repr(R))

# NOTE(review): this guard compares against '_main__' (one leading
# underscore), so the branch can never run. It looks like a deliberately
# disabled debugging entry point; it is kept as-is because correcting the
# name would make the script also parse stdin on every run.
if __name__ == '_main__':
    print(analyze_log(sys.stdin, training_patterns))
77 | 


--------------------------------------------------------------------------------
/crfsuite/bench/bench_crfsuite-0.11.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import os
 5 | import string
 6 | from bench import *
 7 | 
# Absolute path of the CRFsuite 0.11 frontend executable substituted into
# the benchmark command lines.
CRFSUITE='/home/okazaki/install/crfsuite-0.11/frontend/crfsuite'
# Directory prefix for the per-configuration model and log files.
OUTDIR='crfsuite-0.11/'

# Patterns passed to analyze_log() for the training log. Each entry is
# (result name, regex, group index, cast, aggregate function).
# 'iterations' uses group 2 because group 1 is the Iteration|Epoch keyword.
training_patterns = (
    ('num_features', r'^Number of features: (\d+)', 1, int, last),
    ('time', r'^Total seconds required for L-BFGS: ([\d.]+)', 1, float, last),
    ('iterations', r'^\*\*\*\*\* (Iteration|Epoch) #(\d+)', 2, int, last),
    ('update', r'^Seconds required for this iteration: ([\d.]+)', 1, float, min),
    ('loss', r'^Log-likelihood: -([\d.]+)', 1, float, last),
)

# Patterns passed to analyze_log() for the tagging log (same entry layout).
tagging_patterns = (
    ('accuracy', r'^Item accuracy: \d+ / \d+ \(([\d.]+)\)', 1, float, last),
)

# Benchmark configurations: name -> extra command-line arguments for the
# "learn" command.
params = {
    'lbfgs-sparse': '-p regularization.sigma=0.70710678118654746 -p feature.possible_states=0 -p feature.possible_transitions=0',
    'lbfgs-dense': '-p regularization.sigma=0.70710678118654746 -p feature.possible_states=1 -p feature.possible_transitions=1',
}
27 | 
# NOTE(review): this guard compares against '_main__' (one leading
# underscore), so the branch can never run. It looks like a deliberately
# disabled debugging entry point; it is kept as-is because correcting the
# name would make the script parse stdin before the benchmark loop below.
if __name__ == '_main__':
    print(analyze_log(sys.stdin, training_patterns))

# For every configuration in ``params``: build the learn and tag command
# lines, echo them to stderr, copy the training log into a transcript under
# LOGDIR, and collect statistics parsed from the logs.
if __name__ == '__main__':
    fe = sys.stderr

    R = {}
    # .items() behaves the same as .iteritems() here and also exists on Python 3.
    for name, param in params.items():
        model = OUTDIR + name + '.model'
        trlog = OUTDIR + name + '.tr.log'
        trtxt = LOGDIR + 'crfsuite0.11-' + name + '.txt'
        tglog = OUTDIR + name + '.tg.log'

        s = string.Template(
            '$crfsuite learn $param -m $model train.crfsuite > $trlog'
            )
        cmd = s.substitute(
            crfsuite=CRFSUITE,
            param=param,
            model=model,
            trlog=trlog
            )

        fe.write(cmd)
        fe.write('\n')
        # Execution is commented out, so the logs read below must already exist.
        #os.system(cmd)

        # Context managers close (and flush) the files, which the original
        # bare open() calls never did explicitly.
        with open(trtxt, 'w') as fo:
            fo.write('$ %s\n' % cmd)
            with open(trlog, 'r') as fi:
                fo.write(fi.read())

        s = string.Template(
            '$crfsuite tag -m $model -qt test.crfsuite > $tglog'
            )
        cmd = s.substitute(
            crfsuite=CRFSUITE,
            model=model,
            tglog=tglog
            )

        fe.write(cmd)
        fe.write('\n')
        #os.system(cmd)

        with open(trlog) as fi:
            D = analyze_log(fi, training_patterns)
        with open(tglog) as fi:
            D.update(analyze_log(fi, tagging_patterns))
        D['logfile'] = trtxt
        R[name] = D

    print(repr(R))
78 | 


--------------------------------------------------------------------------------
/crfsuite/bench/bench_crfsuite.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import os
 5 | import string
 6 | from bench import *
 7 | 
# Absolute path of the CRFsuite frontend executable substituted into the
# benchmark command lines.
CRFSUITE='/home/okazaki/projects/crfsuite/frontend/crfsuite'
# Directory prefix for the per-configuration model and log files.
OUTDIR='crfsuite/'

# Patterns passed to analyze_log() for the training log. Each entry is
# (result name, regex, group index, cast, aggregate function).
# 'iterations' uses group 2 because group 1 is the Iteration|Epoch keyword.
training_patterns = (
    ('num_features', r'^Number of features: (\d+)', 1, int, last),
    ('time', r'^Total seconds required for training: ([\d.]+)', 1, float, last),
    ('iterations', r'^\*\*\*\*\* (Iteration|Epoch) #(\d+)', 2, int, last),
    ('update', r'^Seconds required for this iteration: ([\d.]+)', 1, float, min),
    ('loss', r'^Loss: ([-\d.]+)', 1, float, last),
)

# Patterns passed to analyze_log() for the tagging log (same entry layout).
tagging_patterns = (
    ('accuracy', r'^Item accuracy: \d+ / \d+ \(([\d.]+)\)', 1, float, last),
)

# Benchmark configurations: name -> extra command-line arguments for the
# "learn" command (algorithm selected with -a, parameters with -p).
params = {
    'lbfgs-sparse': '-a lbfgs -p feature.possible_states=0 -p feature.possible_transitions=0',
    'lbfgs-dense': '-a lbfgs -p feature.possible_states=1 -p feature.possible_transitions=1',
    'l2sgd-sparse': '-a l2sgd -p feature.possible_states=0 -p feature.possible_transitions=0',
    'l2sgd-dense': '-a l2sgd -p feature.possible_states=1 -p feature.possible_transitions=1',
    'ap-sparse': '-a ap -p feature.possible_states=0 -p feature.possible_transitions=0 -p max_iterations=50',
    'ap-dense': '-a ap -p feature.possible_states=1 -p feature.possible_transitions=1 -p max_iterations=50',
}
31 | 
# NOTE(review): this guard compares against '_main__' (one leading
# underscore), so the branch can never run. It looks like a deliberately
# disabled debugging entry point; it is kept as-is because correcting the
# name would make the script parse stdin before the benchmark loop below.
if __name__ == '_main__':
    print(analyze_log(sys.stdin, training_patterns))

# For every configuration in ``params``: build the learn and tag command
# lines, echo them to stderr, copy the training log into a transcript under
# LOGDIR, and collect statistics parsed from the logs.
if __name__ == '__main__':
    fe = sys.stderr

    R = {}
    # .items() behaves the same as .iteritems() here and also exists on Python 3.
    for name, param in params.items():
        model = OUTDIR + name + '.model'
        trlog = OUTDIR + name + '.tr.log'
        trtxt = LOGDIR + 'crfsuite-' + name + '.txt'
        tglog = OUTDIR + name + '.tg.log'

        s = string.Template(
            '$crfsuite learn $param -m $model train.crfsuite > $trlog'
            )
        cmd = s.substitute(
            crfsuite=CRFSUITE,
            param=param,
            model=model,
            trlog=trlog
            )

        fe.write(cmd)
        fe.write('\n')
        # Execution is commented out, so the logs read below must already exist.
        #os.system(cmd)

        # Context managers close (and flush) the files, which the original
        # bare open() calls never did explicitly.
        with open(trtxt, 'w') as fo:
            fo.write('$ %s\n' % cmd)
            with open(trlog, 'r') as fi:
                fo.write(fi.read())

        s = string.Template(
            '$crfsuite tag -m $model -qt test.crfsuite > $tglog'
            )
        cmd = s.substitute(
            crfsuite=CRFSUITE,
            model=model,
            tglog=tglog
            )

        fe.write(cmd)
        fe.write('\n')
        #os.system(cmd)

        with open(trlog) as fi:
            D = analyze_log(fi, training_patterns)
        with open(tglog) as fi:
            D.update(analyze_log(fi, tagging_patterns))
        D['logfile'] = trtxt
        R[name] = D

    print(repr(R))
82 | 


--------------------------------------------------------------------------------
/crfsuite/bench/bench_mallet.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import os
 5 | import string
 6 | from bench import *
 7 | 
 8 | MALLET='java -cp "/home/okazaki/install/mallet-2.0.6/class:/home/okazaki/install/mallet-2.0.6/lib/mallet-deps.jar" cc.mallet.fst.SimpleTagger'
 9 | OUTDIR='mallet/'
10 | 
11 | training_patterns = (
12 |     ('num_features', r'^Number of weights = (\d+)', 1, int, last),
13 |     ('time', r'^([\d.]+)user ([\d.]+)system', (1, 2), float, sum),
14 |     ('iterations', r'^CRF finished one iteration of maximizer, i=(\d+)', 1, int, len),
15 | #    ('update', r'^Seconds required for this iteration: ([\d.]+)', 1, float, min),
16 |     ('loss', r'^getValue\(\) \(loglikelihood, optimizable by label likelihood\) = -([\d.]+)', 1, float, last),
17 | )
18 | 
19 | tagging_patterns = (
20 |     ('accuracy', r'^Testing accuracy=([\d.]+)', 1, float, last),
21 | )
22 | 
23 | params = {
24 |     'default': '--gaussian-variance 0.70710678118654746',
25 | }
26 | 
27 | if __name__ == '_main__':
28 |     print analyze_log(sys.stdin, training_patterns)
29 | 
30 | if __name__ == '__main__':
31 |     fe = sys.stderr
32 | 
33 |     R = {}
34 |     for name, param in params.iteritems():
35 |         model = OUTDIR + name + '.model'
36 |         trlog = OUTDIR + name + '.tr.log'
37 |         trtxt = LOGDIR + 'mallet-' + name + '.txt'
38 |         tglog = OUTDIR + name + '.tg.log'
39 | 
40 |         s = string.Template(
41 |             'time $mallet --train true $param --model-file $model train.mallet > $trlog 2>&1'
42 |             )
43 |         cmd = s.substitute(
44 |             mallet=MALLET,
45 |             param=param,
46 |             model=model,
47 |             trlog=trlog
48 |             )
49 | 
50 |         fe.write(cmd)
51 |         fe.write('\n')
52 |         #os.system(cmd)
53 | 
54 |         fo = open(trtxt, 'w')
55 |         fo.write('$ %s\n' % cmd)
56 |         fo.write(open(trlog, 'r').read())
57 | 
58 |         s = string.Template(
59 |             '$mallet --model-file $model --test lab test.mallet > $tglog 2>&1'
60 |             )
61 |         cmd = s.substitute(
62 |             mallet=MALLET,
63 |             model=model,
64 |             tglog=tglog
65 |             )
66 | 
67 |         fe.write(cmd)
68 |         fe.write('\n')
69 |         #os.system(cmd)
70 | 
71 |         D = analyze_log(open(trlog), training_patterns)
72 |         D['update'] = 0.
73 |         D.update(analyze_log(open(tglog), tagging_patterns))
74 |         D['logfile'] = trtxt
75 |         R[name] = D
76 | 
77 |     print repr(R)
78 | 


--------------------------------------------------------------------------------
/crfsuite/bench/bench_wapiti.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import os
 5 | import string
 6 | from bench import *
 7 | 
 8 | WAPITI='/home/okazaki/install/wapiti-1.1.3/wapiti'
 9 | OUTDIR='wapiti/'
10 | 
11 | training_patterns = (
12 |     ('num_features', r'nb features: (\d+)', 1, int, last),
13 |     ('time', r'^([\d.]+)user ([\d.]+)system', (1, 2), float, sum),
14 |     ('iterations', r'\[\s*(\d+)\]', 1, int, last),
15 |     ('update', r'time=([\d.]+)', 1, float, min),
16 |     ('loss', r'obj=([\d.]+)', 1, float, last),
17 | )
18 | 
19 | tagging_patterns = (
20 |     ('accuracy', r'^Item accuracy: ([\d.]+)', 1, float, last),
21 | )
22 | 
23 | params = {
24 |     'lbfgs': '-a l-bfgs --rho2 0.70710678118654746 --maxiter 1000 --stopeps 0.00001 --stopwin 10',
25 |     'rprop': '-a rprop --rho3 0.70710678118654746 --maxiter 1000',
26 | }
27 | 
28 | if __name__ == '_main__':
29 |     print analyze_log(sys.stdin, training_patterns)
30 | 
31 | if __name__ == '__main__':
32 |     fe = sys.stderr
33 | 
34 |     R = {}
35 |     for name, param in params.iteritems():
36 |         model = OUTDIR + name + '.model'
37 |         trlog = OUTDIR + name + '.tr.log'
38 |         trtxt = LOGDIR + 'wapiti-' + name + '.txt'
39 |         tglog = OUTDIR + name + '.tg.log'
40 | 
41 |         s = string.Template(
42 |             'time $wapiti train $param -p template.wapiti train.txt $model > $trlog 2>&1'
43 |             )
44 |         cmd = s.substitute(
45 |             wapiti=WAPITI,
46 |             param=param,
47 |             model=model,
48 |             trlog=trlog
49 |             )
50 | 
51 |         fe.write(cmd)
52 |         fe.write('\n')
53 |         #os.system(cmd)
54 | 
55 |         fo = open(trtxt, 'w')
56 |         fo.write('$ %s\n' % cmd)
57 |         fo.write(open(trlog, 'r').read())
58 | 
59 |         s = string.Template(
60 |             '$wapiti label -m $model test.txt | ./accuracy.py > $tglog'
61 |             )
62 |         cmd = s.substitute(
63 |             wapiti=WAPITI,
64 |             model=model,
65 |             tglog=tglog
66 |             )
67 | 
68 |         fe.write(cmd)
69 |         fe.write('\n')
70 |         #os.system(cmd)
71 | 
72 |         D = analyze_log(open(trlog), training_patterns)
73 |         D.update(analyze_log(open(tglog), tagging_patterns))
74 |         D['logfile'] = trtxt
75 |         R[name] = D
76 | 
77 |     print repr(R)
78 | 


--------------------------------------------------------------------------------
/crfsuite/bench/collect.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import os
 5 | 
# Benchmark drivers as (display name, script to execute) pairs.  read() runs
# every script, and the output functions report results in this order.
scripts = (
    ('CRFsuite 0.12', './bench_crfsuite.py'),
    ('CRFsuite 0.11', './bench_crfsuite-0.11.py'),
    ('Wapiti v1.1.3', './bench_wapiti.py'),
    ('sgd 1.3', './bench_crfsgd.py'),
    ('CRF++ 0.54', './bench_crfpp.py'),
    ('MALLET 2.0.6', './bench_mallet.py'),
)

# (column caption, result key) pairs.
# NOTE(review): nothing in this file reads `fields`, and output_table() looks
# up the log path under 'logfile' rather than 'log' -- confirm before use.
fields = (
    ('# Features', 'num_features'),
    ('Time', 'time'),
    ('# Iters', 'iterations'),
    ('Update', 'update'),
    ('Loss', 'loss'),
    ('Log', 'log'),
)
23 | 
def number(x):
    """Insert a space between every group of three integer digits.

    ``x`` is an already formatted decimal string such as ``'1234567.5'``.
    Only the part before the first ``'.'`` is grouped; the fractional part is
    appended unchanged.  A leading ``'-'`` or ``'+'`` is kept attached to the
    first digit group instead of being counted as a digit, which previously
    turned ``'-123'`` into ``'- 123'``.

    Returns the grouped string, e.g. ``'1 234 567.5'``.
    """
    sign = ''
    if x[:1] in ('-', '+'):
        sign, x = x[0], x[1:]

    # Split at the decimal point; without one the whole string is the
    # integer part.
    p = x.find('.')
    if p == -1:
        p = len(x)
    integer, rest = x[:p], x[p:]

    # Peel three-character groups off the right-hand end.
    groups = []
    while integer:
        groups.insert(0, integer[-3:])
        integer = integer[:-3]

    return sign + ' '.join(groups) + rest
34 | 
def read():
    """Run every benchmark script and collect the dictionaries they print.

    Each script listed in ``scripts`` is executed through a pipe and its
    standard output is evaluated as a Python expression.

    Returns a dict that maps the display name of a toolkit to the evaluated
    output of its script.
    """
    R = {}
    for name, script in scripts:
        fi = os.popen(script, 'r')
        try:
            output = fi.read()
        finally:
            # Close the pipe explicitly so the child process is reaped here
            # rather than whenever the object happens to be collected.
            fi.close()
        # SECURITY NOTE: eval() executes whatever the script printed.  That is
        # only acceptable because the scripts are local and trusted; never
        # point `scripts` at anything that is not.
        R[name] = eval(output)
    return R
41 | 
def output_update(fo, R):
    """Write one tab-separated line per benchmark configuration.

    Every line holds the toolkit name, the configuration name and the value
    stored under ``'update'``, or 0 when that key is absent.

    ``fo`` is a writable file object and ``R`` the mapping built by read().
    """
    for name, script in scripts:
        # items() behaves identically here and also exists on Python 3,
        # unlike iteritems().
        for param, result in R[name].items():
            fo.write('%s\t%s\t%f\n' % (name, param, result.get('update', 0.)))
46 | 
def output_table(fo, R):
    """Write one ``<row>`` element per benchmark configuration.

    Rows are emitted in the order of ``scripts``.  Each row holds the toolkit
    name, the configuration name, an empty entry, the formatted statistics
    and a link to the published log file.

    ``fo`` is a writable file object and ``R`` the mapping built by read().
    Raises KeyError when a result lacks one of the mandatory statistics.
    """
    for name, script in scripts:
        # items() behaves identically here and also exists on Python 3,
        # unlike iteritems().
        for param, result in R[name].items():
            fo.write('<row>\n')
            fo.write('<entry>%s</entry>\n' % name)
            fo.write('<entry>%s</entry>\n' % param)
            fo.write('<entry></entry>\n')
            fo.write('<entry>%s</entry>\n' % number('%d' % result['num_features']))
            fo.write('<entry>%s</entry>\n' % number('%.1f' % result['time']))
            fo.write('<entry>%s</entry>\n' % number('%d' % result['iterations']))
            # 'update' falls back to 0 when absent, as output_update() does.
            fo.write('<entry>%s</entry>\n' % number('%.1f' % result.get('update', 0.)))
            fo.write('<entry>%s</entry>\n' % number('%.1f' % result['loss']))
            # The stored accuracy is multiplied by 100 for display.
            fo.write('<entry>%.3f</entry>\n' % (100. * result['accuracy']))
            fo.write('<entry><ulink url="%s">Log</ulink></entry>\n' % result['logfile'])
            fo.write('</row>\n')
            fo.write('\n')
63 | 
64 | 
if __name__ == '__main__':
    # Run every benchmark script and render the results as table rows.
    output_table(sys.stdout, read())
68 | 


--------------------------------------------------------------------------------
/crfsuite/bench/crfsuite_to_mallet.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | 
 5 | fi = sys.stdin
 6 | fo = sys.stdout
 7 | 
 8 | for line in fi:
 9 |     line = line.strip('\n')
10 |     if not line:
11 |         fo.write('\n')
12 | 
13 |     fields = line.split('\t')
14 |     fo.write('%s %s\n' % (' '.join(fields[1:]), fields[0]))
15 | 
16 |     
17 | 


--------------------------------------------------------------------------------
/crfsuite/bench/plot_performance.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import re
 5 | 
# Matches the banner line that opens an iteration (or epoch) in the log; the
# ordinal of the iteration is captured in group 2.
re_iteration = re.compile(r'^\*\*\*\*\* (Iteration|Epoch) #(\d+) \*\*\*\*\*')
# Metric name -> pattern whose group 1 captures the value of that metric on a
# log line.  read() converts each captured value with float().
patterns = {
    'loss': re.compile(r'^Loss: ([\d.]+)'),
    'accuracy': re.compile(r'^Item accuracy: \d+ / \d+ \(([\d.]+)\)'),
    'norm': re.compile(r'^Feature [L2-]+norm: ([\d.]+)'),
}
12 | 
def read(fi):
    """Parse a training log into one dict of metrics per iteration.

    ``fi`` is an iterable of log lines.  Every line matching ``re_iteration``
    starts a new record; later lines that match an entry of ``patterns``
    store the captured value, converted to float, under that entry's name.
    Lines seen before the first iteration banner are ignored.

    Returns the list of per-iteration dicts in log order.  The process exits
    with status 1 when the iteration numbers are not consecutive from 1.
    """
    D = []
    for line in fi:
        line = line.strip('\n')
        m = re_iteration.match(line)
        if m is not None:
            # The banner's number must be exactly one past the records seen
            # so far; anything else means the log is out of sync.
            if len(D)+1 != int(m.group(2)):
                sys.stderr.write('ERROR: sync\n')
                sys.exit(1)
            D.append({})
            continue

        if D:
            # items() behaves identically here and also exists on Python 3,
            # unlike iteritems().
            for name, pattern in patterns.items():
                m = pattern.match(line)
                if m is not None:
                    D[-1][name] = float(m.group(1))

    return D
32 | 
if __name__ == '__main__':
    fi = sys.stdin
    fo = sys.stdout

    # Emit one line per iteration: its 1-based index followed by the value of
    # every metric.
    # NOTE(review): the column order follows the iteration order of the
    # `patterns` dict, and a metric missing from an iteration raises KeyError
    # -- confirm both are acceptable for the plotting step.
    for i, item in enumerate(read(fi), 1):
        fo.write('%d' % i)
        # Iterating the dict directly yields its keys on Python 2 and 3,
        # unlike iterkeys().
        for name in patterns:
            fo.write(' %f' % item[name])
        fo.write('\n')
45 | 


--------------------------------------------------------------------------------
/crfsuite/configure.in:
--------------------------------------------------------------------------------
dnl $Id$
dnl
dnl Autoconf input for CRFsuite: probes the build environment and generates
dnl the Makefiles listed in AC_CONFIG_FILES at the end of this file.
dnl
dnl Exported and configured variables:
dnl CFLAGS
dnl LDFLAGS
dnl INCLUDES


dnl ------------------------------------------------------------------
dnl Initialization for autoconf
dnl ------------------------------------------------------------------
AC_PREREQ(2.59)
AC_INIT
AC_CONFIG_SRCDIR([frontend/main.c])
AC_CONFIG_MACRO_DIR([m4])

dnl ------------------------------------------------------------------
dnl Checks for system
dnl ------------------------------------------------------------------
dnl NOTE(review): the three macros after AC_CANONICAL_HOST are, as far as I
dnl know, marked obsolete by current Autoconf manuals -- confirm whether they
dnl can be dropped or replaced.
AC_CANONICAL_HOST
AC_AIX
AC_MINIX
AC_ISC_POSIX


dnl ------------------------------------------------------------------
dnl Initialization for automake
dnl ------------------------------------------------------------------
dnl NOTE(review): the last macro of this group is, as far as I know, gone
dnl from recent Automake releases -- confirm against the supported toolchain.
AM_INIT_AUTOMAKE(crfsuite, 0.12)
AC_CONFIG_HEADERS(config.h)
AM_MAINTAINER_MODE
AM_C_PROTOTYPES


dnl ------------------------------------------------------------------
dnl Checks for program
dnl ------------------------------------------------------------------
AM_PROG_CC_C_O
AC_PROG_LIBTOOL
AC_PROG_INSTALL
AC_PROG_LN_S


dnl ------------------------------------------------------------------
dnl Initialization for variables
dnl ------------------------------------------------------------------
dnl CFLAGS and LDFLAGS are rebuilt from the ac_save_* variables, with the
dnl C99 switch prepended to CFLAGS.
dnl NOTE(review): the ac_save_* variables are presumably Autoconf internals
dnl holding the user-supplied flags -- confirm they are set at this point.
CFLAGS="-std=c99 ${ac_save_CFLAGS}"
LDFLAGS="${ac_save_LDFLAGS}"
INCLUDES="-I\$(top_srcdir) -I\$(top_srcdir)/include -I\$(srcdir)"


dnl ------------------------------------------------------------------
dnl Checks for header files.
dnl ------------------------------------------------------------------
AC_HEADER_STDC
AC_CHECK_HEADERS(fcntl.h limits.h malloc.h strings.h unistd.h stdint.h)


dnl ------------------------------------------------------------------
dnl Checks for typedefs, structures, and compiler characteristics.
dnl ------------------------------------------------------------------
dnl NOTE(review): the size check on the second line of this group is invoked
dnl without a type argument, unlike the six calls further down -- looks like
dnl a leftover; confirm.
AC_C_CONST
AC_CHECK_SIZEOF
AC_TYPE_SIZE_T
AC_STRUCT_TM
AC_CHECK_SIZEOF(short)
AC_CHECK_SIZEOF(unsigned short)
AC_CHECK_SIZEOF(int)
AC_CHECK_SIZEOF(unsigned int)
AC_CHECK_SIZEOF(long)
AC_CHECK_SIZEOF(unsigned long)

AC_CHECK_TYPES([uint8_t, uint16_t, uint32_t])

dnl ------------------------------------------------------------------
dnl Checks for debugging mode
dnl ------------------------------------------------------------------
dnl With --enable-debug the build gets debug flags; every other setting
dnl selects the optimized flag set of the else branch.
AC_ARG_ENABLE(
  debug,
  [AS_HELP_STRING([--enable-debug],[turn on debugging])]
)

if test "x$enable_debug" = "xyes"; then
   CFLAGS="-DDEBUG -O -g ${CFLAGS}"
else
   CFLAGS="-O3 -fomit-frame-pointer -ffast-math -Winline ${CFLAGS}"
fi

dnl ------------------------------------------------------------------
dnl Checks for profiling mode
dnl ------------------------------------------------------------------
AC_ARG_ENABLE(
  profile,
  [AS_HELP_STRING([--enable-profile],[turn on profiling])]
)

if test "x$enable_profile" = "xyes"; then
   CFLAGS="-DPROFILE -pg ${CFLAGS}"
fi


dnl ------------------------------------------------------------------
dnl Checks for SSE2 build
dnl ------------------------------------------------------------------
dnl The SSE2 flags are added unless --disable-sse2 is given explicitly, so
dnl SSE2 is on by default.
AC_ARG_ENABLE([sse2],
    AS_HELP_STRING(
        [--disable-sse2],
        [disable SSE2 optimization routines]
        )
    )

AS_IF([test "x$enable_sse2" != "xno"], [
    CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}"
])


dnl ------------------------------------------------------------------
dnl Checks for library functions.
dnl ------------------------------------------------------------------
AC_FUNC_ALLOCA
AC_FUNC_MEMCMP
AC_FUNC_VPRINTF
AC_CHECK_FUNCS(strdup strerror strtol strtoul)

dnl Check for math library
dnl NOTE(review): the probe symbol used here is rand, which normally lives in
dnl the C library rather than the math library -- confirm the intended symbol.
AC_CHECK_LIB(m, rand)

dnl An optional liblbfgs installation prefix extends the include and library
dnl search paths before the library itself is probed.
AC_ARG_WITH(
	liblbfgs,
	[AS_HELP_STRING([--with-liblbfgs=DIR],[liblbfgs directory])],
	[INCLUDES="${INCLUDES} -I${withval}/include"; LDFLAGS="${LDFLAGS} -L${withval}/lib"]
)
AC_CHECK_LIB(lbfgs, lbfgs)

dnl ------------------------------------------------------------------
dnl Export variables
dnl ------------------------------------------------------------------
AC_SUBST(CFLAGS)
AC_SUBST(LDFLAGS)
AC_SUBST(INCLUDES)
AC_SUBST(includedir)
AC_SUBST(libdir)

dnl ------------------------------------------------------------------
dnl Output the configure results.
dnl ------------------------------------------------------------------
AC_CONFIG_FILES(Makefile genbinary.sh include/Makefile lib/cqdb/Makefile lib/crf/Makefile frontend/Makefile swig/Makefile swig/perl/Makefile.PL)
AC_OUTPUT
150 | 


--------------------------------------------------------------------------------
/crfsuite/crfsuite.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 11.00
 3 | # Visual Studio 2010
 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "crf", "lib\crf\crf.vcxproj", "{D6B16F2E-DA86-4591-8B50-348AB7E3432E}"
 5 | EndProject
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "frontend", "frontend\frontend.vcxproj", "{CEC83336-7B18-408B-9F3C-D11225609540}"
 7 | EndProject
 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cqdb", "lib\cqdb\cqdb.vcxproj", "{46A23DE6-7E34-4429-8F15-FCC3C083FC5B}"
 9 | EndProject
10 | Global
11 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | 		Debug|Win32 = Debug|Win32
13 | 		Debug|x64 = Debug|x64
14 | 		Release|Win32 = Release|Win32
15 | 		Release|x64 = Release|x64
16 | 	EndGlobalSection
17 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
18 | 		{D6B16F2E-DA86-4591-8B50-348AB7E3432E}.Debug|Win32.ActiveCfg = Debug|Win32
19 | 		{D6B16F2E-DA86-4591-8B50-348AB7E3432E}.Debug|Win32.Build.0 = Debug|Win32
20 | 		{D6B16F2E-DA86-4591-8B50-348AB7E3432E}.Debug|x64.ActiveCfg = Debug|Win32
21 | 		{D6B16F2E-DA86-4591-8B50-348AB7E3432E}.Debug|x64.Build.0 = Debug|Win32
22 | 		{D6B16F2E-DA86-4591-8B50-348AB7E3432E}.Release|Win32.ActiveCfg = Release|Win32
23 | 		{D6B16F2E-DA86-4591-8B50-348AB7E3432E}.Release|Win32.Build.0 = Release|Win32
24 | 		{D6B16F2E-DA86-4591-8B50-348AB7E3432E}.Release|x64.ActiveCfg = Release|x64
25 | 		{D6B16F2E-DA86-4591-8B50-348AB7E3432E}.Release|x64.Build.0 = Release|x64
26 | 		{CEC83336-7B18-408B-9F3C-D11225609540}.Debug|Win32.ActiveCfg = Debug|Win32
27 | 		{CEC83336-7B18-408B-9F3C-D11225609540}.Debug|Win32.Build.0 = Debug|Win32
28 | 		{CEC83336-7B18-408B-9F3C-D11225609540}.Debug|x64.ActiveCfg = Debug|Win32
29 | 		{CEC83336-7B18-408B-9F3C-D11225609540}.Release|Win32.ActiveCfg = Release|Win32
30 | 		{CEC83336-7B18-408B-9F3C-D11225609540}.Release|Win32.Build.0 = Release|Win32
31 | 		{CEC83336-7B18-408B-9F3C-D11225609540}.Release|x64.ActiveCfg = Release|Win32
32 | 		{46A23DE6-7E34-4429-8F15-FCC3C083FC5B}.Debug|Win32.ActiveCfg = Debug|Win32
33 | 		{46A23DE6-7E34-4429-8F15-FCC3C083FC5B}.Debug|Win32.Build.0 = Debug|Win32
34 | 		{46A23DE6-7E34-4429-8F15-FCC3C083FC5B}.Debug|x64.ActiveCfg = Debug|Win32
35 | 		{46A23DE6-7E34-4429-8F15-FCC3C083FC5B}.Release|Win32.ActiveCfg = Release|Win32
36 | 		{46A23DE6-7E34-4429-8F15-FCC3C083FC5B}.Release|Win32.Build.0 = Release|Win32
37 | 		{46A23DE6-7E34-4429-8F15-FCC3C083FC5B}.Release|x64.ActiveCfg = Release|Win32
38 | 	EndGlobalSection
39 | 	GlobalSection(SolutionProperties) = preSolution
40 | 		HideSolutionNode = FALSE
41 | 	EndGlobalSection
42 | EndGlobal
43 | 


--------------------------------------------------------------------------------
/crfsuite/doc/footer.html:
--------------------------------------------------------------------------------
1 | <hr/>
2 | <div>
3 | Copyright (c) 2002-2011 by Naoaki Okazaki
4 | <br /><i>$datetime</i>
5 | </div>
6 | </body>
7 | </html>
8 | 


--------------------------------------------------------------------------------
/crfsuite/doc/header.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
 2 | <html>
 3 | <head>
 4 | <meta name="author" content="Naoaki Okazaki">
 5 | <meta name="description" content="A fast implementation of Conditional Random Fields (CRFs)">
 6 | <meta name="keywords" content="CRFsuite, Conditional Random Fields, CRF, C, fast implementation">
 7 | <meta name="robots" content="index,follow">
 8 | <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
 9 | <title>CRFSuite: A fast implementation of Conditional Random Fields (CRFs)</title>
10 | <link href="doxygen.css" rel="stylesheet" type="text/css">
11 | <link href="tabs.css" rel="stylesheet" type="text/css">
12 | </head>
13 | <body>
14 | 


--------------------------------------------------------------------------------
/crfsuite/example/chunking.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """
 4 | A feature extractor for chunking.
 5 | Copyright 2010,2011 Naoaki Okazaki.
 6 | """
 7 | 
# Separator of field values.
separator = ' '

# Field names of the input data.
fields = 'w pos y'

# Attribute templates.
# Each template is a tuple of (field name, offset) pairs, where the offset is
# relative to the current item.  crfutils.apply_templates() joins the values
# of all pairs into one attribute and emits nothing when any offset falls
# outside the sequence.
templates = (
    (('w', -2), ),
    (('w', -1), ),
    (('w',  0), ),
    (('w',  1), ),
    (('w',  2), ),
    (('w', -1), ('w',  0)),
    (('w',  0), ('w',  1)),
    (('pos', -2), ),
    (('pos', -1), ),
    (('pos',  0), ),
    (('pos',  1), ),
    (('pos',  2), ),
    (('pos', -2), ('pos', -1)),
    (('pos', -1), ('pos',  0)),
    (('pos',  0), ('pos',  1)),
    (('pos',  1), ('pos',  2)),
    (('pos', -2), ('pos', -1), ('pos',  0)),
    (('pos', -1), ('pos',  0), ('pos',  1)),
    (('pos',  0), ('pos',  1), ('pos',  2)),
    )
36 | 
37 | 
38 | import crfutils
39 | 
def feature_extractor(X):
    """Attach the templated attributes and the boundary markers to X."""
    # Expand the attribute templates for every item of the sequence.
    crfutils.apply_templates(X, templates)
    if not X:
        return
    # Mark the two ends of a non-empty sequence by hand.
    first, final = X[0], X[-1]
    first['F'].append('__BOS__')    # beginning of sequence
    final['F'].append('__EOS__')    # end of sequence
47 | 
# Script entry point: hand the extractor, the field names and the separator
# defined in this module to the shared command-line driver.
if __name__ == '__main__':
    crfutils.main(feature_extractor, fields=fields, sep=separator)
50 | 


--------------------------------------------------------------------------------
/crfsuite/example/crfutils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | A miscellaneous utility for sequential labeling.
  3 | Copyright 2010,2011 Naoaki Okazaki.
  4 | """
  5 | 
  6 | import optparse
  7 | import sys
  8 | 
  9 | def apply_templates(X, templates):
 10 |     """
 11 |     Generate features for an item sequence by applying feature templates.
 12 |     A feature template consists of a tuple of (name, offset) pairs,
 13 |     where name and offset specify a field name and offset from which
 14 |     the template extracts a feature value. Generated features are stored
 15 |     in the 'F' field of each item in the sequence.
 16 | 
 17 |     @type   X:      list of mapping objects
 18 |     @param  X:      The item sequence.
 19 |     @type   template:   tuple of (str, int)
 20 |     @param  template:   The feature template.
 21 |     """
 22 |     for template in templates:
 23 |         name = '|'.join(['%s[%d]' % (f, o) for f, o in template])
 24 |         for t in range(len(X)):
 25 |             values = []
 26 |             for field, offset in template:
 27 |                 p = t + offset
 28 |                 if p not in range(len(X)):
 29 |                     values = []
 30 |                     break
 31 |                 values.append(X[p][field])
 32 |             if values:
 33 |                 X[t]['F'].append('%s=%s' % (name, '|'.join(values)))
 34 | 
 35 | def readiter(fi, names, sep=' '):
 36 |     """
 37 |     Return an iterator for item sequences read from a file object.
 38 |     This function reads a sequence from a file object L{fi}, and
 39 |     yields the sequence as a list of mapping objects. Each line
 40 |     (item) from the file object is split by the separator character
 41 |     L{sep}. Separated values of the item are named by L{names},
 42 |     and stored in a mapping object. Every item has a field 'F' that
 43 |     is reserved for storing features.
 44 | 
 45 |     @type   fi:     file
 46 |     @param  fi:     The file object.
 47 |     @type   names:  tuple
 48 |     @param  names:  The list of field names.
 49 |     @type   sep:    str
 50 |     @param  sep:    The separator character.
 51 |     @rtype          list of mapping objects
 52 |     @return         An iterator for sequences.
 53 |     """
 54 |     X = []
 55 |     for line in fi:
 56 |         line = line.strip('\n')
 57 |         if not line:
 58 |             yield X
 59 |             X = []
 60 |         else:
 61 |             fields = line.split(sep)
 62 |             if len(fields) < len(names):
 63 |                 raise ValueError(
 64 |                     'Too few fields (%d) for %r\n%s' % (len(fields), names, line))
 65 |             item = {'F': []}    # 'F' is reserved for features.
 66 |             for i in range(len(names)):
 67 |                 item[names[i]] = fields[i]
 68 |             X.append(item)
 69 | 
 70 | def escape(src):
 71 |     """
 72 |     Escape colon characters from feature names.
 73 | 
 74 |     @type   src:    str
 75 |     @param  src:    A feature name
 76 |     @rtype          str
 77 |     @return         The feature name escaped.
 78 |     """
 79 |     return src.replace(':', '__COLON__')
 80 | 
 81 | def output_features(fo, X, field=''):
 82 |     """
 83 |     Output features (and reference labels) of a sequence in CRFSuite
 84 |     format. For each item in the sequence, this function writes a
 85 |     reference label (if L{field} is a non-empty string) and features.
 86 | 
 87 |     @type   fo:     file
 88 |     @param  fo:     The file object.
 89 |     @type   X:      list of mapping objects
 90 |     @param  X:      The sequence.
 91 |     @type   field:  str
 92 |     @param  field:  The field name of reference labels.
 93 |     """
 94 |     for t in range(len(X)):
 95 |         if field:
 96 |             fo.write('%s' % X[t][field])
 97 |         for a in X[t]['F']:
 98 |             if isinstance(a, str):
 99 |                 fo.write('\t%s' % escape(a))
100 |             else:
101 |                 fo.write('\t%s:%f' % (escape(a[0]), a[1]))
102 |         fo.write('\n')
103 |     fo.write('\n')
104 | 
def to_crfsuite(X):
    """
    Build a crfsuite.ItemSequence from an item sequence. Every feature
    in the 'F' field of an item becomes one crfsuite.Attribute; a
    feature that is not a string contributes its second element as
    the attribute value.

    @type   X:      list of mapping objects
    @param  X:      The sequence.
    @rtype          crfsuite.ItemSequence
    @return         The same sequence in crfsuite.ItemSequence type.
    """
    # Imported lazily so the module is needed only when tagging.
    import crfsuite
    sequence = crfsuite.ItemSequence()
    for entry in X:
        converted = crfsuite.Item()
        for feature in entry['F']:
            if not isinstance(feature, str):
                attribute = crfsuite.Attribute(escape(feature[0]), feature[1])
            else:
                attribute = crfsuite.Attribute(escape(feature))
            converted.append(attribute)
        sequence.append(converted)
    return sequence
126 | 
def main(feature_extractor, fields='w pos y', sep=' '):
    """
    Command-line driver shared by the example scripts. Sequences are
    read from STDIN. Without the -t option their attributes are written
    to STDOUT; with it, the input is tagged by the given model.

    @type   feature_extractor:  callable
    @param  feature_extractor:  Called with each sequence to fill in the
                                'F' field of its items.
    @type   fields:     str
    @param  fields:     Default for the -f option (space-separated names).
    @type   sep:        str
    @param  sep:        Default for the -s option.
    """
    source, sink = sys.stdin, sys.stdout

    # Describe and parse the command line.
    usage = """usage: %prog [options]
This utility reads a data set from STDIN, and outputs attributes to STDOUT.
Each line of a data set must consist of field values separated by SEPARATOR
characters. The names and order of field values can be specified by -f option.
The separator character can be specified with -s option. Instead of outputting
attributes, this utility tags the input data when a model file is specified by
-t option (CRFsuite Python module must be installed)."""
    parser = optparse.OptionParser(usage=usage)
    parser.add_option(
        '-t', dest='model',
        help='tag the input using the model (requires "crfsuite" module)')
    parser.add_option(
        '-f', dest='fields', default=fields,
        help='specify field names of input data [default: "%default"]')
    parser.add_option(
        '-s', dest='separator', default=sep,
        help='specify the separator of columns of input data [default: "%default"]')
    options, _ = parser.parse_args()

    # Field names of the input columns, ['w', 'pos', 'y'] by default.
    names = options.fields.split(' ')

    if not options.model:
        # Attribute mode: extract features and print them with the label
        # taken from the 'y' field.
        for X in readiter(source, names, options.separator):
            feature_extractor(X)
            output_features(sink, X, 'y')
        return

    # Tagging mode: load the model once, then label every sequence.
    import crfsuite
    tagger = crfsuite.Tagger()
    tagger.open(options.model)

    for X in readiter(source, names, options.separator):
        feature_extractor(X)
        yseq = tagger.tag(to_crfsuite(X))
        # Echo the input fields followed by the predicted label.
        for t, item in enumerate(X):
            sink.write('\t'.join([item[f] for f in names]))
            sink.write('\t%s\n' % yseq[t])
        sink.write('\n')
180 | 


--------------------------------------------------------------------------------
/crfsuite/example/pos.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """
 4 | An example for part-of-speech tagging.
 5 | Copyright 2010,2011 Naoaki Okazaki.
 6 | """
 7 | 
# Separator of field values.
separator = ' '

# Field names of the input data.
fields = 'w num cap sym p1 p2 p3 p4 s1 s2 s3 s4 y'

# Feature template. This template is identical to the one bundled in CRF++
# distribution, but written in a Python object.
# Each template is a tuple of (field name, offset) pairs, where the offset is
# relative to the current item; crfutils.apply_templates() emits nothing for
# a template when any of its offsets falls outside the sequence.
templates = (
    (('num', 0), ),
    (('cap', 0), ),
    (('sym', 0), ),
    (('p1', 0), ),
    (('p2', 0), ),
    (('p3', 0), ),
    (('p4', 0), ),
    (('s1', 0), ),
    (('s2', 0), ),
    (('s3', 0), ),
    (('s4', 0), ),

    (('w',  0), ),
    (('w', -1), ),
    (('w',  1), ),
    (('w', -2), ),
    (('w',  2), ),
    (('w', -2), ('w',  -1)),
    (('w', -1), ('w',  0)),
    (('w',  0), ('w',  1)),
    (('w',  1), ('w',  2)),
    (('w', -2), ('w',  -1), ('w',  0)),
    (('w', -1), ('w',  0), ('w',  1)),
    (('w', 0), ('w',  1), ('w',  2)),
    (('w', -2), ('w',  -1), ('w',  0), ('w',  1)),
    (('w',  -1), ('w',  0), ('w',  1), ('w', 2)),
    (('w', -2), ('w',  -1), ('w',  0), ('w',  1), ('w',  2)),

    (('w',  0), ('w',  -1)),
    (('w',  0), ('w',  -2)),
    (('w',  0), ('w',  -3)),
    (('w',  0), ('w',  -4)),
    (('w',  0), ('w',  -5)),
    (('w',  0), ('w',  -6)),
    (('w',  0), ('w',  -7)),
    (('w',  0), ('w',  -8)),
    (('w',  0), ('w',  -9)),

    # NOTE(review): the next template repeats one already listed above, so
    # its attribute is appended twice per item -- confirm this is intended.
    (('w',  0), ('w',  1)),
    (('w',  0), ('w',  2)),
    (('w',  0), ('w',  3)),
    (('w',  0), ('w',  4)),
    (('w',  0), ('w',  5)),
    (('w',  0), ('w',  6)),
    (('w',  0), ('w',  7)),
    (('w',  0), ('w',  8)),
    (('w',  0), ('w',  9)),
    )
65 | 
66 | 
67 | import crfutils
68 | 
def feature_extractor(X):
    """Attach the templated attributes and the boundary markers to X."""
    # Expand the feature templates for every item of the sequence.
    crfutils.apply_templates(X, templates)
    if not X:
        return
    # Mark the two ends of a non-empty sequence by hand.
    first, final = X[0], X[-1]
    first['F'].append('__BOS__')    # beginning of sequence
    final['F'].append('__EOS__')    # end of sequence
76 | 
# Script entry point: hand the extractor, the field names and the separator
# defined in this module to the shared command-line driver.
if __name__ == '__main__':
    crfutils.main(feature_extractor, fields=fields, sep=separator)
79 | 


--------------------------------------------------------------------------------
/crfsuite/example/template.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import re
 4 | import sys
 5 | 
 6 | class FeatureExtractor:
 7 |     def __init__(self):
 8 |         self.macro = re.compile(r'%x\[(?P<row>[\d-]+),(?P<col>[\d]+)\]')
 9 |         self.inst = []
10 |         self.t = 0
11 |         self.templates = []
12 | 
13 |     def read(self, fi):
14 |         self.templates = []
15 |         for line in fi:
16 |             line = line.strip()
17 |             if line.startswith('#'):
18 |                 continue
19 |             if line.startswith('U'):
20 |                 self.templates.append(line.replace(':', '='))
21 |             elif line == 'B':
22 |                 continue
23 |             elif line.startswith('B'):
24 |                 sys.stderr(
25 |                     'ERROR: bigram templates not supported: %s\n' % line)
26 |                 sys.exit(1)
27 | 
28 |     def replace(self, m):
29 |         row = self.t + int(m.group('row'))
30 |         col = int(m.group('col'))
31 |         if row in range(0, len(self.inst)):
32 |             return self.inst[row]['x'][col]
33 |         else:
34 |             return ''
35 | 
36 |     def apply(self, inst, t):
37 | 	self.inst = inst
38 | 	self.t = t
39 |         for template in self.templates:
40 |             f = re.sub(self.macro, self.replace, template)
41 |             self.inst[t]['F'].append(f)
42 | 
def readiter(fi, sep=None):
    """Yield the sequences found in ``fi`` as lists of item dictionaries.

    Every non-empty line is split on ``sep``; the last field becomes the
    label 'y', the preceding fields the observation columns 'x', and 'F'
    starts out as an empty attribute list.  An empty line ends the current
    sequence (so consecutive empty lines yield empty sequences).  A final
    sequence that is not followed by an empty line is yielded as well
    instead of being dropped.
    """
    X = []
    for line in fi:
        line = line.strip('\n')
        if not line:
            yield X
            X = []
        else:
            fields = line.split(sep)
            X.append({
                'x': fields[0:-1],
                'y': fields[-1],
                'F': [],
            })
    if X:
        # Input ended without a terminating blank line.
        yield X
58 | 
if __name__ == '__main__':
    import optparse

    fi = sys.stdin
    fo = sys.stdout

    # Parse the command-line arguments.
    parser = optparse.OptionParser(usage="""usage: %prog <template>
This utility reads a data set from STDIN, applies feature templates compatible
with CRF++, and outputs attributes to STDOUT. Each line of a data set must
consist of field values separated by SEPARATOR characters (customizable with
-s option)."""
        )
    parser.add_option(
        '-s', dest='separator', default='\t',
        help='specify the separator of columns of input data [default: "\\t"]'
        )
    (options, args) = parser.parse_args()

    # The template file is mandatory; fail with the usage text rather than
    # with an IndexError traceback.
    if not args:
        parser.error('no template file specified')

    F = FeatureExtractor()
    with open(args[0]) as ft:
        F.read(ft)

    # One output line per item: the label followed by its tab-separated
    # attributes; a blank line terminates each sequence.
    for inst in readiter(fi, options.separator):
        for t in range(len(inst)):
            F.apply(inst, t)
            fo.write('%s' % inst[t]['y'])
            for attr in inst[t]['F']:
                # Colons in attribute text are written out as __COLON__.
                fo.write('\t%s' % attr.replace(':', '__COLON__'))
            fo.write('\n')
        fo.write('\n')
89 | 


--------------------------------------------------------------------------------
/crfsuite/frontend/Makefile.am:
--------------------------------------------------------------------------------
 1 | # $Id:$
 2 | 
 3 | bin_PROGRAMS = crfsuite
 4 | #man_MANS = crfsuite.1
 5 | #EXTRA_DIST = ${man_MANS}
 6 | 
 7 | EXTRA_DIST = \
 8 | 	frontend.vcxproj
 9 | 
10 | crfsuite_SOURCES = \
11 | 	iwa.h \
12 | 	iwa.c \
13 | 	option.h \
14 | 	option.c \
15 | 	readdata.h \
16 | 	reader.c \
17 | 	learn.c \
18 | 	tag.c \
19 | 	dump.c \
20 | 	main.c
21 | 
22 | #crfsuite_CPPFLAGS =
23 | 
24 | AM_CFLAGS = @CFLAGS@
25 | INCLUDES = @INCLUDES@
26 | AM_LDFLAGS = @LDFLAGS@
27 | 
28 | crfsuite_CFLAGS = -I$(top_builddir)/include
29 | crfsuite_LDADD = $(top_builddir)/lib/crf/libcrfsuite.la
30 | 


--------------------------------------------------------------------------------
/crfsuite/frontend/dump.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *        Dump command for CRFsuite frontend.
  3 |  *
  4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
  5 |  * All rights reserved.
  6 |  *
  7 |  * Redistribution and use in source and binary forms, with or without
  8 |  * modification, are permitted provided that the following conditions are met:
  9 |  *     * Redistributions of source code must retain the above copyright
 10 |  *       notice, this list of conditions and the following disclaimer.
 11 |  *     * Redistributions in binary form must reproduce the above copyright
 12 |  *       notice, this list of conditions and the following disclaimer in the
 13 |  *       documentation and/or other materials provided with the distribution.
 14 |  *     * Neither the names of the authors nor the names of its contributors
 15 |  *       may be used to endorse or promote products derived from this
 16 |  *       software without specific prior written permission.
 17 |  *
 18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 |  */
 30 | 
 31 | /* $Id$ */
 32 | 
 33 | #include <os.h>
 34 | 
 35 | #include <stdio.h>
 36 | #include <stdlib.h>
 37 | #include <string.h>
 38 | 
 39 | #include <crfsuite.h>
 40 | #include "option.h"
 41 | 
 42 | #define    SAFE_RELEASE(obj)    if ((obj) != NULL) { (obj)->release(obj); (obj) = NULL; }
 43 | 
/* Command-line options of the "dump" command. */
typedef struct {
    int help;   /* Non-zero when -h / --help was given. */
} dump_option_t;
 47 | 
 48 | static void dump_option_init(dump_option_t* opt)
 49 | {
 50 |     memset(opt, 0, sizeof(*opt));
 51 | }
 52 | 
/*
 * Counterpart of dump_option_init().  Intentionally empty: the option
 * structure holds no resources that need to be released.
 */
static void dump_option_finish(dump_option_t* opt)
{
}
 56 | 
/*
 * Option handler parse_dump_options(), generated by the option-map macros:
 * -h / --help sets opt->help; the option takes no argument.
 */
BEGIN_OPTION_MAP(parse_dump_options, dump_option_t)

    ON_OPTION(SHORTOPT('h') || LONGOPT("help"))
        opt->help = 1;

END_OPTION_MAP()
 63 | 
 64 | static void show_usage(FILE *fp, const char *argv0, const char *command)
 65 | {
 66 |     fprintf(fp, "USAGE: %s %s [OPTIONS] <MODEL>\n", argv0, command);
 67 |     fprintf(fp, "Output the model stored in the file (MODEL) in a plain-text format\n");
 68 |     fprintf(fp, "\n");
 69 |     fprintf(fp, "OPTIONS:\n");
 70 |     fprintf(fp, "    -h, --help      Show the usage of this command and exit\n");
 71 | }
 72 | 
 73 | int main_dump(int argc, char *argv[], const char *argv0)
 74 | {
 75 |     int ret = 0, arg_used = 0;
 76 |     dump_option_t opt;
 77 |     const char *command = argv[0];
 78 |     FILE *fp = NULL, *fpi = stdin, *fpo = stdout, *fpe = stderr;
 79 |     crfsuite_model_t *model = NULL;
 80 | 
 81 |     /* Parse the command-line option. */
 82 |     dump_option_init(&opt);
 83 |     arg_used = option_parse(++argv, --argc, parse_dump_options, &opt);
 84 |     if (arg_used < 0) {
 85 |         ret = 1;
 86 |         goto force_exit;
 87 |     }
 88 | 
 89 |     /* Show the help message for this command if specified. */
 90 |     if (opt.help) {
 91 |         show_usage(fpo, argv0, command);
 92 |         goto force_exit;
 93 |     }
 94 | 
 95 |     /* Check for the existence of the model file. */
 96 |     if (argc <= arg_used) {
 97 |         fprintf(fpe, "ERROR: No model specified.\n");
 98 |         ret = 1;
 99 |         goto force_exit;
100 |     }
101 | 
102 |     /* Create a model instance corresponding to the model file. */
103 |     if (ret = crfsuite_create_instance_from_file(argv[arg_used], (void**)&model)) {
104 |         goto force_exit;
105 |     }
106 |         
107 |     /* Dump the model. */
108 |     if (ret = model->dump(model, fpo)) {
109 |         goto force_exit;
110 |     }
111 | 
112 | force_exit:
113 |     SAFE_RELEASE(model);
114 |     dump_option_finish(&opt);
115 |     return ret;
116 | }
117 | 


--------------------------------------------------------------------------------
/crfsuite/frontend/frontend.vcxproj:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|Win32">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>Win32</Platform>
 11 |     </ProjectConfiguration>
 12 |   </ItemGroup>
 13 |   <PropertyGroup Label="Globals">
 14 |     <ProjectGuid>{CEC83336-7B18-408B-9F3C-D11225609540}</ProjectGuid>
 15 |     <RootNamespace>frontend</RootNamespace>
 16 |     <Keyword>Win32Proj</Keyword>
 17 |   </PropertyGroup>
 18 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 19 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 20 |     <ConfigurationType>Application</ConfigurationType>
 21 |     <CharacterSet>Unicode</CharacterSet>
 22 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 23 |   </PropertyGroup>
 24 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 25 |     <ConfigurationType>Application</ConfigurationType>
 26 |     <CharacterSet>Unicode</CharacterSet>
 27 |   </PropertyGroup>
 28 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 29 |   <ImportGroup Label="ExtensionSettings">
 30 |   </ImportGroup>
 31 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
 32 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 33 |   </ImportGroup>
 34 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
 35 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 36 |   </ImportGroup>
 37 |   <PropertyGroup Label="UserMacros" />
 38 |   <PropertyGroup>
 39 |     <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
 40 |     <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
 41 |     <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
 42 |     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
 43 |     <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
 44 |     <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
 45 |     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
 46 |     <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
 47 |     <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
 48 |     <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
 49 |     <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
 50 |     <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
 51 |     <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
 52 |     <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionName)</TargetName>
 53 |     <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionName)</TargetName>
 54 |     <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\lib\crf\src;$(IncludePath)</IncludePath>
 55 |   </PropertyGroup>
 56 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 57 |     <ClCompile>
 58 |       <Optimization>Disabled</Optimization>
 59 |       <AdditionalIncludeDirectories>$(SolutionDir)include;$(SolutionDir)win32;$(SolutionDir)lib\crf\include;$(SolutionDir)lib\crf\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 60 |       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 61 |       <MinimalRebuild>true</MinimalRebuild>
 62 |       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
 63 |       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
 64 |       <FloatingPointModel>Strict</FloatingPointModel>
 65 |       <FloatingPointExceptions>true</FloatingPointExceptions>
 66 |       <PrecompiledHeader>
 67 |       </PrecompiledHeader>
 68 |       <WarningLevel>Level3</WarningLevel>
 69 |       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
 70 |     </ClCompile>
 71 |     <Link>
 72 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 73 |       <SubSystem>Console</SubSystem>
 74 |       <RandomizedBaseAddress>false</RandomizedBaseAddress>
 75 |       <DataExecutionPrevention>
 76 |       </DataExecutionPrevention>
 77 |       <TargetMachine>MachineX86</TargetMachine>
 78 |     </Link>
 79 |   </ItemDefinitionGroup>
 80 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 81 |     <ClCompile>
 82 |       <Optimization>Full</Optimization>
 83 |       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
 84 |       <IntrinsicFunctions>true</IntrinsicFunctions>
 85 |       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
 86 |       <AdditionalIncludeDirectories>$(SolutionDir)include;$(SolutionDir)win32;$(SolutionDir)lib\crf\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 87 |       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 88 |       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
 89 |       <FloatingPointExceptions>true</FloatingPointExceptions>
 90 |       <PrecompiledHeader>
 91 |       </PrecompiledHeader>
 92 |       <WarningLevel>Level1</WarningLevel>
 93 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
 94 |     </ClCompile>
 95 |     <Link>
 96 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 97 |       <SubSystem>Console</SubSystem>
 98 |       <OptimizeReferences>true</OptimizeReferences>
 99 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
100 |       <RandomizedBaseAddress>false</RandomizedBaseAddress>
101 |       <DataExecutionPrevention>
102 |       </DataExecutionPrevention>
103 |       <TargetMachine>MachineX86</TargetMachine>
104 |     </Link>
105 |   </ItemDefinitionGroup>
106 |   <ItemGroup>
107 |     <ClCompile Include="dump.c" />
108 |     <ClCompile Include="iwa.c" />
109 |     <ClCompile Include="learn.c" />
110 |     <ClCompile Include="main.c" />
111 |     <ClCompile Include="option.c" />
112 |     <ClCompile Include="reader.c" />
113 |     <ClCompile Include="tag.c" />
114 |   </ItemGroup>
115 |   <ItemGroup>
116 |     <ClInclude Include="iwa.h" />
117 |     <ClInclude Include="option.h" />
118 |     <ClInclude Include="..\include\os.h" />
119 |     <ClInclude Include="readdata.h" />
120 |   </ItemGroup>
121 |   <ItemGroup>
122 |     <ProjectReference Include="..\lib\crf\crf.vcxproj">
123 |       <Project>{d6b16f2e-da86-4591-8b50-348ab7e3432e}</Project>
124 |       <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
125 |     </ProjectReference>
126 |   </ItemGroup>
127 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
128 |   <ImportGroup Label="ExtensionTargets">
129 |   </ImportGroup>
130 | </Project>


--------------------------------------------------------------------------------
/crfsuite/frontend/frontend.vcxproj.user:
--------------------------------------------------------------------------------
1 | ﻿<?xml version="1.0" encoding="utf-8"?>
2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
4 |     <LocalDebuggerCommandArguments>tag -t -m CoNLL2000.model crfnn_test.txt</LocalDebuggerCommandArguments>
5 |     <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
6 |   </PropertyGroup>
7 | </Project>


--------------------------------------------------------------------------------
/crfsuite/frontend/iwa.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *        A parser for Item With Attributes (IWA) format.
 3 |  *
 4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *     * Redistributions of source code must retain the above copyright
10 |  *       notice, this list of conditions and the following disclaimer.
11 |  *     * Redistributions in binary form must reproduce the above copyright
12 |  *       notice, this list of conditions and the following disclaimer in the
13 |  *       documentation and/or other materials provided with the distribution.
14 |  *     * Neither the names of the authors nor the names of its contributors
15 |  *       may be used to endorse or promote products derived from this
16 |  *       software without specific prior written permission.
17 |  *
18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 | 
31 | /* $Id$ */
32 | 
33 | #ifndef    __IWA_H__
34 | #define    __IWA_H__
35 | 
36 | #ifdef    __cplusplus
37 | extern "C" {
38 | #endif/*__cplusplus*/
39 | 
/* Reader handle; struct tag_iwa is only forward-declared in this header. */
typedef struct tag_iwa iwa_t;

/*
 * Token type codes, presumably the values stored in iwa_token_t::type.
 * NOTE(review): the meaning of each code is not visible in this header
 * (BOI/EOI presumably stand for "beginning/end of item") -- confirm in
 * iwa.c before relying on it.
 */
enum {
    IWA_NONE,
    IWA_EOF,
    IWA_BOI,
    IWA_EOI,
    IWA_ITEM,
};
49 | 
/*
 * A token returned by iwa_read(): a type code plus an attribute/value pair
 * of C strings.
 * NOTE(review): ownership and lifetime of attr/value are not visible here;
 * presumably they point into storage owned by the reader -- confirm.
 */
struct tag_iwa_token {
    int type;
    const char *attr;
    const char *value;
};
typedef struct tag_iwa_token iwa_token_t;
56 | 
/*
 * Reader API: create a reader on a stream, fetch tokens, destroy the reader.
 * NOTE(review): this header uses FILE without including <stdio.h>, so the
 * includer has to include <stdio.h> first.
 */
iwa_t* iwa_reader(FILE *fp);
const iwa_token_t* iwa_read(iwa_t* iwa);
void iwa_delete(iwa_t* iwa);
60 | 
61 | #ifdef    __cplusplus
62 | }
63 | #endif/*__cplusplus*/
64 | 
65 | #endif/*__IWA_H__*/
66 | 


--------------------------------------------------------------------------------
/crfsuite/frontend/main.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *        CRFsuite frontend.
  3 |  *
  4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
  5 |  * All rights reserved.
  6 |  *
  7 |  * Redistribution and use in source and binary forms, with or without
  8 |  * modification, are permitted provided that the following conditions are met:
  9 |  *     * Redistributions of source code must retain the above copyright
 10 |  *       notice, this list of conditions and the following disclaimer.
 11 |  *     * Redistributions in binary form must reproduce the above copyright
 12 |  *       notice, this list of conditions and the following disclaimer in the
 13 |  *       documentation and/or other materials provided with the distribution.
 14 |  *     * Neither the names of the authors nor the names of its contributors
 15 |  *       may be used to endorse or promote products derived from this
 16 |  *       software without specific prior written permission.
 17 |  *
 18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 |  */
 30 | 
 31 | /* $Id$ */
 32 | 
 33 | #include <os.h>
 34 | 
 35 | #include <stdio.h>
 36 | #include <stdlib.h>
 37 | #include <string.h>
 38 | #include "option.h"
 39 | #include <crfsuite.h>
 40 | 
 41 | #define    APPLICATION_S    "CRFSuite"
 42 | 
 43 | int main_learn(int argc, char *argv[], const char *argv0, double *wTest);
 44 | int main_tag(int argc, char *argv[], const char *argv0);
 45 | int main_dump(int argc, char *argv[], const char *argv0);
 46 | 
 47 | 
 48 | 
/* Global (command-independent) options of the frontend. */
typedef struct {
    int help;            /**< Show help message and exit. */

    /* Stream handles; zeroed by option_init() and not assigned anywhere in
     * this file (main() uses its own local stream variables). */
    FILE *fpi;
    FILE *fpo;
    FILE *fpe;
} option_t;
 56 | 
 57 | static void option_init(option_t* opt)
 58 | {
 59 |     memset(opt, 0, sizeof(*opt));
 60 | }
 61 | 
/*
 * Counterpart of option_init().  Intentionally empty: nothing stored in the
 * option structure is released here.
 */
static void option_finish(option_t* opt)
{
}
 65 | 
/*
 * Option handler parse_options(), generated by the option-map macros:
 * -h / --help sets opt->help; the option takes no argument.
 */
BEGIN_OPTION_MAP(parse_options, option_t)

    ON_OPTION(SHORTOPT('h') || LONGOPT("help"))
        opt->help = 1;

END_OPTION_MAP()
 72 | 
 73 | void show_copyright(FILE *fp)
 74 | {
 75 |     fprintf(fp, APPLICATION_S " " CRFSUITE_VERSION "  " CRFSUITE_COPYRIGHT "\n");
 76 |     fprintf(fp, "\n");
 77 | }
 78 | 
 79 | static void show_usage(FILE *fp, const char *argv0)
 80 | {
 81 |     fprintf(fp, "USAGE: %s <COMMAND> [OPTIONS]\n", argv0);
 82 |     fprintf(fp, "    COMMAND     Command name to specify the processing\n");
 83 |     fprintf(fp, "    OPTIONS     Arguments for the command (optional; command-specific)\n");
 84 |     fprintf(fp, "\n");
 85 |     fprintf(fp, "COMMAND:\n");
 86 |     fprintf(fp, "    learn       Obtain a model from a training set of instances\n");
 87 |     fprintf(fp, "    tag         Assign suitable labels to given instances by using a model\n");
 88 |     fprintf(fp, "    dump        Output a model in a plain-text format\n");
 89 |     fprintf(fp, "\n");
 90 |     fprintf(fp, "For the usage of each command, specify -h option in the command argument.\n");
 91 | }
 92 | 
 93 | //wwy change return type from int to floatval_t *
 94 | int main(int argc, char *argv[])
 95 | {
 96 | 	/*
 97 | 	argv[0]="learn";
 98 | 	argv[1]="-m";
 99 | 	argv[2]="CoNLL2000.model";
100 | 	argv[3]="train.crfsuite.txt";
101 | 	*/
102 | 
103 |     option_t opt;
104 |     int arg_used = 0;
105 | 	floatval_t *test;
106 | 	floatval_t *delta;
107 | 	int i = 0;
108 |     const char *command = NULL;
109 |     const char *argv0 = argv[0];
110 |     //FILE *fpi = stdin, *fpo = stdout, *fpe = stderr;
111 | 	FILE *fpi = stdin, *fpo = stdout, *fpe = stderr;
112 | 
113 | 	test = (floatval_t*)calloc(sizeof(floatval_t), 917);
114 | 	delta = (floatval_t*)calloc(sizeof(floatval_t), 30000);
115 | 	/*
116 | 	for (i = 0; i< 917; ++i) {
117 | 		test[i] = 2 * (double)rand() / (double)RAND_MAX - 1;
118 | 	} */
119 | 
120 |     /* Parse the command-line option. */
121 |     option_init(&opt);
122 |     arg_used = option_parse(++argv, --argc, parse_options, &opt);
123 |     if (arg_used < 0) {
124 |         return 1;
125 |     }
126 | 
127 |     /* Show the help message if specified. */
128 |     if (opt.help) {
129 |         show_copyright(fpo);
130 |         show_usage(fpo, argv0);
131 |         return 0;
132 |     }
133 | 
134 |     /* Check whether a command is    specified in the command-line. */
135 |     if (argc <= arg_used) {
136 |         fprintf(fpe, "ERROR: No command specified. See help (-h) for the usage.\n");
137 |         return 1;
138 |     }
139 | 
140 |     /* Execute the command. */
141 | 	//char *comm []= "learn -m CoNLL2000.model train.crfsuite.txt";
142 | 
143 | 	
144 |     command = argv[arg_used];
145 | 
146 | 
147 | 	/*
148 | 	floatval_t *wTest;
149 | 	wTest = (floatval_t*)calloc(sizeof(floatval_t), 1000);
150 | 	int i;
151 | 	for(i=0;i<1000;i++) {
152 | 		wTest[i]=1.0;
153 | 	}*/
154 | 
155 |     if (strcmp(command, "learn") == 0) {
156 |         show_copyright(fpo);
157 |         return main_learn(argc-arg_used, argv+arg_used, argv0, test, delta);
158 |     } else if (strcmp(command, "tag") == 0) {
159 |         return main_tag(argc-arg_used, argv+arg_used, argv0);
160 |     } else if (strcmp(command, "dump") == 0) {
161 |         return main_dump(argc-arg_used, argv+arg_used, argv0);
162 |     } else {
163 |         fprintf(fpe, "ERROR: Unrecognized command (%s) specified.\n", command);    
164 |         return 1;
165 |     }
166 | 
167 |     return 0;
168 | }
169 | 


--------------------------------------------------------------------------------
/crfsuite/frontend/option.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *        A parser for command-line options.
 3 |  *
 4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *     * Redistributions of source code must retain the above copyright
10 |  *       notice, this list of conditions and the following disclaimer.
11 |  *     * Redistributions in binary form must reproduce the above copyright
12 |  *       notice, this list of conditions and the following disclaimer in the
13 |  *       documentation and/or other materials provided with the distribution.
14 |  *     * Neither the names of the authors nor the names of its contributors
15 |  *       may be used to endorse or promote products derived from this
16 |  *       software without specific prior written permission.
17 |  *
18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 | 
31 | /* $Id$ */
32 | 
33 | #include <os.h>
34 | #include <stdlib.h>
35 | #include <string.h>
36 | #include "option.h"
37 | 
/*
 * Scan the leading option arguments of argv and feed them to handler.
 *
 *  argv, num_argv  Arguments to scan, starting at index 0.
 *  handler         Called once per option; returns a negative value on
 *                  error, otherwise a count that is used to decide whether
 *                  the option consumed its argument.
 *  instance        Passed through to handler unchanged.
 *
 * Scanning stops at the first argument that does not start with '-' or
 * that is exactly "-".  Returns the index at which scanning stopped, or -1
 * as soon as handler returns a negative value.
 */
int option_parse(char * const argv[], int num_argv, option_handler_t handler, void *instance)
{
    int i;

    for (i = 0;i < num_argv;++i) {
        const char *token = argv[i];
        if (*token++ == '-') {
            int ret = 0;
            /* Candidate argument for this option; "" when none follows. */
            const char *next_token = (i+1 < num_argv) ? argv[i+1] : "";
            if (!*token) {
                break;    /* Only '-' was found. */
            } else if (*token == '-') {
                /* Long option: "--name=value" or "--name value". */
                const char *arg = strchr(++token, '=');
                if (arg) {
                    arg++;
                } else {
                    arg = next_token;
                }

                /* token still includes any "=value" suffix here. */
                ret = handler(instance, 0, token, arg);
                if (ret < 0) {
                    return -1;
                }
                /* Skip the following argv entries only when the argument
                 * was taken from there rather than from "=value". */
                if (arg == next_token) {
                    i += ret;
                }
            } else {
                /* Short options, possibly bundled ("-abc"). */
                char c;
                while ((c = *token++) != '\0') {
                    /* The argument is the rest of this token when there is
                     * one ("-ovalue"), otherwise the next argv entry. */
                    const char *arg = *token ? token : next_token;
                    ret = handler(instance, c, token, arg);
                    if (ret < 0) {
                        return -1;
                    }
                    if (ret > 0) {
                        if (arg == token) {
                            /* The rest of the token was the argument. */
                            token = "";
                        } else {
                            /* The next argv entry was the argument. */
                            i++;
                        }
                    }
                } /* while */
            } /* else (*token == '-') */
        } else {
            break;    /* a non-option argument was found. */
        } 
    } /* for (i) */

    return i;
}
88 | 
/*
 * Compare the name part of a long option with longname.
 *
 *  option    Option text without the leading "--"; may carry "=value".
 *  longname  Option name to test against.
 *
 * Only the text before the first '=' of option takes part in the
 * comparison.  Returns 0 when that name equals longname exactly, and a
 * non-zero value otherwise.  An abbreviated or empty name ("mod=x", "=x")
 * therefore no longer matches "model".
 */
int option_strcmp(const char *option, const char *longname)
{
    const char *p = strchr(option, '=');
    size_t n;
    int cmp;

    if (p == NULL) {
        return strcmp(option, longname);
    }

    n = (size_t)(p - option);
    cmp = strncmp(option, longname, n);
    if (cmp != 0) {
        return cmp;
    }
    /* The first n characters agree; longname must end here as well. */
    return (longname[n] == '\0') ? 0 : -1;
}
94 | 


--------------------------------------------------------------------------------
/crfsuite/frontend/option.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *        A parser for command-line options.
 3 |  *
 4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *     * Redistributions of source code must retain the above copyright
10 |  *       notice, this list of conditions and the following disclaimer.
11 |  *     * Redistributions in binary form must reproduce the above copyright
12 |  *       notice, this list of conditions and the following disclaimer in the
13 |  *       documentation and/or other materials provided with the distribution.
14 |  *     * Neither the names of the authors nor the names of its contributors
15 |  *       may be used to endorse or promote products derived from this
16 |  *       software without specific prior written permission.
17 |  *
18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 | 
31 | /* $Id$ */
32 | 
33 | #ifndef    __OPTION_H__
34 | #define    __OPTION_H__
35 | 
36 | #ifdef    __cplusplus
37 | extern "C" {
38 | #endif/*__cplusplus*/
39 | 
/*
 * Callback invoked by option_parse() for every option found.
 *  instance  User pointer given to option_parse().
 *  c         Short option character, or 0 for a long option.
 *  longname  For a long option, the text after "--".
 *  arg       Candidate argument of the option.
 * A negative return value makes option_parse() fail.
 */
typedef int (*option_handler_t)(void *instance, char c, const char *longname, const char *arg);

/* Scan the leading options of argv; see option.c. */
int option_parse(char * const argv[], int num_argv, option_handler_t handler, void *instance);
/* Compare the name part of a long option (text before '=') with longname. */
int option_strcmp(const char *option, const char *longname);
44 | 
/**
 * Begins an inline option map.
 *
 * Expands to the header of a function called `name` whose parameters match
 * option_handler_t, so it can be passed to option_parse() as the handler.
 * Inside the map, `opt` is `instance` cast to `type *`, `arg` is the option
 * argument, and `used_args` is the value the handler returns for a matched
 * option (presumably the number of arguments consumed -- confirm against
 * option_parse()).
 *
 * The expansion ends inside a dummy `if (0) {` branch so that every
 * following ON_OPTION*() can uniformly close the previous branch and open
 * an `else if`.  A map must therefore be terminated with END_OPTION_MAP().
 */
#define    BEGIN_OPTION_MAP(name, type) \
    int name(void *instance, char __c, const char *__longname, const char *arg) \
    { \
        int used_args = 0; \
        type *opt = (type *)instance; \
        if (0) { \

/**
 * Starts the handler code for an option that takes no argument.
 *
 * Closes the previous branch (returning its used_args), then opens a branch
 * guarded by `test` -- typically SHORTOPT() and/or LONGOPT() -- with
 * used_args preset to 0.  The statements written after this macro form the
 * body of that branch.
 */
#define    ON_OPTION(test) \
            return used_args; \
        } else if (test) { \
            used_args = 0; \

/**
 * Starts the handler code for an option that takes an argument.
 *
 * Same as ON_OPTION() except that used_args is preset to 1.
 */
#define    ON_OPTION_WITH_ARG(test) \
            return used_args; \
        } else if (test) { \
            used_args = 1; \

/**
 * Ends an inline option map.
 *
 * Closes the last option branch.  When no branch matched, reports the
 * unrecognized short or long option on stderr and returns -1.
 * The expansion calls fprintf(), so <stdio.h> must be included by the file
 * that instantiates the map.
 */
#define    END_OPTION_MAP() \
            return used_args; \
        } \
        if (__c != 0) { \
            fprintf(stderr, "Unrecognized option -%c\n", __c); \
        } else if (__longname != NULL) { \
            fprintf(stderr, "Unrecognized option --%s\n", __longname); \
        } \
        return -1; \
    } \
75 | 
/**
 * A predicate for short options: true when the short option character
 * currently being handled equals x.  The argument is parenthesized so that
 * any expression (e.g. a conditional) can be passed safely.
 */
#define    SHORTOPT(x)        (__c == (x))
/**
 * A predicate for long options: true when no short option character is set
 * and option_strcmp() reports that the current long option matches x.
 */
#define    LONGOPT(x)        (!__c && option_strcmp(__longname, (x)) == 0)
80 | 
81 | #ifdef    __cplusplus
82 | }
83 | #endif/*__cplusplus*/
84 | 
85 | #endif/*__OPTION_H__*/
86 | 
87 | 


--------------------------------------------------------------------------------
/crfsuite/frontend/readdata.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *        Data reader.
 3 |  *
 4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *     * Redistributions of source code must retain the above copyright
10 |  *       notice, this list of conditions and the following disclaimer.
11 |  *     * Redistributions in binary form must reproduce the above copyright
12 |  *       notice, this list of conditions and the following disclaimer in the
13 |  *       documentation and/or other materials provided with the distribution.
14 |  *     * Neither the names of the authors nor the names of its contributors
15 |  *       may be used to endorse or promote products derived from this
16 |  *       software without specific prior written permission.
17 |  *
18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 | 
31 | /* $Id$ */
32 | 
33 | #ifndef    __READDATA_H__
34 | #define    __READDATA_H__
35 | 
/**
 * Reads instances from a data file and appends them to a data set.
 *
 * This header does not include anything itself: <stdio.h> (for FILE) and
 * <crfsuite.h> (for crfsuite_data_t) must be included before it.
 *
 *  @param  fpi     The input stream, read from its current position.
 *  @param  fpo     The stream that receives the progress report and
 *                  error messages.
 *  @param  data    The data set to which the instances are appended.
 *  @param  group   The group number assigned to every instance read.
 *  @return         The number of instances appended, or -1 if the input
 *                  contains an unrecognized "@" declaration.
 */
int read_data(FILE *fpi, FILE *fpo, crfsuite_data_t* data, int group);
37 | 
38 | #endif/*__READDATA_H__*/
39 | 


--------------------------------------------------------------------------------
/crfsuite/frontend/reader.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *        Data reader.
  3 |  *
  4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
  5 |  * All rights reserved.
  6 |  *
  7 |  * Redistribution and use in source and binary forms, with or without
  8 |  * modification, are permitted provided that the following conditions are met:
  9 |  *     * Redistributions of source code must retain the above copyright
 10 |  *       notice, this list of conditions and the following disclaimer.
 11 |  *     * Redistributions in binary form must reproduce the above copyright
 12 |  *       notice, this list of conditions and the following disclaimer in the
 13 |  *       documentation and/or other materials provided with the distribution.
 14 |  *     * Neither the names of the authors nor the names of its contributors
 15 |  *       may be used to endorse or promote products derived from this
 16 |  *       software without specific prior written permission.
 17 |  *
 18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 |  */
 30 | 
 31 | /* $Id$ */
 32 | 
 33 | #include <os.h>
 34 | 
 35 | #include <stdio.h>
 36 | #include <stdlib.h>
 37 | #include <string.h>
 38 | 
 39 | #include <crfsuite.h>
 40 | #include "iwa.h"
 41 | 
 42 | static int progress(FILE *fpo, int prev, int current)
 43 | {
 44 |     while (prev < current) {
 45 |         ++prev;
 46 |         if (prev % 2 == 0) {
 47 |             if (prev % 10 == 0) {
 48 |                 fprintf(fpo, "%d", prev / 10);
 49 |                 fflush(fpo);
 50 |             } else {
 51 |                 fprintf(fpo, ".", prev / 10);
 52 |                 fflush(fpo);
 53 |             }
 54 |         }
 55 |     }
 56 |     return prev;
 57 | }
 58 | 
 59 | int read_data(FILE *fpi, FILE *fpo, crfsuite_data_t* data, int group)
 60 | {
 61 |     int n = 0;
 62 |     int lid = -1;
 63 |     crfsuite_instance_t inst;
 64 |     crfsuite_item_t item;
 65 |     crfsuite_attribute_t cont;
 66 |     iwa_t* iwa = NULL;
 67 |     crfsuite_dictionary_t *attrs = data->attrs;
 68 |     crfsuite_dictionary_t *labels = data->labels;
 69 |     const iwa_token_t *token = NULL;
 70 |     long filesize = 0, begin = 0, offset = 0;
 71 |     int prev = 0, current = 0;
 72 | 
 73 |     /* Initialize the instance.*/
 74 |     crfsuite_instance_init(&inst);
 75 |     inst.group = group;
 76 | 
 77 |     /* Obtain the file size. */
 78 |     begin = ftell(fpi);
 79 |     fseek(fpi, 0, SEEK_END);
 80 |     filesize = ftell(fpi) - begin;
 81 |     fseek(fpi, begin, SEEK_SET);
 82 | 
 83 |     /* */
 84 |     fprintf(fpo, "0");
 85 |     fflush(fpo);
 86 |     prev = 0;
 87 | 
 88 |     iwa = iwa_reader(fpi);
 89 |     while (token = iwa_read(iwa), token != NULL) {
 90 |         /* Progress report. */
 91 |         offset = ftell(fpi);
 92 |         current = (int)((offset - begin) * 100.0 / (double)filesize);
 93 |         prev = progress(fpo, prev, current);
 94 | 
 95 |         switch (token->type) {
 96 |         case IWA_BOI:
 97 |             /* Initialize an item. */
 98 |             lid = -1;
 99 |             crfsuite_item_init(&item);
100 |             break;
101 |         case IWA_EOI:
102 |             /* Append the item to the instance. */
103 |             if (0 <= lid) {
104 |                 crfsuite_instance_append(&inst, &item, lid);
105 |             }
106 |             crfsuite_item_finish(&item);
107 |             break;
108 |         case IWA_ITEM:
109 |             if (lid == -1) {
110 |                 if (strncmp(token->attr, "@", 1) == 0) {
111 |                     /* Declaration. */
112 |                     if (strcmp(token->attr, "@weight") == 0) {
113 |                         /* Instance weighting. */
114 |                         inst.weight = atof(token->value);
115 |                     } else {
116 |                         /* Unrecognized declaration. */
117 |                         fprintf(fpo, "\n");
118 |                         fprintf(fpo, "ERROR: unrecognized declaration: %s\n", token->attr);
119 |                         return -1;
120 |                     }
121 |                 } else {
122 |                     /* Label. */
123 |                     lid = labels->get(labels, token->attr);
124 |                 }
125 |             } else {
126 |                 crfsuite_attribute_init(&cont);
127 |                 cont.aid = attrs->get(attrs, token->attr);
128 |                 if (token->value && *token->value) {
129 |                     cont.value = atof(token->value);
130 |                 } else {
131 |                     cont.value = 1.0;
132 |                 }
133 |                 crfsuite_item_append_attribute(&item, &cont);
134 |             }
135 |             break;
136 |         case IWA_NONE:
137 |         case IWA_EOF:
138 |             /* Put the training instance. */
139 |             crfsuite_data_append(data, &inst);
140 |             crfsuite_instance_finish(&inst);
141 |             inst.group = group;
142 |             inst.weight = 1.;
143 |             ++n;
144 |             break;
145 |         }
146 |     }
147 | 
148 |     progress(fpo, prev, 100);
149 |     fprintf(fpo, "\n");
150 | 
151 |     return n;
152 | }
153 | 


--------------------------------------------------------------------------------
/crfsuite/frontend/readme.txt:
--------------------------------------------------------------------------------
1 | learn -a "l2sgd" -p max_iterations=1  -m CoNLL2000.model crfnn_train_full.txt
2 | 
3 | tag -t -m CoNLL2000.model crfnn_test.txt


--------------------------------------------------------------------------------
/crfsuite/genbinary.sh.in:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | LIBLBFGS=$HOME/local
 4 | PKG=@PACKAGE@-@VERSION@
 5 | BINDIR=$HOME/build/$PKG
 6 | TARGET=`pwd`/$PKG-`/usr/bin/arch`.tar.gz
 7 | 
 8 | rm -rf $BINDIR
 9 | ./configure --prefix=$BINDIR --with-liblbfgs=$LIBLBFGS
10 | make clean
11 | make LDFLAGS=-all-static
12 | make install
13 | cd $BINDIR/..
14 | tar cvzf $TARGET $PKG
15 | 
16 | 


--------------------------------------------------------------------------------
/crfsuite/include/Makefile.am:
--------------------------------------------------------------------------------
 1 | # $Id:$
 2 | 
 3 | EXTRA_DIST = \
 4 | 	os.h
 5 | 
 6 | crfsuiteincludedir = $(includedir)
 7 | crfsuiteinclude_HEADERS = \
 8 | 	crfsuite.h \
 9 | 	crfsuite_api.hpp \
10 | 	crfsuite.hpp 
11 | 
12 | 


--------------------------------------------------------------------------------
/crfsuite/include/os.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *        Compatibility stuff among operating systems and compilers.
 3 |  *
 4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *     * Redistributions of source code must retain the above copyright
10 |  *       notice, this list of conditions and the following disclaimer.
11 |  *     * Redistributions in binary form must reproduce the above copyright
12 |  *       notice, this list of conditions and the following disclaimer in the
13 |  *       documentation and/or other materials provided with the distribution.
14 |  *     * Neither the names of the authors nor the names of its contributors
15 |  *       may be used to endorse or promote products derived from this
16 |  *       software without specific prior written permission.
17 |  *
18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 | 
31 | /* $Id$ */
32 | 
#ifndef    __OS_H__
#define    __OS_H__


/*#define    __SSE__ 1*/
/* Selects the width setting used for L-BFGS floating-point values. */
#define    LBFGS_FLOAT        64

#ifdef    _MSC_VER
/* Microsoft Visual C/C++ specific */

#define    _CRT_SECURE_NO_WARNINGS 1
#pragma warning(disable : 4996)

/* Map the POSIX/C99 names used by the sources onto the MSVC spellings. */
#define alloca      _alloca
#define strdup      _strdup
#define open        _open

/* Newer MSVC runtimes provide isfinite() (Visual Studio 2013 and later)
   and snprintf() (Visual Studio 2015 and later) themselves; redefining
   them as macros there conflicts with the library declarations, so the
   fallbacks are only installed for older compilers. */
#if    _MSC_VER < 1800
#define isfinite    _finite
#endif/*_MSC_VER < 1800*/

#if    _MSC_VER < 1900
#define snprintf    _snprintf
#endif/*_MSC_VER < 1900*/

#ifndef    __cplusplus
/* Microsoft Visual C specific */

#define    inline    __forceinline

#endif/*__cplusplus*/

#endif/*_MSC_VER*/

#endif/*__OS_H__*/
62 | 


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/COPYING:
--------------------------------------------------------------------------------
 1 | The BSD license.
 2 | 
 3 | Copyright (c) 2007, Naoaki Okazaki
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 |     * Redistributions of source code must retain the above copyright
 9 |       notice, this list of conditions and the following disclaimer.
10 |     * Redistributions in binary form must reproduce the above copyright
11 |       notice, this list of conditions and the following disclaimer in the
12 |       documentation and/or other materials provided with the distribution.
13 |     * Neither the name of the Northwestern University, University of Tokyo,
14 |       nor the names of its contributors may be used to endorse or promote
15 |       products derived from this software without specific prior written
16 |       permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/Makefile.am:
--------------------------------------------------------------------------------
 1 | # $Id:$
 2 | 
 3 | lib_LTLIBRARIES = libcqdb.la
 4 | 
 5 | EXTRA_DIST = \
 6 | 	cqdb.vcxproj \
 7 | 	COPYING
 8 | 
 9 | libcqdb_la_SOURCES = \
10 | 	src/lookup3.c \
11 | 	include/cqdb.h \
12 | 	src/cqdb.c
13 | 
14 | libcqdb_la_LDFLAGS = \
15 | 	-no-undefined \
16 | 	-release @VERSION@
17 | 
18 | libcqdb_la_CFLAGS = -I./include
19 | 
20 | AM_CFLAGS = @CFLAGS@
21 | INCLUDES = @INCLUDES@
22 | 


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/Release/CL.read.1.tlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/cqdb/Release/CL.read.1.tlog


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/Release/CL.write.1.tlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/cqdb/Release/CL.write.1.tlog


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/Release/Lib-link.read.1.tlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/cqdb/Release/Lib-link.read.1.tlog


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/Release/Lib-link.write.1.tlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/cqdb/Release/Lib-link.write.1.tlog


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/Release/cl.command.1.tlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/cqdb/Release/cl.command.1.tlog


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/Release/cqdb.lastbuildstate:
--------------------------------------------------------------------------------
1 | #v4.0:v100
2 | Release|Win32|C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\|
3 | 


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/Release/cqdb.log:
--------------------------------------------------------------------------------
 1 | ﻿Build started 2015/10/5 13:05:20.
 2 | Project "C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\lib\cqdb\cqdb.vcxproj" on node 2 (build target(s)).
 3 | PrepareForBuild:
 4 |   Creating directory "C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\Release\".
 5 | InitializeBuildStatus:
 6 |   Creating "Release\cqdb.unsuccessfulbuild" because "AlwaysCreate" was specified.
 7 | ClCompile:
 8 |   C:\Program Files\Microsoft Visual Studio 10.0\VC\bin\CL.exe /c /I"C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\include" /I"C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\win32" /Iinclude /Zi /nologo /W3 /WX- /O2 /Oy- /GL /D WIN32 /D NDEBUG /D _LIB /D _CRT_SECURE_NO_WARNINGS /D _UNICODE /D UNICODE /Gm- /EHsc /MD /GS /fp:precise /Zc:wchar_t /Zc:forScope /Fo"Release\\" /Fd"Release\vc100.pdb" /Gd /TC /analyze- /errorReport:prompt src\cqdb.c src\lookup3.c
 9 |   cqdb.c
10 |   lookup3.c
11 | Lib:
12 |   C:\Program Files\Microsoft Visual Studio 10.0\VC\bin\Lib.exe /OUT:"C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\Release\cqdb.lib" /NOLOGO /LTCG Release\cqdb.obj
13 |   Release\lookup3.obj
14 |   cqdb.vcxproj -> C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\Release\cqdb.lib
15 | FinalizeBuildStatus:
16 |   Deleting file "Release\cqdb.unsuccessfulbuild".
17 |   Touching "Release\cqdb.lastbuildstate".
18 | Done Building Project "C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\lib\cqdb\cqdb.vcxproj" (build target(s)).
19 | 
20 | Build succeeded.
21 | 
22 | Time Elapsed 00:00:02.15
23 | 


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/Release/cqdb.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/cqdb/Release/cqdb.obj


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/Release/lib.command.1.tlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/cqdb/Release/lib.command.1.tlog


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/Release/lookup3.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/cqdb/Release/lookup3.obj


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/Release/vc100.pdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/cqdb/Release/vc100.pdb


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/cqdb.vcxproj:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup Label="ProjectConfigurations">
 4 |     <ProjectConfiguration Include="Debug|Win32">
 5 |       <Configuration>Debug</Configuration>
 6 |       <Platform>Win32</Platform>
 7 |     </ProjectConfiguration>
 8 |     <ProjectConfiguration Include="Release|Win32">
 9 |       <Configuration>Release</Configuration>
10 |       <Platform>Win32</Platform>
11 |     </ProjectConfiguration>
12 |   </ItemGroup>
13 |   <PropertyGroup Label="Globals">
14 |     <ProjectGuid>{46A23DE6-7E34-4429-8F15-FCC3C083FC5B}</ProjectGuid>
15 |     <RootNamespace>cqdb</RootNamespace>
16 |     <Keyword>Win32Proj</Keyword>
17 |   </PropertyGroup>
18 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
19 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
20 |     <ConfigurationType>StaticLibrary</ConfigurationType>
21 |     <CharacterSet>Unicode</CharacterSet>
22 |     <WholeProgramOptimization>true</WholeProgramOptimization>
23 |   </PropertyGroup>
24 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
25 |     <ConfigurationType>StaticLibrary</ConfigurationType>
26 |     <CharacterSet>Unicode</CharacterSet>
27 |   </PropertyGroup>
28 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
29 |   <ImportGroup Label="ExtensionSettings">
30 |   </ImportGroup>
31 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
32 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
33 |   </ImportGroup>
34 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
35 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
36 |   </ImportGroup>
37 |   <PropertyGroup Label="UserMacros" />
38 |   <PropertyGroup>
39 |     <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
40 |     <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
41 |     <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
42 |     <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
43 |     <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
44 |     <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
45 |     <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
46 |     <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
47 |     <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
48 |     <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
49 |     <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
50 |   </PropertyGroup>
51 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
52 |     <ClCompile>
53 |       <Optimization>Disabled</Optimization>
54 |       <AdditionalIncludeDirectories>$(SolutionDir)include;$(SolutionDir)win32;include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
55 |       <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
56 |       <MinimalRebuild>true</MinimalRebuild>
57 |       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
58 |       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
59 |       <PrecompiledHeader>
60 |       </PrecompiledHeader>
61 |       <WarningLevel>Level3</WarningLevel>
62 |       <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
63 |     </ClCompile>
64 |   </ItemDefinitionGroup>
65 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
66 |     <ClCompile>
67 |       <AdditionalIncludeDirectories>$(SolutionDir)include;$(SolutionDir)win32;include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
68 |       <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
69 |       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
70 |       <PrecompiledHeader>
71 |       </PrecompiledHeader>
72 |       <WarningLevel>Level3</WarningLevel>
73 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
74 |     </ClCompile>
75 |   </ItemDefinitionGroup>
76 |   <ItemGroup>
77 |     <ClCompile Include="src\cqdb.c" />
78 |     <ClCompile Include="src\lookup3.c" />
79 |   </ItemGroup>
80 |   <ItemGroup>
81 |     <ClInclude Include="include\cqdb.h" />
82 |   </ItemGroup>
83 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
84 |   <ImportGroup Label="ExtensionTargets">
85 |   </ImportGroup>
86 | </Project>


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/cqdb.vcxproj.user:
--------------------------------------------------------------------------------
1 | ﻿<?xml version="1.0" encoding="utf-8"?>
2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 | </Project>


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/doc/footer.html:
--------------------------------------------------------------------------------
1 | <hr/>
2 | <div>
3 | Copyright (c) 2002-2007 by Naoaki Okazaki
4 | <br /><i>$datetime</i>
5 | </div>
6 | </body>
7 | </html>
8 | 


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/makedist.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ $# -ne 1 ]; then
 4 |   echo "USAGE: $0 <tar-ball>"
 5 |   exit 1
 6 | fi
 7 | 
 8 | tar cvzf $1 \
 9 |     COPYING \
10 |     include/cqdb.h \
11 |     src/cqdb.c \
12 |     src/lookup3.c \
13 |     src/main.c
14 | 


--------------------------------------------------------------------------------
/crfsuite/lib/cqdb/src/main.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *      Test program for Constant Quark Database (CQDB).
  3 |  *
  4 |  * Copyright (c) 2007, Naoaki Okazaki
  5 |  * All rights reserved.
  6 |  *
  7 |  * Redistribution and use in source and binary forms, with or without
  8 |  * modification, are permitted provided that the following conditions are met:
  9 |  *     * Redistributions of source code must retain the above copyright
 10 |  *       notice, this list of conditions and the following disclaimer.
 11 |  *     * Redistributions in binary form must reproduce the above copyright
 12 |  *       notice, this list of conditions and the following disclaimer in the
 13 |  *       documentation and/or other materials provided with the distribution.
 14 |  *     * Neither the name of the Northwestern University, University of Tokyo,
 15 |  *       nor the names of its contributors may be used to endorse or promote
 16 |  *       products derived from this software without specific prior written
 17 |  *       permission.
 18 |  *
 19 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 23 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 24 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 25 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 26 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 27 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 28 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 29 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30 |  */
 31 | 
 32 | /* $Id$ */
 33 | 
 34 | #include <stdio.h>
 35 | #include <stdlib.h>
 36 | #include <string.h>
 37 | #include "cqdb.h"
 38 | 
 39 | //#define    TEST_WRITE    1
 40 | #define    CHECK_VALIDITY
 41 | 
 42 | #define    DBNAME        "test.cqdb"
 43 | #define    NUMELEMS    1000000
 44 | 
 45 | #ifdef    TEST_WRITE
 46 | 
 47 | int main(int argc, char *argv[])
 48 | {
 49 |     int i, ret;
 50 |     char str[10];
 51 |     FILE *fp = NULL;
 52 |     cqdb_writer_t* dbw = NULL;
 53 | 
 54 |     // Open a file for writing.
 55 |     fp = fopen(DBNAME, "wb");
 56 |     if (fp == NULL) {
 57 |         fprintf(stderr, "ERROR: failed to open the file.\n");
 58 |         return 1;
 59 |     }
 60 | 
 61 |     // Create a CQDB on the file stream.
 62 |     dbw = cqdb_writer(fp, 0);
 63 |     if (dbw == NULL) {
 64 |         fprintf(stderr, "ERROR: failed to create a CQDB on the file.\n");
 65 |         goto error_exit;
 66 |     }
 67 | 
 68 |     // Put string/integer associations, "00000001"/1, ..., "01000000"/1000000.
 69 |     for (i = 0;i < NUMELEMS;++i) {
 70 |         sprintf(str, "%08d", i);
 71 |         if (ret = cqdb_writer_put(dbw, str, i)) {
 72 |             fprintf(stderr, "ERROR: failed to put a pair '%s'/%d.\n", str, i);
 73 |             goto error_exit;    
 74 |         }
 75 |     }
 76 | 
 77 |     // Close the CQDB.
 78 |     if (ret = cqdb_writer_close(dbw)) {
 79 |         fprintf(stderr, "ERROR: failed to close the CQDB.\n");        
 80 |         goto error_exit;
 81 |     }
 82 | 
 83 |     // Close the file.
 84 |     fclose(fp);
 85 |     return 0;
 86 | 
 87 | error_exit:
 88 |     if (dbw != NULL) cqdb_writer_close(dbw);
 89 |     if (fp != NULL) fclose(fp);
 90 |     return 1;
 91 | }
 92 | 
 93 | #else /*TEST_WRITE*/
 94 | 
 95 | int main(int argc, char *argv[])
 96 | {
 97 |     int i, j, ret;
 98 |     long size = 0;
 99 |     const char *value = NULL;
100 |     char str[10], *buffer = NULL;
101 |     FILE *fp = NULL;
102 |     cqdb_t* db = NULL;
103 | 
104 |     // Open the database.
105 |     fp = fopen(DBNAME, "rb");
106 |     if (fp == NULL) {
107 |         fprintf(stderr, "ERROR: failed to open the file\n");
108 |         return 1;
109 |     }
110 | 
111 |     // Obtain the file size at one time.
112 |     fseek(fp, 0, SEEK_END);
113 |     size = ftell(fp);
114 |     fseek(fp, 0, SEEK_SET);
115 | 
116 |     // Read the content of the file.
117 |     buffer = (char *)malloc(size);
118 |     if (buffer == NULL) {
119 |         fprintf(stderr, "ERROR: out of memory.\n");
120 |         goto error_exit;
121 |     }
122 |     fread(buffer, 1, size, fp);
123 |     fclose(fp);
124 |     fp = NULL;
125 | 
126 |     // Open the database on the memory.
127 |     db = cqdb_reader(buffer, size);
128 |     if (db == NULL) {
129 |         fprintf(stderr, "ERROR: failed to open a CQDB on the file.\n");
130 |         goto error_exit;
131 |     }
132 | 
133 |     // Forward lookups: strings to integer identifiers.
134 |     for (i = 0;i < NUMELEMS;++i) {
135 |         sprintf(str, "%08d", i);
136 |         j = cqdb_to_id(db, str);
137 | #ifdef    CHECK_VALIDITY
138 |         if (i != j) {
139 |             fprintf(stderr, "ERROR: inconsistency error '%s'/%d.\n", str, i);
140 |             goto error_exit;    
141 |         }
142 | #endif/*CHECK_VALIDITY*/
143 |     }
144 | 
145 |     // Backward lookups: integer identifiers to strings.
146 |     for (i = 0;i < NUMELEMS;++i) {
147 |         sprintf(str, "%08d", i);
148 |         value = cqdb_to_string(db, i);
149 | #ifdef    CHECK_VALIDITY
150 |         if (strcmp(str, value) != 0) {
151 |             fprintf(stderr, "ERROR: inconsistency error '%s'/%d.\n", str, i);
152 |             goto error_exit;    
153 |         }
154 | #endif/*CHECK_VALIDITY*/
155 |     }
156 | 
157 |     cqdb_delete(db);
158 |     free(buffer);
159 | 
160 |     return 0;
161 | 
162 | error_exit:
163 |     if (fp != NULL) fclose(fp);
164 |     if (buffer != NULL) free(buffer);
165 |     return 1;
166 | }
167 | 
168 | #endif/*TEST_WRITE*/
169 | 


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Makefile.am:
--------------------------------------------------------------------------------
 1 | # $Id:$
 2 | 
 3 | lib_LTLIBRARIES = libcrfsuite.la
 4 | 
 5 | EXTRA_DIST = \
 6 | 	crf.vcxproj 
 7 | 
 8 | libcrfsuite_la_SOURCES = \
 9 | 	src/dictionary.c \
10 | 	src/logging.c \
11 | 	src/logging.h \
12 | 	src/params.c \
13 | 	src/params.h \
14 | 	src/quark.c \
15 | 	src/quark.h \
16 | 	src/rumavl.c \
17 | 	src/rumavl.h \
18 | 	src/vecmath.h \
19 | 	src/crfsuite_internal.h \
20 | 	src/dataset.c \
21 | 	src/holdout.c \
22 | 	src/train_arow.c \
23 | 	src/train_averaged_perceptron.c \
24 | 	src/train_l2sgd.c \
25 | 	src/train_lbfgs.c \
26 | 	src/train_passive_aggressive.c \
27 | 	src/crf1d.h \
28 | 	src/crf1d_context.c \
29 | 	src/crf1d_model.c \
30 | 	src/crf1d_feature.c \
31 | 	src/crf1d_encode.c \
32 | 	src/crf1d_tag.c \
33 | 	src/crfsuite_train.c \
34 | 	src/crfsuite.c
35 | 
36 | libcrfsuite_la_CFLAGS = -I$(top_builddir)/lib/cqdb/include
37 | 
38 | libcrfsuite_la_LDFLAGS = \
39 | 	-no-undefined \
40 | 	-release @VERSION@
41 | 
42 | libcrfsuite_la_LIBADD = \
43 | 	$(top_builddir)/lib/cqdb/libcqdb.la
44 | 
45 | AM_CFLAGS = @CFLAGS@
46 | INCLUDES = @INCLUDES@
47 | 


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/CL.read.1.tlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/CL.read.1.tlog


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/CL.write.1.tlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/CL.write.1.tlog


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/Lib-link.read.1.tlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/Lib-link.read.1.tlog


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/Lib-link.write.1.tlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/Lib-link.write.1.tlog


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/cl.command.1.tlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/cl.command.1.tlog


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/crf.lastbuildstate:
--------------------------------------------------------------------------------
1 | #v4.0:v100
2 | Release|Win32|C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\|
3 | 


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/crf.log:
--------------------------------------------------------------------------------
 1 | ﻿Build started 2015/10/5 13:05:22.
 2 | Project "C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\lib\crf\crf.vcxproj" on node 2 (build target(s)).
 3 | InitializeBuildStatus:
 4 |   Creating "Release\crf.unsuccessfulbuild" because "AlwaysCreate" was specified.
 5 | ClCompile:
 6 |   C:\Program Files\Microsoft Visual Studio 10.0\VC\bin\CL.exe /c /I"C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\include" /I"C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\win32" /I"C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\lib\crf\include" /I"C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\win32\liblbfgs" /I"C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\lib\cqdb\include" /Zi /nologo /W1 /WX- /Ox /Ob2 /Oi /Oy- /GL /D WIN32 /D NDEBUG /D _LIB /D _UNICODE /D UNICODE /Gm- /EHsc /MD /GS /fp:precise /fp:except /Zc:wchar_t /Zc:forScope /Fo"Release\\" /Fd"Release\vc100.pdb" /Gd /TC /analyze- /errorReport:prompt src\crf1d_encode.c src\crfsuite.c src\crfsuite_train.c src\dataset.c src\dictionary.c src\holdout.c src\logging.c src\params.c src\quark.c src\rumavl.c src\crf1d_context.c src\crf1d_feature.c src\crf1d_model.c src\crf1d_tag.c src\train_arow.c src\train_averaged_perceptron.c src\train_l2sgd.c src\train_lbfgs.c src\train_passive_aggressive.c
 7 |   crf1d_encode.c
 8 |   crfsuite.c
 9 |   crfsuite_train.c
10 |   dataset.c
11 |   dictionary.c
12 |   holdout.c
13 |   logging.c
14 |   params.c
15 |   quark.c
16 |   rumavl.c
17 |   crf1d_context.c
18 |   crf1d_feature.c
19 |   crf1d_model.c
20 |   crf1d_tag.c
21 |   train_arow.c
22 |   train_averaged_perceptron.c
23 |   train_l2sgd.c
24 |   train_lbfgs.c
25 |   train_passive_aggressive.c
26 | Lib:
27 |   C:\Program Files\Microsoft Visual Studio 10.0\VC\bin\Lib.exe /OUT:"C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\Release\crf.lib" lbfgs.lib /LIBPATH:"C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\win32\liblbfgs" /NOLOGO /LTCG Release\crf1d_encode.obj
28 |   Release\crfsuite.obj
29 |   Release\crfsuite_train.obj
30 |   Release\dataset.obj
31 |   Release\dictionary.obj
32 |   Release\holdout.obj
33 |   Release\logging.obj
34 |   Release\params.obj
35 |   Release\quark.obj
36 |   Release\rumavl.obj
37 |   Release\crf1d_context.obj
38 |   Release\crf1d_feature.obj
39 |   Release\crf1d_model.obj
40 |   Release\crf1d_tag.obj
41 |   Release\train_arow.obj
42 |   Release\train_averaged_perceptron.obj
43 |   Release\train_l2sgd.obj
44 |   Release\train_lbfgs.obj
45 |   Release\train_passive_aggressive.obj
46 |   "C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\Release\cqdb.lib"
47 |   crf.vcxproj -> C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\Release\crf.lib
48 | FinalizeBuildStatus:
49 |   Deleting file "Release\crf.unsuccessfulbuild".
50 |   Touching "Release\crf.lastbuildstate".
51 | Done Building Project "C:\Users\hxy\Downloads\crfsuite-master\crfsuite-master\lib\crf\crf.vcxproj" (build target(s)).
52 | 
53 | Build succeeded.
54 | 
55 | Time Elapsed 00:00:01.45
56 | 


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/crf1d_context.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/crf1d_context.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/crf1d_encode.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/crf1d_encode.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/crf1d_feature.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/crf1d_feature.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/crf1d_model.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/crf1d_model.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/crf1d_tag.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/crf1d_tag.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/crfsuite.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/crfsuite.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/crfsuite_train.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/crfsuite_train.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/dataset.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/dataset.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/dictionary.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/dictionary.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/holdout.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/holdout.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/lib.command.1.tlog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/lib.command.1.tlog


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/logging.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/logging.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/params.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/params.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/quark.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/quark.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/rumavl.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/rumavl.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/train_arow.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/train_arow.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/train_averaged_perceptron.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/train_averaged_perceptron.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/train_l2sgd.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/train_l2sgd.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/train_lbfgs.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/train_lbfgs.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/train_passive_aggressive.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/train_passive_aggressive.obj


--------------------------------------------------------------------------------
/crfsuite/lib/crf/Release/vc100.pdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/Release/vc100.pdb


--------------------------------------------------------------------------------
/crfsuite/lib/crf/crf.suo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/lib/crf/crf.suo


--------------------------------------------------------------------------------
/crfsuite/lib/crf/crf.vcxproj.user:
--------------------------------------------------------------------------------
1 | ﻿<?xml version="1.0" encoding="utf-8"?>
2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 | </Project>


--------------------------------------------------------------------------------
/crfsuite/lib/crf/src/dataset.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *      Implementation for data sets (dataset_t).
  3 |  *
  4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
  5 |  * All rights reserved.
  6 |  *
  7 |  * Redistribution and use in source and binary forms, with or without
  8 |  * modification, are permitted provided that the following conditions are met:
  9 |  *     * Redistributions of source code must retain the above copyright
 10 |  *       notice, this list of conditions and the following disclaimer.
 11 |  *     * Redistributions in binary form must reproduce the above copyright
 12 |  *       notice, this list of conditions and the following disclaimer in the
 13 |  *       documentation and/or other materials provided with the distribution.
 14 |  *     * Neither the names of the authors nor the names of its contributors
 15 |  *       may be used to endorse or promote products derived from this
 16 |  *       software without specific prior written permission.
 17 |  *
 18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 |  */
 30 | 
 31 | /* $Id$ */
 32 | 
 33 | #ifdef    HAVE_CONFIG_H
 34 | #include <config.h>
 35 | #endif/*HAVE_CONFIG_H*/
 36 | 
 37 | #include <os.h>
 38 | 
 39 | #include <stdlib.h>
 40 | #include <crfsuite.h>
 41 | #include "crfsuite_internal.h"
 42 | 
 43 | void dataset_init_trainset(dataset_t *ds, crfsuite_data_t *data, int holdout)
 44 | {
 45 |     int i, n = 0;
 46 | 
 47 |     for (i = 0;i < data->num_instances;++i) {
 48 |         if (data->instances[i].group != holdout) {
 49 |             ++n;
 50 |         }
 51 |     }
 52 | 
 53 |     ds->data = data;
 54 |     ds->num_instances = n;
 55 |     //ds->perm = (int*)malloc(sizeof(int) * n);
 56 |     /*
 57 |     n = 0;
 58 |     for (i = 0;i < data->num_instances;++i) {
 59 |         if (data->instances[i].group != holdout) {
 60 |             ds->perm[n++] = i;
 61 |         }
 62 |     } */  
 63 | }
 64 | 
 65 | void dataset_init_testset(dataset_t *ds, crfsuite_data_t *data, int holdout)
 66 | {
 67 |     int i, n = 0;
 68 | 
 69 |     for (i = 0;i < data->num_instances;++i) {
 70 |         if (data->instances[i].group == holdout) {
 71 |             ++n;
 72 |         }
 73 |     }
 74 | 
 75 |     ds->data = data;
 76 |     ds->num_instances = n;
 77 |     ds->perm = (int*)malloc(sizeof(int) * n);
 78 | 
 79 |     n = 0;
 80 |     for (i = 0;i < data->num_instances;++i) {
 81 |         if (data->instances[i].group == holdout) {
 82 |             ds->perm[n++] = i;
 83 |         }
 84 |     }
 85 | }
 86 | 
 87 | void dataset_finish(dataset_t *ds)
 88 | {
 89 |     free(ds->perm);
 90 | }
 91 | 
 92 | void dataset_shuffle(dataset_t *ds)
 93 | {
 94 |     int i;
 95 |     for (i = 0;i < ds->num_instances;++i) {
 96 |         int j = rand() % ds->num_instances;
 97 |         int tmp = ds->perm[j];
 98 |         ds->perm[j] = ds->perm[i];
 99 |         ds->perm[i] = tmp;
100 |     }
101 | }
102 | 
103 | crfsuite_instance_t *dataset_get(dataset_t *ds, int i)
104 | {
105 |     //return &ds->data->instances[ds->perm[i]];
106 |     return &ds->data->instances[i];
107 | }
108 | 


--------------------------------------------------------------------------------
/crfsuite/lib/crf/src/dictionary.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *      Implementation of dictionary.
  3 |  *
  4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
  5 |  * All rights reserved.
  6 |  *
  7 |  * Redistribution and use in source and binary forms, with or without
  8 |  * modification, are permitted provided that the following conditions are met:
  9 |  *     * Redistributions of source code must retain the above copyright
 10 |  *       notice, this list of conditions and the following disclaimer.
 11 |  *     * Redistributions in binary form must reproduce the above copyright
 12 |  *       notice, this list of conditions and the following disclaimer in the
 13 |  *       documentation and/or other materials provided with the distribution.
 14 |  *     * Neither the names of the authors nor the names of its contributors
 15 |  *       may be used to endorse or promote products derived from this
 16 |  *       software without specific prior written permission.
 17 |  *
 18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 |  */
 30 | 
 31 | /* $Id$ */
 32 | 
 33 | #include <os.h>
 34 | 
 35 | #include <stdlib.h>
 36 | #include <string.h>
 37 | 
 38 | #include <crfsuite.h>
 39 | #include "quark.h"
 40 | 
 41 | static int dictionary_addref(crfsuite_dictionary_t* dic)
 42 | {
 43 |     return crfsuite_interlocked_increment(&dic->nref);
 44 | }
 45 | 
 46 | static int dictionary_release(crfsuite_dictionary_t* dic)
 47 | {
 48 |     int count = crfsuite_interlocked_decrement(&dic->nref);
 49 |     if (count == 0) {
 50 |         quark_t *qrk = (quark_t*)dic->internal;
 51 |         quark_delete(qrk);
 52 |         free(dic);
 53 |     }
 54 |     return count;
 55 | }
 56 | 
 57 | static int dictionary_get(crfsuite_dictionary_t* dic, const char *str)
 58 | {
 59 |     quark_t *qrk = (quark_t*)dic->internal;
 60 |     return quark_get(qrk, str);
 61 | }
 62 | 
 63 | static int dictionary_to_id(crfsuite_dictionary_t* dic, const char *str)
 64 | {
 65 |     quark_t *qrk = (quark_t*)dic->internal;
 66 |     return quark_to_id(qrk, str);    
 67 | }
 68 | 
 69 | static int dictionary_to_string(crfsuite_dictionary_t* dic, int id, char const **pstr)
 70 | {
 71 |     quark_t *qrk = (quark_t*)dic->internal;
 72 |     const char *str = quark_to_string(qrk, id);
 73 |     if (str != NULL) {
 74 |         char *dst = (char*)malloc(strlen(str)+1);
 75 |         if (dst) {
 76 |             strcpy(dst, str);
 77 |             *pstr = dst;
 78 |             return 0;
 79 |         }
 80 |     }
 81 |     return 1;
 82 | }
 83 | 
 84 | static int dictionary_num(crfsuite_dictionary_t* dic)
 85 | {
 86 |     quark_t *qrk = (quark_t*)dic->internal;
 87 |     return quark_num(qrk);
 88 | }
 89 | 
 90 | static void dictionary_free(crfsuite_dictionary_t* dic, const char *str)
 91 | {
 92 |     free((char*)str);
 93 | }
 94 | 
 95 | int crfsuite_dictionary_create_instance(const char *interface, void **ptr)
 96 | {
 97 |     if (strcmp(interface, "dictionary") == 0) {
 98 |         crfsuite_dictionary_t* dic = (crfsuite_dictionary_t*)calloc(1, sizeof(crfsuite_dictionary_t));
 99 | 
100 |         if (dic != NULL) {
101 |             dic->internal = quark_new();
102 |             dic->nref = 1;
103 |             dic->addref = dictionary_addref;
104 |             dic->release = dictionary_release;
105 |             dic->get = dictionary_get;
106 |             dic->to_id = dictionary_to_id;
107 |             dic->to_string = dictionary_to_string;
108 |             dic->num = dictionary_num;
109 |             dic->free = dictionary_free;
110 |             *ptr = dic;
111 |             return 0;
112 |         } else {
113 |             return -1;
114 |         }
115 |     } else {
116 |         return 1;
117 |     }
118 | }
119 | 


--------------------------------------------------------------------------------
/crfsuite/lib/crf/src/holdout.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *      Holdout evaluation.
 3 |  *
 4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *     * Redistributions of source code must retain the above copyright
10 |  *       notice, this list of conditions and the following disclaimer.
11 |  *     * Redistributions in binary form must reproduce the above copyright
12 |  *       notice, this list of conditions and the following disclaimer in the
13 |  *       documentation and/or other materials provided with the distribution.
14 |  *     * Neither the names of the authors nor the names of its contributors
15 |  *       may be used to endorse or promote products derived from this
16 |  *       software without specific prior written permission.
17 |  *
18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 | 
31 | /* $Id$ */
32 | 
33 | #ifdef    HAVE_CONFIG_H
34 | #include <config.h>
35 | #endif/*HAVE_CONFIG_H*/
36 | 
37 | #include <os.h>
38 | 
39 | #include <stdlib.h>
40 | #include <crfsuite.h>
41 | #include "crfsuite_internal.h"
42 | #include "logging.h"
43 | 
44 | void holdout_evaluation(
45 |     encoder_t *gm,
46 |     dataset_t *ds,
47 |     const floatval_t *w,
48 |     logging_t *lg
49 |     )
50 | {
51 |     int i;
52 |     crfsuite_evaluation_t eval;
53 |     const int N = ds->num_instances;
54 |     int *viterbi = NULL;
55 |     int max_length = 0;
56 | 
57 |     /* Initialize the evaluation table. */
58 |     crfsuite_evaluation_init(&eval, ds->data->labels->num(ds->data->labels));
59 | 
60 |     gm->set_weights(gm, w, 1.);
61 | 
62 |     for (i = 0;i < N;++i) {
63 |         floatval_t score;
64 |         const crfsuite_instance_t *inst = dataset_get(ds, i);
65 | 
66 |         if (max_length < inst->num_items) {
67 |             free(viterbi);
68 |             viterbi = (int*)malloc(sizeof(int) * inst->num_items);
69 |         }
70 | 
71 |         gm->set_instance(gm, inst);
72 |         gm->viterbi(gm, viterbi, &score);
73 | 
74 |         crfsuite_evaluation_accmulate(&eval, inst->labels, viterbi, inst->num_items);
75 |     }
76 | 
77 |     /* Report the performance. */
78 |     crfsuite_evaluation_finalize(&eval);
79 |     crfsuite_evaluation_output(&eval, ds->data->labels, lg->func, lg->instance);
80 | }
81 | 


--------------------------------------------------------------------------------
/crfsuite/lib/crf/src/logging.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *      Logging utility.
 3 |  *
 4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *     * Redistributions of source code must retain the above copyright
10 |  *       notice, this list of conditions and the following disclaimer.
11 |  *     * Redistributions in binary form must reproduce the above copyright
12 |  *       notice, this list of conditions and the following disclaimer in the
13 |  *       documentation and/or other materials provided with the distribution.
14 |  *     * Neither the names of the authors nor the names of its contributors
15 |  *       may be used to endorse or promote products derived from this
16 |  *       software without specific prior written permission.
17 |  *
18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 | 
31 | /* $Id$ */
32 | 
33 | #include <os.h>
34 | 
35 | #include <stdio.h>
36 | #include <stdlib.h>
37 | #include <string.h>
38 | #include <time.h>
39 | 
40 | #include <crfsuite.h>
41 | #include "logging.h"
42 | 
43 | /*
44 |  * Format a message and hand it to the logging callback.
45 |  *
46 |  *  lg      Logging context. When it is NULL, or has no callback installed,
47 |  *          the message is silently dropped.
48 |  *  format  Format string, passed to the callback unchanged.
49 |  *  ...     Arguments for the format string, forwarded as a va_list.
50 |  */
51 | void logging(logging_t* lg, const char *format, ...)
52 | {
53 |     va_list args;
54 | 
55 |     /* Without a callback there is nothing to do and no va_list to open. */
56 |     if (lg == NULL || lg->func == NULL) {
57 |         return;
58 |     }
59 | 
60 |     va_start(args, format);
61 |     lg->func(lg->instance, format, args);
62 |     /* va_start() must always be paired with va_end() before returning. */
63 |     va_end(args);
64 | }
52 | 
53 | /*
54 |  * Log the current time in UTC, formatted as "YYYY-MM-DDTHH:MM:SSZ".
55 |  *
56 |  *  lg      Logging context, passed on to logging().
57 |  *  format  Format string for logging(); the timestamp text is supplied as
58 |  *          its single (string) argument.
59 |  */
60 | void logging_timestamp(logging_t* lg, const char *format)
61 | {
62 |     time_t ts;
63 |     struct tm *utc;
64 |     char timestamp[80];
65 | 
66 |     time(&ts);
67 |     utc = gmtime(&ts);
68 | 
69 |     /* gmtime() may return NULL, and strftime() returns 0 without a usable
70 |        result when it fails; log an empty timestamp in either case. */
71 |     if (utc == NULL ||
72 |         strftime(timestamp, sizeof(timestamp), "%Y-%m-%dT%H:%M:%SZ", utc) == 0) {
73 |         timestamp[0] = '\0';
74 |     }
75 |     logging(lg, format, timestamp);
76 | }
66 | 
67 | /*
68 |  * Begin a progress display: reset the stored progress to 0 and log "0".
69 |  *
70 |  *  lg  Logging context holding the progress state; NULL is ignored, in
71 |  *      line with logging().
72 |  */
73 | void logging_progress_start(logging_t* lg)
74 | {
75 |     if (lg == NULL) {
76 |         return;
77 |     }
78 |     lg->percent = 0;
79 |     logging(lg, "0");
80 | }
72 | 
73 | /*
74 |  * Advance the progress display from the stored value up to percent.
75 |  * Each even step logs one mark: percent / 10 at multiples of ten, "."
76 |  * otherwise. A percent not above the stored value logs nothing.
77 |  *
78 |  *  lg       Logging context; NULL is ignored, in line with logging().
79 |  *  percent  Target progress value.
80 |  */
81 | void logging_progress(logging_t* lg, int percent)
82 | {
83 |     if (lg == NULL) {
84 |         return;
85 |     }
86 |     while (lg->percent < percent) {
87 |         ++lg->percent;
88 |         if (lg->percent % 2 == 0) {
89 |             if (lg->percent % 10 == 0) {
90 |                 logging(lg, "%d", lg->percent / 10);
91 |             } else {
92 |                 logging(lg, ".");
93 |             }
94 |         }
95 |     }
96 | }
86 | 
87 | /*
88 |  * Finish a progress display: advance it to 100 and log a newline.
89 |  * A NULL lg is ignored, in line with logging().
90 |  */
91 | void logging_progress_end(logging_t* lg)
92 | {
93 |     if (lg == NULL) {
94 |         return;
95 |     }
96 |     logging_progress(lg, 100);
97 |     logging(lg, "\n");
98 | }
92 | 


--------------------------------------------------------------------------------
/crfsuite/lib/crf/src/logging.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *      Logging utility.
 3 |  *
 4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *     * Redistributions of source code must retain the above copyright
10 |  *       notice, this list of conditions and the following disclaimer.
11 |  *     * Redistributions in binary form must reproduce the above copyright
12 |  *       notice, this list of conditions and the following disclaimer in the
13 |  *       documentation and/or other materials provided with the distribution.
14 |  *     * Neither the names of the authors nor the names of its contributors
15 |  *       may be used to endorse or promote products derived from this
16 |  *       software without specific prior written permission.
17 |  *
18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 | 
31 | /* $Id$ */
32 | 
33 | #ifndef    __LOGGING_H__
34 | #define    __LOGGING_H__
35 | 
36 | typedef struct {                       /* Logging context shared by the logging_*() helpers. */
37 |     void *instance;                    /* Passed to func as its first argument. */
38 |     crfsuite_logging_callback func;    /* Message sink called by logging(); NULL disables output. */
39 |     int percent;                       /* Progress value reached so far (see logging_progress()). */
40 | } logging_t;
41 | 
42 | void logging(logging_t* lg, const char *format, ...);           /* Forward a formatted message to lg->func. */
43 | void logging_timestamp(logging_t* lg, const char *format);      /* Log the current UTC time through format. */
44 | void logging_progress_start(logging_t* lg);                     /* Reset the progress to 0 and log "0". */
45 | void logging_progress(logging_t* lg, int percent);              /* Log progress marks up to percent. */
46 | void logging_progress_end(logging_t* lg);                       /* Advance the progress to 100 and log a newline. */
47 | 
48 | #endif/*__LOGGING_H__*/
49 | 


--------------------------------------------------------------------------------
/crfsuite/lib/crf/src/params.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *      Parameter exchange.
 3 |  *
 4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *     * Redistributions of source code must retain the above copyright
10 |  *       notice, this list of conditions and the following disclaimer.
11 |  *     * Redistributions in binary form must reproduce the above copyright
12 |  *       notice, this list of conditions and the following disclaimer in the
13 |  *       documentation and/or other materials provided with the distribution.
14 |  *     * Neither the names of the authors nor the names of its contributors
15 |  *       may be used to endorse or promote products derived from this
16 |  *       software without specific prior written permission.
17 |  *
18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 | 
31 | /* $Id$ */
32 | 
33 | #ifndef    __PARAMS_H__
34 | #define    __PARAMS_H__
35 | 
36 | crfsuite_params_t* params_create_instance();
37 | int params_add_int(crfsuite_params_t* params, const char *name, int value, const char *help);
38 | int params_add_float(crfsuite_params_t* params, const char *name, floatval_t value, const char *help);
39 | int params_add_string(crfsuite_params_t* params, const char *name, const char *value, const char *help);
40 | 
41 | enum {                     /* Modes accepted by BEGIN_PARAM_MAP(). */
42 |     PARAMS_READ = -1,      /* DDX_PARAM_* call get_*() to fetch the stored value into var. */
43 |     PARAMS_INIT = 0,       /* DDX_PARAM_* call params_add_*() with the default value and help text. */
44 |     PARAMS_WRITE = 1,      /* DDX_PARAM_* call set_*() to store var as the parameter value. */
45 | };
46 | 
47 | /*
48 |  * Open a parameter map. Starts a do { ... } while (0) block and declares the
49 |  * locals __ret, __mode and __params used by the DDX_PARAM_* macros; the
50 |  * block is closed by END_PARAM_MAP().
51 |  */
52 | #define    BEGIN_PARAM_MAP(params, mode) \
53 |     do { \
54 |         int __ret = 0; \
55 |         int __mode = mode; \
56 |         crfsuite_params_t* __params = params;
57 | 
58 | /* Close the block opened by BEGIN_PARAM_MAP(). */
59 | #define    END_PARAM_MAP() \
60 |     } while (0) ;
61 | 
62 | /*
63 |  * Exchange one integer parameter according to the mode of the map:
64 |  *   mode < 0  : call get_int() with the address of var;
65 |  *   mode > 0  : call set_int() with the value of var;
66 |  *   mode == 0 : call params_add_int() with defval and help.
67 |  * The result of the call is left in __ret. The expansion is a complete
68 |  * statement that already ends with a semicolon.
69 |  */
70 | #define    DDX_PARAM_INT(name, var, defval, help) \
71 |     if (__mode < 0) \
72 |         __ret = __params->get_int(__params, name, &var); \
73 |     else if (__mode > 0) \
74 |         __ret = __params->set_int(__params, name, var); \
75 |     else \
76 |         __ret = params_add_int(__params, name, defval, help);
77 | 
78 | /* Same scheme as DDX_PARAM_INT, using get_float/set_float/params_add_float. */
79 | #define    DDX_PARAM_FLOAT(name, var, defval, help) \
80 |     if (__mode < 0) \
81 |         __ret = __params->get_float(__params, name, &var); \
82 |     else if (__mode > 0) \
83 |         __ret = __params->set_float(__params, name, var); \
84 |     else \
85 |         __ret = params_add_float(__params, name, defval, help);
86 | 
87 | /* Same scheme as DDX_PARAM_INT, using get_string/set_string/params_add_string. */
88 | #define    DDX_PARAM_STRING(name, var, defval, help) \
89 |     if (__mode < 0) \
90 |         __ret = __params->get_string(__params, name, &var); \
91 |     else if (__mode > 0) \
92 |         __ret = __params->set_string(__params, name, var); \
93 |     else \
94 |         __ret = params_add_string(__params, name, defval, help);
79 | 
80 | #endif/*__PARAMS_H__*/
81 | 


--------------------------------------------------------------------------------
/crfsuite/lib/crf/src/quark.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *      Quark object.
  3 |  *
  4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
  5 |  * All rights reserved.
  6 |  *
  7 |  * Redistribution and use in source and binary forms, with or without
  8 |  * modification, are permitted provided that the following conditions are met:
  9 |  *     * Redistributions of source code must retain the above copyright
 10 |  *       notice, this list of conditions and the following disclaimer.
 11 |  *     * Redistributions in binary form must reproduce the above copyright
 12 |  *       notice, this list of conditions and the following disclaimer in the
 13 |  *       documentation and/or other materials provided with the distribution.
 14 |  *     * Neither the names of the authors nor the names of its contributors
 15 |  *       may be used to endorse or promote products derived from this
 16 |  *       software without specific prior written permission.
 17 |  *
 18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 |  */
 30 | 
 31 | /* $Id$ */
 32 | 
 33 | #include "os.h"
 34 | #include <stdlib.h>
 35 | #include <string.h>
 36 | #include "rumavl.h"
 37 | #include "quark.h"
 38 | 
 39 | typedef struct {   /* One entry of the string -> id tree. */
 40 |     char *str;     /* Key string; freed by the tree callbacks owcb()/delcb(). */
 41 |     int qid;       /* Integer id assigned to str. */
 42 | } record_t;
 43 | 
 44 | struct tag_quark {
 45 |     int num;                /* Number of strings registered so far; also the next id to assign. */
 46 |     int max;                /* Number of slots allocated in id_to_string. */
 47 |     RUMAVL* string_to_id;   /* Tree of record_t, ordered by string (see keycmp()). */
 48 |     char **id_to_string;    /* Table indexed by id; entries point to the strings held by the tree records. */
 49 | };
 50 | 
 51 | /*
 52 |  * Tree comparator: orders two record_t entries by strcmp() on their
 53 |  * strings. The n and udata arguments are not used.
 54 |  */
 55 | static int keycmp(const void *_x, const void *_y, size_t n, void *udata)
 56 | {
 57 |     const char *lhs = ((const record_t*)_x)->str;
 58 |     const char *rhs = ((const record_t*)_y)->str;
 59 | 
 60 |     return strcmp(lhs, rhs);
 61 | }
 57 | 
 58 | /*
 59 |  * Overwrite callback installed through rumavl_owcb(): frees the string held
 60 |  * by the record passed as _x and returns 0. The remaining arguments are
 61 |  * not used.
 62 |  */
 63 | static int owcb(RUMAVL *tree, RUMAVL_NODE *n, void *_x, const void *_y, void *udata)
 64 | {
 65 |     char *owned = ((record_t*)_x)->str;
 66 | 
 67 |     free(owned);
 68 |     return 0;
 69 | }
 64 | 
 65 | /*
 66 |  * Delete callback installed through rumavl_delcb(): frees the string held
 67 |  * by the given record and returns 0. tree, n and udata are not used.
 68 |  */
 69 | static int delcb(RUMAVL *tree, RUMAVL_NODE *n, void *_record, void *udata)
 70 | {
 71 |     char *owned = ((record_t*)_record)->str;
 72 | 
 73 |     free(owned);
 74 |     return 0;
 75 | }
 71 | 
 72 | /*
 73 |  * Allocate an empty quark (a string <-> integer id dictionary).
 74 |  *
 75 |  * Returns the new quark, or NULL when either the quark itself or its lookup
 76 |  * tree cannot be allocated. Release the result with quark_delete().
 77 |  */
 78 | quark_t* quark_new()
 79 | {
 80 |     quark_t* qrk = (quark_t*)malloc(sizeof(quark_t));
 81 |     if (qrk == NULL) {
 82 |         return NULL;
 83 |     }
 84 | 
 85 |     qrk->num = 0;
 86 |     qrk->max = 0;
 87 |     qrk->id_to_string = NULL;
 88 |     qrk->string_to_id = rumavl_new(sizeof(record_t), keycmp, NULL, NULL);
 89 |     if (qrk->string_to_id == NULL) {
 90 |         /* A quark without its tree is unusable: quark_get() and
 91 |            quark_to_id() hand string_to_id straight to the tree API. */
 92 |         free(qrk);
 93 |         return NULL;
 94 |     }
 95 | 
 96 |     /* Let the tree free the string of a record it deletes or overwrites. */
 97 |     *rumavl_delcb(qrk->string_to_id) = delcb;
 98 |     *rumavl_owcb(qrk->string_to_id) = owcb;
 99 |     return qrk;
100 | }
 87 | 
 88 | /*
 89 |  * Destroy a quark: the lookup tree, the id -> string table and the quark
 90 |  * object itself. A NULL quark is accepted and ignored.
 91 |  */
 92 | void quark_delete(quark_t* qrk)
 93 | {
 94 |     if (qrk == NULL) {
 95 |         return;
 96 |     }
 97 | 
 98 |     rumavl_destroy(qrk->string_to_id);
 99 |     free(qrk->id_to_string);
100 |     free(qrk);
101 | }
 96 | 
 97 | /*
 98 |  * Return the id of a string, registering the string first when it is new.
 99 |  *
100 |  * New strings receive consecutive ids starting at 0, in order of
101 |  * registration. The quark stores its own copy of the string.
102 |  *
103 |  *  qrk   The quark.
104 |  *  str   The NUL-terminated string to look up.
105 |  *
106 |  * Returns the id of str, or -1 when the string is new and cannot be
107 |  * registered (out of memory, or the tree rejects the record); in that case
108 |  * no id is consumed.
109 |  */
110 | int quark_get(quark_t* qrk, const char *str)
111 | {
112 |     record_t key, *record = NULL;
113 |     char *newstr = NULL;
114 | 
115 |     key.str = (char *)str;
116 |     record = (record_t*)rumavl_find(qrk->string_to_id, &key);
117 |     if (record != NULL) {
118 |         return record->qid;
119 |     }
120 | 
121 |     /* Grow the id -> string table through a temporary so that a failed
122 |        realloc() neither leaks nor loses the existing table. */
123 |     if (qrk->max <= qrk->num) {
124 |         int newmax = (qrk->max + 1) * 2;
125 |         char **table = (char **)realloc(qrk->id_to_string, sizeof(char *) * newmax);
126 |         if (table == NULL) {
127 |             return -1;
128 |         }
129 |         qrk->id_to_string = table;
130 |         qrk->max = newmax;
131 |     }
132 | 
133 |     newstr = (char*)malloc(strlen(str)+1);
134 |     if (newstr == NULL) {
135 |         return -1;
136 |     }
137 |     strcpy(newstr, str);
138 | 
139 |     key.str = newstr;
140 |     key.qid = qrk->num;
141 |     /* The rumavl error codes are all negative. On failure the copy was not
142 |        taken over by the tree, so it is still ours to free. */
143 |     if (rumavl_insert(qrk->string_to_id, &key) < 0) {
144 |         free(newstr);
145 |         return -1;
146 |     }
147 | 
148 |     qrk->id_to_string[qrk->num] = newstr;
149 |     ++qrk->num;
150 |     return key.qid;
151 | }
125 | 
126 | /*
127 |  * Look up the id of a string without registering it.
128 |  * Returns the id, or -1 when the string is not in the quark.
129 |  */
130 | int quark_to_id(quark_t* qrk, const char *str)
131 | {
132 |     record_t probe;
133 |     const record_t *hit;
134 | 
135 |     probe.str = (char *)str;
136 |     hit = (const record_t*)rumavl_find(qrk->string_to_id, &probe);
137 |     if (hit == NULL) {
138 |         return -1;
139 |     }
140 |     return hit->qid;
141 | }
134 | 
135 | /*
136 |  * Return the string registered under the id qid, or NULL when qid is not an
137 |  * id handed out by this quark (negative, or not smaller than quark_num()).
138 |  * The returned string is owned by the quark.
139 |  */
140 | const char *quark_to_string(quark_t* qrk, int qid)
141 | {
142 |     /* Reject negative ids as well; they would index before the table. */
143 |     return (0 <= qid && qid < qrk->num) ? qrk->id_to_string[qid] : NULL;
144 | }
139 | 
140 | /* Return the number of strings registered in the quark. */
141 | int quark_num(quark_t* qrk)
142 | {
143 |     const int count = qrk->num;
144 | 
145 |     return count;
146 | }
144 | 
145 | 
146 | 
147 | #if 0   /* Disabled self-test; not compiled. */
148 | int main(int argc, char *argv[])
149 | {
150 |     quark_t *qrk = quark_new();
151 |     int qid = 0;
152 | 
153 |     qid = quark_get(qrk, "zero");       /* new string: id 0 */
154 |     qid = quark_get(qrk, "one");        /* new string: id 1 */
155 |     qid = quark_get(qrk, "zero");       /* already registered: id 0 */
156 |     qid = quark_to_id(qrk, "three");    /* not registered yet: -1 */
157 |     qid = quark_get(qrk, "two");        /* new string: id 2 */
158 |     qid = quark_get(qrk, "three");      /* new string: id 3 */
159 |     qid = quark_to_id(qrk, "three");    /* now registered: id 3 */
160 |     qid = quark_get(qrk, "zero");       /* already registered: id 0 */
161 |     qid = quark_get(qrk, "one");        /* already registered: id 1 */
162 | 
163 |     printf("%s\n", quark_to_string(qrk, 0));    /* "zero" */
164 |     printf("%s\n", quark_to_string(qrk, 1));    /* "one" */
165 |     printf("%s\n", quark_to_string(qrk, 2));    /* "two" */
166 |     printf("%s\n", quark_to_string(qrk, 3));    /* "three" */
167 | 
168 |     quark_delete(qrk);
169 |     
170 |     return 0;
171 | }
172 | #endif
173 | 


--------------------------------------------------------------------------------
/crfsuite/lib/crf/src/quark.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *      Quark object.
 3 |  *
 4 |  * Copyright (c) 2007-2010, Naoaki Okazaki
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *     * Redistributions of source code must retain the above copyright
10 |  *       notice, this list of conditions and the following disclaimer.
11 |  *     * Redistributions in binary form must reproduce the above copyright
12 |  *       notice, this list of conditions and the following disclaimer in the
13 |  *       documentation and/or other materials provided with the distribution.
14 |  *     * Neither the names of the authors nor the names of its contributors
15 |  *       may be used to endorse or promote products derived from this
16 |  *       software without specific prior written permission.
17 |  *
18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 |  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 | 
31 | /* $Id$ */
32 | 
33 | #ifndef    __QUARK_H__
34 | #define    __QUARK_H__
35 | 
36 | struct tag_quark;
37 | typedef struct tag_quark quark_t;                       /* Opaque string <-> integer id dictionary. */
38 | 
39 | quark_t* quark_new();                                   /* Create an empty quark. */
40 | void quark_delete(quark_t* qrk);                        /* Destroy a quark; NULL is accepted. */
41 | int quark_get(quark_t* qrk, const char *str);           /* Id of str, registering it when unknown. */
42 | int quark_to_id(quark_t* qrk, const char *str);         /* Id of str, or -1 when unknown. */
43 | const char *quark_to_string(quark_t* qrk, int qid);     /* String for qid, or NULL when qid >= quark_num(). */
44 | int quark_num(quark_t* qrk);                            /* Number of registered strings. */
45 | 
46 | #endif/*__QUARK_H__*/
47 | 


--------------------------------------------------------------------------------
/crfsuite/lib/crf/src/rumavl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * RumAVL - Threaded AVL Tree Implementation
  3 |  *
  4 |  * Copyright (c) 2005-2007 Jesse Long <jpl@unknown.za.net>
  5 |  * All rights reserved.
  6 |  * 
  7 |  * Permission is hereby granted, free of charge, to any person obtaining a
  8 |  * copy of this software and associated documentation files (the "Software"),
  9 |  * to deal in the Software without restriction, including without limitation
 10 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 11 |  * and/or sell copies of the Software, and to permit persons to whom the
 12 |  * Software is furnished to do so, subject to the following conditions:
 13 |  *
 14 |  *   1. The above copyright notice and this permission notice shall be
 15 |  *	included in all copies or substantial portions of the Software.
 16 |  *   2. The origin of the Software must not be misrepresented; you must not
 17 |  *	claim that you wrote the original Software.
 18 |  *   3. Altered source versions of the Software must be plainly marked as
 19 |  *	such, and must not be misrepresented as being the original Software.
 20 |  *
 21 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 22 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 23 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 24 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 25 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 26 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 27 |  * DEALINGS IN THE SOFTWARE.
 28 |  */
 29 | 
 30 | /*
 31 |  * Please see the `README' file, the documentation in the `doc' directory and
 32 |  * the `rumavl.c' source file for more information.
 33 |  */
 34 | 
 35 | #ifndef RUMAVL_H
 36 | #define RUMAVL_H 1
 37 | 
 38 | #ifdef __cplusplus
 39 | extern "C" {
 40 | #endif
 41 | 
 42 | #include <stddef.h>	/* size_t */
 43 | 
 44 | 
 45 | 
 46 |     
 47 | /*----------------------------------------------------------------------------
 48 |  * DATA TYPES
 49 |  *--------------------------------------------------------------------------*/
 50 | 
 51 | /* Opaque context handle for the tree */
 52 | typedef struct rumavl RUMAVL;
 53 | 
 54 | /* Node type - used for iterating */
 55 | typedef struct rumavl_node RUMAVL_NODE;
 56 | 
 57 | 
 58 | 
 59 | 
 60 | /*----------------------------------------------------------------------------
 61 |  * FUNDAMENTAL FUNCTIONS
 62 |  *--------------------------------------------------------------------------*/
 63 | 
 64 | /* Create a new RumAVL tree */
 65 | RUMAVL *rumavl_new (size_t reclen, 
 66 | 		    int (*cmp)(const void *, const void *, size_t, void *),
 67 | 		    void *(*alloc)(void *, size_t, void *),
 68 | 		    void *udata);
 69 | 
 70 | /* Destroy a RumAVL tree */
 71 | void rumavl_destroy (RUMAVL *tree);
 72 | 
 73 | /* This function returns the size of each record in a tree */
 74 | size_t rumavl_record_size (RUMAVL *tree);
 75 | 
 76 | /* Get a pointer to the udata pointer */
 77 | void **rumavl_udata  (RUMAVL *tree);
 78 | 
 79 | /* Insert a record into a tree, overwriting an existing record if necessary */
 80 | int rumavl_set (RUMAVL *tree, const void *record);
 81 | /* Insert a record into a tree, never overwrites an existing record */
 82 | int rumavl_insert (RUMAVL *tree, const void *record);
 83 | 
 84 | /* Retrieve record from tree, or NULL */
 85 | void *rumavl_find (RUMAVL *tree, const void *find);
 86 | 
 87 | /* Remove record from tree */
 88 | int rumavl_delete (RUMAVL *tree, const void *record);
 89 | 
 90 | 
 91 | 
 92 | 
 93 | /*----------------------------------------------------------------------------
 94 |  * ITERATOR FUNCTIONS
 95 |  *--------------------------------------------------------------------------*/
 96 | 
 97 | /* Get a pointer to the node containing a specific record */
 98 | RUMAVL_NODE *rumavl_node_find (RUMAVL *tree, const void *find, void **record);
 99 | 
100 | /* Get the next node in sequence after a specific node, in a specific
101 |  * direction, or get the first node on either end of a tree */
102 | RUMAVL_NODE *rumavl_node_next (RUMAVL *tree, RUMAVL_NODE *node, int dir,
103 | 				    void **record);
104 | /* Possible directions */
105 | #define RUMAVL_DESC (-1)
106 | #define RUMAVL_ASC  (+1)
107 | 
108 | /* Get a record held by a specific node */
109 | void *rumavl_node_record (RUMAVL_NODE *node);
110 | 
111 | /* Pass each record in a tree to a user defined callback function */
112 | extern int rumavl_foreach (RUMAVL *tree, int dir,
113 | 	    int (*cbfn)(RUMAVL *, void *, void *), void *udata);
114 | 
115 | 
116 | 
117 | 
118 | /*----------------------------------------------------------------------------
119 |  * CALLBACK FUNCTIONS
120 |  *
121 |  * Functions giving you more control over the actions of this library.
122 |  *--------------------------------------------------------------------------*/
123 | 
124 | int (**rumavl_owcb(RUMAVL *tree))(RUMAVL *, RUMAVL_NODE *, void *, 
125 | 	const void *, void *);
126 | int (**rumavl_delcb(RUMAVL *tree))(RUMAVL *, RUMAVL_NODE *, void *, void *);
127 | 
128 | 
129 | 
130 | 
131 | /*----------------------------------------------------------------------------
132 |  * MEMORY MANAGEMENT
133 |  *
134 |  * The rumavl_mem struct is used to define how a RUMAVL object allocates
135 |  * and frees memory.
136 |  *--------------------------------------------------------------------------*/
137 | void *(**rumavl_alloc(RUMAVL *tree))(void *ptr, size_t size, void *udata);
138 | 
139 | 
140 | 
141 | /*----------------------------------------------------------------------------
142 |  * ERROR CODES
143 |  *
144 |  * The functions returning int's will return these errors
145 |  *--------------------------------------------------------------------------*/
146 | 
147 | #define RUMAVL_ERR_INVAL  (-1)	/* Invalid argument */
148 | #define RUMAVL_ERR_NOMEM  (-2)	/* Insufficient memory */
149 | #define RUMAVL_ERR_NOENT  (-3)	/* Entry does not exist */
150 | #define RUMAVL_ERR_EORNG  (-5)	/* No nodes left in range */
151 | #define RUMAVL_ERR_EXIST  (-6)	/* Entry already exists */
152 | 
153 | /* returns static string describing error number */
154 | extern const char *rumavl_strerror (int errno);
155 | 
156 | #ifdef __cplusplus
157 | }
158 | #endif
159 | 
160 | #endif /* ifndef RUMAVL_H */
161 | 


--------------------------------------------------------------------------------
/crfsuite/modification.txt:
--------------------------------------------------------------------------------
 1 | training command:
 2 | learn -a "l2sgd" -p max_iterations=50  -m CoNLL2000.model crfnn_train.txt
 3 | 
 4 | tagging command:
 5 | tag -m CoNLL2000.model crfnn_test.txt
 6 | 
 7 | dumping command:
 8 | tag -m CoNLL2000.model crfnn_test.txt
 9 | 
10 | evaluating command:
11 | tag -m CoNLL2000.model -qt crfnn_test.txt
12 | 
13 | 
14 | crf1d_feature.c
15 | 
16 | add:
17 | #include <math.h>
18 | 
19 | in function featureset_generate change:
20 | minfreq <= f->freq to minfreq <= fabs(f->freq)  (2 locations)
21 | 
22 | crfsuite_train.c
23 | 
24 | under function static int crfsuite_train_train, add parameter:
25 | floatval_t *w
26 | delete parameter initialization:
27 | floatval_t *w=NULL
28 | 
29 | train_l2sgd.c
30 | 
31 | under function crfsuite_train_l2sgd, add: 
32 | w_cal = (*ptr_w)
33 | 	w_sgd = (*ptr_w)
34 | under function l2sgd_calibration, delete:
35 | vecset(w, 0, K)
36 | under function l2sgd, delete:
37 | vecset(w, 0, K)
38 | under function crfsuite_train_l2sgd, delete:
39 | floatval_t *w = NULL; add:
40 | floatval_t *w_cal = NULL;
41 | 	floatval_t *w_sgd = NULL;
42 | delete:
43 | w = (floatval_t*)calloc(sizeof(floatval_t), K);
44 | change parameter in l2sgd from w to w_sgd
45 | 
46 | crf1d_encode.c
47 | 
48 | add library #include "vecmath.h"
49 | add function crf1de_observation_expectation_feature
50 | add function crf1d_model_expectation_feature
51 | 
52 | under function encoder_objective_and_gradients, add variables and their values
53 | 
54 | add function crf1de_model_expectation_feature
55 | w_cal = (floatval_t*)calloc(sizeof(floatval_t), K);
56 | 	w_sgd = (floatval_t*)calloc(sizeof(floatval_t), K);
57 | 


--------------------------------------------------------------------------------
/crfsuite/swig/Makefile.am:
--------------------------------------------------------------------------------
 1 | # $Id:$
 2 | 
 3 | EXTRA_DIST = \
 4 | 	python/README \
 5 | 	python/prepare.sh \
 6 | 	python/crfsuite.py \
 7 | 	python/export_wrap.h \
 8 | 	python/export_wrap.cpp \
 9 | 	python/setup.py \
10 | 	python/sample_tag.py \
11 | 	python/sample_train.py \
12 | 	export.i \
13 | 	crfsuite.cpp
14 | 


--------------------------------------------------------------------------------
/crfsuite/swig/crfsuite.cpp:
--------------------------------------------------------------------------------
1 | #include <crfsuite.hpp>
2 | 
3 | 


--------------------------------------------------------------------------------
/crfsuite/swig/export.i:
--------------------------------------------------------------------------------
 1 | #if defined(SWIGPYTHON)
 2 | %module(directors="1") crfsuite
 3 | #else
 4 | %module(directors="1") CRFSuite
 5 | #endif
 6 | 
 7 | %{
 8 | #include "crfsuite_api.hpp"
 9 | %}
10 | 
11 | %include "std_string.i"
12 | %include "std_vector.i"
13 | %include "exception.i"
14 | 
15 | #ifdef SWIGPERL
16 | // PERL5 Scalar value -> STL std::string
17 | %typemap(in) const std::string& ($basetype temp) {
18 |     STRLEN len;
19 |     char *s = SvPV($input, len);
20 |     temp.assign(s, len);
21 |     $1 = &temp;
22 | }
23 | %typemap(freearg) const std::string& ""
24 | #endif
25 | 
26 | %template(Item) std::vector<CRFSuite::Attribute>;
27 | %template(ItemSequence) std::vector<CRFSuite::Item>;
28 | %template(StringList) std::vector<std::string>;
29 | 
30 | %feature("director") Trainer;
31 | 
32 | %exception {    // translate C++ exceptions thrown by a wrapped call into SWIG errors
33 |     try {
34 |         $action
35 |     } catch(const std::invalid_argument& e) {    // reported with the I/O error code
36 |         SWIG_exception(SWIG_IOError, e.what());
37 |     } catch(const std::runtime_error& e) {
38 |         SWIG_exception(SWIG_RuntimeError, e.what());
39 |     } catch (const std::exception& e) {    // any other standard exception
40 |         SWIG_exception(SWIG_RuntimeError, e.what());
41 |     } catch(...) {    // non-standard exception: no message available
42 |         SWIG_exception(SWIG_RuntimeError,"Unknown exception");
43 |     }
44 | }
45 | 
46 | %include "crfsuite_api.hpp"
47 | 
48 | 


--------------------------------------------------------------------------------
/crfsuite/swig/perl/Makefile.PL.in:
--------------------------------------------------------------------------------
 1 | use 5.008;
 2 | use strict;
 3 | use warnings;
 4 | use ExtUtils::MakeMaker;
 5 | 
 6 | WriteMakefile(
 7 |     'CC' => 'g++',
 8 |     'LD' => 'g++',
 9 |     'NAME' => '@PACKAGE@',
10 |     'VERSION' => '@VERSION@',
11 |     'OBJECT' => 'crfsuite.o export_wrap.o',
12 |     'OPTIMIZE' => '-O2',
13 |     'INC' => '-I../../include',
14 |     );
15 | 
16 | 


--------------------------------------------------------------------------------
/crfsuite/swig/perl/prepare.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Link the shared SWIG sources into this directory and, when invoked with
 3 | # --swig, regenerate the Perl wrapper (export_wrap.cpp) from export.i.
 4 | 
 5 | # -f replaces links left behind by an earlier run, so the script can be
 6 | # executed again without "File exists" errors.
 7 | ln -sf ../crfsuite.cpp
 8 | ln -sf ../export.i
 9 | 
10 | if [ "$1" = "--swig" ];
11 | then
12 |     swig -c++ -perl -I../../include -o export_wrap.cpp export.i
13 | fi
10 | 


--------------------------------------------------------------------------------
/crfsuite/swig/perl/sample_tag.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use lib './blib/lib';
 4 | use lib './blib/arch';
 5 | use CRFSuite;
 6 | use strict;
 7 | 
 8 | # Create a tagger object.
 9 | my $tagger = CRFSuite::Tagger->new();
10 | 
11 | # Load the model to the tagger.
12 | $tagger->open( shift @ARGV );
13 | 
14 | my $xseq = CRFSuite::ItemSequence->new();
15 | while (<>) {
16 |     chomp;
17 |     # Test the length, not the truth value: a line consisting of "0" is data.
18 |     unless( length $_ ){
19 |         # An empty line marks the end of a sequence: tag what was collected.
20 |         $tagger->set($xseq);
21 |         # Obtain the label sequence predicted by the tagger.
22 |         my $yseq = $tagger->viterbi();
23 |         # Output the probability of the predicted label sequence.
24 |         printf "%f\n", $tagger->probability($yseq);
25 |         for( my $i = 0; $i <= $#{$yseq}; $i++ ){
26 |             # Output the predicted labels with their marginal probabilities.
27 |             printf "%s:%f\n", $yseq->[$i], $tagger->marginal($yseq->[$i], $i);
28 |         }
29 |         # Start a fresh sequence; otherwise the items of this sequence would
30 |         # be tagged again as the beginning of the next one.
31 |         $xseq = CRFSuite::ItemSequence->new();
32 |     } else {
33 |         # Split the line with TAB characters; the first field is ignored.
34 |         my( undef, @field ) = split( /\t/, $_ );
35 |         # Append attributes to the item.
36 |         my $item = CRFSuite::Item->new();
37 |         for my $x ( @field ) {
38 |             if ( $x =~ s/:([\.\d]+)\Z// ) {
39 |                 # Weighted attribute
40 |                 $item->push( CRFSuite::Attribute->new( $x, $1+0 ) );
41 |             } else {
42 |                 # Unweighted (weight=1) attribute.
43 |                 $item->push( CRFSuite::Attribute->new( $x ) );
44 |             }
45 |         }
46 |         # Append the item to the item sequence.
47 |         $xseq->push( $item );
48 |     }
49 | }
47 | 


--------------------------------------------------------------------------------
/crfsuite/swig/perl/sample_train.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use lib './blib/lib';
 4 | use lib './blib/arch';
 5 | use CRFSuite;
 6 | use strict;
 7 | 
 8 | our $DEBUG = 0;
 9 | 
10 | my $modelfile = shift @ARGV;
11 | 
12 | my $trainer = CRFSuite::Trainer->new();
13 | 
14 | sub CRFSuite::Trainer::message {
15 |     # FIXME: This function does not work, because message callback
16 |     # function has not been implemented.  About perl callback
17 |     # function, see http://www.swig.org/papers/Perl98/swigperl.htm.
18 |     my $this = shift;
19 |     print @_;
20 | }
21 | 
22 | my $xseq = CRFSuite::ItemSequence->new();
23 | my $yseq = CRFSuite::StringList->new();
24 | while (<>) {
25 |     chomp;
26 |     unless( $_ ){
27 | 	# An empty line marks the end of a sequence.
28 | 	$trainer->append( $xseq, $yseq, 0 );
29 | 	if ($DEBUG) {
30 | 	    for ( my $i = 0; $i < $xseq->size; $i++ ) {
31 | 		my $x = $xseq->get($i);
32 | 		printf "%s\t", $yseq->get($i);
33 | 		for ( my $j = 0; $j < $x->size; $j++ ) {
34 | 		    my $f = $x->get($j);
35 | 		    printf "\t%s:%d", $f->swig_attr_get, $f->swig_value_get;
36 | 		}
37 | 		print "\n";
38 | 	    }
39 | 	}
40 | 	$xseq = CRFSuite::ItemSequence->new();
41 | 	$yseq = CRFSuite::StringList->new();
42 |     } else {
43 | 	# Split the line with TAB characters.
44 | 	my( $label, @field ) = split( /\t/, $_ );
45 | 	# Append attributes to the item.
46 | 	my $item = CRFSuite::Item->new();
47 | 	for my $x ( @field ) {
48 | 	    if ( $x =~ s/:([\.\d]+)\Z// ) {
49 | 		# Weighted attribute
50 | 		$item->push( CRFSuite::Attribute->new( $x, $1+0 ) );
51 | 	    } else {
52 | 		# Unweighted (weight=1) attribute.
53 | 		$item->push( CRFSuite::Attribute->new( $x ) );
54 | 	    }
55 | 	}
56 | 	# Append the item to the item sequence.
57 | 	$xseq->push( $item );
58 | 	# Append the label to the label sequence.
59 | 	$yseq->push( $label );
60 |     }
61 | }
62 | 
# Use L2-regularized SGD and 1st-order dyad features.
$trainer->select('l2sgd', 'crf1d');

# Set the coefficient for L2 regularization to 0.1
# (the value is passed as a string).
$trainer->set('c2', '0.1');

# This demonstrates how to list parameters and obtain their values.
# For every parameter name the heredoc below prints its name, its current
# value and its help text, one per line.
for my $name ( @{$trainer->params} ) {
    printf <<__format__, $name, $trainer->get($name), $trainer->help($name);
parameter: %s
value: %s
help: %s
__format__
}

# Start training and store the model in the file given as the first
# command-line argument. The training process is meant to report its
# progress through CRFSuite::Trainer::message(); see the FIXME on that
# function -- the callback is not implemented for the Perl binding.
# NOTE(review): the second argument (-1) is presumably the holdout group
# index, with -1 meaning "no holdout" -- confirm against the CRFsuite API.
$trainer->train($modelfile, -1);
81 | 


--------------------------------------------------------------------------------
/crfsuite/swig/python/README:
--------------------------------------------------------------------------------
 1 | CRFsuite Python module via SWIG
 2 | 
 3 | * HOW TO BUILD
 4 | 
 5 | 0. Build CRFsuite and install it (the library and include files are necessary)
 6 | 
 7 | 1. Generate a SWIG wrapper
 8 | $ ./prepare.sh
 9 | 
10 | 2. Build the binding.
11 | $ python setup.py build_ext
12 | 
13 | 3. Install the binding.
14 | $ python setup.py install
15 | 
16 | 4. Run the binding.
17 | $ python
18 | Python 2.6.6 (r266:84292, Sep 15 2010, 16:22:56)
19 | [GCC 4.4.5] on linux2
20 | Type "help", "copyright", "credits" or "license" for more information.
21 | >>> import crfsuite
22 | >>>
23 | 
24 | 
25 | * SAMPLE PROGRAMS
26 | Refer to sample_train.py and sample_tag.py
27 | 
28 | 
29 | 
30 | * NOTES FOR INSTALLING CRFSUITE IN A NON-DEFAULT DIRECTORY
31 | 
32 | If you have changed the installation directory of CRFsuite using --prefix
33 | option for the configure script, please specify the include and library
34 | directories to setup.py; for example, if you have installed CRFsuite into
35 | $HOME/local directory, run:
36 | 
37 | $ python setup.py build_ext --include-dirs=$HOME/local/include --library-dirs=$HOME/local/lib -R $HOME/local/lib
38 | 
39 | If you forget to specify "-R PATH_TO_CRFSUITE_LIBRARY" to the arguments,
40 | you may get this kind of error:
41 | 
42 | $ python
43 | Python 2.6.6 (r266:84292, Sep 15 2010, 16:22:56)
44 | [GCC 4.4.5] on linux2
45 | Type "help", "copyright", "credits" or "license" for more information.
46 | >>> import crfsuite
47 | Traceback (most recent call last):
48 |   File "<stdin>", line 1, in <module>
49 |   File "crfsuite.py", line 25, in <module>
50 |     _crfsuite = swig_import_helper()
51 |   File "crfsuite.py", line 21, in swig_import_helper
52 |     _mod = imp.load_module('_crfsuite', fp, pathname, description)
53 | ImportError: libcrfsuite-0.12.so: cannot open shared object file: No such
54 | file or directory
55 | 
56 | This error reports that CRFsuite Python module could not resolve the reference
57 | to the CRFsuite library (libcrfsuite-0.12.so). If you encounter this problem,
58 | please rebuild the Python module with "-R PATH_TO_CRFSUITE_LIBRARY".
59 | Alternatively, you can specify a search path for libraries:
60 | 
61 | $ export LD_LIBRARY_PATH=$HOME/local/lib
62 | 
63 | 
64 | 
65 | * NOTES FOR TESTING THE PYTHON MODULE WITHOUT INSTALLING IT
66 | 
67 | Adding the "--inplace" option to the command-line arguments of setup.py builds
68 | _crfsuite.so in the current directory. If _crfsuite.so and crfsuite.py are in a
69 | directory on Python's module search path (e.g., the current directory in which
70 | the Python process is started), one can try the module without running
71 | "python setup.py install":
72 | 
73 | $ python setup.py build_ext --inplace
74 | 
75 | 
76 | 
77 | * NOTES FOR GENERATING WRAPPER CODE USING SWIG.
78 | 
79 | The script "prepare.sh" can generate a SWIG wrapper with "--swig" option.
80 | $ ./prepare.sh --swig
81 | 
82 | However, we may get a compiling error for the wrapper code (export_wrap.cpp)
83 | even with the latest version of SWIG (2.0.4):
84 | 
85 | export_wrap.cpp:5019: error: redefinition of 'struct swig::traits<std::vector<
86 | CRFSuite::Attribute, std::allocator<CRFSuite::Attribute> > >'
87 | export_wrap.cpp:4918: error: previous definition of 'struct swig::traits<std::
88 | vector<CRFSuite::Attribute, std::allocator<CRFSuite::Attribute> > >'
89 | error: command 'g++' failed with exit status 1
90 | 
91 | Having no idea how to fix this problem elegantly, I simply remove the latter
92 | definition from export_wrap.cpp manually. This is why I end up including
93 | the wrapper code into the source repository.
94 | 
95 | 
96 | 


--------------------------------------------------------------------------------
/crfsuite/swig/python/export_wrap.h:
--------------------------------------------------------------------------------
 1 | /* ----------------------------------------------------------------------------
 2 |  * This file was automatically generated by SWIG (http://www.swig.org).
 3 |  * Version 2.0.4
 4 |  * 
 5 |  * This file is not intended to be easily readable and contains a number of 
 6 |  * coding conventions designed to improve portability and efficiency. Do not make
 7 |  * changes to this file unless you know what you are doing--modify the SWIG 
 8 |  * interface file instead. 
 9 |  * ----------------------------------------------------------------------------- */
10 | 
11 | #ifndef SWIG_crfsuite_WRAP_H_
12 | #define SWIG_crfsuite_WRAP_H_
13 | 
14 | #include <map>
15 | #include <string>
16 | 
17 | 
/* Director class generated by SWIG for CRFSuite::Trainer.
 * It derives from both CRFSuite::Trainer and Swig::Director and overrides the
 * virtual message() so that the call can be routed to the Python object passed
 * to the constructor (the routing itself lives in the generated .cpp file,
 * which is not part of this header). */
class SwigDirector_Trainer : public CRFSuite::Trainer, public Swig::Director {

public:
    /* self: the Python-side object this director is attached to. */
    SwigDirector_Trainer(PyObject *self);
    virtual ~SwigDirector_Trainer();
    /* Override of the Trainer progress-message callback. */
    virtual void message(std::string const &msg);


/* Internal Director utilities */
public:
    /* Return the flag stored for the given method name, or false when no flag
     * has been stored for it. */
    bool swig_get_inner(const char* swig_protected_method_name) const {
      std::map<std::string, bool>::const_iterator iv = swig_inner.find(swig_protected_method_name);
      return (iv != swig_inner.end() ? iv->second : false);
    }

    /* Store a flag for the given method name; callable on const objects
     * because swig_inner is declared mutable. */
    void swig_set_inner(const char* swig_protected_method_name, bool val) const
    { swig_inner[swig_protected_method_name] = val;}

private:
    /* Per-method-name flags read and written by the two helpers above. */
    mutable std::map<std::string, bool> swig_inner;


#if defined(SWIG_PYTHON_DIRECTOR_VTABLE)
/* VTable implementation */
    /* Return the Python attribute named method_name of the wrapped object,
     * caching the lookup result in vtable[method_index]. Raises
     * Swig::DirectorMethodException when the attribute does not exist. */
    PyObject *swig_get_method(size_t method_index, const char *method_name) const {
      PyObject *method = vtable[method_index];
      if (!method) {
        /* Cache miss: look the attribute up on the Python object. */
        swig::SwigVar_PyObject name = SWIG_Python_str_FromChar(method_name);
        method = PyObject_GetAttr(swig_get_self(), name);
        if (!method) {
          std::string msg = "Method in class Trainer doesn't exist, undefined ";
          msg += method_name;
          Swig::DirectorMethodException::raise(msg.c_str());
        }
        vtable[method_index] = method;
      };
      return method;
    }
private:
    /* One cache slot; message() is the only virtual method declared above. */
    mutable swig::SwigVar_PyObject vtable[1];
#endif

};
61 | 
62 | 
63 | #endif
64 | 


--------------------------------------------------------------------------------
/crfsuite/swig/python/prepare.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | ln -s ../crfsuite.cpp
 4 | ln -s ../export.i
 5 | 
 6 | if [ "$1" = "--swig" ];
 7 | then
 8 |     swig -c++ -python -I../../include -o export_wrap.cpp export.i
 9 | fi
10 | 


--------------------------------------------------------------------------------
/crfsuite/swig/python/sample_tag.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import crfsuite
 4 | import sys
 5 | 
 6 | def instances(fi):
 7 |     xseq = crfsuite.ItemSequence()
 8 |     
 9 |     for line in fi:
10 |         line = line.strip('\n')
11 |         if not line:
12 |         	# An empty line presents an end of a sequence.
13 |             yield xseq
14 |             xseq = crfsuite.ItemSequence()
15 |             continue
16 | 
17 | 		# Split the line with TAB characters.
18 |         fields = line.split('\t')
19 |         item = crfsuite.Item()
20 |         for field in fields[1:]:
21 |             p = field.rfind(':')
22 |             if p == -1:
23 |             	# Unweighted (weight=1) attribute.
24 |                 item.append(crfsuite.Attribute(field))
25 |             else:
26 |             	# Weighted attribute
27 |                 item.append(crfsuite.Attribute(field[:p], float(field[p+1:])))
28 | 
29 |         # Append the item to the item sequence.
30 |         xseq.append(item)
31 | 
if __name__ == '__main__':
    # Usage: sample_tag.py MODEL < DATA
    # Sequences are read from STDIN; the result is written to STDOUT.
    fi = sys.stdin
    fo = sys.stdout

    # Create a tagger object.
    tagger = crfsuite.Tagger()

    # Load the model to the tagger.
    tagger.open(sys.argv[1])

    for xseq in instances(fi):
        # Tag the sequence.
        tagger.set(xseq)
        # Obtain the label sequence predicted by the tagger.
        yseq = tagger.viterbi()
        # Output the probability of the predicted label sequence.
        # fo.write() is used instead of the print statement so that the
        # script is valid on both Python 2 and Python 3.
        fo.write('%s\n' % tagger.probability(yseq))
        for t, y in enumerate(yseq):
            # Output the predicted labels with their marginal probabilities.
            fo.write('%s:%f\n' % (y, tagger.marginal(y, t)))
        # An empty line separates the output of consecutive sequences.
        fo.write('\n')
53 | 


--------------------------------------------------------------------------------
/crfsuite/swig/python/sample_train.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import crfsuite
 4 | import sys
 5 | 
 6 | # Inherit crfsuite.Trainer to implement message() function, which receives
 7 | # progress messages from a training process.
 8 | class Trainer(crfsuite.Trainer):
 9 |     def message(self, s):
10 |         # Simply output the progress messages to STDOUT.
11 |         sys.stdout.write(s)
12 | 
def instances(fi):
    """Yield ``(xseq, yseq)`` for every labelled sequence read from *fi*.

    *fi* is an iterable of text lines. Every non-empty line describes one
    item as TAB-separated fields: the first field is the label and each
    remaining field is an attribute, optionally followed by ``:weight``.
    An empty line terminates the current sequence.

    ``xseq`` is a ``crfsuite.ItemSequence`` and ``yseq`` is a tuple holding
    the labels collected for that sequence.

    A sequence that is still pending when the input ends (i.e. the input
    does not finish with an empty line) is yielded as well instead of being
    silently dropped.
    """
    xseq = crfsuite.ItemSequence()
    yseq = crfsuite.StringList()
    # True while xseq/yseq hold items that have not been yielded yet.
    pending = False

    for line in fi:
        line = line.strip('\n')
        if not line:
            # An empty line marks the end of a sequence.
            yield xseq, tuple(yseq)
            xseq = crfsuite.ItemSequence()
            yseq = crfsuite.StringList()
            pending = False
            continue

        # Split the line with TAB characters.
        fields = line.split('\t')

        # Append attributes to the item.
        item = crfsuite.Item()
        for field in fields[1:]:
            # The text after the last ':' (if any) is the attribute weight.
            p = field.rfind(':')
            if p == -1:
                # Unweighted (weight=1) attribute.
                item.append(crfsuite.Attribute(field))
            else:
                # Weighted attribute.
                item.append(crfsuite.Attribute(field[:p], float(field[p+1:])))

        # Append the item to the item sequence.
        xseq.append(item)
        # Append the label to the label sequence.
        yseq.append(fields[0])
        pending = True

    if pending:
        # The input ended without a terminating empty line.
        yield xseq, tuple(yseq)
44 | 
if __name__ == '__main__':
    # Usage: sample_train.py MODEL < DATA
    # print() is called with a single pre-formatted argument throughout so
    # that the script produces the same output on Python 2 and Python 3.

    # This demonstrates how to obtain the version string of CRFsuite.
    print(crfsuite.version())

    # Create a Trainer object.
    trainer = Trainer()

    # Read training instances from STDIN, and set them to trainer.
    for xseq, yseq in instances(sys.stdin):
        trainer.append(xseq, yseq, 0)

    # Use L2-regularized SGD and 1st-order dyad features.
    trainer.select('l2sgd', 'crf1d')

    # This demonstrates how to list parameters and obtain their values.
    # The listing runs before 'c2' is changed below, so the value shown for
    # 'c2' is the one in effect right after select().
    for name in trainer.params():
        print('%s %s %s' % (name, trainer.get(name), trainer.help(name)))

    # Set the coefficient for L2 regularization to 0.1
    trainer.set('c2', '0.1')

    # Start training; the training process will invoke trainer.message()
    # to report the progress.
    trainer.train(sys.argv[1], -1)
69 | 


--------------------------------------------------------------------------------
/crfsuite/swig/python/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """
 4 | setup.py file for SWIG example
 5 | """
 6 | 
 7 | import sys
 8 | import os.path
 9 | 
def get_rootdir():
    """Return the hard-coded installation prefix used by the helpers below."""
    prefix = '/home/users/okazaki/local'
    return prefix
def get_includedir():
    """Return the ``include`` directory underneath ``get_rootdir()``."""
    prefix = get_rootdir()
    return os.path.join(prefix, 'include')
def get_librarydir():
    """Return the ``lib`` directory underneath ``get_rootdir()``."""
    prefix = get_rootdir()
    return os.path.join(prefix, 'lib')
16 | 
import os

# Point every build tool variable at g++; the extension below is declared
# with language='c++'.
for _tool in ('CC', 'CXX', 'CPP', 'LDSHARED'):
    os.environ[_tool] = 'g++'

from distutils.core import setup, Extension

# Source files compiled into the _crfsuite extension module.
_sources = [
    'crfsuite.cpp',
    'export_wrap.cpp',
]

# Options the original author left disabled are kept here for reference:
#    include_dirs=['../../include',],
#    library_dirs=['../../lib/crf',],
#    extra_objects=['../../lib/crf/libcrfsuite.la'],
crfsuite_module = Extension(
    '_crfsuite',
    sources=_sources,
    extra_link_args=['-shared'],
    libraries=['crfsuite'],
    language='c++',
)

# The name and version strings are '@...@' placeholders.
# NOTE(review): presumably substituted by the build system -- confirm.
setup(
    name='@PACKAGE@',
    version='@VERSION@',
    author='Naoaki Okazaki',
    description="""CRFSuite Python module""",
    ext_modules=[crfsuite_module],
    py_modules=["crfsuite"],
)
44 | 
45 | 


--------------------------------------------------------------------------------
/crfsuite/win32/liblbfgs/lbfgs.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/win32/liblbfgs/lbfgs.lib


--------------------------------------------------------------------------------
/crfsuite/win32/liblbfgs/lbfgs_debug.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/happywwy/Recursive-Neural-Conditional-Random-Field/be393fa6182bbf749b276d257410f908269dd71b/crfsuite/win32/liblbfgs/lbfgs_debug.lib


--------------------------------------------------------------------------------
/data/README:
--------------------------------------------------------------------------------
 1 | Introduction
 2 | This annotation dataset was first used in the paper 
 3 | "Recursive Neural Conditional Random Fields for Aspect-based Sentiment Analysis". 
 4 | 
 5 | The annotations are made on the customer review dataset from SemEval Challenge 2014 Task 4 Subtask 1. 
 6 | The annotation from the challenge only contains aspect term labels. 
 7 | We create opinion term labels for the same customer review sentences.
 8 | 
 9 | Description
10 | The annotation contains four files corresponding to customer reviews from two different domains for both training and testing. 
11 | For training, one file contains opinion term annotations for 3044 training sentences for restaurant reviews, 
12 | and the other file contains opinion term annotations for 3048 sentences for laptop reviews. 
13 | (These reviews are the same as the ones in the challenge.) 
14 | Each line in the annotation file lists all the opinion terms separated by ",". 
15 | Each opinion term is followed by "+1" or "-1" indicating the opinion polarity. 
16 | The order of the annotations is in line with the order of the original review texts for each domain. 
17 | "NIL" indicates that there are no explicit opinions for the corresponding review text.
18 | 
19 | Citation
20 | If you would like to use this annotation dataset, please cite the following paper
21 | "Recursive Neural Conditional Random Fields for Aspect-based Sentiment Analysis"
22 | Wenya Wang, Sinno Jialin Pan, Daniel Dahlmeier and Xiaokui Xiao
23 | In Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing (EMNLP-16)
24 | 


--------------------------------------------------------------------------------
/mod_pycrfsuite/crfsuite_api.pxd:
--------------------------------------------------------------------------------
 1 | from libcpp.string cimport string
 2 | from libcpp.vector cimport vector
 3 | 
# Status codes taken from the CRFsuite C header. Only the names are declared
# here; their numeric values come from crfsuite.h at compile time.
cdef extern from "../crfsuite/include/crfsuite.h":
    ctypedef enum:
        CRFSUITE_SUCCESS            # presumably the "no error" status -- confirm in crfsuite.h
        CRFSUITEERR_UNKNOWN         # Unknown error occurred.
        CRFSUITEERR_OUTOFMEMORY     # Insufficient memory.
        CRFSUITEERR_NOTSUPPORTED    # Unsupported operation.
        CRFSUITEERR_INCOMPATIBLE    # Incompatible data.
        CRFSUITEERR_INTERNAL_LOGIC  # Internal error.
        CRFSUITEERR_OVERFLOW        # Overflow.
        CRFSUITEERR_NOTIMPLEMENTED  # Not implemented.
14 | 
15 | 
# Data types and the version() function from the C++ API header, all living
# in the C++ namespace "CRFSuite".
cdef extern from "../crfsuite/include/crfsuite_api.hpp" namespace "CRFSuite":
    # One attribute: a string name paired with a floating-point value.
    cdef cppclass Attribute:
        string attr
        double value

        # Constructors: default, name only, and name with value.
        Attribute()
        Attribute(string)
        Attribute(string, double)

    # An item is a vector of attributes, an item sequence is a vector of
    # items, and a string list is a vector of strings.
    ctypedef vector[Attribute] Item
    ctypedef vector[Item] ItemSequence
    ctypedef vector[string] StringList

    # Returns a string; presumably the CRFsuite library version -- confirm.
    cdef string version()
30 | 
31 | 
# Trainer class declared in trainer_wrapper.hpp (C++ namespace
# "CRFSuiteWrapper"). Every method is declared "except +", so a C++ exception
# raised inside it is translated into a Python exception by Cython.
cdef extern from "trainer_wrapper.hpp" namespace "CRFSuiteWrapper":

    # Callback signature: takes a Python object and a message string and
    # returns a Python object.
    ctypedef object (*messagefunc)(object self, string message)

    cdef cppclass Trainer:
        Trainer() except +
        # Registers a Python object together with a messagefunc callback.
        void set_handler(object, messagefunc) except +
        void clear() except +
        # NOTE(review): meaning of the int argument is not visible here.
        void modify(ItemSequence, int) except +
        # Item sequence, its labels and an int (presumably a group id -- confirm).
        void append(ItemSequence, StringList, int) except +
        bint select(string, string) except +
        # Earlier signature, kept commented out by the original author:
        #int train(string, int, double*, int, double*, int, double, double, int, int) except +
        # NOTE(review): the meaning of the pointer/scalar arguments is defined
        # in trainer_wrapper.hpp and cannot be told from this declaration.
        int train(string, int, double*, double*, int, double, double, double*, int) except +
        StringList params() except +
        void set(string, string) except +
        string get(string) except +
        string help(string) except +
        void _init_hack() except +
50 | 
51 | 
# Tagger class declared in tagger_wrapper.hpp (C++ namespace
# "CRFSuiteWrapper"). Every method is declared "except +", so a C++ exception
# raised inside it is translated into a Python exception by Cython.
cdef extern from "tagger_wrapper.hpp" namespace "CRFSuiteWrapper":

    # NOTE(review): the same messagefunc typedef is also declared in the
    # trainer_wrapper.hpp extern block of this file, and no Tagger method
    # declared below takes it -- confirm whether this declaration is needed.
    ctypedef object (*messagefunc)(object self, string message)

    cdef cppclass Tagger:
        Tagger() except +
        # Takes a string (presumably the model file name -- confirm).
        int open(string) except +
        void close() except +
        StringList labels() except +
        # Takes an item sequence and returns a list of strings.
        StringList tag(ItemSequence) except +
        void set(ItemSequence) except +
        StringList viterbi() except +
        double probability(StringList) except +
        double marginal(string, int) except +
        void dump(int) except +
        void dump2() except +
68 | 


--------------------------------------------------------------------------------
/rnn/adagrad.py:
--------------------------------------------------------------------------------
 1 | from numpy import *
 2 | 
 3 | class Adagrad(): 
 4 | 
 5 |     def __init__(self, dim):
 6 |         self.dim = dim
 7 |         self.eps = 1e-3
 8 | 
 9 |         # initial learning rate
10 |         #self.learning_rate = 0.05  #original 100-dim depnn
11 |         #self.learning_rate = 0.0001  #crf 100-dim
12 |         #self.learning_rate = 0.02
13 |         self.learning_rate = 0.01
14 |         # stores sum of squared gradients 
15 |         self.h = zeros(self.dim)
16 | 
17 |     def rescale_update(self, gradient):
18 |         curr_rate = zeros(self.h.shape)
19 |         self.h += gradient ** 2
20 |         curr_rate = self.learning_rate / (sqrt(self.h) + self.eps)
21 |         return curr_rate * gradient
22 | 
23 |     def reset_weights(self):
24 |         self.h = zeros(self.dim)
25 | 


--------------------------------------------------------------------------------
/rnn/adagrad_crf.py:
--------------------------------------------------------------------------------
 1 | from numpy import *
 2 | 
 3 | class Adagrad(): 
 4 | 
 5 |     def __init__(self, dim, r=None):
 6 |         self.dim = dim
 7 |         self.eps = 1e-3
 8 | 
 9 |         # initial learning rate
10 |         #self.learning_rate = 0.05  #original 100-dim depnn
11 |         if r == None:
12 |             self.learning_rate = 0.0001  #crf 100-dim
13 |         else: 
14 |             self.learning_rate = r
15 |         #self.learning_rate = 0.02
16 |         # stores sum of squared gradients 
17 |         self.h = zeros(self.dim)
18 | 
19 |     def rescale_update(self, gradient):
20 |         curr_rate = zeros(self.h.shape)
21 |         self.h += gradient ** 2
22 |         curr_rate = self.learning_rate / (sqrt(self.h) + self.eps)
23 |         return curr_rate * gradient
24 | 
25 |     def reset_weights(self):
26 |         self.h = zeros(self.dim)
27 | 


--------------------------------------------------------------------------------
/rnn/crf_propagation.py:
--------------------------------------------------------------------------------
  1 | # forward and back-propagation for RNN within joint model of RNCRF
  2 | # only compute hidden state for each node in forward pass
  3 | # receive error from CRF to each node for back-propagation
  4 | 
  5 | import numpy as np
  6 | from util.math_util import *
  7 | import random
  8 | 
  9 | #define softmax function
 10 | def softmax(v):
 11 |     v = np.array(v)
 12 |     max_v = np.amax(v)
 13 |     e = np.exp(v - max_v)
 14 |     dist = e / np.sum(e)
 15 | 
 16 |     return dist
 17 | 
 18 |     
 19 | def der_tanh(x):
 20 |     return 1-np.tanh(x)**2
 21 | 
 22 | def forward_prop(params, tree, d, c, labels=True):
 23 | 
 24 |     tree.reset_finished()
 25 | 
 26 |     to_do = tree.get_nodes()
 27 | 
 28 |     (rel_dict, Wv, b, We) = params
 29 | 
 30 |     # forward prop
 31 |     while to_do:
 32 |         curr = to_do.pop(0)
 33 | 
 34 |         # node is leaf
 35 |         if len(curr.kids) == 0:
 36 | 
 37 |             # activation function is the tanh
 38 |             # compute hidden state
 39 |             curr.p = tanh(Wv.dot(curr.vec) + b)
 40 |             
 41 | 
 42 |         else:
 43 | 
 44 |             # - root isn't a part of this! 
 45 |             # - more specifically, the stanford dep. parser creates a superficial ROOT node
 46 |             #   associated with the word "root" that we don't want to consider during training
 47 |             # 'root' is the last one to be popped
 48 |             if len(to_do) == 0:
 49 |                 # 'root' only has one kid, which is the root word
 50 |                 ind, rel = curr.kids[0]
 51 |                 curr.p = tree.get(ind).p
 52 |                 
 53 |                 continue
 54 |             
 55 |             # check if all kids are finished
 56 |             all_done = True
 57 |             for ind, rel in curr.kids:
 58 |                 if tree.get(ind).finished == 0:
 59 |                     all_done = False
 60 |                     break
 61 | 
 62 |             # if not, push the node back onto the queue
 63 |             if not all_done:
 64 |                 to_do.append(curr)
 65 |                 continue
 66 | 
 67 |             # otherwise, compute p at node
 68 |             else:
 69 |                 kid_sum = zeros( (d, 1) )
 70 |                 for ind, rel in curr.kids:
 71 |                     curr_kid = tree.get(ind)
 72 | 
 73 |                     try:
 74 |                         kid_sum += rel_dict[rel].dot(curr_kid.p)
 75 | 
 76 |                     # - this shouldn't happen unless the parser spit out a seriously 
 77 |                     #   malformed tree
 78 |                     except KeyError:
 79 |                         print 'forward propagation error'
 80 |                         print tree.get_words()
 81 |                         print curr.word, rel, tree.get(ind).word
 82 |                 
 83 |                 kid_sum += Wv.dot(curr.vec)
 84 |                 curr.p = tanh(kid_sum + b)
 85 | 
 86 | 
 87 |         # error and delta
 88 |         if labels:
 89 |             
 90 |             true_label = zeros( (c, 1) )
 91 |             for i in range(c):
 92 |                 if curr.trueLabel == i:
 93 |                     true_label[i] = 1
 94 |                     
 95 |             curr.true_class = true_label
 96 |                     
 97 |             
 98 |         curr.finished = 1
 99 | 
100 | 
101 | 
102 | # computes gradients for the given tree and increments existing gradients
def backprop(params, tree, d, c, len_voc, grads, mixed = False):
    """Back-propagate the per-node errors ``grad_h`` through *tree*.

    params  -- tuple ``(rel_dict, Wv, b)``; ``b`` is unpacked but unused.
    tree    -- object providing ``get(index)``; node 0 is the artificial
               ROOT whose first kid is the root word. Nodes provide
               ``kids``, ``p`` (hidden state), ``grad_h`` (error received
               for the node), ``vec`` and ``ind``.
    d       -- hidden dimension (the initial parent delta is ``(d, 1)``).
    c, len_voc -- accepted for interface compatibility; not used here.
    grads   -- ``[relation-gradient dict, dWv, db, dWe]``; every entry is
               incremented in place.
    mixed   -- when true, only rows 50 and above of the word-embedding
               gradient are updated.

    The full delta of every visited node is stored as ``node.delta_full``.
    """
    (rel_dict, Wv, b) = params

    # start with root's immediate kid (for same reason as forward prop)
    ind, rel = tree.get(0).kids[0]
    root = tree.get(ind)

    # operate on tuples of the form (node, parent delta)
    to_do = [ (root, zeros( (d, 1) ) ) ]

    while to_do:
        node, delta_down = to_do.pop()

        # node.p already holds tanh(z), so the derivative of tanh at the
        # pre-activation z is 1 - p**2. (Applying tanh to p once more, as
        # der_tanh(node.p) would, evaluates 1 - tanh(p)**2 instead.)
        curr_der = 1 - node.p ** 2
        node.delta_full = np.multiply(node.grad_h + delta_down, curr_der)

        # Relation matrices: accumulate their gradient and pass the error
        # down to every kid (a leaf has no kids, so this loop is skipped).
        for ind, rel in node.kids:
            curr_kid = tree.get(ind)
            # W_rel
            grads[0][rel] += node.delta_full.dot(curr_kid.p.T)
            to_do.append( (curr_kid, rel_dict[rel].T.dot(node.delta_full) ) )

        # Word matrix, bias and word embedding: same update for internal
        # nodes and leaves.
        grads[1] += node.delta_full.dot(node.vec.T)
        grads[2] += node.delta_full
        word_grad = Wv.T.dot(node.delta_full).ravel()
        if mixed:
            grads[3][50:, node.ind] += word_grad[50:]
        else:
            grads[3][:, node.ind] += word_grad
152 |                 
153 | 


--------------------------------------------------------------------------------
/rnn/propagation.py:
--------------------------------------------------------------------------------
  1 | # This is the module for computing RNN forward propagation and back-propagation in the dependency tree
  2 | 
  3 | import numpy as np
  4 | from util.math_util import *
  5 | import random
  6 | 
  7 | # - RNN forward propagation and back-propagation
  8 | 
  9 | # define softmax function
 10 | def softmax(v):
 11 |     v = np.array(v)
 12 |     max_v = np.amax(v)
 13 |     e = np.exp(v - max_v)
 14 |     dist = e / np.sum(e)
 15 | 
 16 |     return dist
 17 | 
 18 | # derivative of tanh    
 19 | def der_tanh(x):
 20 |     return 1-np.tanh(x)**2
 21 | 
 22 | def forward_prop(params, tree, d, c, labels=True):
 23 | 
 24 |     tree.reset_finished()
 25 | 
 26 |     to_do = tree.get_nodes()
 27 | 
 28 |     (rel_dict, Wv, Wc, b, b_c, We) = params
 29 | 
 30 |     # forward prop
 31 |     while to_do:
 32 |         curr = to_do.pop(0)
 33 | 
 34 |         # node is leaf
 35 |         if len(curr.kids) == 0:
 36 | 
 37 |             # activation function is the tanh
 38 |             # compute hidden state
 39 |             curr.p = tanh(Wv.dot(curr.vec) + b)
 40 |             # store for computing derivative
 41 |             curr.label_error = 0.0
 42 |             curr.label_delta = 0.0
 43 |             # classification
 44 |             curr.predict_label = softmax(Wc.dot(curr.p) + b_c)
 45 | 
 46 |         else:
 47 | 
 48 |             # - root isn't a part of this! 
 49 |             # - more specifically, the stanford dep. parser creates a superficial ROOT node
 50 |             #   associated with the word "root" that we don't want to consider during training
 51 |             # 'root' is the last one to be popped
 52 |             if len(to_do) == 0:
 53 |                 # 'root' only has one kid, which is the root word
 54 |                 ind, rel = curr.kids[0]
 55 |                 curr.p = tree.get(ind).p
 56 | 
 57 |                 curr.label_error = 0.
 58 |                 curr.label_delta = 0.
 59 |                 curr.predict_label = softmax(Wc.dot(curr.p) + b_c)
 60 |                 continue
 61 | 
 62 |             # check if all kids are finished
 63 |             all_done = True
 64 |             for ind, rel in curr.kids:
 65 |                 if tree.get(ind).finished == 0:
 66 |                     all_done = False
 67 |                     break
 68 | 
 69 |             # if not, push the node back onto the queue
 70 |             if not all_done:
 71 |                 to_do.append(curr)
 72 |                 continue
 73 | 
 74 |             # otherwise, compute p at node
 75 |             else:
 76 |                 kid_sum = zeros( (d, 1) )
 77 |                 for ind, rel in curr.kids:
 78 |                     curr_kid = tree.get(ind)
 79 | 
 80 |                     try:
 81 |                         kid_sum += rel_dict[rel].dot(curr_kid.p)
 82 | 
 83 |                     # - this shouldn't happen unless the parser spit out a seriously 
 84 |                     #   malformed tree
 85 |                     except KeyError:
 86 |                         print 'forward propagation error'
 87 |                         print tree.get_words()
 88 |                         print curr.word, rel, tree.get(ind).word
 89 |                 
 90 |                 kid_sum += Wv.dot(curr.vec)
 91 |                 curr.p = tanh(kid_sum + b)
 92 | 
 93 |                 curr.predict_label = softmax(Wc.dot(curr.p) + b_c)
 94 | 
 95 | 
 96 |         # error and delta
 97 |         if labels:
 98 |             
 99 |             curr.label_error = 0.0
100 |             curr.label_delta = zeros( (c, 1) )
101 |             true_label = zeros( (c, 1) )
102 |             for i in range(c):
103 |                 if curr.trueLabel == i:
104 |                     true_label[i] = 1
105 |                     
106 |             curr.true_class = true_label
107 |                     
108 |             curr.label_delta = curr.predict_label - curr.true_class
109 |             curr.label_error = - (np.multiply(log(curr.predict_label), curr.true_class).sum())
110 | 
111 |         curr.finished = 1
112 |         
113 | 
114 | # computes gradients for the given tree and increments existing gradients
def backprop(params, tree, d, c, len_voc, grads, mixed = False):
    """Back-propagate the classification errors through *tree*.

    params  -- tuple ``(rel_dict, Wv, Wc, b, b_c)``; ``b`` and ``b_c`` are
               unpacked but unused.
    tree    -- object providing ``get(index)``; node 0 is the artificial
               ROOT whose first kid is the root word. Nodes provide
               ``kids``, ``p`` (hidden state), ``label_delta``, ``vec`` and
               ``ind``.
    d       -- hidden dimension (the initial parent delta is ``(d, 1)``).
    c, len_voc -- accepted for interface compatibility; not used here.
    grads   -- ``[relation-gradient dict, dWv, dWc, db, db_c, dWe]``; every
               entry is incremented in place.
    mixed   -- when true, only rows 50 and above of the word-embedding
               gradient are updated.

    The full delta of every visited node is stored as ``node.delta_full``.
    """
    (rel_dict, Wv, Wc, b, b_c) = params

    # start with root's immediate kid (for same reason as forward prop)
    ind, rel = tree.get(0).kids[0]
    root = tree.get(ind)

    # operate on tuples of the form (node, parent delta)
    to_do = [ (root, zeros( (d, 1) ) ) ]

    while to_do:
        node, delta_down = to_do.pop()

        # gradients of the classification layer
        delta_Wc = node.label_delta.dot(node.p.T)
        delta_bc = node.label_delta

        # error arriving at the hidden state from the classifier
        delta = Wc.T.dot(node.label_delta)
        # node.p already holds tanh(z), so the derivative of tanh at the
        # pre-activation z is 1 - p**2. (Applying tanh to p once more, as
        # der_tanh(node.p) would, evaluates 1 - tanh(p)**2 instead.)
        curr_der = 1 - node.p ** 2
        node.delta_full = np.multiply(delta + delta_down, curr_der)

        # Relation matrices: accumulate their gradient and pass the error
        # down to every kid (a leaf has no kids, so this loop is skipped).
        for ind, rel in node.kids:
            curr_kid = tree.get(ind)
            grads[0][rel] += node.delta_full.dot(curr_kid.p.T)
            to_do.append( (curr_kid, rel_dict[rel].T.dot(node.delta_full) ) )

        # Remaining parameters: same update for internal nodes and leaves.
        grads[1] += node.delta_full.dot(node.vec.T)
        grads[2] += delta_Wc
        grads[3] += node.delta_full
        grads[4] += delta_bc
        word_grad = Wv.T.dot(node.delta_full).ravel()
        if mixed:
            grads[5][50:, node.ind] += word_grad[50:]
        else:
            grads[5][:, node.ind] += word_grad
171 | 
172 | 
173 | 


--------------------------------------------------------------------------------
/util/10depParse.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Thu May 21 22:37:53 2015
 4 | 
 5 | @author: wangwenya
 6 | """
 7 | 
 8 | """
 9 | parse the preprocessed sentences to dependency formats
10 | """
11 | 
12 | 
13 | import subprocess
14 | 
#   stanford dependency parser to create a dependency parse tree for each sentence

# change these paths to point to your stanford parser and the path for your preprocessed sentences
p = subprocess.Popen(["bash","lexparser.sh","./data_semEval/sample.txt"], stdout=subprocess.PIPE)
# stderr is not piped, so err is always None and parser diagnostics go
# straight to the terminal
output, err = p.communicate()

# The output file is opened only after the parser has finished, so a failed
# launch no longer leaves behind a truncated file. The captured output is
# written in one call instead of one character at a time. Binary mode
# stores the parser's bytes unchanged.
with open('./data_semEval/raw_parses_sample', 'wb') as out_file:
    out_file.write(output)
27 | 


--------------------------------------------------------------------------------
/util/20dtreeLabel.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Fri May 22 10:28:35 2015
  4 | 
  5 | @author: wangwenya
  6 | """
  7 | 
  8 | """
  9 | create tree structures from raw parses for training sentences
 10 | accumulate vocabulary
 11 | ignore lemmatization
 12 | differentiate beginning and inside of aspects/opinions
 13 | """
 14 | 
 15 | 
 16 | from dtree_util import *
 17 | import gen_util as gen
 18 | import sys, cPickle, random, os
 19 | from numpy import *
 20 | 
 21 | # import dependency parse trees
 22 | f = open('data_semEval/raw_parses_sample', 'r')
 23 | 
 24 | indice = 0
 25 | 
 26 | data = f.readlines()
 27 | plist = []
 28 | tree_dict = []
 29 | vocab = []
 30 | rel_list = []
 31 | 
 32 | # import ground-truth aspect term labels and opinion term labels
 33 | label_file = open('data_semEval/aspectTerm_sample', 'r')
 34 | label_sentence = open('data_semEval/opinion_sample', 'r')
 35 | 
 36 | for line in data:
 37 |     if line.strip():
 38 |         rel_split = line.split('(')
 39 |         rel = rel_split[0]
 40 |         deps = rel_split[1][:-1]
 41 |         deps = deps.replace(')','')
 42 |         if len(rel_split) != 2:
 43 |             print 'error ', rel_split
 44 |             sys.exit(0)
 45 | 
 46 |         else:
 47 |             dep_split = deps.split(',')
 48 |             
 49 |         if len(dep_split) > 2:
 50 |             fixed = []
 51 |             half = ''
 52 |             for piece in dep_split:
 53 |                 piece = piece.strip()
 54 |                 if '-' not in piece:
 55 |                     half += piece
 56 | 
 57 |                 else:
 58 |                     fixed.append(half + piece)
 59 |                     half = ''
 60 | 
 61 |                     #print 'fixed: ', fixed
 62 |             dep_split = fixed
 63 | 
 64 |         final_deps = []
 65 |         for dep in dep_split:
 66 |             words = dep.split('-')
 67 |             word = words[0]
 68 |             ind = int(words[len(words) - 1])
 69 | 
 70 |             if len(words) > 2:
 71 |                 word = '-'.join([w for w in words[:-1]])
 72 | 
 73 |             final_deps.append( (ind, word.strip()) )
 74 |         # store dependency relations for each word pair    
 75 |         plist.append((rel,final_deps))
 76 | 
 77 |     # after processing one sentence
 78 |     else:
 79 |         max_ind = -1
 80 |         for rel, deps in plist:
 81 |             for ind, word in deps:
 82 |                 if ind > max_ind:
 83 |                     max_ind = ind
 84 | 
 85 |         # load words into nodes, then make a dependency tree
 86 |         nodes = [None for i in range(0, max_ind + 1)]
 87 |         for rel, deps in plist:
 88 |             for ind, word in deps:
 89 |                 nodes[ind] = word
 90 | 
 91 |         tree = dtree(nodes)
 92 | 
 93 |         opinion_words = []
 94 |             
 95 |         
 96 |         aspect_term = label_file.readline().rstrip()
 97 |         labeled_sent = label_sentence.readline().strip() #opinions
 98 |         
 99 |         aspect_BIO = {}
100 |         
101 |         #facilitate bio notation
102 |         if '##' in labeled_sent:
103 |                 opinions = labeled_sent.split('##')[1].strip()
104 |                 opinions = opinions.split(',')
105 |                 
106 |                 for opinion in opinions:
107 |                     op_list = opinion.split()[:-1]
108 |                     if len(op_list) > 1:
109 |                         for ind, term in enumerate(nodes):
110 |                             if term != None:
111 |                                 if term == op_list[0] and ind < len(nodes) - 1 and nodes[ind + 1] != None and nodes[ind + 1] == op_list[1]:
112 |                                     tree.get(ind).trueLabel = 3
113 |                                     for i in range(len(op_list) - 1):
114 |                                         if nodes[ind + i + 1] != None and nodes[ind + i + 1] == op_list[i + 1]:
115 |                                             tree.get(ind + i + 1).trueLabel = 4
116 |                                         
117 |                     elif len(op_list) == 1:
118 |                         for ind, term in enumerate(nodes):
119 |                             if term != None:
120 |                                 if term == op_list[0] and tree.get(ind).trueLabel == 0:
121 |                                     tree.get(ind).trueLabel = 3
122 |         
123 |         if aspect_term != 'NIL':
124 |             aspects = aspect_term.split(',')
125 |             
126 |                         
127 |             #deal with same word but different labels
128 |             for aspect in aspects:
129 |                 aspect = aspect.strip()
130 |                 #aspect is a phrase
131 |                 if ' ' in aspect:
132 |                     aspect_list = aspect.split()
133 |                     for ind, term in enumerate(nodes):
134 |                         if term == aspect_list[0] and ind < len(nodes) - 1 and nodes[ind + 1] == aspect_list[1]:
135 |                             tree.get(ind).trueLabel = 1
136 |                             
137 |                             for i in range(len(aspect_list) - 1):
138 |                                 if ind + i + 1 < len(nodes):
139 |                                     if nodes[ind + i + 1] == aspect_list[i + 1]:
140 |                                         tree.get(ind + i + 1).trueLabel = 2
141 |                             break
142 |                       
143 |                 #aspect is a single word
144 |                 else:
145 |                     for ind, term in enumerate(nodes):
146 |                         if term == aspect and tree.get(ind).trueLabel == 0:
147 |                             tree.get(ind).trueLabel = 1
148 |             
149 |             
150 |         # add dependency edges between nodes
151 |         for rel, deps in plist:
152 |             par_ind, par_word = deps[0]
153 |             kid_ind, kid_word = deps[1]
154 |             tree.add_edge(par_ind, kid_ind, rel)
155 | 
156 |         tree_dict.append(tree)  
157 |         
158 |         for node in tree.get_nodes():
159 |             if node.word.lower() not in vocab:
160 |                 vocab.append(node.word.lower())
161 |                 
162 |             node.ind = vocab.index(node.word.lower())
163 |             
164 |             for ind, rel in node.kids:
165 |                 if rel not in rel_list:
166 |                     rel_list.append(rel)
167 | 
168 |         plist = []
169 |         indice += 1
170 | 
171 | 
172 | 
173 | print 'rels: ', len(rel_list)
174 | print 'vocab: ', len(vocab)
175 | 
176 | cPickle.dump((vocab, rel_list, tree_dict), open("data_semEval/final_input_sample", "wb"))
177 | 
178 | 
179 | 
180 | 


--------------------------------------------------------------------------------
/util/30word_embedding.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Wed May 27 19:50:17 2015
 4 | 
 5 | @author: wangwenya
 6 | """
 7 | 
 8 | import numpy as np
 9 | import cPickle
10 | 
11 | # import word2vec file with the format: word, vector
12 | # change the path to the one containing your word2vec 
13 | dic_file = open("./data_semEval/w2v_sample.txt", "r")
14 | dic = dic_file.readlines()
15 | 
16 | dictionary = {}
17 | 
18 | for line in dic:
19 |     word_vector = line.split(",")
20 | 
21 |     i = 1
22 |     while '.' not in word_vector[i] or '..' in word_vector[i] or word_vector[i] == '.':
23 |         i += 1
24 |         
25 |     word = ','.join(word_vector[:i])
26 |     
27 |     vector_list = []
28 |     for element in word_vector[i:len(word_vector)-1]:
29 |         vector_list.append(element)
30 |         
31 |     vector = np.asarray(vector_list)
32 |     dictionary[word] = vector
33 |     
34 | 
35 | final_input = cPickle.load(open("data_semEval/final_input_sample", "rb"))
36 | vocab = final_input[0]
37 | 
38 | word_embedding = np.zeros((100, len(vocab)))
39 | 
40 | count = 0
41 | 
42 | for ind, word in enumerate(vocab):
43 |     if word in dictionary.keys():
44 |         vec = dictionary[word]
45 |         row = 0
46 |         for num in vec:
47 |             word_embedding[row][ind] = float(num)
48 |             row += 1
49 |         count += 1
50 |     else:
51 |         print word,
52 |         for i in range(100):
53 |             word_embedding[i][ind] = 2 * np.random.rand() - 1
54 |     
55 | print len(vocab)
56 | print count
57 | #print word_embedding
58 | 
59 | 
60 | cPickle.dump(word_embedding, open("data_semEval/word_embeddings_sample", "wb"))
61 | 


--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
from . import dtree_util
import sys
# Register the package's dtree_util module under the bare top-level name as
# well, so that a plain "import dtree_util" resolves to this same module
# object. NOTE(review): presumably needed for pickles that refer to
# "dtree_util" by its top-level name -- confirm.
sys.modules['dtree_util'] = dtree_util


--------------------------------------------------------------------------------
/util/data_semEval/aspectTerm_sample:
--------------------------------------------------------------------------------
  1 | place
  2 | staff
  3 | NULL
  4 | food,portions
  5 | NULL
  6 | place
  7 | food
  8 | Saul
  9 | foie gras terrine with figs,duck confit
 10 | wine list
 11 | NULL
 12 | restaurant
 13 | cart attendant
 14 | NULL
 15 | Food
 16 | Chow fun,pork shu mai,NULL
 17 | place
 18 | Fish
 19 | Service,oysters,NULL
 20 | NULL
 21 | Restaurant Saul
 22 | NULL,service,decor
 23 | duck breast special
 24 | NULL
 25 | NULL
 26 | NULL
 27 | food,Thai fusion stuff,NULL
 28 | Grilled Chicken special with Edamame Puree
 29 | Edamame pureed
 30 | NULL
 31 | place,decor,vent
 32 | NULL
 33 | NULL
 34 | sake list,NULL
 35 | spicy tuna roll,rock shrimp tempura
 36 | service
 37 | pink pony
 38 | spot
 39 | Food
 40 | Service
 41 | Ambiance
 42 | place
 43 | NULL
 44 | NULL
 45 | food
 46 | service
 47 | waiter
 48 | NULL
 49 | NULL
 50 | NULL
 51 | place
 52 | NULL
 53 | Food
 54 | Service
 55 | NULL
 56 | sea urchin
 57 | NULL
 58 | sushi
 59 | rice to fish ration
 60 | half price sushi deal
 61 | NULL
 62 | NULL
 63 | NULL
 64 | crowd
 65 | food,service
 66 | NULL
 67 | NULL
 68 | NULL
 69 | service,food,wine list
 70 | Prix Fixe menu
 71 | NULL
 72 | NULL
 73 | NULL
 74 | food
 75 | dishes
 76 | NULL
 77 | NULL
 78 | food,NULL
 79 | somosas,chai,chole,dhosas,dhal
 80 | service
 81 | kitchen
 82 | food
 83 | ambience,NULL
 84 | NULL
 85 | view
 86 | service,food
 87 | lava cake dessert
 88 | Cosette
 89 | restaurant
 90 | food,French Onion soup,desserts
 91 | ambience,food
 92 | NULL
 93 | NULL
 94 | pizza,cheese
 95 | NULL
 96 | NULL
 97 | NULL
 98 | pizza
 99 | ingredients,pizza,crust
100 | NULL


--------------------------------------------------------------------------------
/util/dtree_util.py:
--------------------------------------------------------------------------------
 1 | from operator import itemgetter
 2 | 
 3 | # - an individual node contains the word associated with the node along with 
 4 | #   pointers to its kids and parents. 
 5 | class node:
 6 | 
 7 |     def __init__(self, word):
 8 |         if word != None:
 9 |             self.word = word
10 |             self.kids = []
11 |             self.parent = []
12 |             self.finished = 0
13 |             self.is_word = 1
14 |             
15 |             # add label
16 |             self.trueLabel = 0
17 | 
18 |             # the "ind" variable stores the look-up index of the word in the 
19 |             # word embedding matrix We. set this value when the vocabulary is finalized
20 |             self.ind = -1
21 | 
22 |         else:
23 |             self.is_word = 0
24 | 
25 | # - a dtree consists of a list of nodes
26 | # - if you want to use a different dataset, check out the preprocessing scripts
27 | #   that convert stanford dependency parses to dtrees
class dtree:
    """A dependency tree stored as a flat list of nodes.

    Positions in ``self.nodes`` follow the word list given to the
    constructor; entries built from None are placeholders with
    ``is_word == 0``. To use a different dataset, see the preprocessing
    scripts that convert stanford dependency parses to dtrees.
    """

    def __init__(self, word_list):
        self.nodes = [node(word) for word in word_list]


    def add_edge(self, par, child, rel):
        """Link position ``par`` to position ``child`` with relation ``rel``."""
        parent_node = self.nodes[par]
        child_node = self.nodes[child]
        parent_node.kids.append( (child, rel) )
        child_node.parent.append( (par, rel) )


    def get_nodes(self):
        """Return all nodes that hold a word (placeholders are left out)."""
        return [entry for entry in self.nodes if entry.is_word]


    def get_node_inds(self):
        """Return (position, node) pairs for all nodes that hold a word."""
        pairs = []
        for position, entry in enumerate(self.nodes):
            if entry.is_word:
                pairs.append( (position, entry) )
        return pairs


    def get(self, ind):
        """Return the node at position ``ind`` of the raw node list."""
        return self.nodes[ind]


    def get_words(self):
        """Return the words of all word nodes but the first, space-joined."""
        word_nodes = self.get_nodes()
        return ' '.join(entry.word for entry in word_nodes[1:])


    def get_phrase(self, ind):
        """Return the text of node ``ind`` and all of its descendants.

        The words are joined in order of their position in the sentence.
        """
        collected = []
        pending = [ind]

        while pending:
            position = pending.pop()
            current = self.get(position)
            collected.append( (position, current.word) )
            # queue every kid so that its own kids are visited as well
            pending.extend(kid_ind for kid_ind, rel in current.kids)

        collected.sort(key=itemgetter(0))
        return ' '.join(word for position, word in collected).strip()


    def reset_finished(self):
        """Set ``finished`` back to 0 on every word node."""
        for entry in self.get_nodes():
            entry.finished = 0


    def error(self):
        """Return the sum of ``label_error`` over all word nodes."""
        total = 0.0
        for entry in self.get_nodes():
            total += entry.label_error

        return total
96 | 


--------------------------------------------------------------------------------
/util/gen_util.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | 
# - given a vector containing all parameters, return a list of unrolled parameters
# - specifically, these parameters are, in the order in which they are stored
#   in the vector and returned:
#   - rel_dict, dictionary of {dependency relation r: composition matrix W_r}
#   - Wv, the matrix for lifting a word embedding to the hidden space
#   - Wc, classification matrix
#   - b, bias term
#   - b_c, classification bias
#   - We, the word embedding matrix
 12 | 
 13 | def unroll_params(arr, d, c, len_voc, rel_list):
 14 | 
 15 |     mat_size = d * d
 16 |     #classification
 17 |     matClass_size = c * d
 18 |     rel_dict = {}
 19 |     ind = 0
 20 | 
 21 |     for r in rel_list:
 22 |         rel_dict[r] = arr[ind: ind + mat_size].reshape( (d, d) )
 23 |         ind += mat_size
 24 | 
 25 |     Wv = arr[ind : ind + mat_size].reshape( (d, d) )
 26 |     ind += mat_size
 27 | 
 28 |     Wc = arr[ind : ind + matClass_size].reshape( (c, d) )
 29 |     ind += matClass_size
 30 | 
 31 |     b = arr[ind : ind + d].reshape( (d, 1) )
 32 |     ind += d
 33 | 
 34 |     b_c = arr[ind : ind + c].reshape( (c, 1) )
 35 |     ind += c
 36 | 
 37 |     We = arr[ind : ind + len_voc * d].reshape( (d, len_voc))
 38 | 
 39 |     return [rel_dict, Wv, Wc, b, b_c, We]
 40 |     
 41 | #   similar to the above function, but exclude classification parameters
 42 | 
 43 | def unroll_params_noWcrf(arr, d, c, len_voc, rel_list):
 44 | 
 45 |     mat_size = d * d
 46 |     rel_dict = {}
 47 |     ind = 0
 48 | 
 49 |     for r in rel_list:
 50 |         rel_dict[r] = arr[ind: ind + mat_size].reshape( (d, d) )
 51 |         ind += mat_size
 52 | 
 53 |     Wv = arr[ind : ind + mat_size].reshape( (d, d) )
 54 |     ind += mat_size
 55 |    
 56 |     b = arr[ind : ind + d].reshape( (d, 1) )
 57 |     ind += d
 58 | 
 59 |     We = arr[ind : ind + len_voc * d].reshape( (d, len_voc))
 60 | 
 61 |     return [rel_dict, Wv, b, We]
 62 |     
 63 | # combine all parameters into a flat vector
 64 | 
 65 | def roll_params(params, rel_list):
 66 | 
 67 |     (rel_dict, Wv, Wc, b, b_c, We) = params
 68 | 
 69 |     rels = np.concatenate( [rel_dict[key].ravel() for key in rel_list] )
 70 | 
 71 |     return np.concatenate( (rels, Wv.ravel(), Wc.ravel(), b.ravel(), b_c.ravel(), We.ravel() ) )
 72 | 
 73 | 
 74 | def roll_params_noWcrf(params, rel_list):
 75 | 
 76 |     (rel_dict, Wv, b, We) = params
 77 | 
 78 |     rels = np.concatenate( [rel_dict[key].ravel() for key in rel_list] )
 79 | 
 80 |     return np.concatenate( (rels, Wv.ravel(), b.ravel(), We.ravel() ) )
 81 | 
 82 | # randomly initialize all parameters
 83 | 
 84 | def gen_dtrnn_params(d, c, rels):
 85 |     """
 86 |     Returns (dict{rels:[mat]}, Wv, Wc, b, b_c)
 87 |     """
 88 |     r = np.sqrt(6) / np.sqrt(2 * d + 1)
 89 |     r_Wc = 1.0 / np.sqrt(d)
 90 |     rel_dict = {}
 91 |     np.random.seed(3)
 92 |     for rel in rels:
 93 | 	   rel_dict[rel] = np.random.rand(d, d) * 2 * r - r
 94 | 
 95 |     return (
 96 | 	    rel_dict,
 97 |         #Wv
 98 | 	    np.random.rand(d, d) * 2 * r - r,
 99 |         #Wc
100 |         np.random.rand(c, d) * 2 * r_Wc - r_Wc,
101 |         #b
102 | 	    np.zeros((d, 1)),
103 |         #b_c
104 |         np.random.rand(c, 1)
105 |         )
106 | 
107 |  
108 | #generate word embedding matrix
def gen_word_embeddings(d, total_num):
    """Return a (d, total_num) matrix of random word vectors.

    Every column is one word vector with entries uniform in [0, 1).
    ``total_num == 0`` yields an empty (d, 0) matrix; it used to fail
    because no column was ever created.
    """
    # Draw all vectors in one call instead of appending column by column.
    # Row k of the (total_num, d) draw holds the d numbers of the k-th word,
    # so after transposing column k is that word's vector -- the numbers
    # come out of the generator in the same order as one draw per word.
    by_word = np.random.rand(total_num, d)
    return np.ascontiguousarray(by_word.T)
119 | 
120 | 
121 | # returns list of zero gradients which backprop modifies, used for pretraining of RNN
def init_dtrnn_grads(rel_list, d, c, len_voc):
    """Return zeroed gradient accumulators for pretraining of the RNN.

    The result is the list
        [rel_grads, (d, d), (c, d), (d, 1), (c, 1), (d, len_voc)]
    where rel_grads maps every relation in ``rel_list`` to its own (d, d)
    zero matrix and the remaining entries are zero arrays of the given
    shapes. backprop increments these entries in place.
    """
    # a separate array per relation, so the accumulators never alias
    rel_grads = dict((rel, np.zeros( (d, d) )) for rel in rel_list)

    return [
        rel_grads,
        np.zeros((d, d)),
        np.zeros((c, d)),
        np.zeros((d, 1)),
        np.zeros((c, 1)),
        np.zeros((d, len_voc))
        ]
136 |      
137 | # returns list of zero gradients which backprop modifies, used for joint training of RNCRF
def init_crfrnn_grads(rel_list, d, c, len_voc):
    """Return zeroed gradient accumulators for joint training of RNCRF.

    The result is the list [rel_grads, (d, d), (d, 1), (d, len_voc)] where
    rel_grads maps every relation in ``rel_list`` to its own (d, d) zero
    matrix and the remaining entries are zero arrays of the given shapes.
    ``c`` is accepted but not used: there are no classification entries.
    """
    # a separate array per relation, so the accumulators never alias
    rel_grads = dict((rel, np.zeros( (d, d) )) for rel in rel_list)

    return [
        rel_grads,
        np.zeros((d, d)),
        np.zeros((d, 1)),
        np.zeros((d, len_voc))
        ]
150 |      
151 | 
152 | # random embedding matrix for gradient checks
def gen_rand_we(len_voc, d):
    """Return a random (d, len_voc) embedding matrix for gradient checks.

    Entries are uniform in [-r, r) with the fixed bound
    r = sqrt(6) / sqrt(51).
    """
    bound = np.sqrt(6) / np.sqrt(51)
    unit = np.random.rand(d, len_voc)
    return unit * 2 * bound - bound
157 | 


--------------------------------------------------------------------------------
/util/lexparser.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #
 3 | # Runs the English PCFG parser on one or more files, printing dependency parses only
 4 | 
 5 | if [ ! $# -ge 1 ]; then
 6 |   echo Usage: `basename $0` 'file(s)'
 7 |   echo
 8 |   exit
 9 | fi
10 | 
11 | #scriptdir=`dirname $0`
12 | #$scriptdir/
13 | 
14 | java -mx10g -cp "*" edu.stanford.nlp.parser.lexparser.LexicalizedParser -nthreads 2 -sentences newline \
15 |  -retainTmpSubcategories -outputFormat "typedDependencies" -outputFormatOptions "basicDependencies" englishPCFG.ser.gz $*
16 | 
17 | #edu/stanford/nlp/models/lexparser/
18 | 


--------------------------------------------------------------------------------
/util/math_util.py:
--------------------------------------------------------------------------------
 1 | from numpy import *
 2 | 
 3 | # derivative of tanh
 4 | def dtanh0(x):
 5 | 	return 1 - square(x)
 6 | 
 7 | 
 8 | # derivative of normalized tanh
 9 | def dtanh(x):
10 |     norm = linalg.norm(x)
11 |     y = x - power(x, 3)
12 |     dia = diag((1 - square(x)).flatten()) / norm
13 |     pro = y.dot(x.T) / power(norm, 3)
14 |     out = dia - pro
15 |     return out
16 | 
17 | 
18 | ## other utility functions not used here (but experimented with!)
def softmax(w):
    """Return exp(w) normalized by the sum over ALL entries of ``w``.

    The largest entry is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps exp() from overflowing, which
    turned large inputs into NaN. An empty input gives an empty result.
    """
    w = asarray(w)
    if w.size == 0:
        return exp(w)

    ew = exp(w - w.max())
    return ew / ew.sum()
22 | 
def sigmoid(w):
    """Return the logistic function 1 / (1 + exp(-w)), elementwise."""
    denominator = 1 + exp(-w)
    return 1 / denominator
26 | 
def d_sigmoid(w):
    """Return w * (1 - w), elementwise.

    This is the derivative of the sigmoid when ``w`` already holds the
    sigmoid output.
    """
    complement = 1 - w
    return w * complement
29 | 
def relu(x):
    """Return ``x`` where it is positive and zero elsewhere."""
    positive = x > 0
    return x * positive
32 | 
def drelu(x):
    """Return the boolean mask ``x > 0`` (the derivative of relu)."""
    is_positive = x > 0
    return is_positive
35 |     
def crossent(label, classification):
    """Return the cross entropy -sum(label * log(classification)).

    The sum runs over all entries.
    """
    weighted_log = label * log(classification)
    return -sum(weighted_log)
38 | 
def crossent_loss(label, classification):
    """Return ``classification - label``."""
    difference = classification - label
    return difference
41 | 
def square_loss(label, classification):
    """Return 0.5 * err^T err for err = label - classification.

    For column vectors the result is a (1, 1) array.
    """
    residual = label - classification
    squared_norm = residual.T.dot(residual)
    return 0.5 * squared_norm


--------------------------------------------------------------------------------