├── COPYING
├── Makefile
├── README.txt
├── crf
    ├── Makefile
    ├── README.txt
    ├── conlleval
    ├── crfasgd.cpp
    ├── crfsgd.cpp
    └── template
├── data
    ├── README.txt
    ├── conll2000
    │   └── README.txt
    ├── pascal
    │   └── README.txt
    └── rcv1
    │   └── README.txt
├── lib
    ├── assert.h
    ├── gzstream.cpp
    ├── gzstream.h
    ├── matrices.cpp
    ├── matrices.h
    ├── pstream.cpp
    ├── pstream.h
    ├── timer.cpp
    ├── timer.h
    ├── vectors.cpp
    ├── vectors.h
    └── wrapper.h
├── svm
    ├── Makefile
    ├── README.txt
    ├── data.cpp
    ├── data.h
    ├── loss.h
    ├── old
    │   ├── Makefile
    │   ├── README.txt
    │   ├── svmcg.cpp
    │   ├── svmolbfgs.cpp
    │   ├── svmsgd2.cpp
    │   └── svmsgdqn.cpp
    ├── prep_alpha.cpp
    ├── prep_rcv1.cpp
    ├── prep_webspam.cpp
    ├── svmasgd.cpp
    └── svmsgd.cpp
└── win
    ├── README.txt
    ├── crfasgd
        └── crfasgd.vcproj
    ├── crfsgd
        └── crfsgd.vcproj
    ├── prep_alpha
        └── prep_alpha.vcproj
    ├── prep_rcv1
        └── prep_rcv1.vcproj
    ├── prep_webspam
        └── prep_webspam.vcproj
    ├── sgd.sln
    ├── svmasgd
        └── svmasgd.vcproj
    ├── svmsgd
        └── svmsgd.vcproj
    └── zlib
        └── README.txt


/COPYING:
--------------------------------------------------------------------------------
  1 | 		   GNU LESSER GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 | 
  9 |   This version of the GNU Lesser General Public License incorporates
 10 | the terms and conditions of version 3 of the GNU General Public
 11 | License, supplemented by the additional permissions listed below.
 12 | 
 13 |   0. Additional Definitions. 
 14 | 
 15 |   As used herein, "this License" refers to version 3 of the GNU Lesser
 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
 17 | General Public License.
 18 | 
 19 |   "The Library" refers to a covered work governed by this License,
 20 | other than an Application or a Combined Work as defined below.
 21 | 
 22 |   An "Application" is any work that makes use of an interface provided
 23 | by the Library, but which is not otherwise based on the Library.
 24 | Defining a subclass of a class defined by the Library is deemed a mode
 25 | of using an interface provided by the Library.
 26 | 
 27 |   A "Combined Work" is a work produced by combining or linking an
 28 | Application with the Library.  The particular version of the Library
 29 | with which the Combined Work was made is also called the "Linked
 30 | Version".
 31 | 
 32 |   The "Minimal Corresponding Source" for a Combined Work means the
 33 | Corresponding Source for the Combined Work, excluding any source code
 34 | for portions of the Combined Work that, considered in isolation, are
 35 | based on the Application, and not on the Linked Version.
 36 | 
 37 |   The "Corresponding Application Code" for a Combined Work means the
 38 | object code and/or source code for the Application, including any data
 39 | and utility programs needed for reproducing the Combined Work from the
 40 | Application, but excluding the System Libraries of the Combined Work.
 41 | 
 42 |   1. Exception to Section 3 of the GNU GPL.
 43 | 
 44 |   You may convey a covered work under sections 3 and 4 of this License
 45 | without being bound by section 3 of the GNU GPL.
 46 | 
 47 |   2. Conveying Modified Versions.
 48 | 
 49 |   If you modify a copy of the Library, and, in your modifications, a
 50 | facility refers to a function or data to be supplied by an Application
 51 | that uses the facility (other than as an argument passed when the
 52 | facility is invoked), then you may convey a copy of the modified
 53 | version:
 54 | 
 55 |    a) under this License, provided that you make a good faith effort to
 56 |    ensure that, in the event an Application does not supply the
 57 |    function or data, the facility still operates, and performs
 58 |    whatever part of its purpose remains meaningful, or
 59 | 
 60 |    b) under the GNU GPL, with none of the additional permissions of
 61 |    this License applicable to that copy.
 62 | 
 63 |   3. Object Code Incorporating Material from Library Header Files.
 64 | 
 65 |   The object code form of an Application may incorporate material from
 66 | a header file that is part of the Library.  You may convey such object
 67 | code under terms of your choice, provided that, if the incorporated
 68 | material is not limited to numerical parameters, data structure
 69 | layouts and accessors, or small macros, inline functions and templates
 70 | (ten or fewer lines in length), you do both of the following:
 71 | 
 72 |    a) Give prominent notice with each copy of the object code that the
 73 |    Library is used in it and that the Library and its use are
 74 |    covered by this License.
 75 | 
 76 |    b) Accompany the object code with a copy of the GNU GPL and this license
 77 |    document.
 78 | 
 79 |   4. Combined Works.
 80 | 
 81 |   You may convey a Combined Work under terms of your choice that,
 82 | taken together, effectively do not restrict modification of the
 83 | portions of the Library contained in the Combined Work and reverse
 84 | engineering for debugging such modifications, if you also do each of
 85 | the following:
 86 | 
 87 |    a) Give prominent notice with each copy of the Combined Work that
 88 |    the Library is used in it and that the Library and its use are
 89 |    covered by this License.
 90 | 
 91 |    b) Accompany the Combined Work with a copy of the GNU GPL and this license
 92 |    document.
 93 | 
 94 |    c) For a Combined Work that displays copyright notices during
 95 |    execution, include the copyright notice for the Library among
 96 |    these notices, as well as a reference directing the user to the
 97 |    copies of the GNU GPL and this license document.
 98 | 
 99 |    d) Do one of the following:
100 | 
101 |        0) Convey the Minimal Corresponding Source under the terms of this
102 |        License, and the Corresponding Application Code in a form
103 |        suitable for, and under terms that permit, the user to
104 |        recombine or relink the Application with a modified version of
105 |        the Linked Version to produce a modified Combined Work, in the
106 |        manner specified by section 6 of the GNU GPL for conveying
107 |        Corresponding Source.
108 | 
109 |        1) Use a suitable shared library mechanism for linking with the
110 |        Library.  A suitable mechanism is one that (a) uses at run time
111 |        a copy of the Library already present on the user's computer
112 |        system, and (b) will operate properly with a modified version
113 |        of the Library that is interface-compatible with the Linked
114 |        Version. 
115 | 
116 |    e) Provide Installation Information, but only if you would otherwise
117 |    be required to provide such information under section 6 of the
118 |    GNU GPL, and only to the extent that such information is
119 |    necessary to install and execute a modified version of the
120 |    Combined Work produced by recombining or relinking the
121 |    Application with a modified version of the Linked Version. (If
122 |    you use option 4d0, the Installation Information must accompany
123 |    the Minimal Corresponding Source and Corresponding Application
124 |    Code. If you use option 4d1, you must provide the Installation
125 |    Information in the manner specified by section 6 of the GNU GPL
126 |    for conveying Corresponding Source.)
127 | 
128 |   5. Combined Libraries.
129 | 
130 |   You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 | 
136 |    a) Accompany the combined library with a copy of the same work based
137 |    on the Library, uncombined with any other library facilities,
138 |    conveyed under the terms of this License.
139 | 
140 |    b) Give prominent notice with the combined library that part of it
141 |    is a work based on the Library, and explaining where to find the
142 |    accompanying uncombined form of the same work.
143 | 
144 |   6. Revised Versions of the GNU Lesser General Public License.
145 | 
146 |   The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 | 
151 |   Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 |   If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # General makefile: builds the programs found in each subdirectory.
 2 | 
 3 | MAKE=make
 4 | SHELL=/bin/sh
 5 | 
 6 | SUBDIRS=svm crf
 7 | 
 8 | # Default target: warn about missing data, build everything, print a banner.
 9 | world: check all
10 | 	@echo "================================================"
11 | 	@echo "CONGRATULATIONS: The compilation was successful."
12 | 	@echo "To know what to do next, check the README file."
13 | 	@echo "================================================"
14 | 
15 | # Run the requested target in every subdirectory; stop at the first failure.
16 | all clean:
17 | 	@for n in ${SUBDIRS} ; \
18 | 	  do ( cd $$n && ${MAKE} ${@}) || exit ; done
19 | 
20 | all: check
21 | # Print a warning (without failing) when the RCV1 data file is not readable.
22 | check:
23 | 	@if [ -r data/rcv1/rcv1-v2.topics.qrels.gz ] ; then : ; else \
24 | 	  echo "=======================================" ; \
25 | 	  echo "ATTENTION: Missing data files" ; \
26 | 	  echo "You should have read the README file!" ; \
27 | 	  echo "=======================================" ; \
28 | 	fi
29 | 
30 | # These targets are commands, not files; clean must be listed as well.
31 | .PHONY: world all clean depend check


--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | SGD-2.0
  3 | -------
  4 | 
  5 | L. Bottou, October 2011
  6 | 
  7 | 
  8 | 
  9 | 1. INTRODUCTION
 10 | 
 11 | The goal of this package is to illustrate the efficiency of stochastic
 12 | gradient descent for large-scale learning tasks.  
 13 | 
 14 | Two algorithms,
 15 | 
 16 |     * Stochastic gradient descent (SGD), and
 17 |     * Averaged stochastic gradient descent (ASGD),
 18 | 
 19 | are applied to two well known problems
 20 | 
 21 |     * Linear Support Vector Machines, and
 22 |     * Conditional Random Fields.
 23 | 
 24 | The corresponding programs are designed for simplicity and readability.  In
 25 | particular they avoid optimizations that would make the programs less
 26 | readable.  The sole exception is the handling of sparse training data.
 27 | 
 28 | 
 29 | 
 30 | 2. DATASETS
 31 | 
 32 | The programs are demonstrated using a number of standard datasets.
 33 | 
 34 | * The RCV1-V2 dataset.
 35 | * The ALPHA and WEBSPAM datasets from the first Pascal Large Scale Learning Challenge.
 36 | * The dataset associated with the CONLL2000 chunking task.
 37 | 
 38 | These datasets are available from the web.  File "data/README.txt" contains
 39 | instructions for downloading.  The Pascal datasets must be 
 40 | preprocessed using a relatively slow python script.
 41 | 
 42 | 
 43 | 
 44 | 3. ALGORITHMS
 45 | 
 46 | Unlike most optimization algorithms, each iteration of these stochastic
 47 | algorithms processes a single example and updates the parameters.  Although the
 48 | theory calls for picking a random example at each iteration, this
 49 | implementation performs sequential passes over randomly shuffled training
 50 | examples. This process is in fact more effective in practice.  Each pass is
 51 | called an epoch.
 52 | 
 53 | Assume we have an objective function of the form
 54 |  
 55 |      Obj(w) = 1/2 lambda w^2  + 1/n sum_i=1^n L(z_i,w)
 56 | 
 57 | where w is the parameter, {z_1,...,z_n} are the training examples, 1/2 lambda
 58 | w^2 is a regularization term, and L(z,w) is the loss function. Each iteration of
 59 | the SGD algorithm picks a single example z and updates the parameter vector
 60 | using the formula:
 61 | 
 62 |     SGD:    w := (1 - lambda eta_t) w - eta_t dL/dw(z,w)
 63 | 
 64 | The trick of course is to choose the gain sequence eta_t wisely.  We use the
 65 | formula eta_t = eta_0 / (1 + lambda eta_0 t), and we pick eta_0 by trying
 66 | several gain values on a subset of the training data.  In order to leverage
 67 | sparse datasets, we represent vector w as the ratio of a vector W and a scalar
 68 | wDivisor, that is, w = W / wDivisor. Each iteration effectively becomes:
 69 | 
 70 |     SGD:    wDivisor = wDivisor / (1 - lambda eta_t)
 71 |             W = W - eta_t wDivisor dL/dw(z,w)
 72 | 
 73 | The ASGD algorithm maintains two parameter vectors. The first parameter
 74 | vector, w, is updated like the SGD parameter. However, the output of the
 75 | algorithm is the second parameter vector, a, which computes an average of 
 76 | all the previous values of w.
 77 | 
 78 |     ASGD:   w := (1 - lambda eta_t) w - eta_t dL/dw(z,w)
 79 |             a := a + mu_t [ w - a ]
 80 | 
 81 | This algorithm has been shown to work extremely well (Polyak and Juditsky,
 82 | 1992) provided that the sequence eta_t decreases with exactly the right speed.
 83 | We follow (Xu, 2010) and choose eta_t = eta_0 / (1 + lambda eta_0 t) ^ 0.75.
 84 | We select eta_0 by trying several gain values on a subset of the training
 85 | data, and we start the averaging process after a certain time, that is, 
 86 | mu_t = 1/max(1,t-t0). Following (Xu, 2010), sparse training data is treated
 87 | using the substitutions w = W / wDivisor and a = (A + wFraction W) / aDivisor.
 88 | The algorithm effectively becomes:
 89 | 
 90 |     ASGD:   wDivisor = wDivisor / (1 - eta_t * lambda)
 91 |             W = W - eta_t wDivisor dL/dw(z,w)
 92 |             A = A + eta_t wFraction wDivisor dL/dw(z,w)
 93 |             aDivisor = aDivisor / (1 - mu_t)
 94 |             wFraction = wFraction + mu_t aDivisor / wDivisor
 95 | 
 96 | 
 97 | 
 98 | 4. SUPPORT VECTOR MACHINES
 99 | 
100 | The directory "svm" contains programs to train a L2-regularized linear model
101 | for binary classification tasks. Compilation time switches determine whether
102 | the models include a bias term, whether the bias term is regularized, and
103 | which loss function should be used.  The default is to use an unregularized
104 | bias term using the log-loss function L(x,y,w) = log(1+exp(-ywx)). See file
105 | "svm/README.txt" for details about these programs and their usage for each of the
106 | datasets.
107 | 
108 | 
109 | 
110 | 5. CONDITIONAL RANDOM FIELDS
111 | 
112 | The directory "crf" contains programs "crfsgd" and "crfasgd" for training
113 | conditional random fields for sequences. Both programs take data files and
114 | template files and produce tagging files similar to those of Taku Kudo's
115 | CRF++ program described at <http://crfpp.sourceforge.net/>.  However they also
116 | accept gzipped data files instead of plain files. See the file "crf/README.txt"
117 | for detailed information about these programs and their usage.
118 | 
119 | 
120 | 
121 | 
122 | 
123 | 


--------------------------------------------------------------------------------
/crf/Makefile:
--------------------------------------------------------------------------------
 1 | # CRF with stochastic gradient
 2 | 
 3 | # This program is free software; you can redistribute it and/or modify
 4 | # it under the terms of the GNU General Public License as published by
 5 | # the Free Software Foundation; either version 2 of the License, or
 6 | # (at your option) any later version.
 7 | # 
 8 | # This program is distributed in the hope that it will be useful,
 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 | # GNU General Public License for more details.
12 | # 
13 | # You should have received a copy of the GNU General Public License
14 | # along with this program; if not, write to the Free Software
15 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
16 | 
17 | 
18 | L=../lib
19 | 
20 | 
21 | CXX=g++
22 | OPTS=-g -O2
23 | CXXFLAGS= ${OPTS} -Wall -I$L
24 | LIBS = -lz -lm
25 | 
26 | PROGRAMS = crfsgd crfasgd
27 | 
28 | OBJS = vectors.o matrices.o gzstream.o pstream.o timer.o
29 | 
30 | all: ${PROGRAMS}
31 | 
32 | clean:
33 | 	-rm ${PROGRAMS} 2>/dev/null
34 | 	-rm *.o 2>/dev/null
35 | 
36 | crfsgd: crfsgd.o ${OBJS}
37 | 	-rm $@ 2>/dev/null
38 | 	${CXX} ${CXXFLAGS} -o $@ crfsgd.o ${OBJS} ${LIBS}
39 | 
40 | crfasgd: crfasgd.o ${OBJS}
41 | 	-rm $@ 2>/dev/null
42 | 	${CXX} ${CXXFLAGS} -o $@ crfasgd.o ${OBJS} ${LIBS}
43 | 
44 | crfsgd.o: crfsgd.cpp $L/vectors.h  $L/gzstream.h $L/timer.h
45 | 	${CXX} ${CXXFLAGS} -c -o $@ crfsgd.cpp
46 | 
47 | crfasgd.o: crfasgd.cpp $L/vectors.h  $L/gzstream.h $L/timer.h
48 | 	${CXX} ${CXXFLAGS} -c -o $@ crfasgd.cpp
49 | 
50 | vectors.o: $L/vectors.cpp $L/vectors.h  $L/wrapper.h $L/assert.h
51 | 	${CXX} ${CXXFLAGS} -c -o $@ $L/vectors.cpp
52 | 
53 | matrices.o: $L/matrices.cpp $L/matrices.h $L/vectors.h  $L/wrapper.h $L/assert.h
54 | 	${CXX} ${CXXFLAGS} -c -o $@ $L/matrices.cpp
55 | 
56 | gzstream.o: $L/gzstream.cpp $L/gzstream.h $L/assert.h
57 | 	${CXX} ${CXXFLAGS} -c -o $@ $L/gzstream.cpp
58 | 
59 | pstream.o: $L/pstream.cpp $L/pstream.h $L/assert.h
60 | 	${CXX} ${CXXFLAGS} -c -o $@ $L/pstream.cpp
61 | 
62 | timer.o: $L/timer.cpp $L/timer.h $L/assert.h
63 | 	${CXX} ${CXXFLAGS} -c -o $@ $L/timer.cpp
64 | 
65 | 
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/crf/README.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | 1. COMPILING CRFSGD AND CRFASGD
  3 | 
  4 | Compiling under Unix is achieved using the traditional command "make".  
  5 | The compilation requires the libz library. This library usually comes
  6 | preinstalled on most Linux distributions, and is otherwise available
  7 | from http://www.zlib.org.  
  8 | 
  9 | Compiling under Windows is possible using Cygwin, using MSYS, or using the
 10 | MSVC project files provided in the subdirectory "win" of the sgd distribution.
 11 | Make sure to read the instructions as you need to compile zlib adequately.
 12 | You then need to copy the executable files into this directory.
 13 | 
 14 | 
 15 | 2. USAGE
 16 | 
 17 | Synopsis (training): 
 18 |         crfsgd [options] model template traindata [validdata]
 19 |         crfasgd [options] model template traindata [validdata]
 20 | 
 21 | Synopsis (tagging): 
 22 |         crfsgd -t model testdata
 23 |         crfasgd -t model testdata
 24 | 
 25 | Programs "crfsgd" and "crfasgd" implement stochastic gradient algorithms for
 26 | training conditional random field models. The program inputs are modeled after
 27 | the well known CRF++ program (http://crfpp.sourceforge.net).  In particular
 28 | these programs use the same format for the template files and the data
 29 | files. These formats are well documented on the CRF++ web page. Programs
 30 | crfsgd and crfasgd can also directly read gzipped data files, provided that
 31 | the file name ends with suffix ".gz".
 32 | 
 33 | When operating in training mode, these programs construct a CRF according to
 34 | the template file and perform a predefined number of training epochs on the
 35 | training data. Every so many epochs are followed by a performance evaluation
 36 | pass over the training set and optionally over a validation set. The
 37 | performance evaluation procedure can pipe the tags into an external evaluation
 38 | program such as the standard CONLL evaluation script conlleval. The training
 39 | set performance is useful to monitor the progress of the optimization. The
 40 | validation set performance is useful to estimate the generalization
 41 | performance. The recommended procedure is to monitor the validation
 42 | performance and stop the algorithm when the validation metrics no longer
 43 | improve. In the limit of large number of examples, program "crfasgd" should
 44 | reach this point after one or two epochs only. The model is saved in the
 45 | specified model file (which in fact is a compressed text file.)
 46 | 
 47 | Both programs accept the same options when used in training mode.
 48 | 
 49 |  -c <num> : capacity control parameter (1.0)
 50 |  -f <num> : threshold on the occurrences of each feature (3)
 51 |  -r <num> : total number of epochs (50)
 52 |  -h <num> : epochs between each testing phase (5)
 53 |  -e <cmd> : performance evaluation command (conlleval -q)
 54 |  -s <num> : initial learning rate
 55 |  -q       : silent mode
 56 | 
 57 | Program crfasgd accepts one additional option:
 58 | 
 59 |  -a d     : starts averaging after d iterations (default: 1.0.)
 60 | 
 61 | Using option -t switches to the tagging mode. When operating in tagging mode, the
 62 | program reads the model, tags every sentence from the provided test data file,
 63 | and outputs the tags on the standard output using a format suitable for the
 64 | standard evaluation script conlleval.
 65 | 
 66 | 
 67 | 3. RUNNING THE STOCHASTIC GRADIENT CRF ON THE CONLL CHUNKING TASK
 68 | 
 69 | Please follow the instructions in file "data/README.txt" to populate the
 70 | directory "data/conll2000". The gzipped files are directly usable.
 71 | No further preprocessing is necessary.
 72 | 
 73 | Training a model using stochastic gradient descent.
 74 | 
 75 |     $ ./crfsgd -c 1.0 -f 3 model.gz template \
 76 |           ../data/conll2000/train.txt.gz ../data/conll2000/test.txt.gz
 77 |     Reading template file template.
 78 |     ...
 79 |     Reading and preprocessing ../data/conll2000/train.txt.gz.
 80 |     ...
 81 |     Reading and preprocessing ../data/conll2000/test.txt.gz.
 82 |     ...
 83 |     [Calibrating] --  1000 samples
 84 |     ...
 85 |     [Epoch 1] -- wnorm=3428.22 time=15.66s.
 86 |     [Epoch 2] -- wnorm=4981.97 time=21.59s.
 87 |     [Epoch 3] -- wnorm=6099.82 time=27.5s.
 88 |     [Epoch 4] -- wnorm=6888.25 time=33.41s.
 89 |     [Epoch 5] -- wnorm=7465.87 time=39.29s.
 90 |     Training perf: sentences=8936 loss=0.8069 obj=1.22464 err=2454 (1.15904%)
 91 |     accuracy:  98.84%; precision:  97.95%; recall:  98.04%; FB1:  98.00
 92 |     Testing perf: sentences=2012 loss=2.35348 obj=2.77122 err=1997 (4.21513%)
 93 |     accuracy:  95.78%; precision:  93.31%; recall:  93.47%; FB1:  93.39
 94 |     [Epoch 6] -- wnorm=7904.99 time=45.19s.
 95 |     [Epoch 7] -- wnorm=8238.51 time=51.07s.
 96 |     [Epoch 8] -- wnorm=8494.34 time=56.96s.
 97 |     [Epoch 9] -- wnorm=8695.67 time=62.84s.
 98 |     [Epoch 10] -- wnorm=8859.06 time=68.73s.
 99 |     Training perf: sentences=8936 loss=0.592674 obj=1.08837 err=1492 (0.704681%)
100 |     accuracy:  99.30%; precision:  98.81%; recall:  98.64%; FB1:  98.72
101 |     Testing perf: sentences=2012 loss=2.27945 obj=2.77514 err=1950 (4.11592%)
102 |     accuracy:  95.88%; precision:  93.60%; recall:  93.43%; FB1:  93.51
103 |     ...
104 |     [Epoch 46] -- wnorm=9670.23 time=281.04s.
105 |     [Epoch 47] -- wnorm=9670.91 time=286.93s.
106 |     [Epoch 48] -- wnorm=9670.6 time=292.83s.
107 |     [Epoch 49] -- wnorm=9670.94 time=298.74s.
108 |     [Epoch 50] -- wnorm=9669.67 time=304.64s.
109 |     Training perf: sentences=8936 loss=0.476964 obj=1.01802 err=692 (0.326836%)
110 |     accuracy:  99.67%; precision:  99.42%; recall:  99.26%; FB1:  99.34
111 |     Testing perf: sentences=2012 loss=2.20519 obj=2.74624 err=1889 (3.98717%)
112 |     accuracy:  96.01%; precision:  93.94%; recall:  93.55%; FB1:  93.74
113 |     Saving model file model.gz.
114 |     Done!  304.64 seconds.
115 | 
116 | 
117 | 
118 | Training a model using averaged stochastic gradient descent.
119 | 
120 | 
121 |     $ ./crfasgd -c 1.0 -f 3 -r 10 model.gz template \
122 |           ../data/conll2000/train.txt.gz ../data/conll2000/test.txt.gz
123 |     Reading template file template.
124 |     ...
125 |     Reading and preprocessing ../data/conll2000/train.txt.gz.
126 |     ...
127 |     Reading and preprocessing ../data/conll2000/test.txt.gz.
128 |     ...
129 |     [Calibrating] --  1000 samples
130 |     ...
131 |     [Epoch 1] -- wnorm=3471.6 anorm=3471.6 time=16.88s.
132 |     [Epoch 2] -- wnorm=5093.77 anorm=4238.68 time=23.5s.
133 |     [Epoch 3] -- wnorm=6281.55 anorm=4871.78 time=30.11s.
134 |     [Epoch 4] -- wnorm=7128.27 anorm=5400.67 time=36.75s.
135 |     [Epoch 5] -- wnorm=7748.73 anorm=5837.29 time=43.34s.
136 |     Training perf: sentences=8936 loss=0.879526 obj=1.20614 err=2945 (1.39%)
137 |     accuracy:  98.61%; precision:  97.66%; recall:  97.51%; FB1:  97.58
138 |     Testing perf: sentences=2012 loss=2.23476 obj=2.56138 err=1895 (3.99%)
139 |     accuracy:  96.00%; precision:  93.75%; recall:  93.59%; FB1:  93.67
140 |     [Epoch 6] -- wnorm=8219.54 anorm=6203.39 time=50.05s.
141 |     [Epoch 7] -- wnorm=8569.77 anorm=6514.33 time=56.84s.
142 |     [Epoch 8] -- wnorm=8858.12 anorm=6790.36 time=63.46s.
143 |     [Epoch 9] -- wnorm=9059.93 anorm=7026.73 time=70.42s.
144 |     [Epoch 10] -- wnorm=9230.78 anorm=7235.45 time=77.04s.
145 |     Training perf: sentences=8936 loss=0.68643 obj=1.09128 err=1977 (0.93%)
146 |     accuracy:  99.07%; precision:  98.40%; recall:  98.24%; FB1:  98.32
147 |     Testing perf: sentences=2012 loss=2.21381 obj=2.61866 err=1872 (3.95%)
148 |     accuracy:  96.05%; precision:  93.84%; recall:  93.67%; FB1:  93.75
149 |     Saving model file model.gz.
150 |     Done!  77.04 seconds.
151 | 
152 | 
153 | Testing the final model (using crfsgd or crfasgd is equivalent.)
154 | 
155 |     $ ./crfsgd -t model.gz ../data/conll2000/test.txt.gz | ./conlleval
156 |     processed 47377 tokens with 23852 phrases; found: 23809 phrases; correct: 22342.
157 |     accuracy:  96.05%; precision:  93.84%; recall:  93.67%; FB1:  93.75
158 |                  ADJP: precision:  79.66%; recall:  74.20%; FB1:  76.83  408
159 |                  ADVP: precision:  82.96%; recall:  80.95%; FB1:  81.94  845
160 |                 CONJP: precision:  55.56%; recall:  55.56%; FB1:  55.56  9
161 |                  INTJ: precision: 100.00%; recall:  50.00%; FB1:  66.67  1
162 |                   LST: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
163 |                    NP: precision:  94.36%; recall:  94.08%; FB1:  94.22  12385
164 |                    PP: precision:  96.61%; recall:  97.80%; FB1:  97.20  4870
165 |                   PRT: precision:  77.45%; recall:  74.53%; FB1:  75.96  102
166 |                  SBAR: precision:  88.33%; recall:  84.86%; FB1:  86.56  514
167 |                    VP: precision:  93.80%; recall:  94.14%; FB1:  93.97  4675
168 | 
169 | 
170 | Comparing with CRF++ (on a different machine that is about twice as slow.)
171 | 
172 |     $ crf_learn -c 1.0 -f 3 template train.txt model
173 |     ...
174 |     Number of sentences: 8936
175 |     Number of features:  1679700
176 |     ...    iter=18 terr=0.04522 serr=0.45636 act=1679700 obj=24917.57905 diff=0.02882
177 |     ...    iter=36 terr=0.02188 serr=0.27775 act=1679700 obj=13697.78077 diff=0.01717
178 |     ...    iter=71 terr=0.00518 serr=0.09109 act=1679700 obj=9654.43394 diff=0.00167
179 |     ...    iter=142 terr=0.00340 serr=0.06256 act=1679700 obj=9042.07254 diff=0.00007
180 |     Done!4335.34 s
181 | 
182 |     $ crf_test -m model test.txt | tr '\t' ' ' | ./conlleval 
183 |     processed 47377 tokens with 23852 phrases; found: 23799 phrases; correct: 22334.
184 |     accuracy:  96.02%; precision:  93.84%; recall:  93.64%; FB1:  93.74
185 |                  ADJP: precision:  79.71%; recall:  74.43%; FB1:  76.98  409
186 |                  ADVP: precision:  83.18%; recall:  81.06%; FB1:  82.11  844
187 |                 CONJP: precision:  55.56%; recall:  55.56%; FB1:  55.56  9
188 |                  INTJ: precision: 100.00%; recall:  50.00%; FB1:  66.67  1
189 |                   LST: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
190 |                    NP: precision:  94.36%; recall:  94.03%; FB1:  94.19  12378
191 |                    PP: precision:  96.71%; recall:  97.82%; FB1:  97.26  4866
192 |                   PRT: precision:  79.05%; recall:  78.30%; FB1:  78.67  105
193 |                  SBAR: precision:  88.65%; recall:  84.67%; FB1:  86.62  511
194 |                    VP: precision:  93.63%; recall:  93.99%; FB1:  93.81  4676
195 | 
196 | 
197 | 


--------------------------------------------------------------------------------
/crf/conlleval:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl -w
  2 | # conlleval: evaluate result of processing CoNLL-2000 shared task
  3 | # usage:     conlleval [-l] [-r] [-d delimiterTag] [-o oTag] < file
  4 | #            README: http://cnts.uia.ac.be/conll2000/chunking/output.html
  5 | # options:   l: generate LaTeX output for tables like in
  6 | #               http://cnts.uia.ac.be/conll2003/ner/example.tex
  7 | #            r: accept raw result tags (without B- and I- prefix;
  8 | #                                       assumes one word per chunk)
  9 | #            d: alternative delimiter tag (default is single space)
 10 | #            o: alternative outside tag (default is O)
 11 | # note:      the file should contain lines with items separated
 12 | #            by $delimiter characters (default space). The final
 13 | #            two items should contain the correct tag and the 
 14 | #            guessed tag in that order. Sentences should be
 15 | #            separated from each other by empty lines or lines
 16 | #            with $boundary fields (default -X-).
 17 | # url:       http://lcg-www.uia.ac.be/conll2000/chunking/
 18 | # started:   1998-09-25
 19 | # version:   2004-01-26
 20 | # author:    Erik Tjong Kim Sang <erikt@uia.ua.ac.be>
 21 | 
 22 | use strict;
 23 | 
 24 | my $false = 0;            # numeric truth values: the chunk tests below
 25 | my $true = 42;            # return them and are compared with != 
 26 | 
 27 | my $boundary = "-X-";     # sentence boundary
 28 | my $correct;              # current corpus chunk tag (I,O,B)
 29 | my $correctChunk = 0;     # number of correctly identified chunks
 30 | my $correctTags = 0;      # number of correct chunk tags
 31 | my $correctType;          # type of current corpus chunk tag (NP,VP,etc.)
 32 | my $delimiter = " ";      # field delimiter (interpolated into the split regex)
 33 | my $FB1 = 0.0;            # FB1 score (Van Rijsbergen 1979)
 34 | my $firstItem;            # first feature (for sentence boundary checks)
 35 | my $foundCorrect = 0;     # number of chunks in corpus
 36 | my $foundGuessed = 0;     # number of identified chunks
 37 | my $guessed;              # current guessed chunk tag
 38 | my $guessedType;          # type of current guessed chunk tag
 39 | my $i;                    # loop variable (chunk type names)
 40 | my $inCorrect = $false;   # currently processed chunk is correct until now
 41 | my $lastCorrect = "O";    # previous chunk tag in corpus
 42 | my $latex = 0;            # generate LaTeX formatted output
 43 | my $lastCorrectType = ""; # type of previous chunk tag in corpus
 44 | my $lastGuessed = "O";    # previously identified chunk tag
 45 | my $lastGuessedType = ""; # type of previously identified chunk tag
 46 | my $lastType;             # temporary storage for detecting duplicates
 47 | my $line;                 # current input line (after chomp)
 48 | my $nbrOfFeatures = -1;   # last field index of the first line (-1: not set yet)
 49 | my $precision = 0.0;      # precision score
 50 | my $oTag = "O";           # outside tag, default O
 51 | my $raw = 0;              # raw input: prefix B- to every tag except the outside tag
 52 | my $quiet = 0;            # omit the counts line and the per-type rows
 53 | my $recall = 0.0;         # recall score
 54 | my $tokenCounter = 0;     # token counter (ignores sentence breaks)
 55 | 
 56 | my %correctChunk = ();    # number of correctly identified chunks per type
 57 | my %foundCorrect = ();    # number of chunks in corpus per type
 58 | my %foundGuessed = ();    # number of identified chunks per type
 59 | 
 60 | my @features;             # features on line
 61 | my @sortedTypes;          # sorted list of chunk type names
 62 | 
 63 | # parse command line options: -l, -r, -q, -d <delimiter>, -o <outside tag>
 64 | while (@ARGV and $ARGV[0] =~ /^-/) {
 65 |    if ($ARGV[0] eq "-l") { $latex = 1; shift(@ARGV); }
 66 |    elsif ($ARGV[0] eq "-r") { $raw = 1; shift(@ARGV); }
 67 |    elsif ($ARGV[0] eq "-q") { $quiet = 1; shift(@ARGV); }
 68 |    elsif ($ARGV[0] eq "-d") {
 69 |       shift(@ARGV);
 70 |       if (not defined $ARGV[0]) {
 71 |          die "conlleval: -d requires delimiter character";
 72 |       }
 73 |       $delimiter = shift(@ARGV);   # later used as a regular expression by split
 74 |    } elsif ($ARGV[0] eq "-o") {
 75 |       shift(@ARGV);
 76 |       if (not defined $ARGV[0]) {
 77 |          die "conlleval: -o requires outside tag";   # was a copy of the -d text
 78 |       }
 79 |       $oTag = shift(@ARGV);
 80 |    } else { die "conlleval: unknown argument $ARGV[0]\n"; }
 81 | }
 82 | if (@ARGV) { die "conlleval: unexpected command line argument\n"; }
 83 | # process input: one token per line; the last two fields are corpus tag and guessed tag
 84 | while (<STDIN>) {
 85 |    chomp($line = $_);
 86 |    @features = split(/$delimiter/,$line);   # $delimiter is interpolated as a regex
 87 |    if ($nbrOfFeatures < 0) { $nbrOfFeatures = $#features; }
 88 |    elsif ($nbrOfFeatures != $#features and @features != 0) {
 89 |       printf STDERR "unexpected number of features: %d (%d)\n",
 90 |          $#features+1,$nbrOfFeatures+1;
 91 |       exit(1);
 92 |    }
 93 |    if (@features == 0 or    # empty line or boundary marker: handled as an O/O token
 94 |        $features[0] eq $boundary) { @features = ($boundary,"O","O"); }
 95 |    if (@features < 2) { 
 96 |       die "conlleval: unexpected number of features in line $line\n"; 
 97 |    }
 98 |    if ($raw) {   # -r: map $oTag to O and prefix every other tag with B-
 99 |       if ($features[$#features] eq $oTag) { $features[$#features] = "O"; } 
100 |       if ($features[$#features-1] eq $oTag) { $features[$#features-1] = "O"; } 
101 |       if ($features[$#features] ne "O") { 
102 |          $features[$#features] = "B-$features[$#features]";
103 |       }
104 |       if ($features[$#features-1] ne "O") { 
105 |          $features[$#features-1] = "B-$features[$#features-1]";
106 |       }
107 |    }
108 |    # 20040126 ET code which allows hyphens in the types (split at the first hyphen only)
109 |    if ($features[$#features] =~ /^([^-]*)-(.*)$/) {
110 |       $guessed = $1;
111 |       $guessedType = $2;
112 |    } else { 
113 |       $guessed = $features[$#features]; 
114 |       $guessedType = ""; 
115 |    }
116 |    pop(@features);
117 |    if ($features[$#features] =~ /^([^-]*)-(.*)$/) {
118 |       $correct = $1;
119 |       $correctType = $2;
120 |    } else { 
121 |       $correct = $features[$#features]; 
122 |       $correctType = ""; 
123 |    }
124 |    pop(@features);
125 | #  ($guessed,$guessedType) = split(/-/,pop(@features));
126 | #  ($correct,$correctType) = split(/-/,pop(@features));
127 |    $guessedType = $guessedType ? $guessedType : "";   # a false type (e.g. "0") becomes ""
128 |    $correctType = $correctType ? $correctType : "";
129 |    $firstItem = shift(@features);
130 | 
131 |    # 1999-06-26 sentence breaks should always be counted as out of chunk
132 |    if ( $firstItem eq $boundary ) { $guessed = "O"; }
133 |    # a pair of chunks that matched so far: count it if both end here, drop it on a mismatch
134 |    if ($inCorrect) {
135 |       if ( &endOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) and
136 |            &endOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) and
137 |            $lastGuessedType eq $lastCorrectType) {
138 |          $inCorrect=$false;
139 |          $correctChunk++;
140 |          $correctChunk{$lastCorrectType} = $correctChunk{$lastCorrectType} ?
141 |              $correctChunk{$lastCorrectType}+1 : 1;
142 |       } elsif ( 
143 |            &endOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) != 
144 |            &endOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) or
145 |            $guessedType ne $correctType ) {
146 |          $inCorrect=$false; 
147 |       }
148 |    }
149 |    # a new candidate match begins when both sides start a chunk of the same type here
150 |    if ( &startOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) and 
151 |         &startOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) and
152 |         $guessedType eq $correctType) { $inCorrect = $true; }
153 |    # chunk starts feed the recall (corpus) and precision (guessed) denominators
154 |    if ( &startOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) ) {
155 |       $foundCorrect++; 
156 |       $foundCorrect{$correctType} = $foundCorrect{$correctType} ?
157 |           $foundCorrect{$correctType}+1 : 1;
158 |    }
159 |    if ( &startOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) ) {
160 |       $foundGuessed++; 
161 |       $foundGuessed{$guessedType} = $foundGuessed{$guessedType} ?
162 |           $foundGuessed{$guessedType}+1 : 1;
163 |    }
164 |    if ( $firstItem ne $boundary ) {   # tag accuracy ignores sentence breaks
165 |       if ( $correct eq $guessed and $guessedType eq $correctType ) { 
166 |          $correctTags++; 
167 |       }
168 |       $tokenCounter++; 
169 |    }
170 |    # keep the current tags and types for the comparison on the next line
171 |    $lastGuessed = $guessed;
172 |    $lastCorrect = $correct;
173 |    $lastGuessedType = $guessedType;
174 |    $lastCorrectType = $correctType;
175 | }
176 | # a chunk pair still matching at end of input is counted as correct
177 | if ($inCorrect) {
178 |    $correctChunk++;
179 |    $correctChunk{$lastCorrectType} = ($correctChunk{$lastCorrectType} || 0) + 1;
180 | }
181 | 
182 | unless ($latex) {
183 |    # overall precision, recall and FB1; each keeps its initial 0.0 when undefined
184 |    if ($foundGuessed > 0) { $precision = 100*$correctChunk/$foundGuessed; }
185 |    if ($foundCorrect > 0) { $recall = 100*$correctChunk/$foundCorrect; }
186 |    my $prSum = $precision+$recall;
187 |    if ($prSum > 0) { $FB1 = 2*$precision*$recall/$prSum; }
188 | 
189 |    # counts line (suppressed by -q), then the score line if any token was read
190 |    unless ($quiet) {
191 |        printf "processed $tokenCounter tokens with $foundCorrect phrases; ";
192 |        printf "found: $foundGuessed phrases; correct: $correctChunk.\n";
193 |    }
194 |    if ($tokenCounter>0) {
195 |       printf("accuracy: %6.2f%%; ",100*$correctTags/$tokenCounter);
196 |       printf("precision: %6.2f%%; ",$precision);
197 |       printf("recall: %6.2f%%; ",$recall);
198 |       printf("FB1: %6.2f\n",$FB1);
199 |    }
200 | }
201 | 
202 | # sort chunk type names and drop duplicates (equal names are adjacent once sorted)
203 | undef($lastType);
204 | @sortedTypes = ();
205 | foreach $i (sort (keys %foundCorrect,keys %foundGuessed)) {
206 |    if (not defined($lastType) or $lastType ne $i) {   # defined(): the type "" is false
207 |       push(@sortedTypes,($i));
208 |    }
209 |    $lastType = $i;
210 | }
211 | # per-type scores as plain text, or the whole LaTeX table with -l
212 | unless ($latex) {
213 |  unless ($quiet) {
214 |    foreach my $type (@sortedTypes) {
215 |       $correctChunk{$type} ||= 0;
216 |       $foundGuessed{$type} ||= 0;   # printed below as the phrase count
217 |       $precision = $foundGuessed{$type} ?
218 |          100*$correctChunk{$type}/$foundGuessed{$type} : 0.0;
219 |       $recall = $foundCorrect{$type} ?
220 |          100*$correctChunk{$type}/$foundCorrect{$type} : 0.0;
221 |       $FB1 = ($precision+$recall == 0.0) ? 0.0 : 2*$precision*$recall/($precision+$recall);
222 |       printf("%17s: ",$type);
223 |       printf("precision: %6.2f%%; ",$precision);
224 |       printf("recall: %6.2f%%; ",$recall);
225 |       printf("FB1: %6.2f  %d\n",$FB1,$foundGuessed{$type});
226 |    }
227 |  }
228 | } else {
229 |    print "        & Precision &  Recall  & F\$_{\\beta=1} \\\\\\hline";
230 |    unless ($quiet) {
231 |     foreach my $type (@sortedTypes) {
232 |       $correctChunk{$type} ||= 0;
233 |       $precision = $foundGuessed{$type} ?
234 |          100*$correctChunk{$type}/$foundGuessed{$type} : 0.0;
235 |       $recall = $foundCorrect{$type} ?
236 |          100*$correctChunk{$type}/$foundCorrect{$type} : 0.0;
237 |       $FB1 = ($precision+$recall == 0.0) ? 0.0 :
238 |          2*$precision*$recall/($precision+$recall);
239 |       printf("\n%-7s &  %6.2f\\%% & %6.2f\\%% & %6.2f \\\\",
240 |              $type,$precision,$recall,$FB1);
241 |     }
242 |     print "\\hline\n";
243 |    }
244 |    # overall row, recomputed from the global counters; a score is 0 when
245 |    # its denominator is zero
246 |    $precision = ($foundGuessed > 0) ? 100*$correctChunk/$foundGuessed : 0.0;
247 |    $recall = ($foundCorrect > 0) ? 100*$correctChunk/$foundCorrect : 0;
248 |    $FB1 = ($precision+$recall > 0) ?
249 |       2*$precision*$recall/($precision+$recall) : 0.0;
250 | 
251 |    printf("Overall &  %6.2f\\%% & %6.2f\\%% & %6.2f \\\\\\hline\n",
252 |           $precision,$recall,$FB1);
253 | }
254 | 
255 | exit 0;
256 | 
257 | # endOfChunk: checks if a chunk ended between the previous and current word
258 | # arguments:  previous and current chunk tags, previous and current types
259 | # note:       this code is capable of handling other chunk representations
260 | #             than the default CoNLL-2000 ones, see EACL'99 paper of Tjong
261 | #             Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006
262 | 
263 | sub endOfChunk {
264 |    # Returns $true when the chunk open on the previous word does not
265 |    # continue on the current word, $false otherwise.
266 |    #   $prevTag, $tag    chunk tags of the previous and current word
267 |    #   $prevType, $type  chunk types of the previous and current word
268 |    my ($prevTag,$tag,$prevType,$type) = @_;
269 | 
270 |    # bracket tags: these chunks are assumed to have length 1
271 |    return($true) if ( $prevTag eq "[" or $prevTag eq "]" );
272 | 
273 |    # a different type ends whatever chunk the previous word was in
274 |    if ( $prevTag ne "O" and $prevTag ne "." ) {
275 |       return($true) if ( $prevType ne $type );
276 |    }
277 | 
278 |    # B or I followed by B or O
279 |    if ( $prevTag eq "B" or $prevTag eq "I" ) {
280 |       return($true) if ( $tag eq "B" or $tag eq "O" );
281 |    }
282 |    # E followed by E, I or O
283 |    if ( $prevTag eq "E" ) {
284 |       return($true) if ( $tag eq "E" or $tag eq "I" or $tag eq "O" );
285 |    }
286 | 
287 |    # no chunk boundary between the two words
288 |    return($false);
289 | }
290 | 
291 | # startOfChunk: checks if a chunk started between the previous and current word
292 | # arguments:    previous and current chunk tags, previous and current types
293 | # note:         this code is capable of handling other chunk representations
294 | #               than the default CoNLL-2000 ones, see EACL'99 paper of Tjong
295 | #               Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006
296 | 
297 | sub startOfChunk {
298 |    # Returns $true when a chunk begins at the current word, $false
299 |    # otherwise.
300 |    #   $prevTag, $tag    chunk tags of the previous and current word
301 |    #   $prevType, $type  chunk types of the previous and current word
302 |    my ($prevTag,$tag,$prevType,$type) = @_;
303 | 
304 |    # bracket tags: these chunks are assumed to have length 1
305 |    return($true) if ( $tag eq "[" or $tag eq "]" );
306 | 
307 |    # a different type opens a new chunk unless the current tag is O or .
308 |    if ( $tag ne "O" and $tag ne "." ) {
309 |       return($true) if ( $prevType ne $type );
310 |    }
311 | 
312 |    # B after B, I or O
313 |    if ( $tag eq "B" ) {
314 |       return($true) if ( $prevTag eq "B" or $prevTag eq "I" or $prevTag eq "O" );
315 |    }
316 |    # I or E after O or E
317 |    if ( $tag eq "I" or $tag eq "E" ) {
318 |       return($true) if ( $prevTag eq "O" or $prevTag eq "E" );
319 |    }
320 | 
321 |    # no chunk starts at the current word
322 |    return($false);
323 | }
324 | 


--------------------------------------------------------------------------------
/crf/template:
--------------------------------------------------------------------------------
 1 | # Unigram
 2 | U00:%x[-2,0]
 3 | U01:%x[-1,0]
 4 | U02:%x[0,0]
 5 | U03:%x[1,0]
 6 | U04:%x[2,0]
 7 | U05:%x[-1,0]/%x[0,0]
 8 | U06:%x[0,0]/%x[1,0]
 9 | 
10 | U10:%x[-2,1]
11 | U11:%x[-1,1]
12 | U12:%x[0,1]
13 | U13:%x[1,1]
14 | U14:%x[2,1]
15 | U15:%x[-2,1]/%x[-1,1]
16 | U16:%x[-1,1]/%x[0,1]
17 | U17:%x[0,1]/%x[1,1]
18 | U18:%x[1,1]/%x[2,1]
19 | 
20 | U20:%x[-2,1]/%x[-1,1]/%x[0,1]
21 | U21:%x[-1,1]/%x[0,1]/%x[1,1]
22 | U22:%x[0,1]/%x[1,1]/%x[2,1]
23 | 
24 | # Bigram
25 | B
26 | 


--------------------------------------------------------------------------------
/data/README.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | This directory should be 
 3 | populated with various data files
 4 | containing well known datasets.
 5 | 
 6 | 
 7 | * The following Reuters RCV1 dataset available from
 8 |   http://jmlr.csail.mit.edu/papers/volume5/lewis04a/lyrl2004_rcv1v2_README.htm
 9 | 
10 |         rcv1/lyrl2004_tokens_test_pt0.dat.gz
11 |         rcv1/lyrl2004_tokens_test_pt1.dat.gz
12 |         rcv1/lyrl2004_tokens_test_pt2.dat.gz
13 |         rcv1/lyrl2004_tokens_test_pt3.dat.gz
14 |         rcv1/lyrl2004_tokens_train.dat.gz
15 |         rcv1/rcv1-v2.topics.qrels.gz
16 | 
17 | 
18 | * The following CONLL2000 data available from
19 |   http://www.cnts.ua.ac.be/conll2000/chunking
20 | 
21 |         conll2000/train.txt.gz
22 |         conll2000/test.txt.gz
23 | 
24 | 
25 | * The following PASCAL data available from
26 |   ftp://largescale.ml.tu-berlin.de/largescale/ 
27 | 
28 |         pascal/alpha_train.dat.bz2
29 |         pascal/alpha_train.lab.bz2
30 |         pascal/webspam_train.dat.bz2
31 |         pascal/webspam_train.lab.bz2
32 |         pascal/convert.py
33 | 
34 |    These files must then be decoded using the python script convert.py.
35 |    This can take a while.
36 |         $ cd pascal
37 |         $ ./convert.py -o alpha.txt alpha train
38 |         $ ./convert.py -o webspam.txt webspam train
39 | 
40 | 


--------------------------------------------------------------------------------
/data/conll2000/README.txt:
--------------------------------------------------------------------------------
1 | The CONLL2000 files go here.
2 | See ../README.txt.
3 | 


--------------------------------------------------------------------------------
/data/pascal/README.txt:
--------------------------------------------------------------------------------
1 | The PASCAL Alpha and Webspam files go here.
2 | See ../README.txt.
3 | 


--------------------------------------------------------------------------------
/data/rcv1/README.txt:
--------------------------------------------------------------------------------
1 | The RCV1 files go here.
2 | See ../README.txt.
3 | 


--------------------------------------------------------------------------------
/lib/assert.h:
--------------------------------------------------------------------------------
 1 | // -*- C++ -*-
 2 | // Simple assertions
 3 | // Copyright (C) 2007- Leon Bottou
 4 | 
 5 | 
 6 | // This library is free software; you can redistribute it and/or
 7 | // modify it under the terms of the GNU Lesser General Public
 8 | // License as published by the Free Software Foundation; either
 9 | // version 2.1 of the License, or (at your option) any later version.
10 | // 
11 | // This program is distributed in the hope that it will be useful,
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 | // GNU General Public License for more details.
15 | // 
16 | // You should have received a copy of the GNU General Public License
17 | // along with this program; if not, write to the Free Software
18 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
19 | 
20 | 
21 | 
22 | #ifndef ASSERT_H
23 | #define ASSERT_H 1
24 | 
25 | #include <iostream>
26 | #include <cstdlib>
27 | 
28 | // assertfail(msg): write "(file:line) msg" to std::cerr, then exit with status 10.
29 | #define assertfail(msg) \
30 |   do { std::cerr << "(" << __FILE__ << ":" << __LINE__ << ") " << msg << std::endl; \
31 |        ::exit(10); } while(0)
32 | // assert(expr): call assertfail, quoting the expression text, when expr is false.
33 | #define assert(expr) do { if (!(expr)) assertfail("Assertion failed: " << #expr); } while(0)
34 | 
35 | #endif
36 | 


--------------------------------------------------------------------------------
/lib/gzstream.cpp:
--------------------------------------------------------------------------------
  1 | // ============================================================================
  2 | // gzstream, C++ iostream classes wrapping the zlib compression library.
  3 | // Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
  4 | //
  5 | // This library is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | //
 10 | // This library is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 13 | // Lesser General Public License for more details.
 14 | //
 15 | // You should have received a copy of the GNU Lesser General Public
 16 | // License along with this library; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 18 | // ============================================================================
 19 | //
 20 | // File          : gzstream.C
 21 | // Revision      : $Revision$
 22 | // Revision_date : $Date$
 23 | // Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
 24 | // 
 25 | // Standard streambuf implementation following Nicolai Josuttis, "The 
 26 | // Standard C++ Library".
 27 | // ============================================================================
 28 | 
 29 | #include <gzstream.h>
 30 | #include <iostream>
 31 | #include <string.h>  // for memcpy
 32 | 
 33 | #ifdef GZSTREAM_NAMESPACE
 34 | namespace GZSTREAM_NAMESPACE {
 35 | #endif
 36 | 
 37 | // ----------------------------------------------------------------------------
 38 | // Internal classes to implement gzstream. See header file for user classes.
 39 | // ----------------------------------------------------------------------------
 40 | 
 41 | // --------------------------------------
 42 | // class gzstreambuf:
 43 | // --------------------------------------
 44 | 
 45 | gzstreambuf* gzstreambuf::open( const char* name, int open_mode) {
 46 |     // Returns this, or null if already open, mode unsupported, or gzopen() fails.
 47 |     if ( is_open())
 48 |         return 0;
 49 |     mode = open_mode;
 50 |     // no append nor read/write mode
 51 |     const bool reading = (mode & std::ios::in) != 0;
 52 |     const bool writing = (mode & std::ios::out) != 0;
 53 |     if ((mode & (std::ios::ate | std::ios::app)) || (reading && writing))
 54 |         return 0;
 55 |     // gzopen() mode string: "rb", "wb", or "b" when neither flag is set
 56 |     char fmode[3] = { 'b', '\0', '\0' };
 57 |     if ( reading || writing) {
 58 |         fmode[0] = reading ? 'r' : 'w';
 59 |         fmode[1] = 'b';
 60 |     }
 61 |     file = gzopen( name, fmode);
 62 |     if (file == 0)
 63 |         return 0;
 64 |     opened = 1;
 65 |     return this;
 66 | }
 67 | 
 68 | gzstreambuf * gzstreambuf::close() {
 69 |     // Flushes pending output and closes the file. Returns this only when the
 70 |     // buffer was open and gzclose() reported Z_OK, a null pointer otherwise.
 71 |     if ( ! is_open())
 72 |         return 0;
 73 |     sync();
 74 |     opened = 0;
 75 |     return ( gzclose( file) == Z_OK) ? this : 0;
 76 | }
 77 | 
 78 | int gzstreambuf::underflow() { // used for input buffer only
 79 |     if ( gptr() && ( gptr() < egptr()))
 80 |         return * reinterpret_cast<unsigned char *>( gptr());
 81 |     // get area exhausted: refill it from the compressed file
 82 |     if ( ! (mode & std::ios::in) || ! opened)
 83 |         return EOF;
 84 |     // Josuttis' implementation of inbuf: keep up to 4 characters for putback
 85 |     int n_putback = gptr() - eback();
 86 |     if ( n_putback > 4)
 87 |         n_putback = 4;
 88 |     // memmove, not memcpy: after a read shorter than 4 bytes the source
 89 |     // range overlaps the putback area at the front of the buffer
 90 |     memmove( buffer + (4 - n_putback), gptr() - n_putback, n_putback);
 91 | 
 92 |     int num = gzread( file, buffer+4, bufferSize-4);
 93 |     if (num <= 0) // ERROR or EOF
 94 |         return EOF;
 95 |     // reset buffer pointers
 96 |     setg( buffer + (4 - n_putback),   // beginning of putback area
 97 |           buffer + 4,                 // read position
 98 |           buffer + 4 + num);          // end of buffer
 99 |     // return next character
100 |     return * reinterpret_cast<unsigned char *>( gptr());    
101 | }
102 | 
103 | int gzstreambuf::flush_buffer() {
104 |     // Writes the pending put-area bytes; shared by overflow() and sync().
105 |     const int pending = pptr() - pbase();
106 |     const int written = gzwrite( file, pbase(), pending);
107 |     if ( written != pending)
108 |         return EOF;
109 |     pbump( -pending);          // rewind the put pointer to pbase()
110 |     return pending;
111 | }
112 | 
113 | int gzstreambuf::overflow( int c) { // used for output buffer only
114 |     const bool writable = ( mode & std::ios::out) && opened;
115 |     if ( ! writable)
116 |         return EOF;
117 |     // store c (unless it is EOF) behind the pending bytes, then flush them all
118 |     if (c != EOF) {
119 |         *pptr() = c;
120 |         pbump(1);
121 |     }
122 |     return ( flush_buffer() == EOF) ? EOF : c;
123 | }
124 | 
125 | int gzstreambuf::sync() {
126 |     // Changed to use flush_buffer() instead of overflow( EOF)
127 |     // which caused improper behavior with std::endl and flush(),
128 |     // bug reported by Vincent Ricard.
129 |     // Returns 0 when nothing is pending or the flush succeeds, -1 otherwise.
130 |     const bool pending = pptr() && pptr() > pbase();
131 |     if ( ! pending)
132 |         return 0;
133 |     return ( flush_buffer() == EOF) ? -1 : 0;
134 | }
135 | 
136 | // --------------------------------------
137 | // class gzstreambase:
138 | // --------------------------------------
139 | 
140 | gzstreambase::gzstreambase( const char* name, int mode) {
141 |     init( &buf);               // attach the embedded buffer to the stream
142 |     open( name, mode);
143 | }
144 | // The destructor closes the buffer; the result of close() is not checked.
145 | gzstreambase::~gzstreambase() {
146 |     buf.close();
147 | }
148 | // Opens the buffer; a failure is reported by setting badbit.
149 | void gzstreambase::open( const char* name, int open_mode) {
150 |     if ( buf.open( name, open_mode) == 0)
151 |         setstate( std::ios::badbit);
152 | }
153 | // Closes an open buffer; a failing close sets badbit, a closed buffer is left alone.
154 | void gzstreambase::close() {
155 |     const bool failed = buf.is_open() && ! buf.close();
156 |     if ( failed)
157 |         setstate( std::ios::badbit);
158 | }
159 | 
160 | #ifdef GZSTREAM_NAMESPACE
161 | } // namespace GZSTREAM_NAMESPACE
162 | #endif
163 | 
164 | // ============================================================================
165 | // EOF //
166 | 


--------------------------------------------------------------------------------
/lib/gzstream.h:
--------------------------------------------------------------------------------
  1 | // ============================================================================
  2 | // gzstream, C++ iostream classes wrapping the zlib compression library.
  3 | // Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
  4 | //
  5 | // This library is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | //
 10 | // This library is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 13 | // Lesser General Public License for more details.
 14 | //
 15 | // You should have received a copy of the GNU Lesser General Public
 16 | // License along with this library; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 18 | // ============================================================================
 19 | //
 20 | // File          : gzstream.h
 21 | // Revision      : $Revision$
 22 | // Revision_date : $Date$
 23 | // Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
 24 | // 
 25 | // Standard streambuf implementation following Nicolai Josuttis, "The 
 26 | // Standard C++ Library".
 27 | // ============================================================================
 28 | 
 29 | #ifndef GZSTREAM_H
 30 | #define GZSTREAM_H 1
 31 | 
 32 | // standard C++ with new header file names and std:: namespace
 33 | #include <iostream>
 34 | #include <fstream>
 35 | #include <zlib.h>
 36 | 
 37 | #ifdef GZSTREAM_NAMESPACE
 38 | namespace GZSTREAM_NAMESPACE {
 39 | #endif
 40 | 
 41 | // ----------------------------------------------------------------------------
 42 | // Internal classes to implement gzstream. See below for user classes.
 43 | // ----------------------------------------------------------------------------
 44 | 
 45 | class gzstreambuf : public std::streambuf {
 46 | private:
 47 |     static const int bufferSize = 47+256;    // size of data buff
 48 |     // totals 512 bytes under g++ for igzstream at the end.
 49 |     // The first 4 bytes of buffer are the putback area when reading.
 50 |     gzFile           file;               // file handle for compressed file
 51 |     char             buffer[bufferSize]; // data buffer
 52 |     char             opened;             // open/close state of stream
 53 |     int              mode;               // I/O mode
 54 | 
 55 |     int flush_buffer();
 56 | public:
 57 |     gzstreambuf() : file(0), opened(0), mode(0) {  // mode is read before opened is tested
 58 |         setp( buffer, buffer + (bufferSize-1));
 59 |         setg( buffer + 4,     // beginning of putback area
 60 |               buffer + 4,     // read position
 61 |               buffer + 4);    // end position      
 62 |         // ASSERT: both input & output capabilities will not be used together
 63 |     }
 64 |     int is_open() { return opened; }
 65 |     gzstreambuf* open( const char* name, int open_mode);
 66 |     gzstreambuf* close();
 67 |     ~gzstreambuf() { close(); }
 68 |     // std::streambuf overrides
 69 |     virtual int     overflow( int c = EOF);
 70 |     virtual int     underflow();
 71 |     virtual int     sync();
 72 | };
 73 | 
 74 | class gzstreambase : virtual public std::ios { // common base of igzstream and ogzstream
 75 | protected:
 76 |     gzstreambuf buf;                                // the stream buffer, held by value
 77 | public:
 78 |     gzstreambase() { init(&buf); }                  // attaches buf; opens no file
 79 |     gzstreambase( const char* name, int open_mode); // attaches buf, then opens name
 80 |     ~gzstreambase();                                // closes buf
 81 |     void open( const char* name, int open_mode);    // sets badbit if buf.open() fails
 82 |     void close();                                   // sets badbit if closing an open buf fails
 83 |     gzstreambuf* rdbuf() { return &buf; }           // typed access to the embedded buffer
 84 | };
 85 | 
 86 | // ----------------------------------------------------------------------------
 87 | // User classes. Use igzstream and ogzstream analogously to ifstream and
 88 | // ofstream respectively. They read and write files based on the gz* 
 89 | // function interface of the zlib. Files are compatible with gzip compression.
 90 | // ----------------------------------------------------------------------------
 91 | 
 92 | class igzstream : public gzstreambase, public std::istream { // input stream on a gzstreambuf
 93 | public:
 94 |     igzstream() : std::istream( &buf) {}   // no file yet; call open() later
 95 |     igzstream( const char* name, int open_mode = std::ios::in)   // opens name at once
 96 |         : gzstreambase( name, open_mode), std::istream( &buf) {}  
 97 |     gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }   // forwards to gzstreambase::rdbuf()
 98 |     void open( const char* name, int open_mode = std::ios::in) {   // mode defaults to input
 99 |         gzstreambase::open( name, open_mode);
100 |     }
101 | };
102 | 
103 | class ogzstream : public gzstreambase, public std::ostream { // output stream on a gzstreambuf
104 | public:
105 |     ogzstream() : std::ostream( &buf) {}   // no file yet; call open() later
106 |     ogzstream( const char* name, int mode = std::ios::out)   // opens name at once
107 |         : gzstreambase( name, mode), std::ostream( &buf) {}  
108 |     gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }   // forwards to gzstreambase::rdbuf()
109 |     void open( const char* name, int open_mode = std::ios::out) {   // mode defaults to output
110 |         gzstreambase::open( name, open_mode);
111 |     }
112 | };
113 | 
114 | #ifdef GZSTREAM_NAMESPACE
115 | } // namespace GZSTREAM_NAMESPACE
116 | #endif
117 | 
118 | #endif // GZSTREAM_H
119 | // ============================================================================
120 | // EOF //
121 | 
122 | 


--------------------------------------------------------------------------------
/lib/matrices.cpp:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // Little library of matrices and sparse matrices
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This library is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | 
 20 | #include "assert.h"
 21 | #include <matrices.h>
 22 | 
 23 | 
 24 | void 
 25 | FMatrix::resize(int nrows, int ncols)
 26 | {
 27 |   // a negative argument leaves the corresponding dimension unchanged
 28 |   w.detach();
 29 |   Rep *self = rep();
 30 |   if (nrows >= 0) {
 31 |     self->rows.resize(nrows);
 32 |     self->nrows = nrows;
 33 |   }
 34 |   if (ncols < 0)
 35 |     return;
 36 |   // every row currently present is resized to the new column count
 37 |   self->ncols = ncols;
 38 |   for (int r = 0; r < self->nrows; ++r)
 39 |     self->rows[r].resize(ncols);
 40 | }
 41 | 
 42 | 
 43 | VFloat 
 44 | FMatrix::get(int r, int c) const 
 45 | {
 46 |   // a row index past the last row yields 0; a negative one fails the assertion
 47 |   const Rep *self = rep();
 48 |   const bool inside = (r >= 0) && (r < self->nrows);
 49 |   if (! inside) { assert(r >= 0); return 0; }
 50 |   return self->rows[r].get(c);
 51 | }
 52 | 
 53 | 
 54 | void 
 55 | FMatrix::set(int r, int c, VFloat v) 
 56 | {
 57 |   // grow the row count and the recorded column count so that (r,c) is covered
 58 |   w.detach();
 59 |   Rep *self = rep();
 60 |   if (self->nrows <= r) resize(r+1);
 61 |   if (self->ncols <= c) self->ncols = c+1;
 62 |   // a negative row index fails the assertion
 63 |   assert(r >= 0);
 64 |   self->rows[r].set(c,v);
 65 | }
 66 |   
 67 | 
 68 | FVector& 
 69 | FMatrix::operator[](int r)
 70 | {
 71 |   // mutable row access: rows are added on demand, a negative r fails the assertion
 72 |   w.detach();
 73 |   Rep *self = rep();
 74 |   if (self->nrows <= r) resize(r+1);
 75 |   assert(r >= 0);
 76 |   return self->rows[r];
 77 | }
 78 | 
 79 | 
 80 | // ----------------------------------------
 81 | 
 82 | 
 83 | void 
 84 | SMatrix::resize(int nrows, int ncols)
 85 | {
 86 |   // negative arguments leave a dimension alone; the column count can only shrink here
 87 |   w.detach();
 88 |   Rep *self = rep();
 89 |   if (nrows >= 0) {
 90 |     self->rows.resize(nrows);
 91 |     self->nrows = nrows;
 92 |   }
 93 |   if (ncols < 0 || ncols >= self->ncols)
 94 |     return;
 95 |   self->ncols = ncols;
 96 |   for (int r = 0; r < self->nrows; ++r) {
 97 |     SVector &row = self->rows[r];
 98 |     if (row.size() <= ncols)
 99 |       continue;
100 |     // truncate: refill the row from a copy, stopping at the first entry
101 |     // whose index is negative or not below ncols
102 |     SVector saved = row;
103 |     row.clear();
104 |     for (const SVector::Pair *p = saved; p->i >= 0 && p->i < ncols; p++)
105 |       row.set(p->i, p->v);
106 |   }
107 | }
108 | 
109 | 
110 | // Range-checked read of element (r,c); rows at or past rows() read as 0.
111 | VFloat SMatrix::get(int r, int c) const {
112 |   const Rep *m = rep();
113 |   const bool known = (r >= 0 && r < m->nrows);
114 |   if (known)
115 |     return m->rows[r].get(c);
116 |   assert(r>=0);   // reached only for r<0 or r>=rows()
117 |   return 0;
118 | }
119 | 
120 | 
121 | // Range-checked write: adds rows and raises the recorded column count
122 | // when (r,c) lies outside the current bounds, then stores v.
123 | void SMatrix::set(int r, int c, VFloat v) {
124 |   w.detach();
125 |   Rep *m = rep();
126 |   if (m->nrows <= r)
127 |     resize(r + 1);      // column argument defaults to -1
128 |   if (m->ncols <= c)
129 |     m->ncols = c + 1;
130 |   assert(r>=0);
131 |   m->rows[r].set(c, v);
132 | }
133 |   
134 | 
135 | // Mutable row access; rows are added when r is at or past rows().
136 | SVector& SMatrix::operator[](int r) {
137 |   w.detach();
138 |   Rep *m = rep();
139 |   const bool pastEnd = (m->nrows <= r);
140 |   if (pastEnd)
141 |     resize(r + 1);
142 |   assert(r>=0);
143 |   return m->rows[r];
144 | }
145 | 
146 | 
147 | 
148 | /* -------------------------------------------------------------
149 |    Local Variables:
150 |    c++-font-lock-extra-types: ("\\sw+_t" "[A-Z]\\sw*[a-z]\\sw*")
151 |    End:
152 |    ------------------------------------------------------------- */
153 | 


--------------------------------------------------------------------------------
/lib/matrices.h:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // Little library of matrices and sparse matrices
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This library is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | 
 20 | 
 21 | // $Id$
 22 | 
 23 | #ifndef MATRICES_H
 24 | #define MATRICES_H 1
 25 | 
 26 | #include <cstring>
 27 | #include <iostream>
 28 | #include <vector>
 29 | #include "wrapper.h"
 30 | #include "vectors.h"
 31 | 
 32 | 
 33 | // Dense matrix kept as a std::vector of FVector rows in a shared Rep.
 34 | class FMatrix
 35 | {
 36 |  private:
 37 |   // Shared state; Wrapper<Rep> maintains refcount and calls copy().
 38 |   struct Rep
 39 |   {
 40 |     int refcount;
 41 |     int ncols;
 42 |     int nrows;
 43 |     std::vector<FVector> rows;
 44 |     Rep() : ncols(0), nrows(0) { }
 45 |     Rep *copy() { return new Rep(*this); }
 46 |   };
 47 |   Wrapper<Rep> w;
 48 |   Rep *rep() { return w.rep(); }
 49 |   const Rep *rep() const { return w.rep(); }
 50 |  public:
 51 |   FMatrix() {}
 52 |   FMatrix(int rows, int cols) { resize(rows, cols); }
 53 |   int rows() const { return rep()->nrows; }
 54 |   int cols() const { return rep()->ncols; }
 55 |   // A negative argument leaves that dimension unchanged.
 56 |   void resize(int nrows, int ncols=-1);
 57 |   VFloat get(int r, int c) const;
 58 |   void set(int r, int c, VFloat v);
 59 |   FVector& operator[](int r);
 60 |   // Read-only row access; an out-of-range r yields a default FVector.
 61 |   const FVector operator[](int r) const {
 62 |     const Rep *m = rep();
 63 |     if (r >= 0 && r < m->nrows)
 64 |       return m->rows[r];
 65 |     return FVector();
 66 |   }
 67 | };
 68 | 
 69 | 
 70 | 
 71 | // Sparse matrix kept as a std::vector of SVector rows in a shared Rep.
 72 | class SMatrix
 73 | {
 74 |  private:
 75 |   // Shared state; Wrapper<Rep> maintains refcount and calls copy().
 76 |   struct Rep
 77 |   {
 78 |     int refcount;
 79 |     int ncols;
 80 |     int nrows;
 81 |     std::vector<SVector> rows;
 82 |     Rep() : ncols(0), nrows(0) { }
 83 |     Rep *copy() { return new Rep(*this); }
 84 |   };
 85 |   Wrapper<Rep> w;
 86 |   Rep *rep() { return w.rep(); }
 87 |   const Rep *rep() const { return w.rep(); }
 88 |  public:
 89 |   SMatrix() {}
 90 |   SMatrix(int rows, int cols) { resize(rows,cols); }
 91 |   int rows() const { return rep()->nrows; }
 92 |   int cols() const { return rep()->ncols; }
 93 |   // A negative argument leaves that dimension unchanged.
 94 |   void resize(int nrows, int ncols=-1);
 95 |   VFloat get(int r, int c) const;
 96 |   void set(int r, int c, VFloat v);
 97 |   SVector& operator[](int r);
 98 |   // Read-only row access; an out-of-range r yields a default SVector.
 99 |   const SVector operator[](int r) const {
100 |     const Rep *m = rep();
101 |     if (r >= 0 && r < m->nrows)
102 |       return m->rows[r];
103 |     return SVector();
104 |   }
105 | };
106 | 
107 | 
108 | #endif
109 | 
110 | /* -------------------------------------------------------------
111 |    Local Variables:
112 |    c++-font-lock-extra-types: ("\\sw+_t" "[A-Z]\\sw*[a-z]\\sw*" "std::\\sw+")
113 |    End:
114 |    ------------------------------------------------------------- */
115 | 


--------------------------------------------------------------------------------
/lib/pstream.cpp:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // Stream that uses popen/pclose internally
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This library is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | 
 20 | #include "pstream.h"
 21 | #include <cstdio>
 22 | #include <cstring>
 23 | 
 24 | 
 25 | // Run cmd through popen() in the direction given by open_mode.  Returns 0
 26 | // if already open, if the mode is unsupported, or if popen() fails.
 27 | pstreambuf* pstreambuf::open( const char *cmd, int open_mode)
 28 | {
 29 |   if (f)
 30 |     return 0;
 31 |   mode = open_mode;
 32 |   const bool reading = (mode & std::ios::in) != 0;
 33 |   const bool writing = (mode & std::ios::out) != 0;
 34 |   if ((mode & std::ios::ate) || (mode & std::ios::app) || (reading && writing))
 35 |     return 0;
 36 |   char fmode[10];
 37 |   int n = 0;
 38 |   if (reading)
 39 |     fmode[n++] = 'r';
 40 |   else if (writing)
 41 |     fmode[n++] = 'w';
 42 | #ifdef WIN32
 43 |   if (mode & std::ios::binary)
 44 |     fmode[n++] = 'b';
 45 |   fmode[n] = '\0';
 46 |   f = ::_popen(cmd, fmode);
 47 | #else
 48 |   fmode[n] = '\0';
 49 |   f = ::popen(cmd, fmode);
 50 | #endif
 51 |   return f ? this : 0;
 52 | }
 53 | 
 54 | 
 55 | // Flush pending output and close the pipe.  Returns this when a pipe
 56 | // was open and 0 otherwise; the status from pclose() is not examined.
 57 | pstreambuf* pstreambuf::close() 
 58 | {
 59 |   if (! f)
 60 |     return 0;
 61 |   sync();
 62 | #ifdef WIN32
 63 |   ::_pclose(f);
 64 | #else
 65 |   ::pclose(f);
 66 | #endif
 67 |   // mark the buffer as closed
 68 |   f = 0;
 69 |   return this;
 70 | }
 71 | 
 72 | 
 73 | // Refill the get area from the pipe, keeping up to 4 putback characters.
 74 | int pstreambuf::underflow() 
 75 | { // used for input buffer only
 76 |   if ( gptr() && ( gptr() < egptr()))
 77 |     return *reinterpret_cast<unsigned char *>( gptr());
 78 |   if ( ! (mode & std::ios::in) || ! f)
 79 |     return EOF;
 80 |   int n_putback = gptr() - eback();
 81 |   if ( n_putback > 4)
 82 |     n_putback = 4;
 83 |   // After a short read the two ranges can overlap, so memmove, not memcpy.
 84 |   std::memmove(buffer + (4 - n_putback), gptr()-n_putback, n_putback);
 85 |   int num = std::fread(buffer+4, 1, bsize-4, f);
 86 |   if (num <= 0)
 87 |     return EOF;
 88 |   setg( buffer + (4 - n_putback),   // beginning of putback area
 89 |         buffer + 4,                 // read position
 90 |         buffer + 4 + num);          // end of buffer
 91 |   return *reinterpret_cast<unsigned char *>( gptr());    
 92 | }
 93 | 
 94 | 
 95 | // Flush the put area, first storing c unless it is EOF.  EOF means failure.
 96 | int pstreambuf::overflow(int c) 
 97 | { // used for output buffer only
 98 |   if (!(mode & std::ios::out) || !f)
 99 |     return EOF;
100 |   if (c != EOF) {
101 |     *pptr() = c;   // the constructor's setp() keeps one spare slot for this
102 |     pbump(1);
103 |   }
104 |   if (sync())
105 |     return EOF;
106 |   return (c != EOF) ? c : 0;   // success must never be reported as EOF
107 | }
108 | 
109 | 
110 | // Write the pending put area to the pipe; EOF if that fails or no pipe is open.
111 | int pstreambuf::sync() {
112 |   if ( pptr() && pptr() > pbase()) {
113 |     int w = pptr() - pbase();
114 |     if (! f || std::fwrite( pbase(), 1, w, f ) != (size_t)w)
115 |       return EOF;   // a null FILE* must never reach fwrite
116 |     pbump( -w);
117 |   }
118 |   return 0;
119 | }
120 | 
121 | 
122 | pstreambase::pstreambase( const char* cmd, int mode) {
123 |   this->init(&buf);        // attach our buffer to the std::ios base
124 |   this->open(cmd, mode);   // badbit is set if the command cannot be started
125 | }
126 | 
127 | 
128 | pstreambase::~pstreambase() {
129 |   (void) buf.close();   // close the pipe if it is still open; result unused
130 | }
131 | 
132 | 
133 | // Launch cmd on the underlying buffer; a failure is recorded as badbit.
134 | void pstreambase::open( const char* cmd, int open_mode) {
135 |   pstreambuf *opened = buf.open(cmd, open_mode);
136 |   if (opened == 0) setstate( std::ios::badbit);
137 | }
138 | 
139 | 
140 | // Close an open pipe; badbit is set if the buffer reports a failed close.
141 | void pstreambase::close() {
142 |   if (! buf.is_open())
143 |     return;
144 |   if (buf.close() == 0) setstate(std::ios::badbit);
145 | }
146 | 
147 | 
148 | 
149 | /* -------------------------------------------------------------
150 |    Local Variables:
151 |    c++-font-lock-extra-types: ("\\sw+_t" "[A-Z]\\sw*[a-z]\\sw*" )
152 |    End:
153 |    ------------------------------------------------------------- */
154 | 


--------------------------------------------------------------------------------
/lib/pstream.h:
--------------------------------------------------------------------------------
 1 | // -*- C++ -*-
 2 | // Stream that uses popen/pclose internally
 3 | // Copyright (C) 2007- Leon Bottou
 4 | 
 5 | // This library is free software; you can redistribute it and/or
 6 | // modify it under the terms of the GNU Lesser General Public
 7 | // License as published by the Free Software Foundation; either
 8 | // version 2.1 of the License, or (at your option) any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // GNU General Public License for more details.
14 | // 
15 | // You should have received a copy of the GNU General Public License
16 | // along with this program; if not, write to the Free Software
17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
18 | 
19 | 
20 | #ifndef PSTREAM_H
21 | #define PSTREAM_H 1
22 | 
23 | #include <iostream>
24 | #include <fstream>
25 | #include <cstdio>
26 | 
27 | // std::streambuf over a popen() pipe; one 512-byte array backs both areas.
28 | class pstreambuf : public std::streambuf {
29 |  private:
30 |   static const int bsize = 512;
31 |   char buffer[bsize];
32 |   std::FILE *f;   // pipe stream, 0 while closed
33 |   int  mode;      // open mode last given to open()
34 |  public:
35 |   pstreambuf() : f(0), mode(0) { 
36 |     setp( buffer, buffer+bsize-1 );         // put area, one byte held back
37 |     setg( buffer+4, buffer+4, buffer+4 );   // empty get area past 4 putback bytes
38 |   }
39 |   int is_open() { return f != 0; }
40 |   pstreambuf* open(const char *cmd, int open_mode);
41 |   pstreambuf* close();
42 |   ~pstreambuf() { close(); }
43 |   virtual int overflow( int c = EOF);
44 |   virtual int underflow();
45 |   virtual int sync();
46 | };
47 | 
48 | 
49 | class pstreambase : virtual public std::ios {   // owns the pipe buffer
50 |  protected:
51 |   pstreambuf buf;
52 |  public:
53 |   pstreambase() { this->init(&buf); }
54 |   pstreambase(const char *cmd, int open_mode);   // init, then open(cmd)
55 |   ~pstreambase();                                // closes the buffer
56 |   void open(const char *cmd, int open_mode);     // sets badbit on failure
57 |   void close();
58 |   pstreambuf* rdbuf() { return &buf; }
59 | };
60 | 
61 | // ----------------------------------------------------------------------------
62 | // User classes. Use ipstream and opstream analogously to ifstream and
63 | // ofstream respectively. They read from and write to a command via popen().
64 | // ----------------------------------------------------------------------------
65 | 
66 | class ipstream : public pstreambase, public std::istream {   // input side
67 |  public:
68 |   ipstream() : std::istream( &buf) {} 
69 |   ipstream( const char* cmd, int open_mode = std::ios::in)
70 |     : pstreambase(cmd, open_mode), std::istream( &buf) {}  
71 |   // rdbuf() exists in both bases; forward to the pstreambase one.
72 |   pstreambuf* rdbuf() { return pstreambase::rdbuf(); }
73 |   void open( const char* cmd, int open_mode = std::ios::in)
74 |     { pstreambase::open(cmd, open_mode); }
75 | };
76 | 
77 | class opstream : public pstreambase, public std::ostream {   // output side
78 |  public:
79 |   opstream() : std::ostream( &buf) {}
80 |   opstream( const char *cmd, int mode = std::ios::out)
81 |     : pstreambase(cmd, mode), std::ostream( &buf) {}  
82 |   // rdbuf() exists in both bases; forward to the pstreambase one.
83 |   pstreambuf* rdbuf() { return pstreambase::rdbuf(); }
84 |   void open( const char *cmd, int open_mode = std::ios::out)
85 |     { pstreambase::open( cmd, open_mode); }
86 | };
87 | 
88 | #endif
89 | 
90 | /* -------------------------------------------------------------
91 |    Local Variables:
92 |    c++-font-lock-extra-types: ("\\sw+_t" "[A-Z]\\sw*[a-z]\\sw*" "std::\\sw+")
93 |    End:
94 |    ------------------------------------------------------------- */
95 | 


--------------------------------------------------------------------------------
/lib/timer.cpp:
--------------------------------------------------------------------------------
 1 | // -*- C++ -*-
 2 | // A simple timer.
 3 | // Copyright (C) 2007- Leon Bottou
 4 | 
 5 | // This library is free software; you can redistribute it and/or
 6 | // modify it under the terms of the GNU Lesser General Public
 7 | // License as published by the Free Software Foundation; either
 8 | // version 2.1 of the License, or (at your option) any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // GNU General Public License for more details.
14 | // 
15 | // You should have received a copy of the GNU General Public License
16 | // along with this program; if not, write to the Free Software
17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
18 | 
19 | 
20 | #include "timer.h"
21 | #include <ctime>
22 | 
23 | #ifdef USE_REALTIME_CLOCK
24 | # include <sys/time.h>
25 | # include <time.h>
26 | // Wall-clock time in seconds, as reported by gettimeofday().
27 | static double
28 | klock()
29 | {
30 |   struct timeval tv;
31 |   gettimeofday(&tv, NULL);
32 |   return (double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
33 | }
34 | #else
35 | // Processor time consumed by the program, in seconds (std::clock based).
36 | static double klock() {
37 |   const double ticks = (double) std::clock();
38 |   return ticks / (double) CLOCKS_PER_SEC;
39 | }
40 | #endif
41 | 
42 | // A new timer is stopped and has accumulated no time.
43 | Timer::Timer()
44 |   : a(0.0), s(0.0), r(0)
45 | {}
46 | 
47 | // Return to the freshly constructed state: stopped, nothing accumulated.
48 | void Timer::reset()
49 | {
50 |   r = 0;
51 |   s = 0.0;
52 |   a = 0.0;
53 | }
54 | 
55 | 
56 | // Fold the time since the previous sample into the total when running,
57 | // remember the current clock reading, and return the accumulated seconds.
58 | double Timer::elapsed()
59 | {
60 |   const double now = klock();
61 |   const double sinceLast = now - s;
62 |   s = now;
63 |   return r ? (a += sinceLast) : a;
64 | }
65 | 
66 | // Bring the total up to date, then mark the timer as running.
67 | // Returns the seconds accumulated so far.
68 | double Timer::start() {
69 |   const double total = elapsed();
70 |   r = 1;
71 |   return total;
72 | }
73 | 
74 | 
75 | 
76 | // Bring the total up to date, then mark the timer as stopped.
77 | // Returns the seconds accumulated so far.
78 | double Timer::stop() {
79 |   const double total = elapsed();
80 |   r = 0;
81 |   return total;
82 | }
83 | 
84 | 
85 | 
86 | 
87 | 


--------------------------------------------------------------------------------
/lib/timer.h:
--------------------------------------------------------------------------------
 1 | // -*- C++ -*-
 2 | // A simple timer.
 3 | // Copyright (C) 2007- Leon Bottou
 4 | 
 5 | // This library is free software; you can redistribute it and/or
 6 | // modify it under the terms of the GNU Lesser General Public
 7 | // License as published by the Free Software Foundation; either
 8 | // version 2.1 of the License, or (at your option) any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // GNU General Public License for more details.
14 | // 
15 | // You should have received a copy of the GNU General Public License
16 | // along with this program; if not, write to the Free Software
17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
18 | 
19 | 
20 | #ifndef TIMER_H
21 | #define TIMER_H 1
22 | 
23 | // Stopwatch that accumulates the time spent between start() and stop().
24 | class Timer {
25 | public:
26 |   Timer();
27 |   void reset();       // stopped, nothing accumulated
28 |   double start();     // begin (or continue) timing; returns total seconds
29 |   double stop();      // suspend timing; returns total seconds
30 |   double elapsed();   // total seconds so far; does not start or stop
31 | private:
32 |   double a, s;        // accumulated seconds; last clock reading
33 |   int    r;           // nonzero while running
34 | };
35 | 
36 | 
37 | /* -------------------------------------------------------------
38 |    Local Variables:
39 |    c++-font-lock-extra-types: ("\\sw+_t" "[A-Z]\\sw*[a-z]\\sw*" "std::\\sw+_t")
40 |    End:
41 |    ------------------------------------------------------------- */
42 | 
43 | 
44 | #endif
45 | 


--------------------------------------------------------------------------------
/lib/vectors.h:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // Little library of vectors and sparse vectors
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This library is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | 
 20 | 
 21 | #ifndef VECTORS_H
 22 | #define VECTORS_H 1
 23 | 
 24 | #include <cstring>
 25 | #include <iostream>
 26 | #include "wrapper.h"
 27 | 
 28 | 
 29 | class FVector;
 30 | class SVector;
 31 | 
 32 | #ifndef VFLOAT
 33 | # define VFLOAT float
 34 | #endif
 35 | typedef VFLOAT VFloat;
 36 | 
 37 | // Dense vector of VFloat whose storage is shared through Wrapper<Rep>.
 38 | class FVector {
 39 | private:
 40 |   // Storage block; Wrapper<Rep> maintains refcount and calls copy().
 41 |   struct Rep {
 42 |     int refcount;
 43 |     int size;
 44 |     VFloat *data;
 45 |     Rep() : size(0), data(0) {}
 46 |     ~Rep() { delete [] data; }
 47 |     void resize(int n);
 48 |     Rep *copy();
 49 |   };
 50 |   
 51 |   Wrapper<Rep> w;
 52 |   Rep *rep() { return w.rep(); }
 53 |   const Rep *rep() const { return w.rep(); }
 54 |   void qset(int i, double v);
 55 |   
 56 | public:
 57 |   FVector();
 58 |   FVector(int n);
 59 |   FVector(const SVector &v);
 60 |   int size() const { return rep()->size; }
 61 | 
 62 |   // Range-checked accessors:
 63 |   // get() returns 0 when i is out-of-range,
 64 |   // set() expands the vector.
 65 |   double get(int i) const;
 66 |   double set(int i, double v);
 67 | 
 68 |   // Raw pointer access, so that vector[i] can be written.  Fast, but the
 69 |   // subscripts are NOT range-checked.  The non-const conversion detaches
 70 |   // the storage first, since the caller may write through the pointer.
 71 |   operator const VFloat* () const { return rep()->data; }
 72 |   operator VFloat* () { w.detach(); return rep()->data; }
 73 | 
 74 |   void clear();
 75 |   void zero();
 76 |   void resize(int n);
 77 |   void touch(int i);
 78 |   FVector slice(int fi, int ti) const;
 79 | 
 80 |   void add(double c1);
 81 |   void add(const FVector &v2);
 82 |   void add(const SVector &v2);
 83 |   void add(const FVector &v2, double c2);
 84 |   void add(const SVector &v2, double c2);
 85 |   void add(const FVector &v2, double c2, const FVector &q2);
 86 |   void add(const SVector &v2, double c2, const FVector &q2);
 87 |   void scale(double c1);
 88 |   void combine(double c1, const FVector &v2, double c2);
 89 |   void combine(double c1, const SVector &v2, double c2);
 90 | 
 91 |   friend std::ostream& operator<<(std::ostream &f, const FVector &v);
 92 |   friend std::istream& operator>>(std::istream &f, FVector &v);
 93 |   bool save(std::ostream &f) const;
 94 |   bool load(std::istream &f);
 95 | };
 96 | 
 97 | 
 98 | 
 99 | // Sparse vector kept as (index, value) pairs, shared through Wrapper<Rep>.
100 | class SVector {
101 | public:
102 |   // One stored coefficient: index i and value v.
103 |   struct Pair { 
104 |     int i; 
105 |     VFloat v; 
106 |   };
107 | private:
108 |   // Storage block; Wrapper<Rep> maintains refcount and calls copy().
109 |   struct Rep {
110 |     int refcount;
111 |     int npairs;
112 |     int mpairs;
113 |     int size;
114 |     struct Pair *pairs;
115 |     
116 |     Rep() : npairs(0), mpairs(-1), size(0), pairs(0) {}
117 |     ~Rep() { delete [] pairs; }
118 |     void resize(int n);
119 |     double qset(int i, double v);
120 |     Rep *copy();
121 |   };
122 |   
123 |   Wrapper<Rep> w;
124 |   Rep *rep() { return w.rep(); }
125 |   const Rep *rep() const { return w.rep(); }
126 |   
127 | public:
128 |   SVector();
129 |   SVector(const FVector &v);
130 |   int size() const { return rep()->size; }
131 | 
132 |   // Range-checked accessors:
133 |   // get() returns 0 when i is out-of-range,
134 |   // set() expands the vector.
135 |   double get(int i) const;
136 |   double set(int i, double v);
137 | 
138 |   // To iterate quickly over the non-zero coefficients, write
139 |   //   for (const SVector::Pair *p = x; p->i>=0; p++) { ... }
140 |   int npairs() const { return rep()->npairs; }
141 |   operator const Pair* () const { return rep()->pairs; }
142 | 
143 |   void zero();
144 |   void clear();
145 |   void trim();
146 |   SVector slice(int fi, int ti) const;
147 | 
148 |   void add(const SVector &v2);
149 |   void add(const SVector &v2, double c2);
150 |   void scale(double c1);
151 |   void combine(double c1, const SVector &v2, double c2);
152 | 
153 |   friend std::ostream& operator<<(std::ostream &f, const SVector &v);
154 |   friend std::istream& operator>>(std::istream &f, SVector &v);
155 |   bool save(std::ostream &f) const;
156 |   bool load(std::istream &f);
157 | 
158 |   friend SVector combine(const SVector &v1, double a1, 
159 |                          const SVector &v2, double a2);
160 | };
161 | 
162 | double dot(const FVector &v1, const FVector &v2);
163 | double dot(const FVector &v1, const SVector &v2);
164 | double dot(const SVector &v1, const FVector &v2);
165 | double dot(const SVector &v1, const SVector &v2);
166 | 
167 | SVector combine(const SVector &v1, double a1, const SVector &v2, double a2);
168 | FVector combine(const FVector &v1, double a1, const SVector &v2, double a2);
169 | FVector combine(const SVector &v1, double a1, const FVector &v2, double a2);
170 | FVector combine(const FVector &v1, double a1, const FVector &v2, double a2);
171 | 
172 | 
173 | 
174 | #endif
175 | 
176 | /* -------------------------------------------------------------
177 |    Local Variables:
178 |    c++-font-lock-extra-types: ("\\sw+_t" "[A-Z]\\sw*[a-z]\\sw*" "std::\\sw+")
179 |    End:
180 |    ------------------------------------------------------------- */
181 | 


--------------------------------------------------------------------------------
/lib/wrapper.h:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // Little library of copy-on-write wrappers
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | 
  6 | // This library is free software; you can redistribute it and/or
  7 | // modify it under the terms of the GNU Lesser General Public
  8 | // License as published by the Free Software Foundation; either
  9 | // version 2.1 of the License, or (at your option) any later version.
 10 | // 
 11 | // This program is distributed in the hope that it will be useful,
 12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 14 | // GNU General Public License for more details.
 15 | // 
 16 | // You should have received a copy of the GNU General Public License
 17 | // along with this program; if not, write to the Free Software
 18 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 19 | 
 20 | 
 21 | 
 22 | #ifndef WRAPPER_H
 23 | #define WRAPPER_H 1
 24 | 
 25 | 
 26 | // Reference-counted handle; detach() gives copy-on-write semantics.
 27 | // Rep must provide an int refcount member and a Rep *copy() method.
 28 | template <class Rep> class Wrapper {
 29 |  private:
 30 |   Rep *q;
 31 |   Rep *ref(Rep *q) { q->refcount++; return q; }
 32 |   void deref(Rep *q) { if (! --(q->refcount)) delete q; }
 33 |  public:
 34 |   Wrapper() : q(new Rep) { q->refcount = 1; }
 35 |   Wrapper(Rep *rep) : q(rep) { q->refcount = 1; }
 36 |   Wrapper(const Wrapper<Rep> &other) : q(ref(other.q)) {}
 37 |   ~Wrapper() { deref(q); }
 38 |   Wrapper& operator=(const Wrapper<Rep> &other) 
 39 |     { Rep *p = q; q = ref(other.q); deref(p); return *this; }
 40 |   // Copy before touching any count: a throwing copy() changes nothing.
 41 |   void detach() {
 42 |     if (q->refcount <= 1) return;
 43 |     Rep *mine = q->copy();
 44 |     mine->refcount = 1;
 45 |     deref(q);
 46 |     q = mine;
 47 |   }
 48 |   Rep *rep() const { return q; }
 49 | };
 50 | 
 51 | 
 52 | 
 53 | // Recommended usage
 54 | //
 55 | // #include <cstdlib>
 56 | // #include <cstring>
 57 | // 
 58 | // class String
 59 | // {
 60 | // private:
 61 | //
 62 | //   struct Rep
 63 | //   {
 64 | //     int refcount;
 65 | //     int length;
 66 | //     char *data;
 67 | //     Rep(const char *s, int l)
 68 | //       : length(l), data(new char[l+1]) 
 69 | //       { ::memcpy(data, s, l);  data[l] = 0; }
 70 | //     Rep *copy()
 71 | //       { return new Rep(data, length); }
 72 | //   };
 73 | //
 74 | //   Wrapper<Rep> w;
 75 | //   Rep *rep() { return w.rep(); }
 76 | //   const Rep *rep() const { return w.rep(); }
 77 | //     
 78 | // public:
 79 | //   String(const char *s, int l)
 80 | //     : w(new Rep(s,l)) {}
 81 | //   String(const char *s)
 82 | //     : w(new Rep(s,::strlen(s))) {}
 83 | // 
 84 | //   // functions that do not mutate
 85 | //   int size() const { return rep()->length; }
 86 | //   operator const char*() const { return rep()->data; }
 87 | //   char operator[](int i) const { return rep()->data[i]; }
 88 | //   
 89 | //   // functions that perform a mutation
 90 | //   void set(int i, char c) { w.detach(); rep()->data[i] = c; }
 91 | // };
 92 | // 
 93 | 
 94 | 
 95 | 
 96 | #endif
 97 | 
 98 | 
 99 | 
100 | /* -------------------------------------------------------------
101 |    Local Variables:
102 |    c++-font-lock-extra-types: ( "\\sw+_t" "[A-Z]\\sw*[a-z]\\sw*" )
103 |    End:
104 |    ------------------------------------------------------------- */
105 | 


--------------------------------------------------------------------------------
/svm/Makefile:
--------------------------------------------------------------------------------
 1 | # SVM with stochastic gradient
 2 | 
 3 | # This program is free software; you can redistribute it and/or modify
 4 | # it under the terms of the GNU General Public License as published by
 5 | # the Free Software Foundation; either version 2 of the License, or
 6 | # (at your option) any later version.
 7 | # 
 8 | # This program is distributed in the hope that it will be useful,
 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 | # GNU General Public License for more details.
12 | # 
13 | # You should have received a copy of the GNU General Public License
14 | # along with this program; if not, write to the Free Software
15 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
16 | 
17 | 
18 | L=../lib
19 | CXX=g++
20 | OPT=
21 | OPTS=-g -O2
22 | CXXFLAGS= ${OPTS} ${OPT} -Wall -I$L
23 | LIBS = -lz -lm
24 | # Programs built by the default target.
25 | PROGRAMS = prep_rcv1 prep_alpha prep_webspam svmsgd svmasgd
26 | # Library objects linked into every program, and the library headers.
27 | OBJS = vectors.o gzstream.o timer.o
28 | INCS = $L/vectors.h $L/gzstream.h $L/timer.h $L/wrapper.h $L/assert.h
29 | .PHONY: all clean
30 | all: ${PROGRAMS}
31 | 
32 | clean:
33 | 	-rm ${PROGRAMS} 2>/dev/null
34 | 	-rm *.o 2>/dev/null
35 | 
36 | vectors.o: $L/vectors.cpp ${INCS}
37 | 	${CXX} ${CXXFLAGS} -c -o $@ $L/vectors.cpp
38 | 
39 | gzstream.o: $L/gzstream.cpp ${INCS}
40 | 	${CXX} ${CXXFLAGS} -c -o $@ $L/gzstream.cpp
41 | 
42 | timer.o: $L/timer.cpp ${INCS}
43 | 	${CXX} ${CXXFLAGS} -c -o $@ $L/timer.cpp
44 | # data.cpp includes data.h, so it is listed as a prerequisite.
45 | data.o: data.cpp data.h ${INCS}
46 | 	${CXX} ${CXXFLAGS} -c -o $@ data.cpp
47 | 
48 | prep_rcv1.o: prep_rcv1.cpp ${INCS}
49 | 	${CXX} ${CXXFLAGS} -c -o $@ prep_rcv1.cpp
50 | 
51 | prep_alpha.o: prep_alpha.cpp ${INCS}
52 | 	${CXX} ${CXXFLAGS} -c -o $@ prep_alpha.cpp
53 | 
54 | prep_webspam.o: prep_webspam.cpp ${INCS}
55 | 	${CXX} ${CXXFLAGS} -c -o $@ prep_webspam.cpp
56 | 
57 | svmsgd.o: svmsgd.cpp data.h loss.h ${INCS}
58 | 	${CXX} ${CXXFLAGS} -c -o $@ svmsgd.cpp
59 | 
60 | svmasgd.o: svmasgd.cpp data.h loss.h ${INCS}
61 | 	${CXX} ${CXXFLAGS} -c -o $@ svmasgd.cpp
62 | 
63 | prep_rcv1: prep_rcv1.o ${OBJS}
64 | 	${CXX} ${CXXFLAGS} -o $@ prep_rcv1.o ${OBJS} ${LIBS}
65 | 
66 | prep_alpha: prep_alpha.o ${OBJS}
67 | 	${CXX} ${CXXFLAGS} -o $@ prep_alpha.o ${OBJS} ${LIBS}
68 | 
69 | prep_webspam: prep_webspam.o ${OBJS}
70 | 	${CXX} ${CXXFLAGS} -o $@ prep_webspam.o ${OBJS} ${LIBS}
71 | 
72 | svmsgd: svmsgd.o data.o ${OBJS}
73 | 	${CXX} ${CXXFLAGS} -o $@ svmsgd.o data.o ${OBJS} ${LIBS}
74 | 
75 | svmasgd: svmasgd.o data.o ${OBJS}
76 | 	${CXX} ${CXXFLAGS} -o $@ svmasgd.o data.o ${OBJS} ${LIBS}
77 | 
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/svm/data.cpp:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // SVM with stochastic gradient
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This program is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | 
 20 | #include <cmath>
 21 | #include <cstdlib>
 22 | #include <fstream>
 23 | #include <iomanip>
 24 | #include <iostream>
 25 | #include <map>
 26 | #include <string>
 27 | #include <vector>
 28 | 
 29 | #include "gzstream.h"
 30 | #include "assert.h"
 31 | #include "data.h"
 32 | 
 33 | using namespace std;
 34 | 
 35 | // Read examples from f (a label followed by a sparse vector), appending
 36 | // them to xp/yp and raising maxdim to the largest x.size() seen.
 37 | // Stops once the stream is no longer good or maxrows counts down to zero.
 38 | static void
 39 | load_datafile_sub(istream &f, bool binary, const char *fname, 
 40 |                   xvec_t &xp, yvec_t &yp, int &maxdim,
 41 |                   bool normalize, int maxrows)
 42 | {
 43 |   cout << "# Reading file " << fname << endl;
 44 |   if (! f.good())
 45 |     assertfail("Cannot open " << fname);
 46 |   int npos = 0;
 47 |   int nneg = 0;
 48 |   while (f.good() && maxrows--) {
 49 |     SVector x;
 50 |     double y;
 51 |     if (binary) {
 52 |       // one label byte (nonzero means +1), then x.load(f)
 53 |       y = (f.get()) ? +1 : -1;
 54 |       x.load(f);
 55 |     } else {
 56 |       f >> y >> x;
 57 |     }
 58 |     if (! f.good())
 59 |       continue;   // nothing is stored; the loop test then ends the scan
 60 |     if (normalize) {
 61 |       // divide x by sqrt(dot(x,x)) unless that is a no-op or impossible
 62 |       const double sqnorm = dot(x,x);
 63 |       if (sqnorm > 0 && sqnorm != 1.0)
 64 |         x.scale(1.0 / sqrt(sqnorm)); 
 65 |     }
 66 |     if (y != +1 && y != -1)
 67 |       assertfail("Label should be +1 or -1.");
 68 |     xp.push_back(x);
 69 |     yp.push_back(y);
 70 |     if (y > 0)
 71 |       npos += 1;
 72 |     else
 73 |       nneg += 1;
 74 |     const int dim = x.size();
 75 |     if (maxdim < dim)
 76 |       maxdim = dim;
 77 |   }
 78 |   cout << "# Read " << npos << "+" << nneg 
 79 |        << "=" << npos + nneg << " examples." << endl;
 80 | }
 81 | 
 82 | 
 83 | // Load the examples of file fname.  The suffix selects the format:
 84 | // .txt or .bin, optionally followed by .gz for compressed files.
 85 | void
 86 | load_datafile(const char *fname, 
 87 |               xvec_t &xp, yvec_t &yp, int &maxdim,
 88 |               bool normalize, int maxrows)
 89 | {
 90 |   bool binary = false;
 91 |   bool compressed = false;
 92 |   string filename = fname;
 93 |   int len = filename.size();
 94 |   if (len > 7 && filename.substr(len-7) == ".txt.gz")
 95 |     compressed = true;
 96 |   else if (len > 7 && filename.substr(len-7) == ".bin.gz")
 97 |     compressed = binary = true;
 98 |   else if (len > 4 && filename.substr(len-4) == ".bin")
 99 |     binary = true;
100 |   else if (len > 4 && filename.substr(len-4) == ".txt")
101 |     binary = false;
102 |   else
103 |     assertfail("Filename suffix should be one of: "
104 |                << ".bin, .txt, .bin.gz, .txt.gz");
105 |   if (compressed) {
106 |     igzstream f;
107 |     f.open(fname);
108 |     load_datafile_sub(f, binary, fname, xp, yp, 
109 |                       maxdim, normalize, maxrows);
110 |   } else {
111 |     // Binary records must bypass text-mode translation (matters on Windows).
112 |     ifstream f;
113 |     f.open(fname, binary ? (ios::in | ios::binary) : ios::in);
114 |     load_datafile_sub(f, binary, fname, xp, yp, 
115 |                       maxdim, normalize, maxrows);
116 |   }
117 | }
118 | 
119 | 
120 | 


--------------------------------------------------------------------------------
/svm/data.h:
--------------------------------------------------------------------------------
 1 | // -*- C++ -*-
 2 | // SVM with stochastic gradient
 3 | // Copyright (C) 2007- Leon Bottou
 4 | 
 5 | // This program is free software; you can redistribute it and/or
 6 | // modify it under the terms of the GNU Lesser General Public
 7 | // License as published by the Free Software Foundation; either
 8 | // version 2.1 of the License, or (at your option) any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // GNU General Public License for more details.
14 | // 
15 | // You should have received a copy of the GNU General Public License
16 | // along with this program; if not, write to the Free Software
17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
18 | 
19 | #ifndef DATA_H
20 | #define DATA_H
21 | 
22 | #include <vector>
23 | #include "vectors.h"
24 | 
// Patterns: one sparse vector per example.
typedef std::vector<SVector> xvec_t;
// Labels: one value per example; load_datafile() only accepts +1 or -1.
typedef std::vector<double>  yvec_t;

// Load the examples stored in FILENAME and append them to XP and YP.
// - The filename suffix selects the format and must be one of
//   .bin, .txt, .bin.gz, .txt.gz.
// - MAXDIM is raised to the largest pattern size found in the file
//   (it is never lowered).
// - When NORMALIZE is true, each pattern with nonzero norm is
//   rescaled to unit Euclidean norm.
// - MAXROWS limits the number of rows read; the default -1 means no limit.
void load_datafile(const char *filename, 
                   xvec_t &xp, yvec_t &yp, int &maxdim,
                   bool normalize = true,
                   int maxrows = -1);
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/svm/loss.h:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // SVM with stochastic gradient
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This program is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | #ifndef LOSS_H
 20 | #define LOSS_H
 21 | 
 22 | #include <cmath>
 23 | 
 24 | struct LogLoss
 25 | {
 26 |   // logloss(a,y) = log(1+exp(-a*y))
 27 |   static double loss(double a, double y)
 28 |   {
 29 |     double z = a * y;
 30 |     if (z > 18) 
 31 |       return exp(-z);
 32 |     if (z < -18)
 33 |       return -z;
 34 |     return log(1 + exp(-z));
 35 |   }
 36 |   // -dloss(a,y)/da
 37 |   static double dloss(double a, double y)
 38 |   {
 39 |     double z = a * y;
 40 |     if (z > 18) 
 41 |       return y * exp(-z);
 42 |     if (z < -18)
 43 |       return y;
 44 |     return y / (1 + exp(z));
 45 |   }
 46 | };
 47 | 
 48 | struct HingeLoss
 49 | {
 50 |   // hingeloss(a,y) = max(0, 1-a*y)
 51 |   static double loss(double a, double y)
 52 |   {
 53 |     double z = a * y;
 54 |     if (z > 1) 
 55 |       return 0;
 56 |     return 1 - z;
 57 |   }
 58 |   // -dloss(a,y)/da
 59 |   static double dloss(double a, double y)
 60 |   {
 61 |     double z = a * y;
 62 |     if (z > 1) 
 63 |       return 0;
 64 |     return y;
 65 |   }
 66 | };
 67 | 
 68 | struct SquaredHingeLoss
 69 | {
 70 |   // squaredhingeloss(a,y) = 1/2 * max(0, 1-a*y)^2
 71 |   static double loss(double a, double y)
 72 |   {
 73 |     double z = a * y;
 74 |     if (z > 1)
 75 |       return 0;
 76 |     double d = 1 - z;
 77 |     return 0.5 * d * d;
 78 |     
 79 |   }
 80 |   // -dloss(a,y)/da
 81 |   static double dloss(double a, double y)
 82 |   {
 83 |     double z = a * y;
 84 |     if (z > 1) 
 85 |       return 0;
 86 |     return y * (1 - z);
 87 |   }
 88 | };
 89 | 
 90 | struct SmoothHingeLoss
 91 | {
 92 |   // smoothhingeloss(a,y) = ...
 93 |   static double loss(double a, double y)
 94 |   {
 95 |     double z = a * y;
 96 |     if (z > 1)
 97 |       return 0;
 98 |     if (z < 0)
 99 |       return 0.5 - z;
100 |     double d = 1 - z;
101 |     return 0.5 * d * d;
102 |   }
103 |   // -dloss(a,y)/da
104 |   static double dloss(double a, double y)
105 |   {
106 |     double z = a * y;
107 |     if (z > 1) 
108 |       return 0;
109 |     if (z < 0)
110 |       return y;
111 |     return y * (1 - z);
112 |   }
113 | };
114 | 
115 | #endif
116 | 


--------------------------------------------------------------------------------
/svm/old/Makefile:
--------------------------------------------------------------------------------
 1 | # SVM with stochastic gradient
 2 | 
 3 | # This program is free software; you can redistribute it and/or modify
 4 | # it under the terms of the GNU General Public License as published by
 5 | # the Free Software Foundation; either version 2 of the License, or
 6 | # (at your option) any later version.
 7 | # 
 8 | # This program is distributed in the hope that it will be useful,
 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 | # GNU General Public License for more details.
12 | # 
13 | # You should have received a copy of the GNU General Public License
14 | # along with this program; if not, write to the Free Software
15 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
16 | 
17 | 
18 | L=../../lib
19 | 
20 | 
21 | CXX=g++
22 | OPTS=-g -O2
23 | OPT=
24 | CXXFLAGS= ${OPTS} ${OPT} -Wall -I$L
25 | LIBS = -lz -lm
26 | 
27 | PROGRAMS = svmsgd2 svmsgdqn svmcg svmolbfgs
28 | 
29 | all: ${PROGRAMS}
30 | 
31 | clean:
32 | 	-rm ${PROGRAMS} 2>/dev/null
33 | 	-rm *.o 2>/dev/null
34 | 
35 | svmsgd2: svmsgd2.o vectors.o gzstream.o timer.o
36 | 	${CXX} ${CXXFLAGS} -o $@ svmsgd2.o vectors.o gzstream.o timer.o ${LIBS}
37 | 
# Link svmsgdqn from its own object file (not from svmsgd2.o).
svmsgdqn: svmsgdqn.o vectors.o gzstream.o timer.o
	${CXX} ${CXXFLAGS} -o $@ svmsgdqn.o vectors.o gzstream.o timer.o ${LIBS}
40 | 
41 | svmolbfgs: svmolbfgs.o vectors.o gzstream.o timer.o
42 | 	${CXX} ${CXXFLAGS} -o $@ svmolbfgs.o vectors.o gzstream.o timer.o ${LIBS}
43 | 
44 | svmcg: svmcg.o vectors.o gzstream.o timer.o
45 | 	${CXX} ${CXXFLAGS} -o $@ svmcg.o vectors.o gzstream.o timer.o ${LIBS}
46 | 
47 | preprocess.o: preprocess.cpp $L/vectors.h  $L/gzstream.h
48 | 	${CXX} ${CXXFLAGS} -c -o $@ preprocess.cpp
49 | 
50 | svmsgd2.o: svmsgd2.cpp $L/vectors.h  $L/gzstream.h $L/timer.h
51 | 	${CXX} ${CXXFLAGS} -c -o $@ svmsgd2.cpp
52 | 
53 | svmsgdqn.o: svmsgdqn.cpp $L/vectors.h  $L/gzstream.h $L/timer.h
54 | 	${CXX} ${CXXFLAGS} -c -o $@ svmsgdqn.cpp
55 | 
56 | svmolbfgs.o: svmolbfgs.cpp $L/vectors.h  $L/gzstream.h $L/timer.h
57 | 	${CXX} ${CXXFLAGS} -c -o $@ svmolbfgs.cpp
58 | 
59 | svmcg.o: svmcg.cpp $L/vectors.h  $L/gzstream.h $L/timer.h
60 | 	${CXX} ${CXXFLAGS} -c -o $@ svmcg.cpp
61 | 
62 | vectors.o: $L/vectors.cpp $L/vectors.h  $L/wrapper.h
63 | 	${CXX} ${CXXFLAGS} -c -o $@ $L/vectors.cpp
64 | 
65 | gzstream.o: $L/gzstream.cpp $L/gzstream.h 
66 | 	${CXX} ${CXXFLAGS} -c -o $@ $L/gzstream.cpp
67 | 
68 | timer.o: $L/timer.cpp $L/timer.h 
69 | 	${CXX} ${CXXFLAGS} -c -o $@ $L/timer.cpp
70 | 
71 | 
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/svm/old/README.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | This directory contains older variants of the basic svmsgd code.
 3 | These programs have not been updated to the sgd-2.0 standards
 4 | but should nevertheless work.
 5 | 
 6 | * svmcg computes the svm solution using a primal 
 7 |   batch conjugate gradient method (Chapelle)
 8 | 
 9 | * svmolbfgs is an implementation of the online limited 
10 |   storage BFGS (Schraudolph et al.)
11 | 
12 | * svmsgd2 is an alternative implementation of sgd for sparse
13 |   dataset using different schedules for the updates associated
14 |   with the loss term and the update associated with the
15 |   regularization term.  
16 | 
17 | * svmsgdqn is a diagonal quasi-newton algorithm (Bordes et al.)
18 |   with sometimes good but often inconsistent performance. 
19 |   Using svmasgd is usually a better choice.
20 |   
21 | 


--------------------------------------------------------------------------------
/svm/old/svmcg.cpp:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // SVM with stochastic gradient
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This program is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | 
 20 | 
 21 | // $Id$
 22 | 
 23 | 
 24 | #include "vectors.h"
 25 | #include "gzstream.h"
 26 | #include "timer.h"
 27 | #include <iostream>
 28 | #include <iomanip>
 29 | #include <string>
 30 | #include <map>
 31 | #include <vector>
 32 | #include <cassert>
 33 | #include <cstdlib>
 34 | #include <cmath>
 35 | 
 36 | using namespace std;
 37 | 
 38 | typedef vector<SVector> xvec_t;
 39 | typedef vector<double> yvec_t;
 40 | 
 41 | 
 42 | // Select loss
 43 | #ifndef LOSS
 44 | # define LOSS LOGLOSS
 45 | #endif
 46 | 
 47 | // Magic to find loss name
 48 | #define _NAME(x) #x
 49 | #define _NAME2(x) _NAME(x)
 50 | const char *lossname = _NAME2(LOSS);
 51 | 
 52 | // Available losses
 53 | #define HINGELOSS 1
 54 | #define SMOOTHHINGELOSS 2
 55 | #define SQUAREDHINGELOSS 3
 56 | #define LOGLOSS 10
 57 | #define LOGLOSSMARGIN 11
 58 | 
 59 | // Add bias at index zero during load.
 60 | #define REGULARIZEDBIAS 1
 61 | 
 62 | 
 63 | inline 
 64 | double loss(double z)
 65 | {
 66 | #if LOSS == LOGLOSS
 67 |   if (z > 18)
 68 |     return exp(-z);
 69 |   if (z < -18)
 70 |     return -z;
 71 |   return log(1+exp(-z));
 72 | #elif LOSS == LOGLOSSMARGIN
 73 |   if (z > 18)
 74 |     return exp(1-z);
 75 |   if (z < -18)
 76 |     return 1-z;
 77 |   return log(1+exp(1-z));
 78 | #elif LOSS == SMOOTHHINGELOSS
 79 |   if (z < 0)
 80 |     return 0.5 - z;
 81 |   if (z < 1)
 82 |     return 0.5 * (1-z) * (1-z);
 83 |   return 0;
 84 | #elif LOSS == SQUAREDHINGELOSS
 85 |   if (z < 1)
 86 |     return 0.5 * (1 - z) * (1 - z);
 87 |   return 0;
 88 | #elif LOSS == HINGELOSS
 89 |   if (z < 1)
 90 |     return 1 - z;
 91 |   return 0;
 92 | #else
 93 | # error "Undefined loss"
 94 | #endif
 95 | }
 96 | 
 97 | 
 98 | inline 
 99 | double dloss(double z)
100 | {
101 | #if LOSS == LOGLOSS
102 |   if (z > 18)
103 |     return exp(-z);
104 |   if (z < -18)
105 |     return 1;
106 |   return 1 / (exp(z) + 1);
107 | #elif LOSS == LOGLOSSMARGIN
108 |   if (z > 18)
109 |     return exp(1-z);
110 |   if (z < -18)
111 |     return 1;
112 |   return 1 / (exp(z-1) + 1);
113 | #elif LOSS == SMOOTHHINGELOSS
114 |   if (z < 0)
115 |     return 1;
116 |   if (z < 1)
117 |     return 1-z;
118 |   return 0;
119 | #elif LOSS == SQUAREDHINGELOSS
120 |   if (z < 1)
121 |     return (1 - z);
122 |   return 0;
123 | #else
124 |   if (z < 1)
125 |     return 1;
126 |   return 0;
127 | #endif
128 | }
129 | 
130 | 
131 | 
132 | // -- conjugate gradient
133 | 
// Linear SVM trained by batch conjugate gradient in the primal.
// Each call to train() performs one conjugate gradient iteration
// (direction update followed by a line search).
class SvmCg
{
public:
  // DIM is the number of weights, LAMBDA the regularization constant,
  // TRAINSIZE the number of training examples (train() asserts that
  // imax-imin+1 equals this number).
  SvmCg(int dim, double lambda, int trainsize);
  // One conjugate gradient iteration on examples imin..imax (inclusive).
  void train(int imin, int imax, const xvec_t &x, const yvec_t &y,
             const char *prefix = "");
  // Print misclassification rate, cost and loss on examples imin..imax.
  void test(int imin, int imax, const xvec_t &x, const yvec_t &y, 
            const char *prefix = "");
private:
  double  lambda;   // regularization constant
  FVector w;        // weight vector
  FVector g;        // -lambda*w + (1/n) sum dloss(z_i) y_i x_i, see train()
  FVector u;        // current search direction (empty before first iteration)

  int n;            // number of training examples
  FVector ywx;      // cached margins y_i * <w,x_i>, updated after each step
  FVector yux;      // y_i * <u,x_i> for the current direction
  double ww;        // dot(w,w)
  double wu;        // dot(w,u)
  double uu;        // dot(u,u)

  // NOTE(review): search() is declared but no definition is visible
  // in this part of the file -- confirm whether it is still needed.
  double search(double tol=1e-4);
  // Objective along the line w + t*u, using the cached quantities above.
  double f(double t);

  // Line search on df(); returns the step size, or -1 on failure.
  double dsearch(double tol=1e-4);
  // Negated derivative of f() with respect to t.
  double df(double t);
};
161 | 
162 | 
163 | 
// Create a solver with DIM weights, regularization constant L and
// room for TRAINSIZE cached margins.  The dot products ww, wu and uu
// are left unset here; train() assigns them before they are read.
SvmCg::SvmCg(int dim, double l, int trainsize)
  : lambda(l), w(dim), n(trainsize)
{
  // one cached value per training example
  // (presumably zero-filled by resize, matching the initial w -- TODO confirm)
  ywx.resize(n);
  yux.resize(n);
}
170 | 
171 | 
172 | double 
173 | SvmCg::f(double t)
174 | {
175 |   double cost = 0;
176 |   for (int i=0; i<n; i++)
177 |     cost += loss( ywx[i] + t * yux[i] );
178 |   double norm = ww + 2 * t * wu + t * t * uu;
179 |   return 0.5 * lambda * norm + cost / n;
180 | }
181 | 
182 | 
183 | double 
184 | SvmCg::df(double t)
185 | {
186 |   double dcost = 0;
187 |   for (int i=0; i<n; i++)
188 |     dcost += dloss( ywx[i] + t * yux[i] ) * yux[i];
189 |   double dnorm = wu + t * uu;
190 |   return - lambda * dnorm + dcost / n;
191 | }
192 | 
193 | 
// Line search along the current direction: look for a step t > 0
// where df(t) changes sign.  TOL is a tolerance relative to the width
// of the initial bracket.  Returns the step, or -1 when no usable
// bracket can be found (train() then restarts conjugate gradient).
double 
SvmCg::dsearch(double tol)
{
  double a = 0;
  double fa = df(a);
  double b = 1;
  double fb = df(b);
  // df(0) < 0: the objective does not decrease along u
  if (fa < 0)
    return -1;
  // double b until df(b) is no longer positive,
  // giving up when df starts increasing again
  while (fb > 0)
    {
      double ofb = fb;
      b = b * 2;
      assert(b < 1e80);
      fb = df(b);
      if (fb > ofb)
        break;
    }
  // no sign change found
  if (fb > 0)
    return -1;
  // make the tolerance relative to the bracket width
  tol *= b - a;
  // e and d track the sizes of previous secant steps
  // (presumably to force bisection when steps stop shrinking)
  double e = b - a;
  double d = e;
  while (b - a > 2 * tol && fa - fb > 0)
    {
      // m: bisection point; x: secant estimate of the zero of df
      double m = (a + b) / 2;
      double x = (fa * b - fb * a) / (fa - fb);
      // accept the secant point only strictly inside the bracket
      // and close enough to the midpoint; otherwise bisect
      if (x > a && x < b && fabs(x - m) < fabs(e))
        { e = d / 2; d = x - m; }
      else
        { x = m; }

      // keep the half of the bracket where df changes sign
      double fx = df(x);
      if (fx > 0)
        { fa = fx; a = x; }
      else if (fx < 0)
        { fb = fx; b = x; }
      else
        return x;
    }
  return (a + b) / 2;
}
236 | 
237 | 
238 | void 
239 | SvmCg::train(int imin, int imax, 
240 |               const xvec_t &xp, const yvec_t &yp,
241 |               const char *prefix)
242 | {
243 |   cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
244 |   assert(imin <= imax);
245 |   assert(n == imax - imin + 1);
246 |   
247 |   FVector oldg = g;
248 |   g.clear();
249 |   g.add(w, -lambda);
250 |   double cost = 0;
251 |   for (int i=imin; i<=imax; i++)
252 |     {
253 |       const SVector &x = xp.at(i);
254 |       double y = yp.at(i);
255 |       double z = ywx[i-imin];
256 | #if LOSS < LOGLOSS
257 |       if (z < 1)
258 | #endif
259 |         {
260 |           cost += loss(z);
261 |           g.add(x, dloss(z) * y / n);
262 |         }
263 |     }
264 |   ww= dot(w,w);
265 |   cost = 0.5 * lambda * ww + cost / n;
266 | 
267 |   if (u.size())
268 |     {
269 |       // conjugate gradient
270 |       oldg.add(g, -1);
271 |       double beta = - dot(g, oldg) / dot(u, oldg);
272 |       u.combine(beta, g, 1);
273 |     }
274 |   else
275 |     {
276 |       // first iteration
277 |       u = g;
278 |     }
279 |   // line search and step
280 |   wu = dot(w,u);
281 |   uu = dot(u,u);
282 |   cout << prefix << setprecision(6) 
283 |        << "Before: ww=" << ww 
284 |        << ", uu=" << uu
285 |        << ", cost=" << cost << endl;
286 |   for (int i=imin; i<=imax; i++)
287 |     {
288 |       const SVector &x = xp.at(i);
289 |       double y = yp.at(i);
290 |       yux[i-imin] = y * dot(u,x);
291 |     }
292 |   double eta = dsearch();
293 |   if (eta < 0)
294 |     {
295 |       cout << "*** Restarting CG" << endl;
296 |       u.clear();
297 |     }
298 |   else
299 |     {
300 |       w.add(u, eta);
301 |       ywx.add(yux, eta);
302 |     }
303 | }
304 | 
305 | 
306 | void 
307 | SvmCg::test(int imin, int imax, 
308 |              const xvec_t &xp, const yvec_t &yp, 
309 |              const char *prefix)
310 | 
311 | {
312 |   cout << prefix << "Testing on [" << imin << ", " << imax << "]." << endl;
313 |   assert(imin <= imax);
314 |   int nerr = 0;
315 |   double cost = 0;
316 |   for (int i=imin; i<=imax; i++)
317 |     {
318 |       const SVector &x = xp.at(i);
319 |       double y = yp.at(i);
320 |       double wx = dot(w,x);
321 |       double z = y * wx;
322 |       if (z <= 0)
323 |         nerr += 1;
324 | #if LOSS < LOGLOSS
325 |       if (z < 1)
326 | #endif
327 |         cost += loss(z);
328 |     }
329 |   int n = imax - imin + 1;
330 |   double wnorm =  dot(w,w);
331 |   double loss = cost / n;
332 |   cost = loss + 0.5 * lambda * wnorm;
333 |   cout << prefix << setprecision(4)
334 |        << "Misclassification: " << (double)nerr * 100.0 / n << "%." << endl;
335 |   cout << prefix << setprecision(12) 
336 |        << "Cost: " << cost << "." << endl;
337 |   cout << prefix << setprecision(12) 
338 |        << "Loss: " << loss << "." << endl;
339 | }
340 | 
341 | 
342 | 
343 | 
344 | // --- options
345 | 
346 | string trainfile;
347 | string testfile;
348 | double lambda = 1e-4;
349 | int epochs = 100;
350 | int trainsize = -1;
351 | 
// Print the command line synopsis on stderr and exit with status 10.
void 
usage()
{
  cerr << "Usage: svmcg [options] trainfile [testfile]" << endl
       << "Options:" << endl
       << " -lambda <lambda>" << endl
       << " -epochs <epochs>" << endl
       << " -trainsize <n>" << endl
       << endl;
  exit(10);
}
363 | 
364 | void 
365 | parse(int argc, const char **argv)
366 | {
367 |   for (int i=1; i<argc; i++)
368 |     {
369 |       const char *arg = argv[i];
370 |       if (arg[0] != '-')
371 |         {
372 |           if (trainfile.empty())
373 |             trainfile = arg;
374 |           else if (testfile.empty())
375 |             testfile = arg;
376 |           else
377 |             usage();
378 |         }
379 |       else
380 |         {
381 |           while (arg[0] == '-') arg += 1;
382 |           string opt = arg;
383 |           if (opt == "lambda" && i+1<argc)
384 |             {
385 |               lambda = atof(argv[++i]);
386 |               cout << "Using lambda=" << lambda << "." << endl;
387 |               assert(lambda>0 && lambda<1e4);
388 |             }
389 |           else if (opt == "epochs" && i+1<argc)
390 |             {
391 |               epochs = atoi(argv[++i]);
392 |               cout << "Going for " << epochs << " epochs." << endl;
393 |               assert(epochs>0 && epochs<1e6);
394 |             }
395 |           else if (opt == "trainsize" && i+1<argc)
396 |             {
397 |               trainsize = atoi(argv[++i]);
398 |               assert(trainsize > 0);
399 |             }
400 |           else
401 |             usage();
402 |         }
403 |     }
404 |   if (trainfile.empty())
405 |     usage();
406 | }
407 | 
408 | 
409 | // --- loading data
410 | 
411 | int dim;
412 | xvec_t xtrain;
413 | yvec_t ytrain;
414 | xvec_t xtest;
415 | yvec_t ytest;
416 | 
417 | void
418 | load(const char *fname, xvec_t &xp, yvec_t &yp)
419 | {
420 |   cout << "Loading " << fname << "." << endl;
421 |   
422 |   igzstream f;
423 |   f.open(fname);
424 |   if (! f.good())
425 |     {
426 |       cerr << "ERROR: cannot open " << fname << "." << endl;
427 |       exit(10);
428 |     }
429 |   int pcount = 0;
430 |   int ncount = 0;
431 | 
432 |   bool binary;
433 |   string suffix = fname;
434 |   if (suffix.size() >= 7)
435 |     suffix = suffix.substr(suffix.size() - 7);
436 |   if (suffix == ".dat.gz")
437 |     binary = false;
438 |   else if (suffix == ".bin.gz")
439 |     binary = true;
440 |   else
441 |     {
442 |       cerr << "ERROR: filename should end with .bin.gz or .dat.gz" << endl;
443 |       exit(10);
444 |     }
445 | 
446 |   while (f.good())
447 |     {
448 |       SVector x;
449 |       double y;
450 |       if (binary)
451 |         {
452 |           y = (f.get()) ? +1 : -1;
453 |           x.load(f);
454 |         }
455 |       else
456 |         {
457 |           f >> y >> x;
458 |         }
459 | #if REGULARIZEDBIAS
460 |       x.set(0,1);
461 | #endif
462 |       if (f.good())
463 |         {
464 |           assert(y == +1 || y == -1);
465 |           xp.push_back(x);
466 |           yp.push_back(y);
467 |           if (y > 0)
468 |             pcount += 1;
469 |           else
470 |             ncount += 1;
471 |           if (x.size() > dim)
472 |             dim = x.size();
473 |         }
474 |       if (trainsize > 0 && xp.size() > (unsigned int)trainsize)
475 |         break;
476 |     }
477 |   cout << "Read " << pcount << "+" << ncount 
478 |        << "=" << pcount + ncount << " examples." << endl;
479 | }
480 | 
481 | 
482 | 
483 | int 
484 | main(int argc, const char **argv)
485 | {
486 |   parse(argc, argv);
487 |   cout << "Loss=" << lossname 
488 |        << " Bias=" << REGULARIZEDBIAS
489 |        << " RegBias=" << REGULARIZEDBIAS 
490 |        << " Lambda=" << lambda
491 |        << endl;
492 | 
493 |   // load training set
494 |   load(trainfile.c_str(), xtrain, ytrain);
495 |   cout << "Number of features " << dim << "." << endl;
496 |   int imin = 0;
497 |   int imax = xtrain.size() - 1;
498 |   if (trainsize > 0 && imax >= trainsize)
499 |     imax = imin + trainsize -1;
500 |   // prepare svm
501 |   SvmCg svm(dim, lambda, imax-imin+1);
502 |   Timer timer;
503 |   // load testing set
504 |   if (! testfile.empty())
505 |     load(testfile.c_str(), xtest, ytest);
506 |   int tmin = 0;
507 |   int tmax = xtest.size() - 1;
508 | 
509 |   for(int i=0; i<epochs; i++)
510 |     {
511 |       cout << "--------- Epoch " << i+1 << "." << endl;
512 |       timer.start();
513 |       svm.train(imin, imax, xtrain, ytrain);
514 |       timer.stop();
515 |       cout << "Total training time " << setprecision(6) 
516 |            << timer.elapsed() << " secs." << endl;
517 |       svm.test(imin, imax, xtrain, ytrain, "train: ");
518 |       if (tmax >= tmin)
519 |         svm.test(tmin, tmax, xtest, ytest, "test:  ");
520 |     }
521 | }
522 | 


--------------------------------------------------------------------------------
/svm/old/svmolbfgs.cpp:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // SVM with stochastic gradient
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This program is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | 
 20 | 
 21 | // $Id$
 22 | 
 23 | 
 24 | #include "vectors.h"
 25 | #include "gzstream.h"
 26 | #include "timer.h"
 27 | #include <iostream>
 28 | #include <iomanip>
 29 | #include <algorithm>
 30 | #include <string>
 31 | #include <map>
 32 | #include <vector>
 33 | #include <cassert>
 34 | #include <cstdlib>
 35 | #include <cmath>
 36 | #include <cfloat>
 37 | 
 38 | using namespace std;
 39 | 
 40 | typedef vector<SVector> xvec_t;
 41 | typedef vector<double> yvec_t;
 42 | 
 43 | // Select loss
 44 | #ifndef LOSS
 45 | # define LOSS SQUAREDHINGELOSS
 46 | #endif
 47 | 
 48 | // Magic to find loss name
 49 | #define _NAME(x) #x
 50 | #define _NAME2(x) _NAME(x)
 51 | const char *lossname = _NAME2(LOSS);
 52 | 
 53 | // Available losses
 54 | #define HINGELOSS 1
 55 | #define SMOOTHHINGELOSS 2
 56 | #define SQUAREDHINGELOSS 3
 57 | #define LOGLOSS 10
 58 | #define LOGLOSSMARGIN 11
 59 | 
 60 | inline 
 61 | double loss(double z)
 62 | {
 63 | #if LOSS == LOGLOSS
 64 |   if (z >= 0)
 65 |     return log(1+exp(-z));
 66 |   else
 67 |     return -z + log(1+exp(z));
 68 | #elif LOSS == LOGLOSSMARGIN
 69 |   if (z >= 1)
 70 |     return log(1+exp(1-z));
 71 |   else
 72 |     return 1-z + log(1+exp(z-1));
 73 | #elif LOSS == SMOOTHHINGELOSS
 74 |   if (z < 0)
 75 |     return 0.5 - z;
 76 |   if (z < 1)
 77 |     return 0.5 * (1-z) * (1-z);
 78 |   return 0;
 79 | #elif LOSS == SQUAREDHINGELOSS
 80 |   if (z < 1)
 81 |     return 0.5 * (1 - z) * (1 - z);
 82 |   return 0;
 83 | #elif LOSS == HINGELOSS
 84 |   if (z < 1)
 85 |     return 1 - z;
 86 |   return 0;
 87 | #else
 88 | # error "Undefined loss"
 89 | #endif
 90 | }
 91 | 
 92 | inline 
 93 | double dloss(double z)
 94 | {
 95 | #if LOSS == LOGLOSS
 96 |   if (z < 0)
 97 |     return 1 / (exp(z) + 1);
 98 |   double ez = exp(-z);
 99 |   return ez / (ez + 1);
100 | #elif LOSS == LOGLOSSMARGIN
101 |   if (z < 1)
102 |     return 1 / (exp(z-1) + 1);
103 |   double ez = exp(1-z);
104 |   return ez / (ez + 1);
105 | #elif LOSS == SMOOTHHINGELOSS
106 |   if (z < 0)
107 |     return 1;
108 |   if (z < 1)
109 |     return 1-z;
110 |   return 0;
111 | #elif LOSS == SQUAREDHINGELOSS
112 |   if (z < 1)
113 |     return (1 - z);
114 |   return 0;
115 | #else
116 |   if (z < 1)
117 |     return 1;
118 |   return 0;
119 | #endif
120 | }
121 | 
122 | 
123 | // -- online limited-memory BFGS (oLBFGS)
124 | 
// Linear SVM trained with an online limited-storage BFGS update.
// train() makes one pass over the given examples, updating the
// weights after each of them.
class olbfgs
{
public:
  // DIM is the number of weights, LAMBDA the regularization constant.
  olbfgs(int dim, double lambda);
  
  // Estimate the sparsity of examples imin..imax and derive `skip`.
  void calibrate(int imin, int imax, 
		 const xvec_t &xp, const yvec_t &yp);

  // One pass of online updates over examples imin..imax (inclusive).
  void train(int imin, int imax, 
             const xvec_t &x, const yvec_t &y,
             const char *prefix);

  // Print misclassification rate, cost and loss on examples imin..imax.
  void test(int imin, int imax, 
            const xvec_t &x, const yvec_t &y, 
            const char *prefix);
private:
  double  t;        // number of updates performed so far
  double  lambda;   // regularization constant
  FVector w;        // weight vector
  double  bias;     // added to <w,x> in test(); never updated by train()
  int skip;         // set by the constructor and calibrate()
  int count;        // reset at the start of train()
  double t0;        // offset of the step size schedule, see train()

  double m;         // maximum number of stored (s,y) pairs
  vector<FVector> ss;   // stored steps
  vector<FVector> ys;   // stored gradient differences
  double sum_i;     // running sum of dot(s,y)/dot(y,y) over stored pairs
  int i_1;          // index of the most recently stored pair
};
155 | 
156 | 
157 | 
158 | olbfgs::olbfgs(int dim, double l)
159 |   : t(0), lambda(l), w(dim), skip(1000), sum_i(0), i_1(0)
160 | {
161 |   double maxw = 1.0 / sqrt(lambda);
162 |   double typw = sqrt(maxw);
163 |   double eta0 = typw / max(1.0,dloss(-typw));
164 |   t0 = 1 / (eta0 * lambda);
165 |   m = 1.;
166 | }
167 | 
168 | 
169 | void 
170 | olbfgs::calibrate(int imin, int imax, 
171 | 		    const xvec_t &xp, const yvec_t &yp)
172 | {
173 |   cout << "Estimating sparsity" << endl;
174 |   int j;
175 | 
176 |   // compute average gradient size
177 |   double n = 0;
178 |   double r = 0;
179 |   for (j=imin; j<=imax; j++,n++)
180 |     {
181 |       const SVector &x = xp.at(j);
182 |       n += 1;
183 |       r += x.npairs();
184 |     }
185 |   // compute weight decay skip
186 |   skip = (int) ((8 * n * w.size()) / r);
187 |   cout << " using " << n << " examples." << endl;
188 |   cout << " skip: " << skip << endl;
189 | }
190 | 
191 | 
// Make one pass over examples imin..imax (inclusive), performing one
// online quasi-Newton update of w per example.  The markers such as
// "3 (1)" or "(c)" in the trailing comments presumably refer to the
// steps of the published oLBFGS algorithm -- TODO confirm.
void 
olbfgs::train(int imin, int imax, 
		const xvec_t &xp, const yvec_t &yp,
		const char *prefix)
{
  cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
  assert(imin <= imax);
  count = 0;
  for (int i=imin; i<=imax; i++)
    {
      const SVector &x = xp.at(i);
      double y = yp.at(i);
      double z = y * dot(w, x);
      double old_loss = dloss(z);	  
      // initial direction built from w and x
      // (presumably -lambda*w + dloss*y*x, the negative gradient)
      FVector pt = combine(w,-lambda,x,old_loss*y); // 3 (1)

      // first loop over the min(m,t) stored pairs, newest first
      vector<double> alphas;
      alphas.resize((int)min(m,t));
      for (int ii=0; ii<min(m,t); ii++) // 3 (2)
	{
	  // walk backwards from the newest pair, wrapping around
	  int idx = i_1-ii;
	  if(idx<0)
	    idx+=(int)min(t,m);
	  double alpha = dot(ss[idx],pt) / dot(ss[idx],ys[idx]);
	  alphas[idx] = alpha;
	  pt.add(ys[idx],-alpha);	  
	}

      // scaling: average of dot(s,y)/dot(y,y) over the stored pairs,
      // or a small constant when nothing is stored yet
      if(t>0) // 3 (3)
	pt.scale(sum_i/min(m,t));
      else
	pt.scale(0.0001);

      // second loop over the stored pairs, oldest first
      for (int ii=0; ii<min(m,t); ii++) // 3 (4)
	{
	  int idx = i_1+ii+1;
	  if(idx>=(int)min(t,m))
	    idx-=(int)min(t,m);
	  double beta = dot(ys[idx],pt) / dot(ys[idx],ss[idx]);
	  pt.add(ss[idx],(alphas[idx]-beta));
	}


      // apply the decreasing step size and move w
      pt.scale(0.1*(t0/lambda)/(t+t0)); // pt -> st (c)
      w.add(pt);//(d)	  

      // gradient difference measured on the same example
      double z2 = y * dot(w,x);
      double diffloss = dloss(z2) - old_loss;      
      FVector yt = combine(pt,(lambda+t0),x, -y*diffloss); // (e)
      
      // store the new (s,y) pair, replacing the oldest one when full,
      // and keep sum_i in sync with the stored pairs
      if(t<m)
	{
	  ys.push_back(yt);
	  ss.push_back(pt);
	  i_1 = (int)t;
	  sum_i += dot(pt,yt)/dot(yt,yt);
	}
      else
	{
	  int idx = i_1+1;
	  if(idx>=m)
	    idx-=(int)m;
	  sum_i += dot(pt,yt)/dot(yt,yt) - dot(ss[idx],ys[idx])/dot(ys[idx],ys[idx]);
	  ys[idx]=yt;
	  ss[idx]=pt;
	  i_1=idx;
	}
      t += 1;//(i)
    }
  // the bias is not trained here: a literal 0 is printed
  cout << prefix << setprecision(6) 
       << "Norm2: " << dot(w,w) << ", Bias: " << 0 << endl;
}
264 | 
265 | void
266 | olbfgs::test(int imin, int imax, 
267 | 	       const xvec_t &xp, const yvec_t &yp, 
268 | 	       const char *prefix)
269 | 
270 | {
271 |   cout << prefix << "Testing on [" << imin << ", " << imax << "]." << endl;
272 |   assert(imin <= imax);
273 |   int nerr = 0;
274 |   double cost = 0;
275 |   for (int i=imin; i<=imax; i++)
276 |     {
277 |       const SVector &x = xp.at(i);
278 |       double y = yp.at(i);
279 |       double wx = dot(w,x);
280 |       double z = y * (wx + bias);
281 |       if (z <= 0)
282 |         nerr += 1;
283 | #if LOSS < LOGLOSS
284 |       if (z < 1)
285 | #endif
286 |         cost += loss(z);
287 |     }
288 |   int n = imax - imin + 1;
289 |   double loss = cost / n;
290 |   cost = loss + 0.5 * lambda * dot(w,w);
291 | 
292 |   cout << prefix << setprecision(4)
293 |        << "Misclassification: " << (double)nerr * 100.0 / n << "%." << endl;
294 |   cout << prefix << setprecision(12) 
295 |        << "Cost: " << cost << "." << endl;
296 |   cout << prefix << setprecision(12) 
297 |        << "Loss: " << loss << "." << endl;
298 |   
299 | }
300 | 
301 | // --- options
302 | 
// Command line settings, filled in by parse().
string trainfile;        // first positional argument
string testfile;         // optional second positional argument
double lambda = 1e-4;    // -lambda
int epochs = 5;          // -epochs
int trainsize = -1;      // -trainsize; values <= 0 mean "no limit"

// Print the command line synopsis on stderr and terminate with status 10.
void
usage()
{
  static const char *const text[] = {
    "Usage: svmsgd [options] trainfile [testfile]",
    "Options:",
    " -lambda <lambda>",
    " -epochs <epochs>",
    " -trainsize <n>",
    ""
  };
  const int nlines = (int)(sizeof(text) / sizeof(text[0]));
  for (int k = 0; k < nlines; k++)
    cerr << text[k] << endl;
  exit(10);
}
320 | 
321 | void 
322 | parse(int argc, const char **argv)
323 | {
324 |   for (int i=1; i<argc; i++)
325 |     {
326 |       const char *arg = argv[i];
327 |       if (arg[0] != '-')
328 |         {
329 |           if (trainfile.empty())
330 |             trainfile = arg;
331 |           else if (testfile.empty())
332 |             testfile = arg;
333 | 	  else
334 |             usage();
335 |         }
336 |       else
337 |         {
338 |           while (arg[0] == '-') arg += 1;
339 |           string opt = arg;
340 |           if (opt == "lambda" && i+1<argc)
341 |             {
342 |               lambda = atof(argv[++i]);
343 |               cout << "Using lambda=" << lambda << "." << endl;
344 |               assert(lambda>0 && lambda<1e4);
345 |             }
346 |           else if (opt == "epochs" && i+1<argc)
347 |             {
348 |               epochs = atoi(argv[++i]);
349 |               cout << "Going for " << epochs << " epochs." << endl;
350 |               assert(epochs>0 && epochs<1e6);
351 |             }
352 |           else if (opt == "trainsize" && i+1<argc)
353 |             {
354 |               trainsize = atoi(argv[++i]);
355 |               assert(trainsize > 0);
356 |             }
357 |           else
358 |             usage();
359 |         }
360 |     }
361 |   if (trainfile.empty())
362 |     usage();
363 | }
364 | 
365 | 
366 | // --- loading data
367 | 
368 | int dim;
369 | xvec_t xtrain;
370 | yvec_t ytrain;
371 | xvec_t xtest;
372 | yvec_t ytest;
373 | 
374 | void
375 | load(const char *fname, xvec_t &xp, yvec_t &yp)
376 | {
377 |   cout << "Loading " << fname << "." << endl;
378 |   
379 |   igzstream f;
380 |   f.open(fname);
381 |   if (! f.good())
382 |     {
383 |       cerr << "ERROR: cannot open " << fname << "." << endl;
384 |       exit(10);
385 |     }
386 |   int pcount = 0;
387 |   int ncount = 0;
388 | 
389 |   bool binary;
390 |   string suffix = fname;
391 |   if (suffix.size() >= 7)
392 |     suffix = suffix.substr(suffix.size() - 7);
393 |   if (suffix == ".dat.gz")
394 |     binary = false;
395 |   else if (suffix == ".bin.gz")
396 |     binary = true;
397 |   else
398 |     {
399 |       cerr << "ERROR: filename should end with .bin.gz or .dat.gz" << endl;
400 |       exit(10);
401 |     }
402 | 
403 |   while (f.good())
404 |     {
405 |       SVector x;
406 |       double y;
407 |       if (binary)
408 |         {
409 |           y = (f.get()) ? +1 : -1;
410 |           x.load(f);
411 |         }
412 |       else
413 |         {
414 |           f >> y >> x;
415 |         }
416 |       if (f.good())
417 |         {
418 |           assert(y == +1 || y == -1);
419 |           xp.push_back(x);
420 |           yp.push_back(y);
421 |           if (y > 0)
422 |             pcount += 1;
423 |           else
424 |             ncount += 1;
425 |           if (x.size() > dim)
426 |             dim = x.size();
427 |         }
428 |       if (trainsize > 0 && xp.size() > (unsigned int)trainsize)
429 |         break;
430 |     }
431 |   cout << "Read " << pcount << "+" << ncount 
432 |        << "=" << pcount + ncount << " examples." << endl;
433 | }
434 | 
435 | 
436 | int 
437 | main(int argc, const char **argv)
438 | {
439 |   parse(argc, argv);
440 |   cout << "Loss=" << lossname 
441 |        << " Bias=" << 0 
442 |        << " RegBias=" << 0
443 |        << " Lambda=" << lambda
444 |        << endl;
445 | 
446 |   // load training set
447 |   load(trainfile.c_str(), xtrain, ytrain);
448 |   cout << "Number of features " << dim << "." << endl;
449 |   int imin = 0;
450 |   int imax = xtrain.size() - 1;
451 |   if (trainsize > 0 && imax >= trainsize)
452 |     imax = imin + trainsize -1;
453 | 
454 |   // prepare svm
455 |   olbfgs svm(dim, lambda);
456 |   Timer timer;
457 | 
458 |   // load testing set
459 |   if (! testfile.empty())
460 |     load(testfile.c_str(), xtest, ytest);
461 |   int tmin = 0;
462 |   int tmax = xtest.size() - 1;
463 |   svm.calibrate(imin, imax, xtrain, ytrain);
464 |   for(int i=0; i<epochs; i++)
465 |     {
466 |       cout << "--------- Epoch " << i+1 << "." << endl;
467 |       timer.start();
468 |       svm.train(imin, imax, xtrain, ytrain, "train: ");
469 |       timer.stop();
470 |       cout << "Total training time " << setprecision(6) 
471 |            << timer.elapsed() << " secs." << endl;
472 |       svm.test(imin, imax, xtrain, ytrain, "train: ");
473 |       if (tmax >= tmin)
474 |         svm.test(tmin, tmax, xtest, ytest, "test:  ");
475 |     }
476 | }
477 | 


--------------------------------------------------------------------------------
/svm/old/svmsgd2.cpp:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // SVM with stochastic gradient
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This program is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | 
 20 | 
 21 | // $Id$
 22 | 
 23 | 
 24 | #include "vectors.h"
 25 | #include "gzstream.h"
 26 | #include "timer.h"
 27 | #include <iostream>
 28 | #include <iomanip>
 29 | #include <algorithm>
 30 | #include <string>
 31 | #include <map>
 32 | #include <vector>
 33 | #include <cassert>
 34 | #include <cstdlib>
 35 | #include <cmath>
 36 | 
 37 | using namespace std;
 38 | 
 39 | 
// Containers for a data set: one sparse pattern and one label per example.
typedef vector<SVector> xvec_t;
typedef vector<double> yvec_t;

// Select loss
// (can be overridden from the compiler command line, e.g. -DLOSS=LOGLOSS;
//  the default is the hinge loss)
#ifndef LOSS
# define LOSS HINGELOSS
#endif

// Magic to find loss name
// Two macro levels are needed so that LOSS is expanded before being
// stringified.  This must stay ABOVE the numeric definitions below: at
// this point HINGELOSS etc. are not macros yet, so lossname receives the
// symbolic name (e.g. "HINGELOSS") rather than its number.
#define _NAME(x) #x
#define _NAME2(x) _NAME(x)
const char *lossname = _NAME2(LOSS);

// Available losses
// The numbering matters: train() and test() use "#if LOSS < LOGLOSS" to
// recognize the losses that are exactly zero for margins z >= 1.
#define HINGELOSS 1
#define SMOOTHHINGELOSS 2
#define SQUAREDHINGELOSS 3
#define LOGLOSS 10
#define LOGLOSSMARGIN 11

// Zero when no bias
// One when bias term
#define BIAS 1

// One when bias is regularized
// (only consulted when BIAS is nonzero)
#define REGULARIZEBIAS 1
 66 | 
 67 | 
 68 | inline 
 69 | double loss(double z)
 70 | {
 71 | #if LOSS == LOGLOSS
 72 |   if (z >= 0)
 73 |     return log(1+exp(-z));
 74 |   else
 75 |     return -z + log(1+exp(z));
 76 | #elif LOSS == LOGLOSSMARGIN
 77 |   if (z >= 1)
 78 |     return log(1+exp(1-z));
 79 |   else
 80 |     return 1-z + log(1+exp(z-1));
 81 | #elif LOSS == SMOOTHHINGELOSS
 82 |   if (z < 0)
 83 |     return 0.5 - z;
 84 |   if (z < 1)
 85 |     return 0.5 * (1-z) * (1-z);
 86 |   return 0;
 87 | #elif LOSS == SQUAREDHINGELOSS
 88 |   if (z < 1)
 89 |     return 0.5 * (1 - z) * (1 - z);
 90 |   return 0;
 91 | #elif LOSS == HINGELOSS
 92 |   if (z < 1)
 93 |     return 1 - z;
 94 |   return 0;
 95 | #else
 96 | # error "Undefined loss"
 97 | #endif
 98 | }
 99 | 
// Negated derivative -dloss/dz of the selected loss at margin z.
// The result is never negative; train() multiplies it by the learning
// rate and the label to obtain the update applied to the weights.
inline
double dloss(double z)
{
#if LOSS == LOGLOSS
  // exp() is only ever called with a non-positive argument
  if (! (z < 0))
    {
      const double e = exp(-z);
      return e / (e + 1);
    }
  return 1 / (exp(z) + 1);
#elif LOSS == LOGLOSSMARGIN
  if (! (z < 1))
    {
      const double e = exp(1-z);
      return e / (e + 1);
    }
  return 1 / (exp(z-1) + 1);
#elif LOSS == SMOOTHHINGELOSS
  return (z < 0) ? 1
       : (z < 1) ? 1-z
       : 0;
#elif LOSS == SQUAREDHINGELOSS
  return (z < 1) ? (1 - z) : 0;
#else
  // hinge loss (also the fallback for any other value of LOSS)
  return (z < 1) ? 1 : 0;
#endif
}
129 | 
130 | 
131 | // -- stochastic gradient
132 | 
// Linear classifier trained by stochastic gradient descent.
// The weights w and the bias are updated one example at a time with
// learning rate 1/(lambda*t); the L2 shrinkage of w is applied in
// batches, once every `skip` iterations.
class SvmSgd
{
public:
  // Create a zero-bias model with a dim-sized weight vector and
  // regularization constant lambda.
  SvmSgd(int dim, double lambda);
  
  // Estimate bscale and skip from examples imin..imax (inclusive).
  void calibrate(int imin, int imax, 
               const xvec_t &x, const yvec_t &y);
  
  // Perform one pass over examples imin..imax (inclusive).
  // prefix is printed at the start of each output line.
  void train(int imin, int imax, 
             const xvec_t &x, const yvec_t &y,
             const char *prefix = "");
  // Print misclassification rate, cost and loss on examples
  // imin..imax (inclusive).
  void test(int imin, int imax, 
            const xvec_t &x, const yvec_t &y, 
            const char *prefix = "");
private:
  double  t;        // iteration counter; starting value set by the constructor
  double  lambda;   // regularization constant
  FVector w;        // weight vector
  double  bias;     // bias term added to dot(w,x)
  double  bscale;   // scaling applied to bias updates (set by calibrate)
  int skip;         // iterations between two shrinkages of w (set by calibrate)
  int count;        // countdown to the next shrinkage; reset to skip by train
};
156 | 
157 | 
158 | 
159 | SvmSgd::SvmSgd(int dim, double l)
160 |   : lambda(l), w(dim), bias(0),
161 |     bscale(1), skip(1000)
162 | {
163 |   // Shift t in order to have a 
164 |   // reasonable initial learning rate.
165 |   // This assumes |x| \approx 1.
166 |   double maxw = 1.0 / sqrt(lambda);
167 |   double typw = sqrt(maxw);
168 |   double eta0 = typw / max(1.0,dloss(-typw));
169 |   t = 1 / (eta0 * lambda);
170 | }
171 | 
172 | 
173 | void 
174 | SvmSgd::calibrate(int imin, int imax, 
175 |                 const xvec_t &xp, const yvec_t &yp)
176 | {
177 |   cout << "Estimating sparsity and bscale." << endl;
178 |   int j;
179 | 
180 |   // compute average gradient size
181 |   double n = 0;
182 |   double m = 0;
183 |   double r = 0;
184 |   FVector c(w.size());
185 |   for (j=imin; j<=imax && m<=1000; j++,n++)
186 |     {
187 |       const SVector &x = xp.at(j);
188 |       n += 1;
189 |       r += x.npairs();
190 |       const SVector::Pair *p = x;
191 |       while (p->i >= 0 && p->i < c.size())
192 |         {
193 |           double z = c.get(p->i) + fabs(p->v);
194 |           c.set(p->i, z);
195 |           m = max(m, z);
196 |           p += 1;
197 |         }
198 |     }
199 | 
200 |   // bias update scaling
201 |   bscale = m/n;
202 | 
203 |   // compute weight decay skip
204 |   skip = (int) ((8 * n * w.size()) / r);
205 |   cout << " using " << n << " examples." << endl;
206 |   cout << " skip: " << skip 
207 |        << " bscale: " << setprecision(6) << bscale << endl;
208 | }
209 | 
210 | 
211 | void 
212 | SvmSgd::train(int imin, int imax, 
213 |               const xvec_t &xp, const yvec_t &yp,
214 |               const char *prefix)
215 | {
216 |   cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
217 |   assert(imin <= imax);
218 |   count = skip;
219 |   for (int i=imin; i<=imax; i++)
220 |     {
221 |       const SVector &x = xp.at(i);
222 |       double y = yp.at(i);
223 |       double wx = dot(w,x);
224 |       double z = y * (wx + bias);
225 |       double eta = 1.0 / (lambda * t);
226 | #if LOSS < LOGLOSS
227 |       if (z < 1)
228 | #endif
229 |         {
230 |           double etd = eta * dloss(z);
231 |           w.add(x, etd * y);
232 | #if BIAS
233 | #if REGULARIZEBIAS
234 |           bias *= 1 - eta * lambda * bscale;
235 | #endif
236 |           bias += etd * y * bscale;
237 | #endif
238 |         }
239 |       if (--count <= 0)
240 |         {
241 |           double r = 1 - eta * lambda * skip;
242 |           if (r < 0.8)
243 |             r = pow(1 - eta * lambda, skip);
244 |           w.scale(r);
245 |           count = skip;
246 |         }
247 |       t += 1;
248 |     }
249 |   cout << prefix << setprecision(6) 
250 |        << "Norm: " << dot(w,w) << ", Bias: " << bias << endl;
251 | }
252 | 
253 | 
254 | void 
255 | SvmSgd::test(int imin, int imax, 
256 |              const xvec_t &xp, const yvec_t &yp, 
257 |              const char *prefix)
258 | 
259 | {
260 |   cout << prefix << "Testing on [" << imin << ", " << imax << "]." << endl;
261 |   assert(imin <= imax);
262 |   int nerr = 0;
263 |   double cost = 0;
264 |   for (int i=imin; i<=imax; i++)
265 |     {
266 |       const SVector &x = xp.at(i);
267 |       double y = yp.at(i);
268 |       double wx = dot(w,x);
269 |       double z = y * (wx + bias);
270 |       if (z <= 0)
271 |         nerr += 1;
272 | #if LOSS < LOGLOSS
273 |       if (z < 1)
274 | #endif
275 |         cost += loss(z);
276 |     }
277 |   int n = imax - imin + 1;
278 |   double loss = cost / n;
279 |   cost = loss + 0.5 * lambda * dot(w,w);
280 |   cout << prefix << setprecision(4)
281 |        << "Misclassification: " << (double)nerr * 100.0 / n << "%." << endl;
282 |   cout << prefix << setprecision(12) 
283 |        << "Cost: " << cost << "." << endl;
284 |   cout << prefix << setprecision(12) 
285 |        << "Loss: " << loss << "." << endl;
286 | }
287 | 
288 | 
289 | 
290 | 
291 | // --- options
292 | 
// Settings read from the command line by parse().
string trainfile;        // training file (mandatory)
string testfile;         // testing file (optional)
double lambda = 1e-4;    // value of -lambda
int epochs = 5;          // value of -epochs
int trainsize = -1;      // value of -trainsize; <= 0 disables the limit

// Write the synopsis to stderr, then exit with status 10.
void
usage()
{
  static const char *const synopsis[] = {
    "Usage: svmsgd [options] trainfile [testfile]",
    "Options:",
    " -lambda <lambda>",
    " -epochs <epochs>",
    " -trainsize <n>",
    ""
  };
  const char *const *line = synopsis;
  const char *const *const stop = synopsis + sizeof(synopsis) / sizeof(synopsis[0]);
  while (line != stop)
    {
      cerr << *line << endl;
      ++line;
    }
  exit(10);
}
310 | 
311 | void 
312 | parse(int argc, const char **argv)
313 | {
314 |   for (int i=1; i<argc; i++)
315 |     {
316 |       const char *arg = argv[i];
317 |       if (arg[0] != '-')
318 |         {
319 |           if (trainfile.empty())
320 |             trainfile = arg;
321 |           else if (testfile.empty())
322 |             testfile = arg;
323 |           else
324 |             usage();
325 |         }
326 |       else
327 |         {
328 |           while (arg[0] == '-') arg += 1;
329 |           string opt = arg;
330 |           if (opt == "lambda" && i+1<argc)
331 |             {
332 |               lambda = atof(argv[++i]);
333 |               cout << "Using lambda=" << lambda << "." << endl;
334 |               assert(lambda>0 && lambda<1e4);
335 |             }
336 |           else if (opt == "epochs" && i+1<argc)
337 |             {
338 |               epochs = atoi(argv[++i]);
339 |               cout << "Going for " << epochs << " epochs." << endl;
340 |               assert(epochs>0 && epochs<1e6);
341 |             }
342 |           else if (opt == "trainsize" && i+1<argc)
343 |             {
344 |               trainsize = atoi(argv[++i]);
345 |               assert(trainsize > 0);
346 |             }
347 |           else
348 |             usage();
349 |         }
350 |     }
351 |   if (trainfile.empty())
352 |     usage();
353 | }
354 | 
355 | 
356 | // --- loading data
357 | 
358 | int dim;
359 | xvec_t xtrain;
360 | yvec_t ytrain;
361 | xvec_t xtest;
362 | yvec_t ytest;
363 | 
364 | void
365 | load(const char *fname, xvec_t &xp, yvec_t &yp)
366 | {
367 |   cout << "Loading " << fname << "." << endl;
368 |   
369 |   igzstream f;
370 |   f.open(fname);
371 |   if (! f.good())
372 |     {
373 |       cerr << "ERROR: cannot open " << fname << "." << endl;
374 |       exit(10);
375 |     }
376 |   int pcount = 0;
377 |   int ncount = 0;
378 | 
379 |   bool binary;
380 |   string suffix = fname;
381 |   if (suffix.size() >= 7)
382 |     suffix = suffix.substr(suffix.size() - 7);
383 |   if (suffix == ".dat.gz")
384 |     binary = false;
385 |   else if (suffix == ".bin.gz")
386 |     binary = true;
387 |   else
388 |     {
389 |       cerr << "ERROR: filename should end with .bin.gz or .dat.gz" << endl;
390 |       exit(10);
391 |     }
392 | 
393 |   while (f.good())
394 |     {
395 |       SVector x;
396 |       double y;
397 |       if (binary)
398 |         {
399 |           y = (f.get()) ? +1 : -1;
400 |           x.load(f);
401 |         }
402 |       else
403 |         {
404 |           f >> y >> x;
405 |         }
406 |       if (f.good())
407 |         {
408 |           assert(y == +1 || y == -1);
409 |           xp.push_back(x);
410 |           yp.push_back(y);
411 |           if (y > 0)
412 |             pcount += 1;
413 |           else
414 |             ncount += 1;
415 |           if (x.size() > dim)
416 |             dim = x.size();
417 |         }
418 |       if (trainsize > 0 && xp.size() > (unsigned int)trainsize)
419 |         break;
420 |     }
421 |   cout << "Read " << pcount << "+" << ncount 
422 |        << "=" << pcount + ncount << " examples." << endl;
423 | }
424 | 
425 | 
426 | 
427 | 
428 | int 
429 | main(int argc, const char **argv)
430 | {
431 |   parse(argc, argv);
432 |   cout << "Loss=" << lossname 
433 |        << " Bias=" << BIAS 
434 |        << " RegBias=" << REGULARIZEBIAS
435 |        << " Lambda=" << lambda
436 |        << endl;
437 | 
438 |   // load training set
439 |   load(trainfile.c_str(), xtrain, ytrain);
440 |   cout << "Number of features " << dim << "." << endl;
441 |   int imin = 0;
442 |   int imax = xtrain.size() - 1;
443 |   if (trainsize > 0 && imax >= trainsize)
444 |     imax = imin + trainsize -1;
445 |   // prepare svm
446 |   SvmSgd svm(dim, lambda);
447 |   Timer timer;
448 | 
449 |   // load testing set
450 |   if (! testfile.empty())
451 |     load(testfile.c_str(), xtest, ytest);
452 |   int tmin = 0;
453 |   int tmax = xtest.size() - 1;
454 | 
455 |   svm.calibrate(imin, imax, xtrain, ytrain);
456 |   for(int i=0; i<epochs; i++)
457 |     {
458 |       
459 |       cout << "--------- Epoch " << i+1 << "." << endl;
460 |       timer.start();
461 |       svm.train(imin, imax, xtrain, ytrain);
462 |       timer.stop();
463 |       cout << "Total training time " << setprecision(6) 
464 |            << timer.elapsed() << " secs." << endl;
465 |       svm.test(imin, imax, xtrain, ytrain, "train: ");
466 |       if (tmax >= tmin)
467 |         svm.test(tmin, tmax, xtest, ytest, "test:  ");
468 |     }
469 | }
470 | 


--------------------------------------------------------------------------------
/svm/prep_alpha.cpp:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // SVM with stochastic gradient (preprocessing)
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This program is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | 
 20 | #include <iostream>
 21 | #include <string>
 22 | #include <map>
 23 | #include <vector>
 24 | #include <algorithm>
 25 | #include <cstdlib>
 26 | #include <cmath>
 27 | 
 28 | #include "assert.h"
 29 | #include "vectors.h"
 30 | #include "gzstream.h"
 31 | 
 32 | using namespace std;
 33 | 
 34 | #define DATADIR "../data/pascal/"
 35 | #define DATAFILE "alpha"
 36 | #define DATATEST  250000
 37 | 
 38 | typedef vector<SVector> xvec_t;
 39 | typedef vector<double> yvec_t;
 40 | 
 41 | int 
 42 | load(const char *fname, xvec_t &xp, yvec_t &yp)
 43 | {
 44 |   cerr << "# Reading " << fname << endl;
 45 |   ifstream f(fname);
 46 |   if (! f.good())
 47 |     assertfail("Cannot open file " << fname);
 48 |   int count = 0;
 49 |   while (f.good())
 50 |     {
 51 |       double y;
 52 |       SVector x;
 53 |       f >> y >> x;
 54 |       if (f.good())
 55 |         {
 56 |           xp.push_back(x);
 57 |           yp.push_back(y);
 58 |           count += 1;
 59 |         }
 60 |     }
 61 |   if (! f.eof())
 62 |     assertfail("Failed reading " << fname);
 63 |   cerr << "# Done reading " << count << " examples." << endl;
 64 |   return count;
 65 | }
 66 | 
 67 | void
 68 | saveBinary(const char *fname, xvec_t &xp, yvec_t &yp, 
 69 |            vector<int> &index, int imin, int imax)
 70 | {
 71 |   cerr << "# Writing " << fname << endl;
 72 |   ogzstream f;
 73 |   f.open(fname);
 74 |   if (! f.good())
 75 |     assertfail("ERROR: cannot open " << fname << " for writing.");
 76 |   int count = 0;
 77 |   for (int ii=imin; ii<imax; ii++)
 78 |     {
 79 |       int i = index[ii];
 80 |       double y = yp[i];
 81 |       SVector x = xp[i];
 82 |       f.put((y >= 0) ? 1 : 0);
 83 |       x.save(f);
 84 |       count += 1;
 85 |     }
 86 |   cerr << "# Wrote " << count << " examples." << endl;
 87 | }
 88 | 
 89 | 
 90 | int main(int, const char**)
 91 | {
 92 |   // load data
 93 |   vector<SVector> xp;
 94 |   vector<double> yp;
 95 |   int count = load(DATADIR DATAFILE ".txt", xp, yp);
 96 |   // compute random shuffle
 97 |   cerr << "# Shuffling" << endl;
 98 |   vector<int> index(count);
 99 |   for (int i=0; i<count; i++) index[i] = i;
100 |   random_shuffle(index.begin(), index.end());
101 |   random_shuffle(index.begin(), index.end());
102 |   // saving
103 |   saveBinary(DATAFILE ".test.bin.gz", xp, yp, index, 0, DATATEST);  
104 |   saveBinary(DATAFILE ".train.bin.gz", xp, yp, index, DATATEST, count);
105 | }
106 | 


--------------------------------------------------------------------------------
/svm/prep_rcv1.cpp:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // SVM with stochastic gradient (preprocessing)
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This program is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | #include <iostream>
 20 | #include <string>
 21 | #include <map>
 22 | #include <vector>
 23 | #include <algorithm>
 24 | #include <cstdlib>
 25 | #include <cmath>
 26 | 
 27 | #include "assert.h"
 28 | #include "vectors.h"
 29 | #include "gzstream.h"
 30 | 
 31 | 
 32 | using namespace std;
 33 | 
 34 | #if defined(_GXX_EXPERIMENTAL_CXX0X__)
 35 | # include <unordered_map>
 36 | # define hash_map unordered_map
 37 | #elsif defined(__GNUC__)
 38 | # include <ext/hash_map>
 39 | using __gnu_cxx::hash_map;
 40 | namespace __gnu_cxx {
 41 |   template<>
 42 |   struct hash<string> {
 43 |     hash<char*> h;
 44 |     inline size_t operator()(const string &s) const { return h(s.c_str()); };
 45 |   };
 46 | };
 47 | #else
 48 | # define hash_map map
 49 | #endif
 50 | 
 51 | 
 52 | typedef hash_map<int,bool> classes_t;
 53 | classes_t classes;
 54 | 
 55 | void
 56 | readClasses(const char *fname)
 57 | {
 58 |   cout << "# Reading " << fname << endl;
 59 |   igzstream f;
 60 |   f.open(fname);
 61 |   if (! f.good())
 62 |     assertfail("Cannot open file " << fname);
 63 |   classes.clear();
 64 |   for(;;) {
 65 |     string topic;
 66 |     int id, rev;
 67 |     f >> topic >> id >> rev;
 68 |     if (! f.good())
 69 |       break;
 70 |     if (topic == "CCAT")
 71 |       classes[id] = true;
 72 |     else if (classes.find(id) == classes.end())
 73 |       classes[id] = false;
 74 |   }
 75 |   if (!f.eof())
 76 |     assertfail("Failed reading " << fname);
 77 |   int pcount = 0;
 78 |   int ncount = 0;
 79 |   for (classes_t::const_iterator it=classes.begin(); it!=classes.end(); it++)
 80 |     if (it->second)
 81 |       pcount++;
 82 |     else
 83 |       ncount++;
 84 |   cout << "# Done reading " 
 85 |        << pcount << " positives and " 
 86 |        << ncount << " negatives. " << endl;
 87 | }
 88 | 
 89 | 
 90 | 
 91 | typedef hash_map<string, int> dico_t;
 92 | dico_t dico;
 93 | 
 94 | typedef hash_map<int, SVector> docs_t;
 95 | docs_t train;
 96 | docs_t test;
 97 | 
 98 | 
 99 | void 
100 | readDocs(const char *fname, docs_t &docs, bool freezedico=false)
101 | {
102 |   cerr << "# Reading " << fname << endl;
103 | 
104 |   igzstream f;
105 |   f.open(fname);
106 |   if (! f.good())
107 |     assertfail("Cannot open file " << fname);
108 |   string token;
109 |   f >> token;
110 |   if (token != ".I")
111 |     assertfail("Cannot read initial .I in " << fname);
112 |   int id = 0;
113 |   int count = 0;
114 |   while(f.good())
115 |     {
116 |       f >> id >> token;
117 |       count += 1;
118 |       if (! f.good() || token != ".W")
119 |         assertfail("Cannot read \"<id> .W\".");
120 |       int wid = -1;
121 |       string otoken;
122 |       SVector s;
123 |       for(;;)
124 |         {
125 |           f >> token;
126 |           if (!f.good() || token == ".I")
127 |             break;
128 |           if (token != otoken)
129 |             {
130 |               dico_t::iterator it = dico.find(token);
131 |               if (it != dico.end())
132 |                 wid = it->second;
133 |               else if (freezedico)
134 |                 continue;
135 |               else
136 |                 {
137 |                   wid = dico.size() + 1;
138 |                   dico[token] = wid;
139 |                 }
140 |               otoken = token;
141 |             }
142 |           s.set(wid, s.get(wid)+1.0);
143 |         }
144 |       if (s.npairs() <= 0)
145 |         assertfail("Empty vector " << id << "?");
146 |       docs[id] = s;
147 |     }
148 |   if (!f.eof())
149 |     assertfail("Failed reading words");
150 |   cerr << "# Done reading " << count << " documents." << endl;
151 | }
152 | 
153 | 
154 | typedef vector<int> intvector_t;
155 | intvector_t trainid;
156 | intvector_t testid;
157 | 
158 | void
159 | listKeys(docs_t &docs, intvector_t &ivec, bool shuffle=false)
160 | {
161 |   ivec.clear();
162 |   for (docs_t::iterator it = docs.begin(); it != docs.end(); it++)
163 |     ivec.push_back(it->first);
164 |   if (shuffle)
165 |     random_shuffle(ivec.begin(), ivec.end());
166 | }
167 | 
168 | 
169 | 
170 | void 
171 | computeNormalizedTfIdf()
172 | {
173 |   cerr << "# Computing document frequencies" << endl;
174 | 
175 |   int terms = dico.size();
176 |   vector<double> nt(terms+1);
177 |   
178 |   double nd = trainid.size();
179 |   for(int i=0; i<terms+1; i++)
180 |     nt[i] = 0;
181 |   for(int i=0; i<(int)trainid.size(); i++)
182 |     {
183 |       int id = trainid[i];
184 |       SVector s = train[id];
185 |       for (const SVector::Pair *p = s; p->i >= 0; p++)
186 |         if (p->v > 0)
187 |           nt[p->i] += 1;
188 |     }
189 |   
190 |   cerr << "# Computing TF/IDF for training set" << endl;
191 |   for(int i=0; i<(int)trainid.size(); i++)
192 |     {
193 |       int id = trainid[i];
194 |       SVector s = train[id];
195 |       SVector v;
196 |       for (const SVector::Pair *p = s; p->i >= 0; p++)
197 |         if (nt[p->i] > 0)
198 |           v.set(p->i, (1.0 + log(p->v)) * log(nd/nt[p->i]));
199 |       double norm = dot(v,v);
200 |       v.scale(1.0 / sqrt(norm));
201 |       train[id] = v;
202 |     }
203 |   cerr << "# Computing TF/IDF for testing set" << endl;
204 |   for(int i=0; i<(int)testid.size(); i++)
205 |     {
206 |       int id = testid[i];
207 |       SVector s = test[id];
208 |       SVector v;
209 |       for (const SVector::Pair *p = s; p->i >= 0; p++)
210 |         if (nt[p->i] > 0)
211 |           v.set(p->i, (1.0 + log(p->v)) * log(nd/nt[p->i]));
212 |       double norm = dot(v,v);
213 |       v.scale(1.0 / sqrt(norm));
214 |       test[id] = v;
215 |     }
216 |   cerr << "# Done." << endl;
217 | }
218 | 
219 | 
220 | 
221 | 
222 | void
223 | saveBinary(const char *fname, docs_t &docs, intvector_t &ids)
224 | {
225 |   cerr << "# Writing " << fname << "."  << endl;
226 |   ogzstream f;
227 |   f.open(fname);
228 |   if (! f.good())
229 |     assertfail("ERROR: cannot open " << fname << " for writing.");
230 |   int pcount = 0;
231 |   int ncount = 0;
232 |   int npairs = 0;
233 |   for(int i=0; i<(int)ids.size(); i++)
234 |     {
235 |       int id = ids[i];
236 |       bool y = classes[id];
237 |       if (y)
238 |         pcount += 1;
239 |       else
240 |         ncount += 1;
241 |       SVector s = docs[id];
242 |       int p = s.npairs();
243 |       npairs += p;
244 |       if (p <= 0)
245 |         {
246 |           cerr << "ERROR: empty vector " << id << "." << endl;
247 |           ::exit(10);
248 |         }
249 |       
250 |       f.put( y ? 1 : 0);
251 |       s.save(f);
252 |       if (! f.good())
253 |         {
254 |           cerr << "ERROR: writing " << fname << " for writing." << endl;
255 |           ::exit(10);
256 |         }
257 |     }
258 | 
259 |   cerr << "# Done. Wrote " << ids.size() << " examples." << endl;
260 |   cerr << "#   with " << npairs << " pairs, " 
261 |        << pcount << " positives, and "
262 |        << ncount << " negatives." << endl;
263 | }
264 | 
265 | 
266 | void
267 | saveSvmLight(const char *fname, docs_t &docs, intvector_t &ids)
268 | {
269 |   cerr << "# Writing " << fname << "."  << endl;
270 |   ogzstream f;
271 |   f.open(fname);
272 |   if (! f.good())
273 |     assertfail("ERROR: cannot open " << fname << " for writing.");
274 |   for(int i=0; i<(int)ids.size(); i++)
275 |     {
276 |       int id = ids[i];
277 |       bool y = classes[id];
278 |       SVector s = docs[id];
279 |       int p = s.npairs();
280 |       if (p <= 0)
281 |         {
282 |           cerr << "ERROR: empty vector " << id << "." << endl;
283 |           ::exit(10);
284 |         }
285 |       f << ((y) ? +1 : -1);
286 |       f << s;
287 |       if (! f.good())
288 |         {
289 |           cerr << "ERROR: writing " << fname << " for writing." << endl;
290 |           ::exit(10);
291 |         }
292 |     }
293 |   
294 |   cerr << "# Done. Wrote " << ids.size() << " examples." << endl;
295 | }
296 | 
297 | 
298 | 
299 | #define DATADIR "../data/rcv1/"
300 | 
301 | /// Build the RCV1 files; the lyrl2004 "train" file fills `test`, the "test" parts `train`.
302 | int 
303 | main(int, const char**)
304 | {
305 |   static const char * const testParts[] = {
306 |     DATADIR "lyrl2004_tokens_test_pt0.dat.gz",
307 |     DATADIR "lyrl2004_tokens_test_pt1.dat.gz",
308 |     DATADIR "lyrl2004_tokens_test_pt2.dat.gz",
309 |     DATADIR "lyrl2004_tokens_test_pt3.dat.gz"
310 |   };
311 |   readClasses(DATADIR "rcv1-v2.topics.qrels.gz");
312 |   readDocs(DATADIR "lyrl2004_tokens_train.dat.gz", test);
313 |   cerr << "# Dictionary size (so far) " << dico.size() << endl;
314 |   // We freeze the dictionary at this point.
315 |   // As a result we only use features common 
316 |   // to both the training and testing set.
317 |   // This is consistent with joachims svmperf experiments.
318 |   for (int k=0; k<4; k++)
319 |     readDocs(testParts[k], train, true);
320 |   cerr << "# Got " << test.size() << " testing documents." << endl;
321 |   cerr << "# Got " << train.size() << " training documents." << endl;
322 |   cerr << "# Dictionary size " << dico.size() << endl;
323 |   listKeys(train, trainid, true);
324 |   listKeys(test, testid);
325 |   computeNormalizedTfIdf();
326 |   saveBinary("rcv1.train.bin.gz", train, trainid);
327 |   saveBinary("rcv1.test.bin.gz", test, testid);
328 | #ifdef PREP_SVMLIGHT
329 |   saveSvmLight("rcv1.train.txt.gz", train, trainid);
330 |   saveSvmLight("rcv1.test.txt.gz", test, testid);
331 | #endif
332 |   cerr << "# The End." << endl;
333 | }
334 | 


--------------------------------------------------------------------------------
/svm/prep_webspam.cpp:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // SVM with stochastic gradient (preprocessing)
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This program is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | 
 20 | #include <iostream>
 21 | #include <string>
 22 | #include <map>
 23 | #include <vector>
 24 | #include <algorithm>
 25 | #include <cstdlib>
 26 | #include <cmath>
 27 | 
 28 | #include "assert.h"
 29 | #include "vectors.h"
 30 | #include "gzstream.h"
 31 | 
 32 | using namespace std;
 33 | 
 34 | #define DATADIR "../data/pascal/"
 35 | #define DATAFILE "webspam"
 36 | #define DATATEST  100000
 37 | 
 38 | typedef vector<SVector> xvec_t;
 39 | typedef vector<double> yvec_t;
 40 | 
 41 | /// Append the (label, SVector) records parsed from text file fname to yp
 42 | /// and xp, and return how many records were appended.
 43 | int 
 44 | load(const char *fname, xvec_t &xp, yvec_t &yp)
 45 | {
 46 |   cerr << "# Reading " << fname << endl;
 47 |   ifstream in(fname);
 48 |   if (! in.good())
 49 |     assertfail("Cannot open file " << fname);
 50 |   int nread = 0;
 51 |   while (in.good())
 52 |     {
 53 |       double label;
 54 |       SVector vec;
 55 |       in >> label >> vec;
 56 |       if (! in.good()) break;   // incomplete record: stop reading
 57 |       xp.push_back(vec);
 58 |       yp.push_back(label);
 59 |       nread += 1;
 60 |     }
 61 |   if (! in.eof())
 62 |     assertfail("Failed reading " << fname);
 63 |   cerr << "# Done reading " << nread << " examples." << endl;
 64 |   return nread;
 65 | }
 66 | 
 67 | /// Write examples index[imin] .. index[imax-1] to file fname through an
 68 | /// ogzstream.  Each record is one label byte (1 when y >= 0, else 0)
 69 | /// followed by the vector as written by SVector::save.
 70 | void
 71 | saveBinary(const char *fname, xvec_t &xp, yvec_t &yp, 
 72 |            vector<int> &index, int imin, int imax)
 73 | {
 74 |   cerr << "# Writing " << fname << endl;
 75 |   // Reject ranges that would read outside the index table.
 76 |   if (imin < 0 || imax > (int)index.size())
 77 |     assertfail("ERROR: example range out of bounds for " << fname << ".");
 78 |   ogzstream f;
 79 |   f.open(fname);
 80 |   if (! f.good())
 81 |     assertfail("ERROR: cannot open " << fname << " for writing.");
 82 |   int count = 0;
 83 |   for (int ii=imin; ii<imax; ii++)
 84 |     {
 85 |       int i = index[ii];
 86 |       double y = yp[i];
 87 |       SVector x = xp[i];
 88 |       f.put((y >= 0) ? 1 : 0);
 89 |       x.save(f);
 90 |       // Report a failed write instead of silently truncating the file.
 91 |       if (! f.good())
 92 |         assertfail("ERROR: failed writing " << fname << ".");
 93 |       count += 1;
 94 |     }
 95 |   cerr << "# Wrote " << count << " examples." << endl;
 96 | }
 88 | 
 89 | 
 90 | /// Load the webspam text file, shuffle the examples, and write the first
 91 | /// DATATEST of them as the test set and the remainder as the training set.
 92 | int main(int, const char**)
 93 | {
 94 |   // load data
 95 |   vector<SVector> xp;
 96 |   vector<double> yp;
 97 |   int count = load(DATADIR DATAFILE ".txt", xp, yp);
 98 |   // The test split takes index[0..DATATEST-1]: fail early when the file
 99 |   // is too short rather than indexing past the end of the shuffle table.
100 |   if (count < DATATEST)
101 |     assertfail("ERROR: expected at least " << DATATEST << " examples.");
102 |   // compute random shuffle
103 |   cerr << "# Shuffling" << endl;
104 |   vector<int> index(count);
105 |   for (int i=0; i<count; i++) index[i] = i;
106 |   random_shuffle(index.begin(), index.end());
107 |   random_shuffle(index.begin(), index.end());
108 |   // saving
109 |   saveBinary(DATAFILE ".test.bin.gz", xp, yp, index, 0, DATATEST);  
110 |   saveBinary(DATAFILE ".train.bin.gz", xp, yp, index, DATATEST, count);
111 | }
106 | 


--------------------------------------------------------------------------------
/svm/svmasgd.cpp:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // SVM with averaged stochastic gradient (ASGD)
  3 | // Copyright (C) 2010- Leon Bottou
  4 | 
  5 | // This program is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | #include <algorithm>
 20 | #include <iostream>
 21 | #include <iomanip>
 22 | #include <string>
 23 | #include <map>
 24 | #include <vector>
 25 | #include <cstdlib>
 26 | #include <cmath>
 27 | 
 28 | #include "assert.h"
 29 | #include "vectors.h"
 30 | #include "gzstream.h"
 31 | #include "timer.h"
 32 | #include "loss.h"
 33 | #include "data.h"
 34 | 
 35 | using namespace std;
 36 | 
 37 | // ---- Loss function
 38 | // Compile with -DLOSS=xxxx to define the loss function.
 39 | // Loss functions are defined in file loss.h)
 40 | #ifndef LOSS
 41 | # define LOSS LogLoss
 42 | #endif
 43 | 
 44 | // ---- Bias term
 45 | // Compile with -DBIAS=[1/0] to enable/disable the bias term.
 46 | // Compile with -DREGULARIZED_BIAS=1 to enable regularization on the bias term
 47 | 
 48 | #ifndef BIAS
 49 | # define BIAS 1
 50 | #endif
 51 | #ifndef REGULARIZED_BIAS
 52 | # define REGULARIZED_BIAS 0
 53 | #endif
 54 | 
 55 | 
 56 | 
 57 | // ---- Averaged stochastic gradient descent 
 58 | 
 59 | class SvmAsgd   // linear classifier trained by averaged stochastic gradient descent
 60 | {
 61 | public:
 62 |   SvmAsgd(int dim, double lambda, double tstart, double eta0=0);
 63 |   void renorm();
 64 |   double wnorm();
 65 |   double anorm();
 66 |   double testOne(const SVector &x, double y, double *ploss, double *pnerr);
 67 |   void trainOne(const SVector &x, double y, double eta, double mu);
 68 |   // passes over the examples imin..imax (inclusive)
 69 |   void train(int imin, int imax, const xvec_t &x, const yvec_t &y, const char *prefix = "");
 70 |   void test(int imin, int imax, const xvec_t &x, const yvec_t &y, const char *prefix = "");
 71 |   // selection of the initial gain eta0
 72 |   double evaluateEta(int imin, int imax, const xvec_t &x, const yvec_t &y, double eta);
 73 |   void determineEta0(int imin, int imax, const xvec_t &x, const yvec_t &y);
 74 | private:
 75 |   double  lambda;     // regularization parameter
 76 |   double  eta0;       // initial gain (set by the constructor or determineEta0)
 77 |   double  mu0;        // initial averaging rate
 78 |   double  tstart;     // averaging rate stays 1 while t <= tstart
 79 |   FVector w;          // plain weights, stored scaled: true weights are w / wDivisor
 80 |   double  wDivisor;
 81 |   double  wBias;      // bias of the plain model
 82 |   FVector a;          // averaged weights are (a + wFraction * w) / aDivisor
 83 |   double  aDivisor;
 84 |   double  wFraction;
 85 |   double  aBias;      // bias of the averaged model
 86 |   double  t;          // number of training iterations performed by train()
 87 | };
 88 | 
 89 | /// Constructor: w is built from dim, a starts empty, divisors are 1, t is 0
 90 | SvmAsgd::SvmAsgd(int dim, double lambda, double tstart, double eta0)
 91 |   : lambda(lambda), eta0(eta0),
 92 |     mu0(1), tstart(tstart),
 93 |     w(dim), wDivisor(1), wBias(0),
 94 |     a(), aDivisor(1), wFraction(0), aBias(0), t(0)
 95 | {
 96 | }
 97 | 
 98 | /// Renormalize the weights so that wDivisor = aDivisor = 1 and wFraction = 0
 99 | void
100 | SvmAsgd::renorm()
101 | {
102 |   if (wDivisor == 1.0 && aDivisor == 1.0 && wFraction == 0)
103 |     return;  // already normalized
104 |   // a must be updated first: it still needs the unscaled w.
105 |   a.combine(1/aDivisor, w, wFraction/aDivisor);
106 |   w.scale(1/wDivisor);
107 |   wDivisor = aDivisor = 1;
108 |   wFraction = 0;
109 | }
110 | 
111 | /// Compute the squared norm of the normal weights (w / wDivisor)
112 | double
113 | SvmAsgd::wnorm()
114 | {
115 |   double norm = dot(w,w) / wDivisor / wDivisor;
116 | #if REGULARIZED_BIAS
117 |   norm += wBias * wBias;  // the bias counts only when it is regularized
118 | #endif
119 |   return norm;
120 | }
121 | 
122 | /// Compute the squared norm of the averaged weights
123 | double
124 | SvmAsgd::anorm()
125 | {
126 |   renorm(); // this is simpler: after it, a alone holds the averaged weights
127 |   double norm = dot(a,a);
128 | #if REGULARIZED_BIAS
129 |   norm += aBias * aBias;  // the bias counts only when it is regularized
130 | #endif
131 |   return norm;
132 | }
133 | 
134 | /// Compute the output of the averaged model for one example.
135 | /// Non-null ploss / pnerr accumulate the loss and the error count.
136 | double
137 | SvmAsgd::testOne(const SVector &x, double y, double *ploss, double *pnerr)
138 | {
139 |   // Same as dot(a,x) + aBias after renormalization
140 |   double acc = dot(a,x);
141 |   if (wFraction != 0) 
142 |     acc += dot(w,x) * wFraction;
143 |   const double score = acc / aDivisor + aBias;
144 |   if (ploss)
145 |     *ploss += LOSS::loss(score, y);
146 |   if (pnerr)
147 |     *pnerr += (score * y <= 0) ? 1 : 0;
148 |   return score;
149 | }
150 | 
151 | /// Perform one ASGD iteration on example (x,y) with gain eta and averaging rate mu
152 | void
153 | SvmAsgd::trainOne(const SVector &x, double y, double eta, double mu)
154 | {
155 |   // Renormalize when one of the divisors has grown beyond 1e5
156 |   if (aDivisor > 1e5 || wDivisor > 1e5) renorm();
157 |   // Forward: output of the plain (non-averaged) model
158 |   double s = dot(w,x) / wDivisor + wBias;
159 |   // SGD update for regularization term: w / wDivisor shrinks because wDivisor grows
160 |   wDivisor = wDivisor / (1 - eta * lambda);
161 |   // SGD update for loss term (etd is pre-multiplied by wDivisor since w is stored scaled)
162 |   double d = LOSS::dloss(s, y);
163 |   double etd = eta * d * wDivisor;
164 |   if (etd != 0)
165 |     w.add(x, etd);
166 |   // Averaging: the averaged weights are (a + wFraction * w) / aDivisor
167 |   if (mu >= 1)
168 |     {
169 |       a.clear();  // rate 1: the average becomes the current w, i.e. (0 + 1 * w) / wDivisor
170 |       aDivisor = wDivisor;
171 |       wFraction = 1;
172 |     }
173 |   else if (mu > 0)
174 |     {
175 |       if (etd != 0)
176 |         a.add(x, - wFraction * etd);  // cancels the loss step that w just received inside wFraction * w
177 |       aDivisor = aDivisor / (1 - mu);
178 |       wFraction = wFraction + mu * aDivisor / wDivisor;
179 |     }
180 |   // same for the bias (its gain is eta scaled by 0.01)
181 | #if BIAS
182 |   double etab = eta * 0.01;
183 | #if REGULARIZED_BIAS
184 |   wBias *= (1 - etab * lambda);
185 | #endif
186 |   wBias += etab * d;
187 |   aBias += mu * (wBias - aBias);  // running average of wBias with rate mu
188 | #endif
189 | }
190 | 
191 | 
192 | /// Perform a training epoch over examples imin..imax (inclusive)
193 | void 
194 | SvmAsgd::train(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
195 | {
196 |   cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
197 |   assert(imin <= imax);
198 |   assert(eta0 > 0);
199 |   for (int i=imin; i<=imax; i++, t+=1)
200 |     {
201 |       // gain decays with exponent 0.75; averaging rate is 1 while t <= tstart
202 |       const double gain = eta0 / pow(1 + lambda * eta0 * t, 0.75);
203 |       const double rate = (t <= tstart) ? 1.0 : mu0 / (1 + mu0 * (t - tstart));
204 |       trainOne(xp.at(i), yp.at(i), gain, rate);
205 |     }
206 |   cout << prefix << setprecision(6) << "wNorm=" << wnorm() << " aNorm=" << anorm();
207 | #if BIAS
208 |   cout << " wBias=" << wBias << " aBias=" << aBias;
209 | #endif
210 |   cout << endl;
211 | }
212 | 
213 | /// Perform a test pass over examples imin..imax (inclusive) and print the
214 | /// mean loss, the regularized cost and the misclassification rate.
215 | void 
216 | SvmAsgd::test(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
217 | {
218 |   cout << prefix << "Testing on [" << imin << ", " << imax << "]." << endl;
219 |   assert(imin <= imax);
220 |   double nerr = 0;
221 |   double loss = 0;
222 |   for (int i=imin; i<=imax; i++)
223 |     testOne(xp.at(i), yp.at(i), &loss, &nerr);
224 |   nerr = nerr / (imax - imin + 1);
225 |   loss = loss / (imax - imin + 1);
226 |   // testOne scores with the averaged weights, so the regularization term
227 |   // must use their norm as well (anorm), not the norm of the plain weights.
228 |   double cost = loss + 0.5 * lambda * anorm();
229 |   cout << prefix 
230 |        << "Loss=" << setprecision(12) << loss
231 |        << " Cost=" << setprecision(12) << cost 
232 |        << " Misclassification=" << setprecision(4) << 100 * nerr << "%." 
233 |        << endl;
234 | }
232 | 
233 | /// Perform one epoch with fixed eta on a copy of the model and return
234 | /// the resulting cost (mean loss plus regularization term).
235 | double 
236 | SvmAsgd::evaluateEta(int imin, int imax, const xvec_t &xp, const yvec_t &yp, double eta)
237 | {
238 |   assert(imin <= imax);
239 |   SvmAsgd probe(*this); // take a copy of the current state
240 |   for (int i=imin; i<=imax; i++)
241 |     probe.trainOne(xp.at(i), yp.at(i), eta, 1.0);
242 |   double total = 0;
243 |   for (int i=imin; i<=imax; i++)
244 |     probe.testOne(xp.at(i), yp.at(i), &total, 0);
245 |   const double meanLoss = total / (imax - imin + 1);
246 |   const double cost = meanLoss + 0.5 * lambda * probe.wnorm();
247 |   // cout << "Trying eta=" << eta << " yields cost " << cost << endl;
248 |   return cost;
249 | }
251 | 
252 | /// Choose eta0 by evaluating gains on examples imin..imax, starting with
253 | /// the pair (1, 2) and moving by factors of 2 towards the lower cost.
254 | void 
255 | SvmAsgd::determineEta0(int imin, int imax, const xvec_t &xp, const yvec_t &yp)
256 | {
257 |   const double factor = 2.0;
258 |   double loEta = 1;
259 |   double loCost = evaluateEta(imin, imax, xp, yp, loEta);
260 |   double hiEta = loEta * factor;
261 |   double hiCost = evaluateEta(imin, imax, xp, yp, hiEta);
262 |   if (loCost < hiCost)
263 |     {
264 |       // the smaller gain is better: slide the pair downwards
265 |       do
266 |         {
267 |           hiEta = loEta;
268 |           hiCost = loCost;
269 |           loEta = hiEta / factor;
270 |           loCost = evaluateEta(imin, imax, xp, yp, loEta);
271 |         }
272 |       while (loCost < hiCost);
273 |     }
274 |   else if (hiCost < loCost)
275 |     {
276 |       // the larger gain is better: slide the pair upwards
277 |       do
278 |         {
279 |           loEta = hiEta;
280 |           loCost = hiCost;
281 |           hiEta = loEta * factor;
282 |           hiCost = evaluateEta(imin, imax, xp, yp, hiEta);
283 |         }
284 |       while (hiCost < loCost);
285 |     }
286 |   // the lower gain of the final pair is the one retained
287 |   eta0 = loEta;
288 |   cout << "# Using eta0=" << eta0 << endl;
289 | }
279 | 
280 | 
281 | // --- Command line arguments
282 | 
283 | const char *trainfile = 0;   // first positional argument
284 | const char *testfile = 0;    // second positional argument (optional)
285 | bool normalize = true;       // cleared by -dontnormalize
286 | double lambda = 1e-5;        // set by -lambda
287 | int epochs = 5;              // set by -epochs
288 | int maxtrain = -1;           // stays -1 until -maxtrain is given
289 | double avgstart = 1;         // set by -avgstart; a double since it is parsed with atof
290 | 
291 | 
292 | /// Print the command line help on stderr and exit with status 10.
293 | void
294 | usage(const char *progname)
295 | {
296 |   // Keep only what follows the last '/' so that a full path is
297 |   // reported as the bare program name.
298 |   for (const char *p = progname; *p; p++)
299 |     if (*p == '/')
300 |       progname = p + 1;
301 |   cerr << "Usage: " << progname << " [options] trainfile [testfile]" << endl
302 |        << "Options:" << endl;
303 | #define NAM(n) "    " << setw(16) << left << n << setw(0) << ": "
304 | #define DEF(v) " (default: " << v << ".)"
305 |   cerr << NAM("-lambda x")
306 |        << "Regularization parameter" << DEF(lambda) << endl
307 |        << NAM("-epochs n")
308 |        << "Number of training epochs" << DEF(epochs) << endl
309 |        << NAM("-dontnormalize")
310 |        << "Do not normalize the L2 norm of patterns." << endl
311 |        << NAM("-maxtrain n")
312 |        << "Restrict training set to n examples." << endl
313 |        << NAM("-avgstart x")
314 |        << "Only start averaging after x epochs." << DEF(avgstart) << endl;
315 | #undef NAM
316 | #undef DEF
317 |   ::exit(10);
318 | }
315 | 
316 | /// Parse the command line into the option globals.  Unknown options,
317 | /// options lacking their value, extra positional arguments and a missing
318 | /// training file all end in usage().
319 | void
320 | parse(int argc, const char **argv)
321 | {
322 |   for (int i=1; i<argc; i++)
323 |     {
324 |       const char *arg = argv[i];
325 |       if (arg[0] != '-')
326 |         {
327 |           // positional arguments: training file first, then testing file
328 |           if (! trainfile)
329 |             trainfile = arg;
330 |           else if (! testfile)
331 |             testfile = arg;
332 |           else
333 |             usage(argv[0]);
334 |           continue;
335 |         }
336 |       // option: any number of leading dashes is accepted
337 |       string opt = arg;
338 |       opt.erase(0, opt.find_first_not_of('-'));
339 |       const bool hasValue = (i+1 < argc);
340 |       if (opt == "lambda" && hasValue)
341 |         {
342 |           lambda = atof(argv[++i]);
343 |           assert(lambda>0 && lambda<1e4);
344 |         }
345 |       else if (opt == "epochs" && hasValue)
346 |         {
347 |           epochs = atoi(argv[++i]);
348 |           assert(epochs>0 && epochs<1e6);
349 |         }
350 |       else if (opt == "dontnormalize")
351 |         {
352 |           normalize = false;
353 |         }
354 |       else if (opt == "maxtrain" && hasValue)
355 |         {
356 |           maxtrain = atoi(argv[++i]);
357 |           assert(maxtrain > 0);
358 |         }
359 |       else if (opt == "avgstart" && hasValue)
360 |         {
361 |           avgstart = atof(argv[++i]);
362 |           assert(avgstart > 0);
363 |         }
364 |       else
365 |         {
366 |           cerr << "Option " << argv[i] << " not recognized." << endl;
367 |           usage(argv[0]);
368 |         }
369 |     }
370 |   if (! trainfile)
371 |     usage(argv[0]);
372 | }
371 | 
372 | /// Print on stdout the effective command line and the compile-time settings.
373 | void 
374 | config(const char *progname)
375 | {
376 |   cout << "# Running: " << progname
377 |        << " -lambda " << lambda
378 |        << " -epochs " << epochs
379 |        << " -avgstart " << avgstart;
380 |   if (! normalize)
381 |     cout << " -dontnormalize";
382 |   if (maxtrain > 0)
383 |     cout << " -maxtrain " << maxtrain;
384 |   cout << endl;
385 | #define NAME(x) #x
386 | #define NAME2(x) NAME(x)
387 |   cout << "# Compiled with: "
388 |        << " -DLOSS=" << NAME2(LOSS)
389 |        << " -DBIAS=" << BIAS
390 |        << " -DREGULARIZED_BIAS=" << REGULARIZED_BIAS
391 |        << endl;
392 | }
390 | 
391 | // --- main function
392 | 
393 | // Data sets shared by the whole program, filled by load_datafile in main.
394 | int dims;
395 | xvec_t xtrain;
396 | yvec_t ytrain;
397 | xvec_t xtest;
398 | yvec_t ytest;
399 | 
400 | /// Load the data, select eta0 on a sample, then train and test once per epoch.
401 | int main(int argc, const char **argv)
402 | {
403 |   parse(argc, argv);
404 |   config(argv[0]);
405 |   if (trainfile)
406 |     load_datafile(trainfile, xtrain, ytrain, dims, normalize, maxtrain);
407 |   if (testfile)
408 |     load_datafile(testfile, xtest, ytest, dims, normalize);
409 |   cout << "# Number of features " << dims << "." << endl;
410 |   // prepare svm: index ranges are inclusive
411 |   const int imin = 0;
412 |   const int imax = xtrain.size() - 1;
413 |   const int tmin = 0;
414 |   const int tmax = xtest.size() - 1;
415 |   const bool hasTest = (tmax >= tmin);
416 |   SvmAsgd svm(dims, lambda, avgstart * (imax-imin+1));
417 |   Timer timer;
418 |   // determine eta0 using sample
419 |   const int smin = 0;
420 |   const int smax = imin + min(1000, imax);
421 |   timer.start();
422 |   svm.determineEta0(smin, smax, xtrain, ytrain);
423 |   timer.stop();
424 |   // train
425 |   for (int epoch=1; epoch<=epochs; epoch++)
426 |     {
427 |       cout << "--------- Epoch " << epoch << "." << endl;
428 |       timer.start();
429 |       svm.train(imin, imax, xtrain, ytrain);
430 |       timer.stop();
431 |       cout << "Total training time " << setprecision(6) 
432 |            << timer.elapsed() << " secs." << endl;
433 |       svm.test(imin, imax, xtrain, ytrain, "train: ");
434 |       if (hasTest)
435 |         svm.test(tmin, tmax, xtest, ytest, "test:  ");
436 |     }
437 |   svm.renorm();
438 |   // Linear classifier is in svm.a and svm.aBias
439 |   return 0;
440 | }
438 | 


--------------------------------------------------------------------------------
/svm/svmsgd.cpp:
--------------------------------------------------------------------------------
  1 | // -*- C++ -*-
  2 | // SVM with stochastic gradient
  3 | // Copyright (C) 2007- Leon Bottou
  4 | 
  5 | // This program is free software; you can redistribute it and/or
  6 | // modify it under the terms of the GNU Lesser General Public
  7 | // License as published by the Free Software Foundation; either
  8 | // version 2.1 of the License, or (at your option) any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // GNU General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the GNU General Public License
 16 | // along with this program; if not, write to the Free Software
 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 18 | 
 19 | #include <iostream>
 20 | #include <iomanip>
 21 | #include <string>
 22 | #include <map>
 23 | #include <vector>
 24 | #include <cstdlib>
 25 | #include <cmath>
 26 | 
 27 | #include "assert.h"
 28 | #include "vectors.h"
 29 | #include "gzstream.h"
 30 | #include "timer.h"
 31 | #include "loss.h"
 32 | #include "data.h"
 33 | 
 34 | using namespace std;
 35 | 
 36 | // ---- Loss function
 37 | 
 38 | // Compile with -DLOSS=xxxx to define the loss function.
 39 | // Loss functions are defined in file loss.h)
 40 | #ifndef LOSS
 41 | # define LOSS LogLoss
 42 | #endif
 43 | 
 44 | // ---- Bias term
 45 | 
 46 | // Compile with -DBIAS=[1/0] to enable/disable the bias term.
 47 | // Compile with -DREGULARIZED_BIAS=1 to enable regularization on the bias term
 48 | 
 49 | #ifndef BIAS
 50 | # define BIAS 1
 51 | #endif
 52 | #ifndef REGULARIZED_BIAS
 53 | # define REGULARIZED_BIAS 0
 54 | #endif
 55 | 
 56 | // ---- Plain stochastic gradient descent
 57 | 
 58 | class SvmSgd   // linear classifier trained by plain stochastic gradient descent
 59 | {
 60 | public:
 61 |   SvmSgd(int dim, double lambda, double eta0  = 0);
 62 |   void renorm();
 63 |   double wnorm();
 64 |   double testOne(const SVector &x, double y, double *ploss, double *pnerr);
 65 |   void trainOne(const SVector &x, double y, double eta);
 66 |   // passes over the examples imin..imax (inclusive)
 67 |   void train(int imin, int imax, const xvec_t &x, const yvec_t &y, const char *prefix = "");
 68 |   void test(int imin, int imax, const xvec_t &x, const yvec_t &y, const char *prefix = "");
 69 |   // selection of the initial gain eta0
 70 |   double evaluateEta(int imin, int imax, const xvec_t &x, const yvec_t &y, double eta);
 71 |   void determineEta0(int imin, int imax, const xvec_t &x, const yvec_t &y);
 72 | private:
 73 |   double  lambda;     // regularization parameter
 74 |   double  eta0;       // initial gain (set by the constructor or determineEta0)
 75 |   FVector w;          // weights, stored scaled: true weights are w / wDivisor
 76 |   double  wDivisor;
 77 |   double  wBias;      // bias term
 78 |   double  t;          // number of training iterations performed by train()
 79 | };
 80 | 
 81 | /// Constructor: w is built from dim, the divisor is 1, bias and t are 0
 82 | SvmSgd::SvmSgd(int dim, double lambda, double eta0)
 83 |   : lambda(lambda), eta0(eta0), w(dim),
 84 |     wDivisor(1), wBias(0), t(0)
 85 | {
 86 |   // every member is set by the initializer list
 87 | }
 88 | 
 89 | /// Renormalize the weights: fold wDivisor into w so that the divisor is 1
 90 | void
 91 | SvmSgd::renorm()
 92 | {
 93 |   if (wDivisor == 1.0)
 94 |     return;  // nothing to fold
 95 |   w.scale(1.0 / wDivisor);
 96 |   wDivisor = 1.0;
 97 | }
 99 | 
100 | /// Compute the squared norm of the weights (w / wDivisor)
101 | double
102 | SvmSgd::wnorm()
103 | {
104 |   double norm = dot(w,w) / wDivisor / wDivisor;
105 | #if REGULARIZED_BIAS
106 |   norm += wBias * wBias;  // the bias counts only when it is regularized
107 | #endif
108 |   return norm;
109 | }
110 | 
111 | /// Compute the output for one example; non-null ploss / pnerr accumulate
112 | /// the loss and the error count.
113 | double
114 | SvmSgd::testOne(const SVector &x, double y, double *ploss, double *pnerr)
115 | {
116 |   const double score = dot(w,x) / wDivisor + wBias;
117 |   if (ploss)
118 |     *ploss += LOSS::loss(score, y);
119 |   if (pnerr)
120 |     *pnerr += (score * y <= 0) ? 1 : 0;
121 |   return score;
122 | }
122 | 
123 | /// Perform one SGD iteration on example (x,y) with gain eta
124 | void
125 | SvmSgd::trainOne(const SVector &x, double y, double eta)
126 | {
127 |   // output of the model before the update
128 |   const double score = dot(w,x) / wDivisor + wBias;
129 |   // regularization term: w / wDivisor shrinks because wDivisor grows
130 |   wDivisor /= (1 - eta * lambda);
131 |   if (wDivisor > 1e5)
132 |     renorm();
133 |   // loss term: the step is pre-multiplied by wDivisor since w is stored scaled
134 |   const double slope = LOSS::dloss(score, y);
135 |   if (slope != 0)
136 |     w.add(x, eta * slope * wDivisor);
137 |   // same for the bias (its gain is eta scaled by 0.01)
138 | #if BIAS
139 |   const double etab = eta * 0.01;
140 | #if REGULARIZED_BIAS
141 |   wBias *= (1 - etab * lambda);
142 | #endif
143 |   wBias += etab * slope;
144 | #endif
145 | }
144 | 
145 | 
146 | /// Perform a training epoch over examples imin..imax (inclusive)
147 | void 
148 | SvmSgd::train(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
149 | {
150 |   cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
151 |   assert(imin <= imax);
152 |   assert(eta0 > 0);
153 |   for (int i=imin; i<=imax; i++, t+=1)
154 |     {
155 |       // the gain decays as 1 / (1 + lambda * eta0 * t)
156 |       const double gain = eta0 / (1 + lambda * eta0 * t);
157 |       trainOne(xp.at(i), yp.at(i), gain);
158 |     }
159 |   cout << prefix << setprecision(6) << "wNorm=" << wnorm();
160 | #if BIAS
161 |   cout << " wBias=" << wBias;
162 | #endif
163 |   cout << endl;
164 | }
165 | 
166 | /// Perform a test pass over examples imin..imax (inclusive) and print the
167 | /// mean loss, the regularized cost and the misclassification rate.
168 | void 
169 | SvmSgd::test(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
170 | {
171 |   cout << prefix << "Testing on [" << imin << ", " << imax << "]." << endl;
172 |   assert(imin <= imax);
173 |   double errors = 0;
174 |   double total = 0;
175 |   for (int i=imin; i<=imax; i++)
176 |     testOne(xp.at(i), yp.at(i), &total, &errors);
177 |   const double nerr = errors / (imax - imin + 1);
178 |   const double loss = total / (imax - imin + 1);
179 |   const double cost = loss + 0.5 * lambda * wnorm();
180 |   cout << prefix 
181 |        << "Loss=" << setprecision(12) << loss
182 |        << " Cost=" << setprecision(12) << cost 
183 |        << " Misclassification=" << setprecision(4) << 100 * nerr << "%." 
184 |        << endl;
185 | }
185 | 
186 | /// Perform one epoch with fixed eta on a copy of the model and return
187 | /// the resulting cost (mean loss plus regularization term).
188 | double 
189 | SvmSgd::evaluateEta(int imin, int imax, const xvec_t &xp, const yvec_t &yp, double eta)
190 | {
191 |   assert(imin <= imax);
192 |   SvmSgd probe(*this); // take a copy of the current state
193 |   for (int i=imin; i<=imax; i++)
194 |     probe.trainOne(xp.at(i), yp.at(i), eta);
195 |   double total = 0;
196 |   for (int i=imin; i<=imax; i++)
197 |     probe.testOne(xp.at(i), yp.at(i), &total, 0);
198 |   const double meanLoss = total / (imax - imin + 1);
199 |   const double cost = meanLoss + 0.5 * lambda * probe.wnorm();
200 |   // cout << "Trying eta=" << eta << " yields cost " << cost << endl;
201 |   return cost;
202 | }
204 | 
205 | /// Choose eta0 by evaluating gains on examples imin..imax, starting with
206 | /// the pair (1, 2) and moving by factors of 2 towards the lower cost.
207 | void 
208 | SvmSgd::determineEta0(int imin, int imax, const xvec_t &xp, const yvec_t &yp)
209 | {
210 |   const double factor = 2.0;
211 |   double loEta = 1;
212 |   double loCost = evaluateEta(imin, imax, xp, yp, loEta);
213 |   double hiEta = loEta * factor;
214 |   double hiCost = evaluateEta(imin, imax, xp, yp, hiEta);
215 |   if (loCost < hiCost)
216 |     {
217 |       // the smaller gain is better: slide the pair downwards
218 |       do
219 |         {
220 |           hiEta = loEta;
221 |           hiCost = loCost;
222 |           loEta = hiEta / factor;
223 |           loCost = evaluateEta(imin, imax, xp, yp, loEta);
224 |         }
225 |       while (loCost < hiCost);
226 |     }
227 |   else if (hiCost < loCost)
228 |     {
229 |       // the larger gain is better: slide the pair upwards
230 |       do
231 |         {
232 |           loEta = hiEta;
233 |           loCost = hiCost;
234 |           hiEta = loEta * factor;
235 |           hiCost = evaluateEta(imin, imax, xp, yp, hiEta);
236 |         }
237 |       while (hiCost < loCost);
238 |     }
239 |   // the lower gain of the final pair is the one retained
240 |   eta0 = loEta;
241 |   cout << "# Using eta0=" << eta0 << endl;
242 | }
232 | 
233 | 
234 | // --- Command line arguments
235 | 
236 | const char *trainfile = 0;   // first positional argument
237 | const char *testfile = 0;    // second positional argument (optional)
238 | bool normalize = true;       // cleared by -dontnormalize
239 | double lambda = 1e-5;        // set by -lambda
240 | int epochs = 5;              // set by -epochs
241 | int maxtrain = -1;           // stays -1 until -maxtrain is given
242 | 
243 | 
244 | /// Print the command line help on stderr and exit with status 10.
245 | void
246 | usage(const char *progname)
247 | {
248 |   // Keep only what follows the last '/' so that a full path is
249 |   // reported as the bare program name.
250 |   for (const char *p = progname; *p; p++)
251 |     if (*p == '/')
252 |       progname = p + 1;
253 |   cerr << "Usage: " << progname << " [options] trainfile [testfile]" << endl
254 |        << "Options:" << endl;
255 | #define NAM(n) "    " << setw(16) << left << n << setw(0) << ": "
256 | #define DEF(v) " (default: " << v << ".)"
257 |   cerr << NAM("-lambda x")
258 |        << "Regularization parameter" << DEF(lambda) << endl
259 |        << NAM("-epochs n")
260 |        << "Number of training epochs" << DEF(epochs) << endl
261 |        << NAM("-dontnormalize")
262 |        << "Do not normalize the L2 norm of patterns." << endl
263 |        << NAM("-maxtrain n")
264 |        << "Restrict training set to n examples." << endl;
265 | #undef NAM
266 | #undef DEF
267 |   ::exit(10);
268 | }
265 | 
266 | /// Parse the command line into the option globals.  Unknown options,
267 | /// options lacking their value, extra positional arguments and a missing
268 | /// training file all end in usage().
269 | void
270 | parse(int argc, const char **argv)
271 | {
272 |   for (int i=1; i<argc; i++)
273 |     {
274 |       const char *arg = argv[i];
275 |       if (arg[0] != '-')
276 |         {
277 |           // positional arguments: training file first, then testing file
278 |           if (! trainfile)
279 |             trainfile = arg;
280 |           else if (! testfile)
281 |             testfile = arg;
282 |           else
283 |             usage(argv[0]);
284 |           continue;
285 |         }
286 |       // option: any number of leading dashes is accepted
287 |       string opt = arg;
288 |       opt.erase(0, opt.find_first_not_of('-'));
289 |       const bool hasValue = (i+1 < argc);
290 |       if (opt == "lambda" && hasValue)
291 |         {
292 |           lambda = atof(argv[++i]);
293 |           assert(lambda>0 && lambda<1e4);
294 |         }
295 |       else if (opt == "epochs" && hasValue)
296 |         {
297 |           epochs = atoi(argv[++i]);
298 |           assert(epochs>0 && epochs<1e6);
299 |         }
300 |       else if (opt == "dontnormalize")
301 |         {
302 |           normalize = false;
303 |         }
304 |       else if (opt == "maxtrain" && hasValue)
305 |         {
306 |           maxtrain = atoi(argv[++i]);
307 |           assert(maxtrain > 0);
308 |         }
309 |       else
310 |         {
311 |           cerr << "Option " << argv[i] << " not recognized." << endl;
312 |           usage(argv[0]);
313 |         }
314 |     }
315 |   if (! trainfile)
316 |     usage(argv[0]);
317 | }
316 | 
317 | /// Print on stdout the effective command line and the compile-time settings.
318 | void 
319 | config(const char *progname)
320 | {
321 |   cout << "# Running: " << progname
322 |        << " -lambda " << lambda
323 |        << " -epochs " << epochs;
324 |   if (! normalize)
325 |     cout << " -dontnormalize";
326 |   if (maxtrain > 0)
327 |     cout << " -maxtrain " << maxtrain;
328 |   cout << endl;
329 | #define NAME(x) #x
330 | #define NAME2(x) NAME(x)
331 |   cout << "# Compiled with: "
332 |        << " -DLOSS=" << NAME2(LOSS)
333 |        << " -DBIAS=" << BIAS
334 |        << " -DREGULARIZED_BIAS=" << REGULARIZED_BIAS
335 |        << endl;
336 | }
334 | 
335 | // --- main function
336 | 
337 | // Data sets shared by the whole program, filled by load_datafile in main.
338 | int dims;
339 | xvec_t xtrain;
340 | yvec_t ytrain;
341 | xvec_t xtest;
342 | yvec_t ytest;
343 | 
344 | /// Load the data, select eta0 on a sample, then train and test once per epoch.
345 | int main(int argc, const char **argv)
346 | {
347 |   parse(argc, argv);
348 |   config(argv[0]);
349 |   if (trainfile)
350 |     load_datafile(trainfile, xtrain, ytrain, dims, normalize, maxtrain);
351 |   if (testfile)
352 |     load_datafile(testfile, xtest, ytest, dims, normalize);
353 |   cout << "# Number of features " << dims << "." << endl;
354 |   // prepare svm: index ranges are inclusive
355 |   const int imin = 0;
356 |   const int imax = xtrain.size() - 1;
357 |   const int tmin = 0;
358 |   const int tmax = xtest.size() - 1;
359 |   const bool hasTest = (tmax >= tmin);
360 |   SvmSgd svm(dims, lambda);
361 |   Timer timer;
362 |   // determine eta0 using sample
363 |   const int smin = 0;
364 |   const int smax = imin + min(1000, imax);
365 |   timer.start();
366 |   svm.determineEta0(smin, smax, xtrain, ytrain);
367 |   timer.stop();
368 |   // train
369 |   for (int epoch=1; epoch<=epochs; epoch++)
370 |     {
371 |       cout << "--------- Epoch " << epoch << "." << endl;
372 |       timer.start();
373 |       svm.train(imin, imax, xtrain, ytrain);
374 |       timer.stop();
375 |       cout << "Total training time " << setprecision(6) 
376 |            << timer.elapsed() << " secs." << endl;
377 |       svm.test(imin, imax, xtrain, ytrain, "train: ");
378 |       if (hasTest)
379 |         svm.test(tmin, tmax, xtest, ytest, "test:  ");
380 |     }
381 |   return 0;
382 | }
380 | 


--------------------------------------------------------------------------------
/win/README.txt:
--------------------------------------------------------------------------------
 1 | 
  2 | This directory contains a Visual C++ (VC9) solution for compiling the svm and crf programs.
 3 | After compiling, you must copy the executables (found in the Release directory) into
 4 | their respective source code directories "svm" and "crf".
 5 | 
 6 | The zlib library is a prerequisite:
 7 | - Download the source code of zlib from "http://zlib.net".
 8 | - Compile using  "nmake -f win32\Makefile.msc OBJA=inffast.obj".
 9 | - Copy the files zlib.h zconf.h and zlib.lib into the subdirectory "zlib"
10 | 
11 | 


--------------------------------------------------------------------------------
/win/crfasgd/crfasgd.vcproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="Windows-1252"?>
  2 | <VisualStudioProject
  3 | 	ProjectType="Visual C++"
  4 | 	Version="9.00"
  5 | 	Name="crfasgd"
  6 | 	ProjectGUID="{E3102F60-8536-4A6E-B7C0-F68B60974832}"
  7 | 	RootNamespace="crfasgd"
  8 | 	Keyword="Win32Proj"
  9 | 	TargetFrameworkVersion="196613"
 10 | 	>
 11 | 	<Platforms>
 12 | 		<Platform
 13 | 			Name="Win32"
 14 | 		/>
 15 | 	</Platforms>
 16 | 	<ToolFiles>
 17 | 	</ToolFiles>
 18 | 	<Configurations>
 19 | 		<Configuration
 20 | 			Name="Debug|Win32"
 21 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 22 | 			IntermediateDirectory="$(ConfigurationName)"
 23 | 			ConfigurationType="1"
 24 | 			CharacterSet="1"
 25 | 			>
 26 | 			<Tool
 27 | 				Name="VCPreBuildEventTool"
 28 | 			/>
 29 | 			<Tool
 30 | 				Name="VCCustomBuildTool"
 31 | 			/>
 32 | 			<Tool
 33 | 				Name="VCXMLDataGeneratorTool"
 34 | 			/>
 35 | 			<Tool
 36 | 				Name="VCWebServiceProxyGeneratorTool"
 37 | 			/>
 38 | 			<Tool
 39 | 				Name="VCMIDLTool"
 40 | 			/>
 41 | 			<Tool
 42 | 				Name="VCCLCompilerTool"
 43 | 				Optimization="0"
 44 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\crf&quot;;&quot;$(SolutionDir)\zlib&quot;"
 45 | 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 46 | 				MinimalRebuild="true"
 47 | 				BasicRuntimeChecks="3"
 48 | 				RuntimeLibrary="3"
 49 | 				UsePrecompiledHeader="0"
 50 | 				WarningLevel="3"
 51 | 				DebugInformationFormat="4"
 52 | 			/>
 53 | 			<Tool
 54 | 				Name="VCManagedResourceCompilerTool"
 55 | 			/>
 56 | 			<Tool
 57 | 				Name="VCResourceCompilerTool"
 58 | 			/>
 59 | 			<Tool
 60 | 				Name="VCPreLinkEventTool"
 61 | 			/>
 62 | 			<Tool
 63 | 				Name="VCLinkerTool"
 64 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
 65 | 				LinkIncremental="2"
 66 | 				GenerateDebugInformation="true"
 67 | 				SubSystem="1"
 68 | 				TargetMachine="1"
 69 | 			/>
 70 | 			<Tool
 71 | 				Name="VCALinkTool"
 72 | 			/>
 73 | 			<Tool
 74 | 				Name="VCManifestTool"
 75 | 			/>
 76 | 			<Tool
 77 | 				Name="VCXDCMakeTool"
 78 | 			/>
 79 | 			<Tool
 80 | 				Name="VCBscMakeTool"
 81 | 			/>
 82 | 			<Tool
 83 | 				Name="VCFxCopTool"
 84 | 			/>
 85 | 			<Tool
 86 | 				Name="VCAppVerifierTool"
 87 | 			/>
 88 | 			<Tool
 89 | 				Name="VCPostBuildEventTool"
 90 | 			/>
 91 | 		</Configuration>
 92 | 		<Configuration
 93 | 			Name="Release|Win32"
 94 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 95 | 			IntermediateDirectory="$(ConfigurationName)"
 96 | 			ConfigurationType="1"
 97 | 			CharacterSet="1"
 98 | 			WholeProgramOptimization="1"
 99 | 			>
100 | 			<Tool
101 | 				Name="VCPreBuildEventTool"
102 | 			/>
103 | 			<Tool
104 | 				Name="VCCustomBuildTool"
105 | 			/>
106 | 			<Tool
107 | 				Name="VCXMLDataGeneratorTool"
108 | 			/>
109 | 			<Tool
110 | 				Name="VCWebServiceProxyGeneratorTool"
111 | 			/>
112 | 			<Tool
113 | 				Name="VCMIDLTool"
114 | 			/>
115 | 			<Tool
116 | 				Name="VCCLCompilerTool"
117 | 				Optimization="2"
118 | 				EnableIntrinsicFunctions="true"
119 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\crf&quot;;&quot;$(SolutionDir)\zlib&quot;"
120 | 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
121 | 				RuntimeLibrary="2"
122 | 				EnableFunctionLevelLinking="true"
123 | 				UsePrecompiledHeader="0"
124 | 				WarningLevel="3"
125 | 				DebugInformationFormat="3"
126 | 			/>
127 | 			<Tool
128 | 				Name="VCManagedResourceCompilerTool"
129 | 			/>
130 | 			<Tool
131 | 				Name="VCResourceCompilerTool"
132 | 			/>
133 | 			<Tool
134 | 				Name="VCPreLinkEventTool"
135 | 			/>
136 | 			<Tool
137 | 				Name="VCLinkerTool"
138 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
139 | 				LinkIncremental="1"
140 | 				GenerateDebugInformation="true"
141 | 				SubSystem="1"
142 | 				OptimizeReferences="2"
143 | 				EnableCOMDATFolding="2"
144 | 				TargetMachine="1"
145 | 			/>
146 | 			<Tool
147 | 				Name="VCALinkTool"
148 | 			/>
149 | 			<Tool
150 | 				Name="VCManifestTool"
151 | 			/>
152 | 			<Tool
153 | 				Name="VCXDCMakeTool"
154 | 			/>
155 | 			<Tool
156 | 				Name="VCBscMakeTool"
157 | 			/>
158 | 			<Tool
159 | 				Name="VCFxCopTool"
160 | 			/>
161 | 			<Tool
162 | 				Name="VCAppVerifierTool"
163 | 			/>
164 | 			<Tool
165 | 				Name="VCPostBuildEventTool"
166 | 			/>
167 | 		</Configuration>
168 | 	</Configurations>
169 | 	<References>
170 | 	</References>
171 | 	<Files>
172 | 		<Filter
173 | 			Name="Source Files"
174 | 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
175 | 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
176 | 			>
177 | 			<File
178 | 				RelativePath="..\..\crf\crfasgd.cpp"
179 | 				>
180 | 			</File>
181 | 			<File
182 | 				RelativePath="..\..\lib\gzstream.cpp"
183 | 				>
184 | 			</File>
185 | 			<File
186 | 				RelativePath="..\..\lib\matrices.cpp"
187 | 				>
188 | 			</File>
189 | 			<File
190 | 				RelativePath="..\..\lib\pstream.cpp"
191 | 				>
192 | 			</File>
193 | 			<File
194 | 				RelativePath="..\..\lib\timer.cpp"
195 | 				>
196 | 			</File>
197 | 			<File
198 | 				RelativePath="..\..\lib\vectors.cpp"
199 | 				>
200 | 			</File>
201 | 		</Filter>
202 | 		<Filter
203 | 			Name="Header Files"
204 | 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
205 | 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
206 | 			>
207 | 		</Filter>
208 | 		<Filter
209 | 			Name="Resource Files"
210 | 			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
211 | 			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
212 | 			>
213 | 		</Filter>
214 | 	</Files>
215 | 	<Globals>
216 | 	</Globals>
217 | </VisualStudioProject>
218 | 


--------------------------------------------------------------------------------
/win/crfsgd/crfsgd.vcproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="Windows-1252"?>
  2 | <VisualStudioProject
  3 | 	ProjectType="Visual C++"
  4 | 	Version="9.00"
  5 | 	Name="crfsgd"
  6 | 	ProjectGUID="{DCEB5351-BB55-4E97-AEB9-CBC3341000FD}"
  7 | 	RootNamespace="crfsgd"
  8 | 	Keyword="Win32Proj"
  9 | 	TargetFrameworkVersion="196613"
 10 | 	>
 11 | 	<Platforms>
 12 | 		<Platform
 13 | 			Name="Win32"
 14 | 		/>
 15 | 	</Platforms>
 16 | 	<ToolFiles>
 17 | 	</ToolFiles>
 18 | 	<Configurations>
 19 | 		<Configuration
 20 | 			Name="Debug|Win32"
 21 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 22 | 			IntermediateDirectory="$(ConfigurationName)"
 23 | 			ConfigurationType="1"
 24 | 			CharacterSet="1"
 25 | 			>
 26 | 			<Tool
 27 | 				Name="VCPreBuildEventTool"
 28 | 			/>
 29 | 			<Tool
 30 | 				Name="VCCustomBuildTool"
 31 | 			/>
 32 | 			<Tool
 33 | 				Name="VCXMLDataGeneratorTool"
 34 | 			/>
 35 | 			<Tool
 36 | 				Name="VCWebServiceProxyGeneratorTool"
 37 | 			/>
 38 | 			<Tool
 39 | 				Name="VCMIDLTool"
 40 | 			/>
 41 | 			<Tool
 42 | 				Name="VCCLCompilerTool"
 43 | 				Optimization="0"
 44 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\crf&quot;;&quot;$(SolutionDir)\zlib&quot;"
 45 | 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 46 | 				MinimalRebuild="true"
 47 | 				BasicRuntimeChecks="3"
 48 | 				RuntimeLibrary="3"
 49 | 				UsePrecompiledHeader="0"
 50 | 				WarningLevel="3"
 51 | 				DebugInformationFormat="4"
 52 | 			/>
 53 | 			<Tool
 54 | 				Name="VCManagedResourceCompilerTool"
 55 | 			/>
 56 | 			<Tool
 57 | 				Name="VCResourceCompilerTool"
 58 | 			/>
 59 | 			<Tool
 60 | 				Name="VCPreLinkEventTool"
 61 | 			/>
 62 | 			<Tool
 63 | 				Name="VCLinkerTool"
 64 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
 65 | 				LinkIncremental="2"
 66 | 				GenerateDebugInformation="true"
 67 | 				SubSystem="1"
 68 | 				TargetMachine="1"
 69 | 			/>
 70 | 			<Tool
 71 | 				Name="VCALinkTool"
 72 | 			/>
 73 | 			<Tool
 74 | 				Name="VCManifestTool"
 75 | 			/>
 76 | 			<Tool
 77 | 				Name="VCXDCMakeTool"
 78 | 			/>
 79 | 			<Tool
 80 | 				Name="VCBscMakeTool"
 81 | 			/>
 82 | 			<Tool
 83 | 				Name="VCFxCopTool"
 84 | 			/>
 85 | 			<Tool
 86 | 				Name="VCAppVerifierTool"
 87 | 			/>
 88 | 			<Tool
 89 | 				Name="VCPostBuildEventTool"
 90 | 			/>
 91 | 		</Configuration>
 92 | 		<Configuration
 93 | 			Name="Release|Win32"
 94 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 95 | 			IntermediateDirectory="$(ConfigurationName)"
 96 | 			ConfigurationType="1"
 97 | 			CharacterSet="1"
 98 | 			WholeProgramOptimization="1"
 99 | 			>
100 | 			<Tool
101 | 				Name="VCPreBuildEventTool"
102 | 			/>
103 | 			<Tool
104 | 				Name="VCCustomBuildTool"
105 | 			/>
106 | 			<Tool
107 | 				Name="VCXMLDataGeneratorTool"
108 | 			/>
109 | 			<Tool
110 | 				Name="VCWebServiceProxyGeneratorTool"
111 | 			/>
112 | 			<Tool
113 | 				Name="VCMIDLTool"
114 | 			/>
115 | 			<Tool
116 | 				Name="VCCLCompilerTool"
117 | 				Optimization="2"
118 | 				EnableIntrinsicFunctions="true"
119 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\crf&quot;;&quot;$(SolutionDir)\zlib&quot;"
120 | 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
121 | 				RuntimeLibrary="2"
122 | 				EnableFunctionLevelLinking="true"
123 | 				UsePrecompiledHeader="0"
124 | 				WarningLevel="3"
125 | 				DebugInformationFormat="3"
126 | 			/>
127 | 			<Tool
128 | 				Name="VCManagedResourceCompilerTool"
129 | 			/>
130 | 			<Tool
131 | 				Name="VCResourceCompilerTool"
132 | 			/>
133 | 			<Tool
134 | 				Name="VCPreLinkEventTool"
135 | 			/>
136 | 			<Tool
137 | 				Name="VCLinkerTool"
138 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
139 | 				LinkIncremental="1"
140 | 				GenerateDebugInformation="true"
141 | 				SubSystem="1"
142 | 				OptimizeReferences="2"
143 | 				EnableCOMDATFolding="2"
144 | 				TargetMachine="1"
145 | 			/>
146 | 			<Tool
147 | 				Name="VCALinkTool"
148 | 			/>
149 | 			<Tool
150 | 				Name="VCManifestTool"
151 | 			/>
152 | 			<Tool
153 | 				Name="VCXDCMakeTool"
154 | 			/>
155 | 			<Tool
156 | 				Name="VCBscMakeTool"
157 | 			/>
158 | 			<Tool
159 | 				Name="VCFxCopTool"
160 | 			/>
161 | 			<Tool
162 | 				Name="VCAppVerifierTool"
163 | 			/>
164 | 			<Tool
165 | 				Name="VCPostBuildEventTool"
166 | 			/>
167 | 		</Configuration>
168 | 	</Configurations>
169 | 	<References>
170 | 	</References>
171 | 	<Files>
172 | 		<Filter
173 | 			Name="Source Files"
174 | 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
175 | 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
176 | 			>
177 | 			<File
178 | 				RelativePath="..\..\crf\crfsgd.cpp"
179 | 				>
180 | 			</File>
181 | 			<File
182 | 				RelativePath="..\..\lib\gzstream.cpp"
183 | 				>
184 | 			</File>
185 | 			<File
186 | 				RelativePath="..\..\lib\matrices.cpp"
187 | 				>
188 | 			</File>
189 | 			<File
190 | 				RelativePath="..\..\lib\pstream.cpp"
191 | 				>
192 | 			</File>
193 | 			<File
194 | 				RelativePath="..\..\lib\timer.cpp"
195 | 				>
196 | 			</File>
197 | 			<File
198 | 				RelativePath="..\..\lib\vectors.cpp"
199 | 				>
200 | 			</File>
201 | 		</Filter>
202 | 		<Filter
203 | 			Name="Header Files"
204 | 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
205 | 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
206 | 			>
207 | 		</Filter>
208 | 		<Filter
209 | 			Name="Resource Files"
210 | 			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
211 | 			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
212 | 			>
213 | 		</Filter>
214 | 	</Files>
215 | 	<Globals>
216 | 	</Globals>
217 | </VisualStudioProject>
218 | 


--------------------------------------------------------------------------------
/win/prep_alpha/prep_alpha.vcproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="Windows-1252"?>
  2 | <VisualStudioProject
  3 | 	ProjectType="Visual C++"
  4 | 	Version="9.00"
  5 | 	Name="prep_alpha"
  6 | 	ProjectGUID="{0C22B9B5-5E7D-41BC-A99D-B76AE17E68C1}"
  7 | 	RootNamespace="prep_alpha"
  8 | 	Keyword="Win32Proj"
  9 | 	TargetFrameworkVersion="196613"
 10 | 	>
 11 | 	<Platforms>
 12 | 		<Platform
 13 | 			Name="Win32"
 14 | 		/>
 15 | 	</Platforms>
 16 | 	<ToolFiles>
 17 | 	</ToolFiles>
 18 | 	<Configurations>
 19 | 		<Configuration
 20 | 			Name="Debug|Win32"
 21 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 22 | 			IntermediateDirectory="$(ConfigurationName)"
 23 | 			ConfigurationType="1"
 24 | 			CharacterSet="1"
 25 | 			>
 26 | 			<Tool
 27 | 				Name="VCPreBuildEventTool"
 28 | 			/>
 29 | 			<Tool
 30 | 				Name="VCCustomBuildTool"
 31 | 			/>
 32 | 			<Tool
 33 | 				Name="VCXMLDataGeneratorTool"
 34 | 			/>
 35 | 			<Tool
 36 | 				Name="VCWebServiceProxyGeneratorTool"
 37 | 			/>
 38 | 			<Tool
 39 | 				Name="VCMIDLTool"
 40 | 			/>
 41 | 			<Tool
 42 | 				Name="VCCLCompilerTool"
 43 | 				Optimization="0"
 44 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\svm&quot;;&quot;$(SolutionDir)\zlib&quot;"
 45 | 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 46 | 				MinimalRebuild="true"
 47 | 				BasicRuntimeChecks="3"
 48 | 				RuntimeLibrary="3"
 49 | 				UsePrecompiledHeader="0"
 50 | 				WarningLevel="3"
 51 | 				DebugInformationFormat="4"
 52 | 			/>
 53 | 			<Tool
 54 | 				Name="VCManagedResourceCompilerTool"
 55 | 			/>
 56 | 			<Tool
 57 | 				Name="VCResourceCompilerTool"
 58 | 			/>
 59 | 			<Tool
 60 | 				Name="VCPreLinkEventTool"
 61 | 			/>
 62 | 			<Tool
 63 | 				Name="VCLinkerTool"
 64 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
 65 | 				LinkIncremental="2"
 66 | 				GenerateDebugInformation="true"
 67 | 				SubSystem="1"
 68 | 				TargetMachine="1"
 69 | 			/>
 70 | 			<Tool
 71 | 				Name="VCALinkTool"
 72 | 			/>
 73 | 			<Tool
 74 | 				Name="VCManifestTool"
 75 | 			/>
 76 | 			<Tool
 77 | 				Name="VCXDCMakeTool"
 78 | 			/>
 79 | 			<Tool
 80 | 				Name="VCBscMakeTool"
 81 | 			/>
 82 | 			<Tool
 83 | 				Name="VCFxCopTool"
 84 | 			/>
 85 | 			<Tool
 86 | 				Name="VCAppVerifierTool"
 87 | 			/>
 88 | 			<Tool
 89 | 				Name="VCPostBuildEventTool"
 90 | 			/>
 91 | 		</Configuration>
 92 | 		<Configuration
 93 | 			Name="Release|Win32"
 94 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 95 | 			IntermediateDirectory="$(ConfigurationName)"
 96 | 			ConfigurationType="1"
 97 | 			CharacterSet="1"
 98 | 			WholeProgramOptimization="1"
 99 | 			>
100 | 			<Tool
101 | 				Name="VCPreBuildEventTool"
102 | 			/>
103 | 			<Tool
104 | 				Name="VCCustomBuildTool"
105 | 			/>
106 | 			<Tool
107 | 				Name="VCXMLDataGeneratorTool"
108 | 			/>
109 | 			<Tool
110 | 				Name="VCWebServiceProxyGeneratorTool"
111 | 			/>
112 | 			<Tool
113 | 				Name="VCMIDLTool"
114 | 			/>
115 | 			<Tool
116 | 				Name="VCCLCompilerTool"
117 | 				Optimization="2"
118 | 				EnableIntrinsicFunctions="true"
119 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\svm&quot;;&quot;$(SolutionDir)\zlib&quot;"
120 | 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
121 | 				RuntimeLibrary="2"
122 | 				EnableFunctionLevelLinking="true"
123 | 				UsePrecompiledHeader="0"
124 | 				WarningLevel="3"
125 | 				DebugInformationFormat="3"
126 | 			/>
127 | 			<Tool
128 | 				Name="VCManagedResourceCompilerTool"
129 | 			/>
130 | 			<Tool
131 | 				Name="VCResourceCompilerTool"
132 | 			/>
133 | 			<Tool
134 | 				Name="VCPreLinkEventTool"
135 | 			/>
136 | 			<Tool
137 | 				Name="VCLinkerTool"
138 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
139 | 				LinkIncremental="1"
140 | 				GenerateDebugInformation="true"
141 | 				SubSystem="1"
142 | 				OptimizeReferences="2"
143 | 				EnableCOMDATFolding="2"
144 | 				TargetMachine="1"
145 | 			/>
146 | 			<Tool
147 | 				Name="VCALinkTool"
148 | 			/>
149 | 			<Tool
150 | 				Name="VCManifestTool"
151 | 			/>
152 | 			<Tool
153 | 				Name="VCXDCMakeTool"
154 | 			/>
155 | 			<Tool
156 | 				Name="VCBscMakeTool"
157 | 			/>
158 | 			<Tool
159 | 				Name="VCFxCopTool"
160 | 			/>
161 | 			<Tool
162 | 				Name="VCAppVerifierTool"
163 | 			/>
164 | 			<Tool
165 | 				Name="VCPostBuildEventTool"
166 | 			/>
167 | 		</Configuration>
168 | 	</Configurations>
169 | 	<References>
170 | 	</References>
171 | 	<Files>
172 | 		<Filter
173 | 			Name="Source Files"
174 | 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
175 | 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
176 | 			>
177 | 			<File
178 | 				RelativePath="..\..\lib\gzstream.cpp"
179 | 				>
180 | 			</File>
181 | 			<File
182 | 				RelativePath="..\..\svm\prep_alpha.cpp"
183 | 				>
184 | 			</File>
185 | 			<File
186 | 				RelativePath="..\..\lib\vectors.cpp"
187 | 				>
188 | 			</File>
189 | 		</Filter>
190 | 		<Filter
191 | 			Name="Header Files"
192 | 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
193 | 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
194 | 			>
195 | 		</Filter>
196 | 		<Filter
197 | 			Name="Resource Files"
198 | 			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
199 | 			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
200 | 			>
201 | 		</Filter>
202 | 	</Files>
203 | 	<Globals>
204 | 	</Globals>
205 | </VisualStudioProject>
206 | 


--------------------------------------------------------------------------------
/win/prep_rcv1/prep_rcv1.vcproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="Windows-1252"?>
  2 | <VisualStudioProject
  3 | 	ProjectType="Visual C++"
  4 | 	Version="9.00"
  5 | 	Name="prep_rcv1"
  6 | 	ProjectGUID="{EE81C130-DFFD-4614-83AB-A15C5CBD07EA}"
  7 | 	RootNamespace="prep_rcv1"
  8 | 	Keyword="Win32Proj"
  9 | 	TargetFrameworkVersion="196613"
 10 | 	>
 11 | 	<Platforms>
 12 | 		<Platform
 13 | 			Name="Win32"
 14 | 		/>
 15 | 	</Platforms>
 16 | 	<ToolFiles>
 17 | 	</ToolFiles>
 18 | 	<Configurations>
 19 | 		<Configuration
 20 | 			Name="Debug|Win32"
 21 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 22 | 			IntermediateDirectory="$(ConfigurationName)"
 23 | 			ConfigurationType="1"
 24 | 			CharacterSet="1"
 25 | 			>
 26 | 			<Tool
 27 | 				Name="VCPreBuildEventTool"
 28 | 			/>
 29 | 			<Tool
 30 | 				Name="VCCustomBuildTool"
 31 | 			/>
 32 | 			<Tool
 33 | 				Name="VCXMLDataGeneratorTool"
 34 | 			/>
 35 | 			<Tool
 36 | 				Name="VCWebServiceProxyGeneratorTool"
 37 | 			/>
 38 | 			<Tool
 39 | 				Name="VCMIDLTool"
 40 | 			/>
 41 | 			<Tool
 42 | 				Name="VCCLCompilerTool"
 43 | 				Optimization="0"
 44 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\svm&quot;;&quot;$(SolutionDir)\zlib&quot;"
 45 | 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 46 | 				MinimalRebuild="true"
 47 | 				BasicRuntimeChecks="3"
 48 | 				RuntimeLibrary="3"
 49 | 				UsePrecompiledHeader="0"
 50 | 				WarningLevel="3"
 51 | 				DebugInformationFormat="4"
 52 | 			/>
 53 | 			<Tool
 54 | 				Name="VCManagedResourceCompilerTool"
 55 | 			/>
 56 | 			<Tool
 57 | 				Name="VCResourceCompilerTool"
 58 | 			/>
 59 | 			<Tool
 60 | 				Name="VCPreLinkEventTool"
 61 | 			/>
 62 | 			<Tool
 63 | 				Name="VCLinkerTool"
 64 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
 65 | 				LinkIncremental="2"
 66 | 				GenerateDebugInformation="true"
 67 | 				SubSystem="1"
 68 | 				TargetMachine="1"
 69 | 			/>
 70 | 			<Tool
 71 | 				Name="VCALinkTool"
 72 | 			/>
 73 | 			<Tool
 74 | 				Name="VCManifestTool"
 75 | 			/>
 76 | 			<Tool
 77 | 				Name="VCXDCMakeTool"
 78 | 			/>
 79 | 			<Tool
 80 | 				Name="VCBscMakeTool"
 81 | 			/>
 82 | 			<Tool
 83 | 				Name="VCFxCopTool"
 84 | 			/>
 85 | 			<Tool
 86 | 				Name="VCAppVerifierTool"
 87 | 			/>
 88 | 			<Tool
 89 | 				Name="VCPostBuildEventTool"
 90 | 			/>
 91 | 		</Configuration>
 92 | 		<Configuration
 93 | 			Name="Release|Win32"
 94 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 95 | 			IntermediateDirectory="$(ConfigurationName)"
 96 | 			ConfigurationType="1"
 97 | 			CharacterSet="1"
 98 | 			WholeProgramOptimization="1"
 99 | 			>
100 | 			<Tool
101 | 				Name="VCPreBuildEventTool"
102 | 			/>
103 | 			<Tool
104 | 				Name="VCCustomBuildTool"
105 | 			/>
106 | 			<Tool
107 | 				Name="VCXMLDataGeneratorTool"
108 | 			/>
109 | 			<Tool
110 | 				Name="VCWebServiceProxyGeneratorTool"
111 | 			/>
112 | 			<Tool
113 | 				Name="VCMIDLTool"
114 | 			/>
115 | 			<Tool
116 | 				Name="VCCLCompilerTool"
117 | 				Optimization="2"
118 | 				EnableIntrinsicFunctions="true"
119 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\svm&quot;;&quot;$(SolutionDir)\zlib&quot;"
120 | 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
121 | 				RuntimeLibrary="2"
122 | 				EnableFunctionLevelLinking="true"
123 | 				UsePrecompiledHeader="0"
124 | 				WarningLevel="3"
125 | 				DebugInformationFormat="3"
126 | 			/>
127 | 			<Tool
128 | 				Name="VCManagedResourceCompilerTool"
129 | 			/>
130 | 			<Tool
131 | 				Name="VCResourceCompilerTool"
132 | 			/>
133 | 			<Tool
134 | 				Name="VCPreLinkEventTool"
135 | 			/>
136 | 			<Tool
137 | 				Name="VCLinkerTool"
138 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
139 | 				LinkIncremental="1"
140 | 				GenerateDebugInformation="true"
141 | 				SubSystem="1"
142 | 				OptimizeReferences="2"
143 | 				EnableCOMDATFolding="2"
144 | 				TargetMachine="1"
145 | 			/>
146 | 			<Tool
147 | 				Name="VCALinkTool"
148 | 			/>
149 | 			<Tool
150 | 				Name="VCManifestTool"
151 | 			/>
152 | 			<Tool
153 | 				Name="VCXDCMakeTool"
154 | 			/>
155 | 			<Tool
156 | 				Name="VCBscMakeTool"
157 | 			/>
158 | 			<Tool
159 | 				Name="VCFxCopTool"
160 | 			/>
161 | 			<Tool
162 | 				Name="VCAppVerifierTool"
163 | 			/>
164 | 			<Tool
165 | 				Name="VCPostBuildEventTool"
166 | 			/>
167 | 		</Configuration>
168 | 	</Configurations>
169 | 	<References>
170 | 	</References>
171 | 	<Files>
172 | 		<Filter
173 | 			Name="Source Files"
174 | 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
175 | 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
176 | 			>
177 | 			<File
178 | 				RelativePath="..\..\lib\gzstream.cpp"
179 | 				>
180 | 			</File>
181 | 			<File
182 | 				RelativePath="..\..\svm\prep_rcv1.cpp"
183 | 				>
184 | 			</File>
185 | 			<File
186 | 				RelativePath="..\..\lib\vectors.cpp"
187 | 				>
188 | 			</File>
189 | 		</Filter>
190 | 		<Filter
191 | 			Name="Header Files"
192 | 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
193 | 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
194 | 			>
195 | 		</Filter>
196 | 		<Filter
197 | 			Name="Resource Files"
198 | 			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
199 | 			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
200 | 			>
201 | 		</Filter>
202 | 	</Files>
203 | 	<Globals>
204 | 	</Globals>
205 | </VisualStudioProject>
206 | 


--------------------------------------------------------------------------------
/win/prep_webspam/prep_webspam.vcproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="Windows-1252"?>
  2 | <VisualStudioProject
  3 | 	ProjectType="Visual C++"
  4 | 	Version="9.00"
  5 | 	Name="prep_webspam"
  6 | 	ProjectGUID="{4D87FC90-7973-4CB4-A7BB-A5F9456AB1BC}"
  7 | 	RootNamespace="prep_webspam"
  8 | 	Keyword="Win32Proj"
  9 | 	TargetFrameworkVersion="196613"
 10 | 	>
 11 | 	<Platforms>
 12 | 		<Platform
 13 | 			Name="Win32"
 14 | 		/>
 15 | 	</Platforms>
 16 | 	<ToolFiles>
 17 | 	</ToolFiles>
 18 | 	<Configurations>
 19 | 		<Configuration
 20 | 			Name="Debug|Win32"
 21 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 22 | 			IntermediateDirectory="$(ConfigurationName)"
 23 | 			ConfigurationType="1"
 24 | 			CharacterSet="1"
 25 | 			>
 26 | 			<Tool
 27 | 				Name="VCPreBuildEventTool"
 28 | 			/>
 29 | 			<Tool
 30 | 				Name="VCCustomBuildTool"
 31 | 			/>
 32 | 			<Tool
 33 | 				Name="VCXMLDataGeneratorTool"
 34 | 			/>
 35 | 			<Tool
 36 | 				Name="VCWebServiceProxyGeneratorTool"
 37 | 			/>
 38 | 			<Tool
 39 | 				Name="VCMIDLTool"
 40 | 			/>
 41 | 			<Tool
 42 | 				Name="VCCLCompilerTool"
 43 | 				Optimization="0"
 44 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\svm&quot;;&quot;$(SolutionDir)\zlib&quot;"
 45 | 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 46 | 				MinimalRebuild="true"
 47 | 				BasicRuntimeChecks="3"
 48 | 				RuntimeLibrary="3"
 49 | 				UsePrecompiledHeader="0"
 50 | 				WarningLevel="3"
 51 | 				DebugInformationFormat="4"
 52 | 			/>
 53 | 			<Tool
 54 | 				Name="VCManagedResourceCompilerTool"
 55 | 			/>
 56 | 			<Tool
 57 | 				Name="VCResourceCompilerTool"
 58 | 			/>
 59 | 			<Tool
 60 | 				Name="VCPreLinkEventTool"
 61 | 			/>
 62 | 			<Tool
 63 | 				Name="VCLinkerTool"
 64 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
 65 | 				LinkIncremental="2"
 66 | 				GenerateDebugInformation="true"
 67 | 				SubSystem="1"
 68 | 				TargetMachine="1"
 69 | 			/>
 70 | 			<Tool
 71 | 				Name="VCALinkTool"
 72 | 			/>
 73 | 			<Tool
 74 | 				Name="VCManifestTool"
 75 | 			/>
 76 | 			<Tool
 77 | 				Name="VCXDCMakeTool"
 78 | 			/>
 79 | 			<Tool
 80 | 				Name="VCBscMakeTool"
 81 | 			/>
 82 | 			<Tool
 83 | 				Name="VCFxCopTool"
 84 | 			/>
 85 | 			<Tool
 86 | 				Name="VCAppVerifierTool"
 87 | 			/>
 88 | 			<Tool
 89 | 				Name="VCPostBuildEventTool"
 90 | 			/>
 91 | 		</Configuration>
 92 | 		<Configuration
 93 | 			Name="Release|Win32"
 94 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 95 | 			IntermediateDirectory="$(ConfigurationName)"
 96 | 			ConfigurationType="1"
 97 | 			CharacterSet="1"
 98 | 			WholeProgramOptimization="1"
 99 | 			>
100 | 			<Tool
101 | 				Name="VCPreBuildEventTool"
102 | 			/>
103 | 			<Tool
104 | 				Name="VCCustomBuildTool"
105 | 			/>
106 | 			<Tool
107 | 				Name="VCXMLDataGeneratorTool"
108 | 			/>
109 | 			<Tool
110 | 				Name="VCWebServiceProxyGeneratorTool"
111 | 			/>
112 | 			<Tool
113 | 				Name="VCMIDLTool"
114 | 			/>
115 | 			<Tool
116 | 				Name="VCCLCompilerTool"
117 | 				Optimization="2"
118 | 				EnableIntrinsicFunctions="true"
119 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\svm&quot;;&quot;$(SolutionDir)\zlib&quot;"
120 | 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
121 | 				RuntimeLibrary="2"
122 | 				EnableFunctionLevelLinking="true"
123 | 				UsePrecompiledHeader="0"
124 | 				WarningLevel="3"
125 | 				DebugInformationFormat="3"
126 | 			/>
127 | 			<Tool
128 | 				Name="VCManagedResourceCompilerTool"
129 | 			/>
130 | 			<Tool
131 | 				Name="VCResourceCompilerTool"
132 | 			/>
133 | 			<Tool
134 | 				Name="VCPreLinkEventTool"
135 | 			/>
136 | 			<Tool
137 | 				Name="VCLinkerTool"
138 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
139 | 				LinkIncremental="1"
140 | 				GenerateDebugInformation="true"
141 | 				SubSystem="1"
142 | 				OptimizeReferences="2"
143 | 				EnableCOMDATFolding="2"
144 | 				TargetMachine="1"
145 | 			/>
146 | 			<Tool
147 | 				Name="VCALinkTool"
148 | 			/>
149 | 			<Tool
150 | 				Name="VCManifestTool"
151 | 			/>
152 | 			<Tool
153 | 				Name="VCXDCMakeTool"
154 | 			/>
155 | 			<Tool
156 | 				Name="VCBscMakeTool"
157 | 			/>
158 | 			<Tool
159 | 				Name="VCFxCopTool"
160 | 			/>
161 | 			<Tool
162 | 				Name="VCAppVerifierTool"
163 | 			/>
164 | 			<Tool
165 | 				Name="VCPostBuildEventTool"
166 | 			/>
167 | 		</Configuration>
168 | 	</Configurations>
169 | 	<References>
170 | 	</References>
171 | 	<Files>
172 | 		<Filter
173 | 			Name="Source Files"
174 | 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
175 | 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
176 | 			>
177 | 			<File
178 | 				RelativePath="..\..\lib\gzstream.cpp"
179 | 				>
180 | 			</File>
181 | 			<File
182 | 				RelativePath="..\..\svm\prep_webspam.cpp"
183 | 				>
184 | 			</File>
185 | 			<File
186 | 				RelativePath="..\..\lib\vectors.cpp"
187 | 				>
188 | 			</File>
189 | 		</Filter>
190 | 		<Filter
191 | 			Name="Header Files"
192 | 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
193 | 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
194 | 			>
195 | 		</Filter>
196 | 		<Filter
197 | 			Name="Resource Files"
198 | 			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
199 | 			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
200 | 			>
201 | 		</Filter>
202 | 	</Files>
203 | 	<Globals>
204 | 	</Globals>
205 | </VisualStudioProject>
206 | 


--------------------------------------------------------------------------------
/win/sgd.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 10.00
 3 | # Visual C++ Express 2008
 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "svmasgd", "svmasgd\svmasgd.vcproj", "{DBF34410-064F-42C9-BCDD-EA1478399099}"
 5 | EndProject
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "svmsgd", "svmsgd\svmsgd.vcproj", "{9DCFACBF-41C1-41F7-AE80-0A25AF861730}"
 7 | EndProject
 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "prep_rcv1", "prep_rcv1\prep_rcv1.vcproj", "{EE81C130-DFFD-4614-83AB-A15C5CBD07EA}"
 9 | EndProject
10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "prep_alpha", "prep_alpha\prep_alpha.vcproj", "{0C22B9B5-5E7D-41BC-A99D-B76AE17E68C1}"
11 | EndProject
12 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "prep_webspam", "prep_webspam\prep_webspam.vcproj", "{4D87FC90-7973-4CB4-A7BB-A5F9456AB1BC}"
13 | EndProject
14 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "crfsgd", "crfsgd\crfsgd.vcproj", "{DCEB5351-BB55-4E97-AEB9-CBC3341000FD}"
15 | EndProject
16 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "crfasgd", "crfasgd\crfasgd.vcproj", "{E3102F60-8536-4A6E-B7C0-F68B60974832}"
17 | EndProject
18 | Global
19 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
20 | 		Debug|Win32 = Debug|Win32
21 | 		Release|Win32 = Release|Win32
22 | 	EndGlobalSection
23 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
24 | 		{DBF34410-064F-42C9-BCDD-EA1478399099}.Debug|Win32.ActiveCfg = Debug|Win32
25 | 		{DBF34410-064F-42C9-BCDD-EA1478399099}.Debug|Win32.Build.0 = Debug|Win32
26 | 		{DBF34410-064F-42C9-BCDD-EA1478399099}.Release|Win32.ActiveCfg = Release|Win32
27 | 		{DBF34410-064F-42C9-BCDD-EA1478399099}.Release|Win32.Build.0 = Release|Win32
28 | 		{9DCFACBF-41C1-41F7-AE80-0A25AF861730}.Debug|Win32.ActiveCfg = Debug|Win32
29 | 		{9DCFACBF-41C1-41F7-AE80-0A25AF861730}.Debug|Win32.Build.0 = Debug|Win32
30 | 		{9DCFACBF-41C1-41F7-AE80-0A25AF861730}.Release|Win32.ActiveCfg = Release|Win32
31 | 		{9DCFACBF-41C1-41F7-AE80-0A25AF861730}.Release|Win32.Build.0 = Release|Win32
32 | 		{EE81C130-DFFD-4614-83AB-A15C5CBD07EA}.Debug|Win32.ActiveCfg = Debug|Win32
33 | 		{EE81C130-DFFD-4614-83AB-A15C5CBD07EA}.Debug|Win32.Build.0 = Debug|Win32
34 | 		{EE81C130-DFFD-4614-83AB-A15C5CBD07EA}.Release|Win32.ActiveCfg = Release|Win32
35 | 		{EE81C130-DFFD-4614-83AB-A15C5CBD07EA}.Release|Win32.Build.0 = Release|Win32
36 | 		{0C22B9B5-5E7D-41BC-A99D-B76AE17E68C1}.Debug|Win32.ActiveCfg = Debug|Win32
37 | 		{0C22B9B5-5E7D-41BC-A99D-B76AE17E68C1}.Debug|Win32.Build.0 = Debug|Win32
38 | 		{0C22B9B5-5E7D-41BC-A99D-B76AE17E68C1}.Release|Win32.ActiveCfg = Release|Win32
39 | 		{0C22B9B5-5E7D-41BC-A99D-B76AE17E68C1}.Release|Win32.Build.0 = Release|Win32
40 | 		{4D87FC90-7973-4CB4-A7BB-A5F9456AB1BC}.Debug|Win32.ActiveCfg = Debug|Win32
41 | 		{4D87FC90-7973-4CB4-A7BB-A5F9456AB1BC}.Debug|Win32.Build.0 = Debug|Win32
42 | 		{4D87FC90-7973-4CB4-A7BB-A5F9456AB1BC}.Release|Win32.ActiveCfg = Release|Win32
43 | 		{4D87FC90-7973-4CB4-A7BB-A5F9456AB1BC}.Release|Win32.Build.0 = Release|Win32
44 | 		{DCEB5351-BB55-4E97-AEB9-CBC3341000FD}.Debug|Win32.ActiveCfg = Debug|Win32
45 | 		{DCEB5351-BB55-4E97-AEB9-CBC3341000FD}.Debug|Win32.Build.0 = Debug|Win32
46 | 		{DCEB5351-BB55-4E97-AEB9-CBC3341000FD}.Release|Win32.ActiveCfg = Release|Win32
47 | 		{DCEB5351-BB55-4E97-AEB9-CBC3341000FD}.Release|Win32.Build.0 = Release|Win32
48 | 		{E3102F60-8536-4A6E-B7C0-F68B60974832}.Debug|Win32.ActiveCfg = Debug|Win32
49 | 		{E3102F60-8536-4A6E-B7C0-F68B60974832}.Debug|Win32.Build.0 = Debug|Win32
50 | 		{E3102F60-8536-4A6E-B7C0-F68B60974832}.Release|Win32.ActiveCfg = Release|Win32
51 | 		{E3102F60-8536-4A6E-B7C0-F68B60974832}.Release|Win32.Build.0 = Release|Win32
52 | 	EndGlobalSection
53 | 	GlobalSection(SolutionProperties) = preSolution
54 | 		HideSolutionNode = FALSE
55 | 	EndGlobalSection
56 | EndGlobal
57 | 


--------------------------------------------------------------------------------
/win/svmasgd/svmasgd.vcproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="Windows-1252"?>
  2 | <VisualStudioProject
  3 | 	ProjectType="Visual C++"
  4 | 	Version="9.00"
  5 | 	Name="svmasgd"
  6 | 	ProjectGUID="{DBF34410-064F-42C9-BCDD-EA1478399099}"
  7 | 	RootNamespace="svmasgd"
  8 | 	Keyword="Win32Proj"
  9 | 	TargetFrameworkVersion="196613"
 10 | 	>
 11 | 	<Platforms>
 12 | 		<Platform
 13 | 			Name="Win32"
 14 | 		/>
 15 | 	</Platforms>
 16 | 	<ToolFiles>
 17 | 	</ToolFiles>
 18 | 	<Configurations>
 19 | 		<Configuration
 20 | 			Name="Debug|Win32"
 21 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 22 | 			IntermediateDirectory="$(ConfigurationName)"
 23 | 			ConfigurationType="1"
 24 | 			CharacterSet="1"
 25 | 			>
 26 | 			<Tool
 27 | 				Name="VCPreBuildEventTool"
 28 | 			/>
 29 | 			<Tool
 30 | 				Name="VCCustomBuildTool"
 31 | 			/>
 32 | 			<Tool
 33 | 				Name="VCXMLDataGeneratorTool"
 34 | 			/>
 35 | 			<Tool
 36 | 				Name="VCWebServiceProxyGeneratorTool"
 37 | 			/>
 38 | 			<Tool
 39 | 				Name="VCMIDLTool"
 40 | 			/>
 41 | 			<Tool
 42 | 				Name="VCCLCompilerTool"
 43 | 				Optimization="0"
 44 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\svm&quot;;&quot;$(SolutionDir)\zlib&quot;"
 45 | 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 46 | 				MinimalRebuild="true"
 47 | 				BasicRuntimeChecks="3"
 48 | 				RuntimeLibrary="3"
 49 | 				UsePrecompiledHeader="0"
 50 | 				WarningLevel="3"
 51 | 				DebugInformationFormat="4"
 52 | 			/>
 53 | 			<Tool
 54 | 				Name="VCManagedResourceCompilerTool"
 55 | 			/>
 56 | 			<Tool
 57 | 				Name="VCResourceCompilerTool"
 58 | 			/>
 59 | 			<Tool
 60 | 				Name="VCPreLinkEventTool"
 61 | 			/>
 62 | 			<Tool
 63 | 				Name="VCLinkerTool"
 64 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
 65 | 				LinkIncremental="2"
 66 | 				GenerateDebugInformation="true"
 67 | 				SubSystem="1"
 68 | 				TargetMachine="1"
 69 | 			/>
 70 | 			<Tool
 71 | 				Name="VCALinkTool"
 72 | 			/>
 73 | 			<Tool
 74 | 				Name="VCManifestTool"
 75 | 			/>
 76 | 			<Tool
 77 | 				Name="VCXDCMakeTool"
 78 | 			/>
 79 | 			<Tool
 80 | 				Name="VCBscMakeTool"
 81 | 			/>
 82 | 			<Tool
 83 | 				Name="VCFxCopTool"
 84 | 			/>
 85 | 			<Tool
 86 | 				Name="VCAppVerifierTool"
 87 | 			/>
 88 | 			<Tool
 89 | 				Name="VCPostBuildEventTool"
 90 | 			/>
 91 | 		</Configuration>
 92 | 		<Configuration
 93 | 			Name="Release|Win32"
 94 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 95 | 			IntermediateDirectory="$(ConfigurationName)"
 96 | 			ConfigurationType="1"
 97 | 			CharacterSet="1"
 98 | 			WholeProgramOptimization="1"
 99 | 			>
100 | 			<Tool
101 | 				Name="VCPreBuildEventTool"
102 | 			/>
103 | 			<Tool
104 | 				Name="VCCustomBuildTool"
105 | 			/>
106 | 			<Tool
107 | 				Name="VCXMLDataGeneratorTool"
108 | 			/>
109 | 			<Tool
110 | 				Name="VCWebServiceProxyGeneratorTool"
111 | 			/>
112 | 			<Tool
113 | 				Name="VCMIDLTool"
114 | 			/>
115 | 			<Tool
116 | 				Name="VCCLCompilerTool"
117 | 				Optimization="2"
118 | 				EnableIntrinsicFunctions="true"
119 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\svm&quot;;&quot;$(SolutionDir)\zlib&quot;"
120 | 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
121 | 				RuntimeLibrary="2"
122 | 				EnableFunctionLevelLinking="true"
123 | 				UsePrecompiledHeader="0"
124 | 				WarningLevel="3"
125 | 				DebugInformationFormat="3"
126 | 			/>
127 | 			<Tool
128 | 				Name="VCManagedResourceCompilerTool"
129 | 			/>
130 | 			<Tool
131 | 				Name="VCResourceCompilerTool"
132 | 			/>
133 | 			<Tool
134 | 				Name="VCPreLinkEventTool"
135 | 			/>
136 | 			<Tool
137 | 				Name="VCLinkerTool"
138 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
139 | 				LinkIncremental="1"
140 | 				GenerateDebugInformation="true"
141 | 				SubSystem="1"
142 | 				OptimizeReferences="2"
143 | 				EnableCOMDATFolding="2"
144 | 				TargetMachine="1"
145 | 			/>
146 | 			<Tool
147 | 				Name="VCALinkTool"
148 | 			/>
149 | 			<Tool
150 | 				Name="VCManifestTool"
151 | 			/>
152 | 			<Tool
153 | 				Name="VCXDCMakeTool"
154 | 			/>
155 | 			<Tool
156 | 				Name="VCBscMakeTool"
157 | 			/>
158 | 			<Tool
159 | 				Name="VCFxCopTool"
160 | 			/>
161 | 			<Tool
162 | 				Name="VCAppVerifierTool"
163 | 			/>
164 | 			<Tool
165 | 				Name="VCPostBuildEventTool"
166 | 			/>
167 | 		</Configuration>
168 | 	</Configurations>
169 | 	<References>
170 | 	</References>
171 | 	<Files>
172 | 		<Filter
173 | 			Name="Source Files"
174 | 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
175 | 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
176 | 			>
177 | 			<File
178 | 				RelativePath="..\..\svm\data.cpp"
179 | 				>
180 | 			</File>
181 | 			<File
182 | 				RelativePath="..\..\lib\gzstream.cpp"
183 | 				>
184 | 			</File>
185 | 			<File
186 | 				RelativePath="..\..\svm\svmasgd.cpp"
187 | 				>
188 | 			</File>
189 | 			<File
190 | 				RelativePath="..\..\lib\timer.cpp"
191 | 				>
192 | 			</File>
193 | 			<File
194 | 				RelativePath="..\..\lib\vectors.cpp"
195 | 				>
196 | 			</File>
197 | 		</Filter>
198 | 		<Filter
199 | 			Name="Header Files"
200 | 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
201 | 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
202 | 			>
203 | 		</Filter>
204 | 		<Filter
205 | 			Name="Resource Files"
206 | 			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
207 | 			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
208 | 			>
209 | 		</Filter>
210 | 	</Files>
211 | 	<Globals>
212 | 	</Globals>
213 | </VisualStudioProject>
214 | 


--------------------------------------------------------------------------------
/win/svmsgd/svmsgd.vcproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="Windows-1252"?>
  2 | <VisualStudioProject
  3 | 	ProjectType="Visual C++"
  4 | 	Version="9.00"
  5 | 	Name="svmsgd"
  6 | 	ProjectGUID="{9DCFACBF-41C1-41F7-AE80-0A25AF861730}"
  7 | 	RootNamespace="svmsgd"
  8 | 	Keyword="Win32Proj"
  9 | 	TargetFrameworkVersion="196613"
 10 | 	>
 11 | 	<Platforms>
 12 | 		<Platform
 13 | 			Name="Win32"
 14 | 		/>
 15 | 	</Platforms>
 16 | 	<ToolFiles>
 17 | 	</ToolFiles>
 18 | 	<Configurations>
 19 | 		<Configuration
 20 | 			Name="Debug|Win32"
 21 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 22 | 			IntermediateDirectory="$(ConfigurationName)"
 23 | 			ConfigurationType="1"
 24 | 			CharacterSet="1"
 25 | 			>
 26 | 			<Tool
 27 | 				Name="VCPreBuildEventTool"
 28 | 			/>
 29 | 			<Tool
 30 | 				Name="VCCustomBuildTool"
 31 | 			/>
 32 | 			<Tool
 33 | 				Name="VCXMLDataGeneratorTool"
 34 | 			/>
 35 | 			<Tool
 36 | 				Name="VCWebServiceProxyGeneratorTool"
 37 | 			/>
 38 | 			<Tool
 39 | 				Name="VCMIDLTool"
 40 | 			/>
 41 | 			<Tool
 42 | 				Name="VCCLCompilerTool"
 43 | 				Optimization="0"
 44 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\svm&quot;;&quot;$(SolutionDir)\zlib&quot;"
 45 | 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 46 | 				MinimalRebuild="true"
 47 | 				BasicRuntimeChecks="3"
 48 | 				RuntimeLibrary="3"
 49 | 				UsePrecompiledHeader="0"
 50 | 				WarningLevel="3"
 51 | 				DebugInformationFormat="4"
 52 | 			/>
 53 | 			<Tool
 54 | 				Name="VCManagedResourceCompilerTool"
 55 | 			/>
 56 | 			<Tool
 57 | 				Name="VCResourceCompilerTool"
 58 | 			/>
 59 | 			<Tool
 60 | 				Name="VCPreLinkEventTool"
 61 | 			/>
 62 | 			<Tool
 63 | 				Name="VCLinkerTool"
 64 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
 65 | 				LinkIncremental="2"
 66 | 				GenerateDebugInformation="true"
 67 | 				SubSystem="1"
 68 | 				TargetMachine="1"
 69 | 			/> <!-- zlib.lib path is quoted (as in svmasgd.vcproj) so linking still works when the solution directory contains spaces -->
 70 | 			<Tool
 71 | 				Name="VCALinkTool"
 72 | 			/>
 73 | 			<Tool
 74 | 				Name="VCManifestTool"
 75 | 			/>
 76 | 			<Tool
 77 | 				Name="VCXDCMakeTool"
 78 | 			/>
 79 | 			<Tool
 80 | 				Name="VCBscMakeTool"
 81 | 			/>
 82 | 			<Tool
 83 | 				Name="VCFxCopTool"
 84 | 			/>
 85 | 			<Tool
 86 | 				Name="VCAppVerifierTool"
 87 | 			/>
 88 | 			<Tool
 89 | 				Name="VCPostBuildEventTool"
 90 | 			/>
 91 | 		</Configuration>
 92 | 		<Configuration
 93 | 			Name="Release|Win32"
 94 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 95 | 			IntermediateDirectory="$(ConfigurationName)"
 96 | 			ConfigurationType="1"
 97 | 			CharacterSet="1"
 98 | 			WholeProgramOptimization="1"
 99 | 			>
100 | 			<Tool
101 | 				Name="VCPreBuildEventTool"
102 | 			/>
103 | 			<Tool
104 | 				Name="VCCustomBuildTool"
105 | 			/>
106 | 			<Tool
107 | 				Name="VCXMLDataGeneratorTool"
108 | 			/>
109 | 			<Tool
110 | 				Name="VCWebServiceProxyGeneratorTool"
111 | 			/>
112 | 			<Tool
113 | 				Name="VCMIDLTool"
114 | 			/>
115 | 			<Tool
116 | 				Name="VCCLCompilerTool"
117 | 				Optimization="2"
118 | 				EnableIntrinsicFunctions="true"
119 | 				AdditionalIncludeDirectories="&quot;$(SolutionDir)\..\lib&quot;;&quot;$(SolutionDir)\..\svm&quot;;&quot;$(SolutionDir)\zlib&quot;"
120 | 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
121 | 				RuntimeLibrary="2"
122 | 				EnableFunctionLevelLinking="true"
123 | 				UsePrecompiledHeader="0"
124 | 				WarningLevel="3"
125 | 				DebugInformationFormat="3"
126 | 			/>
127 | 			<Tool
128 | 				Name="VCManagedResourceCompilerTool"
129 | 			/>
130 | 			<Tool
131 | 				Name="VCResourceCompilerTool"
132 | 			/>
133 | 			<Tool
134 | 				Name="VCPreLinkEventTool"
135 | 			/>
136 | 			<Tool
137 | 				Name="VCLinkerTool"
138 | 				AdditionalDependencies="&quot;$(SolutionDir)\zlib\zlib.lib&quot;"
139 | 				LinkIncremental="1"
140 | 				GenerateDebugInformation="true"
141 | 				SubSystem="1"
142 | 				OptimizeReferences="2"
143 | 				EnableCOMDATFolding="2"
144 | 				TargetMachine="1"
145 | 			/> <!-- zlib.lib path is quoted (as in svmasgd.vcproj) so linking still works when the solution directory contains spaces -->
146 | 			<Tool
147 | 				Name="VCALinkTool"
148 | 			/>
149 | 			<Tool
150 | 				Name="VCManifestTool"
151 | 			/>
152 | 			<Tool
153 | 				Name="VCXDCMakeTool"
154 | 			/>
155 | 			<Tool
156 | 				Name="VCBscMakeTool"
157 | 			/>
158 | 			<Tool
159 | 				Name="VCFxCopTool"
160 | 			/>
161 | 			<Tool
162 | 				Name="VCAppVerifierTool"
163 | 			/>
164 | 			<Tool
165 | 				Name="VCPostBuildEventTool"
166 | 			/>
167 | 		</Configuration>
168 | 	</Configurations>
169 | 	<References>
170 | 	</References>
171 | 	<Files>
172 | 		<Filter
173 | 			Name="Source Files"
174 | 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
175 | 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
176 | 			>
177 | 			<File
178 | 				RelativePath="..\..\svm\data.cpp"
179 | 				>
180 | 			</File>
181 | 			<File
182 | 				RelativePath="..\..\lib\gzstream.cpp"
183 | 				>
184 | 			</File>
185 | 			<File
186 | 				RelativePath="..\..\svm\svmsgd.cpp"
187 | 				>
188 | 			</File>
189 | 			<File
190 | 				RelativePath="..\..\lib\timer.cpp"
191 | 				>
192 | 			</File>
193 | 			<File
194 | 				RelativePath="..\..\lib\vectors.cpp"
195 | 				>
196 | 			</File>
197 | 		</Filter>
198 | 		<Filter
199 | 			Name="Header Files"
200 | 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
201 | 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
202 | 			>
203 | 		</Filter>
204 | 		<Filter
205 | 			Name="Resource Files"
206 | 			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
207 | 			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
208 | 			>
209 | 		</Filter>
210 | 	</Files>
211 | 	<Globals>
212 | 	</Globals>
213 | </VisualStudioProject>
214 | 


--------------------------------------------------------------------------------
/win/zlib/README.txt:
--------------------------------------------------------------------------------
1 | 
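2 | You need to populate this directory with the static version of the zlib library: the project files link against zlib.lib in this directory and add this directory to the compiler include path.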
3 | See the instructions in file ..\README.txt
4 | 


--------------------------------------------------------------------------------