├── LICENSE ├── README.md ├── makefile ├── setup.py └── src ├── convex_polytope_machine.cpp ├── convex_polytope_machine.h ├── cpm.cpp ├── cpm.h ├── cpm.py ├── dense_matrix.cpp ├── dense_matrix.h ├── eval_utils.cpp ├── eval_utils.h ├── main.cpp ├── numpy.i ├── option_parser.cpp ├── option_parser.h ├── parallel_eval.cpp ├── parallel_eval.h ├── python.i ├── python_wrap.cpp ├── sparse_vector.cpp ├── sparse_vector.h ├── stochastic_data_adaptor.cpp └── stochastic_data_adaptor.h /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is a C++11 implementation of the Convex Polytope Machine algorithm as presented in 2 | 3 | Kantchelian, A., Tschantz, M. C., Huang, L., Bartlett, P. L., Joseph, A. D., & Tygar, J. D. [Large-Margin Convex Polytope Machine](http://papers.nips.cc/paper/5511-large-margin-convex-polytope-machine). 4 | 5 | In addition to the command line tool, Python bindings which are fully aware 6 | of numpy arrays and scipy sparse matrices are provided. 7 | 8 | ## Building the code 9 | 10 | The CPM can be invoked via command line or from a python interpreter 11 | directly. Building these two should be painless. 12 | 13 | ### Building the command line tool 14 | 15 | Running 16 | 17 | ``` bash 18 | $ make cmdapp 19 | ``` 20 | 21 | will create the `bin/cpm` executable. 22 | 23 | ### Building the python module 24 | 25 | The Python module is built and installed using the distutils tools, which 26 | is already included in the standard library. 
However, the python module 27 | itself requires numpy (and numpy headers) and scipy, so make sure these 28 | are installed. To build the extension, run: 29 | 30 | ``` bash 31 | $ python setup.py build 32 | ``` 33 | 34 | This puts all necessary files in `build/lib.`. 35 | From this directory, you should be able to launch a Python interpreter and 36 | successfully import the module. For example: 37 | 38 | ``` bash 39 | $ cd build/lib.macosx-10.9-x86_64-2.7 40 | $ ls 41 | _cpm.so cpm.py 42 | $ python 43 | Python 2.7.6 (default, Nov 25 2013, 16:54:21) 44 | [GCC 4.2.1 Compatible Apple LLVM 5.0 (clang-500.2.79)] on darwin 45 | Type "help", "copyright", "credits" or "license" for more information. 46 | >>> import cpm 47 | >>> 48 | ``` 49 | 50 | If you are particularly fond of it, you can install the cpm 51 | extension module by 52 | 53 | ``` bash 54 | $ python setup.py install 55 | ``` 56 | 57 | which will make it importable from anywhere. Or you can just copy the 58 | build/lib.* files where needed. 59 | 60 | #### Building the python wrapper 61 | 62 | Unless you are planning to extend the python module features yourself, 63 | this part is irrelevant to you. 64 | 65 | You need a working installation of [SWIG](http://www.swig.org/). Running 66 | 67 | ``` bash 68 | $ make wrapper 69 | ``` 70 | 71 | will create `python_wrapper.cpp` and `cpm.py` in the src directory. 72 | 73 | ## Usage 74 | 75 | ### Python module 76 | 77 | The python module essentially provides two classes: `CPM` and `Dataset`. 78 | `CPM` essentially behaves like a scikit learner, with `fit()` and 79 | `predict()` methods. 80 | `Dataset` takes care of reading libSVM files from disk (fast!) and/or 81 | translating your dense numpy data or sparse matrices into the memory 82 | layout CPM likes. Most of the methods come with `__docstring__`s for 83 | easy reference. 
Here's an example usage: 84 | 85 | ``` python 86 | >>> import numpy as np, scipy.sparse as sp 87 | >>> import cpm 88 | 89 | >>> X = [[0, 1, 0], [1, 0, 0]] # two instances with 3 features 90 | >>> Y = [0, 1] # corresponding labels 91 | 92 | >>> Xnumpy = np.array(X, dtype=np.float32) # notice the type spec 93 | >>> Xsparse = sp.csr_matrix(X, dtype=np.float32) # notice the type spec again 94 | >>> Ynumpy = np.array(Y, dtype=np.int32) # everything is 32 bits. 95 | 96 | >>> trainset_1 = cpm.Dataset(X, Y) # this works 97 | >>> trainset_2 = cpm.Dataset(Xnumpy, Ynumpy) # this is fine too 98 | >>> trainset_3 = cpm.Dataset(Xsparse, Ynumpy) # and this as well 99 | 100 | >>> clf = cpm.CPM(10) # a CPM with 10 sub-classifiers and default meta-parameters values 101 | >>> clf.fit(trainset_1, 100) # train model on 100 SGD steps 102 | >>> scores, assignments = clf.predict(trainset_2) # returns predicted scores and assigned sub-classifiers 103 | >>> scores 104 | array([-1.11855221, 1.37075484], dtype=float32) 105 | >>> assignments 106 | array([0, 0], dtype=int32) 107 | 108 | >>> clf.serializeModel('my_model.cpm') # you can save the current model to disk 109 | >>> clf_1 = cpm.CPM.deserializeModel('my_model.cpm') # and load it again later 110 | ``` 111 | 112 | The main gotcha is that data arrays should be float 32 bits and label arrays 113 | 32 bit integers (or less). You can accomplish this easily by the `view()` 114 | method of numpy/scipy.sparse objects. It is anyhow a good idea to be 115 | using 32 bit floats by default if working with large datasets. At the moment, 116 | only CSR sparse matrices are supported. 117 | 118 | The wrapper also exposes `parallelFitPredict()`, a multithreaded method 119 | which trains multiple models with arbitrary parameters and outputs their 120 | predictions. Refer to the docstring for how to use it. 121 | 122 | ### Command line 123 | 124 | The command line interface follows [Sofia-ml](https://code.google.com/p/sofia-ml) 125 | in spirit. 
126 | 127 | ``` bash 128 | $ ./cpm -h 129 | Perform CPM training and/or inference. 130 | 131 | --quiet -q be quiet. 132 | Default: False 133 | --reshuffle shuffle training set between epochs. 134 | Default: False 135 | --classifiers -k number of classifiers. 136 | Default: 1 137 | --outer_label outer class label (the class that will be decomposed). 138 | Default: 1 139 | --iterations -i number of iterations. 140 | Default: 50000000 141 | --C -C C regularization factor. 142 | Default: 1 143 | --cost_ratio cost ratio of negatives vs positives. 144 | Default: 1 145 | --entropy minimal (exp of) entropy to maintain in heuristic max. Value between 1 and k. 146 | Default: 1 147 | --seed random seed (for reproducibility). 148 | --train -t train data file. 149 | --test -c test data file. 150 | --model_in -m model in file. Will be ignored if in training mode. 151 | --model_out -o model out file. 152 | --scores -s scores file. 153 | ``` 154 | 155 | For instance, to train a model with 10 subclassifiers on 1,000,000 iterations, 156 | from libsvm formated file `train.libsvm`, save model in `model.txt` and output the scores 157 | of the model on `test.libsvm` in `scores.txt`: 158 | 159 | ``` bash 160 | $ ./cpm -k 10 -i 1000000 -t train.libsvm -c test.libsvm -o model.txt -s scores.txt 161 | ``` 162 | 163 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | CXX=g++ 2 | CXXFLAGS=-Wall -pedantic -pthread -std=c++11 3 | SWIGFLAGS=-c++ -python -O -builtin 4 | OFLAG=-O3 5 | VPATH=src 6 | OBJDIR=build 7 | BINDIR=bin 8 | 9 | all: directories build cmdapp 10 | 11 | build: $(OBJDIR)/sparse_vector.o $(OBJDIR)/dense_matrix.o \ 12 | $(OBJDIR)/stochastic_data_adaptor.o \ 13 | $(OBJDIR)/convex_polytope_machine.o \ 14 | $(OBJDIR)/eval_utils.o \ 15 | $(OBJDIR)/option_parser.o \ 16 | $(OBJDIR)/parallel_eval.o \ 17 | $(OBJDIR)/cpm.o 18 | 19 | cmdapp: $(BINDIR)/cpm 20 | 21 | 
$(BINDIR)/cpm: $(OBJDIR)/main.o $(OBJDIR)/sparse_vector.o \ 22 | $(OBJDIR)/dense_matrix.o \ 23 | $(OBJDIR)/stochastic_data_adaptor.o \ 24 | $(OBJDIR)/eval_utils.o \ 25 | $(OBJDIR)/convex_polytope_machine.o\ 26 | $(OBJDIR)/option_parser.o \ 27 | $(OBJDIR)/parallel_eval.o \ 28 | $(OBJDIR)/cpm.o 29 | $(CXX) -o $@ $(OFLAG) $(CXXFLAGS) $^ 30 | 31 | wrapper: python.i 32 | swig $(SWIGFLAGS) -outdir $(VPATH) -o $(VPATH)/python_wrap.cpp $^ 33 | 34 | directories: 35 | mkdir -p $(OBJDIR) 36 | mkdir -p $(BINDIR) 37 | 38 | $(OBJDIR)/sparse_vector.o: sparse_vector.cpp sparse_vector.h 39 | $(CXX) -c $(CXXFLAGS) $(OFLAG) $< -o $@ 40 | 41 | $(OBJDIR)/dense_matrix.o: dense_matrix.cpp \ 42 | dense_matrix.h 43 | $(CXX) -c $(CXXFLAGS) $(OFLAG) $< -o $@ 44 | 45 | $(OBJDIR)/parallel_eval.o: parallel_eval.cpp parallel_eval.h 46 | $(CXX) -c $(CXXFLAGS) $(OFLAG) $< -o $@ 47 | 48 | $(OBJDIR)/stochastic_data_adaptor.o: stochastic_data_adaptor.cpp \ 49 | stochastic_data_adaptor.h 50 | $(CXX) -c $(CXXFLAGS) $(OFLAG) $< -o $@ 51 | 52 | $(OBJDIR)/convex_polytope_machine.o: convex_polytope_machine.cpp \ 53 | convex_polytope_machine.h 54 | $(CXX) -c $(CXXFLAGS) $(OFLAG) $< -o $@ 55 | 56 | $(OBJDIR)/option_parser.o: option_parser.cpp option_parser.h 57 | $(CXX) -c $(CXXFLAGS) $(OFLAG) $< -o $@ 58 | 59 | $(OBJDIR)/cpm.o: cpm.cpp cpm.h 60 | $(CXX) -c $(CXXFLAGS) $(OFLAG) $< -o $@ 61 | 62 | $(OBJDIR)/eval_utils.o: eval_utils.cpp eval_utils.h 63 | $(CXX) -c $(CXXFLAGS) $(OFLAG) $< -o $@ 64 | 65 | $(OBJDIR)/main.o: main.cpp 66 | $(CXX) -c $(CXXFLAGS) $(OFLAG) $< -o $@ 67 | 68 | clean: 69 | rm -rf $(OBJDIR)/* 70 | rm -f $(BINDIR)/* 71 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | setup.py file for SWIG interface 5 | """ 6 | 7 | from distutils.core import setup, Extension 8 | import numpy as np 9 | 10 | cpm_module = Extension('_cpm', 11 | 
sources=['src/python_wrap.cpp', 12 | 'src/sparse_vector.cpp', 13 | 'src/stochastic_data_adaptor.cpp', 14 | 'src/convex_polytope_machine.cpp', 15 | 'src/dense_matrix.cpp', 16 | 'src/cpm.cpp', 17 | 'src/eval_utils.cpp', 18 | 'src/parallel_eval.cpp'], 19 | language='c++', 20 | swig_opts=['-c++', '-O', '-builtin'], 21 | extra_compile_args=['-std=c++11', '-pthread'], 22 | include_dirs=[np.get_include(), 'src'] 23 | ) 24 | 25 | setup (name = 'cpm', 26 | version = '0.2', 27 | author = "Alex Kantchelian", 28 | description = """Convex Polytope Machine""", 29 | ext_modules = [cpm_module], 30 | py_modules = ["cpm"], 31 | package_dir={'': 'src'} 32 | ) 33 | -------------------------------------------------------------------------------- /src/convex_polytope_machine.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // convex_polytope_machine.cpp 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #include "convex_polytope_machine.h" 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | ConvexPolytopeMachine::ConvexPolytopeMachine(int outer_label, int dim, unsigned short k, float lambda, 31 | float entropy, float negative_cost, 32 | float positive_cost, size_t n_positives, 33 | unsigned int seed): 34 | outer_label(outer_label), k(k), lambda(lambda), entropy(entropy), negative_cost(negative_cost), 35 | positive_cost(positive_cost), n_positives(n_positives), seed(seed), W(dim, k) { 36 | 37 | iter = 0; 38 | distinct_p = 0; 39 | score = new double[k]; 40 | assignments = new int[n_positives]; 41 | occupancy = new unsigned int[k](); 42 | 43 | for (size_t i = 0; i < n_positives; ++i) { 44 | assignments[i] = -1; 45 | } 46 | } 47 | 48 | void ConvexPolytopeMachine::clear() { 49 | iter = 0; 50 | distinct_p = 0; 51 | W.clear(); 52 | } 53 | 54 | void ConvexPolytopeMachine::serializeModel(const char* filename) const { 55 | std::ofstream ss(filename); 56 | 57 | ss << "version: " << 2 << '\n'; 58 | 59 | ss << "\n### DATASET ###\n"; 60 | ss << "outer label: " << outer_label << '\n'; 61 | ss << "outer instances: " << n_positives << '\n'; 62 | ss << "dimensions: " << W.dimensions << '\n'; 63 | 64 | ss << "\n### CPM PARAMETERS ###\n"; 65 | ss << "hyperplanes: " << k << '\n'; 66 | ss << "iterations: " << iter-1 << '\n'; 67 | ss << "lambda: " << lambda << '\n'; 68 | ss << "entropy: " << entropy << '\n'; 69 | ss << "cost ratio: " << negative_cost/positive_cost << '\n'; 70 | ss << "seed: " << seed << '\n'; 71 | 72 | ss << "\n### ASSIGNMENTS COUNTS ###\n"; 73 | int active = 0; 74 | 75 | for (int i=0; i 0) { 77 | active++; 78 | } 79 | } 80 | ss << "active classifiers: " << active << '\n'; 81 | ss << "counts: "; 82 | for (int i=0; i::max(), ':'); 97 | ss >> version; 98 | 99 | if (version != 2) { 100 | throw 
std::runtime_error("Unsupported model file version."); 101 | } 102 | 103 | ss.ignore(std::numeric_limits::max(), ':'); 104 | int outer_label; 105 | ss >> outer_label; 106 | 107 | ss.ignore(std::numeric_limits::max(), ':'); 108 | size_t n_positives; 109 | ss >> n_positives; 110 | 111 | ss.ignore(std::numeric_limits::max(), ':'); 112 | int dimensions; 113 | ss >> dimensions; 114 | 115 | ss.ignore(std::numeric_limits::max(), ':'); 116 | int k; 117 | ss >> k; 118 | 119 | ss.ignore(std::numeric_limits::max(), ':'); 120 | int iter; 121 | ss >> iter; 122 | 123 | ss.ignore(std::numeric_limits::max(), ':'); 124 | float lambda; 125 | ss >> lambda; 126 | 127 | ss.ignore(std::numeric_limits::max(), ':'); 128 | float entropy; 129 | ss >> entropy; 130 | 131 | ss.ignore(std::numeric_limits::max(), ':'); 132 | float cost_ratio; 133 | ss >> cost_ratio; 134 | 135 | ss.ignore(std::numeric_limits::max(), ':'); 136 | unsigned int seed; 137 | ss >> seed; 138 | 139 | ss.ignore(std::numeric_limits::max(), ':'); 140 | int active; 141 | ss >> active; 142 | 143 | ss.ignore(std::numeric_limits::max(), ':'); 144 | ss.ignore(std::numeric_limits::max(), ':'); 145 | ss.ignore(std::numeric_limits::max(), '\n'); 146 | 147 | ConvexPolytopeMachine* cpm = new ConvexPolytopeMachine(outer_label, dimensions, (unsigned short) active, lambda, 148 | entropy, 149 | cost_ratio/(1.0f + cost_ratio), 150 | 1.0f/(1.0f + cost_ratio), 151 | n_positives, seed); 152 | (cpm->W).deserialize(&ss); 153 | 154 | if (ss.fail() | ss.eof() | ss.bad()) { 155 | throw std::runtime_error("Error when reading model file."); 156 | } 157 | 158 | return cpm; 159 | } 160 | 161 | std::pair ConvexPolytopeMachine::predict(const SparseVector& s) { 162 | W.inner(s, score); 163 | 164 | int index = 0; 165 | double max_score = score[0]; 166 | 167 | for (int i=1; i max_score) { 169 | index = i; 170 | max_score = score[i]; 171 | } 172 | } 173 | 174 | return std::make_pair(score[index], index); 175 | } 176 | 177 | 178 | std::pair 
ConvexPolytopeMachine::heuristicMax(const SparseVector s, size_t cid) { 179 | // true argmax 180 | unsigned short true_imax = 0; 181 | double max_score = score[true_imax]; 182 | 183 | for (unsigned short i = 1; i < k; ++i) { 184 | if (max_score < score[i]) { 185 | max_score = score[i]; 186 | true_imax = i; 187 | } 188 | } 189 | 190 | double N = distinct_p; 191 | // not enough samples to compute an entropy score yet 192 | // just return argmax 193 | if (entropy <= 0 || N < k * 5.0f) { 194 | return std::make_pair(true_imax, true_imax); 195 | } 196 | 197 | // compute old and candidate entropy 198 | int old = assignments[cid]; 199 | 200 | double h_old = 0; 201 | double h_new = 0; 202 | 203 | for (unsigned short i = 0; i < k; ++i) { 204 | double pi = occupancy[i]/N; 205 | double hpi = 0; 206 | 207 | if (pi > pepsilon) { 208 | hpi = -pi * std::log(pi); 209 | } 210 | h_old += hpi; 211 | 212 | if (old != -1) { 213 | if ((old == true_imax) || (i != old && i != true_imax)) { 214 | h_new += hpi; 215 | } else { 216 | if (i == old) { 217 | double pold = (occupancy[i] - 1)/N; 218 | h_new += -pold * std::log(pold); 219 | } else if (i == true_imax) { 220 | double pnew = (occupancy[i] + 1)/N; 221 | h_new += -pnew * std::log(pnew); 222 | } 223 | } 224 | } else { 225 | double pi; 226 | if (i == true_imax) { 227 | pi = (occupancy[i] + 1)/(N+1.0f); 228 | } else { 229 | pi = occupancy[i]/(N+1.0f); 230 | } 231 | 232 | h_new += -pi * std::log(pi); 233 | } 234 | } 235 | 236 | // return regular argmax when new entropy is large enough 237 | if ((h_new >= entropy) || (h_old < h_new)) { 238 | return std::make_pair(true_imax, true_imax); 239 | } 240 | 241 | // return argmax provided entropy is guaranteed to increase 242 | if (old != -1) { 243 | unsigned short imax = 0; 244 | double max_score = -std::numeric_limits::infinity(); 245 | 246 | for (unsigned short i = 0; i < k; ++i) { 247 | if (occupancy[i] < occupancy[old]) { 248 | if (max_score < score[i]) { 249 | max_score = score[i]; 250 | imax = 
i; 251 | } 252 | } 253 | } 254 | 255 | return std::make_pair(imax, true_imax); 256 | } else { 257 | unsigned short imax = 0; 258 | double max_score = -std::numeric_limits::infinity(); 259 | 260 | for (unsigned short i = 0; i < k; ++i) { 261 | if (occupancy[i] < (k/N)) { 262 | if (max_score < score[i]) { 263 | max_score = score[i]; 264 | imax = i; 265 | } 266 | } 267 | } 268 | 269 | return std::make_pair(imax, true_imax); 270 | } 271 | } 272 | 273 | void ConvexPolytopeMachine::setHistory(size_t cid, unsigned short imax) { 274 | int old = assignments[cid]; 275 | if (cid >= n_positives){ 276 | throw std::logic_error("positive instance special id >= number of positive instances."); 277 | } 278 | assignments[cid] = imax; 279 | 280 | occupancy[imax]++; 281 | if (old == -1) { 282 | distinct_p++; 283 | } else { 284 | occupancy[old]--; 285 | } 286 | } 287 | 288 | std::tuple ConvexPolytopeMachine::oneStep(const std::tuple& lsi) { 289 | 290 | const double eta = 1.0/(lambda * (iter + 2.0)); // learning rate 291 | 292 | const SparseVector& s = std::get<1>(lsi); 293 | 294 | // get all scores 295 | W.inner(s, score); 296 | 297 | unsigned short imax; 298 | double max_score; 299 | 300 | double eloss = 0.0; 301 | 302 | // learn from instance 303 | if (std::get<0>(lsi) == outer_label) { // case y = +1 304 | 305 | // compute attribution 306 | auto imax_trueimax = heuristicMax(s, std::get<2>(lsi)); 307 | imax = imax_trueimax.first; 308 | unsigned short true_imax = imax_trueimax.second; 309 | 310 | max_score = score[imax]; 311 | 312 | // compute exclusion loss 313 | for (unsigned short i = 0; i < k; ++i) { 314 | if (i != imax) { 315 | eloss += std::max(0.0, score[i]); 316 | } 317 | } 318 | 319 | if (max_score < margin) { 320 | W.addInplace(s, eta * positive_cost, imax); 321 | } 322 | 323 | setHistory(std::get<2>(lsi), true_imax); 324 | imax = true_imax; 325 | } else { // case y = -1 326 | imax = 0; 327 | max_score = score[imax]; 328 | 329 | // push down all classifiers as needed 330 | 
bool active= false; 331 | double* grad_mul = new double[k]; 332 | 333 | for(unsigned short i = 0; i < k; ++i) { 334 | if (score[i] > -margin) { 335 | grad_mul[i] = -eta * negative_cost; 336 | active = true; 337 | } else { 338 | grad_mul[i] = 0.0; 339 | } 340 | 341 | if (score[i] > max_score) { 342 | imax = i; 343 | max_score = score[imax]; 344 | } 345 | } 346 | 347 | if (active) W.addInplace(std::get<1>(lsi), grad_mul); 348 | delete[] grad_mul; 349 | } 350 | 351 | // L2 penalty 352 | double coeff = std::max(0.0, 1.0 - eta*lambda); 353 | W.mulInplace(coeff); 354 | 355 | iter++; 356 | return std::make_tuple(max_score, eloss, imax); 357 | } 358 | -------------------------------------------------------------------------------- /src/convex_polytope_machine.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // convex_polytope_machine.h 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #ifndef __cpm__convex_polytope_machine__ 23 | #define __cpm__convex_polytope_machine__ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include "sparse_vector.h" 30 | #include "dense_matrix.h" 31 | 32 | class ConvexPolytopeMachine{ 33 | public: 34 | /* Initalizes the parameters of the SGD 35 | * 36 | * outer_label: label on the outside of the polytope 37 | * dim: maximum number of dimensions 38 | * k: number of sub-classifiers 39 | * lambda: L2 penalty, taken per iteration 40 | * entropy: minimum assignment entropy to maintain 41 | * negative_cost: cost incurred on false positives 42 | * positive_cost: cost incurred on true positives 43 | * n_positives: total number of outer_label samples 44 | */ 45 | ConvexPolytopeMachine(int outer_label, int dim, unsigned short k, 46 | float lambda, float entropy, 47 | float negative_cost, float positive_cost, size_t n_positives, 48 | unsigned int seed); 49 | 50 | // destructor 51 | ~ConvexPolytopeMachine() { 52 | delete[] score; 53 | delete[] assignments; 54 | delete[] occupancy; 55 | } 56 | 57 | // perform one SGD step with the given sample 58 | std::tuple oneStep(const std::tuple& lsi); 59 | 60 | // get number of iterations since beginning 61 | size_t getIter() const {return iter;}; 62 | 63 | // get dense matrix 64 | const DenseMatrix& getW() const { 65 | return W; 66 | } 67 | 68 | // get score and assigned classifier for given instance 69 | std::pair predict(const SparseVector& s); 70 | 71 | // scores for each sub-classifier 72 | const double* getScores() const {return score;} 73 | 74 | // clear W and set iter to 0 75 | void clear(); 76 | 77 | // get table of all assignments of positive class 78 | const int* getAssignments() const {return assignments;}; 79 | 80 | const int getAssignment(size_t cid) const {return assignments[cid];} 81 | 82 | // write model to disk 83 | void 
serializeModel(const char* filename) const; 84 | 85 | // read model from disk 86 | static ConvexPolytopeMachine* deserializeModel(const char* filename); 87 | 88 | // margin value 89 | const float margin = 1.0f; 90 | 91 | const int outer_label; 92 | const unsigned short k; 93 | const float lambda; 94 | const float entropy; 95 | const float negative_cost; 96 | const float positive_cost; 97 | const size_t n_positives; 98 | const unsigned int seed; 99 | 100 | private: 101 | const float pepsilon = 1e-6f; 102 | double* score; // w's 103 | size_t iter; 104 | DenseMatrix W; 105 | 106 | int* assignments; // holds assignments history for outer instances 107 | unsigned int* occupancy; // holds # of firings per classifier 108 | size_t distinct_p; // number of entries filled up in history 109 | 110 | void setHistory(size_t cid, unsigned short imax); 111 | std::pair heuristicMax(const SparseVector s, size_t cid); 112 | // computes optimal assignment that will maintain entropy constraint 113 | // updates all counting-related fields (namely history and occupancy) 114 | }; 115 | 116 | #endif /* defined(__cpm__convex_polytope_machine__) */ 117 | -------------------------------------------------------------------------------- /src/cpm.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // cpm.cpp 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include "eval_utils.h" 27 | #include "cpm.h" 28 | 29 | CPM::CPM(int k, int outer_label, float lambda, float entropy, float cost_ratio, unsigned int seed) : outer_label(outer_label), k(k), lambda(lambda), entropy(entropy), cost_ratio(cost_ratio), seed(seed), generator(seed) { 30 | } 31 | 32 | void CPM::fit(const StochasticDataAdaptor& trainset, int iterations, bool reshuffle, bool verbose){ 33 | size_t dim = trainset.getDimensions(); 34 | 35 | size_t n_instances = trainset.getNInstances(); 36 | 37 | if (n_instances < 1) { 38 | std::cerr << "Empty training set" << std::endl; 39 | return; 40 | } 41 | 42 | size_t n_positives = trainset.getCountsPerClass().find(outer_label)->second; 43 | size_t n_negatives = n_instances - n_positives; 44 | 45 | if (verbose){ 46 | std::cout << "Number of dimensions: " << dim <<'\n' 47 | << "Number of classifiers: " << k << '\n' 48 | << "Lambda: " << lambda << '\n' 49 | << "Iterations: " << iterations << '\n' 50 | << "Cost ratio: " << cost_ratio << '\n' 51 | << "Minimum entropy: " << std::exp(entropy) << "\n\n"; 52 | 53 | std::cout << "negatives: " << n_negatives << " (" 54 | << 100*((float) n_negatives)/n_instances 55 | << "%), positives: " << n_positives << " (" 56 | << 100*((float) n_positives)/n_instances << "%)\n\n"; 57 | } 58 | 59 | if(model) { 60 | delete model; 61 | } 62 | 63 | model = new ConvexPolytopeMachine(outer_label, (int) dim, (unsigned short) k, lambda/iterations, entropy, cost_ratio/(1.0f + cost_ratio), 1.0f/(1.0f+cost_ratio), n_positives, seed); 64 | 65 | int seen_positives = 0; // number of positive instances seen 66 | int seen_negatives = 0; // number of negative instances seen 67 | double pos_loss = 0; // loss on positive samples 68 | double neg_loss = 0; // loss on negative samples 69 | double redundancy = 0; // exclusion loss 70 | size_t reassignments = 0; 
71 | int epoch = 0; 72 | 73 | if (verbose){ 74 | std::cout << "Round\tReassignments\tRedundancy\tEntropy\tNegative loss\tPositive loss\n"; 75 | } 76 | 77 | size_t* perm = new size_t[n_instances]; 78 | for (size_t i=0; i < n_instances; i++) { //FIXME 79 | perm[i] = i; 80 | } 81 | std::shuffle(perm, perm + n_instances, generator); 82 | 83 | for(int iter = 0; iter < iterations; ++iter) { 84 | // sample next instance 85 | const std::tuple& lic = trainset.getInstance(perm[iter%n_instances]); 86 | 87 | int previous_assignment = (std::get<0>(lic) == outer_label) ? model->getAssignment(std::get<2>(lic)) : -1; 88 | 89 | auto score_eloss_assignment_active = model->oneStep(lic); 90 | 91 | float score = std::get<0>(score_eloss_assignment_active); 92 | float redun = std::get<1>(score_eloss_assignment_active); 93 | unsigned short assignment = std::get<2>(score_eloss_assignment_active); 94 | 95 | if (std::get<0>(lic) == outer_label) { 96 | pos_loss += std::max(0.0, 1.0 - score); 97 | redundancy += redun; 98 | 99 | if (previous_assignment != assignment) { 100 | reassignments++; 101 | } 102 | 103 | seen_positives++; 104 | 105 | } else { 106 | neg_loss += std::max(0.0, 1.0 + score); 107 | seen_negatives++; 108 | } 109 | 110 | if ((seen_negatives >= (int) n_negatives) && 111 | (seen_positives >= (int) n_positives)) { 112 | float rate = ((float) reassignments) / n_positives; 113 | float entropy = (float) evalutils::entropy(model->getAssignments(), n_positives, (unsigned short) k); 114 | 115 | if(verbose) { 116 | std::cout << epoch << '\t' 117 | << rate << '\t' 118 | << redundancy/n_positives << '\t' 119 | << entropy << '\t' 120 | << neg_loss/seen_negatives << '\t' 121 | << pos_loss/n_positives << std::endl; 122 | } 123 | 124 | seen_positives = 0; 125 | reassignments = 0; 126 | seen_negatives = 0; 127 | neg_loss = 0.0; 128 | pos_loss = 0.0; 129 | redundancy = 0.0; 130 | epoch++; 131 | 132 | if (reshuffle) std::shuffle(perm, perm + n_instances, generator); 133 | } 134 | } 135 | 136 | 
delete[] perm; 137 | } 138 | 139 | void CPM::predict(const StochasticDataAdaptor& testset, float* scores, int* assignments) const { 140 | size_t n_instances = testset.getNInstances(); 141 | 142 | if (!model) { 143 | throw std::runtime_error("Empty model."); 144 | } 145 | 146 | for (size_t i = 0; i < n_instances; ++i) { 147 | auto sa = model->predict(std::get<1>(testset.getInstance(i))); 148 | scores[i] = (float) sa.first; 149 | assignments[i] = sa.second; 150 | } 151 | } 152 | 153 | std::pair CPM::predict(const SparseVector& sv) const { 154 | return model->predict(sv); 155 | } 156 | 157 | void CPM::serializeModel(const char* filename) const { 158 | if(model) { 159 | model->serializeModel(filename); 160 | } 161 | } 162 | 163 | CPM* CPM::deserializeModel(const char* filename) { 164 | ConvexPolytopeMachine* model = ConvexPolytopeMachine::deserializeModel(filename); 165 | 166 | CPM* res = new CPM(model->k, model->outer_label, model->lambda, model->entropy, 167 | model->positive_cost/(model->positive_cost + model->negative_cost), 168 | model->seed); 169 | res->model = model; 170 | 171 | return res; 172 | } 173 | -------------------------------------------------------------------------------- /src/cpm.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // cpm.h 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #ifndef __cpm__cpm__ 23 | #define __cpm__cpm__ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include "stochastic_data_adaptor.h" 30 | #include "convex_polytope_machine.h" 31 | #include "sparse_vector.h" 32 | 33 | class CPM { 34 | public: 35 | CPM(int k, int outer_label, float lambda, float entropy, float cost_ratio, unsigned int seed); 36 | // lambda is taken as the global penalty constraint in the optimization problem (in praticular, 37 | // it will later be divided by the number of iterations) 38 | ~CPM() {delete model;}; 39 | 40 | void fit(const StochasticDataAdaptor& trainset, int iterations, bool reshuffle, bool verbose); 41 | void predict(const StochasticDataAdaptor& testset, float* scores, int* assignments) const; 42 | std::pair predict(const SparseVector& sv) const; 43 | void serializeModel(const char* filename) const; 44 | static CPM* deserializeModel(const char* filename); 45 | 46 | const int outer_label; 47 | const int k; 48 | const float lambda; 49 | const float entropy; 50 | const float cost_ratio; 51 | const unsigned int seed; 52 | 53 | private: 54 | std::mt19937 generator; 55 | ConvexPolytopeMachine* model = nullptr; 56 | }; 57 | 58 | #endif /* defined(__cpm__cpm__) */ 59 | -------------------------------------------------------------------------------- /src/cpm.py: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by SWIG (http://www.swig.org). 2 | # Version 3.0.2 3 | # 4 | # Do not make changes to this file unless you know what you are doing--modify 5 | # the SWIG interface file instead. 6 | 7 | # Copyright 2014 Alex Kantchelian 8 | # 9 | # Licensed under the Apache License, Version 2.0 (the "License"); 10 | # you may not use this file except in compliance with the License. 
11 | # You may obtain a copy of the License at 12 | # 13 | # http://www.apache.org/licenses/LICENSE-2.0 14 | # 15 | # Unless required by applicable law or agreed to in writing, software 16 | # distributed under the License is distributed on an "AS IS" BASIS, 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # See the License for the specific language governing permissions and 19 | # limitations under the License. 20 | # 21 | # Author: Alex Kantchelian, 2014 22 | # akant@cs.berkeley.edu 23 | 24 | import random 25 | import numpy as np 26 | from scipy import sparse 27 | 28 | 29 | 30 | 31 | """ 32 | This module provides a wrapper for the Convex Polytope Machine C++ code. 33 | """ 34 | 35 | 36 | from sys import version_info 37 | if version_info >= (2,6,0): 38 | def swig_import_helper(): 39 | from os.path import dirname 40 | import imp 41 | fp = None 42 | try: 43 | fp, pathname, description = imp.find_module('_cpm', [dirname(__file__)]) 44 | except ImportError: 45 | import _cpm 46 | return _cpm 47 | if fp is not None: 48 | try: 49 | _mod = imp.load_module('_cpm', fp, pathname, description) 50 | finally: 51 | fp.close() 52 | return _mod 53 | _cpm = swig_import_helper() 54 | del swig_import_helper 55 | else: 56 | import _cpm 57 | del version_info 58 | from _cpm import * 59 | try: 60 | _swig_property = property 61 | except NameError: 62 | pass # Python < 2.2 doesn't have 'property'. 
63 | def _swig_setattr_nondynamic(self,class_type,name,value,static=1): 64 | if (name == "thisown"): return self.this.own(value) 65 | if (name == "this"): 66 | if type(value).__name__ == 'SwigPyObject': 67 | self.__dict__[name] = value 68 | return 69 | method = class_type.__swig_setmethods__.get(name,None) 70 | if method: return method(self,value) 71 | if (not static): 72 | self.__dict__[name] = value 73 | else: 74 | raise AttributeError("You cannot add attributes to %s" % self) 75 | 76 | def _swig_setattr(self,class_type,name,value): 77 | return _swig_setattr_nondynamic(self,class_type,name,value,0) 78 | 79 | def _swig_getattr(self,class_type,name): 80 | if (name == "thisown"): return self.this.own() 81 | method = class_type.__swig_getmethods__.get(name,None) 82 | if method: return method(self) 83 | raise AttributeError(name) 84 | 85 | def _swig_repr(self): 86 | try: strthis = "proxy of " + self.this.__repr__() 87 | except: strthis = "" 88 | return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,) 89 | 90 | try: 91 | _object = object 92 | _newclass = 1 93 | except AttributeError: 94 | class _object : pass 95 | _newclass = 0 96 | 97 | 98 | def _swig_setattr_nondynamic_method(set): 99 | def set_attr(self,name,value): 100 | if (name == "thisown"): return self.this.own(value) 101 | if hasattr(self,name) or (name == "this"): 102 | set(self,name,value) 103 | else: 104 | raise AttributeError("You cannot add attributes to %s" % self) 105 | return set_attr 106 | 107 | 108 | 109 | 110 | 111 | class Dataset(_Dataset): 112 | def __init__(self, *args): 113 | """Constructs a labeled dataset object that can be used for CPM training 114 | and prediction (labels will be ignored when used for prediction). 115 | This always incurs a memory copy (for either dense or sparse matrices) 116 | or allocation (when loading from libSVM format file). 
117 | 118 | Dataset(filename): 119 | filename: str 120 | 121 | Creates a dataset from a libSVM file format on disk. 122 | 123 | Dataset(X, Y): 124 | X: 2d float array-like object. Sparse scipy CSR matrices are supported. 125 | Y: 1d int array-like object 126 | 127 | Creates a dataset from instances X (one instance per row) and labels Y. 128 | """ 129 | if len(args) == 1: 130 | super(Dataset, self).__init__(*args) 131 | 132 | if len(args) == 2: 133 | if sparse.isspmatrix_csr(args[0]): 134 | super(Dataset, self).__init__(args[0].data, args[0].indices, args[0].indptr, args[1]) 135 | else: 136 | super(Dataset, self).__init__(*args) 137 | 138 | if len(args) > 2: 139 | raise ValueError("Too many arguments.") 140 | 141 | def getLabels(self): 142 | """Returns a numpy array of labels.""" 143 | return self._getLabels(int(self.getNInstances())) 144 | 145 | 146 | class CPM(_CPM): 147 | def __init__(self, k, C=1.0, entropy=0.0, 148 | cost_ratio=1.0, outer_label=1, 149 | seed=None): 150 | """Initialize an empty CPM model. 151 | 152 | Inputs: 153 | k: int -- number of sub-classifiers 154 | C: float -- inverse of L2 regularization factor 155 | entropy: float -- minimal assignment entropy to maintain 156 | cost_ratio: float -- in penalty, cost ratio between negative and positive 157 | misclassification training errors 158 | outer_label: int -- outside (positive) class 159 | seed: (None, int) -- random seed for reproducibility 160 | """ 161 | if seed is None: 162 | seed = int(random.getrandbits(32)) 163 | 164 | super(CPM, self).__init__(k, outer_label, 1.0/C, entropy, cost_ratio, seed) 165 | 166 | def fit(self, trainset, iterations=-1, reshuffle=True, verbose=False): 167 | """Trains a model via SGD. 168 | 169 | Inputs: 170 | trainset: Dataset 171 | iterations: int -- number of SGD steps. If < 0, will be set to 10 * training set size. 
172 | reshuffle: bool -- reshuffle trainingset between each epoch 173 | verbose: bool -- print training statistics on stdout 174 | """ 175 | if iterations < 0: 176 | iterations = 10 * trainset.getNInstances() 177 | super(CPM, self).fit(trainset, iterations, reshuffle, verbose) 178 | 179 | def predict(self, testset): 180 | """Performs inference. 181 | Input: 182 | testset: Dataset 183 | 184 | Outputs: 185 | scores: 1d float array of model scores 186 | assignments: 1d int array of active sub-classifiers per instance 187 | """ 188 | return super(CPM, self).predict(testset, int(testset.getNInstances()), int(testset.getNInstances())) 189 | 190 | 191 | def parallelFitPredict(trainset, testset, parameters): 192 | """Trains and tests len(parameters) models on trainset and testset respectively. 193 | This will launch exactly len(parameters) threads, use at your own risk. 194 | 195 | Inputs: 196 | trainset: Dataset - the learning dataset 197 | testset: Dataset - the testing dataset 198 | parameters: list of dict objects. Each dict object can contain the 199 | following string keys defining the parameters of the run. See CPM class. 200 | Default values are provided for some keys. 
201 | k 202 | outer_label (1) 203 | iterations 204 | C (1) 205 | entropy (0) 206 | cost_ratio (1) 207 | reshuffle: (True) 208 | 209 | Outputs: 210 | S: len(parameters) x testset.getCounts() float array of scores 211 | A: len(parameters) x testset.getCounts() int array of assignments 212 | """ 213 | configs = [] 214 | for params in parameters: 215 | configs.append(_CPMConfig(params.get('outer_label', 1), params['k'], 1.0/params.get('C', 1), 216 | params.get('entropy', 0), params.get('cost_ratio', 1), 217 | params['iterations'], params.get('reshuffle', True))) 218 | 219 | S, A = _parallelEval(trainset, testset, configs, 220 | int(len(parameters)*testset.getNInstances()), 221 | int(len(parameters)*testset.getNInstances())) 222 | S.resize((len(parameters), testset.getNInstances())) 223 | A.resize((len(parameters), testset.getNInstances())) 224 | 225 | return S, A 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /src/dense_matrix.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // dense_matrix.cpp 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include "dense_matrix.h" 27 | 28 | DenseMatrix::DenseMatrix(int dimensions, int classifiers) : dimensions(dimensions), classifiers(classifiers) { 29 | 30 | data = new float[((size_t) dimensions) * ((size_t) classifiers)](); 31 | 32 | scales = new double[classifiers]; 33 | for (int k = 0; k < classifiers; ++k) { 34 | scales[k] = 1.0; 35 | } 36 | 37 | intercept = new double[classifiers](); 38 | } 39 | 40 | void DenseMatrix::clear() { 41 | for(size_t i = 0; i < ((size_t) dimensions) * ((size_t) classifiers); ++i) { 42 | data[i] = 0.0f; 43 | } 44 | 45 | for (int k = 0; k < classifiers; ++k) { 46 | scales[k] = 0.0; 47 | intercept[k] = 0; 48 | } 49 | } 50 | 51 | void DenseMatrix::inner(const SparseVector& s, double* res, const bool* fmask) const { 52 | for(int k = 0; k < classifiers; ++k) { 53 | res[k] = 0.0; 54 | } 55 | 56 | int i = 0; 57 | for(auto const& iv: s.data){ 58 | if (iv.index >= dimensions) continue; // ignore extra dimensions 59 | if (fmask && fmask[i]) continue; // dropout feature 60 | 61 | size_t offset = ((size_t) iv.index) * ((size_t) classifiers); 62 | double value = (double) iv.value; 63 | 64 | for(size_t k = 0; k < (size_t) classifiers; ++k){ 65 | res[k] += value * ((double) data[offset + k]); 66 | } 67 | ++i; 68 | } 69 | 70 | for (int k = 0; k < classifiers; ++k) { 71 | res[k] = res[k]*scales[k] + intercept[k]; 72 | } 73 | } 74 | 75 | void DenseMatrix::rescale() { 76 | for (size_t i = 0; i < ((size_t) dimensions) * ((size_t) classifiers); ++i) { 77 | data[i] = (float) (((double) data[i]) + scales[i%classifiers]); 78 | } 79 | 80 | for (int k = 0; k < classifiers; ++k) { 81 | scales[k] = 1.0; 82 | } 83 | } 84 | 85 | void DenseMatrix::mulInplace(const double * const a) { 86 | bool torescale = false; 87 | for (int k = 0; k < classifiers; ++k) { 88 | scales[k] *= a[k]; 89 | intercept[k] 
*= a[k]; 90 | if (scales[k] < min_scale) torescale = true; 91 | } 92 | 93 | if (torescale) rescale(); 94 | } 95 | 96 | void DenseMatrix::mulInplace(double a) { 97 | bool torescale = false; 98 | for (int k = 0; k < classifiers; ++k) { 99 | scales[k] *= a; 100 | intercept[k] *= a; 101 | if (scales[k] < min_scale) torescale = true; 102 | } 103 | 104 | if (torescale) rescale(); 105 | } 106 | 107 | void DenseMatrix::addInplace(const SparseVector& s, const double* const a, const bool* fmask) { 108 | int i = 0; 109 | for(auto const& iv: s.data) { 110 | if(fmask && fmask[i]) continue; 111 | 112 | double value = iv.value; 113 | size_t offset = ((size_t) iv.index) * ((size_t) classifiers); 114 | 115 | for(size_t k = 0; k < ((size_t) classifiers); ++k){ 116 | data[k + offset] = (float) (((double) data[k + offset]) + (value * a[k])/scales[k]); 117 | } 118 | ++i; 119 | } 120 | 121 | for(int k = 0; k < classifiers; ++k) { 122 | intercept[k] += bias * a[k]; 123 | } 124 | } 125 | 126 | void DenseMatrix::addInplace(const SparseVector& s, double a, int k, const bool* fmask) { 127 | int i = 0; 128 | for(auto const& iv: s.data) { 129 | if(fmask && fmask[i]) continue; 130 | 131 | double value = iv.value; 132 | size_t index = ((size_t) iv.index) * ((size_t) classifiers) + ((size_t) k); 133 | 134 | data[index] = (float) (((double) data[index]) + (a * value)/scales[k]); 135 | ++i; 136 | } 137 | 138 | intercept[k] += bias * a; 139 | } 140 | 141 | double DenseMatrix::l2norm() const { 142 | double res = 0; 143 | for(size_t i = 0; i< ((size_t) dimensions) * ((size_t) classifiers); ++i){ 144 | res += (((double) data[i]) * scales[i%classifiers]) * (((double) data[i]) * scales[i%classifiers]); 145 | } 146 | 147 | return std::sqrt(res); 148 | } 149 | 150 | void DenseMatrix::serialize(std::ofstream* outstream) const { 151 | for(size_t i = 0; i < ((size_t) dimensions) * ((size_t) classifiers); ++i) { 152 | int k = i%classifiers; 153 | *outstream << scales[k] * data[i] << ' '; 154 | } 155 | 156 | 
for(int i = 0; i < classifiers; ++i){ 157 | *outstream << intercept[i] << ' '; 158 | } 159 | 160 | *outstream << '\n'; 161 | } 162 | 163 | void DenseMatrix::deserialize(std::ifstream* instream) { 164 | for (size_t i = 0; i < ((size_t) dimensions) * ((size_t) classifiers); ++i){ 165 | *instream >> data[i]; 166 | } 167 | 168 | for (int i = 0; i < classifiers; ++i) { 169 | *instream >> intercept[i]; 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /src/dense_matrix.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // dense_matrix.h 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #ifndef __cpm__dense_matrix__ 23 | #define __cpm__dense_matrix__ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "sparse_vector.h" 31 | 32 | class DenseMatrix { 33 | public: 34 | DenseMatrix(int dimensions, int classifiers); 35 | 36 | DenseMatrix(const DenseMatrix& other) : dimensions(other.dimensions), classifiers(other.classifiers) { 37 | 38 | data = new float[dimensions * ((size_t) classifiers)]; 39 | std::memcpy(data, other.data, 40 | sizeof(float) * ((size_t) dimensions) * ((size_t) classifiers)); 41 | 42 | scales = new double[classifiers]; 43 | std::memcpy(scales, other.scales, sizeof(double) * classifiers); 44 | 45 | intercept = new double[classifiers]; 46 | std::memcpy(intercept, other.intercept, sizeof(double) * classifiers); 47 | } 48 | 49 | DenseMatrix(DenseMatrix&& other) : dimensions(other.dimensions), classifiers(other.classifiers), data(other.data), scales(other.scales), intercept(other.intercept) { 50 | 51 | other.data = nullptr; 52 | other.scales = nullptr; 53 | other.intercept = nullptr; 54 | //other.norms2 = nullptr; 55 | } 56 | 57 | ~DenseMatrix() {delete[] data; delete[] scales; delete[] intercept;}; 58 | 59 | // res will be zeroed-out 60 | // res must have 'classifiers' size 61 | void inner(const SparseVector& s, double* res, const bool* fmask=nullptr) const; 62 | 63 | double l2norm() const; 64 | 65 | // for all k, w_k += a_k * s 66 | // with optional support for dropout noise 67 | void addInplace(const SparseVector& s, const double * const a, const bool* fmask=nullptr); 68 | 69 | // w_k += a * s 70 | // with optional support for dropout noise 71 | void addInplace(const SparseVector& s, double a, int k, const bool* fmask=nullptr); 72 | 73 | // for all k, w_k *= a_k 74 | void mulInplace(const double * const a); 75 | 76 | // w *= a 77 | void mulInplace(double a); 78 | 79 | // zeros-out matrix 80 
| void clear(); 81 | 82 | void serialize(std::ofstream* outstream) const; 83 | void deserialize(std::ifstream* instream); 84 | 85 | const int dimensions; 86 | const int classifiers; 87 | 88 | const double bias = 1.0; 89 | 90 | private: 91 | // unscaled data 92 | float* data; 93 | 94 | // data scales 95 | double* scales; 96 | 97 | // bias terms are scaled 98 | double* intercept; 99 | 100 | void rescale(); 101 | const double min_scale = std::sqrt(std::numeric_limits::min()); 102 | }; 103 | 104 | 105 | #endif /* defined(__cpm__dense_matrix__) */ 106 | -------------------------------------------------------------------------------- /src/eval_utils.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // eval_utils.cpp 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #include "eval_utils.h" 29 | 30 | namespace evalutils { 31 | const float margin = 1.0f; 32 | 33 | double entropy(const int* assignments, size_t length, unsigned short k) { 34 | int* occ = new int[k](); 35 | 36 | for (size_t i = 0; i < length; ++i) { 37 | if ((assignments[i] < 0) || (assignments[i] >= k)) { 38 | throw std::logic_error("Assignment int outside of [0;k-1]."); 39 | } 40 | occ[assignments[i]]++; 41 | } 42 | 43 | double entropy = 0; 44 | 45 | for (unsigned short i = 0; i < k; ++i) { 46 | double p = ((double) occ[i])/length; 47 | if (p > 0) { 48 | entropy -= p * std::log(p); 49 | } 50 | } 51 | 52 | entropy /= std::log(2.0); 53 | 54 | delete[] occ; 55 | return entropy; 56 | } 57 | 58 | std::unique_ptr> measure(const StochasticDataAdaptor& testset, ConvexPolytopeMachine& model) { 59 | int outer_label = model.outer_label; 60 | int k = model.k; 61 | 62 | double cost_pos = 0; 63 | double cost_neg = 0; 64 | double cost_exclusion = 0; 65 | double entropy = 0; 66 | double l2 = (model.getW()).l2norm(); 67 | 68 | int* occ = new int[k](); 69 | float* p = new float[k](); 70 | 71 | int n_neg = 0; 72 | int n_pos = 0; 73 | 74 | size_t fps = 0; 75 | size_t fns = 0; 76 | auto all_scores = new std::vector>(); 77 | 78 | for (size_t instance=0; instance(lic); 81 | 82 | auto score_index = model.predict(sv); 83 | float score = (float) score_index.first; 84 | int index = score_index.second; 85 | bool pred = score > 0.0f; 86 | 87 | const double* scores = model.getScores(); 88 | 89 | if (std::get<0>(lic) == outer_label) { // positive sample 90 | occ[index] += 1; 91 | 92 | if(score < margin) { 93 | cost_pos += margin - score; 94 | } 95 | 96 | for(int i = 0; i < k; i++) { 97 | if (i != index) { 98 | cost_exclusion += (scores[i] > 0.0) ? 
scores[i] : 0.0; 99 | } 100 | } 101 | 102 | if(score < 0) { 103 | fns++; 104 | if (pred) { 105 | throw std::logic_error("Negative score but predicted positive (pos instance)."); 106 | } 107 | } else { 108 | if (!pred) { 109 | throw std::logic_error("Positive score but predicted negative (pos instance)."); 110 | } 111 | } 112 | 113 | all_scores->emplace_back(true, score); 114 | 115 | n_pos++; 116 | } else { // negative sample 117 | for(int i = 0; i < k; i++) { 118 | cost_neg += (scores[i] > -margin) ? (margin + scores[i]) : 0.0; 119 | } 120 | 121 | if(score >= 0) { 122 | fps++; 123 | if(!pred) { 124 | throw std::logic_error("Positive score but predicted negative (neg instance)."); 125 | } 126 | } else if (pred) { 127 | throw std::logic_error("Negative score but predicted positive (neg instance)."); 128 | } 129 | 130 | all_scores->emplace_back(false, score); 131 | 132 | n_neg++; 133 | } 134 | } 135 | 136 | // compute entropy 137 | for (int i = 0; i < k; i++) { 138 | p[i] = ((float) occ[i]) / ((float) n_pos); 139 | if(p[i] > 0.0) { 140 | entropy -= p[i] * std::log(p[i]); 141 | } 142 | } 143 | entropy /= std::log(2.0f); // entropy in bits 144 | 145 | cost_exclusion /= n_pos; 146 | 147 | double misc_cost = (cost_neg + cost_pos)/(n_pos + n_neg); 148 | 149 | cost_pos /= n_pos; 150 | cost_neg /= n_neg; 151 | 152 | double accuracy = 1.0 - ((double) (fps + fns)) / (n_pos + n_neg); 153 | size_t tps = (size_t) n_pos - fns; 154 | double tpr = ((double) tps) / n_pos; 155 | double fpr = ((double) fps) / n_neg; 156 | double precision = ((double) tps) / (tps + fps); 157 | 158 | // compute AUCs 159 | std::sort(all_scores->begin(), all_scores->end(), 160 | [](const std::pair& lhs, const std::pair& rhs) { 161 | return lhs.second > rhs.second;}); // sorted by decreasing scores 162 | 163 | size_t i = 0; 164 | size_t fn = (size_t) n_pos; 165 | size_t fp = 0; 166 | size_t top_correct = 0; 167 | 168 | double tprs = ((float) fp) / n_neg; 169 | double fprs = 1.0f - ((float) fn) / ((float) 
n_pos); 170 | double last_tprs, last_fprs; 171 | 172 | double area001 = 0.0; 173 | double area01 = 0.0; 174 | double area1 = 0.0; 175 | while (i < all_scores->size()) { 176 | last_tprs = tprs; 177 | last_fprs = fprs; 178 | 179 | if ((*all_scores)[i].first) { 180 | fn -= 1; 181 | if (fp == 0) top_correct += 1; 182 | } else { 183 | fp += 1; 184 | } 185 | i += 1; 186 | 187 | while (i < all_scores->size() && 188 | (*all_scores)[i-1].second == (*all_scores)[i].second) { 189 | if ((*all_scores)[i].first) { 190 | fn -= 1; 191 | if (fp == 0) top_correct += 1; 192 | } else { 193 | fp += 1; 194 | } 195 | i += 1; 196 | } 197 | 198 | fprs = ((double) fp)/n_neg; 199 | tprs = 1.0 - ((double) fn)/n_pos; 200 | 201 | double darea1 = (fprs - last_fprs) * (last_tprs + tprs)/2.0; 202 | 203 | area1 += darea1; 204 | 205 | if (last_fprs < .1) { 206 | double darea01 = 0.0; 207 | if (fprs <= .1) { 208 | darea01 = (fprs - last_fprs) * (last_tprs + tprs)/2.0; 209 | } else { 210 | if (fprs > last_fprs) { 211 | double tprs01 = last_tprs + (.1 - last_fprs)/(fprs - last_fprs) * (tprs - last_tprs); 212 | darea01 = (.1 - last_fprs) * (last_tprs + tprs01)/2.0; 213 | } 214 | } 215 | 216 | area01 += darea01; 217 | 218 | if (last_fprs < .01) { 219 | double darea001 = 0.0; 220 | if (fprs <= .01) { 221 | darea001 = (fprs - last_fprs) * (last_tprs + tprs)/2.0; 222 | } else { 223 | if (fprs > last_fprs) { 224 | double tprs001 = last_tprs + (.01 - last_fprs)/(fprs - last_fprs) * (tprs - last_tprs); 225 | darea001 = (.01 - last_fprs) * (last_tprs + tprs001)/2.0; 226 | } 227 | } 228 | 229 | area001 += darea001; 230 | } 231 | } 232 | } 233 | 234 | area01 *= 10; 235 | area001 *= 100; 236 | double absolute_top = ((double) top_correct) / n_pos; 237 | 238 | delete[] occ; 239 | delete all_scores; 240 | delete[] p; 241 | 242 | auto res = std::unique_ptr>(new std::map()); 243 | (*res)[Metric::Cost] = misc_cost; 244 | (*res)[Metric::CostPositives] = cost_pos; 245 | (*res)[Metric::CostNegatives] = cost_neg; 246 | 
(*res)[Metric::L2] = l2; 247 | 248 | (*res)[Metric::Redundancy] = cost_exclusion; 249 | (*res)[Metric::Entropy] = entropy; 250 | 251 | (*res)[Metric::Accuracy] = accuracy; 252 | (*res)[Metric::TruePositiveRate] = tpr; 253 | (*res)[Metric::FalsePositiveRate] = fpr; 254 | (*res)[Metric::Precision] = precision; 255 | 256 | (*res)[Metric::AUC] = area1; 257 | (*res)[Metric::AUC01] = area01; 258 | (*res)[Metric::AUC001] = area001; 259 | (*res)[Metric::AbsoluteTop] = absolute_top; 260 | return res; 261 | } 262 | } 263 | -------------------------------------------------------------------------------- /src/eval_utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // eval_utils.h 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #ifndef __cpm__eval_utils__ 23 | #define __cpm__eval_utils__ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include "sparse_vector.h" 30 | #include "convex_polytope_machine.h" 31 | #include "stochastic_data_adaptor.h" 32 | 33 | namespace evalutils { 34 | 35 | enum Metric { Accuracy, AbsoluteTop, AUC, AUC01, AUC001, 36 | Cost, CostPositives, CostNegatives, Redundancy, Entropy, L2, 37 | TruePositiveRate, FalsePositiveRate, Precision}; 38 | 39 | double entropy(const int* assignments, size_t length, unsigned short k); 40 | 41 | std::unique_ptr> measure(const StochasticDataAdaptor& testset, ConvexPolytopeMachine& model); 42 | 43 | } 44 | 45 | #endif /* defined(__cpm__eval_utils__) */ 46 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // main.cpp 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | #include "time.h" 33 | 34 | #include "option_parser.h" 35 | #include "sparse_vector.h" 36 | #include "stochastic_data_adaptor.h" 37 | #include "convex_polytope_machine.h" 38 | #include "eval_utils.h" 39 | #include "cpm.h" 40 | 41 | int main(int argc, char* const argv[]) { 42 | OptionParser op("Perform CPM training and/or inference."); 43 | 44 | op.addOption("be quiet.", 'q', "quiet", true, false); 45 | 46 | op.addOption("number of classifiers.", 'k', "classifiers", true, (int) 1, nullptr); 47 | op.addOption("C regularization factor.", 'C', "C", true, 1.0f, nullptr); 48 | op.addOption("cost ratio of negatives vs positives.", '\0', "cost_ratio", true, 1.0f, nullptr); 49 | op.addOption("minimal (exp of) entropy to maintain in heuristic max. Value between 1 and k.", '\0', 50 | "entropy", true, 1.0f, nullptr); 51 | 52 | size_t seed = (size_t) std::chrono::system_clock::now().time_since_epoch().count(); 53 | op.addOption("random seed (for reproducibility).", '\0', "seed", false, seed, nullptr); 54 | 55 | // op.addOption("compute aggregated metrics instead of raw scores.", '\0', "agg_scores", true, false); 56 | 57 | op.addOption("outer class label (the class that will be decomposed).", '\0', "outer_label", true, (int) 1, nullptr); 58 | 59 | op.addOption("shuffle training set between epochs.", '\0', "reshuffle", true, false); 60 | 61 | op.addOption("number of iterations.", 'i', 62 | "iterations", true, (int) 50000000, nullptr); 63 | 64 | op.addOption("train data file.", 't', "train", false, "", nullptr); 65 | op.addOption("test data file.", 'c', "test", false, "", nullptr); 66 | op.addOption("model in file. 
Will be ignored if in training mode.", 'm', "model_in", false, "", nullptr); 67 | op.addOption("model out file.", 'o', "model_out", false, "", nullptr); 68 | op.addOption("scores file.", 's', "scores", false, "", nullptr); 69 | 70 | op.parseCmdString(argc, argv); 71 | 72 | const bool verbose = !op.getBool("quiet"); 73 | const int outer_label = op.getInt("outer_label"); 74 | const char* trainfile = op.getString("train"); 75 | const char* model_in = op.getString("model_in"); 76 | const char* testfile = op.getString("test"); 77 | const char* scoresfile = op.getString("scores"); 78 | const int k = op.getInt("classifiers"); 79 | const float C = op.getFloat("C"); 80 | const int iterations = op.getInt("iterations"); 81 | const float cost_ratio = op.getFloat("cost_ratio"); 82 | const float entropy = op.getFloat("entropy"); 83 | bool reshuffle = op.getBool("reshuffle"); 84 | 85 | seed = op.getSizet("seed"); 86 | if (sizeof(seed) == 8) { 87 | seed = seed ^ (seed >> 32); 88 | } 89 | 90 | CPM* model = nullptr; 91 | 92 | if (std::strlen(trainfile) > 0) { 93 | clock_t start_time = clock(); 94 | 95 | StochasticDataAdaptor trainset(trainfile); 96 | 97 | clock_t end_time = clock(); 98 | std::cout << "Loaded data in " 99 | << ((float) (end_time-start_time))/CLOCKS_PER_SEC << "s.\n"; 100 | start_time = end_time; 101 | 102 | // train cpm 103 | model = new CPM(k, outer_label, 1.0f/C, entropy, cost_ratio, (unsigned short) seed); 104 | model->fit(trainset, iterations, reshuffle, verbose); 105 | 106 | end_time = clock(); 107 | std::cout << "\nFinished " << iterations << " iterations in " << ((float) (end_time-start_time))/CLOCKS_PER_SEC << "s.\n"; 108 | 109 | const char* model_out = op.getString("model_out"); 110 | 111 | if(verbose && (std::strlen(model_out) > 0)) std::cout << "Writing model to " << model_out << '\n'; 112 | 113 | if (std::strlen(model_out) > 0) { 114 | model->serializeModel(model_out); 115 | } 116 | 117 | } else if (std::strlen(model_in) > 0) { 118 | if(verbose) std::cout 
<< "Reading model from " << model_in << '\n'; 119 | model = CPM::deserializeModel(model_in); 120 | } 121 | 122 | if (model && std::strlen(testfile) > 0) { 123 | if (std::strlen(scoresfile) == 0) { 124 | std::cerr << "Missing output scores file.\n"; 125 | exit(1); 126 | } 127 | 128 | StochasticDataAdaptor testset(testfile); 129 | 130 | std::ofstream rfile(scoresfile); 131 | 132 | for(size_t i = 0; i < testset.getNInstances(); ++i) { 133 | const std::tuple& lic = testset.getInstance(i); 134 | 135 | auto score_sub = model->predict(std::get<1>(lic)); 136 | 137 | // format: raw score (margin), assigned classifier, ground truth (model_outer_label == instance_label) 138 | rfile << score_sub.first << '\t' << score_sub.second << '\t' << (std::get<0>(lic) == model->outer_label) << '\n'; 139 | } 140 | } 141 | 142 | delete model; 143 | } 144 | -------------------------------------------------------------------------------- /src/option_parser.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // option_parser.cpp 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #include 23 | #include 24 | 25 | #include "option_parser.h" 26 | 27 | template void Option::show(){ 28 | if (long_name) { 29 | std::cout << "--" << long_name << " "; 30 | } 31 | if (short_name != '\0') { 32 | std::cout << '-' << short_name << " "; 33 | } 34 | 35 | if (typeid(value) == typeid(int*)) { 36 | std::cout << " "; 37 | } 38 | 39 | if (typeid(value) == typeid(float*)) { 40 | std::cout << " "; 41 | } 42 | 43 | if (typeid(value) == typeid(size_t*)) { 44 | std::cout << " "; 45 | } 46 | 47 | if (typeid(value) == typeid(const char**)) { 48 | std::cout << " "; 49 | } 50 | 51 | std::cout << " " << desc << '\n'; 52 | 53 | if (allowed) { 54 | std::cout << " Allowed: {"; 55 | 56 | if (typeid(value) == typeid(int*)) { 57 | for(int val: *(std::vector*) allowed) { 58 | std::cout << val<< ", "; 59 | } 60 | } 61 | 62 | if (typeid(value) == typeid(float*)) { 63 | for(float val: * (std::vector*) allowed) { 64 | std::cout << val<< ", "; 65 | } 66 | } 67 | 68 | if (typeid(value) == typeid(size_t*)) { 69 | for(size_t val: * (std::vector*) allowed) { 70 | std::cout << val<< ", "; 71 | } 72 | } 73 | 74 | if (typeid(value) == typeid(const char**)) { 75 | for(const char* val: * (std::vector*) allowed) { 76 | std::cout << val<< ", "; 77 | } 78 | } 79 | 80 | std::cout << "}\n"; 81 | } 82 | 83 | if (provide_default) { 84 | if (typeid(value) == typeid(bool*)) { 85 | std::cout << " Default: " << (* (bool*) value ? 
"True" : "False") << '\n'; 86 | return; 87 | } 88 | 89 | if (typeid(value) == typeid(int*)) { 90 | std::cout << " Default: " << * (int*)value << '\n'; 91 | return; 92 | } 93 | 94 | if (typeid(value) == typeid(float*)) { 95 | std::cout << " Default: " << * (float*)value << '\n'; 96 | return; 97 | } 98 | 99 | if (typeid(value) == typeid(size_t*)) { 100 | std::cout << " Default: " << * (size_t*)value << '\n'; 101 | return; 102 | } 103 | 104 | if (typeid(value) == typeid(const char**)) { 105 | std::cout << " Default: " << * (const char**)value << '\n'; 106 | return; 107 | } 108 | } 109 | } 110 | 111 | template <> void Option::setValue(bool val) { 112 | *value = val; 113 | } 114 | 115 | template <> void Option::setValue(int val) { 116 | if (allowed) { 117 | bool allow = false; 118 | 119 | for (auto allo : *allowed) { 120 | if (allo == val) { 121 | allow = true; 122 | break; 123 | } 124 | } 125 | 126 | if (!allow) { 127 | std::cerr << "Value " << val << " not allowed for option " << long_name << '\n'; 128 | exit(1); 129 | } 130 | } 131 | 132 | *value = val; 133 | } 134 | 135 | template <> void Option::setValue(float val) { 136 | if (allowed) { 137 | bool allow = false; 138 | 139 | for (auto allo : *allowed) { 140 | if (allo == val) { 141 | allow = true; 142 | break; 143 | } 144 | } 145 | 146 | if (!allow) { 147 | std::cerr << "Value " << val << " not allowed for option " << long_name << '\n'; 148 | exit(1); 149 | } 150 | } 151 | 152 | *value = val; 153 | } 154 | 155 | template <> void Option::setValue(size_t val) { 156 | if (allowed) { 157 | bool allow = false; 158 | 159 | for (auto allo : *allowed) { 160 | if (allo == val) { 161 | allow = true; 162 | break; 163 | } 164 | } 165 | 166 | if (!allow) { 167 | std::cerr << "Value " << val << " not allowed for option " << long_name << '\n'; 168 | exit(1); 169 | } 170 | } 171 | 172 | *value = val; 173 | } 174 | 175 | template <> void Option::setValue(const char* val) { 176 | if (allowed) { 177 | bool allow = false; 178 | 179 | 
for (auto allo : *allowed) { 180 | if (0 == std::strcmp(allo, val)) { 181 | allow = true; 182 | break; 183 | } 184 | } 185 | 186 | if (!allow) { 187 | std::cerr << "Value " << val << " not allowed for option " << long_name << '\n'; 188 | exit(1); 189 | } 190 | } 191 | 192 | *value = val; 193 | } 194 | 195 | OptionParser::OptionParser(const char* desc) : desc(desc) {} 196 | 197 | void OptionParser::addOption(const char *desc, const char short_name, const char *long_name, bool provide_default, bool default_value) { 198 | bool_options.emplace_back(desc, short_name, long_name, provide_default, default_value, nullptr); 199 | } 200 | 201 | void OptionParser::addOption(const char *desc, const char short_name, const char *long_name, bool provide_default, int default_value, const std::vector* allowed) { 202 | int_options.emplace_back(desc, short_name, long_name, provide_default, default_value, allowed); 203 | } 204 | 205 | void OptionParser::addOption(const char *desc, const char short_name, const char *long_name, bool provide_default, float default_value, const std::vector* allowed) { 206 | float_options.emplace_back(desc, short_name, long_name, provide_default, default_value, allowed); 207 | } 208 | 209 | void OptionParser::addOption(const char *desc, const char short_name, const char *long_name, bool provide_default, size_t default_value, const std::vector* allowed) { 210 | sizet_options.emplace_back(desc, short_name, long_name, provide_default, default_value, allowed); 211 | } 212 | 213 | void OptionParser::addOption(const char *desc, const char short_name, const char *long_name, bool provide_default, const char* default_value, const std::vector* allowed) { 214 | string_options.emplace_back(desc, short_name, long_name, provide_default, default_value, allowed); 215 | } 216 | 217 | void OptionParser::showHelp() { 218 | std::cout << desc << "\n\n"; 219 | 220 | for (auto& opt: bool_options) { 221 | opt.show(); 222 | } 223 | 224 | for (auto& opt: int_options) { 225 | 
opt.show(); 226 | } 227 | 228 | for (auto& opt: float_options) { 229 | opt.show(); 230 | } 231 | 232 | for (auto& opt: sizet_options) { 233 | opt.show(); 234 | } 235 | 236 | for (auto& opt: string_options) { 237 | opt.show(); 238 | } 239 | } 240 | 241 | void OptionParser::terminate(const char* msg) { 242 | std::cerr << msg << '\n'; 243 | exit(1); 244 | } 245 | 246 | void OptionParser::parseCmdString(int argc, char* const argv[]) { 247 | for (int i = 1; i < argc; ) { 248 | if (argv[i][0] == '-') { 249 | if (0 == std::strcmp("--help", argv[i]) || 0 == std::strcmp("-h", argv[i])) { 250 | showHelp(); 251 | exit(0); 252 | } 253 | 254 | i = searchAndFill(argc, argv, i); 255 | } else { 256 | terminate("Positional arguments not allowed"); 257 | } 258 | } 259 | } 260 | 261 | int OptionParser::searchAndFill(int argc, char * const argv[], int pos){ 262 | //check if switch is well formed 263 | size_t size = std::strlen(argv[pos]); 264 | if (size < 2) { 265 | std::cerr << "Invalid switch " << argv[pos] << '\n'; 266 | exit(1); 267 | } 268 | 269 | if (size > 2 && (argv[pos][1] != '-')) { 270 | std::cerr << "Invalid switch " << argv[pos] << '\n'; 271 | exit(1); 272 | } 273 | 274 | for (auto& opt: bool_options) { 275 | if (argv[pos][1] == '-' ? 0 == std::strcmp(opt.long_name, (argv[pos] + 2)) : argv[pos][1] == opt.short_name) { 276 | opt.setValue(true); 277 | return pos + 1; 278 | } 279 | } 280 | 281 | for (auto& opt: int_options) { 282 | if (argv[pos][1] == '-' ? 0 == std::strcmp(opt.long_name, (argv[pos] + 2)) : argv[pos][1] == opt.short_name) { 283 | pos++; 284 | if (pos >= argc) { 285 | std::cerr << "Option --" << opt.long_name << " requires an argument.\n"; 286 | exit(1); 287 | } 288 | opt.setValue((int) std::strtol(argv[pos], nullptr, 10)); 289 | return pos + 1; 290 | } 291 | } 292 | 293 | for (auto& opt: float_options) { 294 | if (argv[pos][1] == '-' ? 
0 == std::strcmp(opt.long_name, (argv[pos] + 2)) : argv[pos][1] == opt.short_name) { 295 | pos++; 296 | if (pos >= argc) { 297 | std::cerr << "Option --" << opt.long_name << " requires a argument.\n"; 298 | exit(1); 299 | } 300 | opt.setValue(std::strtof(argv[pos], nullptr)); 301 | return pos + 1; 302 | } 303 | } 304 | 305 | for (auto& opt: sizet_options) { 306 | if (argv[pos][1] == '-' ? 0 == std::strcmp(opt.long_name, (argv[pos] + 2)) : argv[pos][1] == opt.short_name) { 307 | pos++; 308 | if (pos >= argc) { 309 | std::cerr << "Option --" << opt.long_name << " requires an argument.\n"; 310 | exit(1); 311 | } 312 | opt.setValue(std::strtoul(argv[pos], nullptr, 10)); 313 | return pos + 1; 314 | } 315 | } 316 | 317 | for (auto& opt: string_options) { 318 | if (argv[pos][1] == '-' ? 0 == std::strcmp(opt.long_name, (argv[pos] + 2)) : argv[pos][1] == opt.short_name) { 319 | pos++; 320 | if (pos >= argc) { 321 | std::cerr << "Option --" << opt.long_name << " requires a argument.\n"; 322 | exit(1); 323 | } 324 | opt.setValue(argv[pos]); 325 | return pos + 1; 326 | } 327 | } 328 | 329 | std::cerr << "Invalid switch " << argv[pos] << '\n'; 330 | exit(1); 331 | } 332 | 333 | bool OptionParser::getBool(const char* long_name) { 334 | for (auto& opt: bool_options) { 335 | if (0 == std::strcmp(opt.long_name, long_name)) { 336 | return *opt.value; 337 | } 338 | } 339 | 340 | std::cerr << "Option " << long_name << " is not defined for type .\n"; 341 | exit(1); 342 | } 343 | 344 | int OptionParser::getInt(const char* long_name) { 345 | for (auto& opt: int_options) { 346 | if (0 == std::strcmp(opt.long_name, long_name)) { 347 | return *opt.value; 348 | } 349 | } 350 | 351 | std::cerr << "Option " << long_name << " is not defined for type .\n"; 352 | exit(1); 353 | } 354 | 355 | size_t OptionParser::getSizet(const char* long_name) { 356 | for (auto& opt: sizet_options) { 357 | if (0 == std::strcmp(opt.long_name, long_name)) { 358 | return *opt.value; 359 | } 360 | } 361 | 362 | 
std::cerr << "Option " << long_name << " is not defined for type .\n"; 363 | exit(1); 364 | } 365 | 366 | float OptionParser::getFloat(const char* long_name) { 367 | for (auto& opt: float_options) { 368 | if (0 == std::strcmp(opt.long_name, long_name)) { 369 | return *opt.value; 370 | } 371 | } 372 | 373 | std::cerr << "Option " << long_name << " is not defined for type .\n"; 374 | exit(1); 375 | } 376 | 377 | const char* OptionParser::getString(const char* long_name) { 378 | for (auto& opt: string_options) { 379 | if (0 == std::strcmp(opt.long_name, long_name)) { 380 | return *opt.value; 381 | } 382 | } 383 | 384 | std::cerr << "Option " << long_name << " is not defined for type .\n"; 385 | exit(1); 386 | } 387 | -------------------------------------------------------------------------------- /src/option_parser.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // option_parser.h 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #ifndef __cpm__option_parser__ 23 | #define __cpm__option_parser__ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | // Hacky command line option parser so we don't have to import Boost's one. 
31 | 32 | template class Option { 33 | public: 34 | Option(const char* desc, const char short_name, 35 | const char* long_name, bool provide_default, T value, const std::vector* allowed) : desc(desc), short_name(short_name), 36 | long_name(long_name), allowed(allowed), provide_default(provide_default){ 37 | 38 | this->value = new T(value); 39 | } 40 | 41 | Option(Option&& other) : desc(other.desc), short_name(other.short_name), 42 | long_name(other.long_name), allowed(other.allowed), provide_default(other.provide_default), value(other.value){ 43 | other.value = nullptr; 44 | other.allowed = nullptr; 45 | } 46 | 47 | ~Option() { 48 | delete value; 49 | } 50 | 51 | void parseValue(const char* string); 52 | void show(); 53 | void setValue(T val); 54 | 55 | const char* desc; 56 | const char short_name; 57 | const char* long_name; 58 | const std::vector* allowed; 59 | const bool provide_default; 60 | 61 | T* value; 62 | }; 63 | 64 | class OptionParser { 65 | public: 66 | OptionParser(const char* desc); 67 | 68 | void addOption(const char* desc, const char short_name, 69 | const char* long_name, bool provide_default, bool default_value); 70 | void addOption(const char* desc, const char short_name, 71 | const char* long_name, bool provide_default, int default_value, const std::vector* allowed); 72 | void addOption(const char* desc, const char short_name, 73 | const char* long_name, bool provide_default, float default_value, const std::vector* allowed); 74 | void addOption(const char* desc, const char short_name, 75 | const char* long_name, bool provide_default, size_t default_value, const std::vector* allowed); 76 | void addOption(const char* desc, const char short_name, 77 | const char* long_name, bool provide_default, const char* default_value, const std::vector* allowed); 78 | 79 | void parseCmdString(int argc, char* const argv[]); 80 | 81 | bool getBool(const char* long_name); 82 | int getInt(const char* long_name); 83 | size_t getSizet(const char* long_name); 84 | 
float getFloat(const char* long_name); 85 | const char* getString(const char* long_name); 86 | 87 | void showHelp(); 88 | 89 | private: 90 | void terminate(const char* msg); 91 | 92 | int searchAndFill(int argc, char* const argv[], int pos); 93 | 94 | const char* desc; 95 | std::vector> bool_options; 96 | std::vector> int_options; 97 | std::vector> sizet_options; 98 | std::vector> string_options; 99 | std::vector> float_options; 100 | }; 101 | 102 | #endif /* defined(__cpm__option_parser__) */ 103 | -------------------------------------------------------------------------------- /src/parallel_eval.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // parallel_eval.cpp 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #include 23 | 24 | #include "parallel_eval.h" 25 | #include "cpm.h" 26 | 27 | void ParallelEval::evalf(const StochasticDataAdaptor& trainset, const StochasticDataAdaptor& testset, 28 | const CPMConfig config, float* scores, int* assignments) { 29 | 30 | // generate random seed 31 | size_t seed = std::chrono::system_clock::now().time_since_epoch().count(); 32 | seed = seed ^ std::hash()(std::this_thread::get_id()); 33 | if (sizeof(seed) == 8) { 34 | seed = seed ^ (seed >> 32); 35 | } 36 | 37 | CPM model(config.k, config.outer_label, config.lambda, config.entropy, config.cost_ratio, (unsigned int) seed); 38 | model.fit(trainset, config.iterations, config.reshuffle, false); 39 | model.predict(testset, scores, assignments); 40 | } 41 | 42 | void ParallelEval::parallelEval(const StochasticDataAdaptor& trainset, const StochasticDataAdaptor& testset, 43 | const std::vector configs, float* out_scores, int* out_assignments) { 44 | std::thread* threads = new std::thread[configs.size()]; 45 | 46 | for(size_t i=0; i 26 | 27 | #include "stochastic_data_adaptor.h" 28 | 29 | struct CPMConfig { 30 | 31 | CPMConfig() {}; // to make SWIG templating happy 32 | 33 | CPMConfig(int outer_label, int k, float lambda, float entropy, float cost_ratio, 34 | int iterations, bool reshuffle) : outer_label(outer_label), 35 | k(k), lambda(lambda), entropy(entropy), cost_ratio(cost_ratio), iterations(iterations), 36 | reshuffle(reshuffle) {} 37 | 38 | int outer_label; 39 | int k; 40 | float lambda; 41 | float entropy; 42 | float cost_ratio; 43 | int iterations; 44 | bool reshuffle; 45 | }; 46 | 47 | namespace ParallelEval { 48 | void evalf(const StochasticDataAdaptor& trainset, const StochasticDataAdaptor& testset, 49 | const CPMConfig config, float* scores, int* assignments); 50 | 51 | void parallelEval(const StochasticDataAdaptor& trainset, const StochasticDataAdaptor& 
testset, 52 | const std::vector configs, float* out_scores, int* out_assignments) ; 53 | } 54 | 55 | #endif /* defined(__cpm__parallel_eval__) */ 56 | -------------------------------------------------------------------------------- /src/python.i: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // python.i 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | 23 | %module(docstring="This module provides a wrapper for the Convex Polytope Machine C++ code.") cpm 24 | 25 | %{ 26 | #define SWIG_FILE_WITH_INIT 27 | 28 | #include 29 | #include 30 | #include "stochastic_data_adaptor.h" 31 | #include "cpm.h" 32 | #include "parallel_eval.h" 33 | %} 34 | 35 | %include "std_vector.i" 36 | %include "std_map.i" 37 | %include "numpy.i" 38 | %init %{ 39 | import_array(); 40 | %} 41 | 42 | %pythonbegin %{ 43 | # Copyright 2014 Alex Kantchelian 44 | # 45 | # Licensed under the Apache License, Version 2.0 (the "License"); 46 | # you may not use this file except in compliance with the License. 
47 | # You may obtain a copy of the License at 48 | # 49 | # http://www.apache.org/licenses/LICENSE-2.0 50 | # 51 | # Unless required by applicable law or agreed to in writing, software 52 | # distributed under the License is distributed on an "AS IS" BASIS, 53 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 54 | # See the License for the specific language governing permissions and 55 | # limitations under the License. 56 | # 57 | # Author: Alex Kantchelian, 2014 58 | # akant@cs.berkeley.edu 59 | %} 60 | 61 | %pythonbegin %{ 62 | import random 63 | import numpy as np 64 | from scipy import sparse 65 | %} 66 | 67 | %template() std::map; 68 | 69 | namespace std { 70 | %template(VectorOfStruct) std::vector; 71 | } 72 | 73 | /* ########### Numpy typemaps ############ */ 74 | %apply (float* IN_ARRAY2, int DIM1, int DIM2) {(float* data, int dim1, int dim2), 75 | (float* lambda_entropy_cost_ratio, int dif1, int dif2)}; 76 | 77 | %apply (int* IN_ARRAY2, int DIM1, int DIM2) {(int* k_outer_labels_iterations, int dii1, int dii2)}; 78 | 79 | %apply (int* IN_ARRAY1, int DIM1) {(int* labels, int dim_labels), 80 | (int* indices, int dim2), 81 | (int* indptr, int dim3)}; 82 | 83 | %apply (float* IN_ARRAY1, int DIM1) {(float* sparse_data, int dim1)}; 84 | 85 | %apply (float* ARGOUT_ARRAY1, int DIM1) {(float* scores, int scores_dim), 86 | (float* out_scores, int dof)}; 87 | %apply (int* ARGOUT_ARRAY1, int DIM1) {(int* assignments, int assignments_dim), 88 | (int* out_assignments, int doi), 89 | (int* out_labels, int dol)}; 90 | 91 | /* ###### basic exception handling ##### */ 92 | %exception { 93 | try { 94 | $action 95 | } catch (std::exception &e) { 96 | PyErr_SetString(PyExc_RuntimeError, const_cast(e.what())); 97 | } 98 | } 99 | 100 | /* ########################################### */ 101 | 102 | %rename(_Dataset) StochasticDataAdaptor; 103 | 104 | // Directly wrapped calls 105 | class StochasticDataAdaptor { 106 | public: 107 | 
StochasticDataAdaptor(const char* fname); 108 | 109 | ~StochasticDataAdaptor(); 110 | 111 | size_t getNInstances() const; 112 | 113 | size_t getDimensions() const; 114 | 115 | const std::map getCountsPerClass() const; 116 | }; 117 | 118 | %extend StochasticDataAdaptor { 119 | StochasticDataAdaptor(float* data, int dim1, int dim2, int* labels, int dim_labels) { 120 | if (dim1 != dim_labels) { 121 | PyErr_Format(PyExc_ValueError, "Dimensions mismatch."); 122 | return nullptr; 123 | } 124 | 125 | return new StochasticDataAdaptor(data, labels, dim1, dim2); 126 | } 127 | 128 | StochasticDataAdaptor(float* sparse_data, int dim1, int* indices, int dim2, int* indptr, int dim3, int* labels, int dim_labels) { 129 | if (dim1 != dim2) { 130 | PyErr_Format(PyExc_ValueError, "Dimension mismatch for data and indices arrays."); 131 | return nullptr; 132 | } 133 | 134 | if (dim3 != dim_labels+1) { 135 | PyErr_Format(PyExc_ValueError, "Dimension mismatch for indptr and labels arrays."); 136 | return nullptr; 137 | } 138 | 139 | return new StochasticDataAdaptor(sparse_data, indices, indptr, labels, dim1, dim3); 140 | } 141 | 142 | void _getLabels(int* out_labels, int dol) const { 143 | if ($self->getNInstances() != dol) { 144 | PyErr_Format(PyExc_ValueError, "Internal error."); 145 | return; 146 | } 147 | 148 | $self->getLabels(out_labels); 149 | } 150 | } 151 | 152 | %pythoncode %{ 153 | class Dataset(_Dataset): 154 | def __init__(self, *args): 155 | """Constructs a labeled dataset object that can be used for CPM training 156 | and prediction (labels will be ignored when used for prediction). 157 | This always incurs a memory copy (for either dense or sparse matrices) 158 | or allocation (when loading from libSVM format file). 159 | 160 | Dataset(filename): 161 | filename: str 162 | 163 | Creates a dataset from a libSVM file format on disk. 164 | 165 | Dataset(X, Y): 166 | X: 2d float array-like object. Sparse scipy CSR matrices are supported. 
167 | Y: 1d int array-like object 168 | 169 | Creates a dataset from instances X (one instance per row) and labels Y. 170 | """ 171 | if len(args) == 1: 172 | super(Dataset, self).__init__(*args) 173 | 174 | if len(args) == 2: 175 | if sparse.isspmatrix_csr(args[0]): 176 | super(Dataset, self).__init__(args[0].data, args[0].indices, args[0].indptr, args[1]) 177 | else: 178 | super(Dataset, self).__init__(*args) 179 | 180 | if len(args) > 2: 181 | raise ValueError("Too many arguments.") 182 | 183 | def getLabels(self): 184 | """Returns a numpy array of labels.""" 185 | return self._getLabels(int(self.getNInstances())) 186 | %} 187 | 188 | /* ###################################################### */ 189 | 190 | %rename(_CPM) CPM; 191 | 192 | class CPM { 193 | public: 194 | CPM(int k, int outer_label, float lambda, float entropy, float cost_ratio, 195 | unsigned int seed); 196 | ~CPM(); 197 | 198 | void fit(const StochasticDataAdaptor& trainset, int iterations, bool reshuffle, bool verbose); 199 | void serializeModel(const char* filename) const; 200 | static CPM* deserializeModel(const char* filename); 201 | 202 | const int outer_label; 203 | }; 204 | 205 | %extend CPM { 206 | void predict(const StochasticDataAdaptor& testset, float* scores, int scores_dim, 207 | int* assignments, int assignments_dim) { 208 | if ((scores_dim != testset.getNInstances()) || (assignments_dim != testset.getNInstances())) { 209 | PyErr_Format(PyExc_RuntimeError, "Internal error."); 210 | return; 211 | } 212 | 213 | return $self->predict(testset, scores, assignments); 214 | } 215 | } 216 | 217 | %pythoncode %{ 218 | class CPM(_CPM): 219 | def __init__(self, k, C=1.0, entropy=0.0, 220 | cost_ratio=1.0, outer_label=1, 221 | seed=None): 222 | """Initialize an empty CPM model. 
223 | 224 | Inputs: 225 | k: int -- number of sub-classifiers 226 | C: float -- inverse of L2 regularization factor 227 | entropy: float -- minimal assignment entropy to maintain 228 | cost_ratio: float -- in penalty, cost ratio between negative and positive 229 | misclassification training errors 230 | outer_label: int -- outside (positive) class 231 | seed: (None, int) -- random seed for reproducibility 232 | """ 233 | if seed is None: 234 | seed = int(random.getrandbits(32)) 235 | 236 | super(CPM, self).__init__(k, outer_label, 1.0/C, entropy, cost_ratio, seed) 237 | 238 | def fit(self, trainset, iterations=-1, reshuffle=True, verbose=False): 239 | """Trains a model via SGD. 240 | 241 | Inputs: 242 | trainset: Dataset 243 | iterations: int -- number of SGD steps. If < 0, will be set to 10 * training set size. 244 | reshuffle: bool -- reshuffle trainingset between each epoch 245 | verbose: bool -- print training statistics on stdout 246 | """ 247 | if iterations < 0: 248 | iterations = 10 * trainset.getNInstances() 249 | super(CPM, self).fit(trainset, iterations, reshuffle, verbose) 250 | 251 | def predict(self, testset): 252 | """Performs inference. 
253 | Input: 254 | testset: Dataset 255 | 256 | Outputs: 257 | scores: 1d float array of model scores 258 | assignments: 1d int array of active sub-classifiers per instance 259 | """ 260 | return super(CPM, self).predict(testset, int(testset.getNInstances()), int(testset.getNInstances())) 261 | %} 262 | 263 | /* ######################################### */ 264 | 265 | %rename(_CPMConfig) CPMConfig; 266 | 267 | struct CPMConfig { 268 | CPMConfig::CPMConfig(int outer_label, int k, float lambda, float entropy, float cost_ratio, 269 | int iterations, bool reshuffle); 270 | 271 | CPMConfig::CPMConfig() {}; 272 | }; 273 | 274 | %inline %{ 275 | void _parallelEval(const StochasticDataAdaptor& trainset, 276 | const StochasticDataAdaptor& testset, 277 | const std::vector configs, 278 | float* out_scores, int dof, 279 | int* out_assignments, int doi) { 280 | 281 | ParallelEval::parallelEval(trainset, testset, 282 | configs, 283 | out_scores, 284 | out_assignments); 285 | 286 | } 287 | %} 288 | 289 | %pythoncode %{ 290 | def parallelFitPredict(trainset, testset, parameters): 291 | """Trains and tests len(parameters) models on trainset and testset respectively. 292 | This will launch exactly len(parameters) threads, use at your own risk. 293 | 294 | Inputs: 295 | trainset: Dataset - the learning dataset 296 | testset: Dataset - the testing dataset 297 | parameters: list of dict objects. Each dict object can contain the 298 | following string keys defining the parameters of the run. See CPM class. 299 | Default values are provided for some keys. 
300 | k 301 | outer_label (1) 302 | iterations 303 | C (1) 304 | entropy (0) 305 | cost_ratio (1) 306 | reshuffle: (True) 307 | 308 | Outputs: 309 | S: len(parameters) x testset.getCounts() float array of scores 310 | A: len(parameters) x testset.getCounts() int array of assignments 311 | """ 312 | configs = [] 313 | for params in parameters: 314 | configs.append(_CPMConfig(params.get('outer_label', 1), params['k'], 1.0/params.get('C', 1), 315 | params.get('entropy', 0), params.get('cost_ratio', 1), 316 | params['iterations'], params.get('reshuffle', True))) 317 | 318 | S, A = _parallelEval(trainset, testset, configs, 319 | int(len(parameters)*testset.getNInstances()), 320 | int(len(parameters)*testset.getNInstances())) 321 | S.resize((len(parameters), testset.getNInstances())) 322 | A.resize((len(parameters), testset.getNInstances())) 323 | 324 | return S, A 325 | %} 326 | -------------------------------------------------------------------------------- /src/sparse_vector.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // sparse_vector.cpp 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #include "sparse_vector.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | SparseVector::SparseVector(const char* lsf_string, int non_zeros) { 30 | data.clear(); 31 | data.reserve(non_zeros); 32 | norm = 0.0; 33 | 34 | int last_index = -1; 35 | 36 | const char* curr = lsf_string; 37 | size_t str_len = strlen(lsf_string); 38 | 39 | while(curr && (curr < lsf_string + str_len) 40 | && (curr[0] != '#') && (curr[0] != '\n') && (curr[0] != '\r')){ 41 | 42 | if (curr[0] == ' ') { 43 | curr += 1; 44 | continue; 45 | } 46 | 47 | int index = atoi(curr); 48 | curr = strchr(curr, ':'); 49 | if (curr) { 50 | curr += 1; 51 | } else { 52 | throw std::runtime_error("Invalid format: expected ':'"); 53 | } 54 | 55 | float value = (float) atof(curr); 56 | 57 | if (index <= last_index) { 58 | throw std::runtime_error("Indices must be sorted by increasing order."); 59 | } 60 | last_index = index; 61 | 62 | data.emplace_back(index, value); 63 | norm += value * value; 64 | 65 | curr = strchr(curr, ' '); 66 | if (!curr) break; 67 | curr += 1; 68 | } 69 | 70 | norm = std::sqrt(norm); 71 | data.shrink_to_fit(); 72 | } 73 | 74 | SparseVector::SparseVector(float* cdata, size_t len) { 75 | data.clear(); 76 | data.reserve(len); 77 | norm = 0.0; 78 | 79 | for (size_t i = 0; i < len; ++i) { 80 | float value = cdata[i]; 81 | if (value != 0.0f) { 82 | data.emplace_back(i, value); 83 | norm += value * value; 84 | } 85 | } 86 | 87 | norm = std::sqrt(norm); 88 | data.shrink_to_fit(); 89 | } 90 | 91 | SparseVector::SparseVector(int* indices, float* cdata, size_t len) { 92 | data.clear(); 93 | data.reserve(len); 94 | norm = 0.0; 95 | 96 | int last_index = -1; 97 | 98 | for (size_t i = 0; i < len; ++i) { 99 | float value = cdata[i]; 100 | int index = indices[i]; 101 | 102 | if (index <= last_index) { 103 | throw std::runtime_error("Indices must be sorted by 
increasing order."); 104 | } 105 | last_index = index; 106 | 107 | data.emplace_back(index, value); 108 | norm += value * value; 109 | } 110 | 111 | norm = std::sqrt(norm); 112 | data.shrink_to_fit(); 113 | } 114 | 115 | void SparseVector::multiplyInplace(float weight) { 116 | for(auto& iv : data){ 117 | iv.value *= weight; 118 | } 119 | norm *= weight; 120 | } 121 | 122 | std::unique_ptr SparseVector::toLibSVMFormat() const { 123 | std::stringstream ss; 124 | for (auto const& iv: data){ 125 | ss << iv.index << ':' << iv.value << ' '; 126 | } 127 | ss << '\n'; 128 | return std::unique_ptr(new std::string(ss.str())); 129 | } 130 | 131 | size_t SparseVector::getMaxDimension() const { 132 | if (!data.empty()) { 133 | return data.back().index; 134 | } 135 | return 0; 136 | } 137 | -------------------------------------------------------------------------------- /src/sparse_vector.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // sparse_vector.h 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #ifndef __cpm__sparse_vector__ 23 | #define __cpm__sparse_vector__ 24 | 25 | #include 26 | #include 27 | 28 | // element cell 29 | struct IValue{ 30 | IValue(int i, float v) : index(i), value(v) {}; 31 | 32 | int index; 33 | float value; 34 | }; 35 | 36 | class SparseVector { 37 | 38 | friend class DenseMatrix; 39 | 40 | public: 41 | /* constructor from a libsvm-like string (without label) 42 | * non_zeros is a performance hint and represents the 43 | * initial size of the internal data vector. 44 | */ 45 | SparseVector(const char* lsf_string, int non_zeros=1000); 46 | 47 | // constructor from dense data 48 | SparseVector(float* data, size_t len); 49 | 50 | // constructor from sparse data 51 | SparseVector(int* indices, float* data, size_t len); 52 | 53 | // get number of non-zeros 54 | inline size_t getSize() const {return data.size();} 55 | 56 | // serialize to libsvm-like string 57 | std::unique_ptr toLibSVMFormat() const; 58 | 59 | // largest non-zero dimension index, 0 if empty vector 60 | size_t getMaxDimension() const; 61 | 62 | // x = weight * x 63 | void multiplyInplace(float weight); 64 | 65 | // get ||x||_2 66 | inline double getNorm() const {return norm;} 67 | 68 | private: 69 | // internal array of data 70 | std::vector data; 71 | 72 | // ||x||_2 73 | double norm; 74 | }; 75 | 76 | #endif /* defined(__cpm__sparse_vector__) */ 77 | -------------------------------------------------------------------------------- /src/stochastic_data_adaptor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // stochastic_data_adaptor.cpp 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include "stochastic_data_adaptor.h" 29 | 30 | StochasticDataAdaptor::StochasticDataAdaptor(const char* fname, size_t n_instances) { 31 | instances.clear(); 32 | instances.reserve(n_instances); 33 | countsPerClass.clear(); 34 | dimensions = 0; 35 | 36 | // load data in memory 37 | const size_t buffer_size = 8 * 1024 * 1024; 38 | std::ifstream fin(fname); 39 | char* localBuffer = new char[buffer_size]; 40 | fin.rdbuf()->pubsetbuf(localBuffer, buffer_size); 41 | 42 | std::string line; 43 | 44 | while(getline(fin, line) && line.size() > 4){ 45 | const char* cline = line.c_str(); 46 | 47 | int label = atoi(cline); 48 | cline = strchr(cline, ' '); 49 | if (!cline) { 50 | throw std::runtime_error("Invalid format: expected ' '"); 51 | } 52 | 53 | size_t cid; 54 | auto it = countsPerClass.find(label); 55 | if (it == countsPerClass.end()) { 56 | cid = 0; 57 | countsPerClass[label] = 1; 58 | } else { 59 | cid = it->second; 60 | it->second++; 61 | } 62 | 63 | SparseVector sv(cline); 64 | 65 | instances.emplace_back(label, sv, cid); 66 | dimensions = std::max(sv.getMaxDimension(), dimensions); 67 | } 68 | 69 | ++dimensions; 70 | instances.shrink_to_fit(); 71 | 72 | fin.close(); 73 | delete[] localBuffer; 74 | } 75 | 76 | StochasticDataAdaptor::StochasticDataAdaptor(float* data, int* labels, size_t n_instances, size_t n_dimensions) { 77 | 
instances.clear(); 78 | instances.reserve(n_instances); 79 | countsPerClass.clear(); 80 | dimensions = n_dimensions; 81 | 82 | for(size_t i = 0; i < (size_t) n_instances; ++i) { 83 | int label = labels[i]; 84 | 85 | size_t cid; 86 | 87 | auto it = countsPerClass.find(label); 88 | if (it == countsPerClass.end()) { 89 | cid = 0; 90 | countsPerClass[label] = 1; 91 | } else { 92 | cid = it->second; 93 | it->second++; 94 | } 95 | 96 | SparseVector sv(data + i*n_dimensions, n_dimensions); 97 | instances.emplace_back(label, sv, cid); 98 | } 99 | } 100 | 101 | StochasticDataAdaptor::StochasticDataAdaptor(float* data, int* indices, int* indptr, int* labels, size_t data_len, size_t indptr_len) { 102 | instances.clear(); 103 | instances.reserve(indptr_len - 1); 104 | countsPerClass.clear(); 105 | dimensions = 0; 106 | 107 | for(size_t i = 0; i < indptr_len - 1; ++i) { 108 | int label = labels[i]; 109 | 110 | size_t cid; 111 | auto it = countsPerClass.find(label); 112 | if (it == countsPerClass.end()) { 113 | cid = 0; 114 | countsPerClass[label] = 1; 115 | } else { 116 | cid = it->second; 117 | it->second++; 118 | } 119 | 120 | SparseVector sv(indices + indptr[i], data + indptr[i], indptr[i+1] - indptr[i]); 121 | instances.emplace_back(label, sv, cid); 122 | dimensions = std::max(sv.getMaxDimension(), dimensions); 123 | } 124 | 125 | ++dimensions; 126 | } 127 | 128 | void StochasticDataAdaptor::getLabels(int* labels) const { 129 | for (size_t i = 0; i < instances.size(); ++i){ 130 | labels[i] = std::get<0>(instances[i]); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/stochastic_data_adaptor.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Alex Kantchelian 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // stochastic_data_adaptor.h 18 | 19 | // Author: Alex Kantchelian, 2014 20 | // akant@cs.berkeley.edu 21 | 22 | #ifndef __cpm__stochastic_data_adaptor__ 23 | #define __cpm__stochastic_data_adaptor__ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "sparse_vector.h" 31 | 32 | class StochasticDataAdaptor { 33 | public: 34 | /* constructs dataset from a libsvm formatted text file. 35 | * n_instances is only a performance hint. 36 | */ 37 | StochasticDataAdaptor(const char* fname, size_t n_instances=1000000); 38 | 39 | // constructs dataset from dense in memory data 40 | StochasticDataAdaptor(float* data, int* labels, size_t n_instances, size_t n_dimensions); 41 | 42 | // constructs dataset from sparse in memory data 43 | StochasticDataAdaptor(float* data, int* indices, int* indptr, int* labels, size_t data_len, size_t indptr_len); 44 | 45 | // get a given instance: label, sparsevector, class id 46 | inline const std::tuple& getInstance(size_t i) const { 47 | return instances[i]; 48 | } 49 | 50 | void getLabels(int* labels) const; 51 | 52 | size_t getNInstances() const {return instances.size();} 53 | size_t getDimensions() const {return dimensions;} 54 | const std::map getCountsPerClass() const {return countsPerClass;} 55 | 56 | private: 57 | // number of dimensions 58 | size_t dimensions; 59 | 60 | // label, sparsevector, class id 61 | std::vector> instances; 62 | 63 | // number of instances per label 64 | std::map countsPerClass; 65 | }; 66 | 67 | #endif /* 
defined(__cpm__stochastic_data_adaptor__) */ 68 | --------------------------------------------------------------------------------