├── COPYRIGHT
├── Makefile
├── Makefile.win
├── README
├── ffm-predict.cpp
├── ffm-train.cpp
├── ffm.cpp
├── ffm.h
├── timer.cpp
└── timer.h


/COPYRIGHT:
--------------------------------------------------------------------------------
 1 | 
 2 | Copyright (c) 2017 The LIBFFM Project.
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions
 7 | are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright
10 | notice, this list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright
13 | notice, this list of conditions and the following disclaimer in the
14 | documentation and/or other materials provided with the distribution.
15 | 
16 | 3. Neither name of copyright holders nor the names of its contributors
17 | may be used to endorse or promote products derived from this software
18 | without specific prior written permission.
19 | 
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 | A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds the LIBFFM command-line tools ffm-train and ffm-predict.
 2 | CXX = g++
 3 | CXXFLAGS = -Wall -O3 -std=c++0x -march=native
 4 | 
 5 | # comment the following flags if you do not want to use SSE instructions
 6 | DFLAG += -DUSESSE
 7 | 
 8 | # comment the following flags if you do not want to use OpenMP
 9 | DFLAG += -DUSEOMP
10 | CXXFLAGS += -fopenmp
11 | 
12 | # all and clean name commands, not files, so they must always run.
13 | .PHONY: all clean
14 | 
15 | all: ffm-train ffm-predict
16 | 
17 | # ffm.h is listed so that header edits rebuild the tool; it is filtered out
18 | # of the command line because only sources and objects are compiled/linked.
19 | ffm-train: ffm-train.cpp ffm.h ffm.o timer.o
20 | 	$(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $(filter-out %.h,$^)
21 | 
22 | ffm-predict: ffm-predict.cpp ffm.h ffm.o timer.o
23 | 	$(CXX) $(CXXFLAGS) $(DFLAG) -o $@ $(filter-out %.h,$^)
24 | 
25 | # ffm.cpp includes timer.h; compiling it does not need timer.o.
26 | ffm.o: ffm.cpp ffm.h timer.h
27 | 	$(CXX) $(CXXFLAGS) $(DFLAG) -c -o $@ $<
28 | 
29 | timer.o: timer.cpp timer.h
30 | 	$(CXX) $(CXXFLAGS) $(DFLAG) -c -o $@ $<
31 | 
32 | clean:
33 | 	rm -f ffm-train ffm-predict ffm.o timer.o
34 | 


--------------------------------------------------------------------------------
/Makefile.win:
--------------------------------------------------------------------------------
 1 | CXX = cl.exe
 2 | CFLAGS = /nologo /O2 /EHsc /D "_CRT_SECURE_NO_DEPRECATE" /D "USEOMP" /D "USESSE" /openmp
 3 | # Directory that receives the built executables.
 4 | TARGET = windows
 5 | # Default target: create the output directory, then build both tools.
 6 | all: $(TARGET) $(TARGET)\ffm-train.exe $(TARGET)\ffm-predict.exe
 7 | # Build ffm-predict.exe from its source plus the two shared object files.
 8 | $(TARGET)\ffm-predict.exe: ffm.h ffm-predict.cpp ffm.obj timer.obj
 9 | 	$(CXX) $(CFLAGS) ffm-predict.cpp ffm.obj timer.obj -Fe$(TARGET)\ffm-predict.exe
10 | # Build ffm-train.exe from its source plus the two shared object files.
11 | $(TARGET)\ffm-train.exe: ffm.h ffm-train.cpp ffm.obj timer.obj
12 | 	$(CXX) $(CFLAGS) ffm-train.cpp ffm.obj timer.obj -Fe$(TARGET)\ffm-train.exe
13 | # Compile ffm.cpp into ffm.obj.
14 | ffm.obj: ffm.cpp ffm.h
15 | 	$(CXX) $(CFLAGS) -c ffm.cpp
16 | # Compile timer.cpp into timer.obj.
17 | timer.obj: timer.cpp timer.h
18 | 	$(CXX) $(CFLAGS) -c timer.cpp
19 | # Create the output directory; the leading '-' ignores a failing mkdir (e.g. directory already present).
20 | .PHONY: $(TARGET)
21 | $(TARGET):
22 | 	-mkdir $(TARGET)
23 | # Delete objects, executables and the output directory; failures of either command are ignored.
24 | clean:
25 | 	-erase /Q *.obj *.exe $(TARGET)\. 
26 | 	-rd $(TARGET)
27 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
  1 | Table of Contents
  2 | =================
  3 | 
  4 | - What is LIBFFM
  5 | - Overfitting and Early Stopping
  6 | - Installation
  7 | - Data Format
  8 | - Command Line Usage
  9 | - Examples
 10 | - OpenMP and SSE
 11 | - Building Windows Binaries
 12 | - FAQ
 13 | 
 14 | 
 15 | What is LIBFFM
 16 | ==============
 17 | 
 18 | LIBFFM is a library for field-aware factorization machine (FFM). 
 19 | 
 20 | Field-aware factorization machine is an effective model for CTR prediction. It has been used to win the top-3 positions
 21 | of the following competitions:
 22 | 
 23 |     * Criteo: https://www.kaggle.com/c/criteo-display-ad-challenge
 24 | 
 25 |     * Avazu: https://www.kaggle.com/c/avazu-ctr-prediction
 26 | 
 27 |     * Outbrain: https://www.kaggle.com/c/outbrain-click-prediction
 28 | 
 29 |     * RecSys 2015: http://dl.acm.org/citation.cfm?id=2813511&dl=ACM&coll=DL&CFID=941880276&CFTOKEN=60022934
 30 | 
 31 | You can find more information about FFM in the following paper / slides:
 32 | 
 33 |     * http://www.csie.ntu.edu.tw/~r01922136/slides/ffm.pdf
 34 | 
 35 |     * http://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf
 36 | 
 37 |     * https://arxiv.org/abs/1701.04099
 38 | 
 39 | 
 40 | Overfitting and Early Stopping
 41 | ==============================
 42 | 
 43 | FFM is prone to overfitting, and the solution we have so far is early stopping. See how FFM behaves on a certain data
 44 | set:
 45 | 
 46 |     > ffm-train -p va.ffm -l 0.00002 tr.ffm
 47 |     iter   tr_logloss   va_logloss
 48 |        1      0.49738      0.48776
 49 |        2      0.47383      0.47995
 50 |        3      0.46366      0.47480
 51 |        4      0.45561      0.47231
 52 |        5      0.44810      0.47034
 53 |        6      0.44037      0.47003
 54 |        7      0.43239      0.46952
 55 |        8      0.42362      0.46999
 56 |        9      0.41394      0.47088
 57 |       10      0.40326      0.47228
 58 |       11      0.39156      0.47435
 59 |       12      0.37886      0.47683
 60 |       13      0.36522      0.47975
 61 |       14      0.35079      0.48321
 62 |       15      0.33578      0.48703
 63 | 
 64 | 
 65 | We see the best validation loss is achieved at the 7th iteration. If we keep training, then overfitting begins. It is worth
 66 | noting that increasing the regularization parameter does not help:
 67 | 
 68 |     > ffm-train -p va.ffm -l 0.0002 -t 50 -s 12 tr.ffm
 69 |     iter   tr_logloss   va_logloss
 70 |        1      0.50532      0.49905
 71 |        2      0.48782      0.49242
 72 |        3      0.48136      0.48748
 73 |                  ...
 74 |       29      0.42183      0.47014
 75 |                  ...
 76 |       48      0.37071      0.47333
 77 |       49      0.36767      0.47374
 78 |       50      0.36472      0.47404
 79 | 
 80 | 
 81 | To avoid overfitting, we recommend always providing a validation set with option `-p.' You can use option `--auto-stop' to
 82 | stop at the iteration that reaches the best validation loss:
 83 | 
 84 |     > ffm-train -p va.ffm -l 0.00002 --auto-stop tr.ffm
 85 |     iter   tr_logloss   va_logloss
 86 |        1      0.49738      0.48776
 87 |        2      0.47383      0.47995
 88 |        3      0.46366      0.47480
 89 |        4      0.45561      0.47231
 90 |        5      0.44810      0.47034
 91 |        6      0.44037      0.47003
 92 |        7      0.43239      0.46952
 93 |        8      0.42362      0.46999
 94 |     Auto-stop. Use model at 7th iteration.
 95 | 
 96 | 
 97 | Installation
 98 | ============
 99 | 
100 | Requirement: It requires a C++11 compatible compiler. We also use OpenMP to provide multi-threading. If OpenMP is not
101 | available on your platform, please refer to section `OpenMP and SSE.'
102 | 
103 | - Unix-like systems:
104 |   Type `make' in the command line.
105 | 
106 | - Windows:
107 |   See `Building Windows Binaries' to compile.
108 | 
109 | 
110 | 
111 | Data Format
112 | ===========
113 | 
114 | The data format of LIBFFM is:
115 | 
116 | <label> <field1>:<feature1>:<value1> <field2>:<feature2>:<value2> ...
117 | .
118 | .
119 | .
120 | 
121 | `field' and `feature' should be non-negative integers. See an example `bigdata.tr.txt.'
122 | 
123 | It is important to understand the difference between `field' and `feature'. For example, if we have raw data like this:
124 | 
125 | Click  Advertiser  Publisher
126 | =====  ==========  =========
127 |     0        Nike        CNN
128 |     1        ESPN        BBC
129 | 
130 | Here, we have 
131 | 
132 |     * 2 fields: Advertiser and Publisher
133 | 
134 |     * 4 features: Advertiser-Nike, Advertiser-ESPN, Publisher-CNN, Publisher-BBC
135 | 
136 | Usually you will need to build two dictionaries, one for fields and one for features, like this:
137 |     
138 |     DictField[Advertiser] -> 0
139 |     DictField[Publisher]  -> 1
140 |     
141 |     DictFeature[Advertiser-Nike] -> 0
142 |     DictFeature[Publisher-CNN]   -> 1
143 |     DictFeature[Advertiser-ESPN] -> 2
144 |     DictFeature[Publisher-BBC]   -> 3
145 | 
146 | Then, you can generate FFM format data:
147 | 
148 |     0 0:0:1 1:1:1
149 |     1 0:2:1 1:3:1
150 | 
151 | Note that because these features are categorical, the values here are all ones.
152 | 
153 | 
154 | Command Line Usage
155 | ==================
156 | 
157 | -   `ffm-train'
158 | 
159 |     usage: ffm-train [options] training_set_file [model_file]
160 | 
161 |     options:
162 |     -l <lambda>: set regularization parameter (default 0.00002)
163 |     -k <factor>: set number of latent factors (default 4)
164 |     -t <iteration>: set number of iterations (default 15)
165 |     -r <eta>: set learning rate (default 0.2)
166 |     -s <nr_threads>: set number of threads (default 1)
167 |     -p <path>: set path to the validation set
168 |     --quiet: quiet mode (no output)
169 |     --no-norm: disable instance-wise normalization
170 |     --auto-stop: stop at the iteration that achieves the best validation loss (must be used with -p)
171 | 
172 |     By default we do instance-wise normalization. That is, we normalize the 2-norm of each instance to 1. You can use
173 |     `--no-norm' to disable this function.
174 |     
175 |     A binary file `training_set_file.bin' will be generated to store the data in binary format.
176 | 
177 |     Because FFM usually needs early stopping for better test performance, we provide an option `--auto-stop' to stop at
178 |     the iteration that achieves the best validation loss. Note that you need to provide a validation set with `-p' when
179 |     you use this option.
180 | 
181 | 
182 | -   `ffm-predict'
183 | 
184 |     usage: ffm-predict test_file model_file output_file
185 | 
186 | 
187 | 
188 | Examples
189 | ========
190 | 
191 | Download a toy data set from:
192 | 
193 |     zip: https://drive.google.com/open?id=1HZX7zSQJy26hY4_PxSlOWz4x7O-tbQjt
194 | 
195 |     tar.gz: https://drive.google.com/open?id=12-EczjiYGyJRQLH5ARy1MXRFbCvkgfPx
196 | 
197 | This dataset is subsampled 1% from Criteo's challenge.
198 | 
199 | > tar -xzf libffm_toy.tar.gz
200 | 
201 | or 
202 | 
203 | > unzip libffm_toy.zip
204 | 
205 | 
206 | > ./ffm-train -p libffm_toy/criteo.va.r100.gbdt0.ffm libffm_toy/criteo.tr.r100.gbdt0.ffm model
207 | 
208 | train a model using the default parameters
209 | 
210 | 
211 | > ./ffm-predict libffm_toy/criteo.va.r100.gbdt0.ffm model output
212 | 
213 | do prediction
214 | 
215 | 
216 | > ./ffm-train -l 0.0001 -k 15 -t 30 -r 0.05 -s 4 --auto-stop -p libffm_toy/criteo.va.r100.gbdt0.ffm libffm_toy/criteo.tr.r100.gbdt0.ffm model
217 | 
218 | train a model using the following parameters:
219 | 
220 |     regularization cost = 0.0001
221 |     latent factors = 15
222 |     iterations = 30
223 |     learning rate = 0.05
224 |     threads = 4
225 |     let it auto-stop
226 | 
227 | 
228 | OpenMP and SSE
229 | ==============
230 | 
231 | We use OpenMP to do parallelization. If OpenMP is not available on your
232 | platform, then please comment out the following lines in Makefile.
233 | 
234 |     DFLAG += -DUSEOMP
235 |     CXXFLAGS += -fopenmp
236 | 
237 | Note: Please run `make clean all' if these flags are changed.
238 | 
239 | We use SSE instructions to perform fast computation. If you do not want to use it, comment out the following line:
240 | 
241 |     DFLAG += -DUSESSE
242 | 
243 | Then, run `make clean all'
244 | 
245 | 
246 | 
247 | Building Windows Binaries
248 | =========================
249 | 
250 | The Windows part is maintained by a different maintainer, so it may not always support the latest version.
251 | 
252 | The latest version it supports is: v1.21
253 | 
254 | To build them via command-line tools of Visual C++, use the following steps:
255 | 
256 | 1. Open a DOS command box (or Developer Command Prompt for Visual Studio) and go to LIBFFM directory. If environment
257 | variables of VC++ have not been set, type
258 | 
259 | "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\amd64\vcvars64.bat"
260 | 
261 | You may have to modify the above command according to which version of VC++ you use or
262 | where it is installed.
263 | 
264 | 2. Type
265 | 
266 | nmake -f Makefile.win clean all
267 | 
268 | 
269 | FAQ
270 | ===
271 | 
272 | Q: Why do I have the same model size when k = 1 and k = 4?
273 | 
274 | A: This is because we use SSE instructions. In order to use SSE, the memory needs to be aligned. So even if you assign k =
275 |    1, we still fill some dummy zeros from k = 2 to 4.
276 | 
277 | 
278 | Q: Why is the logloss slightly different on the same data when I run the program two or more times with multi-threading?
279 | 
280 | A: When there is more than one thread, the program becomes non-deterministic. To make it deterministic, use only one thread.
281 | 
282 | 
283 | Contributors
284 | ============
285 | 
286 | Yuchin Juan, Wei-Sheng Chin, and Yong Zhuang
287 | 


--------------------------------------------------------------------------------
/ffm-predict.cpp:
--------------------------------------------------------------------------------
  1 | #include <cstring>
  2 | #include <fstream>
  3 | #include <iostream>
  4 | #include <string>
  5 | #include <iomanip>
  6 | #include <memory>
  7 | #include <cmath>
  8 | #include <stdexcept>
  9 | #include <vector>
 10 | #include <cstdlib>
 11 | 
 12 | #include "ffm.h"
 13 | 
 14 | using namespace std;
 15 | using namespace ffm;
 16 | // The three positional arguments of ffm-predict, filled in by parse_option().
 17 | struct Option {
 18 |     string test_path, model_path, output_path;
 19 | };
 20 | // Usage text; parse_option() throws it when no arguments are given.
 21 | string predict_help() {
 22 |     static char const usage[] = "usage: ffm-predict test_file model_file output_file\n";
 23 |     return usage;
 24 | }
 25 | 
 26 | Option parse_option(int argc, char **argv) {
 27 |     // With no arguments at all, hand the usage text to the caller.
 28 |     if(argc == 1)
 29 |         throw invalid_argument(predict_help());
 30 | 
 31 |     // Exactly three positional arguments are accepted.
 32 |     if(argc != 4)
 33 |         throw invalid_argument("cannot parse argument");
 34 | 
 35 |     // argv[0] is the program name; the paths follow in fixed order.
 36 |     vector<string> const args(argv, argv + argc);
 37 | 
 38 |     Option parsed;
 39 |     parsed.test_path = args[1];
 40 |     parsed.model_path = args[2];
 41 |     parsed.output_path = args[3];
 42 | 
 43 |     return parsed;
 44 | }
 45 | 
 46 | void predict(string test_path, string model_path, string output_path) {
 47 |     // Scores each line of test_path with the model in model_path, writes one
 48 |     // prediction per line to output_path and prints the mean logloss to stdout.
 49 |     int const kMaxLineSize = 1000000;
 50 |     FILE *f_in = fopen(test_path.c_str(), "r");
 51 |     if(f_in == nullptr)  // fail loudly: fgets() on a null stream is undefined
 52 |         throw runtime_error("cannot open " + test_path);
 53 |     ofstream f_out(output_path);
 54 |     vector<char> line(kMaxLineSize);  // heap buffer instead of a 1 MB stack array
 55 |     ffm_model model = ffm_load_model(model_path);
 56 |     ffm_double loss = 0;
 57 |     vector<ffm_node> x;
 58 |     ffm_int i = 0;
 59 | 
 60 |     for(; fgets(line.data(), kMaxLineSize, f_in) != nullptr; i++) {
 61 |         x.clear();
 62 |         char *y_char = strtok(line.data(), " \t");
 63 |         // strtok() yields null for a whitespace-only line; never hand that to atoi().
 64 |         ffm_float y = (y_char != nullptr && atoi(y_char)>0)? 1.0f : -1.0f;
 65 | 
 66 |         while(true) {
 67 |             char *field_char = strtok(nullptr,":");
 68 |             char *idx_char = strtok(nullptr,":");
 69 |             char *value_char = strtok(nullptr," \t");
 70 |             if(field_char == nullptr || *field_char == '\n')
 71 |                 break;
 72 |             if(idx_char == nullptr || value_char == nullptr)
 73 |                 break;  // truncated field:index:value triple
 74 | 
 75 |             ffm_node N;
 76 |             N.f = atoi(field_char);
 77 |             N.j = atoi(idx_char);
 78 |             N.v = atof(value_char);
 79 |             x.push_back(N);
 80 |         }
 81 | 
 82 |         ffm_float y_bar = ffm_predict(x.data(), x.data()+x.size(), model);
 83 |         loss -= y==1? log(y_bar) : log(1-y_bar);
 84 |         f_out << y_bar << "\n";
 85 |     }
 86 | 
 87 |     if(i > 0)  // an empty test file would otherwise print 0/0
 88 |         loss /= i;
 89 |     cout << "logloss = " << fixed << setprecision(5) << loss << endl;
 90 |     fclose(f_in);
 91 | }
 92 | 
 93 | int main(int argc, char **argv) {
 94 |     // Returns 0 on success and 1 when parsing or prediction reports an error.
 95 |     try {
 96 |         Option option = parse_option(argc, argv);
 97 |         predict(option.test_path, option.model_path, option.output_path);
 98 |     } catch(exception const &e) {
 99 |         // invalid_argument (from parse_option) is an exception too: same message as before.
100 |         cout << e.what() << endl;
101 |         return 1;
102 |     }
103 | 
104 |     return 0;
105 | }
106 | 


--------------------------------------------------------------------------------
/ffm-train.cpp:
--------------------------------------------------------------------------------
  1 | #pragma GCC diagnostic ignored "-Wunused-result" 
  2 | #include <algorithm>
  3 | #include <cstring>
  4 | #include <iostream>
  5 | #include <stdexcept>
  6 | #include <string>
  7 | #include <vector>
  8 | #include <cstdlib>
  9 | 
 10 | #include "ffm.h"
 11 | 
 12 | #if defined USEOMP
 13 | #include <omp.h>
 14 | #endif
 15 | 
 16 | using namespace std;
 17 | using namespace ffm;
 18 | // Usage text; parse_option() throws it when ffm-train gets no arguments.
 19 | string train_help() {
 20 |     string help = "usage: ffm-train [options] training_set_file [model_file]\n";
 21 |     help += "\n";
 22 |     help += "options:\n";
 23 |     help += "-l <lambda>: set regularization parameter (default 0.00002)\n";
 24 |     help += "-k <factor>: set number of latent factors (default 4)\n";
 25 |     help += "-t <iteration>: set number of iterations (default 15)\n";
 26 |     help += "-r <eta>: set learning rate (default 0.2)\n";
 27 |     help += "-s <nr_threads>: set number of threads (default 1)\n";
 28 |     help += "-p <path>: set path to the validation set\n";
 29 |     help += "--quiet: quiet mode (no output)\n";
 30 |     help += "--no-norm: disable instance-wise normalization\n";
 31 |     help += "--auto-stop: stop at the iteration that achieves the best validation loss (must be used with -p)\n";
 32 |     return help;
 33 | }
 34 | // Everything ffm-train reads from its command line; filled in by parse_option().
 35 | struct Option {
 36 |     string tr_path;  // training set file (first positional argument)
 37 |     string va_path;  // validation set file given with -p; empty when absent
 38 |     string model_path;  // where the trained model is saved
 39 |     ffm_parameter param;  // training parameters set by -l/-k/-t/-r, --no-norm and --auto-stop
 40 |     bool quiet = false;  // set by --quiet
 41 |     ffm_int nr_threads = 1;  // set by -s
 42 | };
 43 | 
 44 | string basename(string path) {
 45 |     // Returns the part of path after the last '/', or path itself if there
 46 |     // is none. rfind() avoids dereferencing begin() of an empty string.
 47 |     string::size_type slash = path.rfind('/');
 48 |     if(slash == string::npos)
 49 |         return path;
 50 |     return path.substr(slash + 1);
 51 | }
 52 | 
 53 | Option parse_option(int argc, char **argv) {
 54 |     // Parses "[options] training_set_file [model_file]". Every problem is
 55 |     // reported by throwing invalid_argument carrying the text to print.
 56 |     vector<string> const args(argv, argv + argc);
 57 | 
 58 |     if(argc == 1)
 59 |         throw invalid_argument(train_help());
 60 | 
 61 |     Option opt;
 62 |     ffm_int i = 1;
 63 | 
 64 |     // Steps i onto the word after the current flag and returns that word;
 65 |     // throws `missing` when the flag is the last word on the command line.
 66 |     auto value_after_flag = [&](char const *missing) -> string const & {
 67 |         if(i == argc-1)
 68 |             throw invalid_argument(missing);
 69 |         return args[++i];
 70 |     };
 71 | 
 72 |     for(; i < argc; i++) {
 73 |         string const &flag = args[i];
 74 | 
 75 |         // Numeric flags first, then -p and the switches without a value.
 76 |         if(flag == "-t") {
 77 |             string const &word = value_after_flag("need to specify number of iterations after -t");
 78 |             opt.param.nr_iters = atoi(word.c_str());
 79 |             if(opt.param.nr_iters <= 0)
 80 |                 throw invalid_argument("number of iterations should be greater than zero");
 81 |         } else if(flag == "-k") {
 82 |             string const &word = value_after_flag("need to specify number of factors after -k");
 83 |             opt.param.k = atoi(word.c_str());
 84 |             if(opt.param.k <= 0)
 85 |                 throw invalid_argument("number of factors should be greater than zero");
 86 |         } else if(flag == "-r") {
 87 |             string const &word = value_after_flag("need to specify eta after -r");
 88 |             opt.param.eta = atof(word.c_str());
 89 |             if(opt.param.eta <= 0)
 90 |                 throw invalid_argument("learning rate should be greater than zero");
 91 |         } else if(flag == "-l") {
 92 |             string const &word = value_after_flag("need to specify lambda after -l");
 93 |             opt.param.lambda = atof(word.c_str());
 94 |             if(opt.param.lambda < 0)
 95 |                 throw invalid_argument("regularization cost should not be smaller than zero");
 96 |         } else if(flag == "-s") {
 97 |             string const &word = value_after_flag("need to specify number of threads after -s");
 98 |             opt.nr_threads = atoi(word.c_str());
 99 |             if(opt.nr_threads <= 0)
100 |                 throw invalid_argument("number of threads should be greater than zero");
101 |         } else if(flag == "-p") {
102 |             opt.va_path = value_after_flag("need to specify path after -p");
103 |         } else if(flag == "--no-norm") {
104 |             opt.param.normalization = false;
105 |         } else if(flag == "--quiet") {
106 |             opt.quiet = true;
107 |         } else if(flag == "--auto-stop") {
108 |             opt.param.auto_stop = true;
109 |         } else {
110 |             // First word that is not a known option: the training file.
111 |             break;
112 |         }
113 |     }
114 | 
115 |     // Exactly one or two positional words must remain.
116 |     if(i != argc-2 && i != argc-1)
117 |         throw invalid_argument("cannot parse command\n");
118 | 
119 |     opt.tr_path = args[i];
120 |     i++;
121 | 
122 |     // Without an explicit model path, derive one from the training file name.
123 |     if(i == argc) {
124 |         opt.model_path = basename(opt.tr_path) + ".model";
125 |     } else if(i < argc) {
126 |         opt.model_path = args[i];
127 |     } else {
128 |         throw invalid_argument("cannot parse argument");
129 |     }
130 | 
131 |     return opt;
132 | }
133 | 
134 | int train_on_disk(Option opt) {
135 |     // The binary caches are named after the input files (directory part
136 |     // removed, ".bin" appended), so they land in the working directory.
137 |     string tr_bin_path = basename(opt.tr_path) + ".bin";
138 |     string va_bin_path;  // stays empty when no validation set was given
139 |     ffm_read_problem_to_disk(opt.tr_path, tr_bin_path);
140 |     if(!opt.va_path.empty()) {
141 |         va_bin_path = basename(opt.va_path) + ".bin";
142 |         ffm_read_problem_to_disk(opt.va_path, va_bin_path);
143 |     }
144 |     ffm_model model = ffm_train_on_disk(tr_bin_path.c_str(), va_bin_path.c_str(), opt.param);
145 |     ffm_save_model(model, opt.model_path);
146 |     return 0;
147 | }
148 | 
149 | int main(int argc, char **argv) {
150 |     // Returns 1 on a usage error, otherwise the status of train_on_disk().
151 |     Option opt;
152 |     try {
153 |         opt = parse_option(argc, argv);
154 |     } catch(invalid_argument &e) {
155 |         cout << e.what() << endl;
156 |         return 1;
157 |     }
158 | 
159 |     // Validate before silencing cout below; otherwise `--quiet --auto-stop`
160 |     // without -p would exit with status 1 and no explanation.
161 |     if(opt.param.auto_stop && opt.va_path.empty()) {
162 |         cout << "To use auto-stop, you need to assign a validation set" << endl;
163 |         return 1;
164 |     }
165 | 
166 |     if(opt.quiet)
167 |         cout.setstate(ios_base::badbit);
168 | 
169 | #if defined USEOMP
170 |     omp_set_num_threads(opt.nr_threads);
171 | #endif
172 |     return train_on_disk(opt);
173 | }
174 | 


--------------------------------------------------------------------------------
/ffm.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | The following table is the meaning of some variables in this code:
  3 | 
  4 | W: The pointer to the beginning of the model
  5 | w: Dynamic pointer to access values in the model
  6 | m: Number of fields
  7 | k: Number of latent factors
  8 | n: Number of features
  9 | l: Number of data points
 10 | f: Field index (0 to m-1)
 11 | d: Latent factor index (0 to k-1)
 12 | j: Feature index (0 to n-1)
 13 | i: Data point index (0 to l-1)
 14 | nnz: Number of non-zero elements
 15 | X, P: Used to store the problem in a compressed sparse row (CSR) format. len(X) = nnz, len(P) = l + 1
 16 | Y: The label. len(Y) = l
 17 | R: Precomputed scaling factor to make the 2-norm of each instance to be 1. len(R) = l
 18 | v: Value of each element in the problem
 19 | */
 20 | 
 21 | #pragma GCC diagnostic ignored "-Wunused-result" 
 22 | #include <algorithm>
 23 | #include <cmath>
 24 | #include <iostream>
 25 | #include <iomanip>
 26 | #include <fstream>
 27 | #include <new>
 28 | #include <memory>
 29 | #include <random>
 30 | #include <stdexcept>
 31 | #include <string>
 32 | #include <cstring>
 33 | #include <vector>
 34 | #include <cassert>
 35 | #include <numeric>
 36 | 
 37 | #if defined USESSE
 38 | #include <pmmintrin.h>
 39 | #endif
 40 | 
 41 | #if defined USEOMP
 42 | #include <omp.h>
 43 | #endif
 44 | 
 45 | #include "ffm.h"
 46 | #include "timer.h"
 47 | 
 48 | namespace ffm {
 49 | 
 50 | namespace {
 51 | 
 52 | using namespace std;
 53 | 
 54 | #if defined USESSE
 55 | ffm_int const kALIGNByte = 16;  // byte alignment requested for the weight array when SSE is enabled
 56 | #else
 57 | ffm_int const kALIGNByte = 4;
 58 | #endif
 59 | // Number of ffm_float values that fit into one kALIGNByte-sized group.
 60 | ffm_int const kALIGN = kALIGNByte/sizeof(ffm_float);
 61 | ffm_int const kCHUNK_SIZE = 10000000;  // NOTE(review): unit not visible in this part of the file — confirm at the use site
 62 | ffm_int const kMaxLineSize = 100000;  // presumably the longest input line the text reader accepts — confirm at the use site
 63 | // Rounds k up to a whole number of kALIGN-sized groups and returns the padded length.
 64 | inline ffm_int get_k_aligned(ffm_int k) {
 65 |     return kALIGN * (ffm_int) ceil((ffm_float)k / kALIGN);
 66 | }
 67 | // Number of ffm_float slots in model.W: n * m * padded k, doubled because wTx keeps an accumulator group next to each weight group.
 68 | ffm_long get_w_size(ffm_model &model) {
 69 |     ffm_long const per_feature = (ffm_long) model.m * get_k_aligned(model.k) * 2;
 70 |     return per_feature * model.n;
 71 | }
 72 | // wTx, SSE variant: sums w1*w2*v1*v2*r over every pair of nodes of one instance, or (do_update) adjusts the weights instead.
 73 | #if defined USESSE
 74 | inline ffm_float wTx(
 75 |     ffm_node *begin,  // first node of the instance
 76 |     ffm_node *end,  // one past the last node
 77 |     ffm_float r,  // factor applied to every v1*v2 product
 78 |     ffm_model &model,  // weights are read, and written when do_update is true
 79 |     ffm_float kappa=0,  // multiplies the cross term of the gradient (update mode only)
 80 |     ffm_float eta=0,  // step size (update mode only)
 81 |     ffm_float lambda=0,  // multiplies the weight itself in the gradient (update mode only)
 82 |     bool do_update=false) {  // true: update weights and return 0; false: return the sum
 83 |     // align0: floats per (feature, field) slot; align1: floats per feature.
 84 |     ffm_int align0 = 2 * get_k_aligned(model.k);
 85 |     ffm_int align1 = model.m * align0;
 86 | 
 87 |     __m128 XMMkappa = _mm_set1_ps(kappa);
 88 |     __m128 XMMeta = _mm_set1_ps(eta);
 89 |     __m128 XMMlambda = _mm_set1_ps(lambda);
 90 | 
 91 |     __m128 XMMt = _mm_setzero_ps();  // four partial sums, folded together before returning
 92 | 
 93 |     for(ffm_node *N1 = begin; N1 != end; N1++)
 94 |     {
 95 |         ffm_int j1 = N1->j;
 96 |         ffm_int f1 = N1->f;
 97 |         ffm_float v1 = N1->v;
 98 |         if(j1 >= model.n || f1 >= model.m)  // index outside the model: skip this node
 99 |             continue;
100 | 
101 |         for(ffm_node *N2 = N1+1; N2 != end; N2++)
102 |         {
103 |             ffm_int j2 = N2->j;
104 |             ffm_int f2 = N2->f;
105 |             ffm_float v2 = N2->v;
106 |             if(j2 >= model.n || f2 >= model.m)
107 |                 continue;
108 |             // Each feature uses the slot it keeps for the other node's field.
109 |             ffm_float *w1_base = model.W + (ffm_long)j1*align1 + f2*align0;
110 |             ffm_float *w2_base = model.W + (ffm_long)j2*align1 + f1*align0;
111 | 
112 |             __m128 XMMv = _mm_set1_ps(v1*v2*r);
113 | 
114 |             if(do_update)
115 |             {
116 |                 __m128 XMMkappav = _mm_mul_ps(XMMkappa, XMMv);
117 |                 // Each step covers kALIGN weights followed by their kALIGN accumulators.
118 |                 for(ffm_int d = 0; d < align0; d += kALIGN * 2)
119 |                 {
120 |                     ffm_float *w1 = w1_base + d;
121 |                     ffm_float *w2 = w2_base + d;
122 | 
123 |                     ffm_float *wg1 = w1 + kALIGN;
124 |                     ffm_float *wg2 = w2 + kALIGN;
125 | 
126 |                     __m128 XMMw1 = _mm_load_ps(w1);
127 |                     __m128 XMMw2 = _mm_load_ps(w2);
128 | 
129 |                     __m128 XMMwg1 = _mm_load_ps(wg1);
130 |                     __m128 XMMwg2 = _mm_load_ps(wg2);
131 |                     // g = lambda * own weight + kappa * v * partner weight
132 |                     __m128 XMMg1 = _mm_add_ps(
133 |                                    _mm_mul_ps(XMMlambda, XMMw1),
134 |                                    _mm_mul_ps(XMMkappav, XMMw2));
135 |                     __m128 XMMg2 = _mm_add_ps(
136 |                                    _mm_mul_ps(XMMlambda, XMMw2),
137 |                                    _mm_mul_ps(XMMkappav, XMMw1));
138 |                     // Accumulate the squared gradients.
139 |                     XMMwg1 = _mm_add_ps(XMMwg1, _mm_mul_ps(XMMg1, XMMg1));
140 |                     XMMwg2 = _mm_add_ps(XMMwg2, _mm_mul_ps(XMMg2, XMMg2));
141 |                     // w -= eta * g * rsqrt(accumulator); _mm_rsqrt_ps is an approximation.
142 |                     XMMw1 = _mm_sub_ps(XMMw1, _mm_mul_ps(XMMeta, 
143 |                             _mm_mul_ps(_mm_rsqrt_ps(XMMwg1), XMMg1)));
144 |                     XMMw2 = _mm_sub_ps(XMMw2, _mm_mul_ps(XMMeta, 
145 |                             _mm_mul_ps(_mm_rsqrt_ps(XMMwg2), XMMg2)));
146 | 
147 |                     _mm_store_ps(w1, XMMw1);
148 |                     _mm_store_ps(w2, XMMw2);
149 | 
150 |                     _mm_store_ps(wg1, XMMwg1);
151 |                     _mm_store_ps(wg2, XMMwg2);
152 |                 }
153 |             }
154 |             else
155 |             {
156 |                 for(ffm_int d = 0; d < align0; d += kALIGN * 2)
157 |                 {
158 |                     __m128  XMMw1 = _mm_load_ps(w1_base+d);
159 |                     __m128  XMMw2 = _mm_load_ps(w2_base+d);
160 | 
161 |                     XMMt = _mm_add_ps(XMMt, 
162 |                            _mm_mul_ps(_mm_mul_ps(XMMw1, XMMw2), XMMv));
163 |                 }
164 |             }
165 |         }
166 |     }
167 | 
168 |     if(do_update)  // update mode produces no score
169 |         return 0;
170 |     // Two horizontal adds fold the four lanes of XMMt into every lane.
171 |     XMMt = _mm_hadd_ps(XMMt, XMMt);
172 |     XMMt = _mm_hadd_ps(XMMt, XMMt);
173 |     ffm_float t;
174 |     _mm_store_ss(&t, XMMt);
175 | 
176 |     return t;
177 | }
178 | 
179 | #else
180 | // wTx, scalar variant: same parameters and return value as the SSE implementation above.
181 | inline ffm_float wTx(
182 |     ffm_node *begin,
183 |     ffm_node *end,
184 |     ffm_float r,
185 |     ffm_model &model, 
186 |     ffm_float kappa=0, 
187 |     ffm_float eta=0, 
188 |     ffm_float lambda=0, 
189 |     bool do_update=false) {
190 |     // align0: floats per (feature, field) slot; align1: floats per feature.
191 |     ffm_int align0 = 2 * get_k_aligned(model.k);
192 |     ffm_int align1 = model.m * align0;
193 | 
194 |     ffm_float t = 0;  // stays 0 in update mode, where nothing is accumulated
195 |     for(ffm_node *N1 = begin; N1 != end; N1++) {
196 |         ffm_int j1 = N1->j;
197 |         ffm_int f1 = N1->f;
198 |         ffm_float v1 = N1->v;
199 |         if(j1 >= model.n || f1 >= model.m)  // index outside the model: skip this node
200 |             continue;
201 | 
202 |         for(ffm_node *N2 = N1+1; N2 != end; N2++) {
203 |             ffm_int j2 = N2->j;
204 |             ffm_int f2 = N2->f;
205 |             ffm_float v2 = N2->v;
206 |             if(j2 >= model.n || f2 >= model.m)
207 |                 continue;
208 |             // Each feature uses the slot it keeps for the other node's field.
209 |             ffm_float *w1 = model.W + (ffm_long)j1*align1 + f2*align0;
210 |             ffm_float *w2 = model.W + (ffm_long)j2*align1 + f1*align0;
211 | 
212 |             ffm_float v = v1 * v2 * r;
213 | 
214 |             if(do_update) {
215 |                 ffm_float *wg1 = w1 + kALIGN;  // accumulator sits kALIGN floats after its weight
216 |                 ffm_float *wg2 = w2 + kALIGN;
217 |                 for(ffm_int d = 0; d < align0; d += kALIGN * 2)
218 |                 {
219 |                     ffm_float g1 = lambda * w1[d] + kappa * w2[d] * v;
220 |                     ffm_float g2 = lambda * w2[d] + kappa * w1[d] * v;
221 |                     // Accumulate the squared gradients.
222 |                     wg1[d] += g1 * g1;
223 |                     wg2[d] += g2 * g2;
224 |                     // Step size shrinks with the accumulated squared gradients.
225 |                     w1[d] -= eta / sqrt(wg1[d]) * g1;
226 |                     w2[d] -= eta / sqrt(wg2[d]) * g2;
227 |                 }
228 |             } else {
229 |                 for(ffm_int d = 0; d < align0; d += kALIGN * 2)
230 |                     t += w1[d] * w2[d] * v;
231 |             }
232 |         }
233 |     }
234 | 
235 |     return t;
236 | }
237 | #endif
238 | 
239 | ffm_float* malloc_aligned_float(ffm_long size)
240 | {
241 |     void *ptr;
242 | 
243 | #ifndef USESSE
244 | 
245 |     ptr = malloc(size * sizeof(ffm_float));
246 | 
247 | #else
248 | 
249 |     #ifdef _WIN32
250 |         ptr = _aligned_malloc(size*sizeof(ffm_float), kALIGNByte);
251 |         if(ptr == nullptr)
252 |             throw bad_alloc();
253 |     #else
254 |         int status = posix_memalign(&ptr, kALIGNByte, size*sizeof(ffm_float));
255 |         if(status != 0)
256 |             throw bad_alloc();
257 |     #endif
258 | 
259 | #endif
260 |     
261 |     return (ffm_float*)ptr;
262 | }
263 | 
264 | ffm_model init_model(ffm_int n, ffm_int m, ffm_parameter param)
265 | {
266 |     ffm_model model;
267 |     model.n = n;
268 |     model.k = param.k;
269 |     model.m = m;
270 |     model.W = nullptr;
271 |     model.normalization = param.normalization;
272 | 
273 |     ffm_int k_aligned = get_k_aligned(model.k);
274 |     
275 |     model.W = malloc_aligned_float((ffm_long)n*m*k_aligned*2);
276 | 
277 |     ffm_float coef = 1.0f / sqrt(model.k);
278 |     ffm_float *w = model.W;
279 | 
280 |     default_random_engine generator;
281 |     uniform_real_distribution<ffm_float> distribution(0.0, 1.0);
282 | 
283 |     for(ffm_int j = 0; j < model.n; j++) {
284 |         for(ffm_int f = 0; f < model.m; f++) {
285 |             for(ffm_int d = 0; d < k_aligned;) {
286 |                 for(ffm_int s = 0; s < kALIGN; s++, w++, d++) {
287 |                     w[0] = (d < model.k)? coef * distribution(generator) : 0.0;
288 |                     w[kALIGN] = 1;
289 |                 }
290 |                 w += kALIGN;
291 |             }
292 |         }
293 |     }
294 | 
295 |     return model;
296 | }
297 | 
298 | struct disk_problem_meta {
299 |     ffm_int n = 0;
300 |     ffm_int m = 0;
301 |     ffm_int l = 0;
302 |     ffm_int num_blocks = 0;
303 |     ffm_long B_pos = 0;
304 |     uint64_t hash1;
305 |     uint64_t hash2;
306 | };
307 | 
308 | struct problem_on_disk {
309 |     disk_problem_meta meta;
310 |     vector<ffm_float> Y;
311 |     vector<ffm_float> R;
312 |     vector<ffm_long> P;
313 |     vector<ffm_node> X;
314 |     vector<ffm_long> B;
315 | 
316 |     problem_on_disk(string path) {
317 |         f.open(path, ios::in | ios::binary);
318 |         if(f.good()) {
319 |             f.read(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
320 |             f.seekg(meta.B_pos);
321 |             B.resize(meta.num_blocks);
322 |             f.read(reinterpret_cast<char*>(B.data()), sizeof(ffm_long) * meta.num_blocks);
323 |         }
324 |     }
325 | 
326 |     int load_block(int block_index) {
327 |         if(block_index >= meta.num_blocks)
328 |             assert(false);
329 | 
330 |         f.seekg(B[block_index]);
331 | 
332 |         ffm_int l;
333 |         f.read(reinterpret_cast<char*>(&l), sizeof(ffm_int));
334 | 
335 |         Y.resize(l);
336 |         f.read(reinterpret_cast<char*>(Y.data()), sizeof(ffm_float) * l);
337 | 
338 |         R.resize(l);
339 |         f.read(reinterpret_cast<char*>(R.data()), sizeof(ffm_float) * l);
340 | 
341 |         P.resize(l+1);
342 |         f.read(reinterpret_cast<char*>(P.data()), sizeof(ffm_long) * (l+1));
343 | 
344 |         X.resize(P[l]);
345 |         f.read(reinterpret_cast<char*>(X.data()), sizeof(ffm_node) * P[l]);
346 | 
347 |         return l;
348 |     }
349 | 
350 |     bool is_empty() {
351 |         return meta.l == 0;
352 |     }
353 | 
354 | private:
355 |     ifstream f;
356 | };
357 | 
358 | uint64_t hashfile(string txt_path, bool one_block=false)
359 | {
360 |     ifstream f(txt_path, ios::ate | ios::binary);
361 |     if(f.bad())
362 |         return 0;
363 | 
364 |     ffm_long end = (ffm_long) f.tellg();
365 |     f.seekg(0, ios::beg);
366 |     assert(static_cast<int>(f.tellg()) == 0);
367 | 
368 |     uint64_t magic = 90359;
369 |     for(ffm_long pos = 0; pos < end; ) {
370 |         ffm_long next_pos = min(pos + kCHUNK_SIZE, end);
371 |         ffm_long size = next_pos - pos;
372 |         vector<char> buffer(kCHUNK_SIZE);
373 |         f.read(buffer.data(), size);
374 | 
375 |         ffm_int i = 0;
376 |         while(i < size - 8) {
377 |             uint64_t x = *reinterpret_cast<uint64_t*>(buffer.data() + i);
378 |             magic = ( (magic + x) * (magic + x + 1) >> 1) + x;
379 |             i += 8;
380 |         }
381 |         for(; i < size; i++) {
382 |             char x = buffer[i];
383 |             magic = ( (magic + x) * (magic + x + 1) >> 1) + x;
384 |         }
385 | 
386 |         pos = next_pos;
387 |         if(one_block)
388 |             break;
389 |     }
390 | 
391 |     return magic;
392 | }
393 | 
394 | void txt2bin(string txt_path, string bin_path) {
395 |     
396 |     FILE *f_txt = fopen(txt_path.c_str(), "r");
397 |     if(f_txt == nullptr)
398 |         throw;
399 | 
400 |     ofstream f_bin(bin_path, ios::out | ios::binary);
401 | 
402 |     vector<char> line(kMaxLineSize);
403 | 
404 |     ffm_long p = 0;
405 |     disk_problem_meta meta;
406 | 
407 |     vector<ffm_float> Y;
408 |     vector<ffm_float> R;
409 |     vector<ffm_long> P(1, 0);
410 |     vector<ffm_node> X;
411 |     vector<ffm_long> B;
412 | 
413 |     auto write_chunk = [&] () {
414 |         B.push_back(f_bin.tellp());
415 |         ffm_int l = Y.size();
416 |         ffm_long nnz = P[l];
417 |         meta.l += l;
418 | 
419 |         f_bin.write(reinterpret_cast<char*>(&l), sizeof(ffm_int));
420 |         f_bin.write(reinterpret_cast<char*>(Y.data()), sizeof(ffm_float) * l);
421 |         f_bin.write(reinterpret_cast<char*>(R.data()), sizeof(ffm_float) * l);
422 |         f_bin.write(reinterpret_cast<char*>(P.data()), sizeof(ffm_long) * (l+1));
423 |         f_bin.write(reinterpret_cast<char*>(X.data()), sizeof(ffm_node) * nnz);
424 | 
425 |         Y.clear();
426 |         R.clear();
427 |         P.assign(1, 0);
428 |         X.clear();
429 |         p = 0;
430 |         meta.num_blocks++;
431 |     };
432 | 
433 |     f_bin.write(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
434 | 
435 |     while(fgets(line.data(), kMaxLineSize, f_txt)) {
436 |         char *y_char = strtok(line.data(), " \t");
437 | 
438 |         ffm_float y = (atoi(y_char)>0)? 1.0f : -1.0f;
439 | 
440 |         ffm_float scale = 0;
441 |         for(; ; p++) {
442 |             char *field_char = strtok(nullptr,":");
443 |             char *idx_char = strtok(nullptr,":");
444 |             char *value_char = strtok(nullptr," \t");
445 |             if(field_char == nullptr || *field_char == '\n')
446 |                 break;
447 | 
448 |             ffm_node N;
449 |             N.f = atoi(field_char);
450 |             N.j = atoi(idx_char);
451 |             N.v = atof(value_char);
452 | 
453 |             X.push_back(N);
454 | 
455 |             meta.m = max(meta.m, N.f+1);
456 |             meta.n = max(meta.n, N.j+1);
457 | 
458 |             scale += N.v*N.v;
459 |         }
460 |         scale = 1.0 / scale;
461 | 
462 |         Y.push_back(y);
463 |         R.push_back(scale);
464 |         P.push_back(p);
465 | 
466 |         if(X.size() > (size_t)kCHUNK_SIZE)
467 |             write_chunk(); 
468 |     }
469 |     write_chunk(); 
470 |     write_chunk(); // write a dummy empty chunk in order to know where the EOF is
471 |     assert(meta.num_blocks == (ffm_int)B.size());
472 |     meta.B_pos = f_bin.tellp();
473 |     f_bin.write(reinterpret_cast<char*>(B.data()), sizeof(ffm_long) * B.size());
474 | 
475 |     fclose(f_txt);
476 |     meta.hash1 = hashfile(txt_path, true);
477 |     meta.hash2 = hashfile(txt_path, false);
478 | 
479 |     f_bin.seekp(0, ios::beg);
480 |     f_bin.write(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
481 | }
482 | 
483 | bool check_same_txt_bin(string txt_path, string bin_path) {
484 |     ifstream f_bin(bin_path, ios::binary | ios::ate);
485 |     if(f_bin.tellg() < (ffm_long)sizeof(disk_problem_meta))
486 |         return false;
487 |     disk_problem_meta meta;
488 |     f_bin.seekg(0, ios::beg);
489 |     f_bin.read(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
490 |     if(meta.hash1 != hashfile(txt_path, true))
491 |         return false;
492 |     if(meta.hash2 != hashfile(txt_path, false))
493 |         return false;
494 | 
495 |     return true;
496 | }
497 | 
498 | } // unnamed namespace
499 | 
500 | ffm_model::~ffm_model() {
501 |     if(W != nullptr) {
502 | #ifndef USESSE
503 |         free(W);
504 | #else
505 |     #ifdef _WIN32
506 |         _aligned_free(W);
507 |     #else
508 |         free(W);
509 |     #endif
510 | #endif
511 |         W = nullptr;
512 |     }
513 | }
514 | 
515 | void ffm_read_problem_to_disk(string txt_path, string bin_path) {
516 | 
517 |     Timer timer;
518 |     
519 |     cout << "First check if the text file has already been converted to binary format " << flush;
520 |     bool same_file = check_same_txt_bin(txt_path, bin_path);
521 |     cout << "(" << fixed << setprecision(1) << timer.toc() << " seconds)" << endl;
522 | 
523 |     if(same_file) {
524 |         cout << "Binary file found. Skip converting text to binary" << endl;
525 |     } else {
526 |         cout << "Binary file NOT found. Convert text file to binary file " << flush;
527 |         txt2bin(txt_path, bin_path);
528 |         cout << "(" << fixed << setprecision(1) << timer.toc() << " seconds)" << endl;
529 |     }
530 | }
531 | 
532 | ffm_model ffm_train_on_disk(string tr_path, string va_path, ffm_parameter param) {
533 | 
534 |     problem_on_disk tr(tr_path);
535 |     problem_on_disk va(va_path);
536 | 
537 |     ffm_model model = init_model(tr.meta.n, tr.meta.m, param);
538 | 
539 |     bool auto_stop = param.auto_stop && !va_path.empty();
540 | 
541 |     ffm_long w_size = get_w_size(model);
542 |     vector<ffm_float> prev_W(w_size, 0);
543 |     if(auto_stop)
544 |         prev_W.assign(w_size, 0);
545 |     ffm_double best_va_loss = numeric_limits<ffm_double>::max();
546 | 
547 |     cout.width(4);
548 |     cout << "iter";
549 |     cout.width(13);
550 |     cout << "tr_logloss";
551 |     if(!va_path.empty())
552 |     {
553 |         cout.width(13);
554 |         cout << "va_logloss";
555 |     }
556 |     cout.width(13);
557 |     cout << "tr_time";
558 |     cout << endl;
559 | 
560 |     Timer timer;
561 | 
562 |     auto one_epoch = [&] (problem_on_disk &prob, bool do_update) {
563 | 
564 |         ffm_double loss = 0;
565 | 
566 |         vector<ffm_int> outer_order(prob.meta.num_blocks);
567 |         iota(outer_order.begin(), outer_order.end(), 0);
568 |         random_shuffle(outer_order.begin(), outer_order.end());
569 |         for(auto blk : outer_order) {
570 |             ffm_int l = prob.load_block(blk);
571 | 
572 |             vector<ffm_int> inner_order(l);
573 |             iota(inner_order.begin(), inner_order.end(), 0);
574 |             random_shuffle(inner_order.begin(), inner_order.end());
575 | 
576 | #if defined USEOMP
577 | #pragma omp parallel for schedule(static) reduction(+: loss)
578 | #endif
579 |             for(ffm_int ii = 0; ii < l; ii++) {
580 |                 ffm_int i = inner_order[ii];
581 | 
582 |                 ffm_float y = prob.Y[i];
583 |                 
584 |                 ffm_node *begin = &prob.X[prob.P[i]];
585 | 
586 |                 ffm_node *end = &prob.X[prob.P[i+1]];
587 | 
588 |                 ffm_float r = param.normalization? prob.R[i] : 1;
589 | 
590 |                 ffm_double t = wTx(begin, end, r, model);
591 | 
592 |                 ffm_double expnyt = exp(-y*t);
593 | 
594 |                 loss += log1p(expnyt);
595 | 
596 |                 if(do_update) {
597 |                    
598 |                     ffm_float kappa = -y*expnyt/(1+expnyt);
599 | 
600 |                     wTx(begin, end, r, model, kappa, param.eta, param.lambda, true);
601 |                 }
602 |             }
603 |         }
604 | 
605 |         return loss / prob.meta.l;
606 |     };
607 | 
608 |     for(ffm_int iter = 1; iter <= param.nr_iters; iter++) {
609 |         timer.tic();
610 |         ffm_double tr_loss = one_epoch(tr, true);
611 |         timer.toc();
612 | 
613 |         cout.width(4);
614 |         cout << iter;
615 |         cout.width(13);
616 |         cout << fixed << setprecision(5) << tr_loss;
617 | 
618 |         if(!va.is_empty()) {
619 |             ffm_double va_loss = one_epoch(va, false);
620 | 
621 |             cout.width(13);
622 |             cout << fixed << setprecision(5) << va_loss;
623 | 
624 |             if(auto_stop) {
625 |                 if(va_loss > best_va_loss) {
626 |                     memcpy(model.W, prev_W.data(), w_size*sizeof(ffm_float));
627 |                     cout << endl << "Auto-stop. Use model at " << iter-1 << "th iteration." << endl;
628 |                     break;
629 |                 } else {
630 |                     memcpy(prev_W.data(), model.W, w_size*sizeof(ffm_float));
631 |                     best_va_loss = va_loss; 
632 |                 }
633 |             }
634 |         }
635 |         cout.width(13);
636 |         cout << fixed << setprecision(1) << timer.get() << endl;
637 |     }
638 | 
639 |     return model;
640 | }
641 | 
642 | void ffm_save_model(ffm_model &model, string path) {
643 |     ofstream f_out(path, ios::out | ios::binary);
644 |     f_out.write(reinterpret_cast<char*>(&model.n), sizeof(ffm_int));
645 |     f_out.write(reinterpret_cast<char*>(&model.m), sizeof(ffm_int));
646 |     f_out.write(reinterpret_cast<char*>(&model.k), sizeof(ffm_int));
647 |     f_out.write(reinterpret_cast<char*>(&model.normalization), sizeof(bool));
648 | 
649 |     ffm_long w_size = get_w_size(model);
650 |     // f_out.write(reinterpret_cast<char*>(model.W), sizeof(ffm_float) * w_size);
651 |     // Need to write chunk by chunk because some compiler use int32 and will overflow when w_size * 4 > MAX_INT
652 | 
653 |     for(ffm_long offset = 0; offset < w_size; ) {
654 |         ffm_long next_offset = min(w_size, offset + (ffm_long) sizeof(ffm_float) * kCHUNK_SIZE);
655 |         ffm_long size = next_offset - offset;
656 |         f_out.write(reinterpret_cast<char*>(model.W+offset), sizeof(ffm_float) * size);
657 |         offset = next_offset;
658 |     }
659 | }
660 | 
661 | ffm_model ffm_load_model(string path) {
662 |     ifstream f_in(path, ios::in | ios::binary);
663 | 
664 |     ffm_model model;
665 |     f_in.read(reinterpret_cast<char*>(&model.n), sizeof(ffm_int));
666 |     f_in.read(reinterpret_cast<char*>(&model.m), sizeof(ffm_int));
667 |     f_in.read(reinterpret_cast<char*>(&model.k), sizeof(ffm_int));
668 |     f_in.read(reinterpret_cast<char*>(&model.normalization), sizeof(bool));
669 | 
670 |     ffm_long w_size = get_w_size(model);
671 |     model.W = malloc_aligned_float(w_size);
672 |     // f_in.read(reinterpret_cast<char*>(model.W), sizeof(ffm_float) * w_size);
673 |     // Need to write chunk by chunk because some compiler use int32 and will overflow when w_size * 4 > MAX_INT
674 | 
675 |     for(ffm_long offset = 0; offset < w_size; ) {
676 |         ffm_long next_offset = min(w_size, offset + (ffm_long) sizeof(ffm_float) * kCHUNK_SIZE);
677 |         ffm_long size = next_offset - offset;
678 |         f_in.read(reinterpret_cast<char*>(model.W+offset), sizeof(ffm_float) * size);
679 |         offset = next_offset;
680 |     }
681 | 
682 |     return model;
683 | }
684 | 
685 | ffm_float ffm_predict(ffm_node *begin, ffm_node *end, ffm_model &model) {
686 |     ffm_float r = 1;
687 |     if(model.normalization) {
688 |         r = 0;
689 |         for(ffm_node *N = begin; N != end; N++)
690 |             r += N->v*N->v; 
691 |         r = 1/r;
692 |     }
693 | 
694 |     ffm_float t = wTx(begin, end, r, model);
695 | 
696 |     return 1/(1+exp(-t));
697 | }
698 | 
699 | } // namespace ffm
700 | 


--------------------------------------------------------------------------------
/ffm.h:
--------------------------------------------------------------------------------
 1 | #ifndef _LIBFFM_H
 2 | #define _LIBFFM_H
 3 | 
 4 | #include <string>
 5 | 
 6 | namespace ffm {
 7 | 
 8 | using namespace std;
 9 | 
10 | typedef float ffm_float;
11 | typedef double ffm_double;
12 | typedef int ffm_int;
13 | typedef long long ffm_long;
14 | 
15 | struct ffm_node {
16 |     ffm_int f; // field index
17 |     ffm_int j; // feature index
18 |     ffm_float v; // value
19 | };
20 | 
21 | struct ffm_model {
22 |     ffm_int n; // number of features
23 |     ffm_int m; // number of fields
24 |     ffm_int k; // number of latent factors
25 |     ffm_float *W = nullptr;
26 |     bool normalization;
27 |     ~ffm_model();
28 | };
29 | 
30 | struct ffm_parameter {
31 |     ffm_float eta = 0.2; // learning rate
32 |     ffm_float lambda = 0.00002; // regularization parameter
33 |     ffm_int nr_iters = 15;
34 |     ffm_int k = 4; // number of latent factors
35 |     bool normalization = true;
36 |     bool auto_stop = false;
37 | };
38 | 
39 | void ffm_read_problem_to_disk(string txt_path, string bin_path);
40 | 
41 | void ffm_save_model(ffm_model &model, string path);
42 | 
43 | ffm_model ffm_load_model(string path);
44 | 
45 | ffm_model ffm_train_on_disk(string Tr_path, string Va_path, ffm_parameter param);
46 | 
47 | ffm_float ffm_predict(ffm_node *begin, ffm_node *end, ffm_model &model);
48 | 
49 | } // namespace ffm
50 | 
51 | #endif // _LIBFFM_H
52 | 


--------------------------------------------------------------------------------
/timer.cpp:
--------------------------------------------------------------------------------
 1 | #include <string>
 2 | #include "timer.h"
 3 | 
 4 | Timer::Timer()
 5 | {
 6 |     reset();
 7 | }
 8 | 
 9 | void Timer::reset()
10 | {
11 |     begin = std::chrono::high_resolution_clock::now();
12 |     duration = 
13 |         std::chrono::duration_cast<std::chrono::milliseconds>(begin-begin);
14 | }
15 | 
16 | void Timer::tic()
17 | {
18 |     begin = std::chrono::high_resolution_clock::now();
19 | }
20 | 
21 | float Timer::toc()
22 | {
23 |     duration += std::chrono::duration_cast<std::chrono::milliseconds>
24 |                     (std::chrono::high_resolution_clock::now()-begin);
25 |     return get();
26 | }
27 | 
28 | float Timer::get()
29 | {
30 |     return (float)duration.count() / 1000;
31 | }
32 | 


--------------------------------------------------------------------------------
/timer.h:
--------------------------------------------------------------------------------
 1 | #include <chrono>
 2 | 
 3 | class Timer
 4 | {
 5 | public:
 6 |     Timer();
 7 |     void reset();
 8 |     void tic();
 9 |     float toc();
10 |     float get();
11 | private:
12 |     std::chrono::high_resolution_clock::time_point begin;
13 |     std::chrono::milliseconds duration;
14 | };
15 | 


--------------------------------------------------------------------------------