├── word2vec
├── .gitignore
├── huffman_test.cc
└── word2vec.c
├── .DS_Store
├── libfm-1.42
├── src-fm_core
│ ├── fm_data.h
│ └── fm_model.h
├── src-linfm-src
│ └── fm_learn_sgd_element.h
└── src-util
│ └── rlog.h
└── README.md
/word2vec/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/
--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaozhiyong19890102/OpenSourceReading/HEAD/.DS_Store
--------------------------------------------------------------------------------
/libfm-1.42/src-fm_core/fm_data.h:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2010, 2011, 2012, 2013, 2014 Steffen Rendle
2 | // Contact: srendle@libfm.org, http://www.libfm.org/
3 | //
4 | // This file is part of libFM.
5 | //
6 | // libFM is free software: you can redistribute it and/or modify
7 | // it under the terms of the GNU General Public License as published by
8 | // the Free Software Foundation, either version 3 of the License, or
9 | // (at your option) any later version.
10 | //
11 | // libFM is distributed in the hope that it will be useful,
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | // GNU General Public License for more details.
15 | //
16 | // You should have received a copy of the GNU General Public License
17 | // along with libFM. If not, see <http://www.gnu.org/licenses/>.
18 | //
19 | //
20 | // fm_data.h: Base data type of libFM
21 |
22 | #ifndef FM_DATA_H_
23 | #define FM_DATA_H_
24 |
25 | typedef float FM_FLOAT;// define the data type: FM_FLOAT is an alias for float
26 |
27 | #endif /*FM_DATA_H_*/
28 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # OpenSourceReading
2 |
3 | 对一些开源代码的学习与总结,从源码中学习。
4 |
5 | ## 1、word2vec
6 |
7 | - [代码注释](https://github.com/zhaozhiyong19890102/OpenSourceReading/tree/master/word2vec)
8 | - [机器学习算法实现解析——word2vec源码解析](https://blog.csdn.net/google19890102/article/details/51887344 "机器学习算法实现解析——word2vec源码解析")
9 |
10 | ## 2、libfm-1.42
11 |
12 | - [代码注释](https://github.com/zhaozhiyong19890102/OpenSourceReading/tree/master/libfm-1.42)
13 | - [机器学习算法实现解析——libFM之libFM的模型处理部分](https://blog.csdn.net/google19890102/article/details/72866290 "机器学习算法实现解析——libFM之libFM的模型处理部分")
14 | - [机器学习算法实现解析——libFM之libFM的训练过程概述](https://blog.csdn.net/google19890102/article/details/72866320 "机器学习算法实现解析——libFM之libFM的训练过程概述")
15 | - [机器学习算法实现解析——libFM之libFM的训练过程之SGD的方法](https://blog.csdn.net/google19890102/article/details/72866334 "机器学习算法实现解析——libFM之libFM的训练过程之SGD的方法")
16 | - [机器学习算法实现解析——libFM之libFM的训练过程之Adaptive Regularization](https://blog.csdn.net/google19890102/article/details/73301949 "机器学习算法实现解析——libFM之libFM的训练过程之Adaptive Regularization")
17 |
18 | ## 3、liblbfgs-1.10
19 |
20 | - [机器学习算法实现解析——liblbfgs之L-BFGS算法](https://blog.csdn.net/google19890102/article/details/77187890 "机器学习算法实现解析——liblbfgs之L-BFGS算法")
--------------------------------------------------------------------------------
/libfm-1.42/src-linfm-src/fm_learn_sgd_element.h:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2010, 2011, 2012, 2013, 2014 Steffen Rendle
2 | // Contact: srendle@libfm.org, http://www.libfm.org/
3 | //
4 | // This file is part of libFM.
5 | //
6 | // libFM is free software: you can redistribute it and/or modify
7 | // it under the terms of the GNU General Public License as published by
8 | // the Free Software Foundation, either version 3 of the License, or
9 | // (at your option) any later version.
10 | //
11 | // libFM is distributed in the hope that it will be useful,
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | // GNU General Public License for more details.
15 | //
16 | // You should have received a copy of the GNU General Public License
17 | // along with libFM. If not, see <http://www.gnu.org/licenses/>.
18 | //
19 | //
20 | // fm_learn_sgd.h: Stochastic Gradient Descent based learning for
21 | // classification and regression
22 | //
23 | // Based on the publication(s):
24 | // - Steffen Rendle (2010): Factorization Machines, in Proceedings of the 10th
25 | // IEEE International Conference on Data Mining (ICDM 2010), Sydney,
26 | // Australia.
27 |
28 | #ifndef FM_LEARN_SGD_ELEMENT_H_
29 | #define FM_LEARN_SGD_ELEMENT_H_
30 |
31 | #include "fm_learn_sgd.h"
32 |
33 | // Element-wise SGD learner: derives from fm_learn_sgd and calls SGD() once per training row
34 | class fm_learn_sgd_element: public fm_learn_sgd {
35 | public:
36 | // Initialization: runs the base-class init, then registers the "rmse_train" log field
37 | virtual void init() {
38 | fm_learn_sgd::init();
39 | // Log output: only when a logger is attached; NaN is passed as the second argument (presumably the field's default value - confirm against addField)
40 | if (log != NULL) {
41 | log->addField("rmse_train", std::numeric_limits<double>::quiet_NaN());
42 | }
43 | }
44 | // Train the FM model with SGD on `train`; after every iteration evaluate on both `train` and `test`
45 | virtual void learn(Data& train, Data& test) {
46 | fm_learn_sgd::learn(train, test);// base-class learn (per the original note: prints the parameter information)
47 |
48 | std::cout << "SGD: DON'T FORGET TO SHUFFLE THE ROWS IN TRAINING DATA TO GET THE BEST RESULTS." << std::endl;
49 | // SGD
50 | for (int i = 0; i < num_iter; i++) {// start iterating; each pass of this loop is one iteration over the training data
51 | double iteration_time = getusertime();// record the start time
52 | for (train.data->begin(); !train.data->end(); train.data->next()) {// for every training row
53 | double p = fm->predict(train.data->getRow(), sum, sum_sqr);// prediction for the current row
54 | double mult = 0;// derivative of the loss with respect to the prediction; stays 0 if task is neither 0 nor 1
55 | if (task == 0) {// regression
56 | p = std::min(max_target, p);
57 | p = std::max(min_target, p);
58 | // p is clamped to [min_target, max_target] above; squared-error loss, gradient taken up to a constant factor
59 | mult = -(train.target(train.data->getRowIndex())-p);// mult = -(y - p)
60 | } else if (task == 1) {// classification
61 | // mult = -y * (1 - sigmoid(y*p)), where sigmoid(x) = 1/(1+exp(-x)) and y is the row's target
62 | mult = -train.target(train.data->getRowIndex())*(1.0-1.0/(1.0+exp(-train.target(train.data->getRowIndex())*p)));
63 | }
64 | // update the parameters with a gradient-descent step for this row
65 | SGD(train.data->getRow(), mult, sum);
66 | }
67 | iteration_time = (getusertime() - iteration_time);// elapsed time of this iteration
68 | // evaluate is inherited (per the original note it is defined in the fm_learn class)
69 | double rmse_train = evaluate(train);// evaluate the model on the training data
70 | double rmse_test = evaluate(test);// apply the model to the test data
71 | std::cout << "#Iter=" << std::setw(3) << i << "\tTrain=" << rmse_train << "\tTest=" << rmse_test << std::endl;
72 | // Log output: write this iteration's training score and learning time, then start a new log line
73 | if (log != NULL) {
74 | log->log("rmse_train", rmse_train);
75 | log->log("time_learn", iteration_time);
76 | log->newLine();
77 | }
78 | }
79 | }
80 |
81 | };
82 |
83 | #endif /*FM_LEARN_SGD_ELEMENT_H_*/
84 |
--------------------------------------------------------------------------------
/libfm-1.42/src-util/rlog.h:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2010, 2011, 2012, 2013, 2014 Steffen Rendle
2 | // Contact: srendle@libfm.org, http://www.libfm.org/
3 | //
4 | // This file is part of libFM.
5 | //
6 | // libFM is free software: you can redistribute it and/or modify
7 | // it under the terms of the GNU General Public License as published by
8 | // the Free Software Foundation, either version 3 of the License, or
9 | // (at your option) any later version.
10 | //
11 | // libFM is distributed in the hope that it will be useful,
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | // GNU General Public License for more details.
15 | //
16 | // You should have received a copy of the GNU General Public License
17 | // along with libFM. If not, see <http://www.gnu.org/licenses/>.
18 | //
19 | //
20 | // rlog.h: Logging into R compatible files
21 |
22 | #ifndef RLOG_H_
23 | #define RLOG_H_
24 | #include
25 | #include
26 | #include
27 | #include