├── data ├── run_data_pipline.sh ├── v2v_run_pipline.sh ├── v2v_sampling_traindata.py ├── v2v_run_split_data.sh └── split_data.py ├── hosts ├── stop.sh ├── src ├── threadpool │ ├── Makefile │ ├── example.cpp │ └── thread_pool.h ├── learner │ ├── sgd_learner.h │ ├── owlqn_learner.h │ ├── learner.h │ ├── ftrl_learner.h │ ├── ftrl_learner.cc │ └── owlqn.h ├── update │ └── update.h ├── io │ ├── load_data.h │ ├── io.h │ └── load_data.cc ├── main.cpp ├── param.h └── predict.h ├── v2v_config.h ├── n2n_config.h ├── config.h ├── Makefile ├── run_n2n_ffm_mpi.sh ├── run_v2v_ffm_mpi.sh ├── run_ffm_mpi.sh └── README.md /data/run_data_pipline.sh: -------------------------------------------------------------------------------- 1 | sh run_n2n_split_data.sh 2 | -------------------------------------------------------------------------------- /hosts: -------------------------------------------------------------------------------- 1 | 10.101.2.88 2 | 10.101.2.89 3 | 10.101.2.90 4 | -------------------------------------------------------------------------------- /stop.sh: -------------------------------------------------------------------------------- 1 | ps -ef | grep ffm_mpi | awk '{ print $2 }' | sudo xargs kill -9 2 | -------------------------------------------------------------------------------- /src/threadpool/Makefile: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | all: 3 | g++ -std=c++11 example.cpp -lpthread 4 | -------------------------------------------------------------------------------- /src/learner/sgd_learner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace dml{ 4 | class SGD_learner : Learner{ 5 | public: 6 | void Init(); 7 | }; 8 | } 9 | -------------------------------------------------------------------------------- /src/learner/owlqn_learner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 
| 3 | namespace dml{ 4 | class OWLQN_learner : Learner{ 5 | public: 6 | void Init(); 7 | }; 8 | } 9 | -------------------------------------------------------------------------------- /data/v2v_run_pipline.sh: -------------------------------------------------------------------------------- 1 | python v2v_sampling_traindata.py v2v_traindata.txt > v2v_train.txt 2 | python v2v_sampling_traindata.py v2v_testdata.txt > v2v_test.txt 3 | sh v2v_run_split_data.sh 4 | -------------------------------------------------------------------------------- /src/update/update.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "param.h" 3 | namespace dml{ 4 | class Update(){ 5 | public: 6 | Update(Param *param) : param(param){} 7 | ~Update(){} 8 | public: 9 | Param param; 10 | 11 | 12 | }; 13 | } 14 | -------------------------------------------------------------------------------- /data/v2v_sampling_traindata.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | 4 | f = open(sys.argv[1], 'r') 5 | for line in f: 6 | br = line.strip().split('\t') 7 | label = br[0] 8 | n = random.random(); 9 | #if label == '1': 10 | # print line.strip() 11 | # continue 12 | if n < 0.1: 13 | print line.strip() 14 | 15 | -------------------------------------------------------------------------------- /v2v_config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | --isonline=1 3 | --isbatch=0 4 | --ismultithread=1 5 | --epoch=10 6 | --batch_size=800 7 | --bias=0.0 8 | --alpha=2.0 9 | --beta=1.0 10 | --lambda1=4.0 11 | --lambda2=0.0 12 | --fea_dim=400000 13 | --factor=2 14 | --group=52 15 | --isffm=0 16 | --isfm=0 17 | --islr=1 18 | --issgd=0 19 | --isftrl=1 20 | --isowlqn=0 21 | --train_data_path=./data/v2v_train 22 | --test_data_path=./data/v2v_test 23 | 
-------------------------------------------------------------------------------- /n2n_config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | --isonline=1 3 | --isbatch=0 4 | --ismultithread=1 5 | --epoch=100 6 | --batch_size=400 7 | --bias=0.0 8 | --alpha=2.0 9 | --beta=1.0 10 | --lambda1=10.0 11 | --lambda2=0.0 12 | --fea_dim=2000000 13 | --factor=2 14 | --group=52 15 | --isffm=0 16 | --isfm=0 17 | --islr=1 18 | --issgd=0 19 | --isftrl=1 20 | --isowlqn=0 21 | --train_data_path=./data/n2n_train 22 | --test_data_path=./data/n2n_test 23 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | --isbatch=1 3 | --isonline=0 4 | --epoch=1 5 | --batch_size=800 6 | --bias=0.0 7 | --alpha=0.8 8 | --beta=1.0 9 | --lambda1=2.0 10 | --lambda2=0.0 11 | --fea_dim=370000 12 | --factor=2 13 | --group=42 14 | --isffm=0 15 | --isfm=0 16 | --islr=1 17 | --issgd=0 18 | --isftrl=1 19 | --isowlqn=0 20 | --issinglethread=0 21 | --ismultithread=1 22 | --train_data_path=./data/v2v_train 23 | --test_data_path=./data/v2v_test 24 | -------------------------------------------------------------------------------- /src/io/load_data.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "io.h" 3 | #include 4 | 5 | namespace dml{ 6 | class LoadData : public IO{ 7 | public: 8 | LoadData(const char *file_path) : IO(file_path){ 9 | } 10 | ~LoadData(){} 11 | 12 | void load_all_data(); 13 | void load_batch_data(int num); 14 | 15 | public: 16 | key_val keyval; 17 | std::vector sample; 18 | std::vector> fea_matrix; 19 | std::vector label; 20 | }; 21 | } 22 | -------------------------------------------------------------------------------- /data/v2v_run_split_data.sh: -------------------------------------------------------------------------------- 1 | rm 
v2v_test-0000* 2 | python split_data.py v2v_test.txt 3 v2v_test 3 | scp v2v_test-0000* slave1:/home/worker/xiaoshu/Field-aware-Factorization-Machine-ftrl-mpi/data 4 | scp v2v_test-0000* slave2:/home/worker/xiaoshu/Field-aware-Factorization-Machine-ftrl-mpi/data 5 | 6 | rm v2v_train-0000* 7 | python split_data.py v2v_train.txt 3 v2v_train 8 | scp v2v_train-0000* slave1:/home/worker/xiaoshu/Field-aware-Factorization-Machine-ftrl-mpi/data 9 | scp v2v_train-0000* slave2:/home/worker/xiaoshu/Field-aware-Factorization-Machine-ftrl-mpi/data 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | INCLUDEPATH = -I/usr/local/include/ -I/usr/include -I/opt/OpenBLAS/include 3 | LIBRARYPATH = -L/usr/local/lib -L/opt/OpenBLAS/lib 4 | LIBRARY = -lpthread -lopenblas -lm -lgflags -fopenmp 5 | CPP_tag = -std=gnu++11 -fopenmp 6 | 7 | LIB=/home/services/xiaoshu/lib 8 | INCLUDE=/home/services/xiaoshu/include 9 | 10 | all:ffm_mpi rm 11 | 12 | ffm_mpi:main.o 13 | mpicxx $(CPP_tag) -g -o ffm_mpi main.o $(LIBRARYPATH) $(LIBRARY) 14 | 15 | main.o: src/main.cpp 16 | mpicxx $(CPP_tag) $(INCLUDEPATH) -c src/main.cpp -DGLFAGS_NAMESPACE=google 17 | rm: 18 | rm main.o 19 | 20 | clean: 21 | rm -f *~ ffm_mpi predict *.o 22 | -------------------------------------------------------------------------------- /data/split_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | 4 | f=open(sys.argv[1], 'r') 5 | num = int(sys.argv[2]) 6 | ftype = sys.argv[3] 7 | names = [] 8 | for i in xrange(num): 9 | name = ftype + '-0000' + str(i) 10 | names.append(name) 11 | f1 = open(names[0], 'w') 12 | f2 = open(names[1], 'w') 13 | f3 = open(names[2], 'w') 14 | 15 | for line in f: 16 | v = random.random() 17 | if 0 <= v and v < 1.0/num: 18 | f1.write(line.strip()) 19 | f1.write('\n') 20 | elif 1.0 / num <= v 
and v < 2 * 1.0/num: 21 | f2.write(line.strip()) 22 | f2.write('\n') 23 | elif 2 * 1.0/num <= v and v < 1.0: 24 | f3.write(line.strip()) 25 | f3.write('\n') 26 | f1.close() 27 | f2.close() 28 | f3.close() 29 | -------------------------------------------------------------------------------- /src/threadpool/example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "ThreadPool.h" 6 | 7 | void task(int i){ 8 | std::cout << "hello " << i << std::endl; 9 | std::this_thread::sleep_for(std::chrono::seconds(1)); 10 | std::cout << "world " << i << std::endl; 11 | } 12 | 13 | int main() 14 | { 15 | ThreadPool pool(4); 16 | std::vector< std::future > results; 17 | for(int i = 0; i < 8; ++i) { 18 | pool.enqueue(std::bind(task, i)); 19 | } 20 | return 0; 21 | } 22 | /*pool.enqueue([i] { 23 | std::cout << "hello " << i << std::endl; 24 | std::this_thread::sleep_for(std::chrono::seconds(1)); 25 | std::cout << "world " << i << std::endl; 26 | return i*i; 27 | }) 28 | */ 29 | //); 30 | //} 31 | 32 | //for(auto && result: results) 33 | // std::cout << result.get() << ' '; 34 | //std::cout << std::endl; 35 | 36 | // return 0; 37 | //} 38 | -------------------------------------------------------------------------------- /src/io/io.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | namespace dml{ 6 | 7 | struct kv{ 8 | int fgid; 9 | long int fid; 10 | float val; 11 | }; 12 | 13 | class IO{ 14 | public: 15 | IO(const char *file_path) : file_path(file_path){ 16 | Init(); 17 | }; 18 | ~IO(){}; 19 | 20 | void Init(){ 21 | fin_.open(file_path, std::ios::in); 22 | if(!fin_.is_open()){ 23 | std::cout<<"open file "< 2 | #include "param.h" 3 | #include "io/load_data.cc" 4 | #include "learner/ftrl_learner.cc" 5 | #include "predict.h" 6 | #include "mpi.h" 7 | 8 | int main(int argc,char* argv[]){ 9 | int rank, nproc; 10 | int 
kRankNameLength = 1024; 11 | char processor_name[kRankNameLength]; 12 | MPI_Init(&argc,&argv); 13 | MPI_Comm_rank(MPI_COMM_WORLD,&rank); 14 | MPI_Comm_size(MPI_COMM_WORLD,&nproc); 15 | MPI_Get_processor_name(processor_name,&kRankNameLength); 16 | 17 | dml::Param param(argc, argv); 18 | 19 | char train_data_path[1024]; 20 | snprintf(train_data_path, 1024, "%s-%05d", param.train_data_path.c_str(), rank); 21 | char test_data_path[1024]; 22 | snprintf(test_data_path, 1024, "%s-%05d", param.test_data_path.c_str(), rank); 23 | 24 | dml::LoadData train_data(train_data_path); 25 | 26 | dml::LoadData test_data(test_data_path); 27 | test_data.load_all_data(); 28 | 29 | dml::Predict predict(&test_data, ¶m, nproc, rank); 30 | 31 | if(param.isftrl == 1){ 32 | dml::FtrlLearner ftrl(&train_data, &predict, ¶m, nproc, rank); 33 | ftrl.run(); 34 | predict.run(ftrl.loc_w, ftrl.loc_v); 35 | } 36 | 37 | MPI::Finalize(); 38 | return 0; 39 | } 40 | -------------------------------------------------------------------------------- /src/io/load_data.cc: -------------------------------------------------------------------------------- 1 | #include "load_data.h" 2 | 3 | namespace dml{ 4 | void LoadData::load_all_data(){ 5 | fea_matrix.clear(); 6 | while(!fin_.eof()){ 7 | std::getline(fin_, line); 8 | sample.clear(); 9 | const char *pline = line.c_str(); 10 | if(sscanf(pline, "%d%n", &y, &nchar) >= 1){ 11 | pline += nchar; 12 | label.push_back(y); 13 | while(sscanf(pline, "%d:%ld:%f%n", &fgid, &fid, &val, &nchar) >= 3){ 14 | pline += nchar; 15 | keyval.fgid = fgid; 16 | keyval.fid = fid; 17 | keyval.val = val; 18 | sample.push_back(keyval); 19 | } 20 | } 21 | fea_matrix.push_back(sample); 22 | } 23 | }//end load 24 | 25 | void LoadData::load_batch_data(int num){ 26 | fea_matrix.clear(); 27 | for(int i = 0; i < num; i++){ 28 | std::getline(fin_, line); 29 | if(fin_.eof()) break; 30 | sample.clear(); 31 | const char *pline = line.c_str(); 32 | if(sscanf(pline, "%d%n", &y, &nchar) >= 1){ 33 | pline 
+= nchar; 34 | label.push_back(y); 35 | while(sscanf(pline, "%d:%ld:%f%n", &fgid, &fid, &val, &nchar) >= 3){ 36 | pline += nchar; 37 | keyval.fgid = fgid; 38 | keyval.fid = fid; 39 | keyval.val = val; 40 | sample.push_back(keyval); 41 | }//end while 42 | }//end if 43 | fea_matrix.push_back(sample); 44 | }//end for 45 | }//end load 46 | 47 | }//end namespace 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 1. Introduction 2 | 3 | Field-aware factorization machine[1] solved by the FTRL algorithm[2], based on MPICH 4 | 5 | 2. Dependency 6 | 7 | openblas 8 | 9 | mpich-2 10 | 11 | 3. Use 12 | 13 | This folder should be in the same path on all of your cluster nodes. 14 | 15 | All of the following actions are run in the folder Field-aware-Factorization-Machine-ftrl-mpi 16 | step 1: 17 | 18 | split your data into n parts (n equals the number of nodes in your cluster), 19 | for example, cd ./data and run the command: sh run_split_data.sh 20 | 21 | step 2: 22 | 23 | cd .. 24 | 25 | modify the IPs in the file run_ffm_mpi.sh and then run the command: sh run_ffm_mpi.sh 26 | 27 | Note: 28 | 29 | if you want to stop the program, run the command: sh stop.sh 30 | 31 | 4. Feature list 32 | 33 | 4.1. About model selection: 34 | 35 | 4.1.1. if you want to use the LR (Logistic Regression) model only, set the parameters in config.h: isffm=0 isfm=0 islr=1 36 | 37 | 4.1.2. if you want to use the FM (Factorization Machine) model only, set the parameters in config.h: isffm=0 isfm=1 islr=0 38 | 39 | 4.1.3. if you want to use the FFM (Field-aware Factorization Machine) model only, set the parameters in config.h: isffm=1 isfm=0 islr=0 40 | 41 | 4.2. Evaluation 42 | 43 | print AUC after some epochs 44 | 45 | 4.3 Save Model 46 | 47 | dump the model to the model folder 48 | 49 | 5.
Todo list 50 | 51 | 1, multithread 52 | 53 | 2, same model paralle on Parameter Server 54 | 55 | 3, load data batch 56 | 57 | 4, optimazation the learning algorithm 58 | 59 | 6. Contact: 60 | 61 | 2012wxs@gmail.com 62 | 63 | 7. References: 64 | 65 | [1] Field-aware Factorization Machines for CTR Prediction. http://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf 66 | 67 | [2] Ad Click Prediction: a View from the Trenches. http://dl.acm.org/citation.cfm?id=2488200 68 | 69 | [3] Factorization Machine. http://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf 70 | -------------------------------------------------------------------------------- /src/param.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <gflags/gflags.h> 3 | 4 | DEFINE_int64(isbatch, 0, ""); 5 | DEFINE_int64(isonline, 0, ""); 6 | DEFINE_int64(epoch, 0, "epoch"); 7 | DEFINE_int64(batch_size, 0, "batchsize"); 8 | DEFINE_int64(fea_dim, 0, ""); 9 | DEFINE_int64(factor, 0, ""); 10 | DEFINE_int64(group, 0, ""); 11 | DEFINE_int64(isffm, 0, ""); 12 | DEFINE_int64(isfm, 0, ""); 13 | DEFINE_int64(islr, 0, ""); 14 | DEFINE_int64(issgd, 0, ""); 15 | DEFINE_int64(isftrl, 0, ""); 16 | DEFINE_int64(isowlqn, 0, ""); 17 | DEFINE_int64(issinglethread, 0, ""); 18 | DEFINE_int64(ismultithread, 0, ""); 19 | 20 | DEFINE_double(bias, 0.0, "bias"); 21 | DEFINE_double(alpha, 0.0, "alpha"); 22 | DEFINE_double(beta, 0.0, ""); 23 | DEFINE_double(lambda1, 0.0, ""); 24 | DEFINE_double(lambda2, 0.0, ""); 25 | 26 | DEFINE_string(train_data_path, "", ""); 27 | DEFINE_string(test_data_path, "", ""); 28 | 29 | namespace dml{ 30 | class Param{ // snapshot of the parsed command-line flags, copied into plain members 31 | public: 32 | Param(int &argc, char *argv[]) : argc(argc), argv(argv){ 33 | ::google::ParseCommandLineFlags(&argc, &argv, true); 34 | Init(); 35 | } 36 | ~Param(){} 37 | 38 | void Init(){ // copies every FLAGS_* value into the member of the same name 39 | isbatch = FLAGS_isbatch; 40 | isonline = FLAGS_isonline; 41 | epoch = FLAGS_epoch; 42 | batch_size = FLAGS_batch_size; 43 | bias = FLAGS_bias; // was FLAGS_fea_dim: the model bias silently became the feature dimension 44 | alpha =
FLAGS_alpha; 45 | beta = FLAGS_beta; 46 | lambda1 = FLAGS_lambda1; 47 | lambda2 = FLAGS_lambda2; 48 | fea_dim = FLAGS_fea_dim; 49 | factor = FLAGS_factor; 50 | group = FLAGS_group; 51 | isffm = FLAGS_isffm; 52 | isfm = FLAGS_isfm; 53 | islr = FLAGS_islr; 54 | issgd = FLAGS_issgd; 55 | isftrl = FLAGS_isftrl; 56 | isowlqn = FLAGS_isowlqn; 57 | issinglethread = FLAGS_issinglethread; 58 | ismultithread = FLAGS_ismultithread; 59 | train_data_path = FLAGS_train_data_path; 60 | test_data_path = FLAGS_test_data_path; 61 | } 62 | public: 63 | int argc; 64 | char **argv; 65 | int isbatch; 66 | int isonline; 67 | int epoch; 68 | int batch_size; 69 | double bias; 70 | double alpha; 71 | double beta; 72 | double lambda1; 73 | double lambda2; 74 | long int fea_dim; 75 | int factor; 76 | int group; 77 | int isffm; 78 | int isfm; 79 | int islr; 80 | int issgd; 81 | int isftrl; 82 | int isowlqn; 83 | int issinglethread; 84 | int ismultithread; 85 | std::string train_data_path; 86 | std::string test_data_path; 87 | }; 88 | } 89 | -------------------------------------------------------------------------------- /src/threadpool/thread_pool.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_POOL_H 2 | #define THREAD_POOL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | class ThreadPool { 15 | public: 16 | ThreadPool(size_t); 17 | template 18 | auto enqueue(F&& f, Args&&... 
args) 19 | -> std::future::type>; 20 | ~ThreadPool(); 21 | private: 22 | // need to keep track of threads so we can join them 23 | std::vector< std::thread > workers; 24 | // the task queue 25 | std::queue< std::function > tasks; 26 | 27 | // synchronization 28 | std::mutex queue_mutex; 29 | std::condition_variable condition; 30 | bool stop; 31 | }; 32 | 33 | // the constructor just launches some amount of workers 34 | inline ThreadPool::ThreadPool(size_t threads) 35 | : stop(false) 36 | { 37 | for(size_t i = 0;i task; 44 | 45 | { 46 | std::unique_lock lock(this->queue_mutex); 47 | this->condition.wait(lock, 48 | [this]{ return this->stop || !this->tasks.empty(); }); 49 | if(this->stop && this->tasks.empty()) 50 | return; 51 | task = std::move(this->tasks.front()); 52 | this->tasks.pop(); 53 | } 54 | 55 | task(); 56 | } 57 | } 58 | ); 59 | } 60 | 61 | // add new work item to the pool 62 | template 63 | auto ThreadPool::enqueue(F&& f, Args&&... args) 64 | -> std::future::type> 65 | { 66 | using return_type = typename std::result_of::type; 67 | 68 | auto task = std::make_shared< std::packaged_task >( 69 | std::bind(std::forward(f), std::forward(args)...) 
70 | ); 71 | 72 | std::future res = task->get_future(); 73 | { 74 | std::unique_lock lock(queue_mutex); 75 | 76 | // don't allow enqueueing after stopping the pool 77 | if(stop) 78 | throw std::runtime_error("enqueue on stopped ThreadPool"); 79 | 80 | tasks.emplace([task](){ (*task)(); }); 81 | } 82 | condition.notify_one(); 83 | return res; 84 | } 85 | 86 | // the destructor joins all threads 87 | inline ThreadPool::~ThreadPool() 88 | { 89 | { 90 | std::unique_lock lock(queue_mutex); 91 | stop = true; 92 | } 93 | condition.notify_all(); 94 | for(std::thread &worker: workers) 95 | worker.join(); 96 | } 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /src/learner/learner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "../param.h" 4 | #include 5 | #include 6 | #include 7 | #include "mpi.h" 8 | #include 9 | 10 | namespace dml{ 11 | class Send_datatype{ 12 | public: 13 | double key; 14 | double val; 15 | }; 16 | 17 | class Learner{ 18 | public: 19 | Learner(Param *param) : param(param){ 20 | if(param->islr) v_dim = 0; 21 | else if(param->isfm) v_dim = param->factor * param->fea_dim * 1; 22 | else if(param->isffm) v_dim = param->factor * param->fea_dim * param->group; 23 | loc_w = new double[param->fea_dim](); 24 | loc_g = new double[param->fea_dim](); 25 | glo_g = new double[param->fea_dim](); 26 | loc_sigma = new double[param->fea_dim](); 27 | loc_n = new double[param->fea_dim](); 28 | loc_z = new double[param->fea_dim](); 29 | 30 | loc_v = new double[v_dim](); 31 | for(int i = 0; i < v_dim; i++){ 32 | loc_v[i] = gaussrand(); 33 | } 34 | loc_g_v = new double[v_dim](); 35 | glo_g_v = new double[v_dim]; 36 | loc_sigma_v = new double[v_dim](); 37 | loc_n_v = new double[v_dim](); 38 | loc_z_v = new double[v_dim](); 39 | 40 | int block_length[] = {1, 1}; 41 | MPI::Datatype oldType[] = {MPI_DOUBLE, MPI_DOUBLE}; 42 | MPI::Aint 
addressOffsets[] = {0, 1 * sizeof(double)}; 43 | newType = MPI::Datatype::Create_struct( 44 | sizeof(block_length) / sizeof(int), 45 | block_length, 46 | addressOffsets, 47 | oldType 48 | ); 49 | newType.Commit(); 50 | } 51 | virtual ~Learner(){ // virtual: this is an abstract base (pure virtuals below), so deleting through Learner* must reach the derived destructor 52 | delete[] loc_w; 53 | delete[] loc_v; 54 | 55 | delete[] loc_g; 56 | delete[] glo_g; 57 | delete[] loc_sigma; 58 | delete[] loc_n; 59 | delete[] loc_z; 60 | 61 | delete[] loc_g_v; 62 | delete[] glo_g_v; 63 | delete[] loc_sigma_v; 64 | delete[] loc_n_v; 65 | delete[] loc_z_v; 66 | } 67 | 68 | virtual void Init() = 0; 69 | virtual void calculate_batch_gradient_multithread(int start, int end) = 0; 70 | virtual void update_w() = 0; 71 | virtual void update_v() = 0; 72 | virtual void dump(int epoch) = 0; 73 | public: 74 | double gaussrand(){ // polar Box-Muller draw built on rand(), scaled by 0.1 75 | static double V1, V2, S; 76 | static int phase = 0; 77 | double X; 78 | if(phase == 0){ 79 | do{ 80 | double U1 = (double)rand() / RAND_MAX; 81 | double U2 = (double)rand() / RAND_MAX; 82 | V1 = 2 * U1 - 1; 83 | V2 = 2 * U2 - 1; 84 | S = V1 * V1 + V2 * V2; 85 | }while(S >= 1 || S == 0); 86 | X = V1 * sqrt(-2 * log(S) / S); 87 | } 88 | else{ 89 | X = V2 * sqrt(-2 * log(S) / S); 90 | } 91 | phase = 1 - phase; 92 | return X * 0.1 + 0.0; 93 | } 94 | 95 | float sigmoid(float x){ // logistic function, clamped outside [-30, 30] 96 | if(x < -30) return 1e-6; 97 | else if(x > 30) return 1.0; 98 | else{ 99 | double ex = pow(2.718281828, x); 100 | return ex / (1.0 + ex); 101 | } 102 | } 103 | 104 | double getElem(double* arr, int i, int j, int k){ // flat index for (factor i, feature j, field k); FM mode has no field stride 105 | if(param->isfm) return arr[i * param->fea_dim + j + k]; 106 | else return arr[i * param->fea_dim*param->group + j * param->group + k]; 107 | } 108 | 109 | void putVal(double* arr, double val, int i, int j, int k){ // val is double so stored accumulators are not rounded through float 110 | if(param->isfm) arr[i*param->fea_dim + j + k] = val; 111 | else arr[i*param->fea_dim*param->group + j * param->group + k] = val; 112 | } 113 | 114 | void addVal(double* arr, double val, int i, int j, int k){ // val must be floating point: the former int parameter truncated the fractional gradient passed by update_gradient 115 | if(param->isfm) arr[i * param->fea_dim + j + k] += val; 116 | else arr[i *
param->fea_dim*param->group + j * param->group + k] += val; 117 | } 118 | 119 | long int filter(double* a, long int n){ 120 | int nonzero = 0; 121 | //#pragma omp parallel for 122 | for(int i = 0; i < n; ++i){ 123 | if(a[i] != 0.0) nonzero += 1; 124 | } 125 | return nonzero; 126 | } 127 | void filter_nonzero(double *a, long int n, std::vector &vec){ 128 | Send_datatype dt; 129 | //#pragma omp parallel for 130 | for(int i = 0; i < n; ++i){ 131 | if(a[i] != 0.0){ 132 | dt.key = i; 133 | dt.val = a[i]; 134 | vec.push_back(dt); 135 | } 136 | } 137 | } 138 | 139 | public: 140 | MPI::Datatype newType; 141 | Param *param; 142 | std::set::iterator setIter; 143 | std::vector > cross_field; 144 | 145 | int v_dim; 146 | 147 | double *loc_w; 148 | double *loc_v; 149 | 150 | double* loc_g; 151 | double* glo_g; 152 | double* loc_z; 153 | double* loc_sigma; 154 | double* loc_n; 155 | 156 | double* loc_g_v; 157 | double* glo_g_v; 158 | double* loc_sigma_v; 159 | double* loc_n_v; 160 | double* loc_z_v; 161 | }; 162 | } 163 | -------------------------------------------------------------------------------- /src/learner/ftrl_learner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "learner.h" 3 | #include "../predict.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "../io/load_data.h" 10 | #include "../threadpool/thread_pool.h" 11 | 12 | namespace dml{ 13 | struct ThreadParam { 14 | int batchsize4thread; 15 | }; 16 | 17 | class FtrlLearner : public Learner{ 18 | public: 19 | FtrlLearner(LoadData *train_data, Predict* predict, Param *param, int nproc, int rank) 20 | : Learner(param), data(train_data), pred(predict), param(param), nproc(nproc), rank(rank){ 21 | Init(); 22 | } 23 | ~FtrlLearner(){ 24 | delete[] loc_g_tmp; 25 | delete[] loc_g_v_tmp; 26 | } 27 | 28 | void Init(){ 29 | core_num = std::thread::hardware_concurrency(); 30 | for(int i = 0; i < param->group; ++i){ 31 | std::set s; 32 
| for(int j = 0; j < param->group; j += 1){ 33 | s.insert(j); 34 | } 35 | cross_field.push_back(s); 36 | } 37 | 38 | loc_g_tmp = new double[param->fea_dim]; 39 | loc_g_v_tmp = new double[v_dim]; 40 | 41 | alpha_v = 0.0; 42 | beta_v = 0.00001; 43 | lambda1_v = 0.000001; 44 | lambda2_v = 0.0; 45 | 46 | alpha = param->alpha; 47 | beta = param->beta; 48 | lambda1 = param->lambda1; 49 | lambda2 = param->lambda2; 50 | } 51 | 52 | void dump(int epoch){ 53 | char buffer[1024]; 54 | snprintf(buffer, 1024, "%d", epoch); 55 | std::string filename = buffer; 56 | std::ofstream md; 57 | md.open("./model/model_epoch" + filename + ".txt"); 58 | if(!md.is_open()){ 59 | std::cout<<"save model open file error: "<< std::endl; 60 | } 61 | float wi; 62 | for(int j = 0; j < param->fea_dim; ++j){ 63 | wi = loc_w[j]; 64 | md<< j << "\t" <&); 71 | void calculate_batch_gradient_multithread(int start, int end); 72 | void allreduce_gradient(); 73 | void allreduce_weight(); 74 | void train_online(ThreadPool& pool); 75 | void train_batch(ThreadPool& pool); 76 | 77 | void update_w(){ 78 | #pragma omp parallel for 79 | for(int col = 0; col < param->fea_dim; ++col){ 80 | loc_sigma[col] = ( sqrt (loc_n[col] + glo_g[col] * glo_g[col]) - sqrt(loc_n[col]) ) / param->alpha; 81 | loc_n[col] += glo_g[col] * glo_g[col]; 82 | loc_z[col] += glo_g[col] - loc_sigma[col] * loc_w[col]; 83 | if(abs(loc_z[col]) <= param->lambda1){ 84 | loc_w[col] = 0.0; 85 | } 86 | else{ 87 | float tmpr= 0.0; 88 | if(loc_z[col] >= 0) tmpr = loc_z[col] - param->lambda1; 89 | else tmpr = loc_z[col] + param->lambda1; 90 | float tmpl = -1 * ( ( param->beta + sqrt(loc_n[col]) ) / param->alpha + param->lambda2); 91 | loc_w[col] = tmpr / tmpl; 92 | } 93 | }//end for 94 | } 95 | 96 | void update_v(){ 97 | for(int k = 0; k < param->factor; ++k){ 98 | if(param->islr) break; 99 | #pragma omp parallel for 100 | for(int col = 0; col < param->fea_dim; ++col){ 101 | for(int f = 0; f < param->group; ++f){ 102 | if(param->isfm) f = 0; 103 | float 
old_locnv = getElem(loc_n_v, k, col, f); 104 | float glogv = getElem(glo_g_v, k, col, f); 105 | float locsigmav = (sqrt(old_locnv + glogv*glogv) - sqrt(old_locnv)) / alpha_v; 106 | 107 | double new_locnv = old_locnv + glogv * glogv; 108 | putVal(loc_n_v, new_locnv, k, col, f); 109 | double old_loczv = getElem(loc_z_v, k, col, f); 110 | double new_loczv = old_loczv + glogv - locsigmav * getElem(loc_v, k, col, f); 111 | putVal(loc_z_v, new_loczv, k, col, f); 112 | if(abs(new_loczv) <= lambda1_v){ 113 | putVal(loc_v, 0.0, k, col, f); 114 | } 115 | else{ 116 | float tmpr= 0.0; 117 | if(new_loczv >= 0) tmpr = new_loczv - lambda1_v; 118 | else tmpr = new_loczv + lambda1_v; 119 | float tmpl = -1 * ( ( beta_v + sqrt(getElem(loc_n_v, k, col, f)) ) / alpha_v + lambda2_v); 120 | putVal(loc_v, tmpr / tmpl, k, col, f); 121 | } 122 | if(param->isfm) break; 123 | } 124 | }//end for 125 | }//end for 126 | }//end update_v 127 | public: 128 | std::mutex mutex; 129 | MPI_Status status; 130 | Param *param; 131 | LoadData *data; 132 | Predict *pred; 133 | 134 | int core_num; 135 | int calculate_gradient_thread_count; 136 | 137 | double *loc_g_tmp; 138 | double *loc_g_v_tmp; 139 | 140 | int loc_g_nonzero; 141 | int loc_g_v_nonzero; 142 | float bias; 143 | 144 | float alpha; 145 | float beta; 146 | float lambda1; 147 | float lambda2; 148 | 149 | float alpha_v; 150 | float beta_v; 151 | float lambda1_v; 152 | float lambda2_v; 153 | 154 | int nproc; 155 | int rank; 156 | }; 157 | } 158 | -------------------------------------------------------------------------------- /src/predict.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "mpi.h" 9 | #include 10 | 11 | namespace dml{ 12 | typedef struct{ 13 | float clk; 14 | float nclk; 15 | long idx; 16 | } clkinfo; 17 | 18 | class Predict{ 19 | public: 20 | Predict(LoadData* load_data, Param *param, int 
total_num_proc, int my_rank) 21 | : data(load_data), param(param), nproc(total_num_proc), rank(my_rank){ 22 | pctr = 0.0; 23 | MAX_ARRAY_SIZE = 1e6; 24 | 25 | g_all_non_clk = new float[MAX_ARRAY_SIZE]; 26 | g_all_clk = new float[MAX_ARRAY_SIZE]; 27 | g_nclk = new float[MAX_ARRAY_SIZE]; 28 | g_clk = new float[MAX_ARRAY_SIZE]; 29 | 30 | for(int i = 0; i < param->group; ++i){ 31 | std::set s; 32 | for(int j = 0; j < param->group; j += 1){ 33 | s.insert(j); 34 | } 35 | cross_field.push_back(s); 36 | } 37 | } 38 | 39 | ~Predict(){ 40 | delete[] g_all_non_clk; 41 | delete[] g_all_clk; 42 | delete[] g_nclk; 43 | delete[] g_clk; 44 | } 45 | 46 | double getElem(double* arr, int i, int j, int k){ 47 | if(param->isfm) return arr[i * param->fea_dim + j + k]; 48 | else return arr[i * param->fea_dim*param->group + j * param->group + k]; 49 | } 50 | void print1dim(double* arr){ 51 | for(int i = 0; i < param->factor * param->fea_dim * param->group; ++i) 52 | std::cout< > cross_field; 167 | std::vector result_list; 168 | int MAX_ARRAY_SIZE; 169 | double auc = 0.0; 170 | float* g_all_non_clk; 171 | float* g_all_clk; 172 | float* g_nclk; 173 | float* g_clk; 174 | float g_total_clk; 175 | float g_total_nclk; 176 | 177 | float pctr; 178 | 179 | int nproc; // total num of process in MPI comm world 180 | int rank; // my process rank in MPT comm world 181 | }; 182 | 183 | } 184 | -------------------------------------------------------------------------------- /src/learner/ftrl_learner.cc: -------------------------------------------------------------------------------- 1 | #include "ftrl_learner.h" 2 | 3 | namespace dml{ 4 | void FtrlLearner::run(){ 5 | ThreadPool pool(core_num - 1); 6 | if(param->isonline == 1){ 7 | train_online(pool); 8 | } 9 | else if(param->isbatch == 1){ 10 | train_batch(pool); 11 | } 12 | }//end run 13 | 14 | void FtrlLearner::train_online(ThreadPool& pool){ 15 | int b = 0; 16 | for(int epoch = 0; epoch < param->epoch; ++epoch){ 17 | while(b < 700){ 18 | 
data->load_batch_data(param->batch_size); 19 | if(data->fea_matrix.size() < param->batch_size) break; 20 | memset(loc_g, 0.0, param->fea_dim * sizeof(double));//notation: 21 | memset(loc_g_v, 0.0, v_dim * sizeof(double));//notation: 22 | int start, end; 23 | int thread_batch = param->batch_size / core_num; 24 | calculate_gradient_thread_count = 0; std::vector<std::future<void> > pending; // one future per enqueued slice 25 | for(int j = 0; j < core_num; j++){ 26 | start = j * thread_batch; 27 | end = (j + 1) * thread_batch; 28 | pending.push_back(pool.enqueue(std::bind(&FtrlLearner::calculate_batch_gradient_multithread, this, start, end))); 29 | } 30 | for(size_t t = 0; t < pending.size(); ++t) pending[t].get();//main thread must wait for all thread in thread pool; futures replace the racy spin on a plain int 31 | mutex.lock(); 32 | allreduce_gradient(); 33 | allreduce_weight(); 34 | mutex.unlock(); 35 | if((b+1) % 200 == 0) pred->run(loc_w, loc_v); 36 | ++b; 37 | }//end while 38 | }//end for 39 | } 40 | 41 | void FtrlLearner::train_batch(ThreadPool& pool){ 42 | data->load_all_data(); 43 | int batch_num = data->fea_matrix.size() / param->batch_size, batch_num_min = 0; 44 | MPI_Allreduce(&batch_num, &batch_num_min, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 45 | std::cout<<"total epochs = "<epoch<<" batch_num_min = "<epoch; ++epoch){ 48 | std::cout<<"epoch "<fea_dim * sizeof(double));//notation: 54 | memset(loc_g_v, 0.0, v_dim * sizeof(double));//notation: 55 | int all_start = i * param->batch_size; 56 | int thread_batch = param->batch_size / core_num; 57 | int start, end; 58 | calculate_gradient_thread_count = 0; std::vector<std::future<void> > pending; // one future per enqueued slice 59 | for(int j = 0; j < core_num; ++j){ 60 | start = all_start + j * thread_batch; 61 | end = all_start + (j + 1) * thread_batch; 62 | pending.push_back(pool.enqueue(std::bind(&FtrlLearner::calculate_batch_gradient_multithread, this, start, end))); 63 | } 64 | for(size_t t = 0; t < pending.size(); ++t) pending[t].get();//main thread must wait for all thread in thread pool; futures replace the racy spin on a plain int 65 | mutex.lock(); 66 | allreduce_gradient(); 67 | allreduce_weight(); 68 | mutex.unlock(); 69 | if((i+1) % 200 == 0) pred->run(loc_w, loc_v); 70 | }//end for 71 |
finish_time = clock(); 72 | std::cout<<"Elasped time:"<<(finish_time - start_time) * 1.0 / CLOCKS_PER_SEC<fea_dim); 79 | if(!param->islr) memset(loc_g_v_tmp, 0.0, sizeof(double) * v_dim); 80 | for(int r = start; r < end; ++r){ 81 | float wx = bias; 82 | int ins_seg_num = data->fea_matrix[r].size(); 83 | std::vector vx_sum(param->factor, 0.0); 84 | float vxvx = 0.0, vvxx = 0.0; 85 | for(int col = 0; col < ins_seg_num; ++col){//for one instance 86 | group = data->fea_matrix[r][col].fgid; 87 | index = data->fea_matrix[r][col].fid; 88 | value = data->fea_matrix[r][col].val; 89 | wx += loc_w[index] * value; 90 | for(int k = 0; k < param->factor; ++k){ 91 | if(param->islr) break; 92 | for(int f = 0; f < param->group; ++f){ 93 | setIter = cross_field[group].find(f); 94 | if(setIter == cross_field[group].end()) continue; 95 | if(param->isfm) f = 0; 96 | int loc_v_temp = getElem(loc_v, k, index, f); 97 | vx_sum[k] += loc_v_temp * value; 98 | vvxx += loc_v_temp * loc_v_temp * value * value; 99 | if(param->isfm) break; 100 | } 101 | } 102 | }//end for 103 | for(int k = 0; k < param->factor; ++k){ 104 | if(param->islr) break; 105 | vxvx += vx_sum[k] * vx_sum[k]; 106 | } 107 | vxvx -= vvxx; 108 | wx += vxvx * 1.0 / 2.0; 109 | pctr = sigmoid(wx); 110 | float delta = pctr - data->label[r]; 111 | update_gradient(ins_seg_num, r, delta, loc_g_tmp, vx_sum); 112 | if(!param->islr) update_gradient(ins_seg_num, r, delta, loc_g_v_tmp, vx_sum); 113 | }//end for 114 | mutex.lock(); 115 | cblas_daxpy(param->fea_dim, 1.0, loc_g_tmp, 1, loc_g, 1); 116 | if(!param->islr)cblas_daxpy(v_dim, 1.0, loc_g_v_tmp, 1, loc_g_v, 1); 117 | calculate_gradient_thread_count += 1; 118 | mutex.unlock(); 119 | } 120 | 121 | void FtrlLearner::update_gradient(int ins_seg_num, int r, float& delta, double*& loc_gv_tmp, std::vector& vx_sum){ 122 | for(int col = 0; col < ins_seg_num; col++){ 123 | int group = data->fea_matrix[r][col].fgid; 124 | int index = data->fea_matrix[r][col].fid; 125 | float value = 
data->fea_matrix[r][col].val; 126 | loc_gv_tmp[index] += delta * value; 127 | float vx = 0.0; 128 | for(int k = 0; k < param->factor; k++){ 129 | if(param->islr) break; 130 | for(int f = 0; f < param->group; f++){ 131 | setIter = cross_field[group].find(f); 132 | if(setIter == cross_field[group].end()) continue; 133 | if(param->isfm) f = 0; 134 | float tmpv = getElem(loc_v, k, index, f); 135 | vx = tmpv * value; 136 | addVal(loc_gv_tmp, -1 * delta * (vx_sum[k] - vx) * value, k, index, f); 137 | if(param->isfm) break; 138 | } 139 | }//end for 140 | }//end for 141 | } 142 | 143 | void FtrlLearner::allreduce_gradient(){ 144 | cblas_dscal(param->fea_dim, 1.0/param->batch_size, loc_g, 1); 145 | loc_g_nonzero = filter(loc_g, param->fea_dim);// 146 | std::vector loc_g_vec; 147 | filter_nonzero(loc_g, param->fea_dim, loc_g_vec);// 148 | 149 | std::vector loc_g_v_vec; 150 | if(!param->islr){ 151 | cblas_dscal(v_dim, 1.0/param->batch_size, loc_g_v, 1); 152 | loc_g_v_nonzero = filter(loc_g_v, v_dim);// 153 | filter_nonzero(loc_g_v, v_dim, loc_g_v_vec);// 154 | } 155 | if(rank != 0){ 156 | MPI_Send(&loc_g_vec[0], loc_g_nonzero, newType, 0, 99, MPI_COMM_WORLD); 157 | if(!param->islr) MPI_Send(&loc_g_v_vec[0], loc_g_v_nonzero, newType, 0, 399, MPI_COMM_WORLD); 158 | }else if(rank == 0){ 159 | cblas_dcopy(param->fea_dim, loc_g, 1, glo_g, 1); 160 | for(int r = 1; r < nproc; ++r){ 161 | std::vector recv_loc_g_vec; 162 | recv_loc_g_vec.resize(param->fea_dim); 163 | MPI_Recv(&recv_loc_g_vec[0], param->fea_dim, newType, r, 99, MPI_COMM_WORLD, &status); 164 | int recv_loc_g_num; 165 | MPI_Get_count(&status, newType, &recv_loc_g_num); 166 | #pragma omp parallel for 167 | for(int i = 0; i < recv_loc_g_num; ++i){ 168 | int k = recv_loc_g_vec[i].key; 169 | int v = recv_loc_g_vec[i].val; 170 | glo_g[k] += v; 171 | } 172 | if(!param->islr){ 173 | cblas_dcopy(v_dim, loc_g_v, 1, glo_g_v, 1); 174 | std::vector recv_loc_g_v_vec; 175 | recv_loc_g_v_vec.resize(v_dim); 176 | 
MPI_Recv(&recv_loc_g_v_vec[0], v_dim, newType, r, 399, MPI_COMM_WORLD, &status); 177 | int recv_loc_g_v_num; 178 | MPI_Get_count(&status, newType, &recv_loc_g_v_num); 179 | #pragma omp parallel for 180 | for(int i = 0; i < recv_loc_g_v_num; ++i){ 181 | int k = recv_loc_g_v_vec[i].key; 182 | int v = recv_loc_g_v_vec[i].val; 183 | glo_g_v[k] += v; 184 | } 185 | } 186 | } 187 | cblas_dscal(param->fea_dim, 1.0/nproc, glo_g, 1); 188 | update_w(); 189 | if(!param->islr){ 190 | cblas_dscal(v_dim, 1.0/nproc, glo_g_v, 1); 191 | update_v(); 192 | } 193 | } 194 | }//end allreduce_gradient 195 | 196 | void FtrlLearner::allreduce_weight(){ 197 | if(rank == 0){ 198 | int loc_w_nonzero = filter(loc_w, param->fea_dim);// 199 | std::vector loc_w_vec; 200 | filter_nonzero(loc_w, param->fea_dim, loc_w_vec);// 201 | 202 | std::vector loc_v_vec; 203 | int loc_v_nonzero; 204 | if(!param->islr){ 205 | loc_v_nonzero = filter(loc_v, v_dim);// 206 | filter_nonzero(loc_v, v_dim, loc_v_vec);// 207 | } 208 | for(int r = 1; r < nproc; ++r){ 209 | MPI_Send(&loc_w_vec[0], loc_w_nonzero, newType, r, 999, MPI_COMM_WORLD); 210 | if(!param->islr) MPI_Send(&loc_v_vec[0], loc_v_nonzero, newType, r, 3999, MPI_COMM_WORLD); 211 | } 212 | }else if(rank != 0){ 213 | std::vector recv_loc_w_vec; 214 | recv_loc_w_vec.resize(param->fea_dim); 215 | MPI_Recv(&recv_loc_w_vec[0], param->fea_dim, newType, 0, 999, MPI_COMM_WORLD, &status); 216 | int recv_loc_w_num; 217 | MPI_Get_count(&status, newType, &recv_loc_w_num); 218 | memset(loc_w, 0.0, param->fea_dim * sizeof(double)); 219 | #pragma omp parallel for 220 | for(int i = 0; i < recv_loc_w_num; ++i){ 221 | int k = recv_loc_w_vec[i].key; 222 | int v = recv_loc_w_vec[i].val; 223 | loc_w[k] = v; 224 | } 225 | 226 | if(param->islr != 1){ 227 | std::vector recv_loc_v_vec; 228 | recv_loc_v_vec.resize(v_dim); 229 | MPI_Recv(&recv_loc_v_vec[0], v_dim, newType, 0, 3999, MPI_COMM_WORLD, &status); 230 | int recv_loc_v_num; 231 | MPI_Get_count(&status, newType, 
&recv_loc_v_num); 232 | memset(loc_v, 0.0, v_dim * sizeof(double)); 233 | #pragma omp parallel for 234 | for(int i = 0; i < recv_loc_v_num; ++i){ 235 | int k = recv_loc_v_vec[i].key; 236 | int v = recv_loc_v_vec[i].val; 237 | loc_v[k] = v; 238 | } 239 | } 240 | } 241 | }//end allreduce_weight; 242 | 243 | } 244 | -------------------------------------------------------------------------------- /src/learner/owlqn.h: -------------------------------------------------------------------------------- 1 | #ifndef OWLQN_H_ 2 | #define OWLQN_H_ 3 | #include "mpi.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "load_data.h" 16 | #include "predict.h" 17 | #include 18 | 19 | #define MASTERID 0 20 | #define NUM 999 21 | 22 | extern "C"{ 23 | #include 24 | } 25 | 26 | class OWLQN{ 27 | public: 28 | OWLQN(Load_Data* ld, Predict* predict, int total_num_proc, int my_rank) 29 | : data(ld), pred(predict), num_proc(total_num_proc), rank(my_rank) { 30 | init(); 31 | } 32 | 33 | ~OWLQN(){ 34 | delete[] glo_w; 35 | delete[] glo_new_w; 36 | 37 | delete[] loc_wx; 38 | 39 | delete[] loc_g; 40 | delete[] glo_g; 41 | delete[] glo_new_g; 42 | delete[] glo_sub_g; 43 | delete[] glo_q; 44 | 45 | for(int i = 0; i < m; i++){ 46 | delete[] glo_s_list[i]; 47 | delete[] glo_y_list[i]; 48 | } 49 | delete[] glo_s_list; 50 | delete[] glo_y_list; 51 | 52 | delete[] glo_alpha_list; 53 | delete[] glo_ro_list; 54 | } 55 | 56 | void init(){ 57 | bias = 0.0; 58 | c = 1.0; 59 | glo_w = new double[data->glo_fea_dim](); 60 | glo_new_w = new double[data->glo_fea_dim](); 61 | for(int i = 0; i < data->glo_fea_dim; i++) { 62 | glo_w[i] = 0.0; 63 | } 64 | 65 | data->loc_ins_num = data->fea_matrix.size(); 66 | loc_wx = new double[data->loc_ins_num](); 67 | 68 | loc_g = new double[data->glo_fea_dim](); 69 | glo_g = new double[data->glo_fea_dim](); 70 | glo_new_g = new double[data->glo_fea_dim](); 71 | 
glo_sub_g = new double[data->glo_fea_dim](); 72 | glo_q = new double[data->glo_fea_dim](); 73 | 74 | m = 10; 75 | now_m = 1; 76 | glo_s_list = new double*[m]; 77 | for(int i = 0; i < m; i++){ 78 | glo_s_list[i] = new double[data->glo_fea_dim](); 79 | for(int j = 0; j < data->glo_fea_dim; j++){ 80 | glo_s_list[i][j] = glo_w[j]; 81 | } 82 | } 83 | glo_y_list = new double*[m]; 84 | for(int i = 0; i < m; i++){ 85 | glo_y_list[i] = new double[data->glo_fea_dim](); 86 | for(int j = 0; j < data->glo_fea_dim; j++){ 87 | glo_y_list[i][j] = glo_g[j]; 88 | } 89 | } 90 | glo_alpha_list = new double[data->glo_fea_dim](); 91 | glo_ro_list = new double[data->glo_fea_dim](); 92 | 93 | loc_loss = 0.0; 94 | glo_loss = 0.0; 95 | loc_new_loss = 0.0; 96 | glo_new_loss = 0.0; 97 | 98 | lambda = 0.0001; 99 | backoff = 0.9; 100 | flag_wolf = 1; 101 | } 102 | 103 | void calculate_wx(double *w){ 104 | long int idx = 0; 105 | int val = 0; 106 | for(int i = 0; i < data->fea_matrix.size(); i++) { 107 | loc_wx[i] = bias; 108 | for(int j = 0; j < data->fea_matrix[i].size(); j++) { 109 | idx = data->fea_matrix[i][j].idx; 110 | val = data->fea_matrix[i][j].val; 111 | loc_wx[i] += w[idx] * val; 112 | } 113 | } 114 | } 115 | 116 | double sigmoid(double x){ 117 | if(x < -30){ 118 | return 1e-6; 119 | } 120 | else if(x > 30){ 121 | return 1.0; 122 | } 123 | else{ 124 | double ex = pow(2.718281828, x); 125 | return ex / (1.0 + ex); 126 | } 127 | } 128 | 129 | double calculate_loss(double *w){ 130 | double f = 0.0, single_loss = 0.0, regular_loss = 0.0; 131 | memset(loc_wx, 0, sizeof(double) * data->fea_matrix.size()); 132 | calculate_wx(w); 133 | for(int i = 0; i < data->fea_matrix.size(); i++){ 134 | single_loss = data->label[i] * log(sigmoid(loc_wx[i])) + 135 | (1 - data->label[i]) * log(1 - sigmoid(loc_wx[i])); 136 | f += single_loss; 137 | } 138 | for(int j = 0; j < data->glo_fea_dim; j++){ 139 | regular_loss += abs(w[j]); 140 | } 141 | return -f / data->fea_matrix.size() + regular_loss; 142 | } 
143 | 144 | void calculate_gradient(double* g, double *w){ 145 | int value; 146 | int index, single_feature_num, instance_num = data->fea_matrix.size(); 147 | memset(g, 0.0, data->glo_fea_dim * sizeof(double)); 148 | memset(loc_wx, 0, sizeof(double) * data->fea_matrix.size()); 149 | calculate_wx(w); 150 | for(int i = 0; i < instance_num; i++){ 151 | single_feature_num = data->fea_matrix[i].size(); 152 | double y_h = data->label[i] - sigmoid(loc_wx[i]); 153 | for(int j = 0; j < single_feature_num; j++){ 154 | index = data->fea_matrix[i][j].idx; 155 | value = data->fea_matrix[i][j].val; 156 | g[index] += y_h * value; 157 | } 158 | } 159 | for(int index = 0; index < data->glo_fea_dim; index++){ 160 | g[index] = g[index] / instance_num; 161 | } 162 | }//end calculate_gradient 163 | 164 | void calculate_subgradient(){ 165 | if(c == 0.0){ 166 | for(int j = 0; j < data->glo_fea_dim; j++){ 167 | glo_sub_g[j] = glo_g[j]; 168 | } 169 | } else if(c != 0.0){ 170 | for(int j = 0; j < data->glo_fea_dim; j++){ 171 | if(glo_w[j] > 0){ 172 | glo_sub_g[j] = glo_g[j] + c; 173 | } 174 | else if(glo_w[j] < 0){ 175 | glo_sub_g[j] = glo_g[j] - c; 176 | } 177 | else { 178 | if(glo_g[j] + c < 0){ 179 | glo_sub_g[j] = glo_g[j] - c;//左导数 180 | } else if(glo_g[j] - c > 0){ 181 | glo_sub_g[j] = glo_g[j] + c; 182 | } else { 183 | glo_sub_g[j] = 0.0; 184 | } 185 | } 186 | } 187 | } 188 | } 189 | 190 | void fix_dir_glo_q(){ 191 | for(int j = 0; j < data->glo_fea_dim; ++j){ 192 | if(glo_q[j] * glo_sub_g[j] < 0){ 193 | glo_q[j] = 0.0; 194 | } 195 | } 196 | } 197 | 198 | void fix_dir_glo_new_w(){ 199 | for(int j = 0; j < data->glo_fea_dim; j++){ 200 | if(glo_new_w[j] * glo_w[j] < 0) glo_new_w[j] = 0.0; 201 | else glo_new_w[j] = glo_new_w[j]; 202 | } 203 | } 204 | 205 | void line_search(){ 206 | flag_wolf = 1; 207 | int lo = 0; 208 | lambda = 0.05; 209 | while(true){ 210 | std::cout<<"line search loop = "<glo_fea_dim; j++){ 213 | glo_new_w[j] = glo_w[j] + lambda * glo_q[j];//change + to - 214 | } 215 
| fix_dir_glo_new_w();//new_w subject to w in linesearch 216 | for(int r = 1; r < num_proc; r++){ 217 | MPI_Send(glo_new_w, data->glo_fea_dim, MPI_DOUBLE, r, 999, MPI_COMM_WORLD); 218 | } 219 | } else if(rank != MASTERID){ 220 | MPI_Recv(glo_new_w, data->glo_fea_dim, MPI_DOUBLE, 0, 999, MPI_COMM_WORLD, &status); 221 | } 222 | 223 | loc_new_loss = calculate_loss(glo_new_w); 224 | 225 | if(rank != MASTERID){ 226 | MPI_Send(&loc_new_loss, 1, MPI_DOUBLE, 0, 9999, MPI_COMM_WORLD); 227 | } 228 | else if(rank == MASTERID){ 229 | glo_new_loss = loc_new_loss; 230 | for(int r = 1; r < num_proc; r++){ 231 | MPI_Recv(&loc_new_loss, 1, MPI_DOUBLE, r, 9999, MPI_COMM_WORLD, &status); 232 | glo_new_loss += loc_new_loss; 233 | } 234 | double wolf_pf = cblas_ddot(data->glo_fea_dim, (double*)glo_q, 1, (double*)glo_sub_g, 1); 235 | std::cout<glo_fea_dim, -1, (double*)glo_new_w, 1, (double*)glo_w, 1); 237 | glo_new_loss += 0.0001 * lambda * wolf_pf; 238 | std::cout<<"glo_loss "<glo_fea_dim; j++){ 265 | if (a[j] < max) max = a[j]; 266 | if(a[j] != 0.0)std::cout<<"print function: a["<glo_fea_dim, glo_sub_g, 1, glo_q, 1); 273 | if(now_m > m) now_m = m; 274 | for(int loop = now_m-2; loop >= 0; --loop){ 275 | glo_ro_list[loop] = cblas_ddot(data->glo_fea_dim, &(*glo_y_list)[loop], 1, &(*glo_s_list)[loop], 1); 276 | glo_alpha_list[loop] = (cblas_ddot( data->glo_fea_dim, &(*glo_s_list)[loop], 1, (double*)glo_q, 1 ) + 1.0) / (glo_ro_list[loop] + 1.0); 277 | cblas_daxpy(data->glo_fea_dim, -1 * glo_alpha_list[loop], &(*glo_y_list)[loop], 1, (double*)glo_q, 1); 278 | }//end for 279 | double ydots = cblas_ddot(data->glo_fea_dim, glo_s_list[now_m - 2], 1, glo_y_list[now_m - 2], 1); 280 | double gamma = (ydots + 1.0)/ (glo_ro_list[now_m - 2] + 1.0); 281 | 282 | cblas_dscal(data->glo_fea_dim, gamma, (double*)glo_q, 1); 283 | 284 | for(int loop = 0; loop <= now_m-2; ++loop){ 285 | double beta = (cblas_ddot(data->glo_fea_dim, &(*glo_y_list)[loop], 1, (double*)glo_q, 1) + 1.0) / (glo_ro_list[loop] + 
1.0); 286 | cblas_daxpy(data->glo_fea_dim, glo_alpha_list[loop] - beta, &(*glo_s_list)[loop], 1, (double*)glo_q, 1); 287 | }//end for 288 | } 289 | 290 | void update_state(){ 291 | calculate_gradient(glo_new_g, glo_new_w); 292 | 293 | //print(glo_new_w); 294 | cblas_daxpy(data->glo_fea_dim, -1, (double*)glo_w, 1, (double*)glo_new_w, 1); 295 | cblas_dcopy(data->glo_fea_dim, (double*)glo_new_w, 1, (double*)glo_s_list[(now_m-1) % m], 1); 296 | cblas_daxpy(data->glo_fea_dim, 1, (double*)glo_w, 1, (double*)glo_new_w, 1); 297 | std::swap(glo_w, glo_new_w); 298 | 299 | cblas_daxpy(data->glo_fea_dim, -1, (double*)glo_g, 1, (double*)glo_new_g, 1); 300 | cblas_dcopy(data->glo_fea_dim, (double*)glo_new_g, 1, (double*)glo_y_list[(now_m-1) % m], 1); 301 | //double a = cblas_ddot(data->glo_fea_dim, &(*glo_y_list)[0], 1, &(*glo_s_list)[0], 1); 302 | //std::cout<<" a ==== "<glo_fea_dim; ++i) { 326 | wi = glo_new_w[i]; 327 | md << i << ':' << wi; 328 | if(i != data->glo_fea_dim - 1){ 329 | md << ' '; 330 | } 331 | } 332 | md.close(); 333 | } 334 | } 335 | 336 | void owlqn(){ 337 | for(step = 0; step < steps; step++){ 338 | std::cout<<"step = "<