├── LICENSE ├── Makefile ├── README.md ├── bin ├── MLP ├── RNN ├── SVM ├── conv ├── linear ├── logistic ├── mpirun │ ├── conv.sh │ ├── linear.sh │ ├── logistic.sh │ ├── mlp.sh │ ├── rnn.sh │ ├── run.sh │ └── svm.sh ├── runner └── slurmrun │ ├── conv.sh │ ├── linear.sh │ ├── logistic.sh │ ├── mlp.sh │ ├── rnn.sh │ ├── run.sh │ └── svm.sh ├── build ├── MLP.o ├── RNN.o ├── SVM.o ├── compressCommunicate.o ├── conv.o ├── cuda.o ├── line.o ├── logistic.o ├── main.o ├── offline.o ├── online.o └── read.o ├── include ├── ParSecureML.h ├── ParSecureML_noMPI.h ├── compress.h ├── output.h └── read.h ├── setup ├── CIFAR ├── MNIST ├── NIST ├── SYN └── VGG ├── source ├── compressCommunicate.cc ├── cuda.cu ├── main.cc ├── offline.cc ├── online.cc └── read.cc └── test ├── MLP.cc ├── RNN.cc ├── SVM.cc ├── conv.cc ├── line.cc └── logistic.cc /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 ZhengChenCS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CUDA_INSTALL_PATH = /usr/local/cuda-11.1 2 | MPICC = mpicxx 3 | NVCC = $(CUDA_INSTALL_PATH)/bin/nvcc 4 | CC = g++ 5 | CFLAGS = -std=c++11 6 | TARGET = bin/runner 7 | SRCDIR = source 8 | BUILDDIR = build 9 | TESTDIR = test 10 | OBJECTS = $(BUILDDIR)/compressCommunicate.o $(BUILDDIR)/offline.o $(BUILDDIR)/online.o $(BUILDDIR)/read.o $(BUILDDIR)/cuda.o 11 | INC = -I../../common/inc 12 | LIBS = -L$(CUDA_INSTALL_PATH)/lib64 -lcudart -lcurand -lcublas -lpthread 13 | RM = rm 14 | TESTLinear = bin/linear 15 | TESTLinearOBJ = build/line.o 16 | TESTLogistic = bin/logistic 17 | TESTLogisticOBJ = build/logistic.o 18 | TESTMLP = bin/MLP 19 | TESTMLPOBJ = build/MLP.o 20 | TESTConv = bin/conv 21 | TESTConvOBJ = build/conv.o 22 | TESTRNN = bin/RNN 23 | TESTRNNOBJ = build/RNN.o 24 | TESTSVM = bin/SVM 25 | TESTSVMOBJ = build/SVM.o 26 | TESTSVMo = bin/SVMo 27 | TESTSVMoOBJ = build/SVMo.o 28 | 29 | all: 30 | make $(TARGET) 31 | make TestLinear 32 | make TestLogistic 33 | make TestMLP 34 | make TestConv 35 | make TestRNN 36 | make TestSVM 37 | 38 | 39 | $(TARGET): $(OBJECTS) $(BUILDDIR)/main.o 40 | @echo "Linking..." 
41 | $(MPICC) $^ $(LIBS) -o $(TARGET) -fopenmp 42 | 43 | $(BUILDDIR)/main.o : $(SRCDIR)/main.cc 44 | @mkdir -p $(BUILDDIR) 45 | $(MPICC) $(CFLAGS) $(INC) -c -fopenmp $(SRCDIR)/main.cc -o $(BUILDDIR)/main.o 46 | $(BUILDDIR)/compressCommunicate.o: $(SRCDIR)/compressCommunicate.cc 47 | @mkdir -p $(BUILDDIR) 48 | $(MPICC) $(CFLAGS) $(INC) -c -fopenmp $(SRCDIR)/compressCommunicate.cc -o $(BUILDDIR)/compressCommunicate.o 49 | $(BUILDDIR)/offline.o: $(SRCDIR)/offline.cc 50 | @mkdir -p $(BUILDDIR) 51 | $(MPICC) $(CFLAGS) $(INC) -c -fopenmp $(SRCDIR)/offline.cc -o $(BUILDDIR)/offline.o 52 | $(BUILDDIR)/online.o: $(SRCDIR)/online.cc 53 | @mkdir -p $(BUILDDIR) 54 | $(MPICC) $(CFLAGS) $(INC) -c -fopenmp $(SRCDIR)/online.cc -o $(BUILDDIR)/online.o 55 | $(BUILDDIR)/read.o: $(SRCDIR)/read.cc 56 | @mkdir -p $(BUILDDIR) 57 | $(MPICC) $(CFLAGS) $(INC) -c -fopenmp $(SRCDIR)/read.cc -o $(BUILDDIR)/read.o 58 | $(BUILDDIR)/cuda.o: $(SRCDIR)/cuda.cu 59 | @mkdir -p $(BUILDDIR) 60 | $(NVCC) $(CFLAGS) $(INC) $(LIBS) -c $(SRCDIR)/cuda.cu -o $(BUILDDIR)/cuda.o 61 | TestLinear: $(TESTDIR)/line.cc $(OBJECTS) 62 | $(MPICC) $(CFLAGS) $(INC) -c -fopenmp $(TESTDIR)/line.cc -o $(BUILDDIR)/line.o 63 | $(MPICC) -fopenmp $(OBJECTS) $(TESTLinearOBJ) $(LIBS) -o $(TESTLinear) 64 | TestLogistic: $(TESTDIR)/logistic.cc $(OBJECTS) 65 | $(MPICC) $(CFLAGS) $(INC) -c -fopenmp $(TESTDIR)/logistic.cc -o $(BUILDDIR)/logistic.o 66 | $(MPICC) -fopenmp $(OBJECTS) $(TESTLogisticOBJ) $(LIBS) -o $(TESTLogistic) 67 | TestMLP: $(TESTDIR)/MLP.cc $(OBJECTS) 68 | $(MPICC) $(CFLAGS) $(INC) -c -fopenmp $(TESTDIR)/MLP.cc -o $(BUILDDIR)/MLP.o 69 | $(MPICC) -fopenmp $(OBJECTS) $(TESTMLPOBJ) $(LIBS) -o $(TESTMLP) 70 | TestConv: $(TESTDIR)/conv.cc $(OBJECTS) 71 | $(MPICC) $(CFLAGS) $(INC) -c -fopenmp $(TESTDIR)/conv.cc -o $(BUILDDIR)/conv.o 72 | $(MPICC) -fopenmp $(OBJECTS) $(TESTConvOBJ) $(LIBS) -o $(TESTConv) 73 | TestSVM: $(TESTDIR)/SVM.cc $(OBJECTS) 74 | $(MPICC) $(CFLAGS) $(INC) -c -fopenmp $(TESTDIR)/SVM.cc -o $(BUILDDIR)/SVM.o 75 | $(MPICC) -fopenmp $(OBJECTS) $(TESTSVMOBJ) $(LIBS) -o $(TESTSVM) 76 | TestRNN: $(TESTDIR)/RNN.cc $(OBJECTS) 77 | $(MPICC) $(CFLAGS) $(INC) -c -fopenmp $(TESTDIR)/RNN.cc -o $(BUILDDIR)/RNN.o 78 | $(MPICC) -fopenmp $(OBJECTS) $(TESTRNNOBJ) $(LIBS) -o $(TESTRNN) 79 | 80 | clean: 81 | @echo "clean..." 82 | $(RM) -rf $(BUILDDIR)/* $(TARGET) $(TEST) 83 | $(RM) -rf $(TESTLinear) 84 | $(RM) -rf $(TESTLogistic) 85 | $(RM) -rf $(TESTMLP) 86 | $(RM) -rf $(TESTConv) 87 | $(RM) -rf $(TESTRNN) 88 | $(RM) -rf $(TESTSVM) 89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ParSecureML 2 | 3 | An Efficient Parallel Secure Machine Learning Framework on GPUs 4 | 5 | 6 | ## Introduction 7 | 8 | This is the source code of our TPDS paper entitled "An Efficient Parallel Secure Machine Learning Framework on GPUs" by Zheng Chen, Feng Zhang, Amelie Chi Zhou, Jidong Zhai, Chenyang Zhang, Xiaoyong Du, 2021. 9 | 10 | We develop a parallel secure machine learning framework on GPUs using secure multi-party computation. 11 | 12 | ## Abstract 13 | 14 | Machine learning has been widely used in our daily lives. Large amounts of data have been continuously produced and transmitted to the cloud for model training and data processing, which raises a problem: how to preserve the security of the data. Recently, a secure machine learning system named SecureML has been proposed to solve this issue using two-party computation.
However, due to the excessive computation cost of two-party computation, secure machine learning is about 2x slower than the original machine learning methods. Previous work on secure machine learning has mostly focused on novel protocols or on improving accuracy, while performance has largely been ignored. In this paper, we propose a GPU-based framework, ParSecureML, to improve the performance of secure machine learning algorithms based on two-party computation. The main challenges of developing ParSecureML lie in the complex computation patterns, frequent intra-node data transmission between CPU and GPU, and complicated inter-node data dependence. To handle these challenges, we propose a series of novel solutions, including profiling-guided adaptive GPU utilization, a fine-grained double pipeline for intra-node CPU-GPU cooperation, and compressed transmission for inter-node communication. As far as we know, this is the first GPU-based secure machine learning framework. Compared to the state-of-the-art framework, ParSecureML achieves an average speedup of 33.8x. 15 | 16 | ## Compilation 17 | 1. Set the CUDA path and MPI path in the Makefile. 18 | 19 | 2. Compile all example programs: 20 | 21 | ```makefile 22 | make 23 | ``` 24 | You can also compile a single example program: 25 | 26 | ```makefile 27 | make TestLinear 28 | make TestLogistic 29 | make TestMLP 30 | make TestConv 31 | make TestSVM 32 | make TestRNN 33 | ``` 34 | 35 | 36 | ## Execution 37 | 38 | 1. ``cd bin`` 39 | 40 | 2. If the Slurm system is installed in your environment, 41 | 42 | ``cd slurmrun`` 43 | 44 | If not, you can use ``mpirun`` to run the programs, 45 | 46 | ``cd mpirun`` 47 | 48 | 3. Run all example programs: 49 | 50 | ```shell 51 | bash run.sh 52 | ``` 53 | 54 | If you just want to run a single example, 55 | 56 | ```shell 57 | bash linear.sh 58 | bash logistic.sh 59 | bash mlp.sh 60 | bash conv.sh 61 | bash svm.sh 62 | bash rnn.sh 63 | ``` 64 | 65 | You can also launch the binaries in any other appropriate way. Our programs need three MPI processes (one client and two servers) to run. 66 | 67 | ## Acknowledgement 68 | 69 | ParSecureML is developed by Renmin University of China, Shenzhen University, and Tsinghua University. 70 | 71 | Zheng Chen, Feng Zhang, Chenyang Zhang, and Xiaoyong Du are with the Key Laboratory of Data Engineering and Knowledge Engineering (MOE), and School of Information, Renmin University of China. 72 | 73 | Amelie Chi Zhou is with the Guangdong Province Engineering Center of China-made High Performance Data Computing System, Shenzhen University.
74 | 75 | Jidong Zhai is with the Department of Computer Science and Technology, Tsinghua University, and BNRist. 76 | 77 | If you have any questions, please contact us (chenzheng123@ruc.edu.cn). 78 | 79 | ## Citation 80 | 81 | If you use our code, please cite our paper: 82 | 83 | ``` 84 | @article{Zhang2021parsecureml, 85 | title={{An Efficient Parallel Secure Machine Learning Framework on GPUs}}, 86 | author={Zhang, Feng and Chen, Zheng and Zhang, Chenyang and Zhou, Amelie Chi and Zhai, Jidong and Du, Xiaoyong}, 87 | journal={IEEE Transactions on Parallel and Distributed Systems}, 88 | year={2021} 89 | } 90 | ``` 91 | 92 | ``` 93 | @inproceedings{chen2020parsecureml, 94 | title={ParSecureML: An Efficient Parallel Secure Machine Learning Framework on GPUs}, 95 | author={Chen, Zheng and Zhang, Feng and Zhou, Amelie Chi and Zhai, Jidong and Zhang, Chenyang and Du, Xiaoyong}, 96 | booktitle={Proceedings of the 49th International Conference on Parallel Processing (ICPP)}, 97 | pages={1--11}, 98 | year={2020} 99 | } 100 | ``` 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /bin/MLP: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/bin/MLP -------------------------------------------------------------------------------- /bin/RNN: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/bin/RNN -------------------------------------------------------------------------------- /bin/SVM: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/bin/SVM -------------------------------------------------------------------------------- /bin/conv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/bin/conv -------------------------------------------------------------------------------- /bin/linear: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/bin/linear -------------------------------------------------------------------------------- /bin/logistic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/bin/logistic -------------------------------------------------------------------------------- /bin/mpirun/conv.sh: -------------------------------------------------------------------------------- 1 | mpirun -np 3 ../conv ../../setup/MNIST -------------------------------------------------------------------------------- /bin/mpirun/linear.sh: -------------------------------------------------------------------------------- 1 | mpirun -np 3 ../linear ../../setup/MNIST -------------------------------------------------------------------------------- /bin/mpirun/logistic.sh: -------------------------------------------------------------------------------- 1 | mpirun -np 3 ../logistic ../../setup/MNIST -------------------------------------------------------------------------------- /bin/mpirun/mlp.sh:
-------------------------------------------------------------------------------- 1 | mpirun -np 3 ../MLP ../../setup/MNIST -------------------------------------------------------------------------------- /bin/mpirun/rnn.sh: -------------------------------------------------------------------------------- 1 | mpirun -np 3 ../RNN -------------------------------------------------------------------------------- /bin/mpirun/run.sh: -------------------------------------------------------------------------------- 1 | mpirun -np 3 ../runner 2 | mpirun -np 3 ../linear ../../setup/MNIST 3 | mpirun -np 3 ../logistic ../../setup/MNIST 4 | mpirun -np 3 ../MLP ../../setup/MNIST 5 | mpirun -np 3 ../conv ../../setup/MNIST 6 | mpirun -np 3 ../RNN 7 | mpirun -np 3 ../SVM ../../setup/MNIST -------------------------------------------------------------------------------- /bin/mpirun/svm.sh: -------------------------------------------------------------------------------- 1 | mpirun -np 3 ../SVM ../../setup/MNIST -------------------------------------------------------------------------------- /bin/runner: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/bin/runner -------------------------------------------------------------------------------- /bin/slurmrun/conv.sh: -------------------------------------------------------------------------------- 1 | srun -N 3 -n 3 ../conv ../../setup/MNIST -------------------------------------------------------------------------------- /bin/slurmrun/linear.sh: -------------------------------------------------------------------------------- 1 | srun -N 3 -n 3 ../linear ../../setup/MNIST -------------------------------------------------------------------------------- /bin/slurmrun/logistic.sh: -------------------------------------------------------------------------------- 1 | srun -N 3 -n 3 ../logistic ../../setup/MNIST -------------------------------------------------------------------------------- /bin/slurmrun/mlp.sh: -------------------------------------------------------------------------------- 1 | srun -N 3 -n 3 ../MLP ../../setup/MNIST -------------------------------------------------------------------------------- /bin/slurmrun/rnn.sh: -------------------------------------------------------------------------------- 1 | srun -N 3 -n 3 ../RNN -------------------------------------------------------------------------------- /bin/slurmrun/run.sh: -------------------------------------------------------------------------------- 1 | srun -N 3 -n 3 ../runner 2 | srun -N 3 -n 3 ../linear ../../setup/MNIST 3 | srun -N 3 -n 3 ../logistic ../../setup/MNIST 4 | srun -N 3 -n 3 ../MLP ../../setup/MNIST 5 | srun -N 3 -n 3 ../conv ../../setup/MNIST 6 | srun -N 3 -n 3 ../RNN 7 | srun -N 3 -n 3 ../SVM ../../setup/MNIST -------------------------------------------------------------------------------- /bin/slurmrun/svm.sh: -------------------------------------------------------------------------------- 1 | srun -N 3 -n 3 ../SVM ../../setup/MNIST -------------------------------------------------------------------------------- /build/MLP.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/build/MLP.o -------------------------------------------------------------------------------- /build/RNN.o: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/build/RNN.o -------------------------------------------------------------------------------- /build/SVM.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/build/SVM.o -------------------------------------------------------------------------------- /build/compressCommunicate.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/build/compressCommunicate.o -------------------------------------------------------------------------------- /build/conv.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/build/conv.o -------------------------------------------------------------------------------- /build/cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/build/cuda.o -------------------------------------------------------------------------------- /build/line.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/build/line.o -------------------------------------------------------------------------------- /build/logistic.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/build/logistic.o -------------------------------------------------------------------------------- /build/main.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/build/main.o -------------------------------------------------------------------------------- /build/offline.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/build/offline.o -------------------------------------------------------------------------------- /build/online.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/build/online.o -------------------------------------------------------------------------------- /build/read.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhengChenCS/ParSecureML/fac17b1bb7e5b2a64a588d11035c2a3765d5c127/build/read.o -------------------------------------------------------------------------------- /include/ParSecureML.h: -------------------------------------------------------------------------------- 1 | /* 2 | Class with MPI_operation 3 | */ 4 | 5 | #include "ParSecureML_noMPI.h" 6 | #include "mpi.h" 7 | 8 | class Support: public Support_noMPI{ 9 | public: 10 | void Send(int MPI_dest1, int MPI_dest2, MPI_Comm client2server1_comm, 
MPI_Comm client2server2_comm); 11 | }; 12 | 13 | class Triplet: public Triplet_noMPI{ 14 | public: 15 | void Rec(int MPI_dest, MPI_Comm server2server_comm); 16 | void Recv(int MPI_dest, MPI_Comm client2server_comm); 17 | void Activation(int MPI_dest, MPI_Comm server2server_comm); 18 | }; 19 | 20 | class ConvSupport: public ConvSupport_noMPI{ 21 | public: 22 | void Send(int MPI_dest1, int MPI_dest2, MPI_Comm client2server1_comm, MPI_Comm client2server2_comm); 23 | }; 24 | 25 | class ConvTriplet: public ConvTriplet_noMPI{ 26 | public: 27 | void Rec(int MPI_dest, MPI_Comm server2server_comm); 28 | void Recv(int MPI_dest, MPI_Comm client2server_comm); 29 | }; 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /include/ParSecureML_noMPI.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | using namespace std; 5 | 6 | const int MPI_client = 0; 7 | const int MPI_server1 = 1; 8 | const int MPI_server2 = 2; 9 | const int CHUNK_SIZE =2; 10 | const int BLOCKNUM = 1024; 11 | const int THREADNUM = 1024; 12 | 13 | class Support_noMPI{ 14 | public: 15 | float *U1; 16 | float *U2; 17 | float *U; 18 | float *V1; 19 | float *V2; 20 | float *V; 21 | float *Z; 22 | float *Z1; 23 | float *Z2; 24 | float *GPU_U; 25 | float *GPU_V; 26 | float *GPU_Z; 27 | int row1; 28 | int col1; 29 | int row2; 30 | int col2; 31 | void GetShape(int in_row1, int in_col1, int in_row2, int in_col2); 32 | void GPU_Mul(); 33 | void Initial(); 34 | void Assign(); 35 | void Release(); 36 | }; 37 | 38 | class ConvSupport_noMPI{ 39 | public: 40 | float *U1; 41 | float *U2; 42 | float *U; 43 | float *V1; 44 | float *V2; 45 | float *V; 46 | float *Z; 47 | float *Z1; 48 | float *Z2; 49 | int row; 50 | int col; 51 | void GetShape(int in_row, int in_col); 52 | void Initial(); 53 | void Assign(); 54 | void Release(); 55 | }; 56 | class Triplet_noMPI{ 57 | public: 58 | float *A; 59 | float *old_A; 60 | float *delta_A; 61 | float *B; 62 | float *old_B; 63 | float *delta_B; 64 | float *C; 65 | float *D; 66 | float *U; 67 | float *V; 68 | float *E1; 69 | float *E2; 70 | float *E; 71 | float *F1; 72 | float *F2; 73 | float *F; 74 | float *Z; 75 | int row1; 76 | int col1; 77 | int row2; 78 | int col2; 79 | float *GPU_A; 80 | float *GPU_B; 81 | float *GPU_C; 82 | float *GPU_E; 83 | float *GPU_F; 84 | float *GPU_Z; 85 | float *GPU_D; 86 | float *fac1; 87 | float *fac2; 88 | int flag1; 89 | int flag2; 90 | int flag3; 91 | int pFlag; 92 | double commtime; 93 | pthread_t pipelineId; 94 | void GetShape(int in_row1, int in_col1, int in_row2, int in_col2); 95 | void Initial(); 96 | void Release(); 97 | void GetData(float *input_A, float *input_B); 98 | void OP(int flag); 99 | void cudaTripletMul(int flag); 100 | void cpuToGPU(); 101 | static void *threadPipeline(void *ptr); 102 | }; 103 | class ConvTriplet_noMPI{ 104 | public: 105 | float *A; 106 | float *B; 107 | float *C; 108 | float *U; 109 | float *V; 110 | float *E1; 111 | float *E2; 112 | float *E; 113 | float *F1; 114 | float *F2; 115 | float *F; 116 | float *Z; 117 | float *GPU_A; 118 | float *GPU_B; 119 | float *GPU_E; 120 | float *GPU_F; 121 | float *GPU_Z; 122 | float *GPU_C; 123 | int row1; 124 | int col1; 125 | int row2; 126 | int col2; 127 | int num; 128 | int o_row; 129 | int o_col; 130 | float *convA; 131 | double commtime; 132 | void GetShape(int in_row1, int in_col1, int in_row2, int in_col2, int in_num); 133 | void Initial(); 134 | void Release(); 135 | void 
GetData(float *input_A, float *input_filter); 136 | void OP(int flag); 137 | void GPU_OP(int flag); 138 | }; 139 | void SelectBatch(float *Img, float *y_hat, float *batchImg, float *batchY, int cur, int batch_size, int SIZE); 140 | void MallocD(float *&gpu_a, int size); 141 | void CopyHtoD(float *gpu_a, float *a, int size); 142 | void CopyDtoH(float *&a, float *&gpu_a, int size); 143 | void ReleaseGPU(float *A); 144 | double timestamp(); 145 | 146 | 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /include/compress.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | struct CSR{ 4 | float *val; 5 | int *col; 6 | int *row; 7 | }; 8 | void MallocCsr(struct CSR &C, int val_count, int row, int col); 9 | void ReleaseCsr(struct CSR &C); 10 | void Compress(float *matrix, struct CSR &C, int val_count, int row, int col); 11 | void deCompress(struct CSR &C, int val_count, int row, int col, float *mat); -------------------------------------------------------------------------------- /include/output.h: -------------------------------------------------------------------------------- 1 | /* 2 | Time metric: 3 | 1.All time 4 | 2.Offline phase time 5 | 3.Online phase time 6 | */ 7 | #include 8 | #include 9 | using namespace std; 10 | #define WIDTH 60 11 | typedef struct Output{ 12 | float all_time; 13 | float offline_time; 14 | float online_time; 15 | string app_name; 16 | Output(float _all_time, float _offline_time, float _online_time, string _app_name){ 17 | all_time = _all_time; 18 | offline_time = _offline_time; 19 | online_time = _online_time; 20 | app_name = _app_name; 21 | } 22 | void draw(){ 23 | cout << "+"; 24 | for(int i = 0; i < WIDTH; i++){ 25 | cout << "-"; 26 | } 27 | cout << "+"; 28 | cout << endl; 29 | cout << "| Application: " << setiosflags(ios::left)<< setw(WIDTH-15) << app_name << " |" << endl; 30 | cout << "|"; 31 | for(int i = 0; i < WIDTH; i++){ 32 | cout << "-"; 33 | } 34 | cout << "|" << endl; 35 | cout << "|" << setiosflags(ios::left) << setw(16) << " All Time" << setiosflags(ios::left) << setw(24) << "| Offline Phase Time" << setiosflags(ios::left) << setw(23) << "| Online Phase Time |" << endl; 36 | cout << "|" << " " << setiosflags(ios::left) << setw(15) << setiosflags(ios::fixed) << setprecision(2) << all_time << "| " << setiosflags(ios::left) << setw(22) << setiosflags(ios::fixed) << setprecision(2) << offline_time << "| " << setiosflags(ios::left) << setw(18) << setiosflags(ios::fixed) << setprecision(2) << online_time << "|" << endl; 37 | cout << "+"; 38 | for(int i = 0; i < WIDTH; i++){ 39 | cout << "-"; 40 | } 41 | cout << "+" << endl; 42 | } 43 | }Output; -------------------------------------------------------------------------------- /include/read.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | using namespace std; 8 | void read_Images(string filename, float *images, int SIZE, int train_size); 9 | void read_Label(string filename, float *labels, int train_size); 10 | void Generator(float *array, int bound, int SIZE); -------------------------------------------------------------------------------- /setup/CIFAR: -------------------------------------------------------------------------------- 1 | --imagesPath ../data/cifar/50000.bin 2 | --train_size 50000 3 | --batch_size 128 4 | --SIZE 3072 5 | --FSIZE 5 6 | --row 96 7 | --col 32 8 | 
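The setup files in this directory are plain lists of ``--flag value`` pairs, and each launch script passes one of them to a test binary as its only argument (e.g. ``mpirun -np 3 ../conv ../../setup/MNIST``). The flag parsing itself happens inside the test programs, which are not reproduced in this excerpt, so the following is only a minimal sketch of how such a file could be read; the helper name ``LoadSetup`` and the surrounding ``main`` are hypothetical and not part of the repository.

```cpp
// Hypothetical sketch (not repository code): read a setup file such as
// setup/MNIST, where every line is "--key value", into a key/value map.
#include <fstream>
#include <iostream>
#include <map>
#include <string>

std::map<std::string, std::string> LoadSetup(const std::string &path) {
    std::map<std::string, std::string> options;
    std::ifstream in(path);
    std::string key, value;
    while (in >> key >> value) {   // e.g. key = "--SIZE", value = "784"
        options[key] = value;
    }
    return options;
}

int main(int argc, char **argv) {
    if (argc < 2) {
        std::cerr << "usage: " << argv[0] << " <setup file>" << std::endl;
        return 1;
    }
    std::map<std::string, std::string> opt = LoadSetup(argv[1]);
    // Parameters listed in setup/CIFAR, setup/MNIST, etc.
    int train_size = std::stoi(opt["--train_size"]);
    int batch_size = std::stoi(opt["--batch_size"]);
    std::cout << "train_size=" << train_size
              << " batch_size=" << batch_size << std::endl;
    return 0;
}
```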
-------------------------------------------------------------------------------- /setup/MNIST: -------------------------------------------------------------------------------- 1 | --imagesPath ../data/mnist.txt 2 | --train_size 10000 3 | --batch_size 128 4 | --SIZE 784 5 | --FSIZE 5 6 | --row 28 7 | --col 28 8 | -------------------------------------------------------------------------------- /setup/NIST: -------------------------------------------------------------------------------- 1 | --imagesPath ../data/NIST.txt 2 | --train_size 4000 3 | --batch_size 128 4 | --SIZE 262144 5 | --FSIZE 5 6 | --row 512 7 | --col 512 8 | -------------------------------------------------------------------------------- /setup/SYN: -------------------------------------------------------------------------------- 1 | --train_size 640000 2 | --batch_size 128 3 | --SIZE 2048 4 | --FSIZE 5 5 | --row 64 6 | --col 32 7 | -------------------------------------------------------------------------------- /setup/VGG: -------------------------------------------------------------------------------- 1 | --imagesPath ../data/vgg.txt 2 | --train_size 40000 3 | --batch_size 128 4 | --SIZE 40000 5 | --FSIZE 5 6 | --row 200 7 | --col 200 8 | -------------------------------------------------------------------------------- /source/compressCommunicate.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file contains some functions for compression communiate 3 | */ 4 | 5 | #include "../include/compress.h" 6 | #include 7 | #include 8 | #include 9 | void MallocCsr(struct CSR &C, int val_count, int row, int col){ 10 | C.val = (float*)malloc(sizeof(float)*val_count); 11 | C.col = (int*)malloc(sizeof(float)*val_count); 12 | C.row = (int*)malloc(sizeof(float)*row); 13 | } 14 | void ReleaseCsr(struct CSR &C){ 15 | free(C.val); 16 | free(C.col); 17 | free(C.row); 18 | } 19 | void Compress(float *matrix, struct CSR &C, int val_count, int row, int col){ 20 | int count = 0; 21 | #pragma omp parallel for 22 | for(int i = 0; i < row; i++){ 23 | int flag = 1; 24 | for(int j = 0; j < col; j++){ 25 | if(abs(matrix[i*col+j]) < 1e-3){ 26 | continue; 27 | }else{ 28 | C.val[count] = matrix[i*col+j]; 29 | C.col[count] = j; 30 | #pragma omp atomic 31 | count++; 32 | if(flag == 1){ 33 | C.row[i] = j; 34 | flag = 0; 35 | } 36 | } 37 | } 38 | if(flag==1){ 39 | C.row[i] = -1; 40 | } 41 | } 42 | } 43 | void deCompress(struct CSR &C, int val_count, int row, int col, float *mat){ 44 | int cur = 0; 45 | #pragma omp parallel for schedule(guided) 46 | for(int i = 0; i < row; i++){ 47 | if(C.row[i] == -1) continue; 48 | //#pragma omp simd 49 | for(int j = C.row[i]; j < C.row[i+1]; j++){ 50 | mat[i*col+j] += C.val[cur++]; 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /source/cuda.cu: -------------------------------------------------------------------------------- 1 | /* 2 | This file contains some function for GPU operation 3 | */ 4 | #include 5 | #include "cublas_v2.h" 6 | #include "../include/ParSecureML_noMPI.h" 7 | /* 8 | Malloc GPU memory 9 | */ 10 | void MallocD(float *&gpu_a, int size){ 11 | cudaError_t cudaStat; 12 | cudaStat = cudaMalloc((void**)&gpu_a, sizeof(*gpu_a)*size); 13 | if(cudaStat != cudaSuccess){ 14 | cout << "Malloc failed:" << cudaGetErrorString(cudaStat)<< endl; 15 | exit(0); 16 | } 17 | } 18 | /* 19 | Transfer data from host to device 20 | */ 21 | void CopyHtoD(float *gpu_a, float *a, int size){ 22 | cudaError_t cudaStat; 23 | 
cudaStat = cudaMemcpy(gpu_a, a, sizeof(*a)*size, cudaMemcpyHostToDevice); 24 | if(cudaStat != cudaSuccess){ 25 | cout << "Error code:" << cudaStat << endl; 26 | cout << "CopyHtoD failed." << endl; 27 | exit(0); 28 | } 29 | } 30 | 31 | /* 32 | Transfer data from device to host 33 | */ 34 | void CopyDtoH(float *&a, float *&gpu_a, int size){ 35 | cudaError_t cudaStat; 36 | cudaStat = cudaMemcpy(a, gpu_a, sizeof(*a)*size, cudaMemcpyDeviceToHost); 37 | if(cudaStat != cudaSuccess){ 38 | cout << "Error code:" << cudaStat << endl; 39 | cout << "CopyDtoH failed." << endl; 40 | exit(0); 41 | } 42 | } 43 | 44 | /* 45 | Matrix multiplication for Support class 46 | */ 47 | void Support_noMPI::GPU_Mul(){ 48 | cublasStatus_t stat; 49 | cublasHandle_t handle; 50 | 51 | stat = cublasCreate(&handle); 52 | stat = cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH); 53 | 54 | if(stat != CUBLAS_STATUS_SUCCESS){ 55 | cout << "CUBLAS create failed." << endl; 56 | exit(0); 57 | } 58 | float alpha = 1; 59 | float b = 0; 60 | 61 | stat = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, row1, col2, col1, &alpha, GPU_U, row1, GPU_V, row2, &b, GPU_Z, row1); 62 | if(stat != CUBLAS_STATUS_SUCCESS){ 63 | cout << "Cublas sgemm failed." << endl; 64 | exit(0); 65 | } 66 | } 67 | 68 | void ReleaseGPU(float *A){ 69 | cudaFree(A); 70 | } 71 | 72 | /* 73 | Triplet sum function 74 | sum = fac1+fac2+fac3 75 | */ 76 | __global__ void cudaTripletSum(float *sum, float *fac1, float *fac2, float *fac3, int size){ 77 | int bid = blockIdx.x; 78 | int tid = threadIdx.x; 79 | int cur = bid*blockDim.x+tid; 80 | if(cur >= size) return; 81 | float tmp = fac1[cur] + fac2[cur] + fac3[cur]; 82 | sum[cur] = tmp; 83 | } 84 | 85 | /* 86 | sum = A + B 87 | */ 88 | __global__ void cudaSum(float *A, float *B, float *sum, int size){ 89 | int bid = blockIdx.x; 90 | int tid = threadIdx.x; 91 | int cur = bid*blockDim.x+tid; 92 | if(cur >= size) return; 93 | float tmp = A[cur]+B[cur]; 94 | sum[cur] = tmp; 95 | } 96 | 97 | /* 98 | min = A - B 99 | */ 100 | __global__ void cudaMinus(float *A, float *B, float *min, int size){ 101 | int bid = blockIdx.x; 102 | int tid = threadIdx.x; 103 | int cur = bid*blockDim.x+tid; 104 | if(cur >= size) return; 105 | float tmp = A[cur]-B[cur]; 106 | min[cur] = tmp; 107 | } 108 | 109 | /* 110 | Triplet multiplication 111 | if server1: 112 | result = A*F + E*B + Z 113 | if server2: 114 | result = (A-E)*F + E*B + Z 115 | */ 116 | void Triplet_noMPI::cudaTripletMul(int flag){ 117 | cublasStatus_t stat; 118 | cublasHandle_t handle; 119 | cudaError_t cudaStat; 120 | stat = cublasCreate(&handle); 121 | stat = cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH); 122 | 123 | if(stat != CUBLAS_STATUS_SUCCESS){ 124 | cout << "CUBLAS create failed." << endl; 125 | exit(0); 126 | } 127 | float alpha1 = 1; 128 | float alpha2 = 1; 129 | float b = 0; 130 | while(flag1 == 0){ 131 | continue; 132 | } 133 | if(flag == 0){ 134 | while(flag2 == 0){ 135 | continue; 136 | } 137 | stat = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, row1, col2, col1, &alpha1, GPU_A, row1, GPU_F, row2, &b, fac1, row1); 138 | if(stat != CUBLAS_STATUS_SUCCESS){ 139 | cout << "Cublas sgemm failed." << endl; 140 | exit(0); 141 | } 142 | } 143 | else if(flag == 1){ 144 | cudaMinus<<>>(GPU_A, GPU_E, GPU_D, row1*col1); 145 | while(flag2 == 0){ 146 | continue; 147 | } 148 | stat = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, row1, col2, col1, &alpha1, GPU_D, row1, GPU_F, row2, &b, fac1, row1); 149 | if(stat != CUBLAS_STATUS_SUCCESS){ 150 | cout << "Cublas sgemm failed." 
<< endl; 151 | exit(0); 152 | } 153 | } 154 | while(flag3 == 0){ 155 | continue; 156 | } 157 | stat = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, row1, col2, col1, &alpha2, GPU_E, row1, GPU_B, row2, &b, fac2, row1); 158 | if(stat != CUBLAS_STATUS_SUCCESS){ 159 | cout << "Cublas sgemm failed." << endl; 160 | exit(0); 161 | } 162 | cublasDestroy(handle); 163 | cudaTripletSum<<>>(GPU_C, fac1, fac2, GPU_Z, row1*col2); 164 | cudaStat = cudaGetLastError(); 165 | if(cudaStat != cudaSuccess){ 166 | cout << "Kernel launch failed." << endl; 167 | exit(0); 168 | } 169 | } 170 | 171 | /* 172 | Convolution for SecureML on GPU 173 | */ 174 | __global__ void cudaConv(int flag, float *GPU_A, float *GPU_B, float *GPU_C, float *GPU_E, float *GPU_F, float *GPU_Z, int row1, int col1, int row2, int col2, int o_row, int o_col, int num){ 175 | int bid = blockIdx.x; 176 | int tid = threadIdx.x; 177 | int cur = bid*blockDim.x+tid; 178 | if(cur >= num*o_row*o_col) return; 179 | int num_cur = cur/(o_row*o_col); 180 | int row_cur = cur%(o_row*o_col)/o_col; 181 | int col_cur = cur%(o_row*o_col)%o_col; 182 | float tem = 0; 183 | for(int i = 0; i < row2; i++){ 184 | for(int j = 0; j < col2; j++){ 185 | tem += flag*GPU_E[num_cur*o_row*o_col*row2*col2+row_cur*o_col*row2*col2+col_cur*row2*col2+i*col2*j]*GPU_F[i*col2+j] + GPU_A[num_cur*row1*col1+(row_cur+i)*row1+col_cur+j] * GPU_F[i*col2+j] + GPU_E[num_cur*o_row*o_col*row2*col2+row_cur*o_col*row2*col2+col_cur*row2*col2+i*col2*j] * GPU_B[i*col2+j] + GPU_Z[i*col2+j]; 186 | } 187 | } 188 | GPU_C[num_cur*o_row*o_col+row_cur*o_col+col_cur] = tem; 189 | } 190 | void ConvTriplet_noMPI::GPU_OP(int flag){ 191 | cudaConv<<>>(flag, GPU_A, GPU_B, GPU_C, GPU_E, GPU_F, GPU_Z, row1, col1, row2, col2, o_row, o_row, num); 192 | } 193 | -------------------------------------------------------------------------------- /source/main.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "../include/ParSecureML.h" 22 | #include "../include/read.h" 23 | #include "mpi.h" 24 | #include "../include/output.h" 25 | using namespace std; 26 | 27 | #if defined (_MSC_VER) // Visual studio 28 | #define thread_local __declspec( thread ) 29 | #elif defined (__GCC__) // GCC 30 | #define thread_local __thread 31 | #endif 32 | 33 | #define FLOAT_PER_CACHELINE (64/4) 34 | const int N = 100; 35 | 36 | 37 | /* Thread-safe function that returns a random number between min and max (inclusive). 38 | This function takes ~142% the time that calling rand() would take. For this extra 39 | cost you get a better uniform distribution and thread-safety. 
*/ 40 | extern int intRand(const int & min, const int & max, bool lastUsed = false); 41 | 42 | int main(int argc, char **argv){ 43 | MPI_Init(&argc, &argv); 44 | int MPI_rank, MPI_size; 45 | MPI_Comm_rank(MPI_COMM_WORLD, &MPI_rank); 46 | MPI_Comm_size(MPI_COMM_WORLD, &MPI_size); 47 | 48 | MPI_Group world_group; 49 | MPI_Comm_group(MPI_COMM_WORLD, &world_group); 50 | 51 | double all_stime, all_etime; 52 | all_stime = timestamp(); 53 | double offline_stime; 54 | offline_stime = timestamp(); 55 | if(MPI_rank == MPI_client){ 56 | 57 | /* 58 | Init MPI group: client2server1 clientserver2 59 | */ 60 | 61 | const int group_client2server1[CHUNK_SIZE] = {MPI_client, MPI_server1}; 62 | const int group_client2server2[CHUNK_SIZE] = {MPI_client, MPI_server2}; 63 | 64 | MPI_Group client2server1; 65 | MPI_Group client2server2; 66 | 67 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server1, &client2server1); 68 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server2, &client2server2); 69 | 70 | MPI_Comm client2server1_comm; 71 | MPI_Comm client2server2_comm; 72 | 73 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server1, 0, &client2server1_comm); 74 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server2, 0, &client2server2_comm); 75 | 76 | int MPI_group_client2server1_rank; 77 | int MPI_group_client2server2_rank; 78 | 79 | MPI_Comm_rank(client2server1_comm, &MPI_group_client2server1_rank); 80 | MPI_Comm_rank(client2server2_comm, &MPI_group_client2server2_rank); 81 | 82 | int server1_group_rank = 0, server2_group_rank = 0; 83 | 84 | if(MPI_group_client2server1_rank == 0){ 85 | server1_group_rank = 1; 86 | } 87 | if(MPI_group_client2server2_rank == 0){ 88 | server2_group_rank = 1; 89 | } 90 | 91 | float *A, *B, *C; 92 | A = (float*)malloc(sizeof(float)*N*N); 93 | B = (float*)malloc(sizeof(float)*N*N); 94 | C = (float*)malloc(sizeof(float)*N*N); 95 | Generator(A, 256, N*N); 96 | Generator(B, 256, N*N); 97 | float *A1 = (float*)malloc(sizeof(float)*N*N); 98 | float *A2 = (float*)malloc(sizeof(float)*N*N); 99 | float *B1 = (float*)malloc(sizeof(float)*N*N); 100 | float *B2 = (float*)malloc(sizeof(float)*N*N); 101 | float *C1 = (float*)malloc(sizeof(float)*N*N); 102 | float *C2 = (float*)malloc(sizeof(float)*N*N); 103 | 104 | 105 | int procNum = omp_get_num_procs(); 106 | int *seed = (int *)malloc(sizeof(int) * procNum*2); 107 | for (int i = 0; i < procNum*2; i++){ 108 | seed[i] = rand(); 109 | } 110 | #pragma omp parallel num_threads(2*procNum) 111 | { 112 | int s = seed[omp_get_thread_num()]; 113 | #pragma omp for 114 | for(int i = 0; i < N*N; i++){ 115 | A1[i] = intRand(0, 255); 116 | A2[i] = A[i] - A1[i]; 117 | } 118 | #pragma omp for 119 | for(int i = 0; i < N*N; i++){ 120 | B1[i] = intRand(0, 255); 121 | B2[i] = B[i] - B1[i]; 122 | } 123 | } 124 | 125 | 126 | Support sp; 127 | sp.GetShape(N, N, N, N); 128 | sp.Initial(); 129 | sp.Assign(); 130 | int flag1 = 0; 131 | int flag2 = -1; 132 | 133 | MPI_Send(&flag1, 1, MPI_INT, server1_group_rank, 0, client2server1_comm); 134 | MPI_Send(A1, N*N, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 135 | MPI_Send(B1, N*N, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 136 | 137 | 138 | 139 | MPI_Send(&flag2, 1, MPI_INT, server2_group_rank, 0, client2server2_comm); 140 | MPI_Send(A2, N*N, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 141 | MPI_Send(B2, N*N, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 142 | 143 | 144 | sp.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 145 | 
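/*
    At this point the client has (1) split A and B into additive shares
    (A = A1 + A2, B = B1 + B2) and sent one share of each to server 1 and
    server 2, and (2) generated a Beaver-style multiplication triplet
    (U, V, Z = U*V) in sp.Assign() and distributed its shares with sp.Send().
    Each server combines its input shares with its triplet share during the
    online phase; the client only needs to add the two result shares it
    receives below (C = C1 + C2) to recover A*B.
*/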
146 | 147 | MPI_Status status; 148 | 149 | MPI_Recv(C1, N*N, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 150 | MPI_Recv(C2, N*N, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 151 | 152 | #pragma omp parallel for 153 | for(int i = 0; i < N*N/FLOAT_PER_CACHELINE; i++){ 154 | for (int j = 0; j < FLOAT_PER_CACHELINE; j++){ 155 | C[i * FLOAT_PER_CACHELINE + j] = C1[i * FLOAT_PER_CACHELINE + j] + C2[i * FLOAT_PER_CACHELINE + j]; 156 | } 157 | } 158 | free(A1); 159 | free(A2); 160 | free(B1); 161 | free(B2); 162 | free(A); 163 | free(B); 164 | free(C); 165 | sp.Release(); 166 | float server1_offline_time, server2_offline_time; 167 | float server1_online_time, server2_online_time; 168 | all_etime = timestamp(); 169 | float all_time = all_etime - all_stime; 170 | 171 | MPI_Recv(&server1_offline_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 172 | MPI_Recv(&server2_offline_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 173 | MPI_Recv(&server1_online_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 174 | MPI_Recv(&server2_online_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 175 | Output out( 176 | all_time, 177 | server1_offline_time > server2_offline_time ? server1_offline_time : server2_offline_time, 178 | server1_online_time > server2_online_time ? server1_online_time : server2_online_time, 179 | "Single matrix multiplication" 180 | ); 181 | out.draw(); 182 | if(client2server1 != MPI_GROUP_NULL) MPI_Group_free(&client2server1); 183 | if(client2server2 != MPI_GROUP_NULL) MPI_Group_free(&client2server2); 184 | } 185 | else{ 186 | 187 | /* 188 | Init MPI_group: client2server, server2server 189 | */ 190 | 191 | const int group_client2server[CHUNK_SIZE] = {MPI_client, MPI_rank}; 192 | const int group_server2server[CHUNK_SIZE] = {MPI_server1, MPI_server2}; 193 | 194 | MPI_Group client2server; 195 | MPI_Group server2server; 196 | 197 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server, &client2server); 198 | MPI_Group_incl(world_group, CHUNK_SIZE, group_server2server, &server2server); 199 | 200 | MPI_Comm client2server_comm; 201 | MPI_Comm server2server_comm; 202 | 203 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server, 0, &client2server_comm); 204 | MPI_Comm_create_group(MPI_COMM_WORLD, server2server, 0, &server2server_comm); 205 | 206 | int MPI_group_client2server_rank; 207 | int MPI_group_server2server_rank; 208 | 209 | MPI_Comm_rank(client2server_comm, &MPI_group_client2server_rank); 210 | MPI_Comm_rank(server2server_comm, &MPI_group_server2server_rank); 211 | 212 | int client_group_rank = 0, server_group_rank = 0; 213 | 214 | if(MPI_group_client2server_rank == 0){ 215 | client_group_rank = 1; 216 | } 217 | if(MPI_group_server2server_rank == 0){ 218 | server_group_rank = 1; 219 | } 220 | 221 | 222 | double offline_etime; 223 | MPI_Status status; 224 | int flag; 225 | float *A1 = (float*)malloc(sizeof(float)*N*N); 226 | float *B1 = (float*)malloc(sizeof(float)*N*N); 227 | 228 | MPI_Recv(&flag, 1, MPI_INT, client_group_rank, 0, client2server_comm, &status); 229 | MPI_Recv(A1, N*N, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 230 | MPI_Recv(B1, N*N, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 231 | 232 | Triplet T; 233 | T.GetShape(N, N, N, N); 234 | T.Initial(); 235 | T.Recv(client_group_rank, client2server_comm); 236 | 237 | offline_etime = timestamp(); 238 | 239 | double online_stime, online_etime; 240 | 
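/*
    Online phase on the server side: GetData() loads this server's shares of
    the inputs, Rec() exchanges the masked values E = A - U and F = B - V with
    the other server (falling back to delta-CSR compression when the update is
    sparse enough), and OP() launches the GPU triplet multiplication that
    produces this server's additive share of the product, which is then sent
    back to the client.
*/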
online_stime = timestamp(); 241 | T.GetData(A1, B1); 242 | T.Rec(server_group_rank, server2server_comm); 243 | T.OP(flag); 244 | online_etime = timestamp(); 245 | MPI_Send(T.C, N*N, MPI_FLOAT, client_group_rank, 0, client2server_comm); 246 | T.Release(); 247 | 248 | float online_time = online_etime-online_stime; 249 | float offline_time = offline_etime-offline_stime; 250 | MPI_Send(&offline_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 251 | MPI_Send(&online_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 252 | if(client2server != MPI_GROUP_NULL){ 253 | MPI_Group_free(&client2server); 254 | } 255 | if(server2server != MPI_GROUP_NULL){ 256 | MPI_Group_free(&server2server); 257 | } 258 | } 259 | if(world_group != MPI_GROUP_NULL){ 260 | MPI_Group_free(&world_group); 261 | } 262 | MPI_Finalize(); 263 | return 0; 264 | } 265 | -------------------------------------------------------------------------------- /source/offline.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "../include/ParSecureML.h" 8 | #include "mpi.h" 9 | 10 | /* Thread-safe function that returns a random number between min and max (inclusive). 11 | This function takes ~142% the time that calling rand() would take. For this extra 12 | cost you get a better uniform distribution and thread-safety. */ 13 | int intRand(const int & min, const int & max, bool lastUsed = false) { 14 | static thread_local mt19937* generator = nullptr; 15 | hash hasher; 16 | if (!generator) generator = new mt19937(clock() + hasher(this_thread::get_id())); 17 | uniform_int_distribution distribution(min, max); 18 | if (lastUsed){ 19 | int returnVal = distribution(*generator); 20 | if (generator) delete(generator); 21 | return returnVal; 22 | } 23 | return distribution(*generator); 24 | } 25 | 26 | void Support_noMPI::GetShape(int in_row1, int in_col1, int in_row2, int in_col2){ 27 | row1 = in_row1; 28 | col1 = in_col1; 29 | row2 = in_row2; 30 | col2 = in_col2; 31 | } 32 | /* 33 | Full connect layer shape: 34 | U,U1,U2:row1*col1 35 | V,V1,V2:row2*col2 36 | Z,Z1,Z2:row1*col2 37 | */ 38 | void Support_noMPI::Initial(){ 39 | U1 = (float*)malloc(sizeof(float)*row1*col1); 40 | U2 = (float*)malloc(sizeof(float)*row1*col1); 41 | U = (float*)malloc(sizeof(float)*row1*col1); 42 | V1 = (float*)malloc(sizeof(float)*row2*col2); 43 | V2 = (float*)malloc(sizeof(float)*row2*col2); 44 | V = (float*)malloc(sizeof(float)*row2*col2); 45 | Z1 = (float*)malloc(sizeof(float)*row1*col2); 46 | Z2 = (float*)malloc(sizeof(float)*row1*col2); 47 | Z = (float*)malloc(sizeof(float)*row1*col2); 48 | MallocD(GPU_U, row1*col1); 49 | MallocD(GPU_V, row2*col2); 50 | MallocD(GPU_Z, row1*col2); 51 | } 52 | 53 | /* 54 | Client: 55 | U = U1+U2 56 | V = V1+V2 57 | Z = U*V 58 | Z = Z1+Z2 59 | */ 60 | void Support_noMPI::Assign(){ 61 | #pragma omp parallel 62 | { 63 | #pragma omp for 64 | for(int i = 0; i < row1*col1; i++){ 65 | U1[i] = (float)intRand(0, RAND_MAX); 66 | U2[i] = (float)intRand(0, RAND_MAX); 67 | U[i] = U1[i] + U2[i]; 68 | } 69 | #pragma omp for 70 | for(int i = 0; i < row2*col2; i++){ 71 | V1[i] = (float)intRand(0, RAND_MAX); 72 | V2[i] = (float)intRand(0, RAND_MAX); 73 | V[i] = V1[i] + V2[i]; 74 | } 75 | } 76 | 77 | CopyHtoD(GPU_U, U, row1*col1); 78 | CopyHtoD(GPU_V, V, row2*col2); 79 | GPU_Mul(); 80 | CopyDtoH(Z, GPU_Z, row1*col2); 81 | #pragma omp parallel 82 | { 83 | #pragma omp for 84 | for(int i = 0; i < row1*col2; i++){ 
85 | Z1[i] = (float)intRand(0, RAND_MAX); 86 | Z2[i] = Z[i] - Z1[i]; 87 | } 88 | } 89 | 90 | } 91 | 92 | 93 | /* 94 | Send data to server1 and server2 95 | @parms: 96 | MPI_dest1, MPI_dest2: MPI_rank of server1 and server2 97 | server1_comm, server2_comm: MPI_comm of server1 and server2 98 | */ 99 | 100 | void Support::Send(int MPI_dest1, int MPI_dest2, MPI_Comm server1_comm, MPI_Comm server2_comm){ 101 | MPI_Send(U1, row1*col1, MPI_FLOAT, MPI_dest1, 0, server1_comm); 102 | MPI_Send(V1, row2*col2, MPI_FLOAT, MPI_dest1, 0, server1_comm); 103 | MPI_Send(Z1, row1*col2, MPI_FLOAT, MPI_dest1, 0, server1_comm); 104 | 105 | MPI_Send(U2, row1*col1, MPI_FLOAT, MPI_dest2, 0, server2_comm); 106 | MPI_Send(V2, row2*col2, MPI_FLOAT, MPI_dest2, 0, server2_comm); 107 | MPI_Send(Z2, row1*col2, MPI_FLOAT, MPI_dest2, 0, server2_comm); 108 | } 109 | 110 | void Support_noMPI::Release(){ 111 | free(U1); 112 | free(U2); 113 | free(U); 114 | free(V1); 115 | free(V2); 116 | free(V); 117 | free(Z); 118 | free(Z1); 119 | free(Z2); 120 | ReleaseGPU(GPU_U); 121 | ReleaseGPU(GPU_V); 122 | ReleaseGPU(GPU_Z); 123 | } 124 | void ConvSupport_noMPI::GetShape(int in_row, int in_col){ 125 | row = in_row; 126 | col = in_col; 127 | } 128 | 129 | /* 130 | Convolution layer shape: 131 | U,U1,U2,V,V1,V2,Z,Z1,Z2: row*col 132 | */ 133 | void ConvSupport_noMPI::Initial(){ 134 | U1 = (float*)malloc(sizeof(float)*row*col); 135 | U2 = (float*)malloc(sizeof(float)*row*col); 136 | U = (float*)malloc(sizeof(float)*row*col); 137 | V1 = (float*)malloc(sizeof(float)*row*col); 138 | V2 = (float*)malloc(sizeof(float)*row*col); 139 | V = (float*)malloc(sizeof(float)*row*col); 140 | Z1 = (float*)malloc(sizeof(float)*row*col); 141 | Z2 = (float*)malloc(sizeof(float)*row*col); 142 | Z = (float*)malloc(sizeof(float)*row*col); 143 | } 144 | 145 | void ConvSupport_noMPI::Assign(){ 146 | #pragma omp parallel 147 | { 148 | #pragma omp for 149 | for(int i = 0; i < row*col; i++){ 150 | U1[i] = (float)intRand(0, RAND_MAX); 151 | U2[i] = (float)intRand(0, RAND_MAX); 152 | U[i] = U1[i] + U2[i]; 153 | } 154 | #pragma omp for 155 | for(int i = 0; i < row*col; i++){ 156 | V1[i] = (float)intRand(0, RAND_MAX); 157 | V2[i] = (float)intRand(0, RAND_MAX); 158 | V[i] = V1[i] + V2[i]; 159 | } 160 | #pragma omp for 161 | for(int i = 0; i < row; i++){ 162 | for(int j = 0; j < col; j++){ 163 | Z[i*col+j] = U[i*col+j] * V[i*col+j]; 164 | } 165 | } 166 | #pragma omp for 167 | for(int i = 0; i < row*col; i++){ 168 | Z1[i] = (float)intRand(0, RAND_MAX); 169 | Z2[i] = Z[i] - Z1[i]; 170 | } 171 | } 172 | 173 | } 174 | 175 | /* 176 | Send data function of Convolution support 177 | */ 178 | void ConvSupport::Send(int MPI_dest1, int MPI_dest2, MPI_Comm server1_comm, MPI_Comm server2_comm){ 179 | MPI_Send(U1, row*col, MPI_FLOAT, MPI_dest1, 0, server1_comm); 180 | MPI_Send(V1, row*col, MPI_FLOAT, MPI_dest1, 0, server1_comm); 181 | MPI_Send(Z1, row*col, MPI_FLOAT, MPI_dest1, 0, server1_comm); 182 | 183 | MPI_Send(U2, row*col, MPI_FLOAT, MPI_dest2, 0, server2_comm); 184 | MPI_Send(V2, row*col, MPI_FLOAT, MPI_dest2, 0, server2_comm); 185 | MPI_Send(Z2, row*col, MPI_FLOAT, MPI_dest2, 0, server2_comm); 186 | } 187 | 188 | void ConvSupport_noMPI::Release(){ 189 | #pragma omp parallel 190 | { 191 | intRand(0, 1, true); 192 | } 193 | free(U1); 194 | free(U2); 195 | free(U); 196 | free(V1); 197 | free(V2); 198 | free(V); 199 | free(Z); 200 | free(Z1); 201 | free(Z2); 202 | } 203 | -------------------------------------------------------------------------------- /source/online.cc: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "../include/ParSecureML.h" 8 | #include "../include/compress.h" 9 | #include "../include/read.h" 10 | #include 11 | double timestamp(){ 12 | struct timeval time; 13 | gettimeofday(&time, NULL); 14 | return time.tv_sec + 1e-6*time.tv_usec; 15 | } 16 | void Triplet_noMPI::GetShape(int in_row1, int in_col1, int in_row2, int in_col2){ 17 | row1 = in_row1; 18 | col1 = in_col1; 19 | row2 = in_row2; 20 | col2 = in_col2; 21 | } 22 | /* 23 | Malloc memory for triplet && create pipeline 24 | */ 25 | void Triplet_noMPI::Initial(){ 26 | commtime = 0; 27 | A = (float*)malloc(sizeof(float)*row1*col1); 28 | old_A = (float*)malloc(sizeof(float)*row1*col1); 29 | delta_A = (float*)malloc(sizeof(float)*row1*col1); 30 | B = (float*)malloc(sizeof(float)*row2*col2); 31 | old_B = (float*)malloc(sizeof(float)*row2*col2); 32 | delta_B = (float*)malloc(sizeof(float)*row2*col2); 33 | C = (float*)malloc(sizeof(float)*row1*col2); 34 | D = (float*)malloc(sizeof(float)*row1*col1); 35 | U = (float*)malloc(sizeof(float)*row1*col1); 36 | V = (float*)malloc(sizeof(float)*row2*col2); 37 | Z = (float*)malloc(sizeof(float)*row1*col2); 38 | E1 = (float*)malloc(sizeof(float)*row1*col1); 39 | E2 = (float*)malloc(sizeof(float)*row1*col1); 40 | E = (float*)malloc(sizeof(float)*row1*col1); 41 | F1 = (float*)malloc(sizeof(float)*row2*col2); 42 | F2 = (float*)malloc(sizeof(float)*row2*col2); 43 | F = (float*)malloc(sizeof(float)*row2*col2); 44 | for(int i = 0; i < row1*col1; i++){ 45 | E[i] = (float)rand()/RAND_MAX; 46 | } 47 | for(int i = 0; i < row2*col2; i++){ 48 | F[i] = (float)rand()/RAND_MAX; 49 | } 50 | for(int i = 0; i < row1*col1; i++){ 51 | old_A[i] = 0; 52 | } 53 | for(int i = 0; i < row2*col2; i++){ 54 | old_B[i] = 0; 55 | } 56 | MallocD(GPU_A, row1*col1); 57 | MallocD(GPU_B, row2*col2); 58 | MallocD(GPU_E, row1*col1); 59 | MallocD(GPU_F, row2*col2); 60 | MallocD(GPU_Z, row1*col2); 61 | MallocD(fac1, row1*col2); 62 | MallocD(fac2, row1*col2); 63 | MallocD(GPU_C, row1*col2); 64 | MallocD(GPU_D, row1*col1); 65 | flag1 = 0; 66 | flag2 = 0; 67 | flag3 = 0; 68 | pFlag = 0; 69 | pthread_create(&pipelineId, NULL, threadPipeline, (void*)this); 70 | } 71 | void Triplet_noMPI::Release(){ 72 | free(A); 73 | free(B); 74 | free(old_A); 75 | free(old_B); 76 | free(delta_A); 77 | free(delta_B); 78 | free(C); 79 | free(Z); 80 | free(U); 81 | free(V); 82 | free(E1); 83 | free(E2); 84 | free(E); 85 | free(F1); 86 | free(F2); 87 | free(F); 88 | free(D); 89 | ReleaseGPU(GPU_A); 90 | ReleaseGPU(GPU_B); 91 | ReleaseGPU(GPU_C); 92 | ReleaseGPU(GPU_E); 93 | ReleaseGPU(GPU_F); 94 | ReleaseGPU(GPU_Z); 95 | ReleaseGPU(fac1); 96 | ReleaseGPU(fac2); 97 | ReleaseGPU(GPU_D); 98 | pFlag = 2; 99 | pthread_join(pipelineId, NULL); 100 | } 101 | void Triplet_noMPI::GetData(float *input_A, float *input_B){ 102 | for(int i = 0; i < row1*col1; i++){ 103 | A[i] = input_A[i]; 104 | } 105 | for(int i = 0; i < row2*col2; i++){ 106 | B[i] = input_B[i]; 107 | } 108 | } 109 | 110 | /* 111 | Prepare E,F , which need communication between server1 and server2 112 | */ 113 | void Triplet::Rec(int MPI_dest, MPI_Comm server2server_comm){ 114 | int countA = 0; 115 | int countB = 0; 116 | int countA2; 117 | int countB2; 118 | /* 119 | delta compression 120 | */ 121 | for(int i = 0; i < row1*col1; i++){ 122 | delta_A[i] = A[i] - old_A[i]; 123 | if(abs(delta_A[i]) < 0.00001){ 124 | countA++; 125 | } 126 | 
old_A[i] = A[i]; 127 | } 128 | for(int i = 0; i < row2*col2; i++){ 129 | delta_B[i] = B[i] - old_B[i]; 130 | if(abs(delta_B[i]) < 0.00001){ 131 | countB++; 132 | } 133 | old_B[i] = B[i]; 134 | } 135 | for(int i = 0; i < row1*col1; i++){ 136 | E1[i] = A[i] - U[i]; 137 | } 138 | for(int i = 0; i < row2*col2; i++){ 139 | F1[i] = B[i] - V[i]; 140 | } 141 | int csrFlagA1 = 0, csrFlagA2 = 0; 142 | struct CSR deltaACsr1; 143 | struct CSR deltaACsr2; 144 | MPI_Status status; 145 | if((double)countA/(row1*col1) > 0.75){ 146 | csrFlagA1 = 1; 147 | } 148 | MPI_Sendrecv(&csrFlagA1, 1, MPI_INT, MPI_dest, 0, &csrFlagA2, 1, MPI_INT, MPI_dest, 0, server2server_comm, &status); 149 | if(csrFlagA1==1 && csrFlagA2==1){ 150 | MallocCsr(deltaACsr1, countA, row1, col1); 151 | Compress(delta_A, deltaACsr1, countA, row1, col1); 152 | MPI_Sendrecv(&countA, 1, MPI_INT, MPI_dest, 0, &countA2, 1, MPI_INT, MPI_dest, 0, server2server_comm, &status); 153 | MallocCsr(deltaACsr2, countA2, row1, col1); 154 | double stime = timestamp(); 155 | MPI_Sendrecv(deltaACsr1.val, countA, MPI_FLOAT, MPI_dest, 0, deltaACsr2.val, countA2, MPI_FLOAT, MPI_dest, 0, server2server_comm, &status); 156 | MPI_Sendrecv(deltaACsr1.col, countA, MPI_INT, MPI_dest, 0, deltaACsr2.col, countA2, MPI_INT, MPI_dest, 0, server2server_comm, &status); 157 | MPI_Sendrecv(deltaACsr1.row, row1, MPI_INT, MPI_dest, 0, deltaACsr2.row, row1, MPI_INT, MPI_dest, 0, server2server_comm, &status); 158 | double etime = timestamp(); 159 | commtime += (etime-stime); 160 | deCompress(deltaACsr2, countA2, row1, col1, E2); 161 | ReleaseCsr(deltaACsr1); 162 | ReleaseCsr(deltaACsr2); 163 | } 164 | if(csrFlagA1==1 && csrFlagA2==0){ 165 | MallocCsr(deltaACsr1, countA, row1, col1); 166 | Compress(delta_A, deltaACsr1, countA, row1, col1); 167 | double stime = timestamp(); 168 | MPI_Send(&countA, 1, MPI_INT, MPI_dest, 0, server2server_comm); 169 | MPI_Send(deltaACsr1.val, countA, MPI_FLOAT, MPI_dest, 0, server2server_comm); 170 | MPI_Send(deltaACsr1.col, countA, MPI_INT, MPI_dest, 0, server2server_comm); 171 | MPI_Send(deltaACsr1.row, row1, MPI_INT, MPI_dest, 0, server2server_comm); 172 | MPI_Recv(E2, row1*col1, MPI_FLOAT, MPI_dest, 0, server2server_comm, &status); 173 | double etime = timestamp(); 174 | commtime += (etime-stime); 175 | ReleaseCsr(deltaACsr1); 176 | } 177 | if(csrFlagA1==0 && csrFlagA2==1){ 178 | MPI_Recv(&countA2, 1, MPI_INT, MPI_dest, 0, server2server_comm, &status); 179 | MallocCsr(deltaACsr2, countA2, row1, col1); 180 | double stime = timestamp(); 181 | MPI_Recv(deltaACsr2.val, countA2, MPI_FLOAT, MPI_dest, 0, server2server_comm, &status); 182 | MPI_Recv(deltaACsr2.col, countA2, MPI_INT, MPI_dest, 0, server2server_comm, &status); 183 | MPI_Recv(deltaACsr2.row, row1, MPI_INT, MPI_dest, 0, server2server_comm, &status); 184 | MPI_Send(E1, row1*col1, MPI_FLOAT, MPI_dest, 0, server2server_comm); 185 | double etime = timestamp(); 186 | commtime += (etime-stime); 187 | deCompress(deltaACsr2, countA2, row1, col1, E2); 188 | ReleaseCsr(deltaACsr2); 189 | } 190 | if(csrFlagA1==0 && csrFlagA2==0){ 191 | double stime = timestamp(); 192 | MPI_Sendrecv(E1, row1*col1, MPI_FLOAT, MPI_dest, 0, E2, row1*col1, MPI_FLOAT, MPI_dest, 0, server2server_comm, &status); 193 | double etime = timestamp(); 194 | commtime += (etime-stime); 195 | } 196 | 197 | int csrFlagB1 = 0, csrFlagB2 = 0; 198 | if((double)countB/(row2*col2) > 0.75){ 199 | csrFlagB1 = 1; 200 | } 201 | MPI_Sendrecv(&csrFlagB1, 1, MPI_INT, MPI_dest, 0, &csrFlagB2, 1, MPI_INT, MPI_dest, 0, server2server_comm, &status); 202 
| struct CSR deltaBCsr1; 203 | struct CSR deltaBCsr2; 204 | if(csrFlagB1==1 && csrFlagB2==1){ 205 | MallocCsr(deltaBCsr1, countB, row2, col2); 206 | Compress(delta_B, deltaBCsr1, countB, row2, col2); 207 | MPI_Sendrecv(&countB, 1, MPI_INT, MPI_dest, 0, &countB2, 1, MPI_INT, MPI_dest, 0, server2server_comm, &status); 208 | MallocCsr(deltaBCsr2, countB2, row2, col2); 209 | double stime = timestamp(); 210 | MPI_Sendrecv(deltaBCsr1.val, countB, MPI_FLOAT, MPI_dest, 0, deltaBCsr2.val, countB2, MPI_FLOAT, MPI_dest, 0, server2server_comm, &status); 211 | MPI_Sendrecv(deltaBCsr1.col, countB, MPI_INT, MPI_dest, 0, deltaBCsr2.col, countB2, MPI_INT, MPI_dest, 0, server2server_comm, &status); 212 | MPI_Sendrecv(deltaBCsr1.row, row2, MPI_INT, MPI_dest, 0, deltaBCsr2.row, row2, MPI_INT, MPI_dest, 0, server2server_comm, &status); 213 | double etime = timestamp(); 214 | commtime += (etime-stime); 215 | deCompress(deltaBCsr2, countB2, row2, col2, F2); 216 | ReleaseCsr(deltaBCsr1); 217 | ReleaseCsr(deltaBCsr2); 218 | } 219 | if(csrFlagB1==1 && csrFlagB2==0){ 220 | MallocCsr(deltaBCsr1, countB, row2, col2); 221 | Compress(delta_B, deltaBCsr1, countB, row2, col2); 222 | MPI_Send(&countB, 1, MPI_INT, MPI_dest, 0, server2server_comm); 223 | double stime = timestamp(); 224 | MPI_Send(deltaBCsr1.val, countB, MPI_FLOAT, MPI_dest, 0, server2server_comm); 225 | MPI_Send(deltaBCsr1.col, countB, MPI_INT, MPI_dest, 0, server2server_comm); 226 | MPI_Send(deltaBCsr1.row, row2, MPI_INT, MPI_dest, 0, server2server_comm); 227 | MPI_Recv(F2, row2*col2, MPI_FLOAT, MPI_dest, 0, server2server_comm, &status); 228 | double etime = timestamp(); 229 | commtime += (etime-stime); 230 | ReleaseCsr(deltaBCsr1); 231 | } 232 | if(csrFlagB1==0 && csrFlagB2==1){ 233 | MPI_Recv(&countB2, 1, MPI_INT, MPI_dest, 0, server2server_comm, &status); 234 | MallocCsr(deltaBCsr2, countB2, row2, col2); 235 | MPI_Recv(deltaBCsr2.val, countB2, MPI_FLOAT, MPI_dest, 0, server2server_comm, &status); 236 | double stime = timestamp(); 237 | MPI_Recv(deltaBCsr2.col, countB2, MPI_INT, MPI_dest, 0, server2server_comm, &status); 238 | MPI_Recv(deltaBCsr2.row, row2, MPI_INT, MPI_dest, 0, server2server_comm, &status); 239 | MPI_Send(F1, row2*col2, MPI_FLOAT, MPI_dest, 0, server2server_comm); 240 | double etime = timestamp(); 241 | commtime += (etime-stime); 242 | deCompress(deltaBCsr2, countB2, row2, col2, F2); 243 | ReleaseCsr(deltaBCsr2); 244 | } 245 | if(csrFlagB1==0 && csrFlagB2==0){ 246 | double stime = timestamp(); 247 | MPI_Sendrecv(F1, row2*col2, MPI_FLOAT, MPI_dest, 0, F2, row2*col2, MPI_FLOAT, MPI_dest, 0, server2server_comm, &status); 248 | double etime = timestamp(); 249 | commtime += (etime-stime); 250 | } 251 | for(int i = 0; i < row1*col1; i++){ 252 | E[i] = E1[i] + E2[i]; 253 | } 254 | for(int i = 0; i < row2*col2; i++){ 255 | F[i] = F1[i] + F2[i]; 256 | } 257 | } 258 | 259 | /* 260 | Transfer data from CPU to GPU 261 | */ 262 | void Triplet_noMPI::cpuToGPU(){ 263 | CopyHtoD(GPU_E, E, row1*col1); 264 | CopyHtoD(GPU_A, D, row1*col1); 265 | flag1 = 1; 266 | CopyHtoD(GPU_F, F, row2*col2); 267 | flag2 = 1; 268 | CopyHtoD(GPU_B, B, row2*col2); 269 | flag3 = 1; 270 | } 271 | 272 | /* 273 | Pipeline 274 | */ 275 | void* Triplet_noMPI::threadPipeline(void *ptr){ 276 | Triplet *pt = (Triplet*)ptr; 277 | while(pt->pFlag != 2){ // thread kill 278 | while(pt->pFlag == 0){ 279 | continue; 280 | } 281 | if(pt->pFlag == 2){ 282 | break; 283 | } 284 | pt->cpuToGPU(); 285 | pt->pFlag = 0; 286 | } 287 | return pt; 288 | } 289 | 290 | /* 291 | OP operation for triplet
292 | C = triplet.op(flag) 293 | */ 294 | void Triplet_noMPI::OP(int flag){ 295 | pFlag = 1; 296 | CopyHtoD(GPU_Z, Z, row1*col2); 297 | cudaTripletMul(flag); 298 | CopyDtoH(C, GPU_C, row1*col2); 299 | flag1 = 0; 300 | flag2 = 0; 301 | flag3 = 0; 302 | } 303 | 304 | 305 | /* 306 | Receive data from client 307 | @params: 308 | MPI_dest: client rank 309 | MPI_Comm: MPI_Comm of client2server 310 | */ 311 | void Triplet::Recv(int MPI_dest, MPI_Comm client2server_comm){ 312 | MPI_Status status; 313 | MPI_Recv(U, row1*col1, MPI_FLOAT, MPI_dest, 0, client2server_comm, &status); 314 | MPI_Recv(V, row2*col2, MPI_FLOAT, MPI_dest, 0, client2server_comm, &status); 315 | MPI_Recv(Z, row1*col2, MPI_FLOAT, MPI_dest, 0, client2server_comm, &status); 316 | } 317 | 318 | /* 319 | Activation function 320 | */ 321 | void Triplet::Activation(int MPI_dest, MPI_Comm server2server_comm){ 322 | for(int i = 0; i < row1*col2; i++){ 323 | if(C[i] < 0) C[i] = 0; 324 | } 325 | MPI_Status status; 326 | MPI_Sendrecv(C, row1*col2, MPI_FLOAT, MPI_dest, 0, C, row1*col2, MPI_FLOAT, MPI_dest, 0, server2server_comm, &status); 327 | } 328 | 329 | /* 330 | convolution shape: 331 | input: row1*col1 332 | Convolution kernels: row2*col2 333 | output: (row1-row2+1)*(col1-col2+1) 334 | */ 335 | void ConvTriplet_noMPI::GetShape(int in_row1, int in_col1, int in_row2, int in_col2, int in_num){ 336 | row1 = in_row1; 337 | col1 = in_col1; 338 | row2 = in_row2; 339 | col2 = in_col2; 340 | num = in_num; 341 | o_row = row1-row2+1; 342 | o_col = col1-col2+1; 343 | } 344 | void ConvTriplet_noMPI::Initial(){ 345 | commtime = 0; 346 | A = (float*)malloc(sizeof(float)*num*row1*col1); 347 | B = (float*)malloc(sizeof(float)*row2*col2); 348 | C = (float*)malloc(sizeof(float)*num*o_row*o_col); 349 | U = (float*)malloc(sizeof(float)*row2*col2); 350 | V = (float*)malloc(sizeof(float)*row2*col2); 351 | Z = (float*)malloc(sizeof(float)*row2*col2); 352 | E1 = (float*)malloc(sizeof(float)*num*o_row*o_col*row2*col2); 353 | E2 = (float*)malloc(sizeof(float)*num*o_row*o_col*row2*col2); 354 | E = (float*)malloc(sizeof(float)*num*o_row*o_col*row2*col2); 355 | F1 = (float*)malloc(sizeof(float)*row2*col2); 356 | F2 = (float*)malloc(sizeof(float)*row2*col2); 357 | F = (float*)malloc(sizeof(float)*row2*col2); 358 | MallocD(GPU_A, num*row1*col1); 359 | MallocD(GPU_B, row2*col2); 360 | MallocD(GPU_C, num*o_row*o_col); 361 | MallocD(GPU_E, num*o_row*o_col*row2*col2); 362 | MallocD(GPU_F, row2*col2); 363 | MallocD(GPU_Z, row1*col2); 364 | 365 | } 366 | void ConvTriplet_noMPI::Release(){ 367 | free(A); 368 | free(B); 369 | free(C); 370 | free(Z); 371 | free(U); 372 | free(V); 373 | free(E1); 374 | free(E2); 375 | free(E); 376 | free(F1); 377 | free(F2); 378 | free(F); 379 | ReleaseGPU(GPU_A); 380 | ReleaseGPU(GPU_B); 381 | ReleaseGPU(GPU_C); 382 | ReleaseGPU(GPU_E); 383 | ReleaseGPU(GPU_F); 384 | ReleaseGPU(GPU_Z); 385 | } 386 | void ConvTriplet_noMPI::GetData(float *input_A, float *input_filter){ 387 | for(int i = 0; i < num*row1*col1; i++){ 388 | A[i] = input_A[i]; 389 | } 390 | for(int i = 0; i < row2*col2; i++){ 391 | B[i] = input_filter[i]; 392 | } 393 | } 394 | 395 | /* 396 | Prepare E and F for the convolution layer 397 | */ 398 | void ConvTriplet::Rec(int MPI_dest, MPI_Comm server2server_comm){ 399 | for(int s = 0; s < num; s++){ 400 | for(int i = 0; i < o_row; i++){ 401 | for(int j = 0; j < o_col; j++){ 402 | for(int k = 0; k < row2; k++){ 403 | for(int h = 0; h < col2; h++){ 404 | E1[s*o_row*o_col*row2*col2+i*o_col*row2*col2+j*row2*col2+k*col2+h] =
A[s*row1*col1+(i+k)*col1+j+h] - U[k*col2+h]; 405 | } 406 | } 407 | } 408 | } 409 | } 410 | for(int i = 0; i < row2*col2; i++){ 411 | F1[i] = B[i] - V[i]; 412 | } 413 | double stime = timestamp(); 414 | MPI_Status status; 415 | MPI_Sendrecv(E1, num*o_row*o_col*row2*col2, MPI_FLOAT, MPI_dest, 0, E2, num*o_row*o_col*row2*col2, MPI_FLOAT, MPI_dest, 0, server2server_comm, &status); 416 | MPI_Sendrecv(F1, row2*col2, MPI_FLOAT, MPI_dest, 0, F2, row2*col2, MPI_FLOAT, MPI_dest, 0, server2server_comm, &status); 417 | double etime = timestamp(); 418 | commtime += (etime-stime); 419 | for(int i = 0; i < num*o_row*o_col*row2*col2; i++){ 420 | E[i] = E1[i] + E2[i]; 421 | } 422 | for(int i = 0; i < row2*col2; i++){ 423 | F[i] = F1[i] + F2[i]; 424 | } 425 | } 426 | float ConvMul(int flag, float *A, float *B, float *E, float *F, float *Z, int size){ 427 | float re = 0; 428 | for(int i = 0; i < size; i++){ 429 | re += flag*E[i]*F[i] + A[i]*F[i] + E[i]*B[i] + Z[i]; 430 | } 431 | return re; 432 | } 433 | /* 434 | OP operation for the convolution layer 435 | */ 436 | void ConvTriplet_noMPI::OP(int flag){ 437 | CopyHtoD(GPU_A, A, num*row1*col1); 438 | CopyHtoD(GPU_B, B, row2*col2); 439 | CopyHtoD(GPU_E, E, num*o_row*o_col*row2*col2); 440 | CopyHtoD(GPU_F, F, row2*col2); 441 | CopyHtoD(GPU_Z, Z, row2*col2); 442 | GPU_OP(flag); 443 | CopyDtoH(C, GPU_C, o_row*o_col); 444 | } 445 | 446 | void ConvTriplet::Recv(int MPI_dest, MPI_Comm client2server_comm){ 447 | MPI_Status status; 448 | MPI_Recv(U, row2*col2, MPI_FLOAT, MPI_dest, 0, client2server_comm, &status); 449 | MPI_Recv(V, row2*col2, MPI_FLOAT, MPI_dest, 0, client2server_comm, &status); 450 | MPI_Recv(Z, row2*col2, MPI_FLOAT, MPI_dest, 0, client2server_comm, &status); 451 | } 452 | 453 | 454 | /* 455 | Select a batch of data 456 | */ 457 | void SelectBatch(float *Img, float *y_hat, float *batchImg, float *batchY, int cur, int batch_size, int SIZE){ 458 | for(int i = cur; i < cur+batch_size; i++){ 459 | for(int j = 0; j < SIZE; j++){ 460 | batchImg[(i-cur)*SIZE+j] = Img[i*SIZE+j]; 461 | } 462 | } 463 | for(int i = cur; i < cur+batch_size; i++){ 464 | batchY[i-cur] = y_hat[i]; 465 | } 466 | } 467 | -------------------------------------------------------------------------------- /source/read.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file contains functions for loading data from storage 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include "../include/read.h" 8 | #include 9 | #include 10 | #include 11 | #include "../include/ParSecureML.h" 12 | /* Thread-safe function that returns a random number between min and max (inclusive). 13 | This function takes ~142% the time that calling rand() would take. For this extra 14 | cost you get a better uniform distribution and thread-safety. */ 15 | extern int intRand(const int & min, const int & max, bool lastUsed = false); 16 | 17 | void read_Images(string filename, float *images, int SIZE, int train_size){ 18 | ifstream file(filename, ios::in); 19 | if(!file.is_open()){ 20 | cout << "Open file failed." << endl; 21 | } 22 | for(int i = 0; i < train_size*SIZE; i++){ 23 | file >> images[i]; 24 | } 25 | file.close(); 26 | } 27 | void read_Label(string filename, float *labels, int train_size){ 28 | ifstream file(filename, ios::in); 29 | if(!file.is_open()){ 30 | cout << "Open file failed."
<< endl; 31 | } 32 | for(int i = 0; i < train_size; i++){ 33 | file >> labels[i]; 34 | } 35 | file.close(); 36 | } 37 | 38 | /* 39 | generate random input to evaluation speed 40 | */ 41 | void Generator(float *array, int bound, int SIZE){ 42 | #pragma omp parallel for 43 | for(int i = 0; i < SIZE; i++){ 44 | array[i] = intRand(0, bound); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /test/MLP.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This is the source code of MLP 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "../include/ParSecureML.h" 22 | #include "../include/read.h" 23 | #include "../include/output.h" 24 | #include "mpi.h" 25 | using namespace std; 26 | 27 | float *labels; 28 | float *images; 29 | string imagesPath; 30 | string labelsPath; 31 | int isRand = 1; 32 | int train_size; 33 | int batch_size; 34 | int SIZE; 35 | int row; 36 | int col; 37 | int FSIZE; 38 | 39 | int dim1; 40 | int dim2 = 128; 41 | int dim3 = 64; 42 | int dim4 = 10; 43 | int main(int argc, char **argv){ 44 | MPI_Init(&argc, &argv); 45 | int MPI_rank, MPI_size; 46 | MPI_Comm_rank(MPI_COMM_WORLD, &MPI_rank); 47 | MPI_Comm_size(MPI_COMM_WORLD, &MPI_size); 48 | 49 | MPI_Group world_group; 50 | MPI_Comm_group(MPI_COMM_WORLD, &world_group); 51 | 52 | double all_stime, all_etime; 53 | all_stime = timestamp(); 54 | double offline_stime; 55 | string setupFile = argv[1]; 56 | ifstream setup(setupFile); 57 | if(!setup.is_open()){ 58 | cout << "Setup not exist." << endl; 59 | return 0; 60 | } 61 | string line; 62 | while(getline(setup, line)){ 63 | stringstream para(line); 64 | string attr; 65 | para >> attr; 66 | if(attr == "--imagesPath"){ 67 | para >> imagesPath; 68 | } 69 | else if(attr == "--labelsPath"){ 70 | para >> labelsPath; 71 | } 72 | else if(attr == "--train_size"){ 73 | para >> train_size; 74 | } 75 | else if(attr == "--batch_size"){ 76 | para >> batch_size; 77 | } 78 | else if(attr == "--SIZE"){ 79 | para >> SIZE; 80 | } 81 | else if(attr == "--row"){ 82 | para >> row; 83 | } 84 | else if(attr == "--col"){ 85 | para >> col; 86 | } 87 | else if(attr == "--FSIZE"){ 88 | para >> FSIZE; 89 | } 90 | else{ 91 | cout << "Setup parameters error." 
<< endl; 92 | return 0; 93 | } 94 | } 95 | dim1 = SIZE; 96 | setup.close(); 97 | 98 | offline_stime = timestamp(); 99 | if(MPI_rank == MPI_client){ 100 | 101 | /* 102 | Init MPI group: client2server1 clientserver2 103 | */ 104 | 105 | const int group_client2server1[CHUNK_SIZE] = {MPI_client, MPI_server1}; 106 | const int group_client2server2[CHUNK_SIZE] = {MPI_client, MPI_server2}; 107 | 108 | MPI_Group client2server1; 109 | MPI_Group client2server2; 110 | 111 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server1, &client2server1); 112 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server2, &client2server2); 113 | 114 | MPI_Comm client2server1_comm; 115 | MPI_Comm client2server2_comm; 116 | 117 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server1, 0, &client2server1_comm); 118 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server2, 0, &client2server2_comm); 119 | 120 | int MPI_group_client2server1_rank; 121 | int MPI_group_client2server2_rank; 122 | 123 | MPI_Comm_rank(client2server1_comm, &MPI_group_client2server1_rank); 124 | MPI_Comm_rank(client2server2_comm, &MPI_group_client2server2_rank); 125 | 126 | int server1_group_rank = 0, server2_group_rank = 0; 127 | 128 | if(MPI_group_client2server1_rank == 0){ 129 | server1_group_rank = 1; 130 | } 131 | if(MPI_group_client2server2_rank == 0){ 132 | server2_group_rank = 1; 133 | } 134 | 135 | 136 | if((images = (float*)malloc(sizeof(float)*train_size*SIZE)) == NULL){ 137 | cout << "Malloc error." << endl; 138 | return 0; 139 | } 140 | if((labels = (float*)malloc(sizeof(float)*train_size)) == NULL){ 141 | cout << "Malloc error." << endl; 142 | return 0; 143 | } 144 | if(isRand == 1){ 145 | Generator(images, 256, train_size*SIZE); 146 | Generator(labels, 10, train_size); 147 | }else{ 148 | read_Images(imagesPath, images, SIZE, train_size); 149 | Generator(labels, 10, train_size); 150 | } 151 | float *model1 = (float*)malloc(sizeof(float)*dim1*dim2); 152 | float *model2 = (float*)malloc(sizeof(float)*dim2*dim3); 153 | float *model3 = (float*)malloc(sizeof(float)*dim3*dim4); 154 | for(int i = 0; i < dim1*dim2; i++){ 155 | model1[i] = (float)rand()/RAND_MAX; 156 | } 157 | for(int i = 0; i < dim2*dim3; i++){ 158 | model2[i] = (float)rand()/RAND_MAX; 159 | } 160 | for(int i = 0; i < dim3*dim4; i++){ 161 | model3[i] = (float)rand()/RAND_MAX; 162 | } 163 | float *images_th1 = (float*)malloc(sizeof(float)*train_size*SIZE); 164 | float *images_th2 = (float*)malloc(sizeof(float)*train_size*SIZE); 165 | float *model1_th1 = (float*)malloc(sizeof(float)*dim1*dim2); 166 | float *model1_th2 = (float*)malloc(sizeof(float)*dim1*dim2); 167 | float *model2_th1 = (float*)malloc(sizeof(float)*dim2*dim3); 168 | float *model2_th2 = (float*)malloc(sizeof(float)*dim2*dim3); 169 | float *model3_th1 = (float*)malloc(sizeof(float)*dim3*dim4); 170 | float *model3_th2 = (float*)malloc(sizeof(float)*dim3*dim4); 171 | float *y_hat1 = (float*)malloc(sizeof(float)*train_size); 172 | float *y_hat2 = (float*)malloc(sizeof(float)*train_size); 173 | double c_mstime, c_metime; 174 | c_mstime = timestamp(); 175 | for(int i = 0; i < train_size; i++){ 176 | for(int j = 0; j < SIZE; j++){ 177 | images_th1[i*SIZE+j] = rand() % 256; 178 | images_th2[i*SIZE+j] = images[i*SIZE+j] - images_th1[i*SIZE+j]; 179 | } 180 | } 181 | for(int i = 0; i < dim1*dim2; i++){ 182 | model1_th1[i] = (float)rand() /RAND_MAX; 183 | model1_th2[i] = model1[i] - model1_th1[i]; 184 | } 185 | for(int i = 0; i < dim2*dim3; i++){ 186 | model2_th1[i] = (float)rand() /RAND_MAX; 187 | model2_th2[i] = 
model2[i] - model2_th1[i]; 188 | } 189 | for(int i = 0; i < dim3*dim4; i++){ 190 | model3_th1[i] = (float)rand() /RAND_MAX; 191 | model3_th2[i] = model3[i] - model3_th1[i]; 192 | } 193 | for(int i = 0; i < train_size; i++){ 194 | y_hat1[i] = rand()%10; 195 | y_hat2[i] = labels[i] - y_hat1[i]; 196 | } 197 | 198 | int flag1 = 0; 199 | int flag2 = -1; 200 | Support sp1, sp2, sp3; 201 | sp1.GetShape(batch_size, dim1, dim1, dim2); 202 | sp1.Initial(); 203 | sp1.Assign(); 204 | 205 | sp2.GetShape(batch_size, dim2, dim2, dim3); 206 | sp2.Initial(); 207 | sp2.Assign(); 208 | 209 | sp3.GetShape(batch_size, dim3, dim3, dim4); 210 | sp3.Initial(); 211 | sp3.Assign(); 212 | 213 | c_metime = timestamp(); 214 | 215 | MPI_Send(&flag1, 1, MPI_INT, server1_group_rank, 0, client2server1_comm); 216 | MPI_Send(images_th1, train_size*SIZE, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 217 | MPI_Send(model1_th1, dim1*dim2, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 218 | MPI_Send(model2_th1, dim2*dim3, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 219 | MPI_Send(model3_th1, dim3*dim4, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 220 | MPI_Send(y_hat1, train_size, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 221 | 222 | MPI_Send(&flag2, 1, MPI_INT, server2_group_rank, 0, client2server2_comm); 223 | MPI_Send(images_th2, train_size*SIZE, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 224 | MPI_Send(model1_th2, dim1*dim2, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 225 | MPI_Send(model2_th2, dim2*dim3, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 226 | MPI_Send(model3_th2, dim3*dim4, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 227 | MPI_Send(y_hat2, train_size, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 228 | 229 | sp1.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 230 | sp2.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 231 | sp3.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 232 | 233 | free(images_th1); 234 | free(images_th2); 235 | free(model1_th1); 236 | free(model1_th2); 237 | free(model2_th1); 238 | free(model2_th2); 239 | free(model3_th1); 240 | free(model3_th2); 241 | free(y_hat1); 242 | free(y_hat2); 243 | sp1.Release(); 244 | sp2.Release(); 245 | sp3.Release(); 246 | float server1_offline_time, server2_offline_time; 247 | float server1_online_time, server2_online_time; 248 | 249 | MPI_Status status; 250 | MPI_Recv(&server1_offline_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 251 | MPI_Recv(&server2_offline_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 252 | MPI_Recv(&server1_online_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 253 | MPI_Recv(&server2_online_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 254 | 255 | all_etime = timestamp(); 256 | float all_time = all_etime - all_stime; 257 | 258 | 259 | Output out( 260 | all_time, 261 | server1_offline_time > server2_offline_time ? server1_offline_time : server2_offline_time, 262 | server1_online_time > server2_online_time ? 
server1_online_time : server2_online_time, 263 | "MLP" 264 | ); 265 | out.draw(); 266 | if(client2server1 != MPI_GROUP_NULL) MPI_Group_free(&client2server1); 267 | if(client2server2 != MPI_GROUP_NULL) MPI_Group_free(&client2server2); 268 | } 269 | else{ 270 | 271 | /* 272 | Init MPI_group: client2server, server2server 273 | */ 274 | 275 | const int group_client2server[CHUNK_SIZE] = {MPI_client, MPI_rank}; 276 | const int group_server2server[CHUNK_SIZE] = {MPI_server1, MPI_server2}; 277 | 278 | MPI_Group client2server; 279 | MPI_Group server2server; 280 | 281 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server, &client2server); 282 | MPI_Group_incl(world_group, CHUNK_SIZE, group_server2server, &server2server); 283 | 284 | MPI_Comm client2server_comm; 285 | MPI_Comm server2server_comm; 286 | 287 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server, 0, &client2server_comm); 288 | MPI_Comm_create_group(MPI_COMM_WORLD, server2server, 0, &server2server_comm); 289 | 290 | int MPI_group_client2server_rank; 291 | int MPI_group_server2server_rank; 292 | 293 | MPI_Comm_rank(client2server_comm, &MPI_group_client2server_rank); 294 | MPI_Comm_rank(server2server_comm, &MPI_group_server2server_rank); 295 | 296 | int client_group_rank = 0, server_group_rank = 0; 297 | 298 | if(MPI_group_client2server_rank == 0){ 299 | client_group_rank = 1; 300 | } 301 | if(MPI_group_server2server_rank == 0){ 302 | server_group_rank = 1; 303 | } 304 | 305 | double offline_etime; 306 | MPI_Status status; 307 | int flag; 308 | float y_hat[train_size]; 309 | float *img = (float*)malloc(sizeof(float)*train_size*SIZE); 310 | float *model1 = (float*)malloc(sizeof(float)*dim1*dim2); 311 | float *model2 = (float*)malloc(sizeof(float)*dim2*dim3); 312 | float *model3 = (float*)malloc(sizeof(float)*dim3*dim4); 313 | MPI_Recv(&flag, 1, MPI_INT, client_group_rank, 0, client2server_comm, &status); 314 | MPI_Recv(img, train_size*SIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 315 | MPI_Recv(model1, dim1*dim2, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 316 | MPI_Recv(model2, dim2*dim3, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 317 | MPI_Recv(model3, dim3*dim4, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 318 | MPI_Recv(y_hat, train_size, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 319 | Triplet layer1, layer2, layer3; 320 | 321 | layer1.GetShape(batch_size, dim1, dim1, dim2); 322 | layer1.Initial(); 323 | 324 | layer2.GetShape(batch_size, dim2, dim2, dim3); 325 | layer2.Initial(); 326 | 327 | layer3.GetShape(batch_size, dim3, dim3, dim4); 328 | layer3.Initial(); 329 | 330 | layer1.Recv(client_group_rank, client2server_comm); 331 | layer2.Recv(client_group_rank, client2server_comm); 332 | layer3.Recv(client_group_rank, client2server_comm); 333 | 334 | offline_etime = timestamp(); 335 | double online_stime, online_etime; 336 | online_stime = timestamp(); 337 | float batchY[batch_size]; 338 | float *batchImg = (float*)malloc(sizeof(float)*batch_size*SIZE); 339 | for(int i = 0; i+batch_size <= train_size; i+=batch_size){ 340 | SelectBatch(img, y_hat, batchImg, batchY, i, batch_size, SIZE); 341 | layer1.GetData(batchImg, model1); 342 | layer1.Rec(server_group_rank, server2server_comm); 343 | layer1.OP(flag); 344 | layer1.Activation(server_group_rank, server2server_comm); 345 | layer2.GetData(layer1.C, model2); 346 | layer2.Rec(server_group_rank, server2server_comm); 347 | layer2.OP(flag); 348 | layer1.Activation(server_group_rank, 
server2server_comm); 349 | layer3.GetData(layer2.C, model3); 350 | layer3.Rec(server_group_rank, server2server_comm); 351 | layer3.OP(flag); 352 | layer1.Activation(server_group_rank, server2server_comm); 353 | } 354 | online_etime = timestamp(); 355 | layer1.Release(); 356 | layer2.Release(); 357 | layer3.Release(); 358 | float online_time = online_etime-online_stime; 359 | float offline_time = offline_etime-offline_stime; 360 | MPI_Send(&offline_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 361 | MPI_Send(&online_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 362 | if(client2server != MPI_GROUP_NULL){ 363 | MPI_Group_free(&client2server); 364 | } 365 | if(server2server != MPI_GROUP_NULL){ 366 | MPI_Group_free(&server2server); 367 | } 368 | } 369 | if(world_group != MPI_GROUP_NULL){ 370 | MPI_Group_free(&world_group); 371 | } 372 | MPI_Finalize(); 373 | return 0; 374 | } 375 | -------------------------------------------------------------------------------- /test/RNN.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This is the source code of RNN 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "../include/ParSecureML.h" 22 | #include "../include/read.h" 23 | #include "../include/output.h" 24 | #include "mpi.h" 25 | using namespace std; 26 | 27 | float *labels; 28 | float *images; 29 | string imagesPath; 30 | string labelsPath; 31 | int isRand = 1; 32 | int train_size = 10000; 33 | int batch_size = 128; 34 | int SIZE = 728; 35 | 36 | int dim1 = 128; 37 | int main(int argc, char **argv){ 38 | MPI_Init(&argc, &argv); 39 | int MPI_rank, MPI_size; 40 | MPI_Comm_rank(MPI_COMM_WORLD, &MPI_rank); 41 | MPI_Comm_size(MPI_COMM_WORLD, &MPI_size); 42 | 43 | MPI_Group world_group; 44 | MPI_Comm_group(MPI_COMM_WORLD, &world_group); 45 | 46 | double all_stime, all_etime; 47 | all_stime = timestamp(); 48 | double offline_stime; 49 | 50 | offline_stime = timestamp(); 51 | if(MPI_rank == MPI_client){ 52 | 53 | /* 54 | Init MPI group: client2server1 clientserver2 55 | */ 56 | 57 | const int group_client2server1[CHUNK_SIZE] = {MPI_client, MPI_server1}; 58 | const int group_client2server2[CHUNK_SIZE] = {MPI_client, MPI_server2}; 59 | 60 | MPI_Group client2server1; 61 | MPI_Group client2server2; 62 | 63 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server1, &client2server1); 64 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server2, &client2server2); 65 | 66 | MPI_Comm client2server1_comm; 67 | MPI_Comm client2server2_comm; 68 | 69 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server1, 0, &client2server1_comm); 70 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server2, 0, &client2server2_comm); 71 | 72 | int MPI_group_client2server1_rank; 73 | int MPI_group_client2server2_rank; 74 | 75 | MPI_Comm_rank(client2server1_comm, &MPI_group_client2server1_rank); 76 | MPI_Comm_rank(client2server2_comm, &MPI_group_client2server2_rank); 77 | 78 | int server1_group_rank = 0, server2_group_rank = 0; 79 | 80 | if(MPI_group_client2server1_rank == 0){ 81 | server1_group_rank = 1; 82 | } 83 | if(MPI_group_client2server2_rank == 0){ 84 | server2_group_rank = 1; 85 | } 86 | 87 | if((images = (float*)malloc(sizeof(float)*train_size*SIZE)) == NULL){ 88 | cout << "Malloc error." 
<< endl; 89 | return 0; 90 | } 91 | if((labels = (float*)malloc(sizeof(float)*train_size)) == NULL){ 92 | cout << "Malloc error." << endl; 93 | return 0; 94 | } 95 | if(isRand == 1){ 96 | Generator(images, 256, train_size*SIZE); 97 | Generator(labels, 10, train_size); 98 | } 99 | float *model1 = (float*)malloc(sizeof(float)*dim1*batch_size); 100 | for(int i = 0; i < dim1*batch_size; i++){ 101 | model1[i] = (float)rand()/RAND_MAX; 102 | } 103 | float *model2 = (float*)malloc(sizeof(float)*dim1*dim1); 104 | for(int i = 0; i < dim1*dim1; i++){ 105 | model2[i] = (float)rand()/RAND_MAX; 106 | } 107 | float *images_th1 = (float*)malloc(sizeof(float)*train_size*SIZE); 108 | float *images_th2 = (float*)malloc(sizeof(float)*train_size*SIZE); 109 | float *model1_th1 = (float*)malloc(sizeof(float)*dim1*batch_size); 110 | float *model1_th2 = (float*)malloc(sizeof(float)*dim1*batch_size); 111 | float *model2_th1 = (float*)malloc(sizeof(float)*dim1*dim1); 112 | float *model2_th2 = (float*)malloc(sizeof(float)*dim1*dim1); 113 | float *y_hat1 = (float*)malloc(sizeof(float)*train_size); 114 | float *y_hat2 = (float*)malloc(sizeof(float)*train_size); 115 | double c_mstime, c_metime; 116 | c_mstime = timestamp(); 117 | for(int i = 0; i < train_size; i++){ 118 | for(int j = 0; j < SIZE; j++){ 119 | images_th1[i*SIZE+j] = rand() % 256; 120 | images_th2[i*SIZE+j] = images[i*SIZE+j] - images_th1[i*SIZE+j]; 121 | } 122 | } 123 | for(int i = 0; i < dim1*batch_size; i++){ 124 | model1_th1[i] = (float)rand() /RAND_MAX; 125 | model1_th2[i] = model1[i] - model1_th1[i]; 126 | } 127 | for(int i = 0; i < dim1*dim1; i++){ 128 | model2_th1[i] = (float)rand() /RAND_MAX; 129 | model2_th2[i] = model2[i] - model2_th1[i]; 130 | } 131 | for(int i = 0; i < train_size; i++){ 132 | y_hat1[i] = rand()%10; 133 | y_hat2[i] = labels[i] - y_hat1[i]; 134 | } 135 | 136 | int flag1 = 0; 137 | int flag2 = -1; 138 | Support sp1, sp2, sp3; 139 | sp1.GetShape(dim1, batch_size, batch_size, SIZE); 140 | sp1.Initial(); 141 | sp1.Assign(); 142 | sp2.GetShape(dim1, batch_size, batch_size, SIZE); 143 | sp2.Initial(); 144 | sp2.Assign(); 145 | 146 | sp3.GetShape(dim1, dim1, dim1, SIZE); 147 | sp3.Initial(); 148 | sp3.Assign(); 149 | 150 | c_metime = timestamp(); 151 | MPI_Send(&flag1, 1, MPI_INT, server1_group_rank, 0, client2server1_comm); 152 | MPI_Send(images_th1, train_size*SIZE, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 153 | MPI_Send(model1_th1, dim1*batch_size, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 154 | MPI_Send(model2_th1, dim1*dim1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 155 | MPI_Send(y_hat1, train_size, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 156 | 157 | MPI_Send(&flag2, 1, MPI_INT, server2_group_rank, 0, client2server2_comm); 158 | MPI_Send(images_th2, train_size*SIZE, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 159 | MPI_Send(model1_th2, dim1*batch_size, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 160 | MPI_Send(model2_th2, dim1*dim1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 161 | MPI_Send(y_hat2, train_size, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 162 | 163 | sp1.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 164 | sp2.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 165 | sp3.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 166 | 167 | free(images_th1); 168 | free(images_th2); 169 | free(model1_th1); 
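// The splitting loops above implement plain additive secret sharing: for every tensor the
// client draws one random share and derives the other as the difference, so each server
// receives a single random-looking share while share1 + share2 still reconstructs the
// original data. A minimal sketch of that split/reconstruct pair (the helper names are
// illustrative and not defined by the framework):
#include <cstdlib>

// Split secret (length n) into two additive shares; share1 is drawn uniformly from [0, bound).
void SplitShares(const float *secret, float *share1, float *share2, int n, int bound){
    for(int i = 0; i < n; i++){
        share1[i] = (float)(rand() % bound);   // random share, e.g. bound = 256 for pixel data
        share2[i] = secret[i] - share1[i];     // complementary share
    }
}

// Reconstruction is element-wise addition of the two shares.
void Reconstruct(const float *share1, const float *share2, float *out, int n){
    for(int i = 0; i < n; i++){
        out[i] = share1[i] + share2[i];
    }
}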
170 | free(model1_th2); 171 | free(model2_th1); 172 | free(model2_th2); 173 | free(y_hat1); 174 | free(y_hat2); 175 | sp1.Release(); 176 | sp2.Release(); 177 | sp3.Release(); 178 | float server1_offline_time, server2_offline_time; 179 | float server1_online_time, server2_online_time; 180 | 181 | MPI_Status status; 182 | MPI_Recv(&server1_offline_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 183 | MPI_Recv(&server2_offline_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 184 | MPI_Recv(&server1_online_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 185 | MPI_Recv(&server2_online_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 186 | 187 | all_etime = timestamp(); 188 | float all_time = all_etime - all_stime; 189 | Output out( 190 | all_time, 191 | server1_offline_time > server2_offline_time ? server1_offline_time : server2_offline_time, 192 | server1_online_time > server2_online_time ? server1_online_time : server2_online_time, 193 | "RNN" 194 | ); 195 | out.draw(); 196 | if(client2server1 != MPI_GROUP_NULL) MPI_Group_free(&client2server1); 197 | if(client2server2 != MPI_GROUP_NULL) MPI_Group_free(&client2server2); 198 | } 199 | else{ 200 | 201 | /* 202 | Init MPI_group: client2server, server2server 203 | */ 204 | 205 | const int group_client2server[CHUNK_SIZE] = {MPI_client, MPI_rank}; 206 | const int group_server2server[CHUNK_SIZE] = {MPI_server1, MPI_server2}; 207 | 208 | MPI_Group client2server; 209 | MPI_Group server2server; 210 | 211 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server, &client2server); 212 | MPI_Group_incl(world_group, CHUNK_SIZE, group_server2server, &server2server); 213 | 214 | MPI_Comm client2server_comm; 215 | MPI_Comm server2server_comm; 216 | 217 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server, 0, &client2server_comm); 218 | MPI_Comm_create_group(MPI_COMM_WORLD, server2server, 0, &server2server_comm); 219 | 220 | int MPI_group_client2server_rank; 221 | int MPI_group_server2server_rank; 222 | 223 | MPI_Comm_rank(client2server_comm, &MPI_group_client2server_rank); 224 | MPI_Comm_rank(server2server_comm, &MPI_group_server2server_rank); 225 | 226 | int client_group_rank = 0, server_group_rank = 0; 227 | 228 | if(MPI_group_client2server_rank == 0){ 229 | client_group_rank = 1; 230 | } 231 | if(MPI_group_server2server_rank == 0){ 232 | server_group_rank = 1; 233 | } 234 | 235 | double offline_etime; 236 | MPI_Status status; 237 | int flag; 238 | float y_hat[train_size]; 239 | float *img = (float*)malloc(sizeof(float)*train_size*SIZE); 240 | float *model1 = (float*)malloc(sizeof(float)*dim1*batch_size); 241 | float *model2 = (float*)malloc(sizeof(float)*dim1*dim1); 242 | MPI_Recv(&flag, 1, MPI_INT, client_group_rank, 0, client2server_comm, &status); 243 | MPI_Recv(img, train_size*SIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 244 | MPI_Recv(model1, dim1*batch_size, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 245 | MPI_Recv(model2, dim1*dim1, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 246 | MPI_Recv(y_hat, train_size, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 247 | 248 | Triplet T1, T2, T3; 249 | 250 | T1.GetShape(dim1, batch_size, batch_size, SIZE); 251 | T1.Initial(); 252 | T2.GetShape(dim1, batch_size, batch_size, SIZE); 253 | T2.Initial(); 254 | T3.GetShape(dim1, dim1, dim1, SIZE); 255 | T3.Initial(); 256 | T1.Recv(client_group_rank, client2server_comm); 257 | 
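// The three Recv() calls here fetch this server's shares of the multiplication triplets
// (U, V, Z) generated by the client. In the online loop below, Rec() opens the masked
// values E = A - U and F = B - V by exchanging and summing the per-server parts, and
// OP() then evaluates C_i = flag_i*E*F + A_i*F + E*B_i + Z_i with flag = 0 on one server
// and -1 on the other, so that C_1 + C_2 = A*B while neither server ever sees A or B.
// A scalar sketch of that identity, inferred from the ConvMul() helper shown earlier;
// the concrete values are assumptions chosen to be exactly representable in float:
#include <cassert>

int main(){
    float a1 = 1.5f,  a2 = 2.5f;    // shares of a = 4
    float b1 = 0.5f,  b2 = 2.5f;    // shares of b = 3
    float u1 = 0.5f,  u2 = 0.5f;    // shares of the mask u = 1
    float v1 = 1.5f,  v2 = 0.5f;    // shares of the mask v = 2
    float z1 = 0.75f, z2 = 1.25f;   // shares of z = u*v = 2
    float E = (a1 - u1) + (a2 - u2);            // opened value E = a - u = 3
    float F = (b1 - v1) + (b2 - v2);            // opened value F = b - v = 1
    float c1 =  0*E*F + a1*F + E*b1 + z1;       // server 1's share of the product
    float c2 = -1*E*F + a2*F + E*b2 + z2;       // server 2's share of the product
    assert(c1 + c2 == (a1 + a2)*(b1 + b2));     // reconstructs a*b = 12
    return 0;
}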
T2.Recv(client_group_rank, client2server_comm); 258 | T3.Recv(client_group_rank, client2server_comm); 259 | 260 | offline_etime = timestamp(); 261 | double online_stime, online_etime; 262 | float batchY[batch_size]; 263 | float *batchImg = (float*)malloc(sizeof(float)*batch_size*SIZE); 264 | online_stime = timestamp(); 265 | 266 | for(int i = 0; i+batch_size <= train_size; i+=batch_size){ 267 | SelectBatch(img, y_hat, batchImg, batchY, i, batch_size, SIZE); 268 | T1.GetData(model1, batchImg); 269 | T1.Rec(server_group_rank, server2server_comm); 270 | T1.OP(flag); 271 | T2.GetData(model1, batchImg); 272 | T2.Rec(server_group_rank, server2server_comm); 273 | T2.OP(flag); 274 | T3.GetData(model2, T1.C); 275 | T3.Rec(server_group_rank, server2server_comm); 276 | T3.OP(flag); 277 | for(int i = 0; i < dim1*SIZE; i++){ 278 | T3.C[i] += T2.C[i]; 279 | } 280 | } 281 | 282 | T1.Release(); 283 | T2.Release(); 284 | T3.Release(); 285 | online_etime = timestamp(); 286 | float online_time = online_etime-online_stime; 287 | float offline_time = offline_etime-offline_stime; 288 | MPI_Send(&offline_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 289 | MPI_Send(&online_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 290 | if(client2server != MPI_GROUP_NULL){ 291 | MPI_Group_free(&client2server); 292 | } 293 | if(server2server != MPI_GROUP_NULL){ 294 | MPI_Group_free(&server2server); 295 | } 296 | } 297 | if(world_group != MPI_GROUP_NULL){ 298 | MPI_Group_free(&world_group); 299 | } 300 | MPI_Finalize(); 301 | return 0; 302 | } 303 | -------------------------------------------------------------------------------- /test/SVM.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This is the source code of SVM 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "../include/ParSecureML.h" 22 | #include "../include/read.h" 23 | #include "../include/output.h" 24 | #include "mpi.h" 25 | using namespace std; 26 | 27 | #define MIN(a, b) ((a) < (b) ? (a) : (b)) 28 | #define MAX(a, b) ((a) > (b) ? (a) : (b)) 29 | #define OUT_SIZE 50 30 | 31 | float* labels; 32 | float* images; 33 | string imagesPath; 34 | string labelsPath; 35 | int isRand = 1; 36 | int train_size; 37 | int batch_size; 38 | int SIZE; 39 | int row; 40 | int col; 41 | int FSIZE; 42 | float alpha; 43 | 44 | int dim1 = 784; 45 | int dim2 = 784; 46 | int dim3 = 64; 47 | int dim4 = 10; 48 | 49 | float clipAlpha(float aj, float H, float L) { 50 | if (aj > H) 51 | return H; 52 | if (aj < L) 53 | return L; 54 | return aj; 55 | } 56 | 57 | int main(int argc, char** argv) { 58 | MPI_Init(&argc, &argv); 59 | int MPI_rank, MPI_size; 60 | MPI_Comm_rank(MPI_COMM_WORLD, &MPI_rank); 61 | MPI_Comm_size(MPI_COMM_WORLD, &MPI_size); 62 | 63 | MPI_Group world_group; 64 | MPI_Comm_group(MPI_COMM_WORLD, &world_group); 65 | 66 | double all_stime, all_etime; 67 | all_stime = timestamp(); 68 | double offline_stime; 69 | string setupFile = argv[1]; 70 | ifstream setup(setupFile); 71 | if (!setup.is_open()) { 72 | cout << "Setup not exist." 
<< endl; 73 | return 0; 74 | } 75 | string line; 76 | while (getline(setup, line)) { 77 | stringstream para(line); 78 | string attr; 79 | para >> attr; 80 | if (attr == "--imagesPath") { 81 | para >> imagesPath; 82 | } else if (attr == "--labelsPath") { 83 | para >> labelsPath; 84 | } else if (attr == "--train_size") { 85 | para >> train_size; 86 | } else if (attr == "--batch_size") { 87 | para >> batch_size; 88 | } else if (attr == "--SIZE") { 89 | para >> SIZE; 90 | } else if (attr == "--row") { 91 | para >> row; 92 | } else if (attr == "--col") { 93 | para >> col; 94 | } else if (attr == "--FSIZE") { 95 | para >> FSIZE; 96 | } else if (attr == "--alpha") { 97 | para >> alpha; 98 | } else { 99 | cout << "Setup parameters error." << endl; 100 | return 0; 101 | } 102 | } 103 | setup.close(); 104 | batch_size = train_size; 105 | 106 | offline_stime = timestamp(); 107 | if (MPI_rank == MPI_client) { 108 | 109 | /* 110 | Init MPI group: client2server1 clientserver2 111 | */ 112 | 113 | const int group_client2server1[CHUNK_SIZE] = {MPI_client, MPI_server1}; 114 | const int group_client2server2[CHUNK_SIZE] = {MPI_client, MPI_server2}; 115 | 116 | MPI_Group client2server1; 117 | MPI_Group client2server2; 118 | 119 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server1, &client2server1); 120 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server2, &client2server2); 121 | 122 | MPI_Comm client2server1_comm; 123 | MPI_Comm client2server2_comm; 124 | 125 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server1, 0, &client2server1_comm); 126 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server2, 0, &client2server2_comm); 127 | 128 | int MPI_group_client2server1_rank; 129 | int MPI_group_client2server2_rank; 130 | 131 | MPI_Comm_rank(client2server1_comm, &MPI_group_client2server1_rank); 132 | MPI_Comm_rank(client2server2_comm, &MPI_group_client2server2_rank); 133 | 134 | int server1_group_rank = 0, server2_group_rank = 0; 135 | 136 | if(MPI_group_client2server1_rank == 0){ 137 | server1_group_rank = 1; 138 | } 139 | if(MPI_group_client2server2_rank == 0){ 140 | server2_group_rank = 1; 141 | } 142 | 143 | if ((images = (float*)malloc(sizeof(float) * train_size * SIZE)) == NULL) { 144 | cout << "Malloc error." << endl; 145 | return 0; 146 | } 147 | if ((labels = (float*)malloc(sizeof(float) * train_size)) == NULL) { 148 | cout << "Malloc error." 
<< endl; 149 | return 0; 150 | } 151 | if (isRand == 1) { 152 | Generator(images, 256, train_size * SIZE); 153 | Generator(labels, 10, train_size); 154 | } else { 155 | read_Images(imagesPath, images, SIZE, train_size); 156 | Generator(labels, 10, train_size); 157 | } 158 | float* alphas = (float*)malloc(sizeof(float) * train_size * OUT_SIZE); 159 | for (int i = 0; i < train_size * OUT_SIZE; i++) { 160 | alphas[i] = 0; 161 | } 162 | float* images_th1 = (float*)malloc(sizeof(float) * train_size * SIZE); 163 | float* images_th2 = (float*)malloc(sizeof(float) * train_size * SIZE); 164 | float* alphas1 = (float*)malloc(sizeof(float) * train_size * OUT_SIZE); 165 | float* alphas2 = (float*)malloc(sizeof(float) * train_size * OUT_SIZE); 166 | float* y_hat1 = (float*)malloc(sizeof(float) * train_size); 167 | float* y_hat2 = (float*)malloc(sizeof(float) * train_size); 168 | double c_mstime, c_metime; 169 | 170 | for (int i = 0; i < train_size; i++) { 171 | for (int j = 0; j < SIZE; j++) { 172 | images_th1[i * SIZE + j] = rand() % 256; 173 | images_th2[i * SIZE + j] = images[i * SIZE + j] - images_th1[i * SIZE + j]; 174 | } 175 | } 176 | 177 | for (int i = 0; i < train_size * OUT_SIZE; i++) { 178 | alphas1[i] = 0; 179 | alphas2[i] = 0; 180 | } 181 | for (int i = 0; i < train_size; i++) { 182 | y_hat1[i] = rand() % 10; 183 | y_hat2[i] = labels[i] - y_hat1[i]; 184 | } 185 | 186 | int flag1 = 0; 187 | int flag2 = -1; 188 | c_mstime = timestamp(); 189 | Support sp1, sp2, sp3, sp4, sp5, sp6, sp7, sp8, sp9; 190 | 191 | sp1.GetShape(batch_size, SIZE, SIZE, 1); 192 | sp1.Initial(); 193 | sp1.Assign(); 194 | sp2.GetShape(OUT_SIZE, batch_size, batch_size, 1); 195 | sp2.Initial(); 196 | sp2.Assign(); 197 | sp3.GetShape(OUT_SIZE, SIZE, SIZE, OUT_SIZE); 198 | sp3.Initial(); 199 | sp3.Assign(); 200 | 201 | c_metime = timestamp(); 202 | MPI_Send(&flag1, 1, MPI_INT, server1_group_rank, 0, client2server1_comm); 203 | MPI_Send(images_th1, train_size*SIZE, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 204 | MPI_Send(alphas1, train_size * OUT_SIZE, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 205 | MPI_Send(y_hat1, train_size, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 206 | 207 | MPI_Send(&flag2, 1, MPI_INT, server2_group_rank, 0, client2server2_comm); 208 | MPI_Send(images_th2, train_size*SIZE, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 209 | MPI_Send(alphas2, train_size * OUT_SIZE, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 210 | MPI_Send(y_hat2, train_size, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 211 | 212 | sp1.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 213 | sp2.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 214 | sp3.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 215 | 216 | MPI_Status status; 217 | MPI_Recv(alphas1, train_size * OUT_SIZE, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 218 | MPI_Recv(alphas2, train_size * OUT_SIZE, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 219 | for (int i = 0; i < train_size * OUT_SIZE; i++) { 220 | alphas[i] = alphas1[i] + alphas2[i]; 221 | } 222 | free(images_th1); 223 | free(images_th2); 224 | free(alphas); 225 | free(alphas1); 226 | free(alphas2); 227 | free(y_hat1); 228 | free(y_hat2); 229 | sp1.Release(); 230 | sp2.Release(); 231 | float server1_offline_time, server2_offline_time; 232 | float server1_online_time, 
server2_online_time; 233 | 234 | MPI_Recv(&server1_offline_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 235 | MPI_Recv(&server2_offline_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 236 | MPI_Recv(&server1_online_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 237 | MPI_Recv(&server2_online_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 238 | 239 | all_etime = timestamp(); 240 | float all_time = all_etime - all_stime; 241 | Output out( 242 | all_time, 243 | server1_offline_time > server2_offline_time ? server1_offline_time : server2_offline_time, 244 | server1_online_time > server2_online_time ? server1_online_time : server2_online_time, 245 | "SVM" 246 | ); 247 | out.draw(); 248 | if(client2server1 != MPI_GROUP_NULL) MPI_Group_free(&client2server1); 249 | if(client2server2 != MPI_GROUP_NULL) MPI_Group_free(&client2server2); 250 | } else { 251 | 252 | /* 253 | Init MPI_group: client2server, server2server 254 | */ 255 | 256 | const int group_client2server[CHUNK_SIZE] = {MPI_client, MPI_rank}; 257 | const int group_server2server[CHUNK_SIZE] = {MPI_server1, MPI_server2}; 258 | 259 | MPI_Group client2server; 260 | MPI_Group server2server; 261 | 262 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server, &client2server); 263 | MPI_Group_incl(world_group, CHUNK_SIZE, group_server2server, &server2server); 264 | 265 | MPI_Comm client2server_comm; 266 | MPI_Comm server2server_comm; 267 | 268 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server, 0, &client2server_comm); 269 | MPI_Comm_create_group(MPI_COMM_WORLD, server2server, 0, &server2server_comm); 270 | 271 | int MPI_group_client2server_rank; 272 | int MPI_group_server2server_rank; 273 | 274 | MPI_Comm_rank(client2server_comm, &MPI_group_client2server_rank); 275 | MPI_Comm_rank(server2server_comm, &MPI_group_server2server_rank); 276 | 277 | int client_group_rank = 0, server_group_rank = 0; 278 | 279 | if(MPI_group_client2server_rank == 0){ 280 | client_group_rank = 1; 281 | } 282 | if(MPI_group_server2server_rank == 0){ 283 | server_group_rank = 1; 284 | } 285 | 286 | double offline_etime; 287 | MPI_Status status; 288 | int flag; 289 | float y_hat[train_size]; 290 | float* img = (float*)malloc(sizeof(float) * train_size * SIZE); 291 | float* alphas = (float*)malloc(sizeof(float) * train_size * OUT_SIZE); 292 | 293 | MPI_Recv(&flag, 1, MPI_INT, client_group_rank, 0, client2server_comm, &status); 294 | MPI_Recv(img, train_size*SIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 295 | MPI_Recv(alphas, train_size * OUT_SIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 296 | MPI_Recv(y_hat, train_size, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 297 | 298 | Triplet T1, T2, T3; 299 | batch_size = train_size; 300 | T1.GetShape(batch_size, SIZE, SIZE, 1); 301 | T1.Initial(); 302 | T2.GetShape(OUT_SIZE, batch_size, batch_size, 1); 303 | T2.Initial(); 304 | T3.GetShape(OUT_SIZE, SIZE, SIZE, OUT_SIZE); 305 | T3.Initial(); 306 | float commTime = 0; 307 | T1.Recv(client_group_rank, client2server_comm); 308 | T2.Recv(client_group_rank, client2server_comm); 309 | T3.Recv(client_group_rank, client2server_comm); 310 | commTime += T1.commtime + T2.commtime + T3.commtime; 311 | offline_etime = timestamp(); 312 | double online_stime, online_etime; 313 | 314 | float* batchY = y_hat; 315 | float* batchImg = img; 316 | online_stime = timestamp(); 317 | float toler = 0.1; 318 | float C = 1; 319 | 
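// The loop below performs one pass of the classic simplified-SMO update, with the decision
// values and inner products obtained through the secure triplet operations (T1, T2, T3)
// instead of in the clear. A plain, non-secure sketch of the alpha-pair update that the
// loop implements; the helper name and signature are illustrative only:
#include <algorithm>
#include <cmath>

// yJ/yL are labels, Ej/El prediction errors, kjl/kjj/kll kernel (inner-product) values,
// Cbound the box constraint. Returns false when the pair is skipped.
bool SmoPairUpdate(float &alphaJ, float &alphaL, float yJ, float yL,
                   float Ej, float El, float kjl, float kjj, float kll, float Cbound){
    float L, H;
    if(yJ != yL){                                   // feasible range for alphaL
        L = std::max(0.0f, alphaL - alphaJ);
        H = std::min(Cbound, Cbound + alphaL - alphaJ);
    }else{
        L = std::max(0.0f, alphaL + alphaJ - Cbound);
        H = std::min(Cbound, alphaL + alphaJ);
    }
    if(L == H) return false;
    float eta = 2.0f*kjl - kjj - kll;               // curvature along the chosen pair
    if(eta >= 0.0f) return false;
    float alphaLold = alphaL;
    alphaL -= yL*(Ej - El)/eta;                     // unconstrained step on alphaL
    alphaL = std::min(H, std::max(L, alphaL));      // clip into [L, H]
    if(std::fabs(alphaL - alphaLold) < 0.0001f) return false;
    alphaJ += yL*yJ*(alphaLold - alphaL);           // keep the equality constraint
    return true;
}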
memset(alphas, 0, sizeof(float) * batch_size * OUT_SIZE); 320 | for (int j = 0; j < 1; j++) { //for i in range(m) 321 | float alphas_pro_Y[batch_size * OUT_SIZE]; 322 | for (int k = 0; k < batch_size * OUT_SIZE; k++) 323 | alphas_pro_Y[k] = alphas[k] * batchY[k % batch_size]; 324 | 325 | T1.GetData(batchImg, batchImg + (j * SIZE)); 326 | T1.Rec(server_group_rank, server2server_comm); 327 | T1.OP(flag); 328 | 329 | T2.GetData(alphas_pro_Y, T1.C); 330 | T2.Rec(server_group_rank, server2server_comm); 331 | T2.OP(flag); 332 | commTime += T1.commtime + T2.commtime; 333 | float fXj = T2.C[0]; 334 | float Ej = fXj - batchY[j]; 335 | 336 | if (((batchY[j] * Ej < -toler) && (alphas[j] < C)) || ((batchY[j] * Ej > toler) && (alphas[j] > 0))) { 337 | int l = rand() % (batch_size - 2); 338 | T1.GetData(batchImg, batchImg + (l * SIZE)); 339 | T1.Rec(server_group_rank, server2server_comm); 340 | T1.OP(flag); 341 | T2.GetData(alphas_pro_Y, T1.C); 342 | T2.Rec(server_group_rank, server2server_comm); 343 | T2.OP(flag); 344 | float fXl = T2.C[0]; 345 | float El = fXl - batchY[l]; 346 | 347 | float alphaJold = alphas[j]; 348 | float alphaLold = alphas[l]; 349 | float L, H; 350 | if (batchY[j] != batchY[l]) { 351 | L = MAX(0, alphas[l] - alphas[j]); 352 | H = MIN(C, C + alphas[l] - alphas[j]); 353 | } else { 354 | L = MAX(0, alphas[l] + alphas[j] - C); 355 | H = MIN(C, alphas[l] + alphas[j]); 356 | } 357 | if (L == H) 358 | continue; 359 | 360 | T3.GetData(batchImg + j * SIZE, batchImg + l * SIZE); 361 | T3.Rec(server_group_rank, server2server_comm); 362 | // T3.OP(flag); 363 | commTime += T3.commtime; 364 | float tmp1 = T3.C[0]; 365 | T3.GetData(batchImg + j * SIZE, batchImg + j * SIZE); 366 | T3.Rec(server_group_rank, server2server_comm); 367 | // T3.OP(flag); 368 | commTime += T3.commtime; 369 | float tmp2 = T3.C[0]; 370 | T3.GetData(batchImg + l * SIZE, batchImg + l * SIZE); 371 | T3.Rec(server_group_rank, server2server_comm); 372 | // T3.OP(flag); 373 | commTime += T3.commtime; 374 | float tmp3 = T3.C[0]; 375 | float eta = 2 * tmp1 - tmp2 - tmp3; 376 | if (eta >= 0) 377 | continue; 378 | alphas[l] -= batchY[l] * (Ej - El) / eta; 379 | alphas[l] = clipAlpha(alphas[l], H, L); 380 | if (alphas[l] - alphaLold < 0.0001 && alphas[l] - alphaLold > -0.0001) 381 | continue; 382 | alphas[j] += batchY[l] * batchY[j] * (alphaLold - alphas[l]); 383 | } 384 | } 385 | 386 | online_etime = timestamp(); 387 | MPI_Send(alphas, train_size * OUT_SIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm); 388 | T1.Release(); 389 | T2.Release(); 390 | T3.Release(); 391 | free(img); 392 | free(alphas); 393 | float online_time = online_etime-online_stime; 394 | float offline_time = offline_etime-offline_stime; 395 | 396 | MPI_Send(&offline_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 397 | MPI_Send(&online_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 398 | if(client2server != MPI_GROUP_NULL){ 399 | MPI_Group_free(&client2server); 400 | } 401 | if(server2server != MPI_GROUP_NULL){ 402 | MPI_Group_free(&server2server); 403 | } 404 | } 405 | if(world_group != MPI_GROUP_NULL){ 406 | MPI_Group_free(&world_group); 407 | } 408 | MPI_Finalize(); 409 | return 0; 410 | } 411 | -------------------------------------------------------------------------------- /test/conv.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This is the source code of CNN 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 
12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "../include/ParSecureML.h" 22 | #include "../include/read.h" 23 | #include "../include/output.h" 24 | #include "mpi.h" 25 | using namespace std; 26 | 27 | float *labels; 28 | float *images; 29 | string imagesPath; 30 | string labelsPath; 31 | int isRand = 1; 32 | int train_size; 33 | int batch_size; 34 | int SIZE; 35 | int row; 36 | int col; 37 | int FSIZE; 38 | 39 | int dim1; 40 | int dim2; 41 | int dim3 = 64; 42 | int dim4 = 10; 43 | int main(int argc, char **argv){ 44 | MPI_Init(&argc, &argv); 45 | int MPI_rank, MPI_size; 46 | 47 | MPI_Comm_rank(MPI_COMM_WORLD, &MPI_rank); 48 | MPI_Comm_size(MPI_COMM_WORLD, &MPI_size); 49 | 50 | MPI_Group world_group; 51 | MPI_Comm_group(MPI_COMM_WORLD, &world_group); 52 | 53 | double all_stime, all_etime; 54 | all_stime = timestamp(); 55 | double offline_stime; 56 | string setupFile = argv[1]; 57 | ifstream setup(setupFile); 58 | if(!setup.is_open()){ 59 | cout << "Setup not exist." << endl; 60 | return 0; 61 | } 62 | string line; 63 | while(getline(setup, line)){ 64 | stringstream para(line); 65 | string attr; 66 | para >> attr; 67 | if(attr == "--imagesPath"){ 68 | para >> imagesPath; 69 | } 70 | else if(attr == "--labelsPath"){ 71 | para >> labelsPath; 72 | } 73 | else if(attr == "--train_size"){ 74 | para >> train_size; 75 | } 76 | else if(attr == "--batch_size"){ 77 | para >> batch_size; 78 | } 79 | else if(attr == "--SIZE"){ 80 | para >> SIZE; 81 | } 82 | else if(attr == "--row"){ 83 | para >> row; 84 | } 85 | else if(attr == "--col"){ 86 | para >> col; 87 | } 88 | else if(attr == "--FSIZE"){ 89 | para >> FSIZE; 90 | } 91 | else{ 92 | cout << "Setup parameters error." << endl; 93 | return 0; 94 | } 95 | } 96 | dim1 = row*col; 97 | dim2 = (row-FSIZE+1)*(col-FSIZE+1); 98 | setup.close(); 99 | offline_stime = timestamp(); 100 | if(MPI_rank == MPI_client){ 101 | 102 | /* 103 | Init MPI group: client2server1 clientserver2 104 | */ 105 | 106 | const int group_client2server1[CHUNK_SIZE] = {MPI_client, MPI_server1}; 107 | const int group_client2server2[CHUNK_SIZE] = {MPI_client, MPI_server2}; 108 | 109 | MPI_Group client2server1; 110 | MPI_Group client2server2; 111 | 112 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server1, &client2server1); 113 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server2, &client2server2); 114 | 115 | MPI_Comm client2server1_comm; 116 | MPI_Comm client2server2_comm; 117 | 118 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server1, 0, &client2server1_comm); 119 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server2, 0, &client2server2_comm); 120 | 121 | int MPI_group_client2server1_rank; 122 | int MPI_group_client2server2_rank; 123 | 124 | MPI_Comm_rank(client2server1_comm, &MPI_group_client2server1_rank); 125 | MPI_Comm_rank(client2server2_comm, &MPI_group_client2server2_rank); 126 | 127 | int server1_group_rank = 0, server2_group_rank = 0; 128 | 129 | if(MPI_group_client2server1_rank == 0){ 130 | server1_group_rank = 1; 131 | } 132 | if(MPI_group_client2server2_rank == 0){ 133 | server2_group_rank = 1; 134 | } 135 | 136 | if((images = (float*)malloc(sizeof(float)*train_size*SIZE)) == NULL){ 137 | cout << "Malloc error." << endl; 138 | return 0; 139 | } 140 | if((labels = (float*)malloc(sizeof(float)*train_size)) == NULL){ 141 | cout << "Malloc error." 
<< endl; 142 | return 0; 143 | } 144 | if(isRand == 1){ 145 | Generator(images, 256, train_size*SIZE); 146 | Generator(labels, 10, train_size); 147 | }else{ 148 | read_Images(imagesPath, images, SIZE, train_size); 149 | Generator(labels, 10, train_size); 150 | } 151 | float *model1 = (float*)malloc(sizeof(float)*FSIZE*FSIZE); 152 | float *model2 = (float*)malloc(sizeof(float)*dim2*dim3); 153 | float *model3 = (float*)malloc(sizeof(float)*dim3*dim4); 154 | for(int i = 0; i < FSIZE*FSIZE; i++){ 155 | model1[i] = (float)rand()/RAND_MAX; 156 | } 157 | for(int i = 0; i < dim2*dim3; i++){ 158 | model2[i] = (float)rand()/RAND_MAX; 159 | } 160 | for(int i = 0; i < dim3*dim4; i++){ 161 | model3[i] = (float)rand()/RAND_MAX; 162 | } 163 | float *images_th1 = (float*)malloc(sizeof(float)*train_size*SIZE); 164 | float *images_th2 = (float*)malloc(sizeof(float)*train_size*SIZE); 165 | float *model1_th1 = (float*)malloc(sizeof(float)*FSIZE*FSIZE); 166 | float *model1_th2 = (float*)malloc(sizeof(float)*FSIZE*FSIZE); 167 | float *model2_th1 = (float*)malloc(sizeof(float)*dim2*dim3); 168 | float *model2_th2 = (float*)malloc(sizeof(float)*dim2*dim3); 169 | float *model3_th1 = (float*)malloc(sizeof(float)*dim3*dim4); 170 | float *model3_th2 = (float*)malloc(sizeof(float)*dim3*dim4); 171 | float *y_hat1 = (float*)malloc(sizeof(float)*train_size); 172 | float *y_hat2 = (float*)malloc(sizeof(float)*train_size); 173 | double c_mstime, c_metime; 174 | c_mstime = timestamp(); 175 | for(int i = 0; i < train_size; i++){ 176 | for(int j = 0; j < SIZE; j++){ 177 | images_th1[i*SIZE+j] = rand() % 256; 178 | images_th2[i*SIZE+j] = images[i*SIZE+j] - images_th1[i*SIZE+j]; 179 | } 180 | } 181 | for(int i = 0; i < FSIZE*FSIZE; i++){ 182 | model1_th1[i] = (float)rand() /RAND_MAX; 183 | model1_th2[i] = model1[i] - model1_th1[i]; 184 | } 185 | for(int i = 0; i < dim2*dim3; i++){ 186 | model2_th1[i] = (float)rand() /RAND_MAX; 187 | model2_th2[i] = model2[i] - model2_th1[i]; 188 | } 189 | for(int i = 0; i < dim3*dim4; i++){ 190 | model3_th1[i] = (float)rand() /RAND_MAX; 191 | model3_th2[i] = model3[i] - model3_th1[i]; 192 | } 193 | for(int i = 0; i < train_size; i++){ 194 | y_hat1[i] = rand()%10; 195 | y_hat2[i] = labels[i] - y_hat1[i]; 196 | } 197 | 198 | int flag1 = 0; 199 | int flag2 = -1; 200 | ConvSupport sp1; 201 | Support sp2, sp3; 202 | sp1.GetShape(FSIZE, FSIZE); 203 | sp1.Initial(); 204 | sp1.Assign(); 205 | 206 | sp2.GetShape(batch_size, dim2, dim2, dim3); 207 | sp2.Initial(); 208 | sp2.Assign(); 209 | 210 | sp3.GetShape(batch_size, dim3, dim3, dim4); 211 | sp3.Initial(); 212 | sp3.Assign(); 213 | 214 | c_metime = timestamp(); 215 | MPI_Send(&flag1, 1, MPI_INT, server1_group_rank, 0, client2server1_comm); 216 | MPI_Send(images_th1, train_size*SIZE, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 217 | MPI_Send(model1_th1, FSIZE*FSIZE, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 218 | MPI_Send(model2_th1, dim2*dim3, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 219 | MPI_Send(model3_th1, dim3*dim4, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 220 | MPI_Send(y_hat1, train_size, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 221 | 222 | MPI_Send(&flag2, 1, MPI_INT, server2_group_rank, 0, client2server2_comm); 223 | MPI_Send(images_th2, train_size*SIZE, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 224 | MPI_Send(model1_th2, FSIZE*FSIZE, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 225 | MPI_Send(model2_th2, dim2*dim3, MPI_FLOAT, server2_group_rank, 0, 
client2server2_comm); 226 | MPI_Send(model3_th2, dim3*dim4, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 227 | MPI_Send(y_hat2, train_size, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 228 | 229 | sp1.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 230 | sp2.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 231 | sp3.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 232 | 233 | free(images_th1); 234 | free(images_th2); 235 | free(model1_th1); 236 | free(model1_th2); 237 | free(model2_th1); 238 | free(model2_th2); 239 | free(model3_th1); 240 | free(model3_th2); 241 | free(y_hat1); 242 | free(y_hat2); 243 | sp1.Release(); 244 | sp2.Release(); 245 | sp3.Release(); 246 | float server1_offline_time, server2_offline_time; 247 | float server1_online_time, server2_online_time; 248 | MPI_Status status; 249 | MPI_Recv(&server1_offline_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 250 | MPI_Recv(&server2_offline_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 251 | MPI_Recv(&server1_online_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 252 | MPI_Recv(&server2_online_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 253 | all_etime = timestamp(); 254 | float all_time = all_etime - all_stime; 255 | Output out( 256 | all_time, 257 | server1_offline_time > server2_offline_time ? server1_offline_time : server2_offline_time, 258 | server1_online_time > server2_online_time ? server1_online_time : server2_online_time, 259 | "CNN" 260 | ); 261 | out.draw(); 262 | if(client2server1 != MPI_GROUP_NULL) MPI_Group_free(&client2server1); 263 | if(client2server2 != MPI_GROUP_NULL) MPI_Group_free(&client2server2); 264 | } 265 | else{ 266 | 267 | /* 268 | Init MPI_group: client2server, server2server 269 | */ 270 | 271 | const int group_client2server[CHUNK_SIZE] = {MPI_client, MPI_rank}; 272 | const int group_server2server[CHUNK_SIZE] = {MPI_server1, MPI_server2}; 273 | 274 | MPI_Group client2server; 275 | MPI_Group server2server; 276 | 277 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server, &client2server); 278 | MPI_Group_incl(world_group, CHUNK_SIZE, group_server2server, &server2server); 279 | 280 | MPI_Comm client2server_comm; 281 | MPI_Comm server2server_comm; 282 | 283 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server, 0, &client2server_comm); 284 | MPI_Comm_create_group(MPI_COMM_WORLD, server2server, 0, &server2server_comm); 285 | 286 | int MPI_group_client2server_rank; 287 | int MPI_group_server2server_rank; 288 | 289 | MPI_Comm_rank(client2server_comm, &MPI_group_client2server_rank); 290 | MPI_Comm_rank(server2server_comm, &MPI_group_server2server_rank); 291 | 292 | int client_group_rank = 0, server_group_rank = 0; 293 | 294 | if(MPI_group_client2server_rank == 0){ 295 | client_group_rank = 1; 296 | } 297 | if(MPI_group_server2server_rank == 0){ 298 | server_group_rank = 1; 299 | } 300 | 301 | double offline_etime; 302 | MPI_Status status; 303 | int flag; 304 | float y_hat[train_size]; 305 | float *img = (float*)malloc(sizeof(float)*train_size*SIZE); 306 | float *model1 = (float*)malloc(sizeof(float)*FSIZE*FSIZE); 307 | float *model2 = (float*)malloc(sizeof(float)*dim2*dim3); 308 | float *model3 = (float*)malloc(sizeof(float)*dim3*dim4); 309 | MPI_Recv(&flag, 1, MPI_INT, client_group_rank, 0, client2server_comm, &status); 310 | MPI_Recv(img, 
train_size*SIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 311 | MPI_Recv(model1, FSIZE*FSIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 312 | MPI_Recv(model2, dim2*dim3, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 313 | MPI_Recv(model3, dim3*dim4, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 314 | MPI_Recv(y_hat, train_size, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 315 | 316 | ConvTriplet layer1; 317 | Triplet layer2, layer3; 318 | layer1.GetShape(row, col, FSIZE, FSIZE, batch_size); 319 | layer1.Initial(); 320 | layer2.GetShape(batch_size, dim2, dim2, dim3); 321 | layer2.Initial(); 322 | layer3.GetShape(batch_size, dim3, dim3, dim4); 323 | layer3.Initial(); 324 | 325 | layer1.Recv(client_group_rank, client2server_comm); 326 | layer2.Recv(client_group_rank, client2server_comm); 327 | layer3.Recv(client_group_rank, client2server_comm); 328 | 329 | offline_etime = timestamp(); 330 | double online_stime, online_etime; 331 | online_stime = timestamp(); 332 | int MPI_dest; 333 | if(MPI_rank == MPI_server1){ 334 | MPI_dest = MPI_server2; 335 | } 336 | else{ 337 | MPI_dest = MPI_server1; 338 | } 339 | float batchY[batch_size]; 340 | float *batchImg = (float*)malloc(sizeof(float)*batch_size*SIZE); 341 | for(int i = 0; i+batch_size <= train_size; i+=batch_size){ 342 | SelectBatch(img, y_hat, batchImg, batchY, i, batch_size, SIZE); 343 | layer1.GetData(batchImg, model1); 344 | layer1.Rec(server_group_rank, server2server_comm); 345 | layer1.OP(flag); 346 | layer2.GetData(layer1.C, model2); 347 | layer2.Rec(server_group_rank, server2server_comm); 348 | layer2.OP(flag); 349 | layer3.GetData(layer2.C, model3); 350 | layer3.Rec(server_group_rank, server2server_comm); 351 | layer3.OP(flag); 352 | } 353 | online_etime = timestamp(); 354 | 355 | layer1.Release(); 356 | layer2.Release(); 357 | layer3.Release(); 358 | free(img); 359 | free(model1); 360 | free(model2); 361 | free(model3); 362 | free(batchImg); 363 | float online_time = online_etime-online_stime; 364 | float offline_time = offline_etime-offline_stime; 365 | MPI_Send(&offline_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 366 | MPI_Send(&online_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 367 | if(client2server != MPI_GROUP_NULL){ 368 | MPI_Group_free(&client2server); 369 | } 370 | if(server2server != MPI_GROUP_NULL){ 371 | MPI_Group_free(&server2server); 372 | } 373 | } 374 | if(world_group != MPI_GROUP_NULL){ 375 | MPI_Group_free(&world_group); 376 | } 377 | MPI_Finalize(); 378 | return 0; 379 | } 380 | -------------------------------------------------------------------------------- /test/line.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This is the source code of Linear regression 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "../include/ParSecureML.h" 22 | #include "../include/read.h" 23 | #include "../include/output.h" 24 | #include "mpi.h" 25 | using namespace std; 26 | 27 | float *labels; 28 | float *images; 29 | string imagesPath; 30 | string labelsPath; 31 | int isRand = 1; 32 | int train_size; 33 | int batch_size; 34 | int SIZE; 35 | int row; 36 | int col; 37 | int FSIZE; 38 | float alpha; 39 | 40 | int dim1 = 784; 41 | int dim2 = 784; 42 | int dim3 
= 64; 43 | int dim4 = 10; 44 | int main(int argc, char **argv){ 45 | MPI_Init(&argc, &argv); 46 | int MPI_rank, MPI_size; 47 | MPI_Comm_rank(MPI_COMM_WORLD, &MPI_rank); 48 | MPI_Comm_size(MPI_COMM_WORLD, &MPI_size); 49 | 50 | MPI_Group world_group; 51 | MPI_Comm_group(MPI_COMM_WORLD, &world_group); 52 | 53 | double all_stime, all_etime; 54 | all_stime = timestamp(); 55 | double offline_stime; 56 | string setupFile = argv[1]; 57 | ifstream setup(setupFile); 58 | if(!setup.is_open()){ 59 | cout << "Setup not exist." << endl; 60 | return 0; 61 | } 62 | string line; 63 | while(getline(setup, line)){ 64 | stringstream para(line); 65 | string attr; 66 | para >> attr; 67 | if(attr == "--imagesPath"){ 68 | para >> imagesPath; 69 | } 70 | else if(attr == "--labelsPath"){ 71 | para >> labelsPath; 72 | } 73 | else if(attr == "--train_size"){ 74 | para >> train_size; 75 | } 76 | else if(attr == "--batch_size"){ 77 | para >> batch_size; 78 | } 79 | else if(attr == "--SIZE"){ 80 | para >> SIZE; 81 | } 82 | else if(attr == "--row"){ 83 | para >> row; 84 | } 85 | else if(attr == "--col"){ 86 | para >> col; 87 | } 88 | else if(attr == "--FSIZE"){ 89 | para >> FSIZE; 90 | } 91 | else if(attr == "--alpha"){ 92 | para >> alpha; 93 | } 94 | else{ 95 | cout << "Setup parameters error. " << attr << endl; 96 | return 0; 97 | } 98 | } 99 | setup.close(); 100 | 101 | offline_stime = timestamp(); 102 | if(MPI_rank == MPI_client){ 103 | 104 | /* 105 | Init MPI group: client2server1 clientserver2 106 | */ 107 | 108 | const int group_client2server1[CHUNK_SIZE] = {MPI_client, MPI_server1}; 109 | const int group_client2server2[CHUNK_SIZE] = {MPI_client, MPI_server2}; 110 | 111 | MPI_Group client2server1; 112 | MPI_Group client2server2; 113 | 114 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server1, &client2server1); 115 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server2, &client2server2); 116 | 117 | MPI_Comm client2server1_comm; 118 | MPI_Comm client2server2_comm; 119 | 120 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server1, 0, &client2server1_comm); 121 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server2, 0, &client2server2_comm); 122 | 123 | int MPI_group_client2server1_rank; 124 | int MPI_group_client2server2_rank; 125 | 126 | MPI_Comm_rank(client2server1_comm, &MPI_group_client2server1_rank); 127 | MPI_Comm_rank(client2server2_comm, &MPI_group_client2server2_rank); 128 | 129 | int server1_group_rank = 0, server2_group_rank = 0; 130 | 131 | if(MPI_group_client2server1_rank == 0){ 132 | server1_group_rank = 1; 133 | } 134 | if(MPI_group_client2server2_rank == 0){ 135 | server2_group_rank = 1; 136 | } 137 | 138 | 139 | if((images = (float*)malloc(sizeof(float)*train_size*SIZE)) == NULL){ 140 | cout << "Malloc error." << endl; 141 | return 0; 142 | } 143 | if((labels = (float*)malloc(sizeof(float)*train_size)) == NULL){ 144 | cout << "Malloc error." 
<< endl; 145 | return 0; 146 | } 147 | if(isRand == 1){ 148 | Generator(images, 256, train_size*SIZE); 149 | Generator(labels, 10, train_size); 150 | }else{ 151 | read_Images(imagesPath, images, SIZE, train_size); 152 | Generator(labels, 10, train_size); 153 | } 154 | float *model1 = (float*)malloc(sizeof(float)*SIZE); 155 | for(int i = 0; i < SIZE; i++){ 156 | model1[i] = (float)rand()/RAND_MAX; 157 | } 158 | float *images_th1 = (float*)malloc(sizeof(float)*train_size*SIZE); 159 | float *images_th2 = (float*)malloc(sizeof(float)*train_size*SIZE); 160 | float *model1_th1 = (float*)malloc(sizeof(float)*SIZE); 161 | float *model1_th2 = (float*)malloc(sizeof(float)*SIZE); 162 | float *y_hat1 = (float*)malloc(sizeof(float)*train_size); 163 | float *y_hat2 = (float*)malloc(sizeof(float)*train_size); 164 | double c_mstime, c_metime; 165 | 166 | for(int i = 0; i < train_size; i++){ 167 | for(int j = 0; j < SIZE; j++){ 168 | images_th1[i*SIZE+j] = rand() % 256; 169 | images_th2[i*SIZE+j] = images[i*SIZE+j] - images_th1[i*SIZE+j]; 170 | } 171 | } 172 | 173 | for(int i = 0; i < SIZE; i++){ 174 | model1_th1[i] = (float)rand() /RAND_MAX; 175 | model1_th2[i] = model1[i] - model1_th1[i]; 176 | } 177 | for(int i = 0; i < train_size; i++){ 178 | y_hat1[i] = rand()%10; 179 | y_hat2[i] = labels[i] - y_hat1[i]; 180 | } 181 | 182 | int flag1 = 0; 183 | int flag2 = -1; 184 | c_mstime = timestamp(); 185 | Support sp1, sp2, sp3, sp4, sp5, sp6, sp7, sp8, sp9; 186 | sp1.GetShape(batch_size, SIZE, SIZE, 1); 187 | sp1.Initial(); 188 | sp1.Assign(); 189 | //cout << "here" << endl; 190 | sp2.GetShape(SIZE, batch_size, batch_size, 1); 191 | sp2.Initial(); 192 | sp2.Assign(); 193 | 194 | c_metime = timestamp(); 195 | MPI_Send(&flag1, 1, MPI_INT, server1_group_rank, 0, client2server1_comm); 196 | MPI_Send(images_th1, train_size*SIZE, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 197 | MPI_Send(model1_th1, SIZE, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 198 | MPI_Send(y_hat1, train_size, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 199 | 200 | MPI_Send(&flag2, 1, MPI_INT, server2_group_rank, 0, client2server2_comm); 201 | MPI_Send(images_th2, train_size*SIZE, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 202 | MPI_Send(model1_th2, SIZE, MPI_FLOAT,server2_group_rank, 0, client2server2_comm); 203 | MPI_Send(y_hat2, train_size, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 204 | 205 | sp1.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 206 | sp2.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 207 | 208 | MPI_Status status; 209 | MPI_Recv(model1_th1, SIZE, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 210 | MPI_Recv(model1_th2, SIZE, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 211 | 212 | for(int i = 0; i < SIZE; i++){ 213 | model1[i] = model1_th1[i] + model1_th2[i]; 214 | } 215 | free(images_th1); 216 | free(images_th2); 217 | free(model1_th1); 218 | free(model1_th2); 219 | free(y_hat1); 220 | free(y_hat2); 221 | free(images); 222 | free(labels); 223 | sp1.Release(); 224 | sp2.Release(); 225 | 226 | float server1_offline_time, server2_offline_time; 227 | float server1_online_time, server2_online_time; 228 | 229 | 230 | MPI_Recv(&server1_offline_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 231 | MPI_Recv(&server2_offline_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 232 | 
MPI_Recv(&server1_online_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 233 | MPI_Recv(&server2_online_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 234 | 235 | all_etime = timestamp(); 236 | float all_time = all_etime - all_stime; 237 | 238 | Output out( 239 | all_time, 240 | server1_offline_time > server2_offline_time ? server1_offline_time : server2_offline_time, 241 | server1_online_time > server2_online_time ? server1_online_time : server2_online_time, 242 | "Linear regression" 243 | ); 244 | out.draw(); 245 | if(client2server1 != MPI_GROUP_NULL) MPI_Group_free(&client2server1); 246 | if(client2server2 != MPI_GROUP_NULL) MPI_Group_free(&client2server2); 247 | } 248 | else{ 249 | 250 | /* 251 | Init MPI_group: client2server, server2server 252 | */ 253 | 254 | const int group_client2server[CHUNK_SIZE] = {MPI_client, MPI_rank}; 255 | const int group_server2server[CHUNK_SIZE] = {MPI_server1, MPI_server2}; 256 | 257 | MPI_Group client2server; 258 | MPI_Group server2server; 259 | 260 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server, &client2server); 261 | MPI_Group_incl(world_group, CHUNK_SIZE, group_server2server, &server2server); 262 | 263 | MPI_Comm client2server_comm; 264 | MPI_Comm server2server_comm; 265 | 266 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server, 0, &client2server_comm); 267 | MPI_Comm_create_group(MPI_COMM_WORLD, server2server, 0, &server2server_comm); 268 | 269 | int MPI_group_client2server_rank; 270 | int MPI_group_server2server_rank; 271 | 272 | MPI_Comm_rank(client2server_comm, &MPI_group_client2server_rank); 273 | MPI_Comm_rank(server2server_comm, &MPI_group_server2server_rank); 274 | 275 | int client_group_rank = 0, server_group_rank = 0; 276 | 277 | if(MPI_group_client2server_rank == 0){ 278 | client_group_rank = 1; 279 | } 280 | if(MPI_group_server2server_rank == 0){ 281 | server_group_rank = 1; 282 | } 283 | 284 | double offline_etime; 285 | MPI_Status status; 286 | int flag; 287 | float y_hat[train_size]; 288 | float *img = (float*)malloc(sizeof(float)*train_size*SIZE); 289 | float *model1 = (float*)malloc(sizeof(float)*SIZE*50); 290 | 291 | MPI_Recv(&flag, 1, MPI_INT, client_group_rank, 0, client2server_comm, &status); 292 | MPI_Recv(img, train_size*SIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 293 | MPI_Recv(model1, SIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 294 | MPI_Recv(y_hat, train_size, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 295 | 296 | Triplet T1, T2; 297 | T1.GetShape(batch_size, SIZE, SIZE, 1); 298 | T1.Initial(); 299 | T2.GetShape(SIZE, batch_size, batch_size, 1); 300 | T2.Initial(); 301 | T1.Recv(client_group_rank, client2server_comm); 302 | T2.Recv(client_group_rank, client2server_comm); 303 | 304 | offline_etime = timestamp(); 305 | double online_stime, online_etime; 306 | 307 | float batchY[batch_size]; 308 | float *batchImg = (float*)malloc(sizeof(float)*batch_size*SIZE); 309 | online_stime = timestamp(); 310 | for(int i = 0; i+batch_size < train_size; i+=batch_size){ 311 | SelectBatch(img, y_hat, batchImg, batchY, i, batch_size, SIZE); 312 | T1.GetData(batchImg, model1); 313 | T1.Rec(server_group_rank, server2server_comm); 314 | T1.OP(flag); 315 | for(int j = 0; j < batch_size; j++){ 316 | T1.C[j] = batchY[j] - T1.C[j]; 317 | } 318 | T2.GetData(batchImg, T1.C); 319 | T2.Rec(server_group_rank, server2server_comm); 320 | T2.OP(flag); 321 | for(int j = 0; j < SIZE; j++){ 322 | model1[j] += 
alpha/batch_size*T2.C[j]; 323 | } 324 | } 325 | online_etime = timestamp(); 326 | 327 | MPI_Send(model1, SIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm); 328 | 329 | T1.Release(); 330 | T2.Release(); 331 | free(img); 332 | free(model1); 333 | 334 | float online_time = online_etime-online_stime; 335 | float offline_time = offline_etime-offline_stime; 336 | MPI_Send(&offline_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 337 | MPI_Send(&online_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 338 | if(client2server != MPI_GROUP_NULL){ 339 | MPI_Group_free(&client2server); 340 | } 341 | if(server2server != MPI_GROUP_NULL){ 342 | MPI_Group_free(&server2server); 343 | } 344 | } 345 | if(world_group != MPI_GROUP_NULL){ 346 | MPI_Group_free(&world_group); 347 | } 348 | MPI_Finalize(); 349 | return 0; 350 | } 351 | -------------------------------------------------------------------------------- /test/logistic.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This is the source code of Logistic regression 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "../include/ParSecureML.h" 22 | #include "../include/read.h" 23 | #include "../include/output.h" 24 | #include "mpi.h" 25 | using namespace std; 26 | 27 | float *labels; 28 | float *images; 29 | string imagesPath; 30 | string labelsPath; 31 | int isRand = 1; 32 | int train_size; 33 | int batch_size; 34 | int SIZE; 35 | int row; 36 | int col; 37 | int FSIZE; 38 | float alpha; 39 | 40 | int dim1 = 784; 41 | int dim2 = 784; 42 | int dim3 = 64; 43 | int dim4 = 10; 44 | int main(int argc, char **argv){ 45 | MPI_Init(&argc, &argv); 46 | int MPI_rank, MPI_size; 47 | MPI_Comm_rank(MPI_COMM_WORLD, &MPI_rank); 48 | MPI_Comm_size(MPI_COMM_WORLD, &MPI_size); 49 | 50 | MPI_Group world_group; 51 | MPI_Comm_group(MPI_COMM_WORLD, &world_group); 52 | 53 | double all_stime, all_etime; 54 | all_stime = timestamp(); 55 | double offline_stime; 56 | string setupFile = argv[1]; 57 | ifstream setup(setupFile); 58 | if(!setup.is_open()){ 59 | cout << "Setup not exist." << endl; 60 | return 0; 61 | } 62 | string line; 63 | while(getline(setup, line)){ 64 | stringstream para(line); 65 | string attr; 66 | para >> attr; 67 | if(attr == "--imagesPath"){ 68 | para >> imagesPath; 69 | } 70 | else if(attr == "--labelsPath"){ 71 | para >> labelsPath; 72 | } 73 | else if(attr == "--train_size"){ 74 | para >> train_size; 75 | } 76 | else if(attr == "--batch_size"){ 77 | para >> batch_size; 78 | } 79 | else if(attr == "--SIZE"){ 80 | para >> SIZE; 81 | } 82 | else if(attr == "--row"){ 83 | para >> row; 84 | } 85 | else if(attr == "--col"){ 86 | para >> col; 87 | } 88 | else if(attr == "--FSIZE"){ 89 | para >> FSIZE; 90 | } 91 | else if(attr == "--alpha"){ 92 | para >> alpha; 93 | } 94 | else{ 95 | cout << "Setup parameters error. 
" << attr << endl; 96 | return 0; 97 | } 98 | } 99 | setup.close(); 100 | 101 | offline_stime = timestamp(); 102 | if(MPI_rank == MPI_client){ 103 | 104 | /* 105 | Init MPI group: client2server1 clientserver2 106 | */ 107 | 108 | const int group_client2server1[CHUNK_SIZE] = {MPI_client, MPI_server1}; 109 | const int group_client2server2[CHUNK_SIZE] = {MPI_client, MPI_server2}; 110 | 111 | MPI_Group client2server1; 112 | MPI_Group client2server2; 113 | 114 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server1, &client2server1); 115 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server2, &client2server2); 116 | 117 | MPI_Comm client2server1_comm; 118 | MPI_Comm client2server2_comm; 119 | 120 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server1, 0, &client2server1_comm); 121 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server2, 0, &client2server2_comm); 122 | 123 | int MPI_group_client2server1_rank; 124 | int MPI_group_client2server2_rank; 125 | 126 | MPI_Comm_rank(client2server1_comm, &MPI_group_client2server1_rank); 127 | MPI_Comm_rank(client2server2_comm, &MPI_group_client2server2_rank); 128 | 129 | int server1_group_rank = 0, server2_group_rank = 0; 130 | 131 | if(MPI_group_client2server1_rank == 0){ 132 | server1_group_rank = 1; 133 | } 134 | if(MPI_group_client2server2_rank == 0){ 135 | server2_group_rank = 1; 136 | } 137 | 138 | 139 | if((images = (float*)malloc(sizeof(float)*train_size*SIZE)) == NULL){ 140 | cout << "Malloc error." << endl; 141 | return 0; 142 | } 143 | if((labels = (float*)malloc(sizeof(float)*train_size)) == NULL){ 144 | cout << "Malloc error." << endl; 145 | return 0; 146 | } 147 | if(isRand == 1){ 148 | Generator(images, 256, train_size*SIZE); 149 | Generator(labels, 10, train_size); 150 | }else{ 151 | read_Images(imagesPath, images, SIZE, train_size); 152 | Generator(labels, 10, train_size); 153 | } 154 | float *model1 = (float*)malloc(sizeof(float)*SIZE); 155 | for(int i = 0; i < SIZE; i++){ 156 | model1[i] = (float)rand()/RAND_MAX; 157 | } 158 | float *images_th1 = (float*)malloc(sizeof(float)*train_size*SIZE); 159 | float *images_th2 = (float*)malloc(sizeof(float)*train_size*SIZE); 160 | float *model1_th1 = (float*)malloc(sizeof(float)*SIZE); 161 | float *model1_th2 = (float*)malloc(sizeof(float)*SIZE); 162 | float *y_hat1 = (float*)malloc(sizeof(float)*train_size); 163 | float *y_hat2 = (float*)malloc(sizeof(float)*train_size); 164 | double c_mstime, c_metime; 165 | 166 | for(int i = 0; i < train_size; i++){ 167 | for(int j = 0; j < SIZE; j++){ 168 | images_th1[i*SIZE+j] = rand() % 256; 169 | images_th2[i*SIZE+j] = images[i*SIZE+j] - images_th1[i*SIZE+j]; 170 | } 171 | } 172 | 173 | for(int i = 0; i < SIZE; i++){ 174 | model1_th1[i] = (float)rand() /RAND_MAX; 175 | model1_th2[i] = model1[i] - model1_th1[i]; 176 | } 177 | for(int i = 0; i < train_size; i++){ 178 | y_hat1[i] = rand()%10; 179 | y_hat2[i] = labels[i] - y_hat1[i]; 180 | } 181 | 182 | int flag1 = 0; 183 | int flag2 = -1; 184 | c_mstime = timestamp(); 185 | Support sp1, sp2, sp3, sp4, sp5, sp6, sp7, sp8, sp9; 186 | sp1.GetShape(batch_size, SIZE, SIZE, 1); 187 | sp1.Initial(); 188 | sp1.Assign(); 189 | //cout << "here" << endl; 190 | sp2.GetShape(SIZE, batch_size, batch_size, 1); 191 | sp2.Initial(); 192 | sp2.Assign(); 193 | 194 | c_metime = timestamp(); 195 | MPI_Send(&flag1, 1, MPI_INT, server1_group_rank, 0, client2server1_comm); 196 | MPI_Send(images_th1, train_size*SIZE, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 197 | MPI_Send(model1_th1, SIZE, MPI_FLOAT, 
server1_group_rank, 0, client2server1_comm); 198 | MPI_Send(y_hat1, train_size, MPI_FLOAT, server1_group_rank, 0, client2server1_comm); 199 | 200 | MPI_Send(&flag2, 1, MPI_INT, server2_group_rank, 0, client2server2_comm); 201 | MPI_Send(images_th2, train_size*SIZE, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 202 | MPI_Send(model1_th2, SIZE, MPI_FLOAT,server2_group_rank, 0, client2server2_comm); 203 | MPI_Send(y_hat2, train_size, MPI_FLOAT, server2_group_rank, 0, client2server2_comm); 204 | 205 | sp1.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 206 | sp2.Send(server1_group_rank, server2_group_rank, client2server1_comm, client2server2_comm); 207 | 208 | MPI_Status status; 209 | MPI_Recv(model1_th1, SIZE, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 210 | MPI_Recv(model1_th2, SIZE, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 211 | 212 | for(int i = 0; i < SIZE; i++){ 213 | model1[i] = model1_th1[i] + model1_th2[i]; 214 | } 215 | free(images_th1); 216 | free(images_th2); 217 | free(model1_th1); 218 | free(model1_th2); 219 | free(y_hat1); 220 | free(y_hat2); 221 | free(images); 222 | free(labels); 223 | sp1.Release(); 224 | sp2.Release(); 225 | 226 | float server1_offline_time, server2_offline_time; 227 | float server1_online_time, server2_online_time; 228 | 229 | 230 | MPI_Recv(&server1_offline_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 231 | MPI_Recv(&server2_offline_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 232 | MPI_Recv(&server1_online_time, 1, MPI_FLOAT, server1_group_rank, 0, client2server1_comm, &status); 233 | MPI_Recv(&server2_online_time, 1, MPI_FLOAT, server2_group_rank, 0, client2server2_comm, &status); 234 | 235 | all_etime = timestamp(); 236 | float all_time = all_etime - all_stime; 237 | Output out( 238 | all_time, 239 | server1_offline_time > server2_offline_time ? server1_offline_time : server2_offline_time, 240 | server1_online_time > server2_online_time ? 
server1_online_time : server2_online_time, 241 | "Logistic regression" 242 | ); 243 | out.draw(); 244 | if(client2server1 != MPI_GROUP_NULL) MPI_Group_free(&client2server1); 245 | if(client2server2 != MPI_GROUP_NULL) MPI_Group_free(&client2server2); 246 | } 247 | else{ 248 | 249 | /* 250 | Init MPI_group: client2server, server2server 251 | */ 252 | 253 | const int group_client2server[CHUNK_SIZE] = {MPI_client, MPI_rank}; 254 | const int group_server2server[CHUNK_SIZE] = {MPI_server1, MPI_server2}; 255 | 256 | MPI_Group client2server; 257 | MPI_Group server2server; 258 | 259 | MPI_Group_incl(world_group, CHUNK_SIZE, group_client2server, &client2server); 260 | MPI_Group_incl(world_group, CHUNK_SIZE, group_server2server, &server2server); 261 | 262 | MPI_Comm client2server_comm; 263 | MPI_Comm server2server_comm; 264 | 265 | MPI_Comm_create_group(MPI_COMM_WORLD, client2server, 0, &client2server_comm); 266 | MPI_Comm_create_group(MPI_COMM_WORLD, server2server, 0, &server2server_comm); 267 | 268 | int MPI_group_client2server_rank; 269 | int MPI_group_server2server_rank; 270 | 271 | MPI_Comm_rank(client2server_comm, &MPI_group_client2server_rank); 272 | MPI_Comm_rank(server2server_comm, &MPI_group_server2server_rank); 273 | 274 | int client_group_rank = 0, server_group_rank = 0; 275 | 276 | if(MPI_group_client2server_rank == 0){ 277 | client_group_rank = 1; 278 | } 279 | if(MPI_group_server2server_rank == 0){ 280 | server_group_rank = 1; 281 | } 282 | 283 | double offline_etime; 284 | MPI_Status status; 285 | int flag; 286 | float y_hat[train_size]; 287 | float *img = (float*)malloc(sizeof(float)*train_size*SIZE); 288 | float *model1 = (float*)malloc(sizeof(float)*SIZE*50); 289 | 290 | MPI_Recv(&flag, 1, MPI_INT, client_group_rank, 0, client2server_comm, &status); 291 | MPI_Recv(img, train_size*SIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 292 | MPI_Recv(model1, SIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 293 | MPI_Recv(y_hat, train_size, MPI_FLOAT, client_group_rank, 0, client2server_comm, &status); 294 | 295 | Triplet T1, T2; 296 | T1.GetShape(batch_size, SIZE, SIZE, 1); 297 | T1.Initial(); 298 | T2.GetShape(SIZE, batch_size, batch_size, 1); 299 | T2.Initial(); 300 | T1.Recv(client_group_rank, client2server_comm); 301 | T2.Recv(client_group_rank, client2server_comm); 302 | 303 | offline_etime = timestamp(); 304 | double online_stime, online_etime; 305 | 306 | float batchY[batch_size]; 307 | float *batchImg = (float*)malloc(sizeof(float)*batch_size*SIZE); 308 | online_stime = timestamp(); 309 | for(int i = 0; i+batch_size < train_size; i+=batch_size){ 310 | SelectBatch(img, y_hat, batchImg, batchY, i, batch_size, SIZE); 311 | T1.GetData(batchImg, model1); 312 | T1.Rec(server_group_rank, server2server_comm); 313 | T1.OP(flag); 314 | T1.Activation(server_group_rank, server2server_comm); 315 | for(int j = 0; j < batch_size; j++){ 316 | T1.C[j] = batchY[j] - T1.C[j]; 317 | } 318 | T2.GetData(batchImg, T1.C); 319 | T2.Rec(server_group_rank, server2server_comm); 320 | T2.OP(flag); 321 | for(int j = 0; j < SIZE; j++){ 322 | model1[j] += alpha/batch_size*T2.C[j]; 323 | } 324 | } 325 | online_etime = timestamp(); 326 | 327 | MPI_Send(model1, SIZE, MPI_FLOAT, client_group_rank, 0, client2server_comm); 328 | 329 | T1.Release(); 330 | T2.Release(); 331 | free(img); 332 | free(model1); 333 | 334 | float online_time = online_etime-online_stime; 335 | float offline_time = offline_etime-offline_stime; 336 | MPI_Send(&offline_time, 1, MPI_FLOAT, client_group_rank, 
0, client2server_comm); 337 | MPI_Send(&online_time, 1, MPI_FLOAT, client_group_rank, 0, client2server_comm); 338 | if(client2server != MPI_GROUP_NULL){ 339 | MPI_Group_free(&client2server); 340 | } 341 | if(server2server != MPI_GROUP_NULL){ 342 | MPI_Group_free(&server2server); 343 | } 344 | } 345 | if(world_group != MPI_GROUP_NULL){ 346 | MPI_Group_free(&world_group); 347 | } 348 | MPI_Finalize(); 349 | return 0; 350 | } 351 | --------------------------------------------------------------------------------
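A note on the secret-sharing pattern shared by the test drivers above (conv.cc, line.cc, logistic.cc): the client splits every tensor (images, model weights, labels) into two additive shares, one drawn at random and the other set to the plaintext minus that random share, ships one share to each server over MPI, and only recovers a plaintext value (for example the trained model) by summing the two shares the servers return. The following is a minimal, self-contained C++ sketch of just that share/reconstruct step, with the MPI transport and the Support/Triplet (Beaver-triplet) machinery deliberately left out; the helper names make_shares and reconstruct are illustrative and are not part of the ParSecureML API.

```cpp
// Standalone sketch of the additive secret sharing used by the ParSecureML
// test drivers. make_shares/reconstruct are hypothetical helper names, not
// part of the ParSecureML API.
#include <cstdlib>
#include <iostream>
#include <vector>

// Split a plaintext vector into two additive shares:
// share1 is a random mask, share2 = plain - share1 (cf. model1_th1 / model1_th2).
static void make_shares(const std::vector<float> &plain,
                        std::vector<float> &share1,
                        std::vector<float> &share2){
    share1.resize(plain.size());
    share2.resize(plain.size());
    for(size_t i = 0; i < plain.size(); i++){
        share1[i] = (float)rand() / RAND_MAX;
        share2[i] = plain[i] - share1[i];
    }
}

// Recombine the two shares (cf. model1[i] = model1_th1[i] + model1_th2[i]).
static std::vector<float> reconstruct(const std::vector<float> &share1,
                                      const std::vector<float> &share2){
    std::vector<float> plain(share1.size());
    for(size_t i = 0; i < share1.size(); i++){
        plain[i] = share1[i] + share2[i];
    }
    return plain;
}

int main(){
    std::vector<float> model = {0.25f, 0.5f, 0.75f};   // stand-in for a small weight vector
    std::vector<float> s1, s2;
    make_shares(model, s1, s2);                        // each share alone looks random
    std::vector<float> back = reconstruct(s1, s2);
    for(size_t i = 0; i < model.size(); i++){
        std::cout << model[i] << " -> " << back[i] << std::endl;
    }
    return 0;
}
```

As in the drivers, correctness only requires that the two shares sum back to the plaintext, so any pseudo-random mask works; the drivers use rand() % 256 for image shares and (float)rand()/RAND_MAX for model shares.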