├── PBCT
│   ├── utils
│   │   ├── solve_loss.py
│   │   ├── PBCT.py
│   │   └── SFS.py
│   ├── Offline
│   │   ├── solve_loss.py
│   │   ├── SFS.py
│   │   └── PBCT_offline.py
│   ├── Online
│   │   ├── solve_loss.py
│   │   ├── SFS.py
│   │   └── PBCT_online.py
│   ├── Unlabeled_Offline
│   │   ├── solve_loss.py
│   │   ├── SFS.py
│   │   └── PBCT_offline_unlabeled.py
│   ├── Unlabeled_Online
│   │   ├── solve_loss.py
│   │   ├── SFS.py
│   │   └── PBCT_online_unlabeled.py
│   ├── README.txt
│   └── Data
│       ├── sec_20_feature.csv
│       ├── train_20_feature.csv
│       └── pri_20_feature.csv
├── README.md
└── LICENSE
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
The core function of the PBCT algorithm is included in the file utils/PBCT.py. Given the labeled and unlabeled training data as well as the test data, it triggers the training of the complete-view model and the partial-view models, saves the model parameters to the desired paths, and returns the test error measured by RMSE. An example of using the PBCT algorithm is provided in the __main__ section of that file.

The source dataset under /Data comes from

Severson, K.A., Attia, P.M., Jin, N., Perkins, N., Jiang, B., Yang, Z., Chen, M.H., Aykol, M., Herring, P.K., Fraggedakis, D., et al. (2019). Data-driven prediction of battery cycle life before capacity degradation. Nat. Energy 4, 383–391.

The original data can be found at [this link](https://data.matr.io/1/) under the [CC-BY](https://creativecommons.org/licenses/by/4.0/) license.
We extract the features according to the instructions in the paper.

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Nanlin Guo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/PBCT/README.txt:
--------------------------------------------------------------------------------
Requirements:
python==3.9
numpy==1.23.3
pandas==1.4.4
scikit-learn==1.1.2
scipy==1.9.1
gurobi==9.5.2


--- Project
- utils/
    PBCT.py
    SFS.py
    solve_loss.py
- Data/
    train_20_feature.csv (dataset1)
    pri_20_feature.csv (dataset2)
    sec_20_feature.csv (dataset3)
- Offline/
    PBCT_offline.py
- Online/
    PBCT_online.py
- Unlabeled_Offline/
    PBCT_offline_unlabeled.py
- Unlabeled_Online/
    PBCT_online_unlabeled.py


The core function of the PBCT algorithm is included in the file utils/PBCT.py. Given the labeled and unlabeled training data as well as the test data, it triggers the training of the complete-view model and the partial-view models, saves the model parameters to the desired paths, and returns the test error measured by RMSE. An example of using the PBCT algorithm is provided in the __main__ section of that file.

As four scenarios are considered in this work, we provide one script per scenario to obtain the corresponding experimental results:
- Offline/PBCT_offline.py evaluates the impact of the labeled training data size in the offline scenario.
- Online/PBCT_online.py evaluates the impact of the labeled training data size in the online scenario.
- Unlabeled_Offline/PBCT_offline_unlabeled.py evaluates the impact of the unlabeled training data size in the offline scenario.
- Unlabeled_Online/PBCT_online_unlabeled.py evaluates the impact of the unlabeled training data size in the online scenario.
The results of the baseline methods considered in these scenarios can also be obtained by executing the corresponding scripts.
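As a rough illustration of the workflow described above, the sketch below prepares labeled/unlabeled splits from the bundled data and invokes the core routine. The exact signature of the function in utils/PBCT.py is not shown in this dump, so the import and the call are hypothetical placeholders; the data handling mirrors the __main__ section of SFS.py.

# Rough illustration only: the import and the call below are hypothetical
# placeholders, not the actual API of utils/PBCT.py.
import pandas as pd

data = pd.read_csv('PBCT/Data/train_20_feature.csv', index_col=0)
X_cols = data.columns[:-2]          # the 20 features C1..C20
y_col = data.columns[-1:]           # the target column (cycle_log)

L = 5                               # for brevity: first L rows as labeled data
labeled = data.iloc[:L]
unlabeled = data.iloc[L:]
test = pd.read_csv('PBCT/Data/pri_20_feature.csv', index_col=0)

# from utils.PBCT import PBCT      # hypothetical import
# rmse = PBCT(labeled[X_cols], labeled[y_col],   # labeled training data
#             unlabeled[X_cols],                 # unlabeled training data
#             test[X_cols], test[y_col],         # test data
#             save_path='models/')               # hypothetical save location
# print('test RMSE:', rmse)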
--------------------------------------------------------------------------------
/PBCT/Offline/solve_loss.py:
--------------------------------------------------------------------------------
import gurobipy as gp
from gurobipy import GRB
from gurobipy import quicksum as qsum
import numpy as np


def solve_loss(y, XL, ZL, XU, ZU, l1, l2, l3, l4, l5):
    # y:      labels of the N labeled samples
    # XL/ZL:  complete-/partial-view features of the labeled samples
    # XU/ZU:  complete-/partial-view features of the unlabeled samples
    # l1..l5: weights of the five loss terms below
    N = XL.shape[0]
    L = XU.shape[0]
    L2 = ZU.shape[0]  # unused; equals L when the two views are aligned
    M = XL.shape[1]
    P = ZL.shape[1]

    alpha = []  # coefficients of the complete-view model
    beta = []   # coefficients of the partial-view model
    m = gp.Model("loss_model_with_l1_prior")
    for i in range(M):
        alpha.append(m.addVar(name="alpha%d" % i, vtype=GRB.CONTINUOUS, lb=-10))
    for i in range(P):
        beta.append(m.addVar(name="beta%d" % i, vtype=GRB.CONTINUOUS, lb=-10))

    # Term 1: squared residuals of the complete-view model on the labeled data
    h0 = [m.addVar(name="h0%d" % i, vtype=GRB.CONTINUOUS, lb=-100) for i in range(N)]
    for i in range(N):
        m.addConstr(h0[i] == y[i] - qsum(XL[i][j]*alpha[j] for j in range(M)))
    if l1 >= 0:
        sub1 = m.addVar(vtype=GRB.CONTINUOUS, lb=0)
    else:
        sub1 = m.addVar(vtype=GRB.CONTINUOUS, ub=0)
    m.addConstr(sub1 == l1*qsum(h0[i]**2 for i in range(N)))

    # Term 2: squared residuals of the partial-view model on the labeled data
    h2 = [m.addVar(name="h2%d" % i, vtype=GRB.CONTINUOUS, lb=-100) for i in range(N)]
    for i in range(N):
        m.addConstr(h2[i] == y[i] - qsum(ZL[i][j]*beta[j] for j in range(P)))
    if l2 >= 0:
        sub2 = m.addVar(vtype=GRB.CONTINUOUS, lb=0)
    else:
        sub2 = m.addVar(vtype=GRB.CONTINUOUS, ub=0)
    m.addConstr(sub2 == l2*qsum(h2[i]**2 for i in range(N)))

    # Term 3: disagreement between the two models on the labeled data
    h6 = [m.addVar(name="h6%d" % i, vtype=GRB.CONTINUOUS, lb=-100) for i in range(N)]
    for i in range(N):
        m.addConstr(h6[i] == qsum(ZL[i][j]*beta[j] for j in range(P))
                             - qsum(XL[i][j]*alpha[j] for j in range(M)))
    if l3 >= 0:
        sub3 = m.addVar(vtype=GRB.CONTINUOUS, lb=0)
    else:
        sub3 = m.addVar(vtype=GRB.CONTINUOUS, ub=0)
    m.addConstr(sub3 == l3*qsum(h6[i]**2 for i in range(N)))

    # Term 4: disagreement between the two models on the unlabeled data
    h9 = [m.addVar(name="h9%d" % i, vtype=GRB.CONTINUOUS, lb=-100) for i in range(L)]
    for i in range(L):
        m.addConstr(h9[i] == qsum(ZU[i][j]*beta[j] for j in range(P))
                             - qsum(XU[i][j]*alpha[j] for j in range(M)))
    if l4 >= 0:
        sub4 = m.addVar(vtype=GRB.CONTINUOUS, lb=0)
    else:
        sub4 = m.addVar(vtype=GRB.CONTINUOUS, ub=0)
    m.addConstr(sub4 == l4*qsum(h9[i]**2 for i in range(L)))

    # Term 5: l1 regularization of alpha (beta is not penalized)
    l1_norm = [m.addVar(name="l1%d" % i, vtype=GRB.CONTINUOUS, lb=0) for i in range(M)]
    for i in range(M):
        m.addConstr(alpha[i] <= l1_norm[i])
        m.addConstr(-1*alpha[i] <= l1_norm[i])
    #for i in range(P):
    #    m.addConstr(beta[i] <= l1_norm[i+M])
    #    m.addConstr(-1*beta[i] <= l1_norm[i+M])
    if l5 >= 0:
        sub5 = m.addVar(vtype=GRB.CONTINUOUS, lb=0)
    else:
        sub5 = m.addVar(vtype=GRB.CONTINUOUS, ub=0)
    m.addConstr(sub5 == l5*qsum(l1_norm[i] for i in range(M)))

    m.setObjective(sub1 + sub2 + sub3 + sub4 + sub5, GRB.MINIMIZE)
    m.Params.NonConvex = 2        # the quadratic equality constraints are non-convex
    m.setParam('MIPGap', 0.01)
    m.setParam('TimeLimit', 20)
    #m.setParam('Threads', 16)

    m.optimize()
    alpha_final = []
    beta_final = []
    # The tail of this function is truncated in the source dump after "if i";
    # the lines below are a reconstruction that assumes both coefficient
    # vectors are collected and returned.
    for i in range(M):
        alpha_final.append(alpha[i].x)
        if i < P:
            beta_final.append(beta[i].x)
    return alpha_final, beta_final
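For reference, a minimal sketch of calling this solver on synthetic arrays; the shapes follow the indexing above, the five weights are arbitrary illustrative values, the two-vector unpacking relies on the reconstructed return at the end of the file, and a Gurobi license is required.

# Minimal synthetic call; all values below are illustrative only.
import numpy as np
from solve_loss import solve_loss   # assumes this file is on the import path

rng = np.random.default_rng(0)
XL = rng.normal(size=(8, 3))   # labeled complete-view features (N x M)
ZL = rng.normal(size=(8, 2))   # labeled partial-view features (N x P)
XU = rng.normal(size=(5, 3))   # unlabeled complete-view features (L x M)
ZU = rng.normal(size=(5, 2))   # unlabeled partial-view features (L x P)
y = rng.normal(size=8)         # labels of the N labeled samples

alpha, beta = solve_loss(y, XL, ZL, XU, ZU, 1.0, 1.0, 0.5, 0.5, 0.01)
print(alpha, beta)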
--------------------------------------------------------------------------------
/PBCT/utils/SFS.py:
--------------------------------------------------------------------------------
from sklearn.model_selection import LeaveOneOut, cross_val_score
from sklearn.linear_model import LinearRegression
import numpy as np
from numpy import mean, absolute
import copy
import pandas as pd


def Sequential_Forward_Selection(Features, labeled_X, labeled_Y, R):
    """Greedy forward selection of at most R features, scored by LOOCV MSE.

    Features is expected to be a pandas Index (e.g. DataFrame.columns).
    """
    Feature_set = []
    Feature_candidates = copy.deepcopy(Features)

    cv = LeaveOneOut()
    model = LinearRegression(fit_intercept=False)
    for i in range(R):
        error_list = []
        for tmp_Feature in Feature_candidates:
            tmp_Feature_set = copy.deepcopy(Feature_set)
            tmp_Feature_set.append(tmp_Feature)
            tmp_X = labeled_X[tmp_Feature_set]
            # use LOOCV to evaluate the candidate feature set
            scores = cross_val_score(model, tmp_X, labeled_Y,
                                     scoring='neg_mean_squared_error',
                                     cv=cv, n_jobs=-1)
            error_list.append(mean(absolute(scores)))

        min_index = np.argmin(np.array(error_list))
        # stop as soon as adding another feature no longer reduces the error
        if i > 0 and error_list[min_index] > curr_error:
            return [Feature_set, curr_error]
        curr_error = error_list[min_index]
        Feature_set.append(Feature_candidates[min_index])
        Feature_candidates = Feature_candidates.delete(min_index)
    return [Feature_set, curr_error]


def Sequential_Forward_Selection_dc(Features, labeled_X, labeled_Y, R):
    """Same as Sequential_Forward_Selection, but the 'dc' (constant offset)
    column is always part of the selected feature set.
    """
    Feature_set = ['dc']
    Feature_candidates = copy.deepcopy(Features)

    cv = LeaveOneOut()
    model = LinearRegression(fit_intercept=False)
    for i in range(R):
        error_list = []
        for tmp_Feature in Feature_candidates:
            tmp_Feature_set = copy.deepcopy(Feature_set)
            tmp_Feature_set.append(tmp_Feature)
            tmp_X = labeled_X[tmp_Feature_set]
            # use LOOCV to evaluate the candidate feature set
            scores = cross_val_score(model, tmp_X, labeled_Y,
                                     scoring='neg_mean_squared_error',
                                     cv=cv, n_jobs=-1)
            error_list.append(mean(absolute(scores)))

        min_index = np.argmin(np.array(error_list))
        if i > 0 and error_list[min_index] > curr_error:
            return [Feature_set, curr_error]
        curr_error = error_list[min_index]
        Feature_set.append(Feature_candidates[min_index])
        Feature_candidates = Feature_candidates.delete(min_index)
    return [Feature_set, curr_error]


def Sequential_Forward_Selection_corr(Features, labeled_X, labeled_Y, random_index, L, R):
    """Forward selection with a manual LOOCV loop that re-normalizes
    (z-scores) the training fold in every iteration.

    The inputs are the original (unnormalized) labeled X and y, indexed by
    the first L entries of random_index.
    """
    Feature_set = []
    Feature_candidates = copy.deepcopy(Features)

    for i in range(R):
        error_list = []
        for tmp_Feature in Feature_candidates:
            tmp_Feature_set = copy.deepcopy(Feature_set)
            tmp_Feature_set.append(tmp_Feature)
            tmp_X_all = labeled_X[tmp_Feature_set]
            # manual leave-one-out CV
            LOO_list = []
            for j in range(L):
                model = LinearRegression(fit_intercept=False)
                predict_x = tmp_X_all.iloc[j]
                tmp_X = tmp_X_all.drop(random_index[j])
                predict_y = labeled_Y.iloc[j]
                tmp_Y = labeled_Y.drop(random_index[j])
                mean_labeled_x = tmp_X.mean()
                std_labeled_x = tmp_X.std()
                mean_labeled_y = tmp_Y.mean()
                std_labeled_y = tmp_Y.std()
                # normalize the training fold and the held-out sample
                X_train_tmp = (tmp_X - mean_labeled_x)/std_labeled_x
                y_train_tmp = (tmp_Y - mean_labeled_y)/std_labeled_y
                predict_x = (predict_x - mean_labeled_x)/std_labeled_x

                model.fit(X_train_tmp.to_numpy(), y_train_tmp.to_numpy())
                alpha_y = model.predict(predict_x.to_numpy().reshape(1, -1))
                # undo the normalization of the prediction
                real_alpha_y = (alpha_y[0]*std_labeled_y + mean_labeled_y).to_numpy()

                tmp_error = predict_y.to_numpy()[0] - real_alpha_y[0]
                LOO_list.append(tmp_error * tmp_error)
            error_list.append(np.mean(LOO_list))

        min_index = np.argmin(np.array(error_list))
        if i > 0 and error_list[min_index] > curr_error:
            return [Feature_set, curr_error]
        curr_error = error_list[min_index]
        Feature_set.append(Feature_candidates[min_index])
        Feature_candidates = Feature_candidates.delete(min_index)
    return [Feature_set, curr_error]


def Sequential_Forward_Selection_corr_test(Features, labeled_X, labeled_Y, random_index, L, R):
    """Variant of Sequential_Forward_Selection_corr that additionally tracks
    the LOOCV error in the normalized target space and returns that error.
    """
    Feature_set = []
    Feature_candidates = copy.deepcopy(Features)

    for i in range(R):
        error_list = []
        err_nor_list = []
        for tmp_Feature in Feature_candidates:
            tmp_Feature_set = copy.deepcopy(Feature_set)
            tmp_Feature_set.append(tmp_Feature)
            tmp_X_all = labeled_X[tmp_Feature_set]
            # manual leave-one-out CV
            LOO_list = []
            LOO_nor_list = []
            for j in range(L):
                model = LinearRegression(fit_intercept=False)
                predict_x = tmp_X_all.iloc[j]
                tmp_X = tmp_X_all.drop(random_index[j])
                predict_y = labeled_Y.iloc[j]
                tmp_Y = labeled_Y.drop(random_index[j])
                mean_labeled_x = tmp_X.mean()
                std_labeled_x = tmp_X.std()
                mean_labeled_y = tmp_Y.mean()
                std_labeled_y = tmp_Y.std()
                # normalize the training fold and the held-out sample
                X_train_tmp = (tmp_X - mean_labeled_x)/std_labeled_x
                y_train_tmp = (tmp_Y - mean_labeled_y)/std_labeled_y
                predict_x = (predict_x - mean_labeled_x)/std_labeled_x
                predict_y_nor = (predict_y - mean_labeled_y)/std_labeled_y

                model.fit(X_train_tmp.to_numpy(), y_train_tmp.to_numpy())
                alpha_y = model.predict(predict_x.to_numpy().reshape(1, -1))
                real_alpha_y = (alpha_y[0]*std_labeled_y + mean_labeled_y).to_numpy()

                tmp_error = predict_y.to_numpy()[0] - real_alpha_y[0]
                tmp_error_nor = alpha_y[0] - predict_y_nor.to_numpy()[0]
                LOO_list.append(tmp_error * tmp_error)
                LOO_nor_list.append(tmp_error_nor * tmp_error_nor)
            error_list.append(np.mean(LOO_list))
            err_nor_list.append(np.mean(LOO_nor_list))

        min_index = np.argmin(np.array(error_list))
        if i > 0 and error_list[min_index] > curr_error:
            return [Feature_set, curr_error_nor]
        curr_error = error_list[min_index]
        curr_error_nor = err_nor_list[min_index]
        Feature_set.append(Feature_candidates[min_index])
        Feature_candidates = Feature_candidates.delete(min_index)
    return [Feature_set, curr_error_nor]


if __name__ == "__main__":
    # index_col=0: the csv's first (unnamed) column is the sample index
    data_samples = pd.read_csv('train_20_feature.csv', index_col=0)
    data_columns_x = data_samples.columns[:-2]   # the 20 features
    data_columns_y = data_samples.columns[-1:]   # the target (cycle_log)
    print(data_columns_x)
    print(data_columns_y)
    # split the labeled and unlabeled data
    L = 5
    random_index = [32, 27, 29, 7, 20, 39, 16, 18, 24, 23, 11, 33, 10, 40, 5, 37, 2, 25, 34, 6, 36, 1, 21, 14, 9, 19, 13, 0, 12, 22, 35, 17, 3, 31, 4, 38, 28, 26, 30, 8, 15]
    # .loc selects by index label (not by position), matching random_index
    data_labeled = data_samples.loc[random_index[:L]]

    data_labeled_x = data_labeled[data_columns_x]
    data_labeled_y = data_labeled[data_columns_y]

    Partial_Feature = Sequential_Forward_Selection_corr(
        data_columns_x, data_labeled_x, data_labeled_y, random_index, L, L-2)
    print(Partial_Feature[0], Partial_Feature[1])
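For completeness, a self-contained sketch of the simpler cross_val_score-based selector on made-up data; the column names, sizes, and coefficients below are illustrative only.

# Illustrative call of Sequential_Forward_Selection on synthetic data.
import numpy as np
import pandas as pd
from SFS import Sequential_Forward_Selection   # assumes this file is on the import path

rng = np.random.default_rng(1)
X = pd.DataFrame(rng.normal(size=(12, 4)), columns=['C1', 'C2', 'C3', 'C4'])
y = 2.0*X['C1'] - X['C3'] + 0.1*rng.normal(size=12)

# select at most 3 features; the search stops early once the LOOCV error rises
features, loocv_mse = Sequential_Forward_Selection(X.columns, X, y, 3)
print(features, loocv_mse)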
-------------------------------------------------------------------------------- 1 | from tkinter import FALSE 2 | from sklearn.model_selection import LeaveOneOut,cross_val_score 3 | from sklearn.linear_model import LinearRegression 4 | import numpy as np 5 | from numpy import mean,absolute,sqrt 6 | import copy 7 | import math 8 | import pandas as pd 9 | import random 10 | 11 | def Sequential_Forward_Selection(Features,labeled_X,labled_Y,R): 12 | Feature_set = [] 13 | Feature_candidates = copy.deepcopy(Features) 14 | 15 | cv = LeaveOneOut() 16 | model = LinearRegression(fit_intercept=False) 17 | for i in range(R): 18 | error_list = [] 19 | for tmp_Feature in Feature_candidates: 20 | tmp_Feature_set = copy.deepcopy(Feature_set) 21 | tmp_Feature_set.append(tmp_Feature) 22 | tmp_X = labeled_X[tmp_Feature_set] 23 | #print(tmp_X) 24 | #use LOOCV to evaluate model 25 | scores = cross_val_score(model, tmp_X, labled_Y, scoring='neg_mean_squared_error', 26 | cv=cv, n_jobs=-1) 27 | #print(scores) 28 | error_list.append(mean(absolute(scores))) 29 | 30 | min_index = np.argmin(np.array(error_list)) 31 | #print(min_index) 32 | if i>0: 33 | if error_list[min_index]>curr_error: 34 | return [Feature_set,curr_error] 35 | curr_error = error_list[min_index] 36 | Feature_set.append(Feature_candidates[min_index]) 37 | Feature_candidates = Feature_candidates.delete(min_index) 38 | #print(Feature_candidates) 39 | return [Feature_set,curr_error] 40 | 41 | 42 | def Sequential_Forward_Selection_dc(Features,labeled_X,labled_Y,R): 43 | Feature_set = ['dc'] 44 | Feature_candidates = copy.deepcopy(Features) 45 | 46 | cv = LeaveOneOut() 47 | model = LinearRegression(fit_intercept=False) 48 | for i in range(R): 49 | error_list = [] 50 | for tmp_Feature in Feature_candidates: 51 | tmp_Feature_set = copy.deepcopy(Feature_set) 52 | tmp_Feature_set.append(tmp_Feature) 53 | tmp_X = labeled_X[tmp_Feature_set] 54 | #print(tmp_X) 55 | #use LOOCV to evaluate model 56 | scores = cross_val_score(model, tmp_X, labled_Y, scoring='neg_mean_squared_error', 57 | cv=cv, n_jobs=-1) 58 | #print(scores) 59 | error_list.append(mean(absolute(scores))) 60 | 61 | min_index = np.argmin(np.array(error_list)) 62 | #print(min_index) 63 | if i>0: 64 | if error_list[min_index]>curr_error: 65 | return [Feature_set,curr_error] 66 | curr_error = error_list[min_index] 67 | Feature_set.append(Feature_candidates[min_index]) 68 | Feature_candidates = Feature_candidates.delete(min_index) 69 | #print(Feature_candidates) 70 | return [Feature_set,curr_error] 71 | 72 | 73 | 74 | 75 | def Sequential_Forward_Selection_corr(Features,labeled_X,labled_Y,random_index,L,R): 76 | """ 77 | The input should be original Labeled X, y 78 | """ 79 | 80 | 81 | Feature_set = [] 82 | Feature_candidates = copy.deepcopy(Features) 83 | 84 | for i in range(R): 85 | error_list = [] 86 | for tmp_Feature in Feature_candidates: 87 | tmp_Feature_set = copy.deepcopy(Feature_set) 88 | tmp_Feature_set.append(tmp_Feature) 89 | tmp_X_all = labeled_X[tmp_Feature_set] 90 | #print(tmp_X) 91 | #use LOOCV to evaluate model 92 | LOO_list = [] 93 | for j in range(L): 94 | model = LinearRegression(fit_intercept=False) 95 | predict_x = tmp_X_all.iloc[j] 96 | tmp_X = tmp_X_all.drop(random_index[j]) 97 | predict_y = labled_Y.iloc[j] 98 | tmp_Y = labled_Y.drop(random_index[j]) 99 | print(tmp_X) 100 | mean_labled_x = tmp_X.mean() 101 | print(mean_labled_x) 102 | std_labled_x = tmp_X.std() 103 | mean_labled_y = tmp_Y.mean() 104 | std_labled_y = tmp_Y.std() 105 | ##Normalize## 106 | X_train_tmp = (tmp_X - 
mean_labled_x)/std_labled_x 107 | print(X_train_tmp) 108 | y_train_tmp = (tmp_Y-mean_labled_y)/std_labled_y 109 | predict_x = (predict_x-mean_labled_x)/std_labled_x 110 | 111 | 112 | #print(tmp_X.to_numpy()) 113 | #print(tmp_Y.to_numpy()) 114 | model.fit(X_train_tmp.to_numpy(),y_train_tmp.to_numpy()) 115 | #print('alpha',alpha) 116 | #print('beta',beta) 117 | #print(predict_x) 118 | alpha_y = model.predict([predict_x]) 119 | real_alpha_y = (alpha_y[0]*std_labled_y+mean_labled_y).to_numpy() 120 | print('alpha_y',real_alpha_y[0]) 121 | 122 | tmp_error = (predict_y.to_numpy()[0] - real_alpha_y[0]) 123 | #print(tmp_error) 124 | tmp_error_square = tmp_error * tmp_error 125 | LOO_list.append(tmp_error_square) 126 | print(LOO_list) 127 | error_list.append(np.mean(LOO_list)) 128 | 129 | 130 | min_index = np.argmin(np.array(error_list)) 131 | #print(min_index) 132 | if i>0: 133 | if error_list[min_index]>curr_error: 134 | return [Feature_set,curr_error] 135 | curr_error = error_list[min_index] 136 | Feature_set.append(Feature_candidates[min_index]) 137 | Feature_candidates = Feature_candidates.delete(min_index) 138 | #print(Feature_candidates) 139 | return [Feature_set,curr_error] 140 | 141 | 142 | def Sequential_Forward_Selection_corr_test(Features,labeled_X,labled_Y,random_index,L,R): 143 | """ 144 | The input should be original Labeled X, y 145 | """ 146 | 147 | 148 | Feature_set = [] 149 | Feature_candidates = copy.deepcopy(Features) 150 | 151 | for i in range(R): 152 | error_list = [] 153 | err_nor_list = [] 154 | for tmp_Feature in Feature_candidates: 155 | tmp_Feature_set = copy.deepcopy(Feature_set) 156 | tmp_Feature_set.append(tmp_Feature) 157 | tmp_X_all = labeled_X[tmp_Feature_set] 158 | #print(tmp_X) 159 | #use LOOCV to evaluate model 160 | LOO_list = [] 161 | LOO_nor_list = [] 162 | for j in range(L): 163 | model = LinearRegression(fit_intercept=False) 164 | predict_x = tmp_X_all.iloc[j] 165 | tmp_X = tmp_X_all.drop(random_index[j]) 166 | predict_y = labled_Y.iloc[j] 167 | tmp_Y = labled_Y.drop(random_index[j]) 168 | print(tmp_X) 169 | mean_labled_x = tmp_X.mean() 170 | print(mean_labled_x) 171 | std_labled_x = tmp_X.std() 172 | mean_labled_y = tmp_Y.mean() 173 | std_labled_y = tmp_Y.std() 174 | ##Normalize## 175 | X_train_tmp = (tmp_X - mean_labled_x)/std_labled_x 176 | print(X_train_tmp) 177 | y_train_tmp = (tmp_Y-mean_labled_y)/std_labled_y 178 | predict_x = (predict_x-mean_labled_x)/std_labled_x 179 | predict_y_nor = (predict_y-mean_labled_y)/std_labled_y 180 | 181 | 182 | #print(tmp_X.to_numpy()) 183 | #print(tmp_Y.to_numpy()) 184 | model.fit(X_train_tmp.to_numpy(),y_train_tmp.to_numpy()) 185 | #print('alpha',alpha) 186 | #print('beta',beta) 187 | #print(predict_x) 188 | alpha_y = model.predict([predict_x]) 189 | real_alpha_y = (alpha_y[0]*std_labled_y+mean_labled_y).to_numpy() 190 | print('alpha_y',real_alpha_y[0]) 191 | 192 | tmp_error = (predict_y.to_numpy()[0] - real_alpha_y[0]) 193 | tmp_error_nor = alpha_y[0]-predict_y_nor.to_numpy()[0] 194 | #print(tmp_error) 195 | tmp_error_square = tmp_error * tmp_error 196 | tmp_err_nor_square = tmp_error_nor * tmp_error_nor 197 | LOO_list.append(tmp_error_square) 198 | LOO_nor_list.append(tmp_err_nor_square) 199 | print(LOO_list) 200 | error_list.append(np.mean(LOO_list)) 201 | err_nor_list.append(np.mean(LOO_nor_list)) 202 | 203 | 204 | min_index = np.argmin(np.array(error_list)) 205 | #print(min_index) 206 | if i>0: 207 | if error_list[min_index]>curr_error: 208 | return [Feature_set,curr_error_nor] 209 | curr_error = 
error_list[min_index] 210 | curr_error_nor = err_nor_list[min_index] 211 | Feature_set.append(Feature_candidates[min_index]) 212 | Feature_candidates = Feature_candidates.delete(min_index) 213 | #print(Feature_candidates) 214 | return [Feature_set,curr_error_nor] 215 | 216 | if __name__=="__main__": 217 | data_samples = pd.read_csv('train_20_feature.csv',index_col=FALSE) 218 | data_shape = data_samples.shape 219 | data_columnslable_x = data_samples.columns[:-2] 220 | data_columnslable_y = data_samples.columns[-1:] 221 | print(data_columnslable_x) 222 | print(data_columnslable_y) 223 | ##split the labeled and unlabeled data 224 | L = 5 225 | random_index = [32, 27, 29, 7, 20, 39, 16, 18, 24, 23, 11, 33, 10, 40, 5, 37, 2, 25, 34, 6, 36, 1, 21, 14, 9, 19, 13, 0, 12, 22, 35, 17, 3, 31, 4, 38, 28, 26, 30, 8, 15] 226 | data_labled = data_samples.loc[random_index[:L]] ##check why loc is diff from normal slice checked 227 | 228 | 229 | data_labled_x = data_labled[data_columnslable_x] 230 | data_labled_y = data_labled[data_columnslable_y] 231 | #### 232 | Partial_Feature = Sequential_Forward_Selection_corr(data_columnslable_x,data_labled_x,data_labled_y,random_index,L,L-2) 233 | 234 | print(Partial_Feature[0],Partial_Feature[1]) 235 | -------------------------------------------------------------------------------- /PBCT/Unlabeled_Online/SFS.py: -------------------------------------------------------------------------------- 1 | from tkinter import FALSE 2 | from sklearn.model_selection import LeaveOneOut,cross_val_score 3 | from sklearn.linear_model import LinearRegression 4 | import numpy as np 5 | from numpy import mean,absolute,sqrt 6 | import copy 7 | import math 8 | import pandas as pd 9 | import random 10 | 11 | def Sequential_Forward_Selection(Features,labeled_X,labled_Y,R): 12 | Feature_set = [] 13 | Feature_candidates = copy.deepcopy(Features) 14 | 15 | cv = LeaveOneOut() 16 | model = LinearRegression(fit_intercept=False) 17 | for i in range(R): 18 | error_list = [] 19 | for tmp_Feature in Feature_candidates: 20 | tmp_Feature_set = copy.deepcopy(Feature_set) 21 | tmp_Feature_set.append(tmp_Feature) 22 | tmp_X = labeled_X[tmp_Feature_set] 23 | #print(tmp_X) 24 | #use LOOCV to evaluate model 25 | scores = cross_val_score(model, tmp_X, labled_Y, scoring='neg_mean_squared_error', 26 | cv=cv, n_jobs=-1) 27 | #print(scores) 28 | error_list.append(mean(absolute(scores))) 29 | 30 | min_index = np.argmin(np.array(error_list)) 31 | #print(min_index) 32 | if i>0: 33 | if error_list[min_index]>curr_error: 34 | return [Feature_set,curr_error] 35 | curr_error = error_list[min_index] 36 | Feature_set.append(Feature_candidates[min_index]) 37 | Feature_candidates = Feature_candidates.delete(min_index) 38 | #print(Feature_candidates) 39 | return [Feature_set,curr_error] 40 | 41 | 42 | def Sequential_Forward_Selection_dc(Features,labeled_X,labled_Y,R): 43 | Feature_set = ['dc'] 44 | Feature_candidates = copy.deepcopy(Features) 45 | 46 | cv = LeaveOneOut() 47 | model = LinearRegression(fit_intercept=False) 48 | for i in range(R): 49 | error_list = [] 50 | for tmp_Feature in Feature_candidates: 51 | tmp_Feature_set = copy.deepcopy(Feature_set) 52 | tmp_Feature_set.append(tmp_Feature) 53 | tmp_X = labeled_X[tmp_Feature_set] 54 | #print(tmp_X) 55 | #use LOOCV to evaluate model 56 | scores = cross_val_score(model, tmp_X, labled_Y, scoring='neg_mean_squared_error', 57 | cv=cv, n_jobs=-1) 58 | #print(scores) 59 | error_list.append(mean(absolute(scores))) 60 | 61 | min_index = np.argmin(np.array(error_list)) 62 | 
#print(min_index) 63 | if i>0: 64 | if error_list[min_index]>curr_error: 65 | return [Feature_set,curr_error] 66 | curr_error = error_list[min_index] 67 | Feature_set.append(Feature_candidates[min_index]) 68 | Feature_candidates = Feature_candidates.delete(min_index) 69 | #print(Feature_candidates) 70 | return [Feature_set,curr_error] 71 | 72 | 73 | 74 | 75 | def Sequential_Forward_Selection_corr(Features,labeled_X,labled_Y,random_index,L,R): 76 | """ 77 | The input should be original Labeled X, y 78 | """ 79 | 80 | 81 | Feature_set = [] 82 | Feature_candidates = copy.deepcopy(Features) 83 | 84 | for i in range(R): 85 | error_list = [] 86 | for tmp_Feature in Feature_candidates: 87 | tmp_Feature_set = copy.deepcopy(Feature_set) 88 | tmp_Feature_set.append(tmp_Feature) 89 | tmp_X_all = labeled_X[tmp_Feature_set] 90 | #print(tmp_X) 91 | #use LOOCV to evaluate model 92 | LOO_list = [] 93 | for j in range(L): 94 | model = LinearRegression(fit_intercept=False) 95 | predict_x = tmp_X_all.iloc[j] 96 | tmp_X = tmp_X_all.drop(random_index[j]) 97 | predict_y = labled_Y.iloc[j] 98 | tmp_Y = labled_Y.drop(random_index[j]) 99 | print(tmp_X) 100 | mean_labled_x = tmp_X.mean() 101 | print(mean_labled_x) 102 | std_labled_x = tmp_X.std() 103 | mean_labled_y = tmp_Y.mean() 104 | std_labled_y = tmp_Y.std() 105 | ##Normalize## 106 | X_train_tmp = (tmp_X - mean_labled_x)/std_labled_x 107 | print(X_train_tmp) 108 | y_train_tmp = (tmp_Y-mean_labled_y)/std_labled_y 109 | predict_x = (predict_x-mean_labled_x)/std_labled_x 110 | 111 | 112 | #print(tmp_X.to_numpy()) 113 | #print(tmp_Y.to_numpy()) 114 | model.fit(X_train_tmp.to_numpy(),y_train_tmp.to_numpy()) 115 | #print('alpha',alpha) 116 | #print('beta',beta) 117 | #print(predict_x) 118 | alpha_y = model.predict([predict_x]) 119 | real_alpha_y = (alpha_y[0]*std_labled_y+mean_labled_y).to_numpy() 120 | print('alpha_y',real_alpha_y[0]) 121 | 122 | tmp_error = (predict_y.to_numpy()[0] - real_alpha_y[0]) 123 | #print(tmp_error) 124 | tmp_error_square = tmp_error * tmp_error 125 | LOO_list.append(tmp_error_square) 126 | print(LOO_list) 127 | error_list.append(np.mean(LOO_list)) 128 | 129 | 130 | min_index = np.argmin(np.array(error_list)) 131 | #print(min_index) 132 | if i>0: 133 | if error_list[min_index]>curr_error: 134 | return [Feature_set,curr_error] 135 | curr_error = error_list[min_index] 136 | Feature_set.append(Feature_candidates[min_index]) 137 | Feature_candidates = Feature_candidates.delete(min_index) 138 | #print(Feature_candidates) 139 | return [Feature_set,curr_error] 140 | 141 | 142 | def Sequential_Forward_Selection_corr_test(Features,labeled_X,labled_Y,random_index,L,R): 143 | """ 144 | The input should be original Labeled X, y 145 | """ 146 | 147 | 148 | Feature_set = [] 149 | Feature_candidates = copy.deepcopy(Features) 150 | 151 | for i in range(R): 152 | error_list = [] 153 | err_nor_list = [] 154 | for tmp_Feature in Feature_candidates: 155 | tmp_Feature_set = copy.deepcopy(Feature_set) 156 | tmp_Feature_set.append(tmp_Feature) 157 | tmp_X_all = labeled_X[tmp_Feature_set] 158 | #print(tmp_X) 159 | #use LOOCV to evaluate model 160 | LOO_list = [] 161 | LOO_nor_list = [] 162 | for j in range(L): 163 | model = LinearRegression(fit_intercept=False) 164 | predict_x = tmp_X_all.iloc[j] 165 | tmp_X = tmp_X_all.drop(random_index[j]) 166 | predict_y = labled_Y.iloc[j] 167 | tmp_Y = labled_Y.drop(random_index[j]) 168 | print(tmp_X) 169 | mean_labled_x = tmp_X.mean() 170 | print(mean_labled_x) 171 | std_labled_x = tmp_X.std() 172 | mean_labled_y = 
tmp_Y.mean() 173 | std_labled_y = tmp_Y.std() 174 | ##Normalize## 175 | X_train_tmp = (tmp_X - mean_labled_x)/std_labled_x 176 | print(X_train_tmp) 177 | y_train_tmp = (tmp_Y-mean_labled_y)/std_labled_y 178 | predict_x = (predict_x-mean_labled_x)/std_labled_x 179 | predict_y_nor = (predict_y-mean_labled_y)/std_labled_y 180 | 181 | 182 | #print(tmp_X.to_numpy()) 183 | #print(tmp_Y.to_numpy()) 184 | model.fit(X_train_tmp.to_numpy(),y_train_tmp.to_numpy()) 185 | #print('alpha',alpha) 186 | #print('beta',beta) 187 | #print(predict_x) 188 | alpha_y = model.predict([predict_x]) 189 | real_alpha_y = (alpha_y[0]*std_labled_y+mean_labled_y).to_numpy() 190 | print('alpha_y',real_alpha_y[0]) 191 | 192 | tmp_error = (predict_y.to_numpy()[0] - real_alpha_y[0]) 193 | tmp_error_nor = alpha_y[0]-predict_y_nor.to_numpy()[0] 194 | #print(tmp_error) 195 | tmp_error_square = tmp_error * tmp_error 196 | tmp_err_nor_square = tmp_error_nor * tmp_error_nor 197 | LOO_list.append(tmp_error_square) 198 | LOO_nor_list.append(tmp_err_nor_square) 199 | print(LOO_list) 200 | error_list.append(np.mean(LOO_list)) 201 | err_nor_list.append(np.mean(LOO_nor_list)) 202 | 203 | 204 | min_index = np.argmin(np.array(error_list)) 205 | #print(min_index) 206 | if i>0: 207 | if error_list[min_index]>curr_error: 208 | return [Feature_set,curr_error_nor] 209 | curr_error = error_list[min_index] 210 | curr_error_nor = err_nor_list[min_index] 211 | Feature_set.append(Feature_candidates[min_index]) 212 | Feature_candidates = Feature_candidates.delete(min_index) 213 | #print(Feature_candidates) 214 | return [Feature_set,curr_error_nor] 215 | 216 | if __name__=="__main__": 217 | data_samples = pd.read_csv('train_20_feature.csv',index_col=FALSE) 218 | data_shape = data_samples.shape 219 | data_columnslable_x = data_samples.columns[:-2] 220 | data_columnslable_y = data_samples.columns[-1:] 221 | print(data_columnslable_x) 222 | print(data_columnslable_y) 223 | ##split the labeled and unlabeled data 224 | L = 5 225 | random_index = [32, 27, 29, 7, 20, 39, 16, 18, 24, 23, 11, 33, 10, 40, 5, 37, 2, 25, 34, 6, 36, 1, 21, 14, 9, 19, 13, 0, 12, 22, 35, 17, 3, 31, 4, 38, 28, 26, 30, 8, 15] 226 | data_labled = data_samples.loc[random_index[:L]] ##check why loc is diff from normal slice checked 227 | 228 | 229 | data_labled_x = data_labled[data_columnslable_x] 230 | data_labled_y = data_labled[data_columnslable_y] 231 | #### 232 | Partial_Feature = Sequential_Forward_Selection_corr(data_columnslable_x,data_labled_x,data_labled_y,random_index,L,L-2) 233 | 234 | print(Partial_Feature[0],Partial_Feature[1]) 235 | -------------------------------------------------------------------------------- /PBCT/Unlabeled_Offline/SFS.py: -------------------------------------------------------------------------------- 1 | from tkinter import FALSE 2 | from sklearn.model_selection import LeaveOneOut,cross_val_score 3 | from sklearn.linear_model import LinearRegression 4 | import numpy as np 5 | from numpy import mean,absolute,sqrt 6 | import copy 7 | import math 8 | import pandas as pd 9 | import random 10 | 11 | def Sequential_Forward_Selection(Features,labeled_X,labled_Y,R): 12 | Feature_set = [] 13 | Feature_candidates = copy.deepcopy(Features) 14 | 15 | cv = LeaveOneOut() 16 | model = LinearRegression(fit_intercept=False) 17 | for i in range(R): 18 | error_list = [] 19 | for tmp_Feature in Feature_candidates: 20 | tmp_Feature_set = copy.deepcopy(Feature_set) 21 | tmp_Feature_set.append(tmp_Feature) 22 | tmp_X = labeled_X[tmp_Feature_set] 23 | #print(tmp_X) 24 | 
#use LOOCV to evaluate model 25 | scores = cross_val_score(model, tmp_X, labled_Y, scoring='neg_mean_squared_error', 26 | cv=cv, n_jobs=-1) 27 | #print(scores) 28 | error_list.append(mean(absolute(scores))) 29 | 30 | min_index = np.argmin(np.array(error_list)) 31 | #print(min_index) 32 | if i>0: 33 | if error_list[min_index]>curr_error: 34 | return [Feature_set,curr_error] 35 | curr_error = error_list[min_index] 36 | Feature_set.append(Feature_candidates[min_index]) 37 | Feature_candidates = Feature_candidates.delete(min_index) 38 | #print(Feature_candidates) 39 | return [Feature_set,curr_error] 40 | 41 | 42 | def Sequential_Forward_Selection_dc(Features,labeled_X,labled_Y,R): 43 | Feature_set = ['dc'] 44 | Feature_candidates = copy.deepcopy(Features) 45 | 46 | cv = LeaveOneOut() 47 | model = LinearRegression(fit_intercept=False) 48 | for i in range(R): 49 | error_list = [] 50 | for tmp_Feature in Feature_candidates: 51 | tmp_Feature_set = copy.deepcopy(Feature_set) 52 | tmp_Feature_set.append(tmp_Feature) 53 | tmp_X = labeled_X[tmp_Feature_set] 54 | #print(tmp_X) 55 | #use LOOCV to evaluate model 56 | scores = cross_val_score(model, tmp_X, labled_Y, scoring='neg_mean_squared_error', 57 | cv=cv, n_jobs=-1) 58 | #print(scores) 59 | error_list.append(mean(absolute(scores))) 60 | 61 | min_index = np.argmin(np.array(error_list)) 62 | #print(min_index) 63 | if i>0: 64 | if error_list[min_index]>curr_error: 65 | return [Feature_set,curr_error] 66 | curr_error = error_list[min_index] 67 | Feature_set.append(Feature_candidates[min_index]) 68 | Feature_candidates = Feature_candidates.delete(min_index) 69 | #print(Feature_candidates) 70 | return [Feature_set,curr_error] 71 | 72 | 73 | 74 | 75 | def Sequential_Forward_Selection_corr(Features,labeled_X,labled_Y,random_index,L,R): 76 | """ 77 | The input should be original Labeled X, y 78 | """ 79 | 80 | 81 | Feature_set = [] 82 | Feature_candidates = copy.deepcopy(Features) 83 | 84 | for i in range(R): 85 | error_list = [] 86 | for tmp_Feature in Feature_candidates: 87 | tmp_Feature_set = copy.deepcopy(Feature_set) 88 | tmp_Feature_set.append(tmp_Feature) 89 | tmp_X_all = labeled_X[tmp_Feature_set] 90 | #print(tmp_X) 91 | #use LOOCV to evaluate model 92 | LOO_list = [] 93 | for j in range(L): 94 | model = LinearRegression(fit_intercept=False) 95 | predict_x = tmp_X_all.iloc[j] 96 | tmp_X = tmp_X_all.drop(random_index[j]) 97 | predict_y = labled_Y.iloc[j] 98 | tmp_Y = labled_Y.drop(random_index[j]) 99 | print(tmp_X) 100 | mean_labled_x = tmp_X.mean() 101 | print(mean_labled_x) 102 | std_labled_x = tmp_X.std() 103 | mean_labled_y = tmp_Y.mean() 104 | std_labled_y = tmp_Y.std() 105 | ##Normalize## 106 | X_train_tmp = (tmp_X - mean_labled_x)/std_labled_x 107 | print(X_train_tmp) 108 | y_train_tmp = (tmp_Y-mean_labled_y)/std_labled_y 109 | predict_x = (predict_x-mean_labled_x)/std_labled_x 110 | 111 | 112 | #print(tmp_X.to_numpy()) 113 | #print(tmp_Y.to_numpy()) 114 | model.fit(X_train_tmp.to_numpy(),y_train_tmp.to_numpy()) 115 | #print('alpha',alpha) 116 | #print('beta',beta) 117 | #print(predict_x) 118 | alpha_y = model.predict([predict_x]) 119 | real_alpha_y = (alpha_y[0]*std_labled_y+mean_labled_y).to_numpy() 120 | print('alpha_y',real_alpha_y[0]) 121 | 122 | tmp_error = (predict_y.to_numpy()[0] - real_alpha_y[0]) 123 | #print(tmp_error) 124 | tmp_error_square = tmp_error * tmp_error 125 | LOO_list.append(tmp_error_square) 126 | print(LOO_list) 127 | error_list.append(np.mean(LOO_list)) 128 | 129 | 130 | min_index = np.argmin(np.array(error_list)) 131 
| #print(min_index) 132 | if i>0: 133 | if error_list[min_index]>curr_error: 134 | return [Feature_set,curr_error] 135 | curr_error = error_list[min_index] 136 | Feature_set.append(Feature_candidates[min_index]) 137 | Feature_candidates = Feature_candidates.delete(min_index) 138 | #print(Feature_candidates) 139 | return [Feature_set,curr_error] 140 | 141 | 142 | def Sequential_Forward_Selection_corr_test(Features,labeled_X,labled_Y,random_index,L,R): 143 | """ 144 | The input should be the original (unnormalized) labeled X, y; this variant also tracks the normalized LOO error, which is what it returns 145 | """ 146 | 147 | 148 | Feature_set = [] 149 | Feature_candidates = copy.deepcopy(Features) 150 | 151 | for i in range(R): 152 | error_list = [] 153 | err_nor_list = [] 154 | for tmp_Feature in Feature_candidates: 155 | tmp_Feature_set = copy.deepcopy(Feature_set) 156 | tmp_Feature_set.append(tmp_Feature) 157 | tmp_X_all = labeled_X[tmp_Feature_set] 158 | #print(tmp_X) 159 | #use LOOCV to evaluate model 160 | LOO_list = [] 161 | LOO_nor_list = [] 162 | for j in range(L): 163 | model = LinearRegression(fit_intercept=False) 164 | predict_x = tmp_X_all.iloc[j] 165 | tmp_X = tmp_X_all.drop(random_index[j]) 166 | predict_y = labled_Y.iloc[j] 167 | tmp_Y = labled_Y.drop(random_index[j]) 168 | #print(tmp_X) 169 | mean_labled_x = tmp_X.mean() 170 | #print(mean_labled_x) 171 | std_labled_x = tmp_X.std() 172 | mean_labled_y = tmp_Y.mean() 173 | std_labled_y = tmp_Y.std() 174 | ##Normalize## 175 | X_train_tmp = (tmp_X - mean_labled_x)/std_labled_x 176 | #print(X_train_tmp) 177 | y_train_tmp = (tmp_Y-mean_labled_y)/std_labled_y 178 | predict_x = (predict_x-mean_labled_x)/std_labled_x 179 | predict_y_nor = (predict_y-mean_labled_y)/std_labled_y 180 | 181 | 182 | #print(tmp_X.to_numpy()) 183 | #print(tmp_Y.to_numpy()) 184 | model.fit(X_train_tmp.to_numpy(),y_train_tmp.to_numpy()) 185 | #print('alpha',alpha) 186 | #print('beta',beta) 187 | #print(predict_x) 188 | alpha_y = model.predict([predict_x]) 189 | real_alpha_y = (alpha_y[0]*std_labled_y+mean_labled_y).to_numpy() 190 | #print('alpha_y',real_alpha_y[0]) 191 | 192 | tmp_error = (predict_y.to_numpy()[0] - real_alpha_y[0]) 193 | tmp_error_nor = alpha_y[0]-predict_y_nor.to_numpy()[0] 194 | #print(tmp_error) 195 | tmp_error_square = tmp_error * tmp_error 196 | tmp_err_nor_square = tmp_error_nor * tmp_error_nor 197 | LOO_list.append(tmp_error_square) 198 | LOO_nor_list.append(tmp_err_nor_square) 199 | #print(LOO_list) 200 | error_list.append(np.mean(LOO_list)) 201 | err_nor_list.append(np.mean(LOO_nor_list)) 202 | 203 | 204 | min_index = np.argmin(np.array(error_list)) 205 | #print(min_index) 206 | if i>0: 207 | if error_list[min_index]>curr_error: 208 | return [Feature_set,curr_error_nor] 209 | curr_error = error_list[min_index] 210 | curr_error_nor = err_nor_list[min_index] 211 | Feature_set.append(Feature_candidates[min_index]) 212 | Feature_candidates = Feature_candidates.delete(min_index) 213 | #print(Feature_candidates) 214 | return [Feature_set,curr_error_nor] 215 | 216 | if __name__=="__main__": 217 | data_samples = pd.read_csv('train_20_feature.csv',index_col=0) # first unnamed column is the row index 218 | data_shape = data_samples.shape 219 | data_columnslable_x = data_samples.columns[:-2] 220 | data_columnslable_y = data_samples.columns[-1:] 221 | print(data_columnslable_x) 222 | print(data_columnslable_y) 223 | ##split the labeled and unlabeled data 224 | L = 5 225 | random_index = [32, 27, 29, 7, 20, 39, 16, 18, 24, 23, 11, 33, 10, 40, 5, 37, 2, 25, 34, 6, 36, 1, 21, 14, 9, 19, 13, 0, 12, 22, 35, 17, 3, 31, 4, 38, 28, 26, 30, 8, 15] 226 | data_labled = 
data_samples.loc[random_index[:L]] ##check why loc is diff from normal slice checked 227 | 228 | 229 | data_labled_x = data_labled[data_columnslable_x] 230 | data_labled_y = data_labled[data_columnslable_y] 231 | #### 232 | Partial_Feature = Sequential_Forward_Selection_corr(data_columnslable_x,data_labled_x,data_labled_y,random_index,L,L-2) 233 | 234 | print(Partial_Feature[0],Partial_Feature[1]) 235 | -------------------------------------------------------------------------------- /PBCT/Data/pri_20_feature.csv: -------------------------------------------------------------------------------- 1 | ,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,C16,C17,C18,C19,C20,Cyclelife,cycle_log 2 | 0,-2.072647967,-2.541601611,-2.507213266,-0.274040637,0.129790365,0.000767931,-0.000206807,1.091143732,3.53E-05,1.072424693,1.0706892,0.4683652,1.0759126,13.374894,35.994705,29.012251,3097.042122,0.016742354,0.016443744,-7.52E-05,1852,3.267640982 3 | 1,-1.764057861,-2.34806965,-2.368282615,0.033501873,-0.45762675,-0.000809314,1.03E-05,1.08484612,-4.42E-05,1.089364321,1.0799223,0.0081312,1.0849404,13.35824233,35.127342,29.269444,3084.890135,0.01686798,0,-1.38E-05,2237,3.349665984 4 | 2,-1.855176959,-2.240331542,-2.323654574,-0.440634335,0.125100669,-0.000916774,1.90E-05,1.081874821,-2.93E-05,1.085538347,1.0784167,0.0058999,1.0826457,12.04185067,35.651741,29.092649,3081.926451,0.016787428,0,-5.33E-05,1709,3.232742063 5 | 3,-1.421520794,-1.796829547,-1.884221694,-0.389408127,0.092745233,-0.004688861,-4.81E-06,1.081157026,-6.22E-05,1.085484911,1.0758358,0.0063032,1.0792426,10.076479,36.496124,29.495056,3162.727047,0.017001977,0,-0.000333346,636,2.803457116 6 | 4,-1.54174789,-1.938902016,-2.029474583,-0.368971416,0.072680249,-0.003251318,1.82E-05,1.086771425,-1.30E-05,1.088946321,1.0829742,0.0059757,1.0876312,11.207956,36.799232,29.999718,3198.856716,0.016978227,0.016611163,-3.37E-05,1054,3.022840611 7 | 5,-1.548306331,-1.922769556,-2.022817664,-0.549054366,0.118359685,-0.002537903,2.62E-05,1.074764977,-1.95E-05,1.07814312,1.0709661,0.0068777,1.0762016,10.764334,37.818813,29.769632,3241.551649,0.017121186,0.016649306,-6.81E-05,880,2.944482672 8 | 6,-1.5589859,-1.906445029,-2.049489582,-0.910272068,0.089233166,-0.004475491,2.32E-05,1.085288383,-3.06E-05,1.089363988,1.0811664,0.0074255,1.0863206,10.11310767,37.566532,29.542667,3176.911423,0.01683085,0,-1.48E-05,862,2.935507266 9 | 7,-1.35523443,-1.683979581,-1.824823017,-1.28333827,0.129215481,-0.008717842,-0.000294542,1.101465004,-0.000105866,1.074975859,1.0661705,1.817914,1.0645274,9.958173667,36.084282,29.688587,3119.430903,0.019886132,0.015845969,-0.003994543,691,2.839478047 10 | 8,-1.19667656,-1.484179347,-1.683401449,-0.616772288,0.030486792,-0.021207678,-0.000139776,1.076969671,-0.000233113,1.084587725,1.07263,0.0030942,1.0612478,9.021115,35.504932,29.662241,3150.86638,0.015740737,0,-0.000580528,534,2.727541257 11 | 9,-1.523552178,-1.87482397,-2.020179566,-0.844134617,0.090118977,-0.005262941,-6.95E-06,1.082815554,-2.04E-05,1.083739446,1.0792934,0.004493,1.0816909,11.44700633,36.165897,29.545927,3135.130054,0.016937815,0.016472535,-9.14E-06,1014,3.006037955 12 | 10,-1.531044892,-1.906361465,-2.013727154,-0.446196105,0.093909737,-0.003627993,5.13E-07,1.078821728,-3.22E-05,1.081288054,1.0752345,0.0046786,1.0780438,12.13012667,38.670872,30.400667,3317.175252,0.016699277,0.016397864,-1.13E-05,854,2.931457871 13 | 
11,-1.4457275,-1.809270974,-1.926634344,-0.538323647,0.092308315,-0.005440518,-1.08E-05,1.082584738,-1.33E-05,1.082296433,1.0790461,0.0039534,1.0810226,10.79698533,38.178074,30.495707,3321.71633,0.016174601,0.015953196,3.19E-05,842,2.925312091 14 | 12,-1.433345122,-1.779878218,-1.918923597,-0.861287007,0.106707044,-0.0065266,-5.21E-06,1.08304867,-3.73E-05,1.085559155,1.0794537,0.0043492,1.0818107,11.17953533,36.258759,30.240517,3205.023239,0.016626336,0.016308507,1.78E-05,917,2.962369336 15 | 13,-1.403642586,-1.745123636,-1.888649056,-0.955723216,0.094164493,-0.00787256,-2.62E-05,1.09840466,-4.08E-05,1.099446616,1.0946392,0.0036942,1.0953494,10.11367267,31.658901,29.649641,2976.825513,0.016952254,0.016684072,1.07E-05,876,2.942504106 16 | 14,-1.347736431,-1.664510621,-1.833311543,-1.188209862,0.090872478,-0.010515542,-2.18E-05,1.096604067,-4.61E-05,1.098375564,1.0924908,0.0043354,1.0937657,10.14642867,38.728722,30.119719,3299.917552,0.016591828,0.016214989,-1.72E-05,757,2.87909588 17 | 15,-1.407245667,-1.739371983,-1.886085652,-1.024626695,0.113918038,-0.007187952,3.20E-06,1.074344712,-2.63E-05,1.076614707,1.0707464,0.0050792,1.0739936,11.051195,36.075569,29.54327,3145.599717,0.017059039,0.016647711,3.24E-05,703,2.846955325 18 | 16,-1.351203133,-1.711374684,-1.836752191,-0.626213167,0.101669146,-0.006756777,-9.68E-06,1.078523771,-4.38E-05,1.081177342,1.0748194,0.0044724,1.0768065,11.574392,36.993626,29.988958,3230.101432,0.016607935,0.016318144,6.72E-05,648,2.811575006 19 | 17,-1.274546136,-1.611898061,-1.752049604,-0.816723512,0.106255475,-0.009724309,-3.29E-06,1.085827017,-2.56E-05,1.087359352,1.0817773,0.0046894,1.0847629,10.258443,38.024876,30.181021,3283.667684,0.016743599,0.016519627,9.16E-05,625,2.795880017 20 | 18,-1.54505928,-1.908031829,-2.035539729,-0.635718645,0.094453402,-0.004392038,-1.39E-07,1.082893853,-3.19E-05,1.085361975,1.0789614,0.0049075,1.0821686,12.087368,37.576675,30.021296,3241.202769,0.016856873,0.016552322,-9.89E-07,1051,3.021602716 21 | 19,-1.350607789,-1.717476973,-1.83367228,-0.624787306,0.114815318,-0.006383537,-2.40E-05,1.07174695,-3.97E-05,1.072803769,1.0682389,0.0034321,1.0688359,11.17087433,37.725166,30.034327,3267.260796,0.016111452,0.015904058,0.000127854,651,2.813580989 22 | 20,-1.226187894,-1.539612031,-1.712428862,-0.938555944,0.107343088,-0.015948906,-1.29E-05,1.087818373,-3.90E-05,1.089747546,1.0839558,0.0044825,1.0858277,10.25446533,37.855122,29.696999,3234.117647,0.016685704,0.016382927,0.000150384,599,2.777426822 23 | 21,-0.860038351,-1.109668103,-1.363233208,-0.031051806,-0.292914219,-0.0907911,-0.001092105,1.084408327,-0.001405749,1.109249796,1.0720705,0.0015459,0.96833885,10.32765733,38.543324,27.851921,3176.440781,0.017582141,0.016267318,-0.000832979,148,2.170261715 24 | 22,-0.996792629,-1.246301958,-1.464555834,-0.374624865,0.01478523,-0.044899376,-0.000227374,1.072520538,-0.000355322,1.082344632,1.0668509,0.0020589,1.046734,10.135263,39.808941,29.275244,3298.505686,0.016825944,0.015852956,-0.000333825,335,2.525044807 25 | 23,-1.317856778,-1.653251108,-1.801130253,-1.100566218,0.105057355,-0.00985517,-1.14E-06,1.072032036,-7.46E-05,1.078154133,1.0680153,0.0050053,1.070745,10.16286633,36.829567,29.224894,3141.087363,0.017180175,0.016727364,-0.000308501,480,2.681241237 26 | 24,-1.315854095,-1.628843703,-1.812939444,-0.78113063,0.037838938,-0.013147319,-6.00E-05,1.074592569,-0.000165943,1.083473019,1.0690521,0.0048897,1.0670482,10.114951,39.457458,29.682396,3274.369054,0.018265773,0.017548179,-0.000579358,561,2.748962861 27 | 
25,-1.177533343,-1.455802392,-1.65983767,-0.453949117,0.035080705,-0.024080687,-0.000148013,1.076501974,-0.000243721,1.084382516,1.0712759,0.4178432,1.0600308,10.17263867,39.435226,29.859741,3292.733011,0.017133227,0.016133603,-0.000956277,458,2.660865478 28 | 26,-1.282073166,-1.613205991,-1.749158647,-0.947698474,0.121548082,-0.010206932,-3.97E-06,1.082271562,-4.31E-05,1.085222101,1.0780684,0.0048147,1.0809475,10.114209,38.206268,29.854347,3221.053821,0.017164165,0.016895344,-0.000133067,485,2.685741739 29 | 27,-1.256229522,-1.576485457,-1.763610069,-1.001823429,0.004996959,-0.01603237,-8.16E-05,1.06828311,-0.000141642,1.072919504,1.063838,0.0037714,1.0588086,10.18597433,38.70813,29.016958,3229.927635,0.017536523,0.016697574,-0.000435537,487,2.687528961 30 | 28,-1.359308707,-1.679684478,-1.860250972,-1.071780486,0.035815002,-0.01060556,-6.30E-06,1.088277042,-8.80E-05,1.094967578,1.0831043,0.0063896,1.0862604,10.147933,38.40601,29.268984,3199.8853,0.017545482,0.017116761,-9.67E-05,502,2.700703717 31 | 29,-1.295833799,-1.625600205,-1.776956175,-1.271819313,0.103376173,-0.011103328,-8.08E-06,1.084615931,-8.59E-05,1.091102888,1.079723,0.0056468,1.0825431,10.14747167,36.146839,29.35239,3129.147508,0.017338831,0.016888004,-0.000345537,513,2.710117365 32 | 30,-1.35617917,-1.708177397,-1.847683699,-0.851878767,0.070388087,-0.008578796,2.77E-06,1.071938919,-8.55E-05,1.079509,1.0679682,0.0054579,1.0710108,10.058893,36.811321,26.808544,2947.781955,0.017488673,0.016777694,-0.000260558,495,2.694605199 33 | 31,-1.229542591,-1.5404255,-1.703363231,-1.125729639,0.117483669,-0.015056322,-3.70E-05,1.082561314,-0.000123453,1.089652165,1.0766411,0.0058852,1.0773104,10.00823233,36.585075,29.097336,3102.652549,0.018048871,0.017301282,-0.000577019,471,2.673020907 34 | 32,-1.286735986,-1.617675048,-1.760922583,-0.880585497,0.113089,-0.010281161,6.73E-06,1.076352866,-5.91E-05,1.082173513,1.07155,0.0063132,1.0759076,10.13968633,38.591679,29.461449,3240.364394,0.017233955,0.01686197,-8.50E-05,509,2.706717782 35 | 33,-1.325416644,-1.68269549,-1.802179826,-0.700897629,0.110284867,-0.007184448,8.46E-06,1.071719845,-6.42E-05,1.07765363,1.0672802,0.0058776,1.071249,10.09733367,37.101799,29.613438,3175.880694,0.017789029,0.017394017,-0.00031568,481,2.682145076 36 | 34,-1.237979444,-1.616384877,-1.743317803,-0.681143843,0.115776588,-0.008924289,1.78E-05,1.078764532,-3.53E-05,1.083118649,1.0742669,0.0066598,1.0795146,10.13043933,39.249191,29.45435,3258.621233,0.017645668,0.017273866,0.000109414,519,2.715167358 37 | 35,-1.372643592,-1.728728015,-1.851501363,-0.898147393,0.12951776,-0.005705056,2.09E-05,1.073610613,-5.00E-05,1.079649324,1.0688509,0.0068526,1.0746909,10.164115,38.83897,30.105608,3276.273487,0.017662423,0.017337065,-0.00014857,499,2.698100546 38 | 36,-1.309596445,-1.689763433,-1.781809923,-0.482088116,0.120810095,-0.005446508,2.41E-05,1.083945036,-3.12E-05,1.088271479,1.079731,0.0063388,1.0852383,10.14772167,39.087948,29.841801,3274.543878,0.02021559,0.020022187,9.95E-05,535,2.728353782 39 | 37,-1.289183904,-1.607177777,-1.767903376,-1.393010048,0.120361724,-0.011827637,1.65E-05,1.074105116,-4.34E-05,1.078955405,1.0699497,0.0057986,1.074622,10.14849333,37.465702,29.400135,3153.077772,0.017847016,0.017577264,-3.57E-05,465,2.667452953 40 | 38,-1.298509866,-1.621650674,-1.779714086,-1.500458354,0.118415814,-0.011114882,-1.99E-05,1.082268664,-5.40E-05,1.085115201,1.0765519,0.0065683,1.0798434,10.13158333,39.397137,29.524549,3267.607207,0.017582053,0.016943704,-0.000469548,499,2.698100546 41 | 
39,-1.301274614,-1.625376932,-1.793430444,-1.115133257,0.094905113,-0.012084104,1.18E-05,1.082518045,-6.92E-05,1.089385525,1.0780239,0.0060867,1.082616,10.13045433,39.339207,29.589365,3247.579941,0.017394457,0.016997622,-9.30E-05,466,2.668385917 42 | 40,-1.244631657,-1.577171966,-1.726073413,-1.389834643,0.111611938,-0.01248278,2.23E-05,1.075213642,-2.26E-05,1.078684715,1.070887,0.4738314,1.0763599,10.14690533,39.444595,29.683577,3236.6924,0.01761589,0.017408777,0.000186326,457,2.6599162 43 | 41,-1.219913247,-1.521692046,-1.684254412,-1.20553435,0.132095028,-0.015151973,2.94E-05,1.077631966,-1.32E-05,1.080845185,1.0726138,0.0073842,1.0795197,10.14691733,37.894932,29.27129,3166.380862,0.017752018,0.017551575,0.000247778,429,2.632457292 44 | 42,-1.528514098,-1.921233625,-2.016748066,-0.566280441,0.083097651,-0.003747603,-9.55E-06,1.049389155,-7.97E-05,1.054948215,1.0421375,0.0081127,1.0470229,10.112717,36.923679,29.348475,3175.861056,0.017923383,0.017025596,-0.000836281,713,2.85308953 -------------------------------------------------------------------------------- /PBCT/Offline/PBCT_offline.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | from SFS import Sequential_Forward_Selection_corr_test 4 | from sklearn.linear_model import LinearRegression 5 | from solve_loss import solve_loss 6 | from sklearn.model_selection import LeaveOneOut,cross_val_score 7 | import numpy as np 8 | import math 9 | import sklearn 10 | import random 11 | import matplotlib.pyplot as plt 12 | 13 | 14 | def PBCT_log(csv_name,L,U,random_index,repeated_num,coef_file): 15 | """ 16 | csv_name: name of the dataset file, without the .csv extension 17 | L: number of labeled data points 18 | U: number of unlabeled data points 19 | random_index: a random permutation of all the data indices 20 | repeated_num: random experiment index 21 | coef_file: path prefix used to store the learned model coefficients 22 | 23 | """ 24 | 25 | data_samples = pd.read_csv(csv_name+'.csv',index_col=0) # first unnamed column is the row index 26 | data_shape = data_samples.shape 27 | data_columnslable_x = data_samples.columns[:-2] 28 | data_columnslable_y = data_samples.columns[-1:] 29 | 30 | print('labeled_num',L) 31 | data_labled = data_samples.loc[random_index[:L]] 32 | data_unlabled = data_samples.loc[random_index[-U:]] 33 | print('unlabled_num',U) 34 | 35 | 36 | data_labled_x = data_labled[data_columnslable_x] 37 | data_labled_y = data_labled[data_columnslable_y] 38 | data_unlabled_x = data_unlabled[data_columnslable_x] 39 | data_unlabled_y = data_unlabled[data_columnslable_y] 40 | ####Normalize#### 41 | mean_labled_x = data_labled_x.mean() 42 | std_labled_x = data_labled_x.std() 43 | 44 | mean_labled_y = data_labled_y.mean() 45 | std_labled_y = data_labled_y.std() 46 | #print("std_y",std_labled_y) 47 | 48 | X_train_labled = (data_labled_x-mean_labled_x)/std_labled_x 49 | #print(X_train_labled) 50 | X_train_unlabled = (data_unlabled_x-mean_labled_x)/std_labled_x 51 | y_train = (data_labled_y-mean_labled_y)/std_labled_y 52 | y_test = (data_unlabled_y-mean_labled_y)/std_labled_y 53 | ################# 54 | 55 | ####build Partial model#### 56 | Partial_feature_var2 = Sequential_Forward_Selection_corr_test(data_columnslable_x,data_labled_x,data_labled_y,random_index,L,L-2) 57 | Partial_feature = Partial_feature_var2[0] 58 | print('var features for '+str(L)+' repeated_num '+str(repeated_num)+' is', Partial_feature) 59 | 60 | 61 | ZL = data_labled_x[Partial_feature] 62 | ZU = data_unlabled_x[Partial_feature] 63 | var2 = Partial_feature_var2[1] 64
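Note: var2 here is the normalized leave-one-out error of the selected partial-view model; it anchors both the search grid for var1 and the weights handed to solve_loss further down, where l1 = 1/(2*var1), l2 = 1/(2*var2), l3 = 0, l4 = 1/(2*(var1+var2)), and l5 is a multiple of l1. A minimal sketch of that mapping under the same noise-variance reading (make_weights is an illustrative name, not part of this repository):

def make_weights(var1, var2, l5_factor):
    # Weights for solve_loss under the variance interpretation used below.
    l1 = 1.0 / (2.0 * var1)              # complete-view data-fit weight
    l2 = 1.0 / (2.0 * var2)              # partial-view data-fit weight
    l3 = 0.0                             # labeled agreement term is disabled
    l4 = 1.0 / (2.0 * (var1 + var2))     # unlabeled agreement weight
    l5 = l1 * l5_factor                  # swept as a multiple of l1
    return l1, l2, l3, l4, l5

# the sweep below tries var1 in [v * var2 for v in (0.5, 1, 2, 5, 10)]
# and l5_factor in (10, 100)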
| print('var2 is ',var2) 65 | #var2 = max(1e-8,var2) 66 | #print('var2 is ',var2) 67 | ######################## 68 | cv = LeaveOneOut() 69 | 70 | #######this part should be checked##### 71 | lasso_model = sklearn.linear_model.LassoCV(fit_intercept=False,cv=cv,alphas=[0.01,0.1,1,10]) 72 | Reg_lasso = lasso_model.fit(X_train_labled.to_numpy(), y_train.to_numpy().ravel()) 73 | print('alpha of lasso is',Reg_lasso.alpha_) 74 | 75 | ####find the var1#### 76 | V = [0.5,1,2,5,10] 77 | print('current V',V) 78 | var1_candidate_set = [i*var2 for i in V] ## 79 | l5_candidate = [10,100] 80 | #V = 5 81 | #var1_candidate_set = [math.pow(0.1,V)*0.05*var2 for i in range(V)] 82 | LOO_list = [] 83 | 84 | for i in range(len(V)): 85 | for m in range(len(l5_candidate)): 86 | tmp_var1 = var1_candidate_set[i] 87 | l1 = 1/2/tmp_var1 88 | l2 = 1/2/var2 89 | l3 = 0 90 | l4 = 1/2/(tmp_var1+var2) 91 | #l5 = 0 92 | l5 = l1*l5_candidate[m] 93 | error_list = [] 94 | for j in range(L): 95 | predict_x = data_labled_x.iloc[j] 96 | tmp_X = data_labled_x.drop(random_index[j]) 97 | tmp_ZL = ZL.drop(random_index[j]) 98 | predict_y = data_labled_y.iloc[j] 99 | tmp_y = data_labled_y.drop(random_index[j]) 100 | 101 | ##Normalize## 102 | mean_labled_x = tmp_X.mean() 103 | std_labled_x = tmp_X.std() 104 | mean_labled_y = tmp_y.mean() 105 | std_labled_y = tmp_y.std() 106 | mean_labled_z = tmp_ZL.mean() 107 | std_labled_z = tmp_ZL.std() 108 | 109 | X_train_tmp = (tmp_X - mean_labled_x)/std_labled_x 110 | y_train_tmp = (tmp_y-mean_labled_y)/std_labled_y 111 | X_train_unlabled_tmp = (data_unlabled_x-mean_labled_x)/std_labled_x 112 | 113 | tmp_ZL = (tmp_ZL-mean_labled_z)/std_labled_z 114 | tmp_ZU = (ZU-mean_labled_z)/std_labled_z 115 | 116 | 117 | predict_x = (predict_x-mean_labled_x)/std_labled_x 118 | 119 | 120 | alpha,beta = solve_loss(y_train_tmp.to_numpy().ravel(),X_train_tmp.to_numpy(),tmp_ZL.to_numpy(), 121 | X_train_unlabled_tmp.to_numpy(),tmp_ZU.to_numpy(),l1,l2,l3,l4,l5) 122 | 123 | real_predict_y = predict_y.to_numpy() 124 | alpha_y = np.matmul(alpha.T,predict_x.to_numpy()) 125 | real_alpha_y = (alpha_y*std_labled_y+mean_labled_y).to_numpy() 126 | #print('real_alpha_y',real_alpha_y[0]) 127 | 128 | tmp_error = (real_predict_y[0] - real_alpha_y[0]) 129 | tmp_error_square = tmp_error * tmp_error 130 | error_list.append(tmp_error_square) 131 | LOO_list.append(np.mean(error_list)) 132 | #print(LOO_list) 133 | var1_index = np.argmin(np.array(LOO_list)) 134 | print(var1_index) 135 | l1_index = var1_index // len(l5_candidate) 136 | print(l1_index) 137 | l5_index = var1_index % len(l5_candidate) 138 | print(l5_index) 139 | ####################### 140 | print('var1_index',var1_index) 141 | ####get the optimal alpha and beta#### 142 | l1 = 1/2/var1_candidate_set[l1_index] 143 | l2 = 1/2/var2 144 | l3 = 0 145 | l4 = 1/2/(var1_candidate_set[l1_index]+var2) 146 | l5 = l1*l5_candidate[l5_index] 147 | 148 | ZL_all = X_train_labled[Partial_feature] 149 | ZU_all = X_train_unlabled[Partial_feature] 150 | 151 | 152 | 153 | alpha,beta = solve_loss(y_train.to_numpy().ravel(),X_train_labled.to_numpy(),ZL_all.to_numpy(), 154 | X_train_unlabled.to_numpy(),ZU_all.to_numpy(),l1,l2,l3,l4,l5) 155 | print('alpha is ',alpha) 156 | print('beta is',beta) 157 | Loss = (l1*math.pow(np.linalg.norm(y_train.to_numpy()-np.matmul(X_train_labled.to_numpy(),alpha)),2)+ 158 | l2*math.pow(np.linalg.norm(y_train.to_numpy()-np.matmul(ZL.to_numpy(),beta)),2)+ 159 | l3*math.pow(np.linalg.norm(np.matmul(X_train_labled.to_numpy(),alpha)-np.matmul(ZL.to_numpy(),beta)),2)+ 160 
| l4*math.pow(np.linalg.norm(np.matmul(X_train_unlabled.to_numpy(),alpha)-np.matmul(ZU.to_numpy(),beta)),2)) 161 | #check_loss(y_train,X_train_labled,ZL, 162 | # X_train_unlabled,ZU,l1,l2,l3,l4,alpha,beta,Loss) 163 | #print('Loss',math.pow(np.linalg.norm(np.matmul(X_train_unlabled.to_numpy(),alpha)-np.matmul(ZU.to_numpy(),beta)),2)) 164 | ##################################### 165 | 166 | 167 | ####start to test###### 168 | test_err_list = [] 169 | err_percent_list = [] 170 | for i in range(y_test.shape[0]): 171 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 172 | tmp_pre = np.matmul(alpha.T,X_train_unlabled.iloc[i].to_numpy())*std_labled_y+mean_labled_y 173 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 174 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 175 | tmp_error_square = tmp_error * tmp_error 176 | test_err_list.append(tmp_error_square) 177 | err_percent_list.append(tmp_percent_err) 178 | 179 | print('PBCT_RMSE',np.sqrt(np.mean(test_err_list))) 180 | print('PBCT_ERR',np.mean(err_percent_list)) 181 | 182 | PBCT_RMSE = np.sqrt(np.mean(test_err_list)) 183 | PBCT_ERR = np.mean(err_percent_list) 184 | #print(LOO_list) 185 | test0_err_list = [] 186 | err0_percent_list = [] 187 | for i in range(y_test.shape[0]): 188 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 189 | tmp_pre = np.matmul(beta.T,ZU_all.iloc[i].to_numpy())*std_labled_y+mean_labled_y 190 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 191 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 192 | tmp_error_square = tmp_error * tmp_error 193 | test0_err_list.append(tmp_error_square) 194 | err0_percent_list.append(tmp_percent_err) 195 | 196 | print('PBCT_beta_RMSE',np.sqrt(np.mean(test0_err_list))) 197 | print('PBCT_beta_ERR',np.mean(err0_percent_list)) 198 | PBCT_beta_RMSE = np.sqrt(np.mean(test0_err_list)) 199 | PBCT_beta_ERR = np.mean(err0_percent_list) 200 | 201 | ###LinearRegreesion normalized manually#### 202 | model = LinearRegression(fit_intercept=False) 203 | 204 | #print('Z_train_shape',ZL_all.to_numpy().shape) 205 | Reg = model.fit(ZL_all.to_numpy(), y_train.to_numpy()) 206 | test1_err_list = [] 207 | err1_percent_list = [] 208 | for i in range(y_test.shape[0]): 209 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 210 | tmp_pre = Reg.predict([ZU_all.iloc[i].to_numpy()])[0]*std_labled_y+mean_labled_y 211 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 212 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 213 | tmp_error_square = tmp_error * tmp_error 214 | test1_err_list.append(tmp_error_square) 215 | err1_percent_list.append(tmp_percent_err) 216 | print('LS_beta_RMSE',np.sqrt(np.mean(test1_err_list))) 217 | print('LS_beta_ERR',np.mean(err1_percent_list)) 218 | 219 | LS_beta_RMSE = np.sqrt(np.mean(test1_err_list)) 220 | LS_beta_ERR = np.mean(err1_percent_list) 221 | 222 | 223 | test2_err_list = [] 224 | err2_percent_list = [] 225 | for i in range(y_test.shape[0]): 226 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 227 | tmp_pre = Reg_lasso.predict([X_train_unlabled.iloc[i].to_numpy()])[0]*std_labled_y+mean_labled_y 228 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 229 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 230 | tmp_error_square = tmp_error * tmp_error 231 | test2_err_list.append(tmp_error_square) 232 | err2_percent_list.append(tmp_percent_err) 233 | print('LS_lasso_RMSE',np.sqrt(np.mean(test2_err_list))) 234 | print('LS_lasso_ERR',np.mean(err2_percent_list)) 235 | 
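Note: every test loop in these scripts follows the same pattern, mapping the normalized prediction back to raw label units and then accumulating squared and percentage errors. A vectorized equivalent, as a sketch only (denormalized_scores and its arguments are illustrative names, assuming NumPy arrays and scalar label statistics):

import numpy as np

def denormalized_scores(coef, X_norm, y_norm, mean_y, std_y):
    # coef: weight vector; X_norm: normalized test features;
    # y_norm: normalized test labels. Returns raw-unit RMSE and mean % error.
    pred = X_norm @ coef * std_y + mean_y
    truth = y_norm * std_y + mean_y
    rmse = float(np.sqrt(np.mean((truth - pred) ** 2)))
    pct_err = float(np.mean(np.abs((truth - pred) / truth)) * 100)
    return rmse, pct_err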
LS_lasso_RMSE = np.sqrt(np.mean(test2_err_list)) 236 | LS_lasso_ERR = np.mean(err2_percent_list) 237 | l1_ratio_list = [0.05,0.2,0.4,0.6,0.8,0.95] 238 | elasticNet_model = sklearn.linear_model.ElasticNetCV(fit_intercept=False,cv=cv,alphas=[0.01,0.1,1,10],l1_ratio = l1_ratio_list) 239 | Reg_elasticnet = elasticNet_model.fit(X_train_labled.to_numpy(), y_train.to_numpy().ravel()) 240 | print('alpha and l1_ratio of elasticNet is',Reg_elasticnet.alpha_,Reg_elasticnet.l1_ratio_) 241 | 242 | test3_err_list = [] 243 | err3_percent_list = [] 244 | for i in range(y_test.shape[0]): 245 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 246 | tmp_pre = Reg_elasticnet.predict([X_train_unlabled.iloc[i].to_numpy()])[0]*std_labled_y+mean_labled_y 247 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 248 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 249 | tmp_error_square = tmp_error * tmp_error 250 | test3_err_list.append(tmp_error_square) 251 | err3_percent_list.append(tmp_percent_err) 252 | print('LS_elasnet_RMSE',np.sqrt(np.mean(test3_err_list))) 253 | print('LS_elasnet_ERR',np.mean(err3_percent_list)) 254 | LS_elasnet_RMSE = np.sqrt(np.mean(test3_err_list)) 255 | LS_elasnet_ERR = np.mean(err3_percent_list) 256 | 257 | 258 | coef_list = [] 259 | coef_list.append(Reg.coef_[0]) 260 | coef_list.append(np.array([v for v in Reg_lasso.coef_])) 261 | coef_list.append(np.array([v for v in Reg_elasticnet.coef_])) 262 | coef_list.append(np.array([v for v in alpha.ravel()])) 263 | coef_list.append(np.array(beta.ravel())) 264 | tmp_df = pd.DataFrame(data = coef_list) 265 | tmp_df.to_csv(coef_file+str(L)+'_'+str(repeated_num)+'.csv',index = False, header = False) 266 | return PBCT_RMSE, PBCT_beta_RMSE,LS_beta_RMSE,LS_lasso_RMSE,LS_elasnet_RMSE 267 | 268 | if __name__=="__main__": 269 | repeated_num = 200 270 | coef_file = './tmp_coef_file/' 271 | all_average = {} 272 | all_median = {} 273 | csv_name = '../Data/pri_20_feature' 274 | file_name = 'tmp_file' 275 | unlabeled_num = 25 276 | data_num = 43 277 | random_index = [random.sample(range(data_num),data_num) for i in range(repeated_num)] 278 | for sweep_num in range(7,16): 279 | tmp_dict ={} 280 | all_list = [] 281 | for i in range(repeated_num): 282 | print('the ',i+1,'-th result') 283 | PBCT_RMSE, PBCT_beta_RMSE,LS_beta_RMSE,LS_lasso_RMSE,LS_elasnet_RMSE = PBCT_log(csv_name,sweep_num,unlabeled_num,random_index[i],i,coef_file) 284 | tmp_dict[i] = [PBCT_RMSE, PBCT_beta_RMSE,LS_beta_RMSE,LS_lasso_RMSE,LS_elasnet_RMSE] 285 | all_list.append(tmp_dict[i]) 286 | average_array = np.mean(np.array(all_list),axis = 0) 287 | median_array = np.median(np.array(all_list),axis = 0) 288 | tmp_dict['average'] = average_array 289 | tmp_dict['median'] = median_array 290 | all_average[sweep_num] = average_array 291 | all_median[sweep_num] = median_array 292 | df = pd.DataFrame(tmp_dict) 293 | df.to_csv('./'+file_name+'/'+csv_name+'_result_PBCT_'+str(sweep_num)+'_offline.csv',index=False) 294 | avg_all = pd.DataFrame(all_average) 295 | median_all = pd.DataFrame(all_median) 296 | avg_all.to_csv('./'+file_name+'/offline_avg'+'.csv',index = False) 297 | median_all.to_csv('./'+file_name+'/offline_median'+'.csv',index = False) 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | -------------------------------------------------------------------------------- /PBCT/Online/PBCT_online.py: -------------------------------------------------------------------------------- 1 | from tkinter import FALSE 2 | import pandas as pd 3 | from SFS import 
Sequential_Forward_Selection_corr_test 4 | from sklearn.linear_model import LinearRegression 5 | from solve_loss import solve_loss 6 | #from solve_loss import solve_loss 7 | from sklearn.model_selection import LeaveOneOut,cross_val_score 8 | import numpy as np 9 | import math 10 | import sklearn 11 | import random 12 | import matplotlib.pyplot as plt 13 | 14 | def PBCT_log(csv_name,L,U,Upper_var1,train_index,test_index,repeated_num,coef_file): 15 | data_samples = pd.read_csv(csv_name+'.csv',index_col=FALSE) 16 | data_shape = data_samples.shape 17 | data_columnslable_x = data_samples.columns[:-2] 18 | data_columnslable_y = data_samples.columns[-1:] 19 | print(data_columnslable_x) 20 | print(data_columnslable_y) 21 | ##split the labeled and unlabeled data 22 | #labeled_ratio = 0.10 23 | #L = int(data_shape[0]*labeled_ratio) 24 | #L = 12 25 | #U = 20 26 | #random_index = random.sample(range(120),L+U) 27 | print('labeled_num',L) 28 | random_index = train_index 29 | 30 | data_labled = data_samples.loc[random_index[:L]] ##check why loc is diff from normal slice checked 31 | #print(data_labled) 32 | data_unlabled = data_samples.loc[random_index[-U:]] 33 | data_test = data_samples.loc[test_index] 34 | #print(data_unlabled) 35 | print('unlabled_num',U) 36 | #print(data_labled) 37 | #print(data_unlabled) 38 | 39 | data_labled_x = data_labled[data_columnslable_x] 40 | data_labled_y = data_labled[data_columnslable_y] 41 | data_unlabled_x = data_unlabled[data_columnslable_x] 42 | data_unlabled_y = data_unlabled[data_columnslable_y] 43 | data_test_x = data_test[data_columnslable_x] 44 | data_test_y = data_test[data_columnslable_y] 45 | ####Nomalize#### 46 | mean_labled_x = data_labled_x.mean() 47 | std_labled_x = data_labled_x.std() 48 | 49 | mean_labled_y = data_labled_y.mean() 50 | std_labled_y = data_labled_y.std() 51 | print("std_y",std_labled_y) 52 | 53 | X_train_labled = (data_labled_x-mean_labled_x)/std_labled_x 54 | print(X_train_labled) 55 | X_train_unlabled = (data_unlabled_x-mean_labled_x)/std_labled_x 56 | y_train = (data_labled_y-mean_labled_y)/std_labled_y 57 | X_test = (data_test_x-mean_labled_x)/std_labled_x 58 | y_test = (data_test_y-mean_labled_y)/std_labled_y 59 | ################# 60 | 61 | ####build Partial model#### 62 | Partial_feature_var2 = Sequential_Forward_Selection_corr_test(data_columnslable_x,data_labled_x,data_labled_y,random_index,L,L-2) 63 | Partial_feature = Partial_feature_var2[0] 64 | print(Partial_feature) 65 | 66 | 67 | ZL = data_labled_x[Partial_feature] 68 | ZU = data_unlabled_x[Partial_feature] 69 | Z_test = X_test[Partial_feature] 70 | #var2 = max(Partial_feature_var2[1],1e-3) 71 | var2 = Partial_feature_var2[1] 72 | print('var2 is ',var2) 73 | #var2 = max(1e-8,var2) 74 | #print('var2 is ',var2) 75 | ######################## 76 | cv = LeaveOneOut() 77 | 78 | #######this part should be checked##### 79 | lasso_model = sklearn.linear_model.LassoCV(fit_intercept=False,cv=cv,alphas=[0.01,0.1,1,10]) 80 | Reg_lasso = lasso_model.fit(X_train_labled.to_numpy(), y_train.to_numpy().ravel()) 81 | print('alpha of lasso is',Reg_lasso.alpha_) 82 | 83 | ####find the var1#### 84 | V = [0.5,1,2,5,10] 85 | print('current V',V) 86 | var1_candidate_set = [Upper_var1*i*var2 for i in V] ## 87 | l5_candidate = [10,100] 88 | #V = 5 89 | #var1_candidate_set = [math.pow(0.1,V)*0.05*var2 for i in range(V)] 90 | LOO_list = [] 91 | 92 | for i in range(len(V)): 93 | for m in range(len(l5_candidate)): 94 | tmp_var1 = var1_candidate_set[i] 95 | l1 = 1/2/tmp_var1 96 | l2 = 1/2/var2 97 | l3 = 0 
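# NOTE: l3 stays 0 in every variant, so the term that would force the
# complete-view (alpha) and partial-view (beta) predictions to agree on
# the *labeled* rows is switched off; agreement is only imposed on the
# unlabeled rows through l4. l5 (set just below) is swept as a multiple
# of l1; its exact role is defined inside solve_loss.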
98 | l4 = 1/2/(tmp_var1+var2) 99 | #l5 = 0 100 | l5 = l1*l5_candidate[m] 101 | error_list = [] 102 | for j in range(L): 103 | predict_x = data_labled_x.iloc[j] 104 | tmp_X = data_labled_x.drop(random_index[j]) 105 | tmp_ZL = ZL.drop(random_index[j]) 106 | predict_y = data_labled_y.iloc[j] 107 | tmp_y = data_labled_y.drop(random_index[j]) 108 | 109 | ##Normalize## 110 | mean_labled_x = tmp_X.mean() 111 | std_labled_x = tmp_X.std() 112 | mean_labled_y = tmp_y.mean() 113 | std_labled_y = tmp_y.std() 114 | mean_labled_z = tmp_ZL.mean() 115 | std_labled_z = tmp_ZL.std() 116 | 117 | X_train_tmp = (tmp_X - mean_labled_x)/std_labled_x 118 | y_train_tmp = (tmp_y-mean_labled_y)/std_labled_y 119 | X_train_unlabled_tmp = (data_unlabled_x-mean_labled_x)/std_labled_x 120 | 121 | tmp_ZL = (tmp_ZL-mean_labled_z)/std_labled_z 122 | tmp_ZU = (ZU-mean_labled_z)/std_labled_z 123 | 124 | 125 | predict_x = (predict_x-mean_labled_x)/std_labled_x 126 | 127 | 128 | alpha,beta = solve_loss(y_train_tmp.to_numpy().ravel(),X_train_tmp.to_numpy(),tmp_ZL.to_numpy(), 129 | X_train_unlabled_tmp.to_numpy(),tmp_ZU.to_numpy(),l1,l2,l3,l4,l5) 130 | 131 | real_predict_y = predict_y.to_numpy() 132 | alpha_y = np.matmul(alpha.T,predict_x.to_numpy()) 133 | real_alpha_y = (alpha_y*std_labled_y+mean_labled_y).to_numpy() 134 | 135 | tmp_error = (real_predict_y[0] - real_alpha_y[0]) 136 | tmp_error_square = tmp_error * tmp_error 137 | error_list.append(tmp_error_square) 138 | LOO_list.append(np.mean(error_list)) 139 | print(LOO_list) 140 | var1_index = np.argmin(np.array(LOO_list)) 141 | print(var1_index) 142 | l1_index = var1_index // len(l5_candidate) 143 | print(l1_index) 144 | l5_index = var1_index % len(l5_candidate) 145 | print(l5_index) 146 | ####################### 147 | print('var1_index',var1_index) 148 | ####get the optimal alpha and beta#### 149 | l1 = 1/2/var1_candidate_set[l1_index] 150 | l2 = 1/2/var2 151 | l3 = 0 152 | l4 = 1/2/(var1_candidate_set[l1_index]+var2) 153 | l5 = l1*l5_candidate[l5_index] 154 | 155 | ZL_all = X_train_labled[Partial_feature] 156 | ZU_all = X_train_unlabled[Partial_feature] 157 | 158 | 159 | 160 | alpha,beta = solve_loss(y_train.to_numpy().ravel(),X_train_labled.to_numpy(),ZL_all.to_numpy(), 161 | X_train_unlabled.to_numpy(),ZU_all.to_numpy(),l1,l2,l3,l4,l5) 162 | print('alpha is ',alpha) 163 | print('beta is',beta) 164 | Loss = (l1*math.pow(np.linalg.norm(y_train.to_numpy()-np.matmul(X_train_labled.to_numpy(),alpha)),2)+ 165 | l2*math.pow(np.linalg.norm(y_train.to_numpy()-np.matmul(ZL.to_numpy(),beta)),2)+ 166 | l3*math.pow(np.linalg.norm(np.matmul(X_train_labled.to_numpy(),alpha)-np.matmul(ZL.to_numpy(),beta)),2)+ 167 | l4*math.pow(np.linalg.norm(np.matmul(X_train_unlabled.to_numpy(),alpha)-np.matmul(ZU.to_numpy(),beta)),2)) 168 | #check_loss(y_train,X_train_labled,ZL, 169 | # X_train_unlabled,ZU,l1,l2,l3,l4,alpha,beta,Loss) 170 | print('Loss',math.pow(np.linalg.norm(np.matmul(X_train_unlabled.to_numpy(),alpha)-np.matmul(ZU.to_numpy(),beta)),2)) 171 | ##################################### 172 | 173 | 174 | ####start to test###### 175 | test_err_list = [] 176 | err_percent_list = [] 177 | for i in range(y_test.shape[0]): 178 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 179 | tmp_pre = np.matmul(alpha.T,X_test.iloc[i].to_numpy())*std_labled_y+mean_labled_y 180 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 181 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 182 | tmp_error_square = tmp_error * tmp_error 183 | test_err_list.append(tmp_error_square) 184 
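# NOTE: var1_index above comes from a flat row-major scan of the
# (var1, l5) grid, hence the // and % decoding into l1_index and
# l5_index; divmod(var1_index, len(l5_candidate)) is equivalent in
# one step.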
| err_percent_list.append(tmp_percent_err) 185 | 186 | print('PBCT_RMSE',np.sqrt(np.mean(test_err_list))) 187 | print('PBCT_ERR',np.mean(err_percent_list)) 188 | 189 | PBCT_RMSE = np.sqrt(np.mean(test_err_list)) 190 | PBCT_ERR = np.mean(err_percent_list) 191 | #print(LOO_list) 192 | test0_err_list = [] 193 | err0_percent_list = [] 194 | for i in range(y_test.shape[0]): 195 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 196 | tmp_pre = np.matmul(beta.T,Z_test.iloc[i].to_numpy())*std_labled_y+mean_labled_y 197 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 198 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 199 | tmp_error_square = tmp_error * tmp_error 200 | test0_err_list.append(tmp_error_square) 201 | err0_percent_list.append(tmp_percent_err) 202 | 203 | print('PBCT_beta_RMSE',np.sqrt(np.mean(test0_err_list))) 204 | print('PBCT_beta_ERR',np.mean(err0_percent_list)) 205 | PBCT_beta_RMSE = np.sqrt(np.mean(test0_err_list)) 206 | PBCT_beta_ERR = np.mean(err0_percent_list) 207 | 208 | ###LinearRegreesion normalized manually#### 209 | model = LinearRegression(fit_intercept=False) 210 | 211 | print('Z_train_shape',ZL_all.to_numpy().shape) 212 | Reg = model.fit(ZL_all.to_numpy(), y_train.to_numpy()) 213 | test1_err_list = [] 214 | err1_percent_list = [] 215 | for i in range(y_test.shape[0]): 216 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 217 | tmp_pre = Reg.predict([Z_test.iloc[i].to_numpy()])[0]*std_labled_y+mean_labled_y 218 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 219 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 220 | tmp_error_square = tmp_error * tmp_error 221 | test1_err_list.append(tmp_error_square) 222 | err1_percent_list.append(tmp_percent_err) 223 | print('LS_beta_RMSE',np.sqrt(np.mean(test1_err_list))) 224 | print('LS_beta_ERR',np.mean(err1_percent_list)) 225 | 226 | LS_beta_RMSE = np.sqrt(np.mean(test1_err_list)) 227 | LS_beta_ERR = np.mean(err1_percent_list) 228 | 229 | 230 | test2_err_list = [] 231 | err2_percent_list = [] 232 | for i in range(y_test.shape[0]): 233 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 234 | tmp_pre = Reg_lasso.predict([X_test.iloc[i].to_numpy()])[0]*std_labled_y+mean_labled_y 235 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 236 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 237 | tmp_error_square = tmp_error * tmp_error 238 | test2_err_list.append(tmp_error_square) 239 | err2_percent_list.append(tmp_percent_err) 240 | print('LS_lasso_RMSE',np.sqrt(np.mean(test2_err_list))) 241 | print('LS_lasso_ERR',np.mean(err2_percent_list)) 242 | LS_lasso_RMSE = np.sqrt(np.mean(test2_err_list)) 243 | LS_lasso_ERR = np.mean(err2_percent_list) 244 | l1_ratio_list = [0.05,0.2,0.4,0.6,0.8,0.95] 245 | elasticNet_model = sklearn.linear_model.ElasticNetCV(fit_intercept=False,cv=cv,alphas=[0.01,0.1,1,10],l1_ratio = l1_ratio_list) 246 | Reg_elasticnet = elasticNet_model.fit(X_train_labled.to_numpy(), y_train.to_numpy().ravel()) 247 | print('alpha and l1_ratio of elasticNet is',Reg_elasticnet.alpha_,Reg_elasticnet.l1_ratio_) 248 | 249 | test3_err_list = [] 250 | err3_percent_list = [] 251 | for i in range(y_test.shape[0]): 252 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 253 | tmp_pre = Reg_elasticnet.predict([X_test.iloc[i].to_numpy()])[0]*std_labled_y+mean_labled_y 254 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 255 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 256 | tmp_error_square = tmp_error * 
tmp_error 257 | test3_err_list.append(tmp_error_square) 258 | err3_percent_list.append(tmp_percent_err) 259 | print('LS_elasnet_RMSE',np.sqrt(np.mean(test3_err_list))) 260 | print('LS_elasnet_ERR',np.mean(err3_percent_list)) 261 | LS_elasnet_RMSE = np.sqrt(np.mean(test3_err_list)) 262 | LS_elasnet_ERR = np.mean(err3_percent_list) 263 | 264 | 265 | coef_list = [] 266 | coef_list.append(Reg.coef_[0]) 267 | coef_list.append(np.array([v for v in Reg_lasso.coef_])) 268 | coef_list.append(np.array([v for v in Reg_elasticnet.coef_])) 269 | coef_list.append(np.array([v for v in alpha.ravel()])) 270 | coef_list.append(np.array(beta.ravel())) 271 | tmp_df = pd.DataFrame(data = coef_list) 272 | tmp_df.to_csv(coef_file+str(L)+'_'+str(repeated_num)+'.csv',index = False, header = False) 273 | return PBCT_RMSE, PBCT_beta_RMSE,LS_beta_RMSE,LS_lasso_RMSE,LS_elasnet_RMSE 274 | 275 | 276 | if __name__=="__main__": 277 | coef_file = './tmp_coef/' 278 | repeated_num = 450 279 | all_average = {} 280 | all_median = {} 281 | csv_name = '../Data/pri_20_feature' 282 | file_name = 'tmp_file' 283 | Upper_var1 = 1 284 | random_index = [random.sample(range(43),43) for i in range(repeated_num)] 285 | for sweep_num in range(7,16): 286 | tmp_dict ={} 287 | all_list = [] 288 | for i in range(repeated_num): 289 | print('the ',i+1,'-th result') 290 | train_index = random_index[i][:33] 291 | test_index = random_index[i][33:] 292 | train_num = len(train_index) 293 | PBCT_RMSE, PBCT_beta_RMSE,LS_beta_RMSE,LS_lasso_RMSE,LS_elasnet_RMSE = PBCT_log(csv_name,sweep_num,train_num-sweep_num,Upper_var1,train_index,test_index,i,coef_file) 294 | tmp_dict[i] = [PBCT_RMSE, PBCT_beta_RMSE,LS_beta_RMSE,LS_lasso_RMSE,LS_elasnet_RMSE] 295 | all_list.append(tmp_dict[i]) 296 | average_array = np.mean(np.array(all_list),axis = 0) 297 | median_array = np.median(np.array(all_list),axis = 0) 298 | tmp_dict['average'] = average_array 299 | tmp_dict['median'] = median_array 300 | all_average[sweep_num] = average_array 301 | all_median[sweep_num] = median_array 302 | df = pd.DataFrame(tmp_dict) 303 | df.to_csv('./'+file_name+'/'+csv_name.split('/')[-1]+'_result_PBCT_'+str(sweep_num)+'_'+str(Upper_var1)+'_online.csv',index=False) # basename only, so results land in file_name rather than ../Data 304 | avg_all = pd.DataFrame(all_average) 305 | median_all = pd.DataFrame(all_median) 306 | avg_all.to_csv('./'+file_name+'/online_avg_'+str(Upper_var1)+'.csv',index = False) 307 | median_all.to_csv('./'+file_name+'/online_median_'+str(Upper_var1)+'.csv',index = False) 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | -------------------------------------------------------------------------------- /PBCT/Unlabeled_Online/PBCT_online_unlabeled.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | from SFS import Sequential_Forward_Selection_corr_test 4 | from sklearn.linear_model import LinearRegression 5 | from solve_loss import solve_loss 6 | #from solve_loss import solve_loss 7 | from sklearn.model_selection import LeaveOneOut,cross_val_score 8 | import numpy as np 9 | import math 10 | import sklearn 11 | import random 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | def PBCT_log(csv_name,L,U,test_num,Upper_var1,random_index,repeated_num,coef_file): 16 | data_samples = pd.read_csv(csv_name+'.csv',index_col=0) # first unnamed column is the row index 17 | data_shape = data_samples.shape 18 | data_columnslable_x = data_samples.columns[:-2] 19 | data_columnslable_y = data_samples.columns[-1:] 20 | #print(data_columnslable_x) 21 | #print(data_columnslable_y) 22 | ##split the labeled and
unlabeled data 23 | #labeled_ratio = 0.10 24 | #L = int(data_shape[0]*labeled_ratio) 25 | #L = 12 26 | #U = 20 27 | #random_index = random.sample(range(120),L+U) 28 | print('labeled_num',L) 29 | data_labled = data_samples.loc[random_index[:L]] ##check why loc is diff from normal slice checked 30 | #print(data_labled) 31 | data_unlabled = data_samples.loc[random_index[L:L+U]] 32 | data_test = data_samples.loc[random_index[-test_num:]] 33 | #print(data_unlabled) 34 | print('unlabled_num',U) 35 | #print(data_labled) 36 | #print(data_unlabled) 37 | 38 | data_labled_x = data_labled[data_columnslable_x] 39 | data_labled_y = data_labled[data_columnslable_y] 40 | data_unlabled_x = data_unlabled[data_columnslable_x] 41 | data_unlabled_y = data_unlabled[data_columnslable_y] 42 | data_test_x = data_test[data_columnslable_x] 43 | data_test_y = data_test[data_columnslable_y] 44 | ####Nomalize#### 45 | mean_labled_x = data_labled_x.mean() 46 | std_labled_x = data_labled_x.std() 47 | 48 | mean_labled_y = data_labled_y.mean() 49 | std_labled_y = data_labled_y.std() 50 | #print("std_y",std_labled_y) 51 | 52 | X_train_labled = (data_labled_x-mean_labled_x)/std_labled_x 53 | #print(X_train_labled) 54 | X_train_unlabled = (data_unlabled_x-mean_labled_x)/std_labled_x 55 | 56 | X_test= (data_test_x-mean_labled_x)/std_labled_x 57 | y_train = (data_labled_y-mean_labled_y)/std_labled_y 58 | y_test = (data_test_y-mean_labled_y)/std_labled_y 59 | ################# 60 | 61 | ####build Partial model#### 62 | Partial_feature_var2 = Sequential_Forward_Selection_corr_test(data_columnslable_x,data_labled_x,data_labled_y,random_index,L,L-2) 63 | Partial_feature = Partial_feature_var2[0] 64 | print('var features for'+str(L)+' '+'repeated_num '+str(repeated_num)+'is', Partial_feature) 65 | 66 | 67 | ZL = data_labled_x[Partial_feature] 68 | ZU = data_unlabled_x[Partial_feature] 69 | var2 = Partial_feature_var2[1] 70 | print('var2 is ',var2) 71 | #var2 = max(1e-8,var2) 72 | #print('var2 is ',var2) 73 | ######################## 74 | cv = LeaveOneOut() 75 | 76 | #######this part should be checked##### 77 | lasso_model = sklearn.linear_model.LassoCV(fit_intercept=False,cv=cv,alphas=[0.01,0.1,1,10]) 78 | Reg_lasso = lasso_model.fit(X_train_labled.to_numpy(), y_train.to_numpy().ravel()) 79 | print('alpha of lasso is',Reg_lasso.alpha_) 80 | 81 | ####find the var1#### 82 | V = [0.5,1,2,5,10] 83 | print('current V',V) 84 | var1_candidate_set = [Upper_var1*i*var2 for i in V] ## 85 | l5_candidate = [10,100] 86 | #V = 5 87 | #var1_candidate_set = [math.pow(0.1,V)*0.05*var2 for i in range(V)] 88 | LOO_list = [] 89 | 90 | for i in range(len(V)): 91 | for m in range(len(l5_candidate)): 92 | tmp_var1 = var1_candidate_set[i] 93 | l1 = 1/2/tmp_var1 94 | l2 = 1/2/var2 95 | l3 = 0 96 | l4 = 1/2/(tmp_var1+var2) 97 | #l5 = 0 98 | l5 = l1*l5_candidate[m] 99 | error_list = [] 100 | for j in range(L): 101 | predict_x = data_labled_x.iloc[j] 102 | tmp_X = data_labled_x.drop(random_index[j]) 103 | tmp_ZL = ZL.drop(random_index[j]) 104 | predict_y = data_labled_y.iloc[j] 105 | tmp_y = data_labled_y.drop(random_index[j]) 106 | 107 | ##Normalize## 108 | mean_labled_x = tmp_X.mean() 109 | std_labled_x = tmp_X.std() 110 | mean_labled_y = tmp_y.mean() 111 | std_labled_y = tmp_y.std() 112 | mean_labled_z = tmp_ZL.mean() 113 | std_labled_z = tmp_ZL.std() 114 | 115 | X_train_tmp = (tmp_X - mean_labled_x)/std_labled_x 116 | y_train_tmp = (tmp_y-mean_labled_y)/std_labled_y 117 | X_train_unlabled_tmp = (data_unlabled_x-mean_labled_x)/std_labled_x 118 | 119 | 
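# NOTE: the mean/std statistics are recomputed here on the L-1 rows
# retained in each leave-one-out fold rather than reused from the full
# labeled sample, so the held-out point never leaks into the
# normalization it is scored under; predict_x below is scaled with the
# same fold statistics.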
tmp_ZL = (tmp_ZL-mean_labled_z)/std_labled_z 120 | tmp_ZU = (ZU-mean_labled_z)/std_labled_z 121 | 122 | 123 | predict_x = (predict_x-mean_labled_x)/std_labled_x 124 | 125 | 126 | alpha,beta = solve_loss(y_train_tmp.to_numpy().ravel(),X_train_tmp.to_numpy(),tmp_ZL.to_numpy(), 127 | X_train_unlabled_tmp.to_numpy(),tmp_ZU.to_numpy(),l1,l2,l3,l4,l5) 128 | 129 | real_predict_y = predict_y.to_numpy() 130 | alpha_y = np.matmul(alpha.T,predict_x.to_numpy()) 131 | real_alpha_y = (alpha_y*std_labled_y+mean_labled_y).to_numpy() 132 | #print('real_alpha_y',real_alpha_y[0]) 133 | 134 | tmp_error = (real_predict_y[0] - real_alpha_y[0]) 135 | tmp_error_square = tmp_error * tmp_error 136 | error_list.append(tmp_error_square) 137 | LOO_list.append(np.mean(error_list)) 138 | #print(LOO_list) 139 | var1_index = np.argmin(np.array(LOO_list)) 140 | print(var1_index) 141 | l1_index = var1_index // len(l5_candidate) 142 | print(l1_index) 143 | l5_index = var1_index % len(l5_candidate) 144 | print(l5_index) 145 | ####################### 146 | print('var1_index',var1_index) 147 | ####get the optimal alpha and beta#### 148 | l1 = 1/2/var1_candidate_set[l1_index] 149 | l2 = 1/2/var2 150 | l3 = 0 151 | l4 = 1/2/(var1_candidate_set[l1_index]+var2) 152 | l5 = l1*l5_candidate[l5_index] 153 | 154 | ZL_all = X_train_labled[Partial_feature] 155 | ZU_all = X_train_unlabled[Partial_feature] 156 | 157 | 158 | 159 | alpha,beta = solve_loss(y_train.to_numpy().ravel(),X_train_labled.to_numpy(),ZL_all.to_numpy(), 160 | X_train_unlabled.to_numpy(),ZU_all.to_numpy(),l1,l2,l3,l4,l5) 161 | print('alpha is ',alpha) 162 | print('beta is',beta) 163 | Loss = (l1*math.pow(np.linalg.norm(y_train.to_numpy()-np.matmul(X_train_labled.to_numpy(),alpha)),2)+ 164 | l2*math.pow(np.linalg.norm(y_train.to_numpy()-np.matmul(ZL.to_numpy(),beta)),2)+ 165 | l3*math.pow(np.linalg.norm(np.matmul(X_train_labled.to_numpy(),alpha)-np.matmul(ZL.to_numpy(),beta)),2)+ 166 | l4*math.pow(np.linalg.norm(np.matmul(X_train_unlabled.to_numpy(),alpha)-np.matmul(ZU.to_numpy(),beta)),2)) 167 | #check_loss(y_train,X_train_labled,ZL, 168 | # X_train_unlabled,ZU,l1,l2,l3,l4,alpha,beta,Loss) 169 | #print('Loss',math.pow(np.linalg.norm(np.matmul(X_train_unlabled.to_numpy(),alpha)-np.matmul(ZU.to_numpy(),beta)),2)) 170 | ##################################### 171 | z_test_all = X_test[Partial_feature] 172 | 173 | ####start to test###### 174 | test_err_list = [] 175 | err_percent_list = [] 176 | for i in range(y_test.shape[0]): 177 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 178 | tmp_pre = np.matmul(alpha.T,X_test.iloc[i].to_numpy())*std_labled_y+mean_labled_y 179 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 180 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 181 | tmp_error_square = tmp_error * tmp_error 182 | test_err_list.append(tmp_error_square) 183 | err_percent_list.append(tmp_percent_err) 184 | 185 | print('PBCT_RMSE',np.sqrt(np.mean(test_err_list))) 186 | print('PBCT_ERR',np.mean(err_percent_list)) 187 | 188 | PBCT_RMSE = np.sqrt(np.mean(test_err_list)) 189 | PBCT_ERR = np.mean(err_percent_list) 190 | #print(LOO_list) 191 | test0_err_list = [] 192 | err0_percent_list = [] 193 | for i in range(y_test.shape[0]): 194 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 195 | tmp_pre = np.matmul(beta.T,z_test_all.iloc[i].to_numpy())*std_labled_y+mean_labled_y 196 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 197 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 198 | tmp_error_square = tmp_error * 
tmp_error 199 | test0_err_list.append(tmp_error_square) 200 | err0_percent_list.append(tmp_percent_err) 201 | 202 | print('PBCT_beta_RMSE',np.sqrt(np.mean(test0_err_list))) 203 | print('PBCT_beta_ERR',np.mean(err0_percent_list)) 204 | PBCT_beta_RMSE = np.sqrt(np.mean(test0_err_list)) 205 | PBCT_beta_ERR = np.mean(err0_percent_list) 206 | 207 | ###LinearRegreesion normalized manually#### 208 | model = LinearRegression(fit_intercept=False) 209 | 210 | #print('Z_train_shape',ZL_all.to_numpy().shape) 211 | Reg = model.fit(ZL_all.to_numpy(), y_train.to_numpy()) 212 | test1_err_list = [] 213 | err1_percent_list = [] 214 | for i in range(y_test.shape[0]): 215 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 216 | tmp_pre = Reg.predict([z_test_all.iloc[i].to_numpy()])[0]*std_labled_y+mean_labled_y 217 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 218 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 219 | tmp_error_square = tmp_error * tmp_error 220 | test1_err_list.append(tmp_error_square) 221 | err1_percent_list.append(tmp_percent_err) 222 | print('LS_beta_RMSE',np.sqrt(np.mean(test1_err_list))) 223 | print('LS_beta_ERR',np.mean(err1_percent_list)) 224 | 225 | LS_beta_RMSE = np.sqrt(np.mean(test1_err_list)) 226 | LS_beta_ERR = np.mean(err1_percent_list) 227 | 228 | 229 | 230 | 231 | 232 | #lasso_model = sklearn.linear_model.LassoCV(fit_intercept=False,cv=cv,tol=1e-2) 233 | 234 | #Reg_lasso = lasso_model.fit(X_train_labled.to_numpy(), y_train.to_numpy().ravel()) 235 | #print('alpha of lasso is',Reg_lasso.alpha_) 236 | test2_err_list = [] 237 | err2_percent_list = [] 238 | for i in range(y_test.shape[0]): 239 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 240 | tmp_pre = Reg_lasso.predict([X_test.iloc[i].to_numpy()])[0]*std_labled_y+mean_labled_y 241 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 242 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 243 | tmp_error_square = tmp_error * tmp_error 244 | test2_err_list.append(tmp_error_square) 245 | err2_percent_list.append(tmp_percent_err) 246 | print('LS_lasso_RMSE',np.sqrt(np.mean(test2_err_list))) 247 | print('LS_lasso_ERR',np.mean(err2_percent_list)) 248 | LS_lasso_RMSE = np.sqrt(np.mean(test2_err_list)) 249 | LS_lasso_ERR = np.mean(err2_percent_list) 250 | l1_ratio_list =[0.05,0.2,0.4,0.6,0.8,0.95] 251 | elasticNet_model = sklearn.linear_model.ElasticNetCV(fit_intercept=False,cv=cv,alphas=[0.01,0.1,1,10],l1_ratio = l1_ratio_list) 252 | Reg_elasticnet = elasticNet_model.fit(X_train_labled.to_numpy(), y_train.to_numpy().ravel()) 253 | print('alpha and l1_ratio of elasticNet is',Reg_elasticnet.alpha_,Reg_elasticnet.l1_ratio_) 254 | 255 | test3_err_list = [] 256 | err3_percent_list = [] 257 | for i in range(y_test.shape[0]): 258 | tmp_y = y_test.iloc[i].to_numpy()*std_labled_y+mean_labled_y 259 | tmp_pre = Reg_elasticnet.predict([X_test.iloc[i].to_numpy()])[0]*std_labled_y+mean_labled_y 260 | tmp_error = (tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]) 261 | tmp_percent_err =abs(tmp_error/tmp_y.to_numpy()[0])*100 262 | tmp_error_square = tmp_error * tmp_error 263 | test3_err_list.append(tmp_error_square) 264 | err3_percent_list.append(tmp_percent_err) 265 | print('LS_elasnet_RMSE',np.sqrt(np.mean(test3_err_list))) 266 | print('LS_elasnet_ERR',np.mean(err3_percent_list)) 267 | LS_elasnet_RMSE = np.sqrt(np.mean(test3_err_list)) 268 | LS_elasnet_ERR = np.mean(err3_percent_list) 269 | 270 | 271 | coef_list = [] 272 | coef_list.append(Reg.coef_[0]) 273 | coef_list.append(np.array([v 
for v in Reg_lasso.coef_])) 274 | coef_list.append(np.array([v for v in Reg_elasticnet.coef_])) 275 | coef_list.append(np.array([v for v in alpha.ravel()])) 276 | coef_list.append(np.array(beta.ravel())) 277 | tmp_df = pd.DataFrame(data = coef_list) 278 | tmp_df.to_csv(coef_file+str(U)+'_'+str(repeated_num)+'.csv',index = False, header = False) 279 | return PBCT_RMSE, PBCT_beta_RMSE,LS_beta_RMSE,LS_lasso_RMSE,LS_elasnet_RMSE 280 | 281 | if __name__=="__main__": 282 | repeated_num = 100 283 | coef_file = './tmp_coef/' 284 | all_average = {} 285 | all_median = {} 286 | csv_name = '../Data/pri_20_feature' 287 | file_name = 'tmp_file' 288 | test_num = 10 289 | Upper_var1 = 1 290 | random_index = [random.sample(range(43),43) for i in range(repeated_num)] 291 | sweep_num = 10 292 | #print(random_index) 293 | for unlabeled_num in [0,5,10,15,20]: 294 | tmp_dict ={} 295 | all_list = [] 296 | for i in range(repeated_num): 297 | print('the ',i+1,'-th result') 298 | PBCT_RMSE, PBCT_beta_RMSE,LS_beta_RMSE,LS_lasso_RMSE,LS_elasnet_RMSE = PBCT_log(csv_name,sweep_num,unlabeled_num,test_num,Upper_var1,random_index[i],i,coef_file) 299 | tmp_dict[i] = [PBCT_RMSE, PBCT_beta_RMSE,LS_beta_RMSE,LS_lasso_RMSE,LS_elasnet_RMSE] 300 | all_list.append(tmp_dict[i]) 301 | average_array = np.mean(np.array(all_list),axis = 0) 302 | median_array = np.median(np.array(all_list),axis = 0) 303 | tmp_dict['average'] = average_array 304 | tmp_dict['median'] = median_array 305 | all_average[unlabeled_num] = average_array 306 | all_median[unlabeled_num] = median_array 307 | df = pd.DataFrame(tmp_dict) 308 | df.to_csv('./'+file_name+'/'+csv_name.split('/')[-1]+'_result_PBCT_'+str(sweep_num)+'_'+str(unlabeled_num)+'_online.csv',index=False) # basename only, and labeled _online since this is the online variant 309 | avg_all = pd.DataFrame(all_average) 310 | median_all = pd.DataFrame(all_median) 311 | avg_all.to_csv('./'+file_name+'/online_avg_'+str(Upper_var1)+'.csv',index = False) 312 | median_all.to_csv('./'+file_name+'/online_median_'+str(Upper_var1)+'.csv',index = False) 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | -------------------------------------------------------------------------------- /PBCT/Unlabeled_Offline/PBCT_offline_unlabeled.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | from SFS import Sequential_Forward_Selection_corr_test 4 | from sklearn.linear_model import LinearRegression 5 | from solve_loss import solve_loss 6 | #from solve_loss import solve_loss 7 | from sklearn.model_selection import LeaveOneOut,cross_val_score 8 | import numpy as np 9 | import math 10 | import sklearn 11 | import random 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | def PBCT_log(csv_name,L,U,test_num,Upper_var1,random_index,repeated_num,coef_file): 16 | data_samples = pd.read_csv(csv_name+'.csv',index_col=0) # first unnamed column is the row index 17 | data_shape = data_samples.shape 18 | data_columnslable_x = data_samples.columns[:-2] 19 | data_columnslable_y = data_samples.columns[-1:] 20 | #print(data_columnslable_x) 21 | #print(data_columnslable_y) 22 | ##split the labeled and unlabeled data 23 | #labeled_ratio = 0.10 24 | #L = int(data_shape[0]*labeled_ratio) 25 | #L = 12 26 | #U = 20 27 | #random_index = random.sample(range(120),L+U) 28 | print('labeled_num',L) 29 | data_labled = data_samples.loc[random_index[:L]] ## .loc selects rows by index label, not by position 30 | #print(data_labled) 31 | data_unlabled = data_samples.loc[random_index[-test_num:-test_num+U]] 32 | data_test = data_samples.loc[random_index[-test_num:]] 33 | 
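Note: in this offline variant the U unlabeled rows are taken from inside the test block itself (the last test_num shuffled indices), so training sees the feature side of the test pool, a transductive setup. The indexing, spelled out on a toy list (sizes and the name idx are illustrative):

idx = list(range(43))            # stands in for one shuffled random_index
L, U, test_num = 10, 5, 10

labeled = idx[:L]                          # first L shuffled rows
test = idx[-test_num:]                     # last test_num rows
unlabeled = idx[-test_num:-test_num + U]   # first U rows of the test block

assert set(unlabeled) <= set(test)         # unlabeled subset of test: offline/transductive
# caveat: the stop index -test_num + U is only negative while U < test_num;
# for U >= test_num the slice comes back empty, so U should stay below
# test_num with this indexing.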
#print(data_unlabled) 34 | print('unlabled_num',U) 35 | #print(data_labled) 36 | #print(data_unlabled) 37 | 38 | data_labled_x = data_labled[data_columnslable_x] 39 | data_labled_y = data_labled[data_columnslable_y] 40 | data_unlabled_x = data_unlabled[data_columnslable_x] 41 | data_unlabled_y = data_unlabled[data_columnslable_y] 42 | data_test_x = data_test[data_columnslable_x] 43 | data_test_y = data_test[data_columnslable_y] 44 | ####Nomalize#### 45 | mean_labled_x = data_labled_x.mean() 46 | std_labled_x = data_labled_x.std() 47 | 48 | mean_labled_y = data_labled_y.mean() 49 | std_labled_y = data_labled_y.std() 50 | #print("std_y",std_labled_y) 51 | 52 | X_train_labled = (data_labled_x-mean_labled_x)/std_labled_x 53 | #print(X_train_labled) 54 | X_train_unlabled = (data_unlabled_x-mean_labled_x)/std_labled_x 55 | 56 | X_test= (data_test_x-mean_labled_x)/std_labled_x 57 | y_train = (data_labled_y-mean_labled_y)/std_labled_y 58 | y_test = (data_test_y-mean_labled_y)/std_labled_y 59 | ################# 60 | 61 | ####build Partial model#### 62 | Partial_feature_var2 = Sequential_Forward_Selection_corr_test(data_columnslable_x,data_labled_x,data_labled_y,random_index,L,L-2) 63 | Partial_feature = Partial_feature_var2[0] 64 | print('var features for'+str(L)+' '+'repeated_num '+str(repeated_num)+'is', Partial_feature) 65 | 66 | 67 | ZL = data_labled_x[Partial_feature] 68 | ZU = data_unlabled_x[Partial_feature] 69 | var2 = Partial_feature_var2[1] 70 | print('var2 is ',var2) 71 | #var2 = max(1e-8,var2) 72 | #print('var2 is ',var2) 73 | ######################## 74 | cv = LeaveOneOut() 75 | 76 | #######this part should be checked##### 77 | lasso_model = sklearn.linear_model.LassoCV(fit_intercept=False,cv=cv,alphas=[0.01,0.1,1,10]) 78 | Reg_lasso = lasso_model.fit(X_train_labled.to_numpy(), y_train.to_numpy().ravel()) 79 | print('alpha of lasso is',Reg_lasso.alpha_) 80 | 81 | ####find the var1#### 82 | V = [0.5,1,2,5,10] 83 | print('current V',V) 84 | var1_candidate_set = [Upper_var1*i*var2 for i in V] ## 85 | l5_candidate = [10,100] 86 | #V = 5 87 | #var1_candidate_set = [math.pow(0.1,V)*0.05*var2 for i in range(V)] 88 | LOO_list = [] 89 | 90 | for i in range(len(V)): 91 | for m in range(len(l5_candidate)): 92 | tmp_var1 = var1_candidate_set[i] 93 | l1 = 1/2/tmp_var1 94 | l2 = 1/2/var2 95 | l3 = 0 96 | l4 = 1/2/(tmp_var1+var2) 97 | #l5 = 0 98 | l5 = l1*l5_candidate[m] 99 | error_list = [] 100 | for j in range(L): 101 | predict_x = data_labled_x.iloc[j] 102 | tmp_X = data_labled_x.drop(random_index[j]) 103 | tmp_ZL = ZL.drop(random_index[j]) 104 | predict_y = data_labled_y.iloc[j] 105 | tmp_y = data_labled_y.drop(random_index[j]) 106 | 107 | ##Normalize## 108 | mean_labled_x = tmp_X.mean() 109 | std_labled_x = tmp_X.std() 110 | mean_labled_y = tmp_y.mean() 111 | std_labled_y = tmp_y.std() 112 | mean_labled_z = tmp_ZL.mean() 113 | std_labled_z = tmp_ZL.std() 114 | 115 | X_train_tmp = (tmp_X - mean_labled_x)/std_labled_x 116 | y_train_tmp = (tmp_y-mean_labled_y)/std_labled_y 117 | X_train_unlabled_tmp = (data_unlabled_x-mean_labled_x)/std_labled_x 118 | 119 | tmp_ZL = (tmp_ZL-mean_labled_z)/std_labled_z 120 | tmp_ZU = (ZU-mean_labled_z)/std_labled_z 121 | 122 | 123 | predict_x = (predict_x-mean_labled_x)/std_labled_x 124 | 125 | 126 | alpha,beta = solve_loss(y_train_tmp.to_numpy().ravel(),X_train_tmp.to_numpy(),tmp_ZL.to_numpy(), 127 | X_train_unlabled_tmp.to_numpy(),tmp_ZU.to_numpy(),l1,l2,l3,l4,l5) 128 | 129 | real_predict_y = predict_y.to_numpy() 130 | alpha_y = 

    #### Refit alpha and beta with the selected hyperparameters ####
    l1 = 1 / 2 / var1_candidate_set[l1_index]
    l2 = 1 / 2 / var2
    l3 = 0
    l4 = 1 / 2 / (var1_candidate_set[l1_index] + var2)
    l5 = l1 * l5_candidate[l5_index]

    ZL_all = X_train_labled[Partial_feature]
    ZU_all = X_train_unlabled[Partial_feature]

    alpha, beta = solve_loss(y_train.to_numpy().ravel(), X_train_labled.to_numpy(), ZL_all.to_numpy(),
                             X_train_unlabled.to_numpy(), ZU_all.to_numpy(), l1, l2, l3, l4, l5)
    print('alpha is', alpha)
    print('beta is', beta)

    # Diagnostic: value of the training objective at the fitted (alpha, beta),
    # evaluated on the same normalized views that solve_loss received.
    Loss = (l1 * math.pow(np.linalg.norm(y_train.to_numpy() - np.matmul(X_train_labled.to_numpy(), alpha)), 2) +
            l2 * math.pow(np.linalg.norm(y_train.to_numpy() - np.matmul(ZL_all.to_numpy(), beta)), 2) +
            l3 * math.pow(np.linalg.norm(np.matmul(X_train_labled.to_numpy(), alpha) - np.matmul(ZL_all.to_numpy(), beta)), 2) +
            l4 * math.pow(np.linalg.norm(np.matmul(X_train_unlabled.to_numpy(), alpha) - np.matmul(ZU_all.to_numpy(), beta)), 2))
    print('Loss', Loss)

    z_test_all = X_test[Partial_feature]

    #### Test the complete-view (alpha) model ####
    test_err_list = []
    err_percent_list = []
    for i in range(y_test.shape[0]):
        tmp_y = y_test.iloc[i].to_numpy() * std_labled_y + mean_labled_y
        tmp_pre = np.matmul(alpha.T, X_test.iloc[i].to_numpy()) * std_labled_y + mean_labled_y
        tmp_error = tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]
        err_percent_list.append(abs(tmp_error / tmp_y.to_numpy()[0]) * 100)
        test_err_list.append(tmp_error * tmp_error)

    PBCT_RMSE = np.sqrt(np.mean(test_err_list))
    PBCT_ERR = np.mean(err_percent_list)
    print('PBCT_RMSE', PBCT_RMSE)
    print('PBCT_ERR', PBCT_ERR)

    #### Test the partial-view (beta) model ####
    test0_err_list = []
    err0_percent_list = []
    for i in range(y_test.shape[0]):
        tmp_y = y_test.iloc[i].to_numpy() * std_labled_y + mean_labled_y
        tmp_pre = np.matmul(beta.T, z_test_all.iloc[i].to_numpy()) * std_labled_y + mean_labled_y
        tmp_error = tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]
        err0_percent_list.append(abs(tmp_error / tmp_y.to_numpy()[0]) * 100)
        test0_err_list.append(tmp_error * tmp_error)

    PBCT_beta_RMSE = np.sqrt(np.mean(test0_err_list))
    PBCT_beta_ERR = np.mean(err0_percent_list)
    print('PBCT_beta_RMSE', PBCT_beta_RMSE)
    print('PBCT_beta_ERR', PBCT_beta_ERR)
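
    #### Baselines below: ordinary least squares on the partial view, plus the
    #### LassoCV fit from above and an ElasticNetCV fit on the complete view.
    #### All use fit_intercept=False because every view is already standardized,
    #### and all are scored on the same denormalized test split as PBCT.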
    model = LinearRegression(fit_intercept=False)
    Reg = model.fit(ZL_all.to_numpy(), y_train.to_numpy())
    test1_err_list = []
    err1_percent_list = []
    for i in range(y_test.shape[0]):
        tmp_y = y_test.iloc[i].to_numpy() * std_labled_y + mean_labled_y
        tmp_pre = Reg.predict([z_test_all.iloc[i].to_numpy()])[0] * std_labled_y + mean_labled_y
        tmp_error = tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]
        err1_percent_list.append(abs(tmp_error / tmp_y.to_numpy()[0]) * 100)
        test1_err_list.append(tmp_error * tmp_error)
    LS_beta_RMSE = np.sqrt(np.mean(test1_err_list))
    LS_beta_ERR = np.mean(err1_percent_list)
    print('LS_beta_RMSE', LS_beta_RMSE)
    print('LS_beta_ERR', LS_beta_ERR)

    test2_err_list = []
    err2_percent_list = []
    for i in range(y_test.shape[0]):
        tmp_y = y_test.iloc[i].to_numpy() * std_labled_y + mean_labled_y
        tmp_pre = Reg_lasso.predict([X_test.iloc[i].to_numpy()])[0] * std_labled_y + mean_labled_y
        tmp_error = tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]
        err2_percent_list.append(abs(tmp_error / tmp_y.to_numpy()[0]) * 100)
        test2_err_list.append(tmp_error * tmp_error)
    LS_lasso_RMSE = np.sqrt(np.mean(test2_err_list))
    LS_lasso_ERR = np.mean(err2_percent_list)
    print('LS_lasso_RMSE', LS_lasso_RMSE)
    print('LS_lasso_ERR', LS_lasso_ERR)

    l1_ratio_list = [0.05, 0.2, 0.4, 0.6, 0.8, 0.95]
    elasticNet_model = ElasticNetCV(fit_intercept=False, cv=cv, alphas=[0.01, 0.1, 1, 10], l1_ratio=l1_ratio_list)
    Reg_elasticnet = elasticNet_model.fit(X_train_labled.to_numpy(), y_train.to_numpy().ravel())
    print('alpha and l1_ratio of elasticNet are', Reg_elasticnet.alpha_, Reg_elasticnet.l1_ratio_)

    test3_err_list = []
    err3_percent_list = []
    for i in range(y_test.shape[0]):
        tmp_y = y_test.iloc[i].to_numpy() * std_labled_y + mean_labled_y
        tmp_pre = Reg_elasticnet.predict([X_test.iloc[i].to_numpy()])[0] * std_labled_y + mean_labled_y
        tmp_error = tmp_y.to_numpy()[0] - tmp_pre.to_numpy()[0]
        err3_percent_list.append(abs(tmp_error / tmp_y.to_numpy()[0]) * 100)
        test3_err_list.append(tmp_error * tmp_error)
    LS_elasnet_RMSE = np.sqrt(np.mean(test3_err_list))
    LS_elasnet_ERR = np.mean(err3_percent_list)
    print('LS_elasnet_RMSE', LS_elasnet_RMSE)
    print('LS_elasnet_ERR', LS_elasnet_ERR)

    # Dump every fitted coefficient vector (one row per model) for inspection.
    coef_list = []
    coef_list.append(Reg.coef_[0])
    coef_list.append(np.array(Reg_lasso.coef_))
    coef_list.append(np.array(Reg_elasticnet.coef_))
    coef_list.append(np.array(alpha.ravel()))
    coef_list.append(np.array(beta.ravel()))
    tmp_df = pd.DataFrame(data=coef_list)
    tmp_df.to_csv(coef_file + str(U) + '_' + str(repeated_num) + '.csv', index=False, header=False)
    return PBCT_RMSE, PBCT_beta_RMSE, LS_beta_RMSE, LS_lasso_RMSE, LS_elasnet_RMSE
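
# A single call of PBCT_log with the settings used below would look like:
#   rmses = PBCT_log('../Data/pri_20_feature', 10, 5, 25, 1,
#                    random.sample(range(43), 43), 0, './tmp_coef/')
# The driver repeats this for every unlabeled-set size and records the per-run
# metrics together with their average and median.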

if __name__ == "__main__":
    repeated_num = 100
    coef_file = './tmp_coef/'
    all_average = {}
    all_median = {}
    csv_name = '../Data/pri_20_feature'
    # Use only the file stem in output names so the result files land under
    # file_name instead of resolving into ../Data/.
    csv_base = csv_name.split('/')[-1]
    file_name = 'tmp_file'
    test_num = 25
    Upper_var1 = 1
    # One shuffled permutation of the 43 rows per repetition, saved to disk so
    # a run can be reproduced.
    random_index = [random.sample(range(43), 43) for i in range(repeated_num)]
    np.save('pri_r1_10.npy', random_index)
    sweep_num = 10
    for unlabeled_num in [0, 5, 10, 15, 20, 25]:
        tmp_dict = {}
        all_list = []
        for i in range(repeated_num):
            print('the', i + 1, '-th result')
            PBCT_RMSE, PBCT_beta_RMSE, LS_beta_RMSE, LS_lasso_RMSE, LS_elasnet_RMSE = PBCT_log(
                csv_name, sweep_num, unlabeled_num, test_num, Upper_var1, random_index[i], i, coef_file)
            tmp_dict[i] = [PBCT_RMSE, PBCT_beta_RMSE, LS_beta_RMSE, LS_lasso_RMSE, LS_elasnet_RMSE]
            all_list.append(tmp_dict[i])
        average_array = np.mean(np.array(all_list), axis=0)
        median_array = np.median(np.array(all_list), axis=0)
        tmp_dict['average'] = average_array
        tmp_dict['median'] = median_array
        all_average[unlabeled_num] = average_array
        all_median[unlabeled_num] = median_array
        df = pd.DataFrame(tmp_dict)
        df.to_csv('./' + file_name + '/' + csv_base + '_result_PBCT_' + str(sweep_num) + '_' + str(unlabeled_num) + '_offline.csv', index=False)
    avg_all = pd.DataFrame(all_average)
    median_all = pd.DataFrame(all_median)
    avg_all.to_csv('./' + file_name + '/offline_avg_' + str(Upper_var1) + '.csv', index=False)
    median_all.to_csv('./' + file_name + '/offline_median_' + str(Upper_var1) + '.csv', index=False)
--------------------------------------------------------------------------------