├── 811replication.zip ├── LICENSE ├── README.md ├── datasets_exp1 ├── 3CPU.zip ├── 3MEM.zip ├── 5CPU.zip ├── 5MEM.zip ├── 7CPU.zip ├── 7MEM.z01 └── 7MEM.zip ├── datasets_exp2_3 ├── split6_2_2.zip ├── split8_1_1.zip └── split9_05_05.zip ├── models ├── CNN.py ├── DAG_Transformer.py ├── DAG_Transformer_Encoder_Layer.py ├── GCN.py ├── LSTM.py └── Vanilla_Transformer.py ├── preprocess.py ├── run_exp1.py ├── run_exp2.py ├── run_exp3.py ├── scheduler.py ├── select_model.py ├── train_model_dag.py ├── train_model_gnn.py └── train_model_vanilla.py /811replication.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/811replication.zip -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # workflow-performance-prediction-jii 2 | This source repository is dedicated for the following published journal paper: 3 | #### Jixiang Yu, Ming Gao*, Yuchan Li, Zehui Zhang, WAI HUNG IP, KAI LEUNG Yung, Workflow performance prediction based on graph structure aware deep attention neural network, Journal of Industrial Information Integration, 2022, https://doi.org/10.1016/j.jii.2022.100337. (https://www.sciencedirect.com/science/article/pii/S2452414X22000097) 4 | If you are interested in this research and use this code, please kindly reference our paper or contact the corresponding author. 5 | 6 | This dataset is extracted and aggregated based on cluster-trace-v2018 (https://github.com/alibaba/clusterdata) 7 | 8 | 9 | ### Highlights 10 | #### In workflow performance prediction, DAG structure matters; 11 | #### DAG-Transformer effectively embeds the DAG information and outperforms mainstream ML, DL and GCN methods; 12 | #### A new dataset for cloud workflow performance prediction is accompanied as well as the source code. 13 | 14 | # Usage: 15 | ## 1 To reproduce experiment 1 16 | python run_exp1 --pred_task=3/5/7 #(default=7) --pred_tgt=CPU/MEM #(default=CPU) --pred_mode=PRIOR_1/PRIOR_ALL #(default=PRIOR_ALL) --use_DAG=T/F #(default=T) 17 | ## 2 To reproduce experiment 2 18 | python run_exp2 --model_name=CNN/LSTM/VanillaTransformer/DAGTransformer --split=split9_05_05/split8_1_1/split6_2_2 #(default=split6_2_2) 19 | ## 3 To reproduce experiment 3 20 | python run_exp2 --model_name=CNN/LSTM/VanillaTransformer/DAGTransformer/GCN --split=split9_05_05/split8_1_1/split6_2_2 #(default=split6_2_2) --GCN_mode=bidirect/unidirect #(default=bidirect) 21 | 22 | # Datasets(You need to extract the *.zip files first): 23 | ## 1 In directory datasets_exp1/ 24 | There are 6 different sub-datasets, whose name indicates pred_task+pred_tgt. In each sub-dataset, for example, in 3CPU/, there are 3 DAG information files(train, val, and test) and 2 performance datasets(train, val, and test) using different pred_mode(i.e., PRIOR_1 and PRIOR_ALL). 25 | 26 | ## 2 In directory datasets_exp2_3/ 27 | There are 3 different splits. In each split, there are 3 DAG information files(train, val, and test) and their corresponding performance data(train, val, and test). 28 | 29 | # Requirements: 30 | CUDA==11.0 31 | python==3.8 32 | pytorch==1.7.0 33 | PyG==corresponding version of pytorch-1.7.0 and CUDA-11.0 34 | -------------------------------------------------------------------------------- /datasets_exp1/3CPU.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/3CPU.zip -------------------------------------------------------------------------------- /datasets_exp1/3MEM.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/3MEM.zip -------------------------------------------------------------------------------- /datasets_exp1/5CPU.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/5CPU.zip -------------------------------------------------------------------------------- /datasets_exp1/5MEM.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/5MEM.zip -------------------------------------------------------------------------------- /datasets_exp1/7CPU.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/7CPU.zip -------------------------------------------------------------------------------- /datasets_exp1/7MEM.z01: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/7MEM.z01 -------------------------------------------------------------------------------- /datasets_exp1/7MEM.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/7MEM.zip -------------------------------------------------------------------------------- /datasets_exp2_3/split6_2_2.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp2_3/split6_2_2.zip -------------------------------------------------------------------------------- /datasets_exp2_3/split8_1_1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp2_3/split8_1_1.zip -------------------------------------------------------------------------------- /datasets_exp2_3/split9_05_05.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp2_3/split9_05_05.zip -------------------------------------------------------------------------------- /models/CNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | class CNNModel(nn.Module): 5 | def __init__(self, config): 6 | super(CNNModel, self).__init__() 7 | self.conv=nn.Conv1d(in_channels=config.n_feat,out_channels=config.outdim,kernel_size=1) 8 | self.maxpool=nn.MaxPool1d(config.pooldim) 9 | self.fc=nn.Linear((config.outdim//config.pooldim)*config.num_task,config.num_classes) 10 | self.dropout=nn.Dropout(config.dropout) 11 | def forward(self,x): 12 | out=x.permute(0,2,1) 13 | out=F.relu(self.conv(out)) 14 | out=self.dropout(out) 15 | out=out.permute(0,2,1) 16 | out=self.maxpool(out) 17 | out=out.reshape(out.size(0),-1) 18 | out=self.fc(out) 19 | return out -------------------------------------------------------------------------------- /models/DAG_Transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import copy 4 | import torch.nn.functional as F 5 | from .DAG_Transformer_Encoder_Layer import DAGTransformerEncoderLayer as encoder_layer 6 | 7 | 8 | class DAGTransformerEncoder(nn.Module): 9 | def __init__(self, d_k, num_head, hidden_dim, dropout, num_layer): 10 | super(DAGTransformerEncoder, self).__init__() 11 | self.encoder_layer=encoder_layer(d_k, num_head, hidden_dim, dropout) 12 | self.encoder=nn.ModuleList([copy.deepcopy(self.encoder_layer) for _ in range(num_layer)]) 13 | def forward(self, out, attn_mask=None): 14 | for _ in self.encoder: 15 | out = _(out, src_mask=attn_mask) 16 | return out 17 | 18 | 19 | 20 | class resnet_layer(nn.Module): 21 | def __init__(self,n_feat,d_k,kernel_size,dropout): 22 | super(resnet_layer,self).__init__() 23 | self.conv1=nn.Conv1d(in_channels=n_feat,out_channels=d_k,kernel_size=kernel_size) 24 | self.conv2=nn.Conv1d(in_channels=d_k,out_channels=n_feat,kernel_size=kernel_size) 25 | self.dropout=nn.Dropout(dropout) 26 | 27 | 28 | def forward(self,x): 29 | out=self.conv1(x) 30 | out=self.conv2(out) 31 | out+=x 32 | out=self.dropout(x) 33 | return out 34 | 35 | 36 | class DAGTransformer(nn.Module): 37 | def __init__(self, config): 38 | super(DAGTransformer, self).__init__() 39 | self.structure=config.structure 40 | self.resnet1=resnet_layer(config.n_feat,config.d_k,1,config.dropout) 41 | if self.structure==True: 42 | self.resnet2=resnet_layer(config.n_feat,config.d_k,1,config.dropout) 43 | self.conv=nn.Conv1d(in_channels=config.n_feat,out_channels=config.d_k,kernel_size=1) 44 | self.res1=nn.ModuleList([ 45 | copy.deepcopy(self.resnet1) 46 | for _ in range(config.res_num_layer) 47 | ]) 48 | if self.structure==True: 49 | self.res2=nn.ModuleList([ 50 | copy.deepcopy(self.resnet2) 51 | for _ in range(config.res_num_layer) 52 | ]) 53 | self.encoder = DAGTransformerEncoder(config.d_k, config.num_head, config.hidden_dim, config.dropout, config.num_encoder) 54 | self.avgpool=nn.AdaptiveAvgPool2d((config.d_k,1)) 55 | self.fc1 = nn.Linear(config.d_k, 3) 56 | 57 | def forward(self,data,pos,mask): 58 | out = data 59 | out=out.permute(0,2,1) 60 | for resnet in self.res1: 61 | out = resnet(out) 62 | if self.structure==True: 63 | out1= pos 64 | attn_mask=mask.masked_fill(mask == 0, float('-inf')).masked_fill(mask != 0, float(0.0)) 65 | out1=out1.permute(0,2,1) 66 | for resnet in self.res2: 67 | out1=resnet(out1) 68 | out=self.conv(out+out1) 69 | else: 70 | out=self.conv(out) 71 | 72 | out=out.permute(2,0,1) 73 | 74 | out = self.encoder(out,attn_mask=attn_mask if self.structure==True else None) 75 | out=out.permute(1,2,0) 76 | out=F.relu(out) 77 | out=self.avgpool(out) 78 | out = out.squeeze(-1) 79 | out = self.fc1(out) 80 | return out 81 | -------------------------------------------------------------------------------- /models/DAG_Transformer_Encoder_Layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import copy 3 | from torch.nn import functional as F 4 | import torch.nn as nn 5 | from torch.nn.modules import Module 6 | from torch.nn.modules.dropout import Dropout 7 | from torch.nn.modules.linear import Linear 8 | from torch.nn.modules.normalization import LayerNorm 9 | from torch.nn.init import xavier_uniform_ 10 | from torch.nn.init import constant_ 11 | from torch.nn.init import xavier_normal_ 12 | from torch.nn.parameter import Parameter 13 | from torch.nn.functional import linear 14 | from torch.nn.functional import softmax 15 | from torch.nn.functional import dropout 16 | 17 | def _get_activation_fn(activation): 18 | if activation == "relu": 19 | return F.relu 20 | elif activation == "gelu": 21 | return F.gelu 22 | else: 23 | raise RuntimeError("activation should be relu/gelu, not %s." % activation) 24 | 25 | 26 | class DAGMultiheadAttention(Module): 27 | 28 | __annotations__ = { 29 | 'bias_k': torch._jit_internal.Optional[torch.Tensor], 30 | 'bias_v': torch._jit_internal.Optional[torch.Tensor], 31 | } 32 | __constants__ = ['q_proj_weight', 'k_proj_weight', 'v_proj_weight', 'in_proj_weight'] 33 | 34 | def __init__(self, embed_dim, num_heads, dropout=0., bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None): 35 | super(DAGMultiheadAttention, self).__init__() 36 | self.embed_dim = embed_dim 37 | self.kdim = kdim if kdim is not None else embed_dim 38 | self.vdim = vdim if vdim is not None else embed_dim 39 | self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim 40 | 41 | self.num_heads = num_heads 42 | self.dropout = dropout 43 | self.head_dim = embed_dim // num_heads 44 | assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" 45 | 46 | if self._qkv_same_embed_dim is False: 47 | self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim)) 48 | self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim)) 49 | self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim)) 50 | self.register_parameter('in_proj_weight', None) 51 | else: 52 | self.in_proj_weight = Parameter(torch.empty(3 * embed_dim, embed_dim)) 53 | self.register_parameter('q_proj_weight', None) 54 | self.register_parameter('k_proj_weight', None) 55 | self.register_parameter('v_proj_weight', None) 56 | 57 | if bias: 58 | self.in_proj_bias = Parameter(torch.empty(3 * embed_dim)) 59 | else: 60 | self.register_parameter('in_proj_bias', None) 61 | self.out_proj = Linear(embed_dim, embed_dim, bias=bias) 62 | 63 | if add_bias_kv: 64 | self.bias_k = Parameter(torch.empty(1, 1, embed_dim)) 65 | self.bias_v = Parameter(torch.empty(1, 1, embed_dim)) 66 | else: 67 | self.bias_k = self.bias_v = None 68 | 69 | self.add_zero_attn = add_zero_attn 70 | 71 | self._reset_parameters() 72 | 73 | def _reset_parameters(self): 74 | if self._qkv_same_embed_dim: 75 | xavier_uniform_(self.in_proj_weight) 76 | else: 77 | xavier_uniform_(self.q_proj_weight) 78 | xavier_uniform_(self.k_proj_weight) 79 | xavier_uniform_(self.v_proj_weight) 80 | 81 | if self.in_proj_bias is not None: 82 | constant_(self.in_proj_bias, 0.) 83 | constant_(self.out_proj.bias, 0.) 84 | if self.bias_k is not None: 85 | xavier_normal_(self.bias_k) 86 | if self.bias_v is not None: 87 | xavier_normal_(self.bias_v) 88 | 89 | def __setstate__(self, state): 90 | super(DAGMultiheadAttention, self).__setstate__(state) 91 | if 'self._qkv_same_embed_dim' not in self.__dict__: 92 | self._qkv_same_embed_dim = True 93 | 94 | def forward(self, query, key, value, key_padding_mask=None, 95 | need_weights=True, attn_mask=None): 96 | # type: (Tensor, Tensor, Tensor, Optional[Tensor], bool, Optional[Tensor]) -> Tuple[Tensor, Optional[Tensor]] 97 | 98 | if not self._qkv_same_embed_dim: 99 | return DAGmulti_head_attention_forward( 100 | query, key, value, self.embed_dim, self.num_heads, 101 | self.in_proj_weight, self.in_proj_bias, 102 | self.bias_k, self.bias_v, self.add_zero_attn, 103 | self.dropout, self.out_proj.weight, self.out_proj.bias, 104 | training=self.training, 105 | key_padding_mask=key_padding_mask, need_weights=need_weights, 106 | attn_mask=attn_mask, use_separate_proj_weight=True, 107 | q_proj_weight=self.q_proj_weight, k_proj_weight=self.k_proj_weight, 108 | v_proj_weight=self.v_proj_weight) 109 | else: 110 | return DAGmulti_head_attention_forward( 111 | query, key, value, self.embed_dim, self.num_heads, 112 | self.in_proj_weight, self.in_proj_bias, 113 | self.bias_k, self.bias_v, self.add_zero_attn, 114 | self.dropout, self.out_proj.weight, self.out_proj.bias, 115 | training=self.training, 116 | key_padding_mask=key_padding_mask, need_weights=need_weights, 117 | attn_mask=attn_mask) 118 | 119 | 120 | 121 | 122 | def DAGmulti_head_attention_forward(query, # type: Tensor 123 | key, # type: Tensor 124 | value, # type: Tensor 125 | embed_dim_to_check, # type: int 126 | num_heads, # type: int 127 | in_proj_weight, # type: Tensor 128 | in_proj_bias, # type: Tensor 129 | bias_k, # type: Optional[Tensor] 130 | bias_v, # type: Optional[Tensor] 131 | add_zero_attn, # type: bool 132 | dropout_p, # type: float 133 | out_proj_weight, # type: Tensor 134 | out_proj_bias, # type: Tensor 135 | training=True, # type: bool 136 | key_padding_mask=None, # type: Optional[Tensor] 137 | need_weights=True, # type: bool 138 | attn_mask=None, # type: Optional[Tensor] 139 | use_separate_proj_weight=False, # type: bool 140 | q_proj_weight=None, # type: Optional[Tensor] 141 | k_proj_weight=None, # type: Optional[Tensor] 142 | v_proj_weight=None, # type: Optional[Tensor] 143 | static_k=None, # type: Optional[Tensor] 144 | static_v=None # type: Optional[Tensor] 145 | ): 146 | # type: (...) -> Tuple[Tensor, Optional[Tensor]] 147 | 148 | 149 | tgt_len, bsz, embed_dim = query.size() 150 | assert embed_dim == embed_dim_to_check 151 | assert key.size() == value.size() 152 | 153 | head_dim = embed_dim // num_heads 154 | assert head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" 155 | scaling = float(head_dim) ** -0.5 156 | 157 | if not use_separate_proj_weight: 158 | if torch.equal(query, key) and torch.equal(key, value): 159 | q, k, v = linear(query, in_proj_weight, in_proj_bias).chunk(3, dim=-1) 160 | 161 | elif torch.equal(key, value): 162 | # This is inline in_proj function with in_proj_weight and in_proj_bias 163 | _b = in_proj_bias 164 | _start = 0 165 | _end = embed_dim 166 | _w = in_proj_weight[_start:_end, :] 167 | if _b is not None: 168 | _b = _b[_start:_end] 169 | q = linear(query, _w, _b) 170 | 171 | if key is None: 172 | assert value is None 173 | k = None 174 | v = None 175 | else: 176 | 177 | # This is inline in_proj function with in_proj_weight and in_proj_bias 178 | _b = in_proj_bias 179 | _start = embed_dim 180 | _end = None 181 | _w = in_proj_weight[_start:, :] 182 | if _b is not None: 183 | _b = _b[_start:] 184 | k, v = linear(key, _w, _b).chunk(2, dim=-1) 185 | 186 | else: 187 | # This is inline in_proj function with in_proj_weight and in_proj_bias 188 | _b = in_proj_bias 189 | _start = 0 190 | _end = embed_dim 191 | _w = in_proj_weight[_start:_end, :] 192 | if _b is not None: 193 | _b = _b[_start:_end] 194 | q = linear(query, _w, _b) 195 | 196 | # This is inline in_proj function with in_proj_weight and in_proj_bias 197 | _b = in_proj_bias 198 | _start = embed_dim 199 | _end = embed_dim * 2 200 | _w = in_proj_weight[_start:_end, :] 201 | if _b is not None: 202 | _b = _b[_start:_end] 203 | k = linear(key, _w, _b) 204 | 205 | # This is inline in_proj function with in_proj_weight and in_proj_bias 206 | _b = in_proj_bias 207 | _start = embed_dim * 2 208 | _end = None 209 | _w = in_proj_weight[_start:, :] 210 | if _b is not None: 211 | _b = _b[_start:] 212 | v = linear(value, _w, _b) 213 | else: 214 | q_proj_weight_non_opt = torch.jit._unwrap_optional(q_proj_weight) 215 | len1, len2 = q_proj_weight_non_opt.size() 216 | assert len1 == embed_dim and len2 == query.size(-1) 217 | 218 | k_proj_weight_non_opt = torch.jit._unwrap_optional(k_proj_weight) 219 | len1, len2 = k_proj_weight_non_opt.size() 220 | assert len1 == embed_dim and len2 == key.size(-1) 221 | 222 | v_proj_weight_non_opt = torch.jit._unwrap_optional(v_proj_weight) 223 | len1, len2 = v_proj_weight_non_opt.size() 224 | assert len1 == embed_dim and len2 == value.size(-1) 225 | 226 | if in_proj_bias is not None: 227 | q = linear(query, q_proj_weight_non_opt, in_proj_bias[0:embed_dim]) 228 | k = linear(key, k_proj_weight_non_opt, in_proj_bias[embed_dim:(embed_dim * 2)]) 229 | v = linear(value, v_proj_weight_non_opt, in_proj_bias[(embed_dim * 2):]) 230 | else: 231 | q = linear(query, q_proj_weight_non_opt, in_proj_bias) 232 | k = linear(key, k_proj_weight_non_opt, in_proj_bias) 233 | v = linear(value, v_proj_weight_non_opt, in_proj_bias) 234 | q = q * scaling 235 | 236 | if bias_k is not None and bias_v is not None: 237 | if static_k is None and static_v is None: 238 | k = torch.cat([k, bias_k.repeat(1, bsz, 1)]) 239 | v = torch.cat([v, bias_v.repeat(1, bsz, 1)]) 240 | if attn_mask is not None: 241 | attn_mask = torch.cat([attn_mask, 242 | torch.zeros((attn_mask.size(0), 1), 243 | dtype=attn_mask.dtype, 244 | device=attn_mask.device)], dim=1) 245 | if key_padding_mask is not None: 246 | key_padding_mask = torch.cat( 247 | [key_padding_mask, torch.zeros((key_padding_mask.size(0), 1), 248 | dtype=key_padding_mask.dtype, 249 | device=key_padding_mask.device)], dim=1) 250 | else: 251 | assert static_k is None, "bias cannot be added to static key." 252 | assert static_v is None, "bias cannot be added to static value." 253 | else: 254 | assert bias_k is None 255 | assert bias_v is None 256 | 257 | q = q.contiguous().view(tgt_len, bsz * num_heads, head_dim).transpose(0, 1) 258 | if k is not None: 259 | k = k.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1) 260 | if v is not None: 261 | v = v.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1) 262 | 263 | if static_k is not None: 264 | assert static_k.size(0) == bsz * num_heads 265 | assert static_k.size(2) == head_dim 266 | k = static_k 267 | 268 | if static_v is not None: 269 | assert static_v.size(0) == bsz * num_heads 270 | assert static_v.size(2) == head_dim 271 | v = static_v 272 | 273 | src_len = k.size(1) 274 | 275 | if key_padding_mask is not None: 276 | assert key_padding_mask.size(0) == bsz 277 | assert key_padding_mask.size(1) == src_len 278 | 279 | if add_zero_attn: 280 | src_len += 1 281 | k = torch.cat([k, torch.zeros((k.size(0), 1) + k.size()[2:], dtype=k.dtype, device=k.device)], dim=1) 282 | v = torch.cat([v, torch.zeros((v.size(0), 1) + v.size()[2:], dtype=v.dtype, device=v.device)], dim=1) 283 | if attn_mask is not None: 284 | attn_mask = torch.cat([attn_mask, torch.zeros((attn_mask.size(0), 1), 285 | dtype=attn_mask.dtype, 286 | device=attn_mask.device)], dim=1) 287 | if key_padding_mask is not None: 288 | key_padding_mask = torch.cat( 289 | [key_padding_mask, torch.zeros((key_padding_mask.size(0), 1), 290 | dtype=key_padding_mask.dtype, 291 | device=key_padding_mask.device)], dim=1) 292 | 293 | attn_output_weights = torch.bmm(q, k.transpose(1, 2)) 294 | assert list(attn_output_weights.size()) == [bsz * num_heads, tgt_len, src_len] 295 | 296 | if attn_mask is not None: 297 | attn_mask=attn_mask.reshape(-1,tgt_len,tgt_len) 298 | attn_output_weights += attn_mask 299 | 300 | if key_padding_mask is not None: 301 | attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len) 302 | attn_output_weights = attn_output_weights.masked_fill( 303 | key_padding_mask.unsqueeze(1).unsqueeze(2), 304 | float('-inf'), 305 | ) 306 | attn_output_weights = attn_output_weights.view(bsz * num_heads, tgt_len, src_len) 307 | 308 | attn_output_weights = softmax( 309 | attn_output_weights, dim=-1) 310 | attn_output_weights = dropout(attn_output_weights, p=dropout_p, training=training) 311 | 312 | attn_output = torch.bmm(attn_output_weights, v) 313 | assert list(attn_output.size()) == [bsz * num_heads, tgt_len, head_dim] 314 | attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) 315 | attn_output = linear(attn_output, out_proj_weight, out_proj_bias) 316 | 317 | if need_weights: 318 | # average attention weights over heads 319 | attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len) 320 | return attn_output, attn_output_weights.sum(dim=1) / num_heads 321 | else: 322 | return attn_output, None 323 | 324 | 325 | 326 | class DAGTransformerEncoderLayer(nn.Module): 327 | def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"): 328 | super(DAGTransformerEncoderLayer, self).__init__() 329 | self.self_attn = DAGMultiheadAttention(d_model, nhead, dropout=dropout) 330 | # Implementation of Feedforward model 331 | self.linear1 = Linear(d_model, dim_feedforward) 332 | self.dropout = Dropout(dropout) 333 | self.linear2 = Linear(dim_feedforward, d_model) 334 | 335 | self.norm1 = LayerNorm(d_model) 336 | self.norm2 = LayerNorm(d_model) 337 | self.dropout1 = Dropout(dropout) 338 | self.dropout2 = Dropout(dropout) 339 | 340 | self.activation = _get_activation_fn(activation) 341 | 342 | def forward(self, src, src_mask=None, src_key_padding_mask=None): 343 | 344 | src2 = self.self_attn(src, src, src, attn_mask=src_mask, 345 | key_padding_mask=src_key_padding_mask)[0] 346 | src = src + self.dropout1(src2) 347 | src = self.norm1(src) 348 | if hasattr(self, "activation"): 349 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) 350 | else: # for backward compatibility 351 | src2 = self.linear2(self.dropout(F.relu(self.linear1(src)))) 352 | src = src + self.dropout2(src2) 353 | src = self.norm2(src) 354 | return src -------------------------------------------------------------------------------- /models/GCN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch_geometric.nn import GCNConv 5 | 6 | 7 | class GCNModel(nn.Module): 8 | def __init__(self,config): 9 | super(GCNModel, self).__init__() 10 | self.conv1 = GCNConv(34, 64) 11 | self.conv2 = GCNConv(64, 128) 12 | self.conv3 = GCNConv(128, 256) 13 | self.conv4 = GCNConv(256, 3) 14 | self.dropout1=nn.Dropout(config.dropout) 15 | self.dropout2=nn.Dropout(config.dropout) 16 | self.dropout3=nn.Dropout(config.dropout) 17 | 18 | def forward(self, data): 19 | x, edge_index = data.x, data.edge_index 20 | 21 | x = self.conv1(x, edge_index) 22 | x = F.relu(x) 23 | x = self.dropout1(x) 24 | x = self.conv2(x, edge_index) 25 | x = F.relu(x) 26 | x = self.dropout2(x) 27 | x = self.conv3(x, edge_index) 28 | x = F.relu(x) 29 | x = self.dropout3(x) 30 | x = self.conv4(x, edge_index) 31 | return x -------------------------------------------------------------------------------- /models/LSTM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | class LSTMModel(nn.Module): 5 | def __init__(self, config): 6 | super(LSTMModel, self).__init__() 7 | self.lstm=nn.LSTM(config.n_feat,config.hidden,dropout=config.dropout,num_layers=config.num_layers) 8 | self.maxpool=nn.MaxPool1d(config.pooldim) 9 | self.fc=nn.Linear((config.hidden//config.pooldim)*config.num_task,config.num_classes) 10 | def forward(self,x): 11 | out=x.permute(1,0,2) 12 | out, _ = self.lstm(out) 13 | out=out.permute(1,0,2) 14 | #out = torch.tanh(out) 15 | out = self.maxpool(out) 16 | #out = torch.tanh(out) 17 | out=out.reshape(out.size(0),-1) 18 | out = self.fc(out) 19 | return out -------------------------------------------------------------------------------- /models/Vanilla_Transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import TransformerEncoderLayer 4 | import copy 5 | 6 | class Positional_Encoding(nn.Module): 7 | def __init__(self, n_feat, num_task, dropout, device): 8 | super(Positional_Encoding, self).__init__() 9 | self.device = device 10 | self.pe = torch.tensor([[pos / (10000.0 ** (i // 2 * 2.0 / n_feat)) for i in range(n_feat)] for pos in range(num_task)]) 11 | self.pe[:, 0::2] = torch.sin(self.pe[:, 0::2]) 12 | self.pe[:, 1::2] = torch.cos(self.pe[:, 1::2]) 13 | self.dropout = nn.Dropout(dropout) 14 | 15 | def forward(self, x): 16 | out = x + nn.Parameter(self.pe, requires_grad=False).to(self.device) 17 | out = self.dropout(out) 18 | return out 19 | 20 | class VanillaTransformerModel(nn.Module): 21 | def __init__(self, config): 22 | super(VanillaTransformerModel, self).__init__() 23 | 24 | self.postion_embedding = Positional_Encoding(config.n_feat, config.num_task, config.dropout, config.device) 25 | self.encoder = TransformerEncoderLayer(config.n_feat, config.num_head, config.hidden, config.dropout) 26 | self.encoders = nn.ModuleList([ 27 | copy.deepcopy(self.encoder) 28 | for _ in range(config.num_encoder)]) 29 | 30 | self.fc1 = nn.Linear(config.num_task * config.n_feat, config.num_classes) 31 | 32 | def forward(self, x): 33 | out = x 34 | #out=self.transform_shape(out) 35 | out = self.postion_embedding(out) 36 | out=out.permute(1,0,2) 37 | 38 | 39 | for encoder in self.encoders: 40 | out = encoder(out) 41 | out=out.permute(1,0,2) 42 | out = out.reshape(out.size(0), -1) 43 | out = self.fc1(out) 44 | return out -------------------------------------------------------------------------------- /preprocess.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | import numpy as np 4 | from tqdm import tqdm 5 | 6 | def find_pos(out_degree_matrix,num_nodes): 7 | stage=np.zeros(num_nodes) 8 | signal=True 9 | while signal: 10 | temp=stage.copy() 11 | for m in range(num_nodes): 12 | for n in range(num_nodes): 13 | if(out_degree_matrix[m,n]!=0): 14 | stage[n]=max(stage[n],stage[m]+1) 15 | if (temp==stage).all(): 16 | signal=False 17 | return stage 18 | 19 | def create_position(pos,num_feat): 20 | 21 | pe = np.array([[[posit / (10000.0 ** (i // 2 * 2.0 / num_feat)) for i in range(num_feat)]for posit in posi] for posi in pos]) 22 | pe[:,:,0::2]=np.sin(pe[:,:,0::2]) 23 | pe[:,:,1::2]=np.cos(pe[:,:,1::2]) 24 | return pe 25 | 26 | def create_attn_mask(tensor,num_heads,num_nodes): 27 | mask=np.zeros(((tensor.shape[0]*num_heads),tensor.shape[1],tensor.shape[2])) 28 | for x in range(0,mask.shape[0],num_heads): 29 | mask[x:x+num_heads]=tensor[x//num_heads]+np.eye(num_nodes) 30 | return mask.reshape(tensor.shape[0],num_heads,tensor.shape[1],tensor.shape[2]) 31 | 32 | def prepare_data_exp1_dag(pred_task,pred_tgt,pred_mode): 33 | direct='./datasets_exp1/%s%s/%s'%(pred_task,pred_tgt,pred_mode) 34 | df_train=pd.read_csv(direct+'/train.csv') 35 | df_val=pd.read_csv(direct+'/val.csv') 36 | df_test=pd.read_csv(direct+'/test.csv') 37 | dag_direct='./datasets_exp1/%s%s/'%(pred_task,pred_tgt) 38 | df_dag_train=pd.read_csv(dag_direct+'/train_daginfo.csv') 39 | df_dag_val=pd.read_csv(dag_direct+'/val_daginfo.csv') 40 | df_dag_test=pd.read_csv(dag_direct+'/test_daginfo.csv') 41 | return df_train, df_val, df_test, df_dag_train, df_dag_val, df_dag_test 42 | 43 | def prepare_data_exp1(pred_task,pred_tgt,pred_mode): 44 | direct='./datasets_exp1/%s%s/%s'%(pred_task,pred_tgt,pred_mode) 45 | df_train=pd.read_csv(direct+'/train.csv') 46 | df_val=pd.read_csv(direct+'/val.csv') 47 | df_test=pd.read_csv(direct+'/test.csv') 48 | 49 | return df_train, df_val, df_test 50 | 51 | def prepare_data_exp23_dag(split):#split='split6_2_2','split8_1_1','split9_05_05' 52 | direct='./datasets_exp2_3/%s'%split 53 | df_train=pd.read_csv(direct+'/train.csv') 54 | df_val=pd.read_csv(direct+'/val.csv') 55 | df_test=pd.read_csv(direct+'/test.csv') 56 | df_dag_train=pd.read_csv(direct+'/train_daginfo.csv') 57 | df_dag_val=pd.read_csv(direct+'/val_daginfo.csv') 58 | df_dag_test=pd.read_csv(direct+'/test_daginfo.csv') 59 | return df_train, df_val, df_test, df_dag_train, df_dag_val, df_dag_test 60 | 61 | 62 | def prepare_data_exp23(split):#split='split6_2_2','split8_1_1','split9_05_05' 63 | direct='./datasets_exp2_3/%s'%split 64 | df_train=pd.read_csv(direct+'/train.csv') 65 | df_val=pd.read_csv(direct+'/val.csv') 66 | df_test=pd.read_csv(direct+'/test.csv') 67 | return df_train, df_val, df_test 68 | 69 | 70 | def preprocess_data_exp1_dag(pred_task,pred_tgt,pred_mode,num_feat=34,num_task=7,num_head=8): 71 | df_train, df_val, df_test, df_dag_train, df_dag_val, df_dag_test=prepare_data_exp1_dag(pred_task,pred_tgt,pred_mode) 72 | ##nodes features 73 | arr1=np.array(df_train.iloc[:,:-1]) 74 | arr2=np.array(df_val.iloc[:,:-1]) 75 | arr3=np.array(df_test.iloc[:,:-1]) 76 | data=np.vstack((arr1.reshape(arr1.shape[0]*num_task,num_feat),arr2.reshape(arr2.shape[0]*num_task,num_feat),arr3.reshape(arr3.shape[0]*num_task,num_feat))) 77 | data=(data-data.min(0))/(data.max(0)-data.min(0)+1e-9) 78 | data1=data[:arr1.shape[0]*num_task,:] 79 | data2=data[arr1.shape[0]*num_task:arr1.shape[0]*num_task+arr2.shape[0]*num_task,:] 80 | data3=data[arr1.shape[0]*num_task+arr2.shape[0]*num_task:,:] 81 | data1=data1.reshape(-1,num_task,num_feat) 82 | data2=data2.reshape(-1,num_task,num_feat) 83 | data3=data3.reshape(-1,num_task,num_feat) 84 | #####dag info 85 | dag1=df_dag_train.to_numpy().reshape(-1,num_task,num_task*2+1) 86 | dag2=df_dag_val.to_numpy().reshape(-1,num_task,num_task*2+1) 87 | dag3=df_dag_test.to_numpy().reshape(-1,num_task,num_task*2+1) 88 | dag1=dag1[:,:,1:] 89 | dag2=dag2[:,:,1:] 90 | dag3=dag3[:,:,1:] 91 | dagout1=dag1[:,:,:7] 92 | dagout2=dag2[:,:,:7] 93 | dagout3=dag3[:,:,:7] 94 | dagin1=dag1[:,:,7:] 95 | dagin2=dag2[:,:,7:] 96 | dagin3=dag3[:,:,7:] 97 | mask1=dagin1+dagout1 98 | mask2=dagin2+dagout2 99 | mask3=dagin3+dagout3 100 | pos1=np.zeros((arr1.shape[0],7)) 101 | for x in range(pos1.shape[0]): 102 | pos1[x]=find_pos(dagout1[x],7) 103 | pos2=np.zeros((arr2.shape[0],7)) 104 | for x in range(pos2.shape[0]): 105 | pos2[x]=find_pos(dagout2[x],7) 106 | pos3=np.zeros((arr3.shape[0],7)) 107 | for x in range(pos3.shape[0]): 108 | pos3[x]=find_pos(dagout3[x],7) 109 | position1=create_position(pos1,num_feat) 110 | position2=create_position(pos2,num_feat) 111 | position3=create_position(pos3,num_feat) 112 | mask_train=create_attn_mask(mask1,num_heads=num_head,num_nodes=num_task) 113 | mask_val=create_attn_mask(mask2,num_heads=num_head,num_nodes=num_task) 114 | mask_test=create_attn_mask(mask3,num_heads=num_head,num_nodes=num_task) 115 | data1=np.array(data1,dtype=np.float32) 116 | data2=np.array(data2,dtype=np.float32) 117 | data3=np.array(data3,dtype=np.float32) 118 | position1=np.array(position1,dtype=np.float32) 119 | position2=np.array(position2,dtype=np.float32) 120 | position3=np.array(position3,dtype=np.float32) 121 | ######data 122 | train_data=[] 123 | for x in range(data1.shape[0]): 124 | train_data.append((data1[x],df_train.iloc[x,-1],position1[x],mask_train[x])) 125 | val_data=[] 126 | for x in range(data2.shape[0]): 127 | val_data.append((data2[x],df_val.iloc[x,-1],position2[x],mask_val[x])) 128 | test_data=[] 129 | for x in range(data3.shape[0]): 130 | test_data.append((data3[x],df_test.iloc[x,-1],position3[x],mask_test[x])) 131 | return train_data, val_data, test_data 132 | 133 | 134 | 135 | def preprocess_data_exp1(pred_task,pred_tgt,pred_mode,num_feat=34,num_task=7,): 136 | df_train, df_val, df_test = prepare_data_exp1(pred_task,pred_tgt,pred_mode) 137 | ##nodes features 138 | arr1=np.array(df_train.iloc[:,:-1]) 139 | arr2=np.array(df_val.iloc[:,:-1]) 140 | arr3=np.array(df_test.iloc[:,:-1]) 141 | data=np.vstack((arr1.reshape(arr1.shape[0]*num_task,num_feat),arr2.reshape(arr2.shape[0]*num_task,num_feat),arr3.reshape(arr3.shape[0]*num_task,num_feat))) 142 | data=(data-data.min(0))/(data.max(0)-data.min(0)+1e-9) 143 | data1=data[:arr1.shape[0]*num_task,:] 144 | data2=data[arr1.shape[0]*num_task:arr1.shape[0]*num_task+arr2.shape[0]*num_task,:] 145 | data3=data[arr1.shape[0]*num_task+arr2.shape[0]*num_task:,:] 146 | data1=data1.reshape(-1,num_task,num_feat) 147 | data2=data2.reshape(-1,num_task,num_feat) 148 | data3=data3.reshape(-1,num_task,num_feat) 149 | data1=np.array(data1,dtype=np.float32) 150 | data2=np.array(data2,dtype=np.float32) 151 | data3=np.array(data3,dtype=np.float32) 152 | 153 | ######data 154 | train_data=[] 155 | for x in range(data1.shape[0]): 156 | train_data.append((data1[x],df_train.iloc[x,-1])) 157 | val_data=[] 158 | for x in range(data2.shape[0]): 159 | val_data.append((data2[x],df_val.iloc[x,-1])) 160 | test_data=[] 161 | for x in range(data3.shape[0]): 162 | test_data.append((data3[x],df_test.iloc[x,-1])) 163 | return train_data, val_data, test_data 164 | 165 | 166 | def preprocess_data_exp23_dag(split,num_feat=34,num_task=7,num_head=8): 167 | df_train, df_val, df_test, df_dag_train, df_dag_val, df_dag_test=prepare_data_exp23_dag(split) 168 | ##nodes features 169 | arr1=np.array(df_train.iloc[:,:-1]) 170 | arr2=np.array(df_val.iloc[:,:-1]) 171 | arr3=np.array(df_test.iloc[:,:-1]) 172 | data=np.vstack((arr1.reshape(arr1.shape[0]*num_task,num_feat),arr2.reshape(arr2.shape[0]*num_task,num_feat),arr3.reshape(arr3.shape[0]*num_task,num_feat))) 173 | data=(data-data.min(0))/(data.max(0)-data.min(0)+1e-9) 174 | data1=data[:arr1.shape[0]*num_task,:] 175 | data2=data[arr1.shape[0]*num_task:arr1.shape[0]*num_task+arr2.shape[0]*num_task,:] 176 | data3=data[arr1.shape[0]*num_task+arr2.shape[0]*num_task:,:] 177 | data1=data1.reshape(-1,num_task,num_feat) 178 | data2=data2.reshape(-1,num_task,num_feat) 179 | data3=data3.reshape(-1,num_task,num_feat) 180 | #####dag info 181 | dag1=df_dag_train.to_numpy().reshape(-1,num_task,num_task*2+1) 182 | dag2=df_dag_val.to_numpy().reshape(-1,num_task,num_task*2+1) 183 | dag3=df_dag_test.to_numpy().reshape(-1,num_task,num_task*2+1) 184 | dag1=dag1[:,:,1:] 185 | dag2=dag2[:,:,1:] 186 | dag3=dag3[:,:,1:] 187 | dagout1=dag1[:,:,:7] 188 | dagout2=dag2[:,:,:7] 189 | dagout3=dag3[:,:,:7] 190 | dagin1=dag1[:,:,7:] 191 | dagin2=dag2[:,:,7:] 192 | dagin3=dag3[:,:,7:] 193 | mask1=dagin1+dagout1 194 | mask2=dagin2+dagout2 195 | mask3=dagin3+dagout3 196 | pos1=np.zeros((arr1.shape[0],7)) 197 | for x in range(pos1.shape[0]): 198 | pos1[x]=find_pos(dagout1[x],7) 199 | pos2=np.zeros((arr2.shape[0],7)) 200 | for x in range(pos2.shape[0]): 201 | pos2[x]=find_pos(dagout2[x],7) 202 | pos3=np.zeros((arr3.shape[0],7)) 203 | for x in range(pos3.shape[0]): 204 | pos3[x]=find_pos(dagout3[x],7) 205 | position1=create_position(pos1,num_feat) 206 | position2=create_position(pos2,num_feat) 207 | position3=create_position(pos3,num_feat) 208 | mask_train=create_attn_mask(mask1,num_heads=num_head,num_nodes=num_task) 209 | mask_val=create_attn_mask(mask2,num_heads=num_head,num_nodes=num_task) 210 | mask_test=create_attn_mask(mask3,num_heads=num_head,num_nodes=num_task) 211 | data1=np.array(data1,dtype=np.float32) 212 | data2=np.array(data2,dtype=np.float32) 213 | data3=np.array(data3,dtype=np.float32) 214 | position1=np.array(position1,dtype=np.float32) 215 | position2=np.array(position2,dtype=np.float32) 216 | position3=np.array(position3,dtype=np.float32) 217 | ######data 218 | train_data=[] 219 | for x in range(data1.shape[0]): 220 | train_data.append((data1[x],df_train.iloc[x,-1],position1[x],mask_train[x])) 221 | val_data=[] 222 | for x in range(data2.shape[0]): 223 | val_data.append((data2[x],df_val.iloc[x,-1],position2[x],mask_val[x])) 224 | test_data=[] 225 | for x in range(data3.shape[0]): 226 | test_data.append((data3[x],df_test.iloc[x,-1],position3[x],mask_test[x])) 227 | return train_data, val_data, test_data 228 | 229 | 230 | def preprocess_data_exp23(split,num_feat=34,num_task=7,): 231 | df_train, df_val, df_test = prepare_data_exp23(split) 232 | ##nodes features 233 | arr1=np.array(df_train.iloc[:,:-1]) 234 | arr2=np.array(df_val.iloc[:,:-1]) 235 | arr3=np.array(df_test.iloc[:,:-1]) 236 | data=np.vstack((arr1.reshape(arr1.shape[0]*num_task,num_feat),arr2.reshape(arr2.shape[0]*num_task,num_feat),arr3.reshape(arr3.shape[0]*num_task,num_feat))) 237 | data=(data-data.min(0))/(data.max(0)-data.min(0)+1e-9) 238 | data1=data[:arr1.shape[0]*num_task,:] 239 | data2=data[arr1.shape[0]*num_task:arr1.shape[0]*num_task+arr2.shape[0]*num_task,:] 240 | data3=data[arr1.shape[0]*num_task+arr2.shape[0]*num_task:,:] 241 | data1=data1.reshape(-1,num_task,num_feat) 242 | data2=data2.reshape(-1,num_task,num_feat) 243 | data3=data3.reshape(-1,num_task,num_feat) 244 | data1=np.array(data1,dtype=np.float32) 245 | data2=np.array(data2,dtype=np.float32) 246 | data3=np.array(data3,dtype=np.float32) 247 | 248 | 249 | ######data 250 | train_data=[] 251 | for x in range(data1.shape[0]): 252 | train_data.append((data1[x],df_train.iloc[x,-1])) 253 | val_data=[] 254 | for x in range(data2.shape[0]): 255 | val_data.append((data2[x],df_val.iloc[x,-1])) 256 | test_data=[] 257 | for x in range(data3.shape[0]): 258 | test_data.append((data3[x],df_test.iloc[x,-1])) 259 | return train_data, val_data, test_data 260 | 261 | 262 | 263 | import torch 264 | from torch_geometric.data import Data 265 | def preprocess_data_exp23_GNN_unidir(split,num_feat=34,num_task=7): 266 | df_train, df_val, df_test, df_dag_train, df_dag_val, df_dag_test=prepare_data_exp23_dag(split) 267 | df=pd.concat((df_train,df_val,df_test),axis=0) 268 | feat=df.iloc[:,:-1].to_numpy().reshape(-1,num_feat) 269 | label=df.iloc[:,-1].to_numpy() 270 | labels=torch.zeros(df.shape[0]*num_task) 271 | for x in range(labels.shape[0]): 272 | if((x+1)%num_task==0): 273 | labels[x]=label[x//num_task] 274 | df_dag=pd.concat((df_dag_train,df_dag_val,df_dag_test),axis=0) 275 | edge_info=df_dag.to_numpy().reshape(-1,2*num_task+1) 276 | edge_info=edge_info[:,1:] 277 | out_mat=edge_info[:,:num_task] 278 | edge=torch.tensor([[],[]],dtype=torch.long) 279 | print('preparing data...') 280 | for src in tqdm(range(out_mat.shape[0])): 281 | for tgt in range(num_task): 282 | if(out_mat[src,tgt]!=0): 283 | edge=torch.cat((edge,torch.tensor([[src],[src+(tgt-src%num_task)]],dtype=torch.long)),1) 284 | self_loop=torch.tensor([[x for x in range(df.shape[0]*num_task)],[x for x in range(df.shape[0]*num_task)]],dtype=torch.long) 285 | edge=torch.cat((edge,self_loop),1) 286 | feat=(feat-feat.min(0))/(feat.max(0)-feat.min(0)+1e-9) 287 | feat=torch.tensor(feat,dtype=torch.float) 288 | Gdata = Data(x=feat, edge_index=edge) 289 | Gdata.labels=labels.long() 290 | Gdata.train_mask=torch.ByteTensor([False for x in range(df.shape[0]*num_task)]).bool() 291 | Gdata.val_mask=torch.ByteTensor([False for x in range(df.shape[0]*num_task)]).bool() 292 | Gdata.test_mask=torch.ByteTensor([False for x in range(df.shape[0]*num_task)]).bool() 293 | for x in range(df.shape[0]*num_task): 294 | if(x=(df_train.shape[0]*num_task+df_val.shape[0]*num_task) and (x+1)%num_task==0): 300 | Gdata.test_mask[x]=True 301 | return Gdata 302 | 303 | 304 | def preprocess_data_exp23_GNN_bidir(split,num_feat=34,num_task=7): 305 | df_train, df_val, df_test, df_dag_train, df_dag_val, df_dag_test=prepare_data_exp23_dag(split) 306 | df=pd.concat((df_train,df_val,df_test),axis=0) 307 | feat=df.iloc[:,:-1].to_numpy().reshape(-1,num_feat) 308 | label=df.iloc[:,-1].to_numpy() 309 | labels=torch.zeros(df.shape[0]*num_task) 310 | for x in range(labels.shape[0]): 311 | if((x+1)%num_task==0): 312 | labels[x]=label[x//num_task] 313 | df_dag=pd.concat((df_dag_train,df_dag_val,df_dag_test),axis=0) 314 | edge_info=df_dag.to_numpy().reshape(-1,2*num_task+1) 315 | edge_info=edge_info[:,1:] 316 | out_mat=edge_info[:,:num_task] 317 | edge=torch.tensor([[],[]],dtype=torch.long) 318 | print('preparing data...') 319 | for src in tqdm(range(out_mat.shape[0])): 320 | for tgt in range(num_task): 321 | if(out_mat[src,tgt]!=0): 322 | edge=torch.cat((edge,torch.tensor([[src],[src+(tgt-src%num_task)]],dtype=torch.long), 323 | torch.tensor([[src+(tgt-src%num_task)],[src]],dtype=torch.long)),1) 324 | self_loop=torch.tensor([[x for x in range(df.shape[0]*num_task)],[x for x in range(df.shape[0]*num_task)]],dtype=torch.long) 325 | edge=torch.cat((edge,self_loop),1) 326 | feat=(feat-feat.min(0))/(feat.max(0)-feat.min(0)+1e-9) 327 | feat=torch.tensor(feat,dtype=torch.float) 328 | Gdata = Data(x=feat, edge_index=edge) 329 | Gdata.labels=labels.long() 330 | Gdata.train_mask=torch.ByteTensor([False for x in range(df.shape[0]*num_task)]).bool() 331 | Gdata.val_mask=torch.ByteTensor([False for x in range(df.shape[0]*num_task)]).bool() 332 | Gdata.test_mask=torch.ByteTensor([False for x in range(df.shape[0]*num_task)]).bool() 333 | for x in range(df.shape[0]*num_task): 334 | if(x=(df_train.shape[0]*num_task+df_val.shape[0]*num_task) and (x+1)%num_task==0): 339 | Gdata.test_mask[x]=True 340 | return Gdata -------------------------------------------------------------------------------- /run_exp1.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from preprocess import preprocess_data_exp1_dag,preprocess_data_exp1 3 | from select_model import select_model_exp1 4 | from models.DAG_Transformer import DAGTransformer 5 | from train_model_dag import train 6 | import torch 7 | import argparse 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--pred_task', default=7) 10 | parser.add_argument('--pred_tgt', default='CPU') 11 | parser.add_argument('--pred_mode',default='PRIOR_ALL') 12 | parser.add_argument('--use_DAG',default='T') 13 | opt = parser.parse_args() 14 | 15 | if opt.pred_task !='3' and opt.pred_task!='5' and opt.pred_task!='7': 16 | raise AssertionError('pred_task should be 3/5/7') 17 | pred_task=opt.pred_task 18 | 19 | if opt.pred_tgt !='CPU' and opt.pred_tgt!='MEM': 20 | raise AssertionError('pred_tgt should be CPU/MEM') 21 | pred_tgt=opt.pred_tgt 22 | 23 | if opt.pred_mode !='PRIOR_1' and opt.pred_mode!='PRIOR_ALL': 24 | raise AssertionError('pred_mode should be PRIOR_1/PRIOR_ALL') 25 | pred_mode=opt.pred_mode 26 | 27 | if opt.use_DAG =='T': 28 | use_DAG=True 29 | elif opt.use_DAG!='F': 30 | use_DAG=False 31 | else: 32 | raise AssertionError('use_DAG should be T/F') 33 | if use_DAG: 34 | train_data, val_data, test_data=preprocess_data_exp1_dag(pred_task,pred_tgt,pred_mode) 35 | else: 36 | train_data, val_data, test_data=preprocess_data_exp1(pred_task,pred_tgt,pred_mode) 37 | 38 | config=select_model_exp1() 39 | if use_DAG==False: 40 | config.structure=False 41 | train_loader=torch.utils.data.DataLoader(dataset=train_data,batch_size=config.batch_size,num_workers=2, 42 | shuffle=False) 43 | val_loader=torch.utils.data.DataLoader(dataset=val_data,batch_size=config.batch_size,num_workers=2, 44 | shuffle=False) 45 | test_loader=torch.utils.data.DataLoader(dataset=test_data,batch_size=config.batch_size,num_workers=2, 46 | shuffle=False) 47 | 48 | 49 | 50 | if __name__=='__main__': 51 | model=DAGTransformer(config).to(config.device) 52 | train(config, model, train_loader, val_loader, test_loader) 53 | 54 | 55 | -------------------------------------------------------------------------------- /run_exp2.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pyexpat import model 3 | from preprocess import preprocess_data_exp23_dag,preprocess_data_exp23 4 | from select_model import select_model_exp2 5 | from models.DAG_Transformer import DAGTransformer 6 | from models.CNN import CNNModel 7 | from models.LSTM import LSTMModel 8 | from models.Vanilla_Transformer import VanillaTransformerModel 9 | from train_model_dag import train 10 | from train_model_vanilla import train as train_vanilla 11 | import torch 12 | import argparse 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--model_name', required=True)#DAGTransformer, CNN, LSTM, VanillaTransformer 15 | parser.add_argument('--split', default='split6_2_2') 16 | opt = parser.parse_args() 17 | 18 | if opt.model_name !='DAGTransformer' and opt.model_name !='CNN' and opt.model_name !='LSTM' and opt.model_name !='VanillaTransformer': 19 | raise AssertionError('model should be DAGTransformer/CNN/LSTM/VanillaTransformer') 20 | model_name=opt.model_name 21 | 22 | if opt.split !='split9_05_05' and opt.split !='split8_1_1' and opt.split !='split6_2_2' : 23 | raise AssertionError('split should be split9_05_05/split8_1_1/split6_2_2') 24 | split=opt.split 25 | 26 | if model_name=='DAGTransformer': 27 | config=select_model_exp2(model_name) 28 | train_data, val_data, test_data=preprocess_data_exp23_dag(split) 29 | else: 30 | config=select_model_exp2(model_name) 31 | train_data, val_data, test_data=preprocess_data_exp23(split) 32 | train_loader=torch.utils.data.DataLoader(dataset=train_data,batch_size=config.batch_size,num_workers=2, 33 | shuffle=False) 34 | val_loader=torch.utils.data.DataLoader(dataset=val_data,batch_size=config.batch_size,num_workers=2, 35 | shuffle=False) 36 | test_loader=torch.utils.data.DataLoader(dataset=test_data,batch_size=config.batch_size,num_workers=2, 37 | shuffle=False) 38 | if __name__=='__main__': 39 | if model_name=='DAGTransformer': 40 | model=DAGTransformer(config).to(config.device) 41 | train(config, model, train_loader, val_loader, test_loader) 42 | elif model_name=='LSTM': 43 | model=LSTMModel(config).to(config.device) 44 | train_vanilla(config, model, train_loader, val_loader, test_loader) 45 | elif model_name=='CNN': 46 | model=CNNModel(config).to(config.device) 47 | train_vanilla(config, model, train_loader, val_loader, test_loader) 48 | elif model_name=='VanillaTransformer': 49 | model=VanillaTransformerModel(config).to(config.device) 50 | train_vanilla(config, model, train_loader, val_loader, test_loader) 51 | 52 | -------------------------------------------------------------------------------- /run_exp3.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pyexpat import model 3 | from preprocess import preprocess_data_exp23_dag,preprocess_data_exp23, preprocess_data_exp23_GNN_bidir, preprocess_data_exp23_GNN_unidir 4 | from select_model import select_model_exp3 5 | from models.DAG_Transformer import DAGTransformer 6 | from models.CNN import CNNModel 7 | from models.LSTM import LSTMModel 8 | from models.Vanilla_Transformer import VanillaTransformerModel 9 | from models.GCN import GCNModel 10 | from train_model_dag import train 11 | from train_model_vanilla import train as train_vanilla 12 | from train_model_gnn import train as train_gnn 13 | import torch 14 | import argparse 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', required=True)#DAGTransformer, CNN, LSTM, VanillaTransformer, GCN 17 | parser.add_argument('--split', default='split6_2_2') 18 | parser.add_argument('--GCN_mode',default='bidirect') 19 | opt = parser.parse_args() 20 | 21 | if opt.model_name !='DAGTransformer' and opt.model_name !='CNN' and opt.model_name !='LSTM' and opt.model_name !='VanillaTransformer' and opt.model_name!='GCN': 22 | raise AssertionError('model should be DAGTransformer/CNN/LSTM/VanillaTransformer/GCN') 23 | model_name=opt.model_name 24 | 25 | if opt.split !='split9_05_05' and opt.split !='split8_1_1' and opt.split !='split6_2_2' : 26 | raise AssertionError('split should be split9_05_05/split8_1_1/split6_2_2') 27 | split=opt.split 28 | if opt.GCN_mode !='bidirect' and opt.GCN_mode!='unidirect': 29 | raise AssertionError('GCN_mode should be bidirect/unidirect') 30 | 31 | 32 | 33 | if model_name!='GCN': 34 | if model_name=='DAGTransformer': 35 | config=select_model_exp3(model_name) 36 | train_data, val_data, test_data=preprocess_data_exp23_dag(split) 37 | else: 38 | config=select_model_exp3(model_name) 39 | train_data, val_data, test_data=preprocess_data_exp23(split) 40 | train_loader=torch.utils.data.DataLoader(dataset=train_data,batch_size=config.batch_size,num_workers=2, 41 | shuffle=False) 42 | val_loader=torch.utils.data.DataLoader(dataset=val_data,batch_size=config.batch_size,num_workers=2, 43 | shuffle=False) 44 | test_loader=torch.utils.data.DataLoader(dataset=test_data,batch_size=config.batch_size,num_workers=2, 45 | shuffle=False) 46 | else: 47 | config=select_model_exp3('GCN') 48 | GCN_mode=opt.GCN_mode 49 | if GCN_mode=='bidirect': 50 | data=preprocess_data_exp23_GNN_bidir(split) 51 | 52 | else: 53 | data=preprocess_data_exp23_GNN_unidir(split) 54 | 55 | 56 | if __name__=='__main__': 57 | if model_name=='DAGTransformer': 58 | model=DAGTransformer(config).to(config.device) 59 | train(config, model, train_loader, val_loader, test_loader) 60 | elif model_name=='LSTM': 61 | model=LSTMModel(config).to(config.device) 62 | train_vanilla(config, model, train_loader, val_loader, test_loader) 63 | elif model_name=='CNN': 64 | model=CNNModel(config).to(config.device) 65 | train_vanilla(config, model, train_loader, val_loader, test_loader) 66 | elif model_name=='VanillaTransformer': 67 | model=VanillaTransformerModel(config).to(config.device) 68 | train_vanilla(config, model, train_loader, val_loader, test_loader) 69 | elif model_name=='GCN': 70 | model=GCNModel(config).to(config.device) 71 | train_gnn(config,model,data) 72 | 73 | -------------------------------------------------------------------------------- /scheduler.py: -------------------------------------------------------------------------------- 1 | from torch.optim.lr_scheduler import _LRScheduler 2 | 3 | 4 | class WarmUpLR(_LRScheduler): 5 | def __init__(self, optimizer, total_iters, last_epoch=-1): 6 | 7 | self.total_iters = total_iters 8 | super().__init__(optimizer, last_epoch) 9 | 10 | def get_lr(self): 11 | return [base_lr * self.last_epoch / (self.total_iters + 1e-8) for base_lr in self.base_lrs] 12 | class downLR(_LRScheduler): 13 | def __init__(self, optimizer, total_iters, last_epoch=-1): 14 | 15 | self.total_iters = total_iters 16 | super().__init__(optimizer, last_epoch) 17 | 18 | def get_lr(self): 19 | return [base_lr * (self.total_iters-self.last_epoch)/ (self.total_iters + 1e-8) for base_lr in self.base_lrs] -------------------------------------------------------------------------------- /select_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | class DAGTransformerConfig(object): 3 | def __init__(self): 4 | self.model_name='DAGTransformer' 5 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 6 | self.dropout = 0.3 7 | self.num_classes = 3 8 | self.num_epochs = 500 9 | self.batch_size = 500 10 | self.num_task = 7 11 | self.learning_rate = 1e-4 12 | self.n_feat = 34 13 | self.hidden_dim = 1024 14 | self.num_head = 8 15 | self.num_encoder = 6 16 | self.d_k=512 17 | self.res_num_layer=4 18 | self.structure=True 19 | 20 | class CNNConfig(object): 21 | def __init__(self): 22 | self.model_name = 'CNN' 23 | self.n_feat = 34 24 | self.num_task = 7 25 | self.outdim = 512 26 | self.num_epochs = 3000 27 | self.num_classes = 3 28 | self.pooldim = 3 29 | self.dropout = 0.3 30 | self.batch_size = 500 31 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 32 | self.learning_rate = 1e-3 33 | 34 | class LSTMConfig(object): 35 | def __init__(self): 36 | self.model_name = 'LSTM' 37 | self.n_feat=34 38 | self.num_task=7 39 | self.batch_size=500 40 | self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu') 41 | self.learning_rate=1e-3 42 | self.num_epochs=500 43 | self.num_classes=3 44 | self.num_layers=6 45 | self.hidden=1024 46 | self.dropout=0.5 47 | self.pooldim = 3 48 | 49 | 50 | 51 | class GCNConfig(object): 52 | def __init__(self): 53 | self.model_name = 'GCN' 54 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 55 | self.n_feat=34 56 | self.dropout = 0.5 57 | self.num_classes = 3 58 | self.num_epochs = 15000 59 | self.learning_rate = 5e-3 60 | 61 | class VanillaTransformerConfig(object): 62 | 63 | """配置参数""" 64 | def __init__(self): 65 | self.model_name = 'VanillaTransformer' 66 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 67 | 68 | self.dropout = 0.5 69 | self.num_classes = 3 70 | self.num_epochs = 100 71 | self.batch_size = 500 72 | self.num_task = 7 73 | self.learning_rate = 1e-4 74 | self.n_feat = 34 75 | self.hidden = 1024 76 | self.num_head = 2 77 | self.num_encoder = 6 78 | 79 | def select_model_exp1(): 80 | 81 | config=DAGTransformerConfig() 82 | return config 83 | 84 | 85 | def select_model_exp2(model_name): 86 | if model_name=='DAGTransformer': 87 | config=DAGTransformerConfig() 88 | config.num_epochs=100 89 | return config 90 | 91 | if model_name=='CNN': 92 | config=CNNConfig() 93 | config.num_epochs=100 94 | config.learning_rate=1e-4 95 | return config 96 | 97 | if model_name=='LSTM': 98 | config=LSTMConfig() 99 | config.learning_rate=1e-4 100 | config.num_epochs=100 101 | return config 102 | if model_name=='VanillaTransformer': 103 | config=VanillaTransformerConfig() 104 | return config 105 | 106 | 107 | def select_model_exp3(model_name): 108 | if model_name=='DAGTransformer': 109 | config=DAGTransformerConfig() 110 | return config 111 | if model_name=='GCN': 112 | config=GCNConfig() 113 | return config 114 | if model_name=='CNN': 115 | config=CNNConfig() 116 | return config 117 | if model_name=='LSTM': 118 | config=LSTMConfig() 119 | return config 120 | if model_name=='VanillaTransformer': 121 | config=VanillaTransformerConfig() 122 | return config 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /train_model_dag.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | from scheduler import WarmUpLR, downLR 3 | import time 4 | import torch 5 | import numpy as np 6 | import torch.nn.functional as F 7 | from datetime import timedelta 8 | from sklearn import metrics 9 | 10 | def get_time_dif(start_time): 11 | end_time = time.time() 12 | time_dif = end_time - start_time 13 | return timedelta(seconds=int(round(time_dif))) 14 | 15 | 16 | def train(config, model, train_iter, dev_iter, test_iter): 17 | start_time = time.time() 18 | model.train() 19 | optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate) 20 | warmup_epoch = config.num_epochs/2 21 | iter_per_epoch = len(train_iter) 22 | scheduler=downLR(optimizer, (config.num_epochs-warmup_epoch)*iter_per_epoch) 23 | 24 | warmup_scheduler = WarmUpLR(optimizer, warmup_epoch*iter_per_epoch) 25 | total_batch = 0 26 | dev_best_loss = float('inf') 27 | dev_best_acc = float(0) 28 | test_best_acc=float(0) 29 | 30 | lrlist=np.zeros((config.num_epochs,2)) 31 | for epoch in range(config.num_epochs): 32 | 33 | 34 | loss_total = 0 35 | print('Epoch [{}/{}]'.format(epoch + 1, config.num_epochs)) 36 | lrlist[epoch][0]=epoch 37 | predic_all = torch.LongTensor([]).to(config.device) 38 | true_all = torch.LongTensor([]).to(config.device) 39 | if(epoch>=warmup_epoch): 40 | 41 | learn_rate = scheduler.get_lr()[0] 42 | print("Learn_rate:%s" % learn_rate) 43 | lrlist[epoch][1]=learn_rate 44 | else: 45 | learn_rate = warmup_scheduler.get_lr()[0] 46 | lrlist[epoch][0]=learn_rate 47 | print("Learn_rate:%s" % learn_rate) 48 | 49 | 50 | for (trains, labels, poss, masks) in tqdm(train_iter): 51 | trains=trains.to(config.device) 52 | #trains.dtype=torch.float 53 | labels=labels.long().to(config.device) 54 | #print(labels.dtype) 55 | poss=poss.to(config.device) 56 | masks=masks.to(config.device) 57 | outputs = model(trains,poss,masks) 58 | model.zero_grad() 59 | loss = F.cross_entropy(outputs, labels) 60 | loss.backward() 61 | 62 | 63 | optimizer.step() 64 | if(epoch dev_best_acc: 91 | dev_best_acc = dev_acc 92 | #torch.save(model.state_dict(), './best.ckpt') 93 | test_best_acc = test_acc 94 | time_dif = get_time_dif(start_time) 95 | msg = 'Iter: {0:>6}, Train Loss: {1:>5.2}, Train Acc: {2:>6.2%}, Val Loss: {3:>5.2}, Val Acc: {4:>6.2%}, Test Loss: {5:>5.2}, Test Acc: {6:>6.2%},Time: {7} {8}' 96 | print(msg.format(total_batch, lossoutput.item(), train_acc, dev_loss, dev_acc, test_loss,test_acc,time_dif, improve)) 97 | print('BEST SO FAR:') 98 | print('Val Best Acc:', dev_best_acc) 99 | print('Test Best Acc:', test_best_acc) 100 | model.train() 101 | test(config, model, test_iter) 102 | 103 | 104 | def test(config, model, test_iter): 105 | model.eval() 106 | start_time = time.time() 107 | test_acc, test_loss, test_confusion = evaluate(config, model, test_iter, test=True) 108 | msg = 'Test Loss: {0:>5.2}, Test Acc: {1:>6.2%}' 109 | print(msg.format(test_loss, test_acc)) 110 | print(test_confusion) 111 | time_dif = get_time_dif(start_time) 112 | print("Time usage:", time_dif) 113 | 114 | 115 | def evaluate(config, model, data_iter, test=False): 116 | model.eval() 117 | loss_total = 0 118 | predict_all = torch.LongTensor([]).to(config.device) 119 | labels_all = torch.LongTensor([]).to(config.device) 120 | with torch.no_grad(): 121 | for texts, labels, poss, masks in data_iter: 122 | texts=texts.float().to(config.device) 123 | poss=poss.float().to(config.device) 124 | masks=masks.float().to(config.device) 125 | labels=labels.long().to(config.device) 126 | outputs = model(texts,poss,masks) 127 | loss = F.cross_entropy(outputs, labels) 128 | loss_total += loss 129 | labels = labels.data 130 | predic = torch.max(outputs.data, 1)[1] 131 | labels_all = torch.cat((labels_all, labels),0) 132 | predict_all = torch.cat((predict_all, predic),0) 133 | 134 | acc = get_accuracy(labels_all, predict_all) 135 | if test: 136 | confusion = metrics.confusion_matrix(labels_all.cpu().numpy(), predict_all.cpu().numpy()) 137 | return acc, loss_total / len(data_iter), confusion 138 | return acc, loss_total / len(data_iter) 139 | 140 | def get_accuracy(y_true,y_pred): 141 | y_true,y_pred=y_true.cpu().numpy(), y_pred.cpu().numpy() 142 | return metrics.accuracy_score(y_true,y_pred) -------------------------------------------------------------------------------- /train_model_gnn.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | from scheduler import WarmUpLR, downLR 3 | import time 4 | import torch 5 | import numpy as np 6 | import torch.nn.functional as F 7 | from datetime import timedelta 8 | from sklearn import metrics 9 | import time 10 | from datetime import timedelta 11 | def get_time_dif(start_time): 12 | end_time = time.time() 13 | time_dif = end_time - start_time 14 | return timedelta(seconds=int(round(time_dif))) 15 | 16 | def train(config, model, data): 17 | start_time = time.time() 18 | model.train() 19 | optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate) 20 | warmup_epoch = config.num_epochs/2 21 | scheduler=downLR(optimizer, (config.num_epochs-warmup_epoch)) 22 | 23 | warmup_scheduler = WarmUpLR(optimizer, warmup_epoch) 24 | total_batch = 0 25 | dev_best_loss = float('inf') 26 | dev_best_acc = float(0) 27 | test_best_acc=float(0) 28 | lrlist=np.zeros((config.num_epochs,2)) 29 | for epoch in range(config.num_epochs): 30 | print('Epoch [{}/{}]'.format(epoch + 1, config.num_epochs)) 31 | lrlist[epoch][0]=epoch 32 | if(epoch>=warmup_epoch): 33 | 34 | learn_rate = scheduler.get_lr()[0] 35 | print("Learn_rate:%s" % learn_rate) 36 | lrlist[epoch][1]=learn_rate 37 | else: 38 | learn_rate = warmup_scheduler.get_lr()[0] 39 | lrlist[epoch][0]=learn_rate 40 | print("Learn_rate:%s" % learn_rate) 41 | 42 | 43 | data=data.to(config.device) 44 | outputs = model(data) 45 | model.zero_grad() 46 | loss = F.cross_entropy(outputs[data.train_mask], data.labels[data.train_mask]) 47 | loss.backward() 48 | optimizer.step() 49 | if(epoch dev_best_acc: 70 | dev_best_acc = dev_acc 71 | #torch.save(model.state_dict(), './best1nod.ckpt') 72 | test_best_acc = test_acc 73 | 74 | time_dif = get_time_dif(start_time) 75 | msg = 'Iter: {0:>6}, Train Loss: {1:>5.2}, Train Acc: {2:>6.2%}, Val Loss: {3:>5.2}, Val Acc: {4:>6.2%}, Test Loss: {5:>5.2}, Test Acc: {6:>6.2%},Time: {7} {8}' 76 | print(msg.format(total_batch, lossoutput.item(), train_acc, dev_loss, dev_acc, test_loss,test_acc,time_dif, improve)) 77 | model.train() 78 | print('BEST SO FAR:') 79 | print('Val Best Acc:', dev_best_acc) 80 | print('Test Best Acc:', test_best_acc) 81 | test(config, model, data, final=True) 82 | 83 | 84 | def test(config, model, data, final=False): 85 | # test 86 | 87 | model.eval() 88 | with torch.no_grad(): 89 | outputs=model(data) 90 | loss_total = F.cross_entropy(outputs[data.test_mask], data.labels[data.test_mask]) 91 | predict_all=torch.max(outputs[data.test_mask], 1)[1] 92 | labels_all=data.labels[data.test_mask] 93 | acc = get_accuracy(labels_all, predict_all) 94 | if final: 95 | msg = 'Test Loss: {0:>5.2}, Test Acc: {1:>6.2%}' 96 | print(msg.format(loss_total, acc)) 97 | print("Confusion Matrix...") 98 | confusion = metrics.confusion_matrix(labels_all.cpu().numpy(), predict_all.cpu().numpy()) 99 | print(confusion) 100 | return acc, loss_total, confusion 101 | return acc, loss_total 102 | 103 | 104 | 105 | def evaluate(config, model, data): 106 | model.eval() 107 | with torch.no_grad(): 108 | 109 | outputs=model(data) 110 | loss_total = F.cross_entropy(outputs[data.val_mask], data.labels[data.val_mask]) 111 | predict_all=torch.max(outputs[data.val_mask], 1)[1] 112 | labels_all=data.labels[data.val_mask] 113 | 114 | acc = get_accuracy(labels_all, predict_all) 115 | 116 | return acc, loss_total 117 | from sklearn import metrics 118 | def get_accuracy(y_true,y_pred): 119 | y_true,y_pred=y_true.cpu().numpy(), y_pred.cpu().numpy() 120 | return metrics.accuracy_score(y_true,y_pred) 121 | 122 | -------------------------------------------------------------------------------- /train_model_vanilla.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | from scheduler import WarmUpLR, downLR 3 | import time 4 | import torch 5 | import numpy as np 6 | import torch.nn.functional as F 7 | from datetime import timedelta 8 | from sklearn import metrics 9 | 10 | def get_time_dif(start_time): 11 | end_time = time.time() 12 | time_dif = end_time - start_time 13 | return timedelta(seconds=int(round(time_dif))) 14 | 15 | 16 | def train(config, model, train_iter, dev_iter, test_iter): 17 | start_time = time.time() 18 | model.train() 19 | optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate) 20 | warmup_epoch = config.num_epochs/2 21 | iter_per_epoch = len(train_iter) 22 | scheduler=downLR(optimizer, (config.num_epochs-warmup_epoch)*iter_per_epoch) 23 | 24 | warmup_scheduler = WarmUpLR(optimizer, warmup_epoch*iter_per_epoch) 25 | total_batch = 0 26 | dev_best_loss = float('inf') 27 | dev_best_acc = float(0) 28 | test_best_acc=float(0) 29 | 30 | lrlist=np.zeros((config.num_epochs,2)) 31 | for epoch in range(config.num_epochs): 32 | 33 | 34 | loss_total = 0 35 | print('Epoch [{}/{}]'.format(epoch + 1, config.num_epochs)) 36 | lrlist[epoch][0]=epoch 37 | predic_all = torch.LongTensor([]).to(config.device) 38 | true_all = torch.LongTensor([]).to(config.device) 39 | if(epoch>=warmup_epoch): 40 | 41 | learn_rate = scheduler.get_lr()[0] 42 | print("Learn_rate:%s" % learn_rate) 43 | lrlist[epoch][1]=learn_rate 44 | else: 45 | learn_rate = warmup_scheduler.get_lr()[0] 46 | lrlist[epoch][0]=learn_rate 47 | print("Learn_rate:%s" % learn_rate) 48 | 49 | 50 | for (trains, labels) in tqdm(train_iter): 51 | trains=trains.to(config.device) 52 | #trains.dtype=torch.float 53 | labels=labels.long().to(config.device) 54 | #print(labels.dtype) 55 | outputs = model(trains) 56 | model.zero_grad() 57 | loss = F.cross_entropy(outputs, labels) 58 | loss.backward() 59 | 60 | 61 | optimizer.step() 62 | if(epoch dev_best_acc: 89 | dev_best_acc = dev_acc 90 | #torch.save(model.state_dict(), './best.ckpt') 91 | test_best_acc = test_acc 92 | time_dif = get_time_dif(start_time) 93 | msg = 'Iter: {0:>6}, Train Loss: {1:>5.2}, Train Acc: {2:>6.2%}, Val Loss: {3:>5.2}, Val Acc: {4:>6.2%}, Test Loss: {5:>5.2}, Test Acc: {6:>6.2%},Time: {7} {8}' 94 | print(msg.format(total_batch, lossoutput.item(), train_acc, dev_loss, dev_acc, test_loss,test_acc,time_dif, improve)) 95 | print('BEST SO FAR:') 96 | print('Val Best Acc:', dev_best_acc) 97 | print('Test Best Acc:', test_best_acc) 98 | model.train() 99 | test(config, model, test_iter) 100 | 101 | 102 | def test(config, model, test_iter): 103 | model.eval() 104 | start_time = time.time() 105 | test_acc, test_loss, test_confusion = evaluate(config, model, test_iter, test=True) 106 | msg = 'Test Loss: {0:>5.2}, Test Acc: {1:>6.2%}' 107 | print(msg.format(test_loss, test_acc)) 108 | print(test_confusion) 109 | time_dif = get_time_dif(start_time) 110 | print("Time usage:", time_dif) 111 | 112 | 113 | def evaluate(config, model, data_iter, test=False): 114 | model.eval() 115 | loss_total = 0 116 | predict_all = torch.LongTensor([]).to(config.device) 117 | labels_all = torch.LongTensor([]).to(config.device) 118 | with torch.no_grad(): 119 | for texts, labels in data_iter: 120 | texts=texts.float().to(config.device) 121 | labels=labels.long().to(config.device) 122 | outputs = model(texts) 123 | loss = F.cross_entropy(outputs, labels) 124 | loss_total += loss 125 | labels = labels.data 126 | predic = torch.max(outputs.data, 1)[1] 127 | labels_all = torch.cat((labels_all, labels),0) 128 | predict_all = torch.cat((predict_all, predic),0) 129 | 130 | acc = get_accuracy(labels_all, predict_all) 131 | if test: 132 | confusion = metrics.confusion_matrix(labels_all.cpu().numpy(), predict_all.cpu().numpy()) 133 | return acc, loss_total / len(data_iter), confusion 134 | return acc, loss_total / len(data_iter) 135 | 136 | def get_accuracy(y_true,y_pred): 137 | y_true,y_pred=y_true.cpu().numpy(), y_pred.cpu().numpy() 138 | return metrics.accuracy_score(y_true,y_pred) --------------------------------------------------------------------------------