├── 811replication.zip
├── LICENSE
├── README.md
├── datasets_exp1
    ├── 3CPU.zip
    ├── 3MEM.zip
    ├── 5CPU.zip
    ├── 5MEM.zip
    ├── 7CPU.zip
    ├── 7MEM.z01
    └── 7MEM.zip
├── datasets_exp2_3
    ├── split6_2_2.zip
    ├── split8_1_1.zip
    └── split9_05_05.zip
├── models
    ├── CNN.py
    ├── DAG_Transformer.py
    ├── DAG_Transformer_Encoder_Layer.py
    ├── GCN.py
    ├── LSTM.py
    └── Vanilla_Transformer.py
├── preprocess.py
├── run_exp1.py
├── run_exp2.py
├── run_exp3.py
├── scheduler.py
├── select_model.py
├── train_model_dag.py
├── train_model_gnn.py
└── train_model_vanilla.py


/811replication.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/811replication.zip


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Creative Commons Legal Code
  2 | 
  3 | CC0 1.0 Universal
  4 | 
  5 |     CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
  6 |     LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
  7 |     ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
  8 |     INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
  9 |     REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
 10 |     PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
 11 |     THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
 12 |     HEREUNDER.
 13 | 
 14 | Statement of Purpose
 15 | 
 16 | The laws of most jurisdictions throughout the world automatically confer
 17 | exclusive Copyright and Related Rights (defined below) upon the creator
 18 | and subsequent owner(s) (each and all, an "owner") of an original work of
 19 | authorship and/or a database (each, a "Work").
 20 | 
 21 | Certain owners wish to permanently relinquish those rights to a Work for
 22 | the purpose of contributing to a commons of creative, cultural and
 23 | scientific works ("Commons") that the public can reliably and without fear
 24 | of later claims of infringement build upon, modify, incorporate in other
 25 | works, reuse and redistribute as freely as possible in any form whatsoever
 26 | and for any purposes, including without limitation commercial purposes.
 27 | These owners may contribute to the Commons to promote the ideal of a free
 28 | culture and the further production of creative, cultural and scientific
 29 | works, or to gain reputation or greater distribution for their Work in
 30 | part through the use and efforts of others.
 31 | 
 32 | For these and/or other purposes and motivations, and without any
 33 | expectation of additional consideration or compensation, the person
 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she
 35 | is an owner of Copyright and Related Rights in the Work, voluntarily
 36 | elects to apply CC0 to the Work and publicly distribute the Work under its
 37 | terms, with knowledge of his or her Copyright and Related Rights in the
 38 | Work and the meaning and intended legal effect of CC0 on those rights.
 39 | 
 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be
 41 | protected by copyright and related or neighboring rights ("Copyright and
 42 | Related Rights"). Copyright and Related Rights include, but are not
 43 | limited to, the following:
 44 | 
 45 |   i. the right to reproduce, adapt, distribute, perform, display,
 46 |      communicate, and translate a Work;
 47 |  ii. moral rights retained by the original author(s) and/or performer(s);
 48 | iii. publicity and privacy rights pertaining to a person's image or
 49 |      likeness depicted in a Work;
 50 |  iv. rights protecting against unfair competition in regards to a Work,
 51 |      subject to the limitations in paragraph 4(a), below;
 52 |   v. rights protecting the extraction, dissemination, use and reuse of data
 53 |      in a Work;
 54 |  vi. database rights (such as those arising under Directive 96/9/EC of the
 55 |      European Parliament and of the Council of 11 March 1996 on the legal
 56 |      protection of databases, and under any national implementation
 57 |      thereof, including any amended or successor version of such
 58 |      directive); and
 59 | vii. other similar, equivalent or corresponding rights throughout the
 60 |      world based on applicable law or treaty, and any national
 61 |      implementations thereof.
 62 | 
 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention
 64 | of, applicable law, Affirmer hereby overtly, fully, permanently,
 65 | irrevocably and unconditionally waives, abandons, and surrenders all of
 66 | Affirmer's Copyright and Related Rights and associated claims and causes
 67 | of action, whether now known or unknown (including existing as well as
 68 | future claims and causes of action), in the Work (i) in all territories
 69 | worldwide, (ii) for the maximum duration provided by applicable law or
 70 | treaty (including future time extensions), (iii) in any current or future
 71 | medium and for any number of copies, and (iv) for any purpose whatsoever,
 72 | including without limitation commercial, advertising or promotional
 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
 74 | member of the public at large and to the detriment of Affirmer's heirs and
 75 | successors, fully intending that such Waiver shall not be subject to
 76 | revocation, rescission, cancellation, termination, or any other legal or
 77 | equitable action to disrupt the quiet enjoyment of the Work by the public
 78 | as contemplated by Affirmer's express Statement of Purpose.
 79 | 
 80 | 3. Public License Fallback. Should any part of the Waiver for any reason
 81 | be judged legally invalid or ineffective under applicable law, then the
 82 | Waiver shall be preserved to the maximum extent permitted taking into
 83 | account Affirmer's express Statement of Purpose. In addition, to the
 84 | extent the Waiver is so judged Affirmer hereby grants to each affected
 85 | person a royalty-free, non transferable, non sublicensable, non exclusive,
 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and
 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the
 88 | maximum duration provided by applicable law or treaty (including future
 89 | time extensions), (iii) in any current or future medium and for any number
 90 | of copies, and (iv) for any purpose whatsoever, including without
 91 | limitation commercial, advertising or promotional purposes (the
 92 | "License"). The License shall be deemed effective as of the date CC0 was
 93 | applied by Affirmer to the Work. Should any part of the License for any
 94 | reason be judged legally invalid or ineffective under applicable law, such
 95 | partial invalidity or ineffectiveness shall not invalidate the remainder
 96 | of the License, and in such case Affirmer hereby affirms that he or she
 97 | will not (i) exercise any of his or her remaining Copyright and Related
 98 | Rights in the Work or (ii) assert any associated claims and causes of
 99 | action with respect to the Work, in either case contrary to Affirmer's
100 | express Statement of Purpose.
101 | 
102 | 4. Limitations and Disclaimers.
103 | 
104 |  a. No trademark or patent rights held by Affirmer are waived, abandoned,
105 |     surrendered, licensed or otherwise affected by this document.
106 |  b. Affirmer offers the Work as-is and makes no representations or
107 |     warranties of any kind concerning the Work, express, implied,
108 |     statutory or otherwise, including without limitation warranties of
109 |     title, merchantability, fitness for a particular purpose, non
110 |     infringement, or the absence of latent or other defects, accuracy, or
111 |     the present or absence of errors, whether or not discoverable, all to
112 |     the greatest extent permissible under applicable law.
113 |  c. Affirmer disclaims responsibility for clearing rights of other persons
114 |     that may apply to the Work or any use thereof, including without
115 |     limitation any person's Copyright and Related Rights in the Work.
116 |     Further, Affirmer disclaims responsibility for obtaining any necessary
117 |     consents, permissions or other rights required for any use of the
118 |     Work.
119 |  d. Affirmer understands and acknowledges that Creative Commons is not a
120 |     party to this document and has no duty or obligation with respect to
121 |     this CC0 or use of the Work.
122 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # workflow-performance-prediction-jii
 2 | This source repository is dedicated to the following published journal paper: 
 3 | #### Jixiang Yu, Ming Gao*, Yuchan Li, Zehui Zhang, WAI HUNG IP, KAI LEUNG Yung, Workflow performance prediction based on graph structure aware deep attention neural network, Journal of Industrial Information Integration, 2022, https://doi.org/10.1016/j.jii.2022.100337. (https://www.sciencedirect.com/science/article/pii/S2452414X22000097)
 4 | If you are interested in this research and use this code, please kindly reference our paper or contact the corresponding author.
 5 | 
 6 | This dataset is extracted and aggregated based on cluster-trace-v2018 (https://github.com/alibaba/clusterdata)
 7 | 
 8 | 
 9 | ### Highlights
10 | #### In workflow performance prediction, DAG structure matters;
11 | #### DAG-Transformer effectively embeds the DAG information and outperforms mainstream ML, DL and GCN methods;
12 | #### A new dataset for cloud workflow performance prediction is provided along with the source code.
13 | 
14 | # Usage:
15 | ## 1 To reproduce experiment 1
16 | python run_exp1.py --pred_task=3/5/7 #(default=7) --pred_tgt=CPU/MEM #(default=CPU) --pred_mode=PRIOR_1/PRIOR_ALL #(default=PRIOR_ALL) --use_DAG=T/F #(default=T)
17 | ## 2 To reproduce experiment 2
18 | python run_exp2.py --model_name=CNN/LSTM/VanillaTransformer/DAGTransformer --split=split9_05_05/split8_1_1/split6_2_2 #(default=split6_2_2)
19 | ## 3 To reproduce experiment 3
20 | python run_exp3.py --model_name=CNN/LSTM/VanillaTransformer/DAGTransformer/GCN --split=split9_05_05/split8_1_1/split6_2_2 #(default=split6_2_2) --GCN_mode=bidirect/unidirect #(default=bidirect)
21 | 
22 | # Datasets(You need to extract the *.zip files first):
23 | ## 1 In directory datasets_exp1/
24 | There are 6 different sub-datasets, whose name indicates pred_task+pred_tgt. In each sub-dataset, for example, in 3CPU/, there are 3 DAG information files(train, val, and test) and 2 performance datasets(train, val, and test) using different pred_mode(i.e., PRIOR_1 and PRIOR_ALL).
25 | 
26 | ## 2 In directory datasets_exp2_3/
27 | There are 3 different splits. In each split, there are 3 DAG information files(train, val, and test) and their corresponding performance data(train, val, and test).
28 | 
29 | # Requirements:
30 | CUDA==11.0
31 | python==3.8
32 | pytorch==1.7.0
33 | PyG==corresponding version of pytorch-1.7.0 and CUDA-11.0 
34 | 


--------------------------------------------------------------------------------
/datasets_exp1/3CPU.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/3CPU.zip


--------------------------------------------------------------------------------
/datasets_exp1/3MEM.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/3MEM.zip


--------------------------------------------------------------------------------
/datasets_exp1/5CPU.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/5CPU.zip


--------------------------------------------------------------------------------
/datasets_exp1/5MEM.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/5MEM.zip


--------------------------------------------------------------------------------
/datasets_exp1/7CPU.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/7CPU.zip


--------------------------------------------------------------------------------
/datasets_exp1/7MEM.z01:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/7MEM.z01


--------------------------------------------------------------------------------
/datasets_exp1/7MEM.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp1/7MEM.zip


--------------------------------------------------------------------------------
/datasets_exp2_3/split6_2_2.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp2_3/split6_2_2.zip


--------------------------------------------------------------------------------
/datasets_exp2_3/split8_1_1.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp2_3/split8_1_1.zip


--------------------------------------------------------------------------------
/datasets_exp2_3/split9_05_05.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudworkflow/workflow-performance-prediction-jii/6716fef5a966576c48ca429b30ba7081c300bc20/datasets_exp2_3/split9_05_05.zip


--------------------------------------------------------------------------------
/models/CNN.py:
--------------------------------------------------------------------------------
 1 | import torch 
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | class CNNModel(nn.Module):
 5 |     def __init__(self, config):
 6 |         super(CNNModel, self).__init__()
 7 |         self.conv=nn.Conv1d(in_channels=config.n_feat,out_channels=config.outdim,kernel_size=1)
 8 |         self.maxpool=nn.MaxPool1d(config.pooldim)
 9 |         self.fc=nn.Linear((config.outdim//config.pooldim)*config.num_task,config.num_classes)
10 |         self.dropout=nn.Dropout(config.dropout)
11 |     def forward(self,x):
12 |         out=x.permute(0,2,1)
13 |         out=F.relu(self.conv(out))
14 |         out=self.dropout(out)
15 |         out=out.permute(0,2,1)
16 |         out=self.maxpool(out)
17 |         out=out.reshape(out.size(0),-1)
18 |         out=self.fc(out)
19 |         return out


--------------------------------------------------------------------------------
/models/DAG_Transformer.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import copy
 4 | import torch.nn.functional as F
 5 | from .DAG_Transformer_Encoder_Layer import DAGTransformerEncoderLayer as encoder_layer
 6 | 
 7 | 
 8 | class DAGTransformerEncoder(nn.Module):
 9 |     def __init__(self, d_k, num_head, hidden_dim, dropout, num_layer):
10 |         super(DAGTransformerEncoder, self).__init__()
11 |         self.encoder_layer=encoder_layer(d_k, num_head, hidden_dim, dropout)
12 |         self.encoder=nn.ModuleList([copy.deepcopy(self.encoder_layer) for _ in range(num_layer)])
13 |     def forward(self, out, attn_mask=None):
14 |         for _ in self.encoder:
15 |             out = _(out, src_mask=attn_mask)
16 |         return out
17 | 
18 | 
19 | 
class resnet_layer(nn.Module):
    """Residual block made of two 1-D convolutions (n_feat -> d_k -> n_feat).

    Args:
        n_feat: number of input (and output) channels.
        d_k: number of channels between the two convolutions.
        kernel_size: kernel size of both convolutions. No padding is applied,
            so the residual addition only lines up when ``kernel_size == 1``.
        dropout: dropout probability applied to the block output.
    """

    def __init__(self, n_feat, d_k, kernel_size, dropout):
        super(resnet_layer, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=n_feat, out_channels=d_k, kernel_size=kernel_size)
        self.conv2 = nn.Conv1d(in_channels=d_k, out_channels=n_feat, kernel_size=kernel_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return ``dropout(conv2(conv1(x)) + x)`` for a channels-first input ``x``."""
        out = self.conv2(self.conv1(x))
        # Out-of-place add: keeps autograd happy and never touches the caller's tensor.
        out = out + x
        # Dropout must act on the residual sum. The previous code passed ``x``
        # here, which threw away both convolutions and the skip connection.
        return self.dropout(out)
34 | 
35 | 
class DAGTransformer(nn.Module):
    """Transformer classifier that can optionally consume DAG structure.

    Features pass through a stack of residual blocks, are projected to
    ``config.d_k`` channels, encoded by ``DAGTransformerEncoder``, averaged
    and mapped to 3 outputs. When ``config.structure`` equals ``True`` a
    second residual stack processes ``pos`` and ``mask`` is turned into an
    additive attention mask.

    Args:
        config: object exposing ``structure``, ``n_feat``, ``d_k``,
            ``dropout``, ``res_num_layer``, ``num_head``, ``hidden_dim`` and
            ``num_encoder``.
    """

    def __init__(self, config):
        super().__init__()
        self.structure = config.structure
        # Kept as an explicit ``== True`` comparison: a plain truthiness test
        # would behave differently for non-bool flag values.
        uses_dag = self.structure == True  # noqa: E712
        depth = config.res_num_layer

        # Prototype residual blocks; the stacks below hold deep copies of them.
        self.resnet1 = resnet_layer(config.n_feat, config.d_k, 1, config.dropout)
        if uses_dag:
            self.resnet2 = resnet_layer(config.n_feat, config.d_k, 1, config.dropout)
        self.conv = nn.Conv1d(
            in_channels=config.n_feat,
            out_channels=config.d_k,
            kernel_size=1,
        )
        self.res1 = nn.ModuleList([copy.deepcopy(self.resnet1) for _ in range(depth)])
        if uses_dag:
            self.res2 = nn.ModuleList([copy.deepcopy(self.resnet2) for _ in range(depth)])
        self.encoder = DAGTransformerEncoder(
            config.d_k,
            config.num_head,
            config.hidden_dim,
            config.dropout,
            config.num_encoder,
        )
        self.avgpool = nn.AdaptiveAvgPool2d((config.d_k, 1))
        self.fc1 = nn.Linear(config.d_k, 3)

    def forward(self, data, pos, mask):
        """Return the 3-way output scores.

        Args:
            data: feature tensor; permuted with ``(0, 2, 1)`` before the
                residual stack, so features sit on its last axis.
            pos: structural input with the same layout as ``data``; only read
                when the structure branch is enabled.
            mask: tensor whose zero entries become ``-inf`` and whose non-zero
                entries become ``0.0`` in the attention mask; only read when
                the structure branch is enabled.
        """
        uses_dag = self.structure == True  # noqa: E712

        feats = data.permute(0, 2, 1)
        for block in self.res1:
            feats = block(feats)

        attn_mask = None
        if uses_dag:
            attn_mask = mask.masked_fill(mask == 0, float('-inf')).masked_fill(mask != 0, float(0.0))
            struct = pos.permute(0, 2, 1)
            for block in self.res2:
                struct = block(struct)
            feats = feats + struct

        hidden = self.conv(feats)
        # The encoder is fed the former last axis first: (2, 0, 1) ordering.
        hidden = hidden.permute(2, 0, 1)
        hidden = self.encoder(hidden, attn_mask=attn_mask)
        hidden = F.relu(hidden.permute(1, 2, 0))
        pooled = self.avgpool(hidden).squeeze(-1)
        return self.fc1(pooled)
81 | 


--------------------------------------------------------------------------------
/models/DAG_Transformer_Encoder_Layer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import copy
  3 | from torch.nn import functional as F
  4 | import torch.nn as nn
  5 | from torch.nn.modules import Module
  6 | from torch.nn.modules.dropout import Dropout
  7 | from torch.nn.modules.linear import Linear
  8 | from torch.nn.modules.normalization import LayerNorm
  9 | from torch.nn.init import xavier_uniform_
 10 | from torch.nn.init import constant_
 11 | from torch.nn.init import xavier_normal_
 12 | from torch.nn.parameter import Parameter
 13 | from torch.nn.functional import linear
 14 | from torch.nn.functional import softmax
 15 | from torch.nn.functional import dropout
 16 | 
 17 | def _get_activation_fn(activation):
 18 |     if activation == "relu":
 19 |         return F.relu
 20 |     elif activation == "gelu":
 21 |         return F.gelu
 22 |     else:
 23 |         raise RuntimeError("activation should be relu/gelu, not %s." % activation)
 24 | 
 25 | 
 26 | class DAGMultiheadAttention(Module):
 27 |     
 28 |     __annotations__ = {
 29 |         'bias_k': torch._jit_internal.Optional[torch.Tensor],
 30 |         'bias_v': torch._jit_internal.Optional[torch.Tensor],
 31 |     }
 32 |     __constants__ = ['q_proj_weight', 'k_proj_weight', 'v_proj_weight', 'in_proj_weight']
 33 | 
 34 |     def __init__(self, embed_dim, num_heads, dropout=0., bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None):
 35 |         super(DAGMultiheadAttention, self).__init__()
 36 |         self.embed_dim = embed_dim
 37 |         self.kdim = kdim if kdim is not None else embed_dim
 38 |         self.vdim = vdim if vdim is not None else embed_dim
 39 |         self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim
 40 | 
 41 |         self.num_heads = num_heads
 42 |         self.dropout = dropout
 43 |         self.head_dim = embed_dim // num_heads
 44 |         assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
 45 | 
 46 |         if self._qkv_same_embed_dim is False:
 47 |             self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim))
 48 |             self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim))
 49 |             self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim))
 50 |             self.register_parameter('in_proj_weight', None)
 51 |         else:
 52 |             self.in_proj_weight = Parameter(torch.empty(3 * embed_dim, embed_dim))
 53 |             self.register_parameter('q_proj_weight', None)
 54 |             self.register_parameter('k_proj_weight', None)
 55 |             self.register_parameter('v_proj_weight', None)
 56 | 
 57 |         if bias:
 58 |             self.in_proj_bias = Parameter(torch.empty(3 * embed_dim))
 59 |         else:
 60 |             self.register_parameter('in_proj_bias', None)
 61 |         self.out_proj = Linear(embed_dim, embed_dim, bias=bias)
 62 | 
 63 |         if add_bias_kv:
 64 |             self.bias_k = Parameter(torch.empty(1, 1, embed_dim))
 65 |             self.bias_v = Parameter(torch.empty(1, 1, embed_dim))
 66 |         else:
 67 |             self.bias_k = self.bias_v = None
 68 | 
 69 |         self.add_zero_attn = add_zero_attn
 70 | 
 71 |         self._reset_parameters()
 72 | 
 73 |     def _reset_parameters(self):
 74 |         if self._qkv_same_embed_dim:
 75 |             xavier_uniform_(self.in_proj_weight)
 76 |         else:
 77 |             xavier_uniform_(self.q_proj_weight)
 78 |             xavier_uniform_(self.k_proj_weight)
 79 |             xavier_uniform_(self.v_proj_weight)
 80 | 
 81 |         if self.in_proj_bias is not None:
 82 |             constant_(self.in_proj_bias, 0.)
 83 |             constant_(self.out_proj.bias, 0.)
 84 |         if self.bias_k is not None:
 85 |             xavier_normal_(self.bias_k)
 86 |         if self.bias_v is not None:
 87 |             xavier_normal_(self.bias_v)
 88 | 
 89 |     def __setstate__(self, state):
 90 |         super(DAGMultiheadAttention, self).__setstate__(state)
 91 |         if 'self._qkv_same_embed_dim' not in self.__dict__:
 92 |             self._qkv_same_embed_dim = True
 93 | 
 94 |     def forward(self, query, key, value, key_padding_mask=None,
 95 |                 need_weights=True, attn_mask=None):
 96 |         # type: (Tensor, Tensor, Tensor, Optional[Tensor], bool, Optional[Tensor]) -> Tuple[Tensor, Optional[Tensor]]
 97 |        
 98 |         if not self._qkv_same_embed_dim:
 99 |             return DAGmulti_head_attention_forward(
100 |                 query, key, value, self.embed_dim, self.num_heads,
101 |                 self.in_proj_weight, self.in_proj_bias,
102 |                 self.bias_k, self.bias_v, self.add_zero_attn,
103 |                 self.dropout, self.out_proj.weight, self.out_proj.bias,
104 |                 training=self.training,
105 |                 key_padding_mask=key_padding_mask, need_weights=need_weights,
106 |                 attn_mask=attn_mask, use_separate_proj_weight=True,
107 |                 q_proj_weight=self.q_proj_weight, k_proj_weight=self.k_proj_weight,
108 |                 v_proj_weight=self.v_proj_weight)
109 |         else:
110 |             return DAGmulti_head_attention_forward(
111 |                 query, key, value, self.embed_dim, self.num_heads,
112 |                 self.in_proj_weight, self.in_proj_bias,
113 |                 self.bias_k, self.bias_v, self.add_zero_attn,
114 |                 self.dropout, self.out_proj.weight, self.out_proj.bias,
115 |                 training=self.training,
116 |                 key_padding_mask=key_padding_mask, need_weights=need_weights,
117 |                 attn_mask=attn_mask)
118 | 
119 | 
120 | 
121 | 
122 | def DAGmulti_head_attention_forward(query,                        # type: Tensor
123 |                                  key,                             # type: Tensor
124 |                                  value,                           # type: Tensor
125 |                                  embed_dim_to_check,              # type: int
126 |                                  num_heads,                       # type: int
127 |                                  in_proj_weight,                  # type: Tensor
128 |                                  in_proj_bias,                    # type: Tensor
129 |                                  bias_k,                          # type: Optional[Tensor]
130 |                                  bias_v,                          # type: Optional[Tensor]
131 |                                  add_zero_attn,                   # type: bool
132 |                                  dropout_p,                       # type: float
133 |                                  out_proj_weight,                 # type: Tensor
134 |                                  out_proj_bias,                   # type: Tensor
135 |                                  training=True,                   # type: bool
136 |                                  key_padding_mask=None,           # type: Optional[Tensor]
137 |                                  need_weights=True,               # type: bool
138 |                                  attn_mask=None,                  # type: Optional[Tensor]
139 |                                  use_separate_proj_weight=False,  # type: bool
140 |                                  q_proj_weight=None,              # type: Optional[Tensor]
141 |                                  k_proj_weight=None,              # type: Optional[Tensor]
142 |                                  v_proj_weight=None,              # type: Optional[Tensor]
143 |                                  static_k=None,                   # type: Optional[Tensor]
144 |                                  static_v=None                    # type: Optional[Tensor]
145 |                                  ):
146 |     # type: (...) -> Tuple[Tensor, Optional[Tensor]]
147 |     
148 | 
149 |     tgt_len, bsz, embed_dim = query.size()
150 |     assert embed_dim == embed_dim_to_check
151 |     assert key.size() == value.size()
152 | 
153 |     head_dim = embed_dim // num_heads
154 |     assert head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads"
155 |     scaling = float(head_dim) ** -0.5
156 | 
157 |     if not use_separate_proj_weight:
158 |         if torch.equal(query, key) and torch.equal(key, value):
159 |             q, k, v = linear(query, in_proj_weight, in_proj_bias).chunk(3, dim=-1)
160 | 
161 |         elif torch.equal(key, value):
162 |             # This is inline in_proj function with in_proj_weight and in_proj_bias
163 |             _b = in_proj_bias
164 |             _start = 0
165 |             _end = embed_dim
166 |             _w = in_proj_weight[_start:_end, :]
167 |             if _b is not None:
168 |                 _b = _b[_start:_end]
169 |             q = linear(query, _w, _b)
170 | 
171 |             if key is None:
172 |                 assert value is None
173 |                 k = None
174 |                 v = None
175 |             else:
176 | 
177 |                 # This is inline in_proj function with in_proj_weight and in_proj_bias
178 |                 _b = in_proj_bias
179 |                 _start = embed_dim
180 |                 _end = None
181 |                 _w = in_proj_weight[_start:, :]
182 |                 if _b is not None:
183 |                     _b = _b[_start:]
184 |                 k, v = linear(key, _w, _b).chunk(2, dim=-1)
185 | 
186 |         else:
187 |             # This is inline in_proj function with in_proj_weight and in_proj_bias
188 |             _b = in_proj_bias
189 |             _start = 0
190 |             _end = embed_dim
191 |             _w = in_proj_weight[_start:_end, :]
192 |             if _b is not None:
193 |                 _b = _b[_start:_end]
194 |             q = linear(query, _w, _b)
195 | 
196 |             # This is inline in_proj function with in_proj_weight and in_proj_bias
197 |             _b = in_proj_bias
198 |             _start = embed_dim
199 |             _end = embed_dim * 2
200 |             _w = in_proj_weight[_start:_end, :]
201 |             if _b is not None:
202 |                 _b = _b[_start:_end]
203 |             k = linear(key, _w, _b)
204 | 
205 |             # This is inline in_proj function with in_proj_weight and in_proj_bias
206 |             _b = in_proj_bias
207 |             _start = embed_dim * 2
208 |             _end = None
209 |             _w = in_proj_weight[_start:, :]
210 |             if _b is not None:
211 |                 _b = _b[_start:]
212 |             v = linear(value, _w, _b)
213 |     else:
214 |         q_proj_weight_non_opt = torch.jit._unwrap_optional(q_proj_weight)
215 |         len1, len2 = q_proj_weight_non_opt.size()
216 |         assert len1 == embed_dim and len2 == query.size(-1)
217 | 
218 |         k_proj_weight_non_opt = torch.jit._unwrap_optional(k_proj_weight)
219 |         len1, len2 = k_proj_weight_non_opt.size()
220 |         assert len1 == embed_dim and len2 == key.size(-1)
221 | 
222 |         v_proj_weight_non_opt = torch.jit._unwrap_optional(v_proj_weight)
223 |         len1, len2 = v_proj_weight_non_opt.size()
224 |         assert len1 == embed_dim and len2 == value.size(-1)
225 | 
226 |         if in_proj_bias is not None:
227 |             q = linear(query, q_proj_weight_non_opt, in_proj_bias[0:embed_dim])
228 |             k = linear(key, k_proj_weight_non_opt, in_proj_bias[embed_dim:(embed_dim * 2)])
229 |             v = linear(value, v_proj_weight_non_opt, in_proj_bias[(embed_dim * 2):])
230 |         else:
231 |             q = linear(query, q_proj_weight_non_opt, in_proj_bias)
232 |             k = linear(key, k_proj_weight_non_opt, in_proj_bias)
233 |             v = linear(value, v_proj_weight_non_opt, in_proj_bias)
234 |     q = q * scaling
235 | 
236 |     if bias_k is not None and bias_v is not None:
237 |         if static_k is None and static_v is None:
238 |             k = torch.cat([k, bias_k.repeat(1, bsz, 1)])
239 |             v = torch.cat([v, bias_v.repeat(1, bsz, 1)])
240 |             if attn_mask is not None:
241 |                 attn_mask = torch.cat([attn_mask,
242 |                                       torch.zeros((attn_mask.size(0), 1),
243 |                                                   dtype=attn_mask.dtype,
244 |                                                   device=attn_mask.device)], dim=1)
245 |             if key_padding_mask is not None:
246 |                 key_padding_mask = torch.cat(
247 |                     [key_padding_mask, torch.zeros((key_padding_mask.size(0), 1),
248 |                                                    dtype=key_padding_mask.dtype,
249 |                                                    device=key_padding_mask.device)], dim=1)
250 |         else:
251 |             assert static_k is None, "bias cannot be added to static key."
252 |             assert static_v is None, "bias cannot be added to static value."
253 |     else:
254 |         assert bias_k is None
255 |         assert bias_v is None
256 | 
257 |     q = q.contiguous().view(tgt_len, bsz * num_heads, head_dim).transpose(0, 1)
258 |     if k is not None:
259 |         k = k.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1)
260 |     if v is not None:
261 |         v = v.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1)
262 | 
263 |     if static_k is not None:
264 |         assert static_k.size(0) == bsz * num_heads
265 |         assert static_k.size(2) == head_dim
266 |         k = static_k
267 | 
268 |     if static_v is not None:
269 |         assert static_v.size(0) == bsz * num_heads
270 |         assert static_v.size(2) == head_dim
271 |         v = static_v
272 | 
273 |     src_len = k.size(1)
274 | 
275 |     if key_padding_mask is not None:
276 |         assert key_padding_mask.size(0) == bsz
277 |         assert key_padding_mask.size(1) == src_len
278 | 
279 |     if add_zero_attn:
280 |         src_len += 1
281 |         k = torch.cat([k, torch.zeros((k.size(0), 1) + k.size()[2:], dtype=k.dtype, device=k.device)], dim=1)
282 |         v = torch.cat([v, torch.zeros((v.size(0), 1) + v.size()[2:], dtype=v.dtype, device=v.device)], dim=1)
283 |         if attn_mask is not None:
284 |             attn_mask = torch.cat([attn_mask, torch.zeros((attn_mask.size(0), 1),
285 |                                                           dtype=attn_mask.dtype,
286 |                                                           device=attn_mask.device)], dim=1)
287 |         if key_padding_mask is not None:
288 |             key_padding_mask = torch.cat(
289 |                 [key_padding_mask, torch.zeros((key_padding_mask.size(0), 1),
290 |                                                dtype=key_padding_mask.dtype,
291 |                                                device=key_padding_mask.device)], dim=1)
292 | 
293 |     attn_output_weights = torch.bmm(q, k.transpose(1, 2))
294 |     assert list(attn_output_weights.size()) == [bsz * num_heads, tgt_len, src_len]
295 | 
296 |     if attn_mask is not None:
297 |         attn_mask=attn_mask.reshape(-1,tgt_len,tgt_len)
298 |         attn_output_weights += attn_mask
299 | 
300 |     if key_padding_mask is not None:
301 |         attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len)
302 |         attn_output_weights = attn_output_weights.masked_fill(
303 |             key_padding_mask.unsqueeze(1).unsqueeze(2),
304 |             float('-inf'),
305 |         )
306 |         attn_output_weights = attn_output_weights.view(bsz * num_heads, tgt_len, src_len)
307 | 
308 |     attn_output_weights = softmax(
309 |         attn_output_weights, dim=-1)
310 |     attn_output_weights = dropout(attn_output_weights, p=dropout_p, training=training)
311 | 
312 |     attn_output = torch.bmm(attn_output_weights, v)
313 |     assert list(attn_output.size()) == [bsz * num_heads, tgt_len, head_dim]
314 |     attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
315 |     attn_output = linear(attn_output, out_proj_weight, out_proj_bias)
316 | 
317 |     if need_weights:
318 |         # average attention weights over heads
319 |         attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len)
320 |         return attn_output, attn_output_weights.sum(dim=1) / num_heads
321 |     else:
322 |         return attn_output, None
323 | 
324 | 
325 | 
class DAGTransformerEncoderLayer(nn.Module):
    """One encoder layer: DAG-aware self-attention followed by a feed-forward net.

    Each of the two sub-layers is wrapped as ``norm(x + dropout(sublayer(x)))``,
    i.e. the residual is added first and layer normalisation is applied after.

    Args:
        d_model: width of the input/output features.
        nhead: number of attention heads handed to ``DAGMultiheadAttention``.
        dim_feedforward: hidden width of the feed-forward network.
        dropout: dropout probability used in attention, inside the
            feed-forward network and on both residual branches.
        activation: name resolved by ``_get_activation_fn``.
    """

    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"):
        super().__init__()
        # Attention sub-layer.
        self.self_attn = DAGMultiheadAttention(d_model, nhead, dropout=dropout)

        # Feed-forward sub-layer: d_model -> dim_feedforward -> d_model.
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        # One LayerNorm and one residual dropout per sub-layer.
        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

        self.activation = _get_activation_fn(activation)

    def forward(self, src, src_mask=None, src_key_padding_mask=None):
        """Run the layer on ``src`` and return a tensor of the same layout.

        ``src_mask`` is forwarded as ``attn_mask`` and ``src_key_padding_mask``
        as ``key_padding_mask`` to the attention module; only the first
        element of the attention module's return value is used.
        """
        attended = self.self_attn(
            src, src, src,
            attn_mask=src_mask,
            key_padding_mask=src_key_padding_mask,
        )[0]
        src = self.norm1(src + self.dropout1(attended))

        # Instances restored without an ``activation`` attribute fall back to
        # ReLU (backward compatibility).
        if hasattr(self, "activation"):
            activated = self.activation(self.linear1(src))
        else:
            activated = F.relu(self.linear1(src))
        feed_forward = self.linear2(self.dropout(activated))

        return self.norm2(src + self.dropout2(feed_forward))


--------------------------------------------------------------------------------
/models/GCN.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | from torch_geometric.nn import GCNConv
 5 | 
 6 | 
 7 | class GCNModel(nn.Module):
 8 |     def __init__(self,config):
 9 |         super(GCNModel, self).__init__()
10 |         self.conv1 = GCNConv(34, 64)
11 |         self.conv2 = GCNConv(64, 128)
12 |         self.conv3 = GCNConv(128, 256)
13 |         self.conv4 = GCNConv(256, 3)
14 |         self.dropout1=nn.Dropout(config.dropout)
15 |         self.dropout2=nn.Dropout(config.dropout)
16 |         self.dropout3=nn.Dropout(config.dropout)
17 | 
18 |     def forward(self, data):
19 |         x, edge_index = data.x, data.edge_index
20 | 
21 |         x = self.conv1(x, edge_index)
22 |         x = F.relu(x)
23 |         x = self.dropout1(x)
24 |         x = self.conv2(x, edge_index)
25 |         x = F.relu(x)
26 |         x = self.dropout2(x)
27 |         x = self.conv3(x, edge_index)
28 |         x = F.relu(x)
29 |         x = self.dropout3(x)
30 |         x = self.conv4(x, edge_index)
31 |         return x


--------------------------------------------------------------------------------
/models/LSTM.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | class LSTMModel(nn.Module):
 5 |     def __init__(self, config):
 6 |         super(LSTMModel, self).__init__()
 7 |         self.lstm=nn.LSTM(config.n_feat,config.hidden,dropout=config.dropout,num_layers=config.num_layers)
 8 |         self.maxpool=nn.MaxPool1d(config.pooldim)
 9 |         self.fc=nn.Linear((config.hidden//config.pooldim)*config.num_task,config.num_classes)
10 |     def forward(self,x):
11 |         out=x.permute(1,0,2)
12 |         out, _ = self.lstm(out)
13 |         out=out.permute(1,0,2)
14 |         #out = torch.tanh(out)
15 |         out = self.maxpool(out)
16 |         #out = torch.tanh(out)
17 |         out=out.reshape(out.size(0),-1)
18 |         out = self.fc(out)
19 |         return out


--------------------------------------------------------------------------------
/models/Vanilla_Transformer.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.nn import  TransformerEncoderLayer
 4 | import copy
 5 | 
 6 | class Positional_Encoding(nn.Module):
 7 |     def __init__(self, n_feat, num_task, dropout, device):
 8 |         super(Positional_Encoding, self).__init__()
 9 |         self.device = device
10 |         self.pe = torch.tensor([[pos / (10000.0 ** (i // 2 * 2.0 / n_feat)) for i in range(n_feat)] for pos in range(num_task)])
11 |         self.pe[:, 0::2] = torch.sin(self.pe[:, 0::2])
12 |         self.pe[:, 1::2] = torch.cos(self.pe[:, 1::2])
13 |         self.dropout = nn.Dropout(dropout)
14 | 
15 |     def forward(self, x):
16 |         out = x + nn.Parameter(self.pe, requires_grad=False).to(self.device)
17 |         out = self.dropout(out)
18 |         return out
19 | 
class VanillaTransformerModel(nn.Module):
    """Plain Transformer encoder classifier.

    Pipeline: positional encoding -> ``config.num_encoder`` encoder layers ->
    flatten -> linear head.  Every layer in ``self.encoders`` is a deep copy
    of the template ``self.encoder``, so they all start from identical
    weights; the template itself stays registered as a sub-module but is not
    called in ``forward``.
    """

    def __init__(self, config):
        super().__init__()

        self.postion_embedding = Positional_Encoding(
            config.n_feat, config.num_task, config.dropout, config.device
        )
        self.encoder = TransformerEncoderLayer(
            config.n_feat, config.num_head, config.hidden, config.dropout
        )
        layers = []
        for _ in range(config.num_encoder):
            layers.append(copy.deepcopy(self.encoder))
        self.encoders = nn.ModuleList(layers)

        self.fc1 = nn.Linear(config.num_task * config.n_feat, config.num_classes)

    def forward(self, x):
        """Return ``(batch, num_classes)`` scores for a batch-first 3-D input."""
        encoded = self.postion_embedding(x)

        # The encoder layers are built with their default layout, so swap the
        # first two axes before the stack and swap them back afterwards.
        encoded = encoded.permute(1, 0, 2)
        for layer in self.encoders:
            encoded = layer(encoded)
        encoded = encoded.permute(1, 0, 2)

        flattened = encoded.reshape(encoded.size(0), -1)
        return self.fc1(flattened)


--------------------------------------------------------------------------------
/preprocess.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import pandas as pd
  3 | import numpy as np
  4 | from tqdm import tqdm
  5 | 
  6 | def find_pos(out_degree_matrix,num_nodes):
  7 |     stage=np.zeros(num_nodes)
  8 |     signal=True
  9 |     while signal:
 10 |         temp=stage.copy()
 11 |         for m in range(num_nodes):
 12 |             for n in range(num_nodes):
 13 |                 if(out_degree_matrix[m,n]!=0):
 14 |                     stage[n]=max(stage[n],stage[m]+1)
 15 |         if (temp==stage).all():
 16 |             signal=False
 17 |     return stage
 18 | 
 19 | def create_position(pos,num_feat):
 20 |     
 21 |     pe = np.array([[[posit / (10000.0 ** (i // 2 * 2.0 / num_feat))  for i in range(num_feat)]for posit in posi] for posi in pos])
 22 |     pe[:,:,0::2]=np.sin(pe[:,:,0::2])
 23 |     pe[:,:,1::2]=np.cos(pe[:,:,1::2])
 24 |     return pe
 25 | 
 26 | def create_attn_mask(tensor,num_heads,num_nodes):
 27 |     mask=np.zeros(((tensor.shape[0]*num_heads),tensor.shape[1],tensor.shape[2]))
 28 |     for x in range(0,mask.shape[0],num_heads):
 29 |         mask[x:x+num_heads]=tensor[x//num_heads]+np.eye(num_nodes)
 30 |     return mask.reshape(tensor.shape[0],num_heads,tensor.shape[1],tensor.shape[2])
 31 | 
 32 | def prepare_data_exp1_dag(pred_task,pred_tgt,pred_mode):
 33 |     direct='./datasets_exp1/%s%s/%s'%(pred_task,pred_tgt,pred_mode)
 34 |     df_train=pd.read_csv(direct+'/train.csv')
 35 |     df_val=pd.read_csv(direct+'/val.csv')
 36 |     df_test=pd.read_csv(direct+'/test.csv')
 37 |     dag_direct='./datasets_exp1/%s%s/'%(pred_task,pred_tgt)
 38 |     df_dag_train=pd.read_csv(dag_direct+'/train_daginfo.csv')
 39 |     df_dag_val=pd.read_csv(dag_direct+'/val_daginfo.csv')
 40 |     df_dag_test=pd.read_csv(dag_direct+'/test_daginfo.csv')
 41 |     return df_train, df_val, df_test, df_dag_train, df_dag_val, df_dag_test
 42 | 
 43 | def prepare_data_exp1(pred_task,pred_tgt,pred_mode):
 44 |     direct='./datasets_exp1/%s%s/%s'%(pred_task,pred_tgt,pred_mode)
 45 |     df_train=pd.read_csv(direct+'/train.csv')
 46 |     df_val=pd.read_csv(direct+'/val.csv')
 47 |     df_test=pd.read_csv(direct+'/test.csv')
 48 |     
 49 |     return df_train, df_val, df_test
 50 | 
 51 | def prepare_data_exp23_dag(split):#split='split6_2_2','split8_1_1'，'split9_05_05'
 52 |     direct='./datasets_exp2_3/%s'%split
 53 |     df_train=pd.read_csv(direct+'/train.csv')
 54 |     df_val=pd.read_csv(direct+'/val.csv')
 55 |     df_test=pd.read_csv(direct+'/test.csv')
 56 |     df_dag_train=pd.read_csv(direct+'/train_daginfo.csv')
 57 |     df_dag_val=pd.read_csv(direct+'/val_daginfo.csv')
 58 |     df_dag_test=pd.read_csv(direct+'/test_daginfo.csv')
 59 |     return df_train, df_val, df_test, df_dag_train, df_dag_val, df_dag_test
 60 | 
 61 | 
 62 | def prepare_data_exp23(split):#split='split6_2_2','split8_1_1'，'split9_05_05'
 63 |     direct='./datasets_exp2_3/%s'%split
 64 |     df_train=pd.read_csv(direct+'/train.csv')
 65 |     df_val=pd.read_csv(direct+'/val.csv')
 66 |     df_test=pd.read_csv(direct+'/test.csv')
 67 |     return df_train, df_val, df_test
 68 |         
 69 | 
 70 | def preprocess_data_exp1_dag(pred_task,pred_tgt,pred_mode,num_feat=34,num_task=7,num_head=8):
 71 |     df_train, df_val, df_test, df_dag_train, df_dag_val, df_dag_test=prepare_data_exp1_dag(pred_task,pred_tgt,pred_mode)
 72 |     ##nodes features
 73 |     arr1=np.array(df_train.iloc[:,:-1])
 74 |     arr2=np.array(df_val.iloc[:,:-1])
 75 |     arr3=np.array(df_test.iloc[:,:-1])
 76 |     data=np.vstack((arr1.reshape(arr1.shape[0]*num_task,num_feat),arr2.reshape(arr2.shape[0]*num_task,num_feat),arr3.reshape(arr3.shape[0]*num_task,num_feat)))
 77 |     data=(data-data.min(0))/(data.max(0)-data.min(0)+1e-9)
 78 |     data1=data[:arr1.shape[0]*num_task,:]
 79 |     data2=data[arr1.shape[0]*num_task:arr1.shape[0]*num_task+arr2.shape[0]*num_task,:]
 80 |     data3=data[arr1.shape[0]*num_task+arr2.shape[0]*num_task:,:]  
 81 |     data1=data1.reshape(-1,num_task,num_feat)
 82 |     data2=data2.reshape(-1,num_task,num_feat)
 83 |     data3=data3.reshape(-1,num_task,num_feat)
 84 |     #####dag info
 85 |     dag1=df_dag_train.to_numpy().reshape(-1,num_task,num_task*2+1)
 86 |     dag2=df_dag_val.to_numpy().reshape(-1,num_task,num_task*2+1)
 87 |     dag3=df_dag_test.to_numpy().reshape(-1,num_task,num_task*2+1)
 88 |     dag1=dag1[:,:,1:]
 89 |     dag2=dag2[:,:,1:]
 90 |     dag3=dag3[:,:,1:]
 91 |     dagout1=dag1[:,:,:7]
 92 |     dagout2=dag2[:,:,:7]
 93 |     dagout3=dag3[:,:,:7]
 94 |     dagin1=dag1[:,:,7:]
 95 |     dagin2=dag2[:,:,7:]
 96 |     dagin3=dag3[:,:,7:]
 97 |     mask1=dagin1+dagout1
 98 |     mask2=dagin2+dagout2
 99 |     mask3=dagin3+dagout3
100 |     pos1=np.zeros((arr1.shape[0],7))
101 |     for x in range(pos1.shape[0]):
102 |         pos1[x]=find_pos(dagout1[x],7)
103 |     pos2=np.zeros((arr2.shape[0],7))
104 |     for x in range(pos2.shape[0]):
105 |         pos2[x]=find_pos(dagout2[x],7)
106 |     pos3=np.zeros((arr3.shape[0],7))
107 |     for x in range(pos3.shape[0]):
108 |         pos3[x]=find_pos(dagout3[x],7)
109 |     position1=create_position(pos1,num_feat)
110 |     position2=create_position(pos2,num_feat)
111 |     position3=create_position(pos3,num_feat)
112 |     mask_train=create_attn_mask(mask1,num_heads=num_head,num_nodes=num_task)
113 |     mask_val=create_attn_mask(mask2,num_heads=num_head,num_nodes=num_task)
114 |     mask_test=create_attn_mask(mask3,num_heads=num_head,num_nodes=num_task)
115 |     data1=np.array(data1,dtype=np.float32)
116 |     data2=np.array(data2,dtype=np.float32)
117 |     data3=np.array(data3,dtype=np.float32)
118 |     position1=np.array(position1,dtype=np.float32)
119 |     position2=np.array(position2,dtype=np.float32)
120 |     position3=np.array(position3,dtype=np.float32)
121 |     ######data
122 |     train_data=[]
123 |     for x in range(data1.shape[0]):
124 |         train_data.append((data1[x],df_train.iloc[x,-1],position1[x],mask_train[x]))
125 |     val_data=[]
126 |     for x in range(data2.shape[0]):
127 |         val_data.append((data2[x],df_val.iloc[x,-1],position2[x],mask_val[x]))
128 |     test_data=[]
129 |     for x in range(data3.shape[0]):
130 |         test_data.append((data3[x],df_test.iloc[x,-1],position3[x],mask_test[x]))
131 |     return train_data, val_data, test_data
132 | 
133 | 
134 | 
def preprocess_data_exp1(pred_task, pred_tgt, pred_mode, num_feat=34, num_task=7,):
    """Build the experiment-1 samples for models that ignore the DAG.

    Every sample is ``(features, label)`` where ``features`` is a float32
    ``(num_task, num_feat)`` array, min-max scaled per feature column with
    statistics of train, val and test stacked together, and ``label`` is the
    last column of the corresponding CSV row.

    Returns:
        ``(train_data, val_data, test_data)``, each a list of sample tuples.
    """
    df_train, df_val, df_test = prepare_data_exp1(pred_task, pred_tgt, pred_mode)
    frames = (df_train, df_val, df_test)

    # Node features: flatten to one row per node and scale jointly.
    node_rows = []
    for frame in frames:
        values = np.array(frame.iloc[:, :-1])
        node_rows.append(values.reshape(values.shape[0] * num_task, num_feat))
    stacked = np.vstack(node_rows)
    lowest = stacked.min(0)
    stacked = (stacked - lowest) / (stacked.max(0) - lowest + 1e-9)

    # Walk through the stacked rows split by split.
    datasets = []
    offset = 0
    for frame, rows in zip(frames, node_rows):
        block = stacked[offset:offset + rows.shape[0], :]
        offset += rows.shape[0]
        feats = np.array(block.reshape(-1, num_task, num_feat), dtype=np.float32)
        datasets.append([(feats[i], frame.iloc[i, -1]) for i in range(feats.shape[0])])

    train_data, val_data, test_data = datasets
    return train_data, val_data, test_data
164 | 
165 | 
def preprocess_data_exp23_dag(split, num_feat=34, num_task=7, num_head=8):
    """Build the experiment-2/3 samples for the DAG-aware model.

    Every sample is a tuple ``(features, label, position, attn_mask)``:

    * ``features``: float32 ``(num_task, num_feat)``, min-max scaled per
      feature column using statistics of train, val and test stacked together.
    * ``label``: the last column of the corresponding CSV row.
    * ``position``: float32 ``(num_task, num_feat)`` sinusoidal encoding of
      each node's stage as computed by ``find_pos``.
    * ``attn_mask``: ``(num_head, num_task, num_task)`` array holding
      in-adjacency + out-adjacency + identity.

    Args:
        split: dataset split directory, forwarded to ``prepare_data_exp23_dag``.
        num_feat: number of features per node.
        num_task: number of nodes per workflow; also drives how the DAG-info
            columns are split (previously hard-coded as 7).
        num_head: number of attention heads the mask is replicated for.

    Returns:
        ``(train_data, val_data, test_data)``, each a list of sample tuples.
    """
    (df_train, df_val, df_test,
     df_dag_train, df_dag_val, df_dag_test) = prepare_data_exp23_dag(split)
    frames = (df_train, df_val, df_test)
    dag_frames = (df_dag_train, df_dag_val, df_dag_test)

    # Node features: one row per node, scaled jointly over the three splits.
    raw = [np.array(df.iloc[:, :-1]) for df in frames]
    data = np.vstack([arr.reshape(arr.shape[0] * num_task, num_feat) for arr in raw])
    col_min = data.min(0)
    data = (data - col_min) / (data.max(0) - col_min + 1e-9)

    # Cut the stacked rows back into the three splits, one block per workflow.
    boundaries = np.cumsum([arr.shape[0] * num_task for arr in raw])[:-1]
    feature_splits = [
        np.array(chunk.reshape(-1, num_task, num_feat), dtype=np.float32)
        for chunk in np.split(data, boundaries)
    ]

    datasets = []
    for df, arr, feats, dag_df in zip(frames, raw, feature_splits, dag_frames):
        # Each DAG-info row has 2 * num_task + 1 columns; the first one is
        # dropped and the rest split into the out- and in-adjacency halves.
        dag = dag_df.to_numpy().reshape(-1, num_task, num_task * 2 + 1)[:, :, 1:]
        dag_out = dag[:, :, :num_task]
        dag_in = dag[:, :, num_task:]

        stages = np.zeros((arr.shape[0], num_task))
        for idx in range(stages.shape[0]):
            stages[idx] = find_pos(dag_out[idx], num_task)
        position = np.array(create_position(stages, num_feat), dtype=np.float32)

        attn_mask = create_attn_mask(dag_in + dag_out, num_heads=num_head, num_nodes=num_task)

        datasets.append([
            (feats[idx], df.iloc[idx, -1], position[idx], attn_mask[idx])
            for idx in range(feats.shape[0])
        ])

    train_data, val_data, test_data = datasets
    return train_data, val_data, test_data
228 | 
229 | 
def preprocess_data_exp23(split, num_feat=34, num_task=7,):
    """Build the experiment-2/3 samples for models that ignore the DAG.

    Every sample is ``(features, label)`` where ``features`` is a float32
    ``(num_task, num_feat)`` array, min-max scaled per feature column with
    statistics of train, val and test stacked together, and ``label`` is the
    last column of the corresponding CSV row.

    Returns:
        ``(train_data, val_data, test_data)``, each a list of sample tuples.
    """
    df_train, df_val, df_test = prepare_data_exp23(split)
    frames = (df_train, df_val, df_test)

    # Node features: flatten to one row per node and scale jointly.
    node_rows = []
    for frame in frames:
        values = np.array(frame.iloc[:, :-1])
        node_rows.append(values.reshape(values.shape[0] * num_task, num_feat))
    stacked = np.vstack(node_rows)
    lowest = stacked.min(0)
    stacked = (stacked - lowest) / (stacked.max(0) - lowest + 1e-9)

    # Walk through the stacked rows split by split.
    datasets = []
    offset = 0
    for frame, rows in zip(frames, node_rows):
        block = stacked[offset:offset + rows.shape[0], :]
        offset += rows.shape[0]
        feats = np.array(block.reshape(-1, num_task, num_feat), dtype=np.float32)
        datasets.append([(feats[i], frame.iloc[i, -1]) for i in range(feats.shape[0])])

    train_data, val_data, test_data = datasets
    return train_data, val_data, test_data
260 | 
261 | 
262 | 
263 | import torch
264 | from torch_geometric.data import Data
def preprocess_data_exp23_GNN_unidir(split, num_feat=34, num_task=7):
    """Build one graph holding every workflow of a split, with directed edges.

    Train, val and test workflows are concatenated; each workflow contributes
    ``num_task`` consecutive nodes.  Edges follow the out-adjacency columns of
    the DAG-info table (source -> target) and a self-loop is appended for
    every node.  Only the last node of each workflow carries the workflow's
    label and is selected by ``train_mask`` / ``val_mask`` / ``test_mask``;
    all other nodes have label 0 and are in no mask.

    Args:
        split: dataset split directory, forwarded to ``prepare_data_exp23_dag``.
        num_feat: number of features per node.
        num_task: number of nodes per workflow.

    Returns:
        A ``Data`` object with ``x``, ``edge_index``, ``labels`` and the three
        boolean masks.
    """
    df_train, df_val, df_test, df_dag_train, df_dag_val, df_dag_test = prepare_data_exp23_dag(split)
    df = pd.concat((df_train, df_val, df_test), axis=0)
    total_nodes = df.shape[0] * num_task

    feat = df.iloc[:, :-1].to_numpy().reshape(-1, num_feat)
    label = df.iloc[:, -1].to_numpy()

    # Only the last node of each workflow receives the label; one strided
    # assignment replaces a per-node Python loop.
    labels = torch.zeros(total_nodes)
    labels[num_task - 1::num_task] = torch.tensor(
        np.asarray(label, dtype=np.float64), dtype=labels.dtype)

    df_dag = pd.concat((df_dag_train, df_dag_val, df_dag_test), axis=0)
    edge_info = df_dag.to_numpy().reshape(-1, 2 * num_task + 1)[:, 1:]
    out_mat = edge_info[:, :num_task]

    print('preparing data...')
    # np.nonzero walks the matrix row by row, the same order the nested loops
    # used, without the quadratic cost of growing a tensor with torch.cat.
    src, tgt = np.nonzero(out_mat != 0)
    # Row ``src`` belongs to the workflow starting at ``src - src % num_task``;
    # ``tgt`` is the target's index inside that workflow.
    dst = src - src % num_task + tgt
    edge = torch.tensor(np.stack((src, dst)), dtype=torch.long)

    self_loop = torch.arange(total_nodes, dtype=torch.long).repeat(2, 1)
    edge = torch.cat((edge, self_loop), 1)

    feat = (feat - feat.min(0)) / (feat.max(0) - feat.min(0) + 1e-9)
    feat = torch.tensor(feat, dtype=torch.float)

    Gdata = Data(x=feat, edge_index=edge)
    Gdata.labels = labels.long()

    # Masks select the last node of every workflow, partitioned by split.
    node_ids = torch.arange(total_nodes)
    is_last = (node_ids + 1) % num_task == 0
    train_end = df_train.shape[0] * num_task
    val_end = train_end + df_val.shape[0] * num_task
    Gdata.train_mask = is_last & (node_ids < train_end)
    Gdata.val_mask = is_last & (node_ids >= train_end) & (node_ids < val_end)
    Gdata.test_mask = is_last & (node_ids >= val_end)
    return Gdata
302 | 
303 | 
def preprocess_data_exp23_GNN_bidir(split, num_feat=34, num_task=7):
    """Build one graph holding every workflow of a split, with edges in both directions.

    Train, val and test workflows are concatenated; each workflow contributes
    ``num_task`` consecutive nodes.  Every non-zero entry of the out-adjacency
    columns of the DAG-info table yields the edge source -> target immediately
    followed by target -> source, and a self-loop is appended for every node.
    Only the last node of each workflow carries the workflow's label and is
    selected by ``train_mask`` / ``val_mask`` / ``test_mask``; all other nodes
    have label 0 and are in no mask.

    Args:
        split: dataset split directory, forwarded to ``prepare_data_exp23_dag``.
        num_feat: number of features per node.
        num_task: number of nodes per workflow.

    Returns:
        A ``Data`` object with ``x``, ``edge_index``, ``labels`` and the three
        boolean masks.
    """
    df_train, df_val, df_test, df_dag_train, df_dag_val, df_dag_test = prepare_data_exp23_dag(split)
    df = pd.concat((df_train, df_val, df_test), axis=0)
    total_nodes = df.shape[0] * num_task

    feat = df.iloc[:, :-1].to_numpy().reshape(-1, num_feat)
    label = df.iloc[:, -1].to_numpy()

    # Only the last node of each workflow receives the label; one strided
    # assignment replaces a per-node Python loop.
    labels = torch.zeros(total_nodes)
    labels[num_task - 1::num_task] = torch.tensor(
        np.asarray(label, dtype=np.float64), dtype=labels.dtype)

    df_dag = pd.concat((df_dag_train, df_dag_val, df_dag_test), axis=0)
    edge_info = df_dag.to_numpy().reshape(-1, 2 * num_task + 1)[:, 1:]
    out_mat = edge_info[:, :num_task]

    print('preparing data...')
    # np.nonzero walks the matrix row by row, the same order the nested loops
    # used, without the quadratic cost of growing a tensor with torch.cat.
    src, tgt = np.nonzero(out_mat != 0)
    # Row ``src`` belongs to the workflow starting at ``src - src % num_task``;
    # ``tgt`` is the target's index inside that workflow.
    dst = src - src % num_task + tgt
    forward = np.stack((src, dst))
    backward = np.stack((dst, src))
    # Interleave so each forward edge is directly followed by its reverse.
    pairs = np.stack((forward, backward), axis=2).reshape(2, 2 * src.size)
    edge = torch.tensor(pairs, dtype=torch.long)

    self_loop = torch.arange(total_nodes, dtype=torch.long).repeat(2, 1)
    edge = torch.cat((edge, self_loop), 1)

    feat = (feat - feat.min(0)) / (feat.max(0) - feat.min(0) + 1e-9)
    feat = torch.tensor(feat, dtype=torch.float)

    Gdata = Data(x=feat, edge_index=edge)
    Gdata.labels = labels.long()

    # Masks select the last node of every workflow, partitioned by split.
    node_ids = torch.arange(total_nodes)
    is_last = (node_ids + 1) % num_task == 0
    train_end = df_train.shape[0] * num_task
    val_end = train_end + df_val.shape[0] * num_task
    Gdata.train_mask = is_last & (node_ids < train_end)
    Gdata.val_mask = is_last & (node_ids >= train_end) & (node_ids < val_end)
    Gdata.test_mask = is_last & (node_ids >= val_end)
    return Gdata


--------------------------------------------------------------------------------
/run_exp1.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from preprocess import preprocess_data_exp1_dag,preprocess_data_exp1
 3 | from select_model import select_model_exp1
 4 | from models.DAG_Transformer import DAGTransformer
 5 | from train_model_dag import train
 6 | import torch
 7 | import argparse
 8 | parser = argparse.ArgumentParser()
 9 | parser.add_argument('--pred_task', default=7)
10 | parser.add_argument('--pred_tgt', default='CPU')
11 | parser.add_argument('--pred_mode',default='PRIOR_ALL')
12 | parser.add_argument('--use_DAG',default='T')
13 | opt = parser.parse_args()
14 | 
15 | if opt.pred_task !='3' and opt.pred_task!='5' and opt.pred_task!='7':
16 |     raise AssertionError('pred_task should be 3/5/7')
17 | pred_task=opt.pred_task
18 | 
19 | if opt.pred_tgt !='CPU' and opt.pred_tgt!='MEM':
20 |     raise AssertionError('pred_tgt should be CPU/MEM')
21 | pred_tgt=opt.pred_tgt
22 | 
23 | if opt.pred_mode !='PRIOR_1' and opt.pred_mode!='PRIOR_ALL':
24 |     raise AssertionError('pred_mode should be PRIOR_1/PRIOR_ALL')
25 | pred_mode=opt.pred_mode
26 | 
27 | if opt.use_DAG =='T':
28 |     use_DAG=True
29 | elif opt.use_DAG!='F':
30 |     use_DAG=False
31 | else:
32 |     raise AssertionError('use_DAG should be T/F')
33 | if use_DAG:
34 |     train_data, val_data, test_data=preprocess_data_exp1_dag(pred_task,pred_tgt,pred_mode)
35 | else:
36 |     train_data, val_data, test_data=preprocess_data_exp1(pred_task,pred_tgt,pred_mode)
37 | 
38 | config=select_model_exp1()
39 | if use_DAG==False:
40 |     config.structure=False
41 | train_loader=torch.utils.data.DataLoader(dataset=train_data,batch_size=config.batch_size,num_workers=2,
42 |                                         shuffle=False)
43 | val_loader=torch.utils.data.DataLoader(dataset=val_data,batch_size=config.batch_size,num_workers=2,
44 |                                         shuffle=False)
45 | test_loader=torch.utils.data.DataLoader(dataset=test_data,batch_size=config.batch_size,num_workers=2,
46 |                                         shuffle=False)
47 | 
48 | 
49 | 
50 | if __name__=='__main__':
51 |     model=DAGTransformer(config).to(config.device)
52 |     train(config, model, train_loader, val_loader, test_loader)
53 | 
54 |     
55 | 


--------------------------------------------------------------------------------
/run_exp2.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from pyexpat import model
 3 | from preprocess import preprocess_data_exp23_dag,preprocess_data_exp23
 4 | from select_model import select_model_exp2
 5 | from models.DAG_Transformer import DAGTransformer
 6 | from models.CNN import CNNModel
 7 | from models.LSTM import LSTMModel
 8 | from models.Vanilla_Transformer import VanillaTransformerModel
 9 | from train_model_dag import train
10 | from train_model_vanilla import train as train_vanilla
11 | import torch
12 | import argparse
# Command-line entry point for experiment 2: pick a model and a dataset
# split, build the data loaders, then train the selected model.
parser = argparse.ArgumentParser()
parser.add_argument('--model_name', required=True)  # DAGTransformer, CNN, LSTM, VanillaTransformer
parser.add_argument('--split', default='split6_2_2')
opt = parser.parse_args()

if opt.model_name not in ('DAGTransformer', 'CNN', 'LSTM', 'VanillaTransformer'):
    raise AssertionError('model should be DAGTransformer/CNN/LSTM/VanillaTransformer')
model_name = opt.model_name

if opt.split not in ('split9_05_05', 'split8_1_1', 'split6_2_2'):
    raise AssertionError('split should be split9_05_05/split8_1_1/split6_2_2')
split = opt.split

# The configuration lookup is the same for every model; only the
# DAGTransformer needs the DAG-aware preprocessing.
config = select_model_exp2(model_name)
if model_name == 'DAGTransformer':
    train_data, val_data, test_data = preprocess_data_exp23_dag(split)
else:
    train_data, val_data, test_data = preprocess_data_exp23(split)

loader_options = dict(batch_size=config.batch_size, num_workers=2, shuffle=False)
train_loader = torch.utils.data.DataLoader(dataset=train_data, **loader_options)
val_loader = torch.utils.data.DataLoader(dataset=val_data, **loader_options)
test_loader = torch.utils.data.DataLoader(dataset=test_data, **loader_options)

if __name__ == '__main__':
    if model_name == 'DAGTransformer':
        model = DAGTransformer(config).to(config.device)
        train(config, model, train_loader, val_loader, test_loader)
    else:
        # The remaining models share the vanilla training loop.
        baseline_classes = {
            'LSTM': LSTMModel,
            'CNN': CNNModel,
            'VanillaTransformer': VanillaTransformerModel,
        }
        model = baseline_classes[model_name](config).to(config.device)
        train_vanilla(config, model, train_loader, val_loader, test_loader)
51 | 
52 |     


--------------------------------------------------------------------------------
/run_exp3.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from pyexpat import model
 3 | from preprocess import preprocess_data_exp23_dag,preprocess_data_exp23, preprocess_data_exp23_GNN_bidir, preprocess_data_exp23_GNN_unidir
 4 | from select_model import select_model_exp3
 5 | from models.DAG_Transformer import DAGTransformer
 6 | from models.CNN import CNNModel
 7 | from models.LSTM import LSTMModel
 8 | from models.Vanilla_Transformer import VanillaTransformerModel
 9 | from models.GCN import GCNModel
10 | from train_model_dag import train
11 | from train_model_vanilla import train as train_vanilla
12 | from train_model_gnn import train as train_gnn
13 | import torch
14 | import argparse
# ---- Command-line interface ------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument('--model_name', required=True)  # DAGTransformer, CNN, LSTM, VanillaTransformer, GCN
parser.add_argument('--split', default='split6_2_2')
parser.add_argument('--GCN_mode', default='bidirect')
opt = parser.parse_args()

# Reject unsupported values before any data is loaded.
if opt.model_name not in ('DAGTransformer', 'CNN', 'LSTM', 'VanillaTransformer', 'GCN'):
    raise AssertionError('model should be DAGTransformer/CNN/LSTM/VanillaTransformer/GCN')
model_name = opt.model_name

if opt.split not in ('split9_05_05', 'split8_1_1', 'split6_2_2'):
    raise AssertionError('split should be split9_05_05/split8_1_1/split6_2_2')
split = opt.split

if opt.GCN_mode not in ('bidirect', 'unidirect'):
    raise AssertionError('GCN_mode should be bidirect/unidirect')

# ---- Configuration and data ------------------------------------------------
if model_name == 'GCN':
    # The GCN path works on a single data object instead of three loaders.
    config = select_model_exp3('GCN')
    GCN_mode = opt.GCN_mode
    if GCN_mode == 'bidirect':
        data = preprocess_data_exp23_GNN_bidir(split)
    else:
        data = preprocess_data_exp23_GNN_unidir(split)
else:
    config = select_model_exp3(model_name)
    if model_name == 'DAGTransformer':
        train_data, val_data, test_data = preprocess_data_exp23_dag(split)
    else:
        train_data, val_data, test_data = preprocess_data_exp23(split)
    # One loader per subset, all built with identical settings.
    train_loader, val_loader, test_loader = (
        torch.utils.data.DataLoader(dataset=subset, batch_size=config.batch_size,
                                    num_workers=2, shuffle=False)
        for subset in (train_data, val_data, test_data)
    )

# ---- Training --------------------------------------------------------------
if __name__ == '__main__':
    if model_name == 'GCN':
        model = GCNModel(config).to(config.device)
        train_gnn(config, model, data)
    elif model_name == 'DAGTransformer':
        model = DAGTransformer(config).to(config.device)
        train(config, model, train_loader, val_loader, test_loader)
    else:
        # Remaining (already validated) names all share the vanilla trainer.
        vanilla_classes = {
            'LSTM': LSTMModel,
            'CNN': CNNModel,
            'VanillaTransformer': VanillaTransformerModel,
        }
        model = vanilla_classes[model_name](config).to(config.device)
        train_vanilla(config, model, train_loader, val_loader, test_loader)
72 | 
73 |     


--------------------------------------------------------------------------------
/scheduler.py:
--------------------------------------------------------------------------------
 1 | from torch.optim.lr_scheduler import _LRScheduler
 2 |  
 3 |  
 4 | class WarmUpLR(_LRScheduler):
 5 |     def __init__(self, optimizer, total_iters, last_epoch=-1):
 6 |         
 7 |         self.total_iters = total_iters
 8 |         super().__init__(optimizer, last_epoch)
 9 |  
10 |     def get_lr(self):
11 |         return [base_lr * self.last_epoch / (self.total_iters + 1e-8) for base_lr in self.base_lrs]
class downLR(_LRScheduler):
    """Linear learning-rate decay.

    After ``k`` calls to ``step()`` every parameter group's learning rate is
    ``base_lr * (total_iters - k) / total_iters``. Once ``k`` reaches
    ``total_iters`` the rate stays at 0 instead of turning negative.

    Args:
        optimizer: Wrapped optimizer.
        total_iters: Number of ``step()`` calls the decay spans (int or float).
        last_epoch: Index of the last step; ``-1`` starts from scratch.
    """

    def __init__(self, optimizer, total_iters, last_epoch=-1):
        # Must be set before the base constructor runs, because the base
        # class calls get_lr() during initialisation.
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the decayed learning rate of every parameter group."""
        # Clamp at zero: a negative learning rate would step along the
        # gradient instead of against it.
        remaining = max(self.total_iters - self.last_epoch, 0)
        # The 1e-8 term keeps the division defined when total_iters is 0.
        return [base_lr * remaining / (self.total_iters + 1e-8) for base_lr in self.base_lrs]


--------------------------------------------------------------------------------
/select_model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | class DAGTransformerConfig(object):
  3 |     def __init__(self):
  4 |         self.model_name='DAGTransformer'
  5 |         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')   
  6 |         self.dropout = 0.3                                              
  7 |         self.num_classes = 3                                            
  8 |         self.num_epochs = 500                                          
  9 |         self.batch_size = 500                                         
 10 |         self.num_task = 7                                             
 11 |         self.learning_rate = 1e-4     
 12 |         self.n_feat = 34
 13 |         self.hidden_dim = 1024
 14 |         self.num_head = 8
 15 |         self.num_encoder = 6
 16 |         self.d_k=512
 17 |         self.res_num_layer=4
 18 |         self.structure=True
 19 | 
 20 | class CNNConfig(object):
 21 |     def __init__(self):
 22 |         self.model_name = 'CNN'
 23 |         self.n_feat = 34
 24 |         self.num_task = 7      
 25 |         self.outdim = 512
 26 |         self.num_epochs = 3000
 27 |         self.num_classes = 3
 28 |         self.pooldim = 3
 29 |         self.dropout = 0.3
 30 |         self.batch_size = 500
 31 |         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')   
 32 |         self.learning_rate = 1e-3
 33 | 
 34 | class LSTMConfig(object):
 35 |     def __init__(self):
 36 |         self.model_name = 'LSTM'
 37 |         self.n_feat=34
 38 |         self.num_task=7
 39 |         self.batch_size=500
 40 |         self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')   
 41 |         self.learning_rate=1e-3
 42 |         self.num_epochs=500
 43 |         self.num_classes=3
 44 |         self.num_layers=6
 45 |         self.hidden=1024
 46 |         self.dropout=0.5
 47 |         self.pooldim = 3
 48 |         
 49 | 
 50 | 
 51 | class GCNConfig(object):
 52 |     def __init__(self):
 53 |         self.model_name = 'GCN'
 54 |         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 55 |         self.n_feat=34
 56 |         self.dropout = 0.5                                             
 57 |         self.num_classes = 3                                           
 58 |         self.num_epochs = 15000                                                                           
 59 |         self.learning_rate = 5e-3    
 60 | 
 61 | class VanillaTransformerConfig(object):
 62 | 
 63 |     """配置参数"""
 64 |     def __init__(self):
 65 |         self.model_name = 'VanillaTransformer'
 66 |         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  
 67 | 
 68 |         self.dropout = 0.5                                              
 69 |         self.num_classes = 3                                           
 70 |         self.num_epochs = 100                                          
 71 |         self.batch_size = 500                                          
 72 |         self.num_task = 7                                             
 73 |         self.learning_rate = 1e-4                                      
 74 |         self.n_feat = 34
 75 |         self.hidden = 1024
 76 |         self.num_head = 2
 77 |         self.num_encoder = 6
 78 | 
 79 | def select_model_exp1():
 80 |     
 81 |     config=DAGTransformerConfig()
 82 |     return config
 83 | 
 84 | 
 85 | def select_model_exp2(model_name):
 86 |     if model_name=='DAGTransformer':
 87 |         config=DAGTransformerConfig()
 88 |         config.num_epochs=100
 89 |         return config
 90 |         
 91 |     if model_name=='CNN':
 92 |         config=CNNConfig()
 93 |         config.num_epochs=100
 94 |         config.learning_rate=1e-4
 95 |         return config
 96 |     
 97 |     if model_name=='LSTM':
 98 |         config=LSTMConfig()
 99 |         config.learning_rate=1e-4
100 |         config.num_epochs=100
101 |         return config
102 |     if model_name=='VanillaTransformer':
103 |         config=VanillaTransformerConfig()
104 |         return config
105 | 
106 | 
def select_model_exp3(model_name):
    """Build the experiment-3 configuration for *model_name*.

    Each supported name maps to its config class with default values;
    any other name yields ``None``.
    """
    known_configs = (
        ('DAGTransformer', DAGTransformerConfig),
        ('GCN', GCNConfig),
        ('CNN', CNNConfig),
        ('LSTM', LSTMConfig),
        ('VanillaTransformer', VanillaTransformerConfig),
    )
    for candidate, config_cls in known_configs:
        if model_name == candidate:
            return config_cls()
    return None
123 | 
124 | 
125 |     
126 |         
127 | 
128 | 
129 | 
130 |     
131 | 


--------------------------------------------------------------------------------
/train_model_dag.py:
--------------------------------------------------------------------------------
  1 | from tqdm import tqdm
  2 | from scheduler import WarmUpLR, downLR
  3 | import time
  4 | import torch
  5 | import numpy as np
  6 | import torch.nn.functional as F
  7 | from datetime import timedelta
  8 | from sklearn import metrics
  9 | 
 10 | def get_time_dif(start_time):
 11 |     end_time = time.time()
 12 |     time_dif = end_time - start_time
 13 |     return timedelta(seconds=int(round(time_dif)))
 14 | 
 15 | 
 16 | def train(config, model, train_iter, dev_iter, test_iter):
 17 |     start_time = time.time()
 18 |     model.train()
 19 |     optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
 20 |     warmup_epoch = config.num_epochs/2
 21 |     iter_per_epoch = len(train_iter)
 22 |     scheduler=downLR(optimizer, (config.num_epochs-warmup_epoch)*iter_per_epoch)
 23 |     
 24 |     warmup_scheduler = WarmUpLR(optimizer, warmup_epoch*iter_per_epoch)
 25 |     total_batch = 0  
 26 |     dev_best_loss = float('inf')
 27 |     dev_best_acc = float(0)
 28 |     test_best_acc=float(0)
 29 |     
 30 |     lrlist=np.zeros((config.num_epochs,2))
 31 |     for epoch in range(config.num_epochs):
 32 |         
 33 |         
 34 |         loss_total = 0
 35 |         print('Epoch [{}/{}]'.format(epoch + 1, config.num_epochs))
 36 |         lrlist[epoch][0]=epoch
 37 |         predic_all = torch.LongTensor([]).to(config.device)
 38 |         true_all = torch.LongTensor([]).to(config.device)
 39 |         if(epoch>=warmup_epoch):
 40 |             
 41 |             learn_rate = scheduler.get_lr()[0]
 42 |             print("Learn_rate:%s" % learn_rate)
 43 |             lrlist[epoch][1]=learn_rate
 44 |         else:
 45 |             learn_rate = warmup_scheduler.get_lr()[0]
 46 |             lrlist[epoch][0]=learn_rate
 47 |             print("Learn_rate:%s" % learn_rate)
 48 |         
 49 |         
 50 |         for  (trains, labels, poss, masks) in tqdm(train_iter):
 51 |             trains=trains.to(config.device)
 52 |             #trains.dtype=torch.float
 53 |             labels=labels.long().to(config.device)
 54 |             #print(labels.dtype)
 55 |             poss=poss.to(config.device)
 56 |             masks=masks.to(config.device)
 57 |             outputs = model(trains,poss,masks)
 58 |             model.zero_grad()
 59 |             loss = F.cross_entropy(outputs, labels)
 60 |             loss.backward()
 61 |            
 62 |             
 63 |             optimizer.step()
 64 |             if(epoch<warmup_epoch):
 65 |                 warmup_scheduler.step()
 66 |             else:
 67 |                 scheduler.step()
 68 |             total_batch += 1
 69 |             loss_total+=loss
 70 |             
 71 |             true = labels.data
 72 |             predic = torch.max(outputs.data, 1)[1]
 73 |             predic_all=torch.cat((predic_all, predic),0)
 74 |             true_all=torch.cat((true_all, true),0)
 75 |             
 76 |         train_acc  = get_accuracy(true_all, predic_all)
 77 |         lossoutput=loss_total/len(train_iter)
 78 |                 
 79 |                 
 80 |         dev_acc, dev_loss = evaluate(config, model, dev_iter)
 81 |         
 82 |         
 83 |         test_acc, test_loss = evaluate(config, model, test_iter)
 84 |         if dev_loss < dev_best_loss:
 85 |             dev_best_loss = dev_loss
 86 |             
 87 |             improve = '*'
 88 |         else:
 89 |             improve = ''
 90 |         if dev_acc > dev_best_acc:
 91 |             dev_best_acc = dev_acc
 92 |             #torch.save(model.state_dict(), './best.ckpt')
 93 |             test_best_acc = test_acc
 94 |         time_dif = get_time_dif(start_time)
 95 |         msg = 'Iter: {0:>6},  Train Loss: {1:>5.2},  Train Acc: {2:>6.2%},  Val Loss: {3:>5.2},  Val Acc: {4:>6.2%}, Test Loss: {5:>5.2}, Test Acc: {6:>6.2%},Time: {7} {8}'
 96 |         print(msg.format(total_batch, lossoutput.item(), train_acc, dev_loss, dev_acc, test_loss,test_acc,time_dif, improve))
 97 |         print('BEST SO FAR:')
 98 |         print('Val Best Acc:', dev_best_acc)
 99 |         print('Test Best Acc:', test_best_acc)
100 |         model.train()
101 |     test(config, model, test_iter)
102 | 
103 | 
def test(config, model, test_iter):
    """Evaluate *model* on *test_iter* and print the results.

    Prints the test loss and accuracy, the confusion matrix returned by
    ``evaluate(..., test=True)``, and the time the evaluation took.
    """
    model.eval()
    began = time.time()
    acc, loss, confusion = evaluate(config, model, test_iter, test=True)
    print('Test Loss: {0:>5.2},  Test Acc: {1:>6.2%}'.format(loss, acc))
    print(confusion)
    print("Time usage:", get_time_dif(began))
113 | 
114 | 
def evaluate(config, model, data_iter, test=False):
    """Compute accuracy and mean batch loss of *model* over *data_iter*.

    Batches are ``(inputs, labels, poss, masks)`` tuples; the model is put in
    eval mode and run without gradient tracking.

    Returns:
        ``(accuracy, mean_loss)``, or ``(accuracy, mean_loss, confusion)``
        when *test* is true, where ``confusion`` comes from
        ``sklearn.metrics.confusion_matrix``.
    """
    model.eval()
    device = config.device
    running_loss = 0
    all_preds = torch.LongTensor([]).to(device)
    all_labels = torch.LongTensor([]).to(device)

    with torch.no_grad():
        for feats, targets, pos_batch, mask_batch in data_iter:
            feats = feats.float().to(device)
            pos_batch = pos_batch.float().to(device)
            mask_batch = mask_batch.float().to(device)
            targets = targets.long().to(device)

            logits = model(feats, pos_batch, mask_batch)
            running_loss += F.cross_entropy(logits, targets)

            # Predicted class = index of the largest logit in each row.
            _, batch_preds = torch.max(logits.data, 1)
            all_labels = torch.cat((all_labels, targets.data), 0)
            all_preds = torch.cat((all_preds, batch_preds), 0)

    accuracy = get_accuracy(all_labels, all_preds)
    if not test:
        return accuracy, running_loss / len(data_iter)
    confusion = metrics.confusion_matrix(all_labels.cpu().numpy(), all_preds.cpu().numpy())
    return accuracy, running_loss / len(data_iter), confusion
139 | 
def get_accuracy(y_true, y_pred):
    """Return ``sklearn.metrics.accuracy_score`` for two label tensors.

    Both tensors are moved to the CPU and converted to NumPy arrays first.
    """
    true_np = y_true.cpu().numpy()
    pred_np = y_pred.cpu().numpy()
    return metrics.accuracy_score(true_np, pred_np)


--------------------------------------------------------------------------------
/train_model_gnn.py:
--------------------------------------------------------------------------------
  1 | from tqdm import tqdm
  2 | from scheduler import WarmUpLR, downLR
  3 | import time
  4 | import torch
  5 | import numpy as np
  6 | import torch.nn.functional as F
  7 | from datetime import timedelta
  8 | from sklearn import metrics
  9 | import time
 10 | from datetime import timedelta
 11 | def get_time_dif(start_time):
 12 |     end_time = time.time()
 13 |     time_dif = end_time - start_time
 14 |     return timedelta(seconds=int(round(time_dif)))
 15 | 
 16 | def train(config, model, data):
 17 |     start_time = time.time()
 18 |     model.train()
 19 |     optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
 20 |     warmup_epoch = config.num_epochs/2
 21 |     scheduler=downLR(optimizer, (config.num_epochs-warmup_epoch))
 22 |     
 23 |     warmup_scheduler = WarmUpLR(optimizer, warmup_epoch)
 24 |     total_batch = 0  
 25 |     dev_best_loss = float('inf')
 26 |     dev_best_acc = float(0)
 27 |     test_best_acc=float(0)
 28 |     lrlist=np.zeros((config.num_epochs,2))
 29 |     for epoch in range(config.num_epochs):
 30 |         print('Epoch [{}/{}]'.format(epoch + 1, config.num_epochs))
 31 |         lrlist[epoch][0]=epoch
 32 |         if(epoch>=warmup_epoch):
 33 |             
 34 |             learn_rate = scheduler.get_lr()[0]
 35 |             print("Learn_rate:%s" % learn_rate)
 36 |             lrlist[epoch][1]=learn_rate
 37 |         else:
 38 |             learn_rate = warmup_scheduler.get_lr()[0]
 39 |             lrlist[epoch][0]=learn_rate
 40 |             print("Learn_rate:%s" % learn_rate)
 41 |         
 42 |         
 43 |         data=data.to(config.device)
 44 |         outputs = model(data)
 45 |         model.zero_grad()
 46 |         loss = F.cross_entropy(outputs[data.train_mask], data.labels[data.train_mask])   
 47 |         loss.backward()
 48 |         optimizer.step()
 49 |         if(epoch<warmup_epoch):
 50 |             warmup_scheduler.step()
 51 |         else:
 52 |             scheduler.step()
 53 |         total_batch += 1
 54 |         pred_tr=torch.max(outputs[data.train_mask], 1)[1]
 55 |         true_tr=data.labels[data.train_mask]
 56 |         train_acc  = get_accuracy(pred_tr, true_tr)
 57 |         lossoutput=loss
 58 |                 
 59 |         
 60 |         dev_acc, dev_loss = evaluate(config, model, data)
 61 |         test_acc, test_loss = test(config, model, data)
 62 |         if dev_loss < dev_best_loss:
 63 |             dev_best_loss = dev_loss
 64 |             
 65 |             improve = '*'
 66 |                     
 67 |         else:
 68 |             improve = ''
 69 |         if dev_acc > dev_best_acc:
 70 |             dev_best_acc = dev_acc
 71 |             #torch.save(model.state_dict(), './best1nod.ckpt')
 72 |             test_best_acc = test_acc
 73 |             
 74 |         time_dif = get_time_dif(start_time)
 75 |         msg = 'Iter: {0:>6},  Train Loss: {1:>5.2},  Train Acc: {2:>6.2%},  Val Loss: {3:>5.2},  Val Acc: {4:>6.2%}, Test Loss: {5:>5.2}, Test Acc: {6:>6.2%},Time: {7} {8}'
 76 |         print(msg.format(total_batch, lossoutput.item(), train_acc, dev_loss, dev_acc, test_loss,test_acc,time_dif, improve))
 77 |         model.train()
 78 |         print('BEST SO FAR:')
 79 |         print('Val Best Acc:', dev_best_acc)
 80 |         print('Test Best Acc:', test_best_acc)
 81 |     test(config, model, data, final=True)
 82 | 
 83 | 
 84 | def test(config, model, data, final=False):
 85 |     # test
 86 |     
 87 |     model.eval()
 88 |     with torch.no_grad():
 89 |         outputs=model(data)
 90 |         loss_total = F.cross_entropy(outputs[data.test_mask], data.labels[data.test_mask])   
 91 |         predict_all=torch.max(outputs[data.test_mask], 1)[1]
 92 |         labels_all=data.labels[data.test_mask]
 93 |     acc = get_accuracy(labels_all, predict_all)
 94 |     if final:
 95 |         msg = 'Test Loss: {0:>5.2},  Test Acc: {1:>6.2%}'
 96 |         print(msg.format(loss_total, acc))
 97 |         print("Confusion Matrix...")
 98 |         confusion = metrics.confusion_matrix(labels_all.cpu().numpy(), predict_all.cpu().numpy())
 99 |         print(confusion)
100 |         return acc, loss_total, confusion
101 |     return acc, loss_total
102 |     
103 | 
104 | 
def evaluate(config, model, data):
    """Score *model* on the entries of *data* selected by ``data.val_mask``.

    Runs in eval mode without gradient tracking and returns
    ``(accuracy, loss)``.
    """
    model.eval()
    with torch.no_grad():
        logits = model(data)
        val_logits = logits[data.val_mask]
        targets = data.labels[data.val_mask]
        loss_total = F.cross_entropy(val_logits, targets)
        # Predicted class = index of the largest logit in each row.
        _, preds = torch.max(val_logits, 1)

    return get_accuracy(targets, preds), loss_total
117 | from sklearn import metrics
def get_accuracy(y_true, y_pred):
    """Return ``sklearn.metrics.accuracy_score`` for two label tensors.

    Both tensors are moved to the CPU and converted to NumPy arrays first.
    """
    true_np = y_true.cpu().numpy()
    pred_np = y_pred.cpu().numpy()
    return metrics.accuracy_score(true_np, pred_np)
121 | 
122 | 


--------------------------------------------------------------------------------
/train_model_vanilla.py:
--------------------------------------------------------------------------------
  1 | from tqdm import tqdm
  2 | from scheduler import WarmUpLR, downLR
  3 | import time
  4 | import torch
  5 | import numpy as np
  6 | import torch.nn.functional as F
  7 | from datetime import timedelta
  8 | from sklearn import metrics
  9 | 
 10 | def get_time_dif(start_time):
 11 |     end_time = time.time()
 12 |     time_dif = end_time - start_time
 13 |     return timedelta(seconds=int(round(time_dif)))
 14 | 
 15 | 
 16 | def train(config, model, train_iter, dev_iter, test_iter):
 17 |     start_time = time.time()
 18 |     model.train()
 19 |     optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
 20 |     warmup_epoch = config.num_epochs/2
 21 |     iter_per_epoch = len(train_iter)
 22 |     scheduler=downLR(optimizer, (config.num_epochs-warmup_epoch)*iter_per_epoch)
 23 |     
 24 |     warmup_scheduler = WarmUpLR(optimizer, warmup_epoch*iter_per_epoch)
 25 |     total_batch = 0  
 26 |     dev_best_loss = float('inf')
 27 |     dev_best_acc = float(0)
 28 |     test_best_acc=float(0)
 29 |     
 30 |     lrlist=np.zeros((config.num_epochs,2))
 31 |     for epoch in range(config.num_epochs):
 32 |         
 33 |         
 34 |         loss_total = 0
 35 |         print('Epoch [{}/{}]'.format(epoch + 1, config.num_epochs))
 36 |         lrlist[epoch][0]=epoch
 37 |         predic_all = torch.LongTensor([]).to(config.device)
 38 |         true_all = torch.LongTensor([]).to(config.device)
 39 |         if(epoch>=warmup_epoch):
 40 |             
 41 |             learn_rate = scheduler.get_lr()[0]
 42 |             print("Learn_rate:%s" % learn_rate)
 43 |             lrlist[epoch][1]=learn_rate
 44 |         else:
 45 |             learn_rate = warmup_scheduler.get_lr()[0]
 46 |             lrlist[epoch][0]=learn_rate
 47 |             print("Learn_rate:%s" % learn_rate)
 48 |         
 49 |         
 50 |         for  (trains, labels) in tqdm(train_iter):
 51 |             trains=trains.to(config.device)
 52 |             #trains.dtype=torch.float
 53 |             labels=labels.long().to(config.device)
 54 |             #print(labels.dtype)
 55 |             outputs = model(trains)
 56 |             model.zero_grad()
 57 |             loss = F.cross_entropy(outputs, labels)
 58 |             loss.backward()
 59 |            
 60 |             
 61 |             optimizer.step()
 62 |             if(epoch<warmup_epoch):
 63 |                 warmup_scheduler.step()
 64 |             else:
 65 |                 scheduler.step()
 66 |             total_batch += 1
 67 |             loss_total+=loss
 68 |             
 69 |             true = labels.data
 70 |             predic = torch.max(outputs.data, 1)[1]
 71 |             predic_all=torch.cat((predic_all, predic),0)
 72 |             true_all=torch.cat((true_all, true),0)
 73 |             
 74 |         train_acc  = get_accuracy(true_all, predic_all)
 75 |         lossoutput=loss_total/len(train_iter)
 76 |                 
 77 |                 
 78 |         dev_acc, dev_loss = evaluate(config, model, dev_iter)
 79 |         
 80 |         
 81 |         test_acc, test_loss = evaluate(config, model, test_iter)
 82 |         if dev_loss < dev_best_loss:
 83 |             dev_best_loss = dev_loss
 84 |             
 85 |             improve = '*'
 86 |         else:
 87 |             improve = ''
 88 |         if dev_acc > dev_best_acc:
 89 |             dev_best_acc = dev_acc
 90 |             #torch.save(model.state_dict(), './best.ckpt')
 91 |             test_best_acc = test_acc
 92 |         time_dif = get_time_dif(start_time)
 93 |         msg = 'Iter: {0:>6},  Train Loss: {1:>5.2},  Train Acc: {2:>6.2%},  Val Loss: {3:>5.2},  Val Acc: {4:>6.2%}, Test Loss: {5:>5.2}, Test Acc: {6:>6.2%},Time: {7} {8}'
 94 |         print(msg.format(total_batch, lossoutput.item(), train_acc, dev_loss, dev_acc, test_loss,test_acc,time_dif, improve))
 95 |         print('BEST SO FAR:')
 96 |         print('Val Best Acc:', dev_best_acc)
 97 |         print('Test Best Acc:', test_best_acc)
 98 |         model.train()
 99 |     test(config, model, test_iter)
100 | 
101 | 
def test(config, model, test_iter):
    """Evaluate *model* on *test_iter* and print the results.

    Prints the test loss and accuracy, the confusion matrix returned by
    ``evaluate(..., test=True)``, and the time the evaluation took.
    """
    model.eval()
    began = time.time()
    acc, loss, confusion = evaluate(config, model, test_iter, test=True)
    print('Test Loss: {0:>5.2},  Test Acc: {1:>6.2%}'.format(loss, acc))
    print(confusion)
    print("Time usage:", get_time_dif(began))
111 | 
112 | 
def evaluate(config, model, data_iter, test=False):
    """Compute accuracy and mean batch loss of *model* over *data_iter*.

    Batches are ``(inputs, labels)`` pairs; the model is put in eval mode and
    run without gradient tracking.

    Returns:
        ``(accuracy, mean_loss)``, or ``(accuracy, mean_loss, confusion)``
        when *test* is true, where ``confusion`` comes from
        ``sklearn.metrics.confusion_matrix``.
    """
    model.eval()
    device = config.device
    running_loss = 0
    all_preds = torch.LongTensor([]).to(device)
    all_labels = torch.LongTensor([]).to(device)

    with torch.no_grad():
        for feats, targets in data_iter:
            feats = feats.float().to(device)
            targets = targets.long().to(device)

            logits = model(feats)
            running_loss += F.cross_entropy(logits, targets)

            # Predicted class = index of the largest logit in each row.
            _, batch_preds = torch.max(logits.data, 1)
            all_labels = torch.cat((all_labels, targets.data), 0)
            all_preds = torch.cat((all_preds, batch_preds), 0)

    accuracy = get_accuracy(all_labels, all_preds)
    if not test:
        return accuracy, running_loss / len(data_iter)
    confusion = metrics.confusion_matrix(all_labels.cpu().numpy(), all_preds.cpu().numpy())
    return accuracy, running_loss / len(data_iter), confusion
135 | 
def get_accuracy(y_true, y_pred):
    """Return ``sklearn.metrics.accuracy_score`` for two label tensors.

    Both tensors are moved to the CPU and converted to NumPy arrays first.
    """
    true_np = y_true.cpu().numpy()
    pred_np = y_pred.cpu().numpy()
    return metrics.accuracy_score(true_np, pred_np)


--------------------------------------------------------------------------------