724 | and the mask would be                      1   1   1   1   1   0   0
725 | and let the logits be                      l1  l2  l3  l4  l5  l6
726 | We actually need to compare:
727 | the sequence            w1  w2  w3  <E> <P> <P>
728 | with masks              1   1   1   1   0   0
729 | against                 l1  l2  l3  l4  l5  l6
730 | (where the input was)   <S> w1  w2  w3  <E> <P>
731 | """
732 | # shape: (batch_size, num_decoding_steps)
733 | relevant_targets = targets[:, 1:].contiguous()
734 |
735 | # shape: (batch_size, num_decoding_steps)
736 | relevant_mask = target_mask[:, 1:].contiguous()
737 |
738 | return util.sequence_cross_entropy_with_logits(logits, relevant_targets, relevant_mask)
739 |
740 | @overrides
741 | def get_metrics(self, reset: bool = False) -> Dict[str, float]:
742 | all_metrics: Dict[str, float] = {}
743 | if self._bleu and not self.training:
744 | all_metrics.update(self._bleu.get_metric(reset=reset))
745 | all_metrics.update({'acc': self._acc.get_metric(reset=reset)})
746 | all_metrics.update({'no_result': self._no_result.get_metric(reset=reset)})
747 |
748 | return all_metrics
749 |
--------------------------------------------------------------------------------
/GeoQA+/NGS_Aux_test.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List, Tuple
2 |
3 | import numpy
4 | from overrides import overrides
5 | import torch
6 | import torch.nn.functional as F
7 | import torch.nn as nn
8 | from torch.nn.modules.linear import Linear
9 | from torch.nn.modules.rnn import LSTMCell
10 | from torch.nn.modules.rnn import GRUCell
11 | from allennlp.common.checks import ConfigurationError
12 | from allennlp.common.util import START_SYMBOL, END_SYMBOL
13 | from allennlp.data.vocabulary import Vocabulary
14 | from allennlp.modules.attention import LegacyAttention
15 | from allennlp.modules import Attention, TextFieldEmbedder, Seq2SeqEncoder
16 | from allennlp.modules.similarity_functions import SimilarityFunction
17 | from allennlp.models.model import Model
18 | from allennlp.modules.token_embedders import Embedding
19 | from allennlp.nn import util
20 | from allennlp.nn.beam_search import BeamSearch
21 | from allennlp.training.metrics import BLEU
22 |
23 | from ManualProgram.eval_equ import Equations
24 | from transformers import AutoModel, AutoTokenizer
25 |
26 | import random
27 | import warnings
28 | import math
29 | warnings.filterwarnings("ignore")
30 | torch.cuda.set_device(0)
31 |
32 | no_result_id = []
33 | right_id = []
34 | wrong_manual = []
35 | noresult_manual = []
36 | from utils import *
37 |
38 | from mcan import *
39 | import json
40 | model_name = "data/pretrain/Roberta"
41 | @Model.register("MyEncoder")
42 | class Encoder(Model):
43 | def __init__(self,
44 | vocab: Vocabulary,
45 | input_dim: int,
46 | emb_dim: int,
47 | hid_dim: int,
48 | dropout: float):
49 | super(Encoder, self).__init__(vocab)
50 | self.input_dim = input_dim
51 | self.emb_dim = emb_dim
52 | self.hid_dim = hid_dim
53 | self.embedding = AutoModel.from_pretrained(model_name)
54 | self.trans = nn.Linear(emb_dim, hid_dim)
55 | self.norm = nn.LayerNorm(hid_dim)
56 | self.dropout = nn.Dropout(dropout)
57 | self.lstm_embedding = nn.Embedding(22128, hid_dim, padding_idx=0)
58 | self.lstm_dropout = nn.Dropout(0.5)
59 | self.lstm = torch.nn.LSTM(hid_dim, hid_dim, batch_first=True, bidirectional=True, num_layers=2, dropout=0.5)
60 | self.concat_trans = nn.Linear(hid_dim, hid_dim)
61 | self.concat_norm = nn.LayerNorm(hid_dim)
62 | self._encoder_output_dim = 512
63 | self._decoder_output_dim = 512
64 |
65 | @overrides
66 | def forward(self, src, source_mask):
67 |
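# Branch 1: contextual token embeddings from the pretrained RoBERTa encoder (source_mask marks the real tokens).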
68 | embedded = self.embedding(src, attention_mask=source_mask, return_dict=True, output_hidden_states=True)
69 | bert_output = embedded.last_hidden_state
70 | output = self.dropout(self.norm(torch.relu(self.trans(bert_output))))
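# Branch 2: an independent nn.Embedding fed through a 2-layer bidirectional LSTM over the same token ids.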
71 | lstm_embedding = self.lstm_dropout(self.lstm_embedding(src))
72 | input_length = torch.sum(source_mask, dim=1).long().view(-1,).cpu()
73 | packed = nn.utils.rnn.pack_padded_sequence(lstm_embedding, input_length, batch_first=True, enforce_sorted=False)
74 | lstm_output, _ = self.lstm(packed)
75 | lstm_output, _ = nn.utils.rnn.pad_packed_sequence(lstm_output, batch_first=True)
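# Sum the forward and backward LSTM directions so the output matches hid_dim, then fuse the two branches by addition.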
76 | lstm_output = lstm_output[:, :, :self.hid_dim] + lstm_output[:, :, self.hid_dim:]
77 | output = output + lstm_output
78 | # output = torch.cat((output,lstm_output), dim=-1)
79 | output = self.concat_norm(torch.relu(self.concat_trans(output)))
80 | return output
81 |
82 | def get_output_dim(self):
83 | return self._encoder_output_dim
84 |
85 | def is_bidirectional(self) -> bool:
86 | return True
87 |
88 | @Model.register("geo_s2s")
89 | class SimpleSeq2Seq(Model):
90 | """
91 | This ``SimpleSeq2Seq`` class is a :class:`Model` which takes a sequence, encodes it, and then
92 | uses the encoded representations to decode another sequence. You can use this as the basis for
93 | a neural machine translation system, an abstractive summarization system, or any other common
94 | seq2seq problem. The model here is simple, but should be a decent starting place for
95 | implementing recent models for these tasks.
96 |
97 | Parameters
98 | ----------
99 | vocab : ``Vocabulary``, required
100 | Vocabulary containing source and target vocabularies. They may be under the same namespace
101 | (`tokens`) or the target tokens can have a different namespace, in which case it needs to
102 | be specified as `target_namespace`.
103 | source_embedder : ``TextFieldEmbedder``, required
104 | Embedder for source side sequences
105 | encoder : ``Seq2SeqEncoder``, required
106 | The encoder of the "encoder/decoder" model
107 | max_decoding_steps : ``int``
108 | Maximum length of decoded sequences.
109 | target_namespace : ``str``, optional (default = 'tokens')
110 | If the target side vocabulary is different from the source side's, you need to specify the
111 | target's namespace here. If not, we'll assume it is "tokens", which is also the default
112 | choice for the source side, and this might cause them to share vocabularies.
113 | target_embedding_dim : ``int``, optional (default = source_embedding_dim)
114 | You can specify an embedding dimensionality for the target side. If not, we'll use the same
115 | value as the source embedder's.
116 | attention : ``Attention``, optional (default = None)
117 | If you want to use attention to get a dynamic summary of the encoder outputs at each step
118 | of decoding, this is the function used to compute similarity between the decoder hidden
119 | state and encoder outputs.
120 | attention_function: ``SimilarityFunction``, optional (default = None)
121 | This is if you want to use the legacy implementation of attention. This will be deprecated
122 | since it consumes more memory than the specialized attention modules.
123 | beam_size : ``int``, optional (default = None)
124 | Width of the beam for beam search. If not specified, greedy decoding is used.
125 | scheduled_sampling_ratio : ``float``, optional (default = 0.)
126 | At each timestep during training, we sample a random number between 0 and 1, and if it is
127 | not less than this value, we use the ground truth labels for the whole batch. Else, we use
128 | the predictions from the previous time step for the whole batch. If this value is 0.0
129 | (default), this corresponds to teacher forcing, and if it is 1.0, it corresponds to not
130 | using target side ground truth labels. See the following paper for more information:
131 | `Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks. Bengio et al.,
132 | 2015 <https://arxiv.org/abs/1506.03099>`_.
133 | use_bleu : ``bool``, optional (default = True)
134 | If True, the BLEU metric will be calculated during validation.
135 | """
136 |
137 | def __init__(self,
138 | vocab: Vocabulary,
139 | source_embedder: TextFieldEmbedder,
140 | encoder: Encoder,
141 | max_decoding_steps: int,
142 | knowledge_points_ratio = 0,
143 | attention: Attention = True,
144 | attention_function: SimilarityFunction = None,
145 | beam_size: int = None,
146 | target_namespace: str = "tokens",
147 | target_embedding_dim: int = None,
148 | scheduled_sampling_ratio: float = 0.,
149 | resnet_pretrained = None,
150 | use_bleu: bool = True) -> None:
151 | super(SimpleSeq2Seq, self).__init__(vocab)
152 |
153 | resnet = build_model()
154 |
155 | if resnet_pretrained is not None:
156 | resnet.load_state_dict(torch.load(resnet_pretrained))
157 | print('##### Checkpoint Loaded! #####')
158 | else:
159 | print("No Diagram Pretrain !!!")
160 | self.resnet = resnet
161 |
162 | self.channel_transform = torch.nn.Linear(1024, 512)
163 |
164 | __C = Cfgs()
165 | self.mcan = MCA_ED(__C)
166 | self.attflat_img = AttFlat(__C)
168 | self.attflat_lang = AttFlat(__C)  # not used
168 |
169 | self.decode_transform = torch.nn.Linear(1024, 512)
170 |
171 | self._equ = Equations()
172 |
173 | self._target_namespace = target_namespace
174 | self._scheduled_sampling_ratio = scheduled_sampling_ratio
175 |
176 | # We need the start symbol to provide as the input at the first timestep of decoding, and
177 | # end symbol as a way to indicate the end of the decoded sequence.
178 | self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
179 | self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
180 |
181 | if use_bleu:
182 | pad_index = self.vocab.get_token_index(self.vocab._padding_token, self._target_namespace) # pylint: disable=protected-access
183 | self._bleu = BLEU(ngram_weights=(1, 0, 0, 0), exclude_indices={pad_index, self._end_index, self._start_index})
184 | else:
185 | self._bleu = None
186 | self._acc = Average()
187 | self._no_result = Average()
188 |
189 | # remember to clear after evaluation
190 | self.new_acc = []
191 | self.angle = []
192 | self.length = []
193 | self.area = []
194 | self.other = []
195 | self.point_acc_list = []
196 | self.save_results = dict()
197 |
198 | # At prediction time, we use a beam search to find the most likely sequence of target tokens.
199 | beam_size = beam_size or 1
200 | self._max_decoding_steps = max_decoding_steps
201 | self._beam_search = BeamSearch(self._end_index, max_steps=max_decoding_steps, beam_size=beam_size)
202 |
203 | # Dense embedding of source vocab tokens.
204 | self._source_embedder = source_embedder
205 |
206 | # Encodes the sequence of source embeddings into a sequence of hidden states.
207 | self._encoder = encoder
208 |
209 | num_classes = self.vocab.get_vocab_size(self._target_namespace)
210 |
211 | # Attention mechanism applied to the encoder output for each step.
212 | # TODO: attention
213 | if attention:
214 | if attention_function:
215 | raise ConfigurationError("You can only specify an attention module or an "
216 | "attention function, but not both.")
217 | self._attention = LegacyAttention()
218 | elif attention_function:
219 | self._attention = LegacyAttention(attention_function)
220 | else:
221 | self._attention = None
222 | print("No Attention!")
223 | exit()
224 |
225 | # Dense embedding of vocab words in the target space.
226 | target_embedding_dim = target_embedding_dim or source_embedder.get_output_dim()
227 | self._target_embedder = Embedding(num_classes, target_embedding_dim)
228 |
229 | # Decoder output dim needs to be the same as the encoder output dim since we initialize the
230 | # hidden state of the decoder with the final hidden state of the encoder.
231 | self._encoder_output_dim = self._encoder.get_output_dim()
232 | self._decoder_output_dim = self._encoder_output_dim
233 |
234 | if self._attention:
235 | # If using attention, a weighted average over encoder outputs will be concatenated
236 | # to the previous target embedding to form the input to the decoder at each
237 | # time step.
238 | self._decoder_input_dim = self._decoder_output_dim + target_embedding_dim
239 | else:
240 | # Otherwise, the input to the decoder is just the previous target embedding.
241 | self._decoder_input_dim = target_embedding_dim
242 |
243 | # We'll use an LSTM cell as the recurrent cell that produces a hidden state
244 | # for the decoder at each time step.
245 | self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim)
246 | #self._decoder_cell = GRUCell(self._decoder_input_dim, self._decoder_output_dim)
247 | # We project the hidden state from the decoder into the output vocabulary space
248 | # in order to get log probabilities of each target token, at each time step.
249 | self._output_projection_layer = Linear(self._decoder_output_dim, num_classes)
250 | # knowledge points
251 | self.point_ratio = knowledge_points_ratio
252 | if self.point_ratio != 0:
253 | self.points_norm = LayerNorm(__C.FLAT_OUT_SIZE)
254 | self.points_proj = nn.Linear(__C.FLAT_OUT_SIZE, 77)
255 | self.points_criterion = nn.BCELoss()
256 |
257 | def take_step(self,
258 | last_predictions: torch.Tensor,
259 | state: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
260 | """
261 | Take a decoding step. This is called by the beam search class.
262 |
263 | Parameters
264 | ----------
265 | last_predictions : ``torch.Tensor``
266 | A tensor of shape ``(group_size,)``, which gives the indices of the predictions
267 | during the last time step.
268 | state : ``Dict[str, torch.Tensor]``
269 | A dictionary of tensors that contain the current state information
270 | needed to predict the next step, which includes the encoder outputs,
271 | the source mask, and the decoder hidden state and context. Each of these
272 | tensors has shape ``(group_size, *)``, where ``*`` can be any other number
273 | of dimensions.
274 |
275 | Returns
276 | -------
277 | Tuple[torch.Tensor, Dict[str, torch.Tensor]]
278 | A tuple of ``(log_probabilities, updated_state)``, where ``log_probabilities``
279 | is a tensor of shape ``(group_size, num_classes)`` containing the predicted
280 | log probability of each class for the next step, for each item in the group,
281 | while ``updated_state`` is a dictionary of tensors containing the encoder outputs,
282 | source mask, and updated decoder hidden state and context.
283 |
284 | Notes
285 | -----
286 | We treat the inputs as a batch, even though ``group_size`` is not necessarily
287 | equal to ``batch_size``, since the group may contain multiple states
288 | for each source sentence in the batch.
289 | """
290 | # shape: (group_size, num_classes)
291 | output_projections, state = self._prepare_output_projections(last_predictions, state)
292 |
293 | # shape: (group_size, num_classes)
294 | class_log_probabilities = F.log_softmax(output_projections, dim=-1)
295 |
296 | return class_log_probabilities, state
297 |
298 | @overrides
299 | def forward(self, # type: ignore
300 | image, source_nums, choice_nums, label, type, data_id, manual_program,
301 | source_tokens: Dict[str, torch.LongTensor],
302 | point_label = None,
303 | target_tokens: Dict[str, torch.LongTensor] = None, **kwargs) -> Dict[str, torch.Tensor]:
304 | # pylint: disable=arguments-differ
305 | """
306 | Make a forward pass with decoder logic for producing the entire target sequence.
307 |
308 | Parameters
309 | ----------
310 | source_tokens : ``Dict[str, torch.LongTensor]``
311 | The output of `TextField.as_array()` applied on the source `TextField`. This will be
312 | passed through a `TextFieldEmbedder` and then through an encoder.
313 | target_tokens : ``Dict[str, torch.LongTensor]``, optional (default = None)
314 | Output of `TextField.as_array()` applied on the target `TextField`. We assume that the
315 | target tokens are also represented as a `TextField`.
316 |
317 | Returns
318 | -------
319 | Dict[str, torch.Tensor]
320 | """
321 | bs = len(label)
322 | state = self._encode(source_tokens)
323 |
324 | with torch.no_grad():
325 | img_feats = self.resnet(image)
326 | # (N, C, 14, 14) -> (N, 196, C)
327 | img_feats = img_feats.reshape(img_feats.shape[0], img_feats.shape[1], -1).transpose(1, 2)
328 | img_mask = make_mask(img_feats)
329 | img_feats = self.channel_transform(img_feats)
330 |
331 | lang_feats = state['encoder_outputs']
332 | # Build the language mask directly from the token ids in source_tokens (already indexed), without embedding them.
333 | lang_mask = make_mask(source_tokens['tokens'].unsqueeze(2))
334 |
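# MCAN co-attention between the language and image features; the attended image features are
# kept, and the original language features are concatenated with them below.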
335 | _, img_feats = self.mcan(lang_feats, img_feats, lang_mask, img_mask)
336 |
337 | # (N, 308, 512)
338 | # Image features come first, then language features; concat_mask in _encode() uses the same order.
339 | state['encoder_outputs'] = torch.cat([img_feats, lang_feats], 1)
340 |
341 | # decode
342 | state = self._init_decoder_state(state, lang_feats, img_feats, img_mask)
343 | output_dict = self._forward_loop(state, target_tokens) # recurrent decoding for LSTM
344 |
345 | # knowledge points
346 | if self.point_ratio != 0:
347 | concat_feature = state["concat_feature"]
348 | point_feat = self.points_norm(concat_feature)
349 | point_feat = self.points_proj(point_feat)
350 | point_pred = torch.sigmoid(point_feat)
351 | point_loss = self.points_criterion(point_pred, point_label) * self.point_ratio
352 | output_dict["point_pred"] = point_pred
353 | output_dict["point_loss"] = point_loss
354 | output_dict["loss"] += point_loss
355 |
356 | # At test time, run beam search and evaluate the decoded programs.
357 | if not self.training:
358 | # state = self._init_decoder_state(state)
359 | state = self._init_decoder_state(state, lang_feats, img_feats, img_mask) # TODO
360 | predictions = self._forward_beam_search(state)
361 | output_dict.update(predictions)
362 |
363 | if target_tokens and self._bleu:
364 | # shape: (batch_size, beam_size, max_sequence_length)
365 | top_k_predictions = output_dict["predictions"]
366 |
367 | # Execute the decoded programs to compute answer accuracy.
368 | suc_knt, no_knt = 0, 0
369 |
370 | selected_programs = []
371 | wrong_id = []
372 | noresult_id = []
373 |
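# For each example, walk the beam from best to worst, execute each hypothesis program on the
# source numbers, and stop at the first one whose result matches a choice (within 1e-3).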
374 | for b in range(bs):
375 |
376 | hypo = None
377 | used_hypo = None
378 | choice = None
379 | for i in range(self._beam_search.beam_size):
380 | if choice is not None:
381 | break
382 | hypo = list(top_k_predictions[b][i])
383 | if self._end_index in list(hypo):
384 | hypo = hypo[:hypo.index(self._end_index)]
385 | hypo = [self.vocab.get_token_from_index(idx.item()) for idx in hypo]
386 | res = self._equ.excuate_equation(hypo, source_nums[b])
387 |
388 | if res is not None and len(res) > 0:
389 | for j in range(4):
390 | if choice_nums[b][j] is not None and math.fabs(res[-1] - choice_nums[b][j]) < 0.001:
391 | choice = j
392 | used_hypo = hypo
393 | selected_programs.append([hypo])
394 | if choice is None:
395 | no_knt += 1
396 | answer_state = 'no_result'
397 | #no_result_id.append(data_id[b])
398 |
399 | self.new_acc.append(0)
400 | elif choice == label[b]:
401 | suc_knt += 1
402 | answer_state = 'right'
403 | self.new_acc.append(1)
404 | right_id.append(data_id[b])
405 | else:
406 | answer_state = 'false'
407 | wrong_id.append(b)
408 | self.new_acc.append(0)
409 |
410 | self.save_results[data_id[b]] = dict(manual_program=manual_program[b],
411 | predict_program=hypo, predict_res=res,
412 | choice=choice_nums[b], right_answer=label[b],
413 | answer_state=answer_state)
414 |
415 | flag = 1 if choice == label[b] else 0
416 | if type[b] == 'angle':
417 | self.angle.append(flag)
418 | elif type[b] == 'length':
419 | self.length.append(flag)
420 | else:
421 | self.other.append(flag)
422 |
423 | # knowledge points
424 | # if self.point_ratio != 0:
425 | # point_acc = self.multi_label_evaluation(point_pred[b].unsqueeze(0), point_label[b].unsqueeze(0))
426 | # self.point_acc_list.append(point_acc)
427 |
428 | # with open('save/test.json', 'w') as f:
429 | # json.dump(self.save_results, f)
430 |
431 | if random.random() < 0.05:
432 | print('selected_programs', selected_programs)
433 | """
434 | for item in noresult_id:
435 | noresult_manual.append(selected_programs[item])
436 |
437 | for item in wrong_id:
438 | wrong_manual.append(selected_programs[item])
439 |
440 | print((wrong_manual),(noresult_manual))
441 | """
442 | # calculate BLEU
443 | best_predictions = top_k_predictions[:, 0, :]
444 | self._bleu(best_predictions, target_tokens["tokens"])
445 | self._acc(suc_knt / bs)
446 | self._no_result(no_knt / bs)
447 |
448 | print(right_id)
449 | print(len(right_id))
450 | return output_dict
451 |
452 | def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
453 | """
454 | Finalize predictions.
455 |
456 | This method overrides ``Model.decode``, which gets called after ``Model.forward``, at test
457 | time, to finalize predictions. The logic for the decoder part of the encoder-decoder lives
458 | within the ``forward`` method.
459 |
460 | This method trims the output predictions to the first end symbol, replaces indices with
461 | corresponding tokens, and adds a field called ``predicted_tokens`` to the ``output_dict``.
462 | """
463 | predicted_indices = output_dict["predictions"]
464 | if not isinstance(predicted_indices, numpy.ndarray):
465 | predicted_indices = predicted_indices.detach().cpu().numpy()
466 | all_predicted_tokens = []
467 | for indices in predicted_indices:
468 | # Beam search gives us the top k results for each source sentence in the batch
469 | # but we just want the single best.
470 | if len(indices.shape) > 1:
471 | indices = indices[0]
472 | indices = list(indices)
473 | # Collect indices till the first end_symbol
474 | if self._end_index in indices:
475 | indices = indices[:indices.index(self._end_index)]
476 | predicted_tokens = [self.vocab.get_token_from_index(x, namespace=self._target_namespace)
477 | for x in indices]
478 | all_predicted_tokens.append(predicted_tokens)
479 | output_dict["predicted_tokens"] = all_predicted_tokens
480 | return output_dict
481 |
482 | def _encode(self, source_tokens: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
483 | # shape: (batch_size, max_input_sequence_length, encoder_input_dim)
484 | #embedded_input = self._source_embedder(source_tokens)
485 | # shape: (batch_size, max_input_sequence_length)
486 | source_mask = util.get_text_field_mask(source_tokens)
487 |
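# The diagram contributes a fixed 196 patch positions (14 x 14 feature map); they are never padded, so their mask is all ones.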
488 | img_mask = torch.ones(source_mask.shape[0], 196).long().cuda()
489 | concat_mask = torch.cat([img_mask, source_mask], 1)
490 |
491 | # shape: (batch_size, max_input_sequence_length, encoder_output_dim)
492 | #encoder_outputs = self._encoder(embedded_input, source_mask)
493 | encoder_outputs = self._encoder(source_tokens['tokens'], source_mask)
494 |
495 | return {
496 | "source_mask": source_mask, # source_mask,
497 | "concat_mask": concat_mask,
498 | "encoder_outputs": encoder_outputs,
499 | }
500 |
501 | def _init_decoder_state(self, state, lang_feats, img_feats, img_mask):
502 |
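# Initialize the decoder: h0 is a linear fusion (decode_transform) of the final language state
# and the attention-flattened image feature; c0 is all zeros.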
503 | batch_size = state["source_mask"].size(0)
504 | final_lang_feat = util.get_final_encoder_states(
505 | lang_feats,
506 | state["source_mask"],
507 | self._encoder.is_bidirectional())
508 | img_feat = self.attflat_img(img_feats, img_mask)
509 | feat = torch.cat([final_lang_feat, img_feat], 1)
510 | feat = self.decode_transform(feat)
511 | state["concat_feature"] = feat
512 |
513 | state["decoder_hidden"] = feat
514 | # C0 shape: (batch_size, decoder_output_dim)
515 | state["decoder_context"] = torch.zeros(batch_size, self._decoder_output_dim).cuda()
516 | # state["decoder_context"] = state["encoder_outputs"].new_zeros(batch_size, self._decoder_output_dim)
517 | return state
518 |
519 | def _forward_loop(self,
520 | state: Dict[str, torch.Tensor],
521 | target_tokens: Dict[str, torch.LongTensor] = None) -> Dict[str, torch.Tensor]:
522 | """
523 | Make forward pass during training or do greedy search during prediction.
524 |
525 | Notes
526 | -----
527 | We really only use the predictions from the method to test that beam search
528 | with a beam size of 1 gives the same results.
529 | """
530 | # shape: (batch_size, max_input_sequence_length)
531 | source_mask = state["source_mask"]
532 |
533 | batch_size = source_mask.size()[0]
534 |
535 | if target_tokens:
536 | # shape: (batch_size, max_target_sequence_length)
537 | targets = target_tokens["tokens"]
538 |
539 | _, target_sequence_length = targets.size()
540 |
541 | # The last input from the target is either padding or the end symbol.
542 | # Either way, we don't have to process it.
543 | num_decoding_steps = target_sequence_length - 1
544 | else:
545 | num_decoding_steps = self._max_decoding_steps
546 |
547 | # Initialize target predictions with the start index.
548 | # shape: (batch_size,)
549 | last_predictions = source_mask.new_full((batch_size,), fill_value=self._start_index)
550 |
551 | step_logits: List[torch.Tensor] = []
552 | step_predictions: List[torch.Tensor] = []
553 | for timestep in range(num_decoding_steps):
554 | if self.training and torch.rand(1).item() < self._scheduled_sampling_ratio:
555 | # With probability _scheduled_sampling_ratio during training, feed back the
556 | # model's own predictions from the previous step instead of the gold tokens.
557 | # shape: (batch_size,)
558 | input_choices = last_predictions
559 | elif not target_tokens:
560 | # shape: (batch_size,)
561 | input_choices = last_predictions
562 | else:
563 | # shape: (batch_size,)
564 | input_choices = targets[:, timestep]
565 |
566 | # shape: (batch_size, num_classes)
567 | # recurrent decoding
568 | output_projections, state = self._prepare_output_projections(input_choices, state)
569 |
570 | # list of tensors, shape: (batch_size, 1, num_classes)
571 | step_logits.append(output_projections.unsqueeze(1))
572 |
573 | # shape: (batch_size, num_classes)
574 | class_probabilities = F.softmax(output_projections, dim=-1)
575 |
576 | # shape (predicted_classes): (batch_size,)
577 | _, predicted_classes = torch.max(class_probabilities, 1)
578 |
579 | # shape (predicted_classes): (batch_size,)
580 | last_predictions = predicted_classes
581 |
582 | step_predictions.append(last_predictions.unsqueeze(1))
583 |
584 | # shape: (batch_size, num_decoding_steps)
585 | predictions = torch.cat(step_predictions, 1)
586 |
587 | output_dict = {"predictions": predictions}
588 |
589 | if target_tokens:
590 | # shape: (batch_size, num_decoding_steps, num_classes)
591 | logits = torch.cat(step_logits, 1)
592 |
593 | # Compute loss.
594 | target_mask = util.get_text_field_mask(target_tokens)
595 | loss = self._get_loss(logits, targets, target_mask)
596 | output_dict["loss"] = loss
597 |
598 | return output_dict
599 |
600 | def _forward_beam_search(self, state: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
601 | """Make forward pass during prediction using a beam search."""
602 | batch_size = state["source_mask"].size()[0]
603 | start_predictions = state["source_mask"].new_full((batch_size,), fill_value=self._start_index)
604 |
605 | # shape (all_top_k_predictions): (batch_size, beam_size, num_decoding_steps)
606 | # shape (log_probabilities): (batch_size, beam_size)
607 | all_top_k_predictions, log_probabilities = self._beam_search.search(
608 | start_predictions, state, self.take_step)
609 |
610 | output_dict = {
611 | "class_log_probabilities": log_probabilities,
612 | "predictions": all_top_k_predictions,
613 | }
614 | return output_dict
615 |
616 | def _prepare_output_projections(self,
617 | last_predictions: torch.Tensor,
618 | state: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: # pylint: disable=line-too-long
619 | """
620 | Decode the current state and the last prediction to produce projections
621 | into the target space, which can then be used to get probabilities of
622 | each target token for the next step.
623 |
624 | Inputs are the same as for `take_step()`.
625 | """
626 | # shape: (group_size, max_input_sequence_length, encoder_output_dim)
627 | encoder_outputs = state["encoder_outputs"]
628 |
629 | # shape: (group_size, max_input_sequence_length)
630 | # source_mask = state["source_mask"]
631 | source_mask = state["concat_mask"]
632 |
633 | # decoder_hidden and decoder_context are initialized from the encoder outputs in _init_decoder_state()
634 | # shape: (group_size, decoder_output_dim)
635 | decoder_hidden = state["decoder_hidden"]
636 | # shape: (group_size, decoder_output_dim)
637 | decoder_context = state["decoder_context"]
638 |
639 | # shape: (group_size, target_embedding_dim)
640 | embedded_input = self._target_embedder(last_predictions)
641 |
642 | if self._attention:
643 | # shape: (group_size, encoder_output_dim)
644 | attended_input = self._prepare_attended_input(decoder_hidden, encoder_outputs, source_mask)
645 |
646 | # shape: (group_size, decoder_output_dim + target_embedding_dim)
647 | decoder_input = torch.cat((attended_input, embedded_input), -1)
648 |
649 | else:
650 | # shape: (group_size, target_embedding_dim)
651 | decoder_input = embedded_input
652 |
653 | # shape (decoder_hidden): (batch_size, decoder_output_dim)
654 | # shape (decoder_context): (batch_size, decoder_output_dim)
655 |
656 | decoder_hidden, decoder_context = self._decoder_cell(
657 | decoder_input,
658 | (decoder_hidden, decoder_context))
659 |
660 | state["decoder_hidden"] = decoder_hidden
661 | state["decoder_context"] = decoder_context
662 |
663 | # shape: (group_size, num_classes)
664 | output_projections = self._output_projection_layer(decoder_hidden)
665 | """
666 | decoder_hidden = self._decoder_cell(
667 | decoder_input,
668 | (decoder_hidden))
669 |
670 | state["decoder_hidden"] = decoder_hidden
671 | state["decoder_context"] = decoder_hidden
672 |
673 | # shape: (group_size, num_classes)
674 | output_projections = self._output_projection_layer(decoder_hidden)
675 | """
676 | return output_projections, state
677 |
678 | def _prepare_attended_input(self,
679 | decoder_hidden_state: torch.LongTensor = None,
680 | encoder_outputs: torch.LongTensor = None,
681 | encoder_outputs_mask: torch.LongTensor = None) -> torch.Tensor:
682 | """Apply attention over encoder outputs and decoder state."""
684 | # Ensure the mask is also a FloatTensor; otherwise the multiplication inside
685 | # the attention module will complain.
685 | # shape: (batch_size, max_input_sequence_length)
686 | encoder_outputs_mask = encoder_outputs_mask.float()
687 |
688 | # shape: (batch_size, max_input_sequence_length)
689 | input_weights = self._attention(
690 | decoder_hidden_state, encoder_outputs, encoder_outputs_mask)
691 |
692 | # shape: (batch_size, encoder_output_dim)
693 | attended_input = util.weighted_sum(encoder_outputs, input_weights)
694 |
695 | return attended_input
696 |
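# Multi-label knowledge-point accuracy: threshold predictions at 0.5 and compute
# intersection-over-union against the binary targets, summed over the batch.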
697 | def multi_label_evaluation(self, input, target):
698 | one = torch.ones(target.shape).cuda()
699 | zero = torch.zeros(target.shape).cuda()
700 | res = torch.where(input > 0.5, one, zero)
701 |
702 | over = (res * target).sum(dim=1)
703 | union = res.sum(dim=1) + target.sum(dim=1) - over
704 | acc = over / union
705 |
706 | index = torch.isnan(acc)  # NaN appears when both prediction and target are all zeros, which counts as a correct prediction
707 | acc_fix = torch.where(index, torch.ones(acc.shape).cuda(), acc)
708 |
709 | acc_sum = acc_fix.sum().item()
710 |
711 | return acc_sum
712 |
713 | @staticmethod
714 | def _get_loss(logits: torch.LongTensor,
715 | targets: torch.LongTensor,
716 | target_mask: torch.LongTensor) -> torch.Tensor:
717 | """
718 | Compute loss.
719 |
720 | Takes logits (unnormalized outputs from the decoder) of size (batch_size,
721 | num_decoding_steps, num_classes), target indices of size (batch_size, num_decoding_steps+1)
722 | and corresponding masks of size (batch_size, num_decoding_steps+1) steps and computes cross
723 | entropy loss while taking the mask into account.
724 |
725 | The length of ``targets`` is expected to be greater than that of ``logits`` because the
726 | decoder does not need to compute the output corresponding to the last timestep of
727 | ``targets``. This method aligns the inputs appropriately to compute the loss.
728 |
729 | During training, we want the logit corresponding to timestep i to be similar to the target
730 | token from timestep i + 1. That is, the targets should be shifted by one timestep for
731 | appropriate comparison. Consider a single example where the target has 3 words, and
732 | padding is to 7 tokens.
733 | The complete sequence would correspond to  <S> w1  w2  w3  <E> <P> <P>
734 | and the mask would be                      1   1   1   1   1   0   0
735 | and let the logits be                      l1  l2  l3  l4  l5  l6
736 | We actually need to compare:
737 | the sequence            w1  w2  w3  <E> <P> <P>
738 | with masks              1   1   1   1   0   0
739 | against                 l1  l2  l3  l4  l5  l6
740 | (where the input was)   <S> w1  w2  w3  <E> <P>
741 | """
742 | # shape: (batch_size, num_decoding_steps)
743 | relevant_targets = targets[:, 1:].contiguous()
744 |
745 | # shape: (batch_size, num_decoding_steps)
746 | relevant_mask = target_mask[:, 1:].contiguous()
747 |
748 | return util.sequence_cross_entropy_with_logits(logits, relevant_targets, relevant_mask)
749 |
750 | @overrides
751 | def get_metrics(self, reset: bool = False) -> Dict[str, float]:
752 | all_metrics: Dict[str, float] = {}
753 | if self._bleu and not self.training:
754 | all_metrics.update(self._bleu.get_metric(reset=reset))
755 | # all_metrics.update({'acc': self._acc.get_metric(reset=reset)})
756 | all_metrics.update({'acc': self._acc.get_metric(reset=reset)})
757 | if len(self.new_acc) != 0:
758 | all_metrics.update({'new_acc': sum(self.new_acc)/len(self.new_acc)})
759 | print('Num of total, angle, len, other', len(self.new_acc), len(self.angle), len(self.length), len(self.other))
760 | if len(self.angle) != 0:
761 | all_metrics.update({'angle_acc': sum(self.angle)/len(self.angle)})
762 | if len(self.length) != 0:
763 | all_metrics.update({'length_acc': sum(self.length)/len(self.length)})
764 | if len(self.other) != 0:
765 | all_metrics.update({'other_acc': sum(self.other)/len(self.other)})
766 | all_metrics.update({'no_result': self._no_result.get_metric(reset=reset)})
767 |
768 | # if len(self.point_acc_list) != 0:
769 | # all_metrics.update({'point_acc': sum(self.point_acc_list) / len(self.point_acc_list)})
770 |
771 | return all_metrics
772 |
--------------------------------------------------------------------------------