├── GeoQA+ ├── ManualProgram │ ├── eval_equ.py │ └── operators.py ├── NGS_Aux.py ├── NGS_Aux_CKPT │ ├── NGS_Aux_CKPT │ │ ├── config.json │ │ ├── txt │ │ └── vocabulary │ │ │ ├── non_padded_namespaces.txt │ │ │ ├── tokens.txt │ │ │ └── txt │ └── txt ├── NGS_Aux_test.py ├── config │ ├── NGS_Aux.json │ └── txt ├── data │ ├── GeoQA2.2 │ │ ├── dev.pk │ │ └── test.pk │ ├── pretrain │ │ └── txt │ ├── sub_dataset_dict.pk │ └── tokens.txt ├── mcan.py ├── requirements.txt ├── resnet.py ├── save │ └── test │ │ ├── stderr.log │ │ ├── stdout.log │ │ └── txt └── utils.py ├── README.md ├── requirements.txt └── vocab.txt /GeoQA+/ManualProgram/eval_equ.py: -------------------------------------------------------------------------------- 1 | from ManualProgram import operators 2 | from inspect import getmembers, isfunction 3 | import itertools 4 | import math 5 | 6 | #dfsfsfgs 7 | constant = [30, 60, 90, 180, 360, math.pi, 0.618, 72, 540] 8 | op_dict = {0: 'g_equal', 1: 'g_double', 2: 'g_half', 3: 'g_add', 4: 'g_minus', 9 | 5: 'g_sin', 6: 'g_cos', 7: 'g_tan', 8: 'g_asin', 9: 'g_acos', 10 | 10: 'gougu_add', 11: 'gougu_minus', 12: 'g_bili', 11 | 13: 'g_mul', 14: 'g_divide', 15: 'cal_circle_area', 16: 'cal_circle_perimeter', 17: 'cal_cone',18: 'g_sqrt'} 12 | op_list = [op_dict[key] for key in sorted(op_dict.keys())] 13 | 14 | 15 | class Equations: 16 | def __init__(self): 17 | 18 | 19 | self.op_list = op_list 20 | self.op_num = {} 21 | self.call_op = {} 22 | self.exp_info = None 23 | self.results = [] 24 | self.max_step = 3 25 | self.max_len = 7 26 | for op in self.op_list: 27 | self.call_op[op] = eval('operators.{}'.format(op)) 28 | # self.call_op[op] = eval(op) 29 | self.op_num[op] = self.call_op[op].__code__.co_argcount 30 | 31 | def str2exp(self, inputs): 32 | inputs = inputs.split(',') 33 | exp = inputs.copy() 34 | for i, s in enumerate(inputs): 35 | if 'n' in s or 'v' in s or 'c' in s: 36 | exp[i] = s.replace('n', 'N_').replace('v', 'V_').replace('c', 'C_') 37 | else: 38 | exp[i] = op_dict[int(s[2:])] 39 | exp[i] = exp[i].strip() 40 | 41 | self.exp = exp 42 | return exp 43 | 44 | def excuate_equation(self, exp, source_nums=None): 45 | if source_nums is None: 46 | source_nums = self.exp_info['nums'] 47 | vars = [] 48 | idx = 0 49 | while idx < len(exp): 50 | op = exp[idx] 51 | if op not in self.op_list: 52 | return None 53 | op_nums = self.op_num[op] 54 | if idx + op_nums >= len(exp): 55 | return None 56 | excuate_nums = [] 57 | for tmp in exp[idx + 1: idx + 1 + op_nums]: 58 | try: 59 | if tmp[0] == 'N' and int(tmp[-1]) < len(source_nums): 60 | excuate_nums.append(source_nums[int(tmp[-1])]) 61 | elif tmp[0] == 'V' and int(tmp[-1]) < len(vars): 62 | excuate_nums.append(vars[int(tmp[-1])]) 63 | elif tmp[0] == 'C' and int(tmp[-1]) < len(constant): 64 | excuate_nums.append(constant[int(tmp[-1])]) 65 | else: 66 | return None 67 | except: 68 | return None 69 | idx += op_nums + 1 70 | v = self.call_op[op](*excuate_nums) 71 | if v is None: 72 | return None 73 | vars.append(v) 74 | return vars 75 | 76 | 77 | if __name__ == '__main__': 78 | eq = Equations() 79 | 80 | -------------------------------------------------------------------------------- /GeoQA+/ManualProgram/operators.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def g_equal(n1): # 0 5 | return n1 6 | 7 | def g_sqrt(n1): 8 | return math.sqrt(n1) 9 | 10 | 11 | def g_double(n1): # 1 12 | return n1*2 13 | 14 | 15 | def g_half(n1): # 2 16 | return n1/2 17 | 18 | 19 | def g_add(n1, n2): # 3 20 | 
return n1 + n2 21 | 22 | 23 | def g_minus(n1, n2): # 4 24 | return math.fabs(n1 - n2) 25 | 26 | 27 | def g_sin(n1): # 5 28 | if n1 % 15 == 0 and 0 <= n1 <= 180: 29 | return math.sin(n1/180*math.pi) 30 | return False 31 | 32 | 33 | def g_cos(n1): # 6 34 | if n1 % 15 == 0 and 0 <= n1 <= 180: 35 | return math.cos(n1/180*math.pi) 36 | return False 37 | 38 | 39 | def g_tan(n1): # 7 40 | if n1 % 15 == 0 and 5 <= n1 <= 85: 41 | return math.tan(n1/180*math.pi) 42 | return False 43 | 44 | 45 | def g_asin(n1): # 8 46 | if -1 < n1 < 1: 47 | n1 = math.asin(n1) 48 | n1 = math.degrees(n1) 49 | return n1 50 | return False 51 | 52 | 53 | def g_acos(n1): # 9 54 | if -1 < n1 < 1: 55 | n1 = math.acos(n1) 56 | n1 = math.degrees(n1) 57 | return n1 58 | return False 59 | 60 | 61 | def gougu_add(n1, n2): # 13 62 | return math.sqrt(n1*n1+n2*n2) 63 | 64 | 65 | def gougu_minus(n1, n2): # 14 66 | if n1 != n2: 67 | return math.sqrt(math.fabs(n1*n1-n2*n2)) 68 | return False 69 | 70 | 71 | def g_bili(n1, n2, n3): # 16 72 | if n1 > 0 and n2 > 0 and n3 > 0: 73 | return n1/n2*n3 74 | else: 75 | return False 76 | 77 | 78 | def g_mul(n1, n2): # 17 79 | return n1*n2 80 | 81 | 82 | def g_divide(n1, n2): # 18 83 | if n1 > 0 and n2 > 0: 84 | return n1/n2 85 | return False 86 | 87 | 88 | def cal_circle_area(n1): # 19 89 | return n1*n1*math.pi 90 | 91 | 92 | def cal_circle_perimeter(n1): # 20 93 | return 2*math.pi*n1 94 | 95 | 96 | def cal_cone(n1, n2): # 21 97 | return n1*n2*math.pi 98 | 99 | -------------------------------------------------------------------------------- /GeoQA+/NGS_Aux.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from typing import Dict, List, Tuple 3 | 4 | import numpy 5 | import pytorch_transformers 6 | from overrides import overrides 7 | import torch 8 | import torch.nn.functional as F 9 | import torch.nn as nn 10 | from torch.nn.modules.linear import Linear 11 | from torch.nn.modules.rnn import LSTMCell, RNN 12 | from torch.nn.modules.rnn import GRUCell 13 | from torch.nn.modules.transformer import TransformerEncoder 14 | 15 | from torch.nn.modules.transformer import TransformerDecoder 16 | from allennlp.common.checks import ConfigurationError 17 | from allennlp.common.util import START_SYMBOL, END_SYMBOL 18 | from allennlp.data.vocabulary import Vocabulary 19 | from allennlp.modules.attention import LegacyAttention 20 | from allennlp.modules import Attention, TextFieldEmbedder, Seq2SeqEncoder 21 | from allennlp.modules.similarity_functions import SimilarityFunction 22 | from allennlp.models.model import Model 23 | from allennlp.modules.token_embedders import Embedding 24 | from allennlp.nn import util 25 | from allennlp.nn.beam_search import BeamSearch 26 | from allennlp.training.metrics import BLEU 27 | # from allennlp.modules.seq2seq_encoders.pytorch_seq2seq_wrapper 28 | from ManualProgram.eval_equ import Equations 29 | from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper, StackedSelfAttentionEncoder 30 | 31 | from transformers import AutoModel, AutoTokenizer 32 | 33 | import random 34 | import warnings 35 | import math 36 | 37 | warnings.filterwarnings("ignore") 38 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 39 | 40 | torch.cuda.set_device(0) 41 | from utils import * 42 | 43 | from mcan import * 44 | 45 | # torch.backends.cudnn.enabled = False 46 | model_name = "data/pretrain/Roberta" 47 | 48 | @Model.register("MyEncoder") 49 | class Encoder(Model): 50 | def __init__(self, 51 | vocab: Vocabulary, 52 | 
input_dim: int, 53 | emb_dim: int, 54 | hid_dim: int, 55 | dropout: int): 56 | super(Encoder, self).__init__(vocab) 57 | self.input_dim = input_dim 58 | self.emb_dim = emb_dim 59 | self.hid_dim = hid_dim 60 | self.embedding = AutoModel.from_pretrained(model_name) 61 | self.trans = nn.Linear(emb_dim, hid_dim) 62 | self.norm = nn.LayerNorm(hid_dim) 63 | self.dropout = nn.Dropout(dropout) 64 | 65 | self.lstm_embedding = nn.Embedding(22128, hid_dim, padding_idx=0) 66 | self.lstm_dropout = nn.Dropout(0.5) 67 | self.lstm = torch.nn.LSTM(hid_dim, hid_dim, batch_first=True, bidirectional=True , num_layers=2, dropout=0.5) 68 | self.concat_trans = nn.Linear(hid_dim, hid_dim) 69 | self.concat_norm = nn.LayerNorm(hid_dim) 70 | self._encoder_output_dim = 512 71 | self._decoder_output_dim = 512 72 | 73 | @overrides 74 | def forward(self, src, source_mask): 75 | 76 | embedded = self.embedding(src, attention_mask=source_mask, return_dict=True, output_hidden_states=True) 77 | bert_output = embedded.last_hidden_state 78 | output = self.dropout(self.norm(torch.relu(self.trans(bert_output)))) 79 | lstm_embedding = self.lstm_dropout(self.lstm_embedding(src)) 80 | input_length = torch.sum(source_mask, dim=1).long().view(-1,).cpu() 81 | packed = nn.utils.rnn.pack_padded_sequence(lstm_embedding, input_length, batch_first=True, enforce_sorted=False) 82 | lstm_output, _ = self.lstm(packed) 83 | lstm_output, _ = nn.utils.rnn.pad_packed_sequence(lstm_output, batch_first=True) 84 | lstm_output = lstm_output[:, :, :self.hid_dim] + lstm_output[:, :, self.hid_dim:] 85 | output = output + lstm_output 86 | # output = torch.cat((output,lstm_output), dim=-1) 87 | output = self.concat_norm(torch.relu(self.concat_trans(output))) 88 | return output 89 | 90 | def get_output_dim(self): 91 | return self._encoder_output_dim 92 | 93 | def is_bidirectional(self) -> bool: 94 | return True 95 | 96 | # class lstm_encoder(torch.nn.Module): 97 | # def __init__(self,hid_dim:512): 98 | # super(RNN, self).__init__() 99 | # self.lstm_embedding = nn.Embedding(22128, hid_dim) 100 | # self.lstm_dropout = nn.Dropout(0.5) 101 | # self.lstm = torch.nn.LSTM(hid_dim, hid_dim, batch_first=True, bidirectional=True) 102 | # 103 | # def forward(self,src,source_mask): 104 | # lstm_embedding = self.lstm_dropout(self.lstm_embedding(src)) 105 | # packed = nn.utils.rnn.pack_padded_sequence(lstm_embedding, source_mask) 106 | # lstm_output, _ = self.lstm(packed) 107 | # lstm_output, _ = nn.utils.rnn.pad_packed_sequence(lstm_output) 108 | # lstm_output = lstm_output[:, :, :self.hid_dim] + lstm_output[:, :, self.hid_dim:] 109 | # 110 | # return lstm_output 111 | 112 | 113 | @Model.register("geo_s2s") 114 | class SimpleSeq2Seq(Model): 115 | """ 116 | This ``SimpleSeq2Seq`` class is a :class:`Model` which takes a sequence, encodes it, and then 117 | uses the encoded representations to decode another sequence. You can use this as the basis for 118 | a neural machine translation system, an abstractive summarization system, or any other common 119 | seq2seq problem. The model here is simple, but should be a decent starting place for 120 | implementing recent models for these tasks. 121 | 122 | Parameters 123 | ---------- 124 | vocab : ``Vocabulary``, required 125 | Vocabulary containing source and target vocabularies. They may be under the same namespace 126 | (`tokens`) or the target tokens can have a different namespace, in which case it needs to 127 | be specified as `target_namespace`. 
128 | source_embedder : ``TextFieldEmbedder``, required 129 | Embedder for source side sequences 130 | encoder : ``Seq2SeqEncoder``, required 131 | The encoder of the "encoder/decoder" model 132 | max_decoding_steps : ``int`` 133 | Maximum length of decoded sequences. 134 | target_namespace : ``str``, optional (default = 'tokens') 135 | If the target side vocabulary is different from the source side's, you need to specify the 136 | target's namespace here. If not, we'll assume it is "tokens", which is also the default 137 | choice for the source side, and this might cause them to share vocabularies. 138 | target_embedding_dim : ``int``, optional (default = source_embedding_dim) 139 | You can specify an embedding dimensionality for the target side. If not, we'll use the same 140 | value as the source embedder's. 141 | attention : ``Attention``, optional (default = None) 142 | If you want to use attention to get a dynamic summary of the encoder outputs at each step 143 | of decoding, this is the function used to compute similarity between the decoder hidden 144 | state and encoder outputs. 145 | attention_function: ``SimilarityFunction``, optional (default = None) 146 | This is if you want to use the legacy implementation of attention. This will be deprecated 147 | since it consumes more memory than the specialized attention modules. 148 | beam_size : ``int``, optional (default = None) 149 | Width of the beam for beam search. If not specified, greedy decoding is used. 150 | scheduled_sampling_ratio : ``float``, optional (default = 0.) 151 | At each timestep during training, we sample a random number between 0 and 1, and if it is 152 | not less than this value, we use the ground truth labels for the whole batch. Else, we use 153 | the predictions from the previous time step for the whole batch. If this value is 0.0 154 | (default), this corresponds to teacher forcing, and if it is 1.0, it corresponds to not 155 | using target side ground truth labels. See the following paper for more information: 156 | `Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks. Bengio et al., 157 | 2015 `_. 158 | use_bleu : ``bool``, optional (default = True) 159 | If True, the BLEU metric will be calculated during validation. 160 | """ 161 | 162 | def __init__(self, 163 | vocab: Vocabulary, 164 | source_embedder: TextFieldEmbedder, 165 | encoder: Encoder, 166 | max_decoding_steps: int, 167 | knowledge_points_ratio=0, 168 | attention: Attention = True, 169 | attention_function: SimilarityFunction = None, 170 | beam_size: int = None, 171 | target_namespace: str = "tokens", 172 | target_embedding_dim: int = None, 173 | scheduled_sampling_ratio: float = 0., 174 | resnet_pretrained=None, 175 | use_bleu: bool = True) -> None: 176 | super(SimpleSeq2Seq, self).__init__(vocab) 177 | resnet = build_model() 178 | 179 | if resnet_pretrained is not None: 180 | resnet.load_state_dict(torch.load(resnet_pretrained)) 181 | print('##### Checkpoint Loaded! 
#####') 182 | else: 183 | print("No Diagram Pretrain !!!") 184 | self.resnet = resnet 185 | #encoder_layer = nn.TransformerDecoderLayer(1024, 8, batch_first=True) 186 | #self.image_tfm = nn.TransformerEncoder(encoder_layer, num_layers=1) 187 | 188 | self.channel_transform = torch.nn.Linear(1024, 512) 189 | 190 | __C = Cfgs() 191 | self.mcan = MCA_ED(__C) 192 | self.attflat_img = AttFlat(__C) 193 | self.attflat_lang = AttFlat(__C) # not use 194 | self.decode_transform = torch.nn.Linear(1024, 512) 195 | self._equ = Equations() 196 | 197 | self._target_namespace = target_namespace 198 | self._scheduled_sampling_ratio = scheduled_sampling_ratio 199 | 200 | # We need the start symbol to provide as the input at the first timestep of decoding, and 201 | # end symbol as a way to indicate the end of the decoded sequence. 202 | self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace) 203 | self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace) 204 | 205 | if use_bleu: 206 | pad_index = self.vocab.get_token_index(self.vocab._padding_token, 207 | self._target_namespace) # pylint: disable=protected-access 208 | self._bleu = BLEU(ngram_weights=(1, 0, 0, 0), 209 | exclude_indices={pad_index, self._end_index, self._start_index}) 210 | else: 211 | self._bleu = None 212 | self._acc = Average() 213 | self._no_result = Average() 214 | 215 | # remember to clear after evaluation 216 | self.new_acc = [] 217 | self.angle = [] 218 | self.length = [] 219 | self.area = [] 220 | self.other = [] 221 | self.point_acc_list = [] 222 | 223 | # At prediction time, we use a beam search to find the most likely sequence of target tokens. 224 | beam_size = beam_size or 1 225 | self._max_decoding_steps = max_decoding_steps 226 | self._beam_search = BeamSearch(self._end_index, max_steps=max_decoding_steps, beam_size=beam_size) 227 | 228 | # Dense embedding of source vocab tokens. 229 | self._source_embedder = source_embedder 230 | 231 | # Encodes the sequence of source embeddings into a sequence of hidden states. 232 | self._encoder = encoder # encoder 233 | 234 | # self.multiHead_Attn = nn.MultiheadAttention(512, num_heads=4, dropout=0.2) 235 | 236 | num_classes = self.vocab.get_vocab_size(self._target_namespace) 237 | 238 | # Attention mechanism applied to the encoder output for each step. 239 | # TODO: attention 240 | if attention: 241 | if attention_function: 242 | raise ConfigurationError("You can only specify an attention module or an " 243 | "attention function, but not both.") 244 | self._attention = LegacyAttention() 245 | elif attention_function: 246 | self._attention = LegacyAttention(attention_function) 247 | else: 248 | self._attention = None 249 | print("No Attention!") 250 | exit() 251 | 252 | # Dense embedding of vocab words in the target space. 253 | target_embedding_dim = target_embedding_dim or source_embedder.get_output_dim() 254 | self._target_embedder = Embedding(num_classes, target_embedding_dim) 255 | 256 | # Decoder output dim needs to be the same as the encoder output dim since we initialize the 257 | # hidden state of the decoder with the final hidden state of the encoder. 258 | self._encoder_output_dim = self._encoder.get_output_dim() 259 | self._decoder_output_dim = self._encoder_output_dim 260 | 261 | if self._attention: 262 | # If using attention, a weighted average over encoder outputs will be concatenated 263 | # to the previous target embedding to form the input to the decoder at each 264 | # time step. 
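# (With the checkpoint config in NGS_Aux_CKPT/config.json — encoder hid_dim 512,
# target_embedding_dim 512 — this concatenation makes the decoder input
# 1024-dimensional.)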
265 | 266 | self._decoder_input_dim = self._decoder_output_dim + target_embedding_dim 267 | 268 | else: 269 | # Otherwise, the input to the decoder is just the previous target embedding. 270 | self._decoder_input_dim = target_embedding_dim 271 | 272 | # We'll use an LSTM cell as the recurrent cell that produces a hidden state 273 | # for the decoder at each time step. 274 | self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim) 275 | # self._decoder_cell = GRUCell(self._decoder_input_dim, self._decoder_output_dim) 276 | # We project the hidden state from the decoder into the output vocabulary space 277 | # in order to get log probabilities of each target token, at each time step. 278 | self._output_projection_layer = Linear(self._decoder_output_dim, num_classes) 279 | 280 | # knowledge points 281 | self.point_ratio = knowledge_points_ratio 282 | if self.point_ratio != 0: 283 | self.points_norm = LayerNorm(__C.FLAT_OUT_SIZE) 284 | self.points_proj = nn.Linear(__C.FLAT_OUT_SIZE, 77) 285 | self.points_criterion = nn.BCELoss() 286 | 287 | def take_step(self, 288 | last_predictions: torch.Tensor, 289 | state: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: 290 | """ 291 | Take a decoding step. This is called by the beam search class. 292 | 293 | Parameters 294 | ---------- 295 | last_predictions : ``torch.Tensor`` 296 | A tensor of shape ``(group_size,)``, which gives the indices of the predictions 297 | during the last time step. 298 | state : ``Dict[str, torch.Tensor]`` 299 | A dictionary of tensors that contain the current state information 300 | needed to predict the next step, which includes the encoder outputs, 301 | the source mask, and the decoder hidden state and context. Each of these 302 | tensors has shape ``(group_size, *)``, where ``*`` can be any other number 303 | of dimensions. 304 | 305 | Returns 306 | ------- 307 | Tuple[torch.Tensor, Dict[str, torch.Tensor]] 308 | A tuple of ``(log_probabilities, updated_state)``, where ``log_probabilities`` 309 | is a tensor of shape ``(group_size, num_classes)`` containing the predicted 310 | log probability of each class for the next step, for each item in the group, 311 | while ``updated_state`` is a dictionary of tensors containing the encoder outputs, 312 | source mask, and updated decoder hidden state and context. 313 | 314 | Notes 315 | ----- 316 | We treat the inputs as a batch, even though ``group_size`` is not necessarily 317 | equal to ``batch_size``, since the group may contain multiple states 318 | for each source sentence in the batch. 319 | """ 320 | # shape: (group_size, num_classes) 321 | output_projections, state = self._prepare_output_projections(last_predictions, state) 322 | 323 | # shape: (group_size, num_classes) 324 | class_log_probabilities = F.log_softmax(output_projections, dim=-1) 325 | 326 | return class_log_probabilities, state 327 | 328 | @overrides 329 | def forward(self, # type: ignore 330 | image, source_nums, choice_nums, label, type, 331 | source_tokens: Dict[str, torch.LongTensor], 332 | point_label=None, 333 | target_tokens: Dict[str, torch.LongTensor] = None, **kwargs) -> Dict[str, torch.Tensor]: 334 | # pylint: disable=arguments-differ 335 | """ 336 | Make foward pass with decoder logic for producing the entire target sequence. 337 | 338 | Parameters 339 | ---------- 340 | source_tokens : ``Dict[str, torch.LongTensor]`` 341 | The output of `TextField.as_array()` applied on the source `TextField`. 
This will be 342 | passed through a `TextFieldEmbedder` and then through an encoder. 343 | target_tokens : ``Dict[str, torch.LongTensor]``, optional (default = None) 344 | Output of `Textfield.as_array()` applied on target `TextField`. We assume that the 345 | target tokens are also represented as a `TextField`. 346 | 347 | Returns 348 | ------- 349 | Dict[str, torch.Tensor] 350 | """ 351 | bs = len(label) 352 | state = self._encode(source_tokens) 353 | 354 | with torch.no_grad(): 355 | img_feats = self.resnet(image) 356 | # (N, C, 14, 14) -> (N, 196, C) 357 | img_feats = img_feats.reshape(img_feats.shape[0], img_feats.shape[1], -1).transpose(1, 2) 358 | img_mask = make_mask(img_feats) 359 | #print(img_feats.size()) 360 | #img_feats = self.image_tfm(img_feats) 361 | img_feats = self.channel_transform(img_feats) 362 | 363 | lang_feats = state['encoder_outputs'] 364 | # mask the digital encoding question without embedding, i.e. source_tokens(already index to number) 365 | lang_mask = make_mask(source_tokens['tokens'].unsqueeze(2)) 366 | 367 | _, img_feats = self.mcan(lang_feats, img_feats, lang_mask, img_mask) 368 | 369 | # (N, 308, 512) 370 | # for attention, image first and then lang, using mask 371 | state['encoder_outputs'] = torch.cat([img_feats, lang_feats], 1) 372 | 373 | # decode 374 | state = self._init_decoder_state(state, lang_feats, img_feats, img_mask) 375 | output_dict = self._forward_loop(state, target_tokens) # recurrent decoding for LSTM 376 | 377 | # knowledge points 378 | if self.point_ratio != 0: 379 | concat_feature = state["concat_feature"] 380 | point_feat = self.points_norm(concat_feature) 381 | point_feat = self.points_proj(point_feat) 382 | point_pred = torch.sigmoid(point_feat) 383 | point_loss = self.points_criterion(point_pred, point_label) * self.point_ratio 384 | output_dict["point_pred"] = point_pred 385 | output_dict["point_loss"] = point_loss 386 | output_dict["loss"] += point_loss 387 | 388 | # TODO: if testing, beam search and evaluation 389 | if not self.training: 390 | state = self._init_decoder_state(state, lang_feats, img_feats, img_mask) # TODO 391 | predictions = self._forward_beam_search(state) 392 | output_dict.update(predictions) 393 | 394 | if target_tokens and self._bleu: 395 | # shape: (batch_size, beam_size, max_sequence_length) 396 | top_k_predictions = output_dict["predictions"] 397 | 398 | # execute the decode programs to calculate the accuracy 399 | # suc_knt, no_knt = 0, 0 400 | suc_knt, no_knt, = 0, 0 401 | 402 | selected_programs = [] 403 | for b in range(bs): 404 | hypo = None 405 | used_hypo = None 406 | choice = None 407 | for i in range(self._beam_search.beam_size): 408 | if choice is not None: 409 | break 410 | hypo = list(top_k_predictions[b][i]) 411 | if self._end_index in list(hypo): 412 | hypo = hypo[:hypo.index(self._end_index)] 413 | hypo = [self.vocab.get_token_from_index(idx.item()) for idx in hypo] 414 | # print(hypo) 415 | res = self._equ.excuate_equation(hypo, source_nums[b]) 416 | # print(res, choice_nums[b]) 417 | if res is not None and len(res) > 0: 418 | for j in range(4): 419 | if choice_nums[b][j] is not None and math.fabs(res[-1] - choice_nums[b][j]) < 0.001: 420 | choice = j 421 | used_hypo = hypo 422 | 423 | selected_programs.append([hypo]) 424 | 425 | if choice is None: 426 | no_knt += 1 427 | if choice == label[b]: 428 | suc_knt += 1 429 | 430 | if random.random() < 0.05: 431 | print('selected_programs', selected_programs) 432 | 433 | # calculate BLEU 434 | best_predictions = top_k_predictions[:, 0, :] 435 | 
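# Note: this BLEU is unigram-only (ngram_weights=(1, 0, 0, 0)) and is computed
# over predicted program tokens, with @start@, @end@ and padding excluded.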
self._bleu(best_predictions, target_tokens["tokens"]) 436 | self._acc(suc_knt / bs) 437 | self._no_result(no_knt / bs) 438 | 439 | return output_dict 440 | 441 | def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 442 | """ 443 | Finalize predictions. 444 | 445 | This method overrides ``Model.decode``, which gets called after ``Model.forward``, at test 446 | time, to finalize predictions. The logic for the decoder part of the encoder-decoder lives 447 | within the ``forward`` method. 448 | 449 | This method trims the output predictions to the first end symbol, replaces indices with 450 | corresponding tokens, and adds a field called ``predicted_tokens`` to the ``output_dict``. 451 | """ 452 | predicted_indices = output_dict["predictions"] 453 | if not isinstance(predicted_indices, numpy.ndarray): 454 | predicted_indices = predicted_indices.detach().cpu().numpy() 455 | all_predicted_tokens = [] 456 | for indices in predicted_indices: 457 | # Beam search gives us the top k results for each source sentence in the batch 458 | # but we just want the single best. 459 | if len(indices.shape) > 1: 460 | indices = indices[0] 461 | indices = list(indices) 462 | # Collect indices till the first end_symbol 463 | if self._end_index in indices: 464 | indices = indices[:indices.index(self._end_index)] 465 | predicted_tokens = [self.vocab.get_token_from_index(x, namespace=self._target_namespace) 466 | for x in indices] 467 | 468 | all_predicted_tokens.append(predicted_tokens) 469 | output_dict["predicted_tokens"] = all_predicted_tokens 470 | return output_dict 471 | 472 | def _encode(self, source_tokens: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 473 | # shape: (batch_size, max_input_sequence_length, encoder_input_dim) 474 | 475 | # embedded_input = self._source_embedder(source_tokens) 476 | # shape: (batch_size, max_input_sequence_length) 477 | source_mask = util.get_text_field_mask(source_tokens) 478 | # source mask are used in attention 479 | img_mask = torch.ones(source_mask.shape[0], 196).long().cuda() 480 | concat_mask = torch.cat([img_mask, source_mask], 1) 481 | # shape: 482 | 483 | encoder_outputs = self._encoder(source_tokens['tokens'], source_mask) 484 | 485 | return { 486 | "source_mask": source_mask, # source_mask, 487 | "concat_mask": concat_mask, 488 | "encoder_outputs": encoder_outputs, 489 | } 490 | 491 | def _init_decoder_state(self, state, lang_feats, img_feats, img_mask): 492 | 493 | batch_size = state["source_mask"].size(0) 494 | final_lang_feat = util.get_final_encoder_states( 495 | lang_feats, 496 | state["source_mask"], 497 | self._encoder.is_bidirectional()) 498 | img_feat = self.attflat_img(img_feats, img_mask) 499 | feat = torch.cat([final_lang_feat, img_feat], 1) 500 | feat = self.decode_transform(feat) 501 | state["concat_feature"] = feat 502 | 503 | state["decoder_hidden"] = feat 504 | # C0 shape: (batch_size, decoder_output_dim) 505 | state["decoder_context"] = torch.zeros(batch_size, self._decoder_output_dim).cuda() 506 | # state["decoder_context"] = state["encoder_outputs"].new_zeros(batch_size, self._decoder_output_dim) 507 | return state 508 | 509 | def _forward_loop(self, 510 | state: Dict[str, torch.Tensor], 511 | target_tokens: Dict[str, torch.LongTensor] = None) -> Dict[str, torch.Tensor]: 512 | """ 513 | Make forward pass during training or do greedy search during prediction. 
514 | 515 | Notes 516 | ----- 517 | We really only use the predictions from the method to test that beam search 518 | with a beam size of 1 gives the same results. 519 | """ 520 | # shape: (batch_size, max_input_sequence_length) 521 | source_mask = state["source_mask"] 522 | 523 | batch_size = source_mask.size()[0] 524 | 525 | if target_tokens: 526 | # shape: (batch_size, max_target_sequence_length) 527 | targets = target_tokens["tokens"] 528 | 529 | _, target_sequence_length = targets.size() 530 | 531 | # The last input from the target is either padding or the end symbol. 532 | # Either way, we don't have to process it. 533 | num_decoding_steps = target_sequence_length - 1 534 | else: 535 | num_decoding_steps = self._max_decoding_steps 536 | 537 | # Initialize target predictions with the start index. 538 | # shape: (batch_size,) 539 | last_predictions = source_mask.new_full((batch_size,), fill_value=self._start_index) 540 | 541 | step_logits: List[torch.Tensor] = [] 542 | step_predictions: List[torch.Tensor] = [] 543 | for timestep in range(num_decoding_steps): 544 | if self.training and torch.rand(1).item() < self._scheduled_sampling_ratio: 545 | # Use gold tokens at test time and at a rate of 1 - _scheduled_sampling_ratio 546 | # during training. 547 | # shape: (batch_size,) 548 | input_choices = last_predictions 549 | elif not target_tokens: 550 | # shape: (batch_size,) 551 | input_choices = last_predictions 552 | else: 553 | # shape: (batch_size,) 554 | input_choices = targets[:, timestep] 555 | 556 | # shape: (batch_size, num_classes) 557 | # recurrent decoding 558 | output_projections, state = self._prepare_output_projections(input_choices, state) 559 | 560 | # list of tensors, shape: (batch_size, 1, num_classes) 561 | step_logits.append(output_projections.unsqueeze(1)) 562 | 563 | # shape: (batch_size, num_classes) 564 | class_probabilities = F.softmax(output_projections, dim=-1) 565 | 566 | # shape (predicted_classes): (batch_size,) 567 | _, predicted_classes = torch.max(class_probabilities, 1) 568 | 569 | # shape (predicted_classes): (batch_size,) 570 | last_predictions = predicted_classes 571 | 572 | step_predictions.append(last_predictions.unsqueeze(1)) 573 | 574 | # shape: (batch_size, num_decoding_steps) 575 | predictions = torch.cat(step_predictions, 1) 576 | 577 | output_dict = {"predictions": predictions} 578 | 579 | if target_tokens: 580 | # shape: (batch_size, num_decoding_steps, num_classes) 581 | logits = torch.cat(step_logits, 1) 582 | 583 | # Compute loss. 
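# _get_loss (defined below) shifts the targets by one position (targets[:, 1:]),
# so the logit at decoding step i is scored against the target token at step
# i + 1 and the leading @start@ symbol is dropped. E.g. for a hypothetical
# target program [@start@, g_minus, N_0, N_1, @end@], the four step logits are
# compared with [g_minus, N_0, N_1, @end@].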
584 | target_mask = util.get_text_field_mask(target_tokens) 585 | loss = self._get_loss(logits, targets, target_mask) 586 | output_dict["loss"] = loss 587 | 588 | return output_dict 589 | 590 | def _forward_beam_search(self, state: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 591 | """Make forward pass during prediction using a beam search.""" 592 | batch_size = state["source_mask"].size()[0] 593 | start_predictions = state["source_mask"].new_full((batch_size,), fill_value=self._start_index) 594 | 595 | # shape (all_top_k_predictions): (batch_size, beam_size, num_decoding_steps) 596 | # shape (log_probabilities): (batch_size, beam_size) 597 | all_top_k_predictions, log_probabilities = self._beam_search.search( 598 | start_predictions, state, self.take_step) 599 | 600 | output_dict = { 601 | "class_log_probabilities": log_probabilities, 602 | "predictions": all_top_k_predictions, 603 | } 604 | return output_dict 605 | 606 | def _prepare_output_projections(self, 607 | last_predictions: torch.Tensor, 608 | state: Dict[str, torch.Tensor]) -> Tuple[ 609 | torch.Tensor, Dict[str, torch.Tensor]]: # pylint: disable=line-too-long 610 | """ 611 | Decode current state and last prediction to produce produce projections 612 | into the target space, which can then be used to get probabilities of 613 | each target token for the next step. 614 | Inputs are the same as for `take_step()`. 615 | """ 616 | # shape: (group_size, max_input_sequence_length, encoder_output_dim) 617 | encoder_outputs = state["encoder_outputs"] 618 | 619 | # shape: (group_size, max_input_sequence_length) 620 | # source_mask = state["source_mask"] 621 | source_mask = state["concat_mask"] 622 | 623 | # decoder_hidden and decoder_context are get from encoder_outputs in _init_decoder_state() 624 | # shape: (group_size, decoder_output_dim) 625 | decoder_hidden = state["decoder_hidden"] 626 | # shape: (group_size, decoder_output_dim) 627 | decoder_context = state["decoder_context"] 628 | 629 | # shape: (group_size, target_embedding_dim) 630 | embedded_input = self._target_embedder(last_predictions) 631 | 632 | if self._attention: 633 | # shape: (group_size, encoder_output_dim) 634 | attended_input = self._prepare_attended_input(decoder_hidden, encoder_outputs, source_mask) 635 | 636 | # shape: (group_size, decoder_output_dim + target_embedding_dim) 637 | decoder_input = torch.cat((attended_input, embedded_input), -1) 638 | 639 | else: 640 | # shape: (group_size, target_embedding_dim) 641 | decoder_input = embedded_input 642 | 643 | # shape (decoder_hidden): (batch_size, decoder_output_dim) 644 | # shape (decoder_context): (batch_size, decoder_output_dim) 645 | 646 | decoder_hidden, decoder_context = self._decoder_cell( 647 | decoder_input, 648 | (decoder_hidden, decoder_context)) 649 | 650 | state["decoder_hidden"] = decoder_hidden 651 | state["decoder_context"] = decoder_context 652 | 653 | # shape: (group_size, num_classes) 654 | output_projections = self._output_projection_layer(decoder_hidden) 655 | """ 656 | decoder_hidden = self._decoder_cell( 657 | decoder_input, 658 | (decoder_hidden)) 659 | 660 | state["decoder_hidden"] = decoder_hidden 661 | state["decoder_context"] = decoder_hidden 662 | 663 | # shape: (group_size, num_classes) 664 | output_projections = self._output_projection_layer(decoder_hidden) 665 | """ 666 | return output_projections, state 667 | 668 | def _prepare_attended_input(self, 669 | decoder_hidden_state: torch.LongTensor = None, 670 | encoder_outputs: torch.LongTensor = None, 671 | encoder_outputs_mask: 
torch.LongTensor = None) -> torch.Tensor: 672 | """Apply attention over encoder outputs and decoder state.""" 673 | # Ensure mask is also a FloatTensor. Or else the multiplication within 674 | # attention will complain. 675 | # shape: (batch_size, max_input_sequence_length) 676 | encoder_outputs_mask = encoder_outputs_mask.float() 677 | 678 | # shape: (batch_size, max_input_sequence_length) 679 | input_weights = self._attention( 680 | decoder_hidden_state, encoder_outputs, encoder_outputs_mask) 681 | 682 | # shape: (batch_size, encoder_output_dim) 683 | attended_input = util.weighted_sum(encoder_outputs, input_weights) 684 | 685 | return attended_input 686 | 687 | def multi_label_evaluation(self, input, target): 688 | one = torch.ones(target.shape).cuda() 689 | zero = torch.zeros(target.shape).cuda() 690 | res = torch.where(input > 0.5, one, zero) 691 | 692 | over = (res * target).sum(dim=1) 693 | union = res.sum(dim=1) + target.sum(dim=1) - over 694 | acc = over / union 695 | 696 | index = torch.isnan(acc) # nan appear when both pred and target are zeros, which means makes right answer 697 | acc_fix = torch.where(index, torch.ones(acc.shape).cuda(), acc) 698 | 699 | acc_sum = acc_fix.sum().item() 700 | 701 | return acc_sum 702 | 703 | @staticmethod 704 | def _get_loss(logits: torch.LongTensor, 705 | targets: torch.LongTensor, 706 | target_mask: torch.LongTensor) -> torch.Tensor: 707 | """ 708 | Compute loss. 709 | 710 | Takes logits (unnormalized outputs from the decoder) of size (batch_size, 711 | num_decoding_steps, num_classes), target indices of size (batch_size, num_decoding_steps+1) 712 | and corresponding masks of size (batch_size, num_decoding_steps+1) steps and computes cross 713 | entropy loss while taking the mask into account. 714 | 715 | The length of ``targets`` is expected to be greater than that of ``logits`` because the 716 | decoder does not need to compute the output corresponding to the last timestep of 717 | ``targets``. This method aligns the inputs appropriately to compute the loss. 718 | 719 | During training, we want the logit corresponding to timestep i to be similar to the target 720 | token from timestep i + 1. That is, the targets should be shifted by one timestep for 721 | appropriate comparison. Consider a single example where the target has 3 words, and 722 | padding is to 7 tokens. 723 | The complete sequence would correspond to w1 w2 w3

724 | and the mask would be 1 1 1 1 1 0 0
725 | and let the logits be l1 l2 l3 l4 l5 l6
726 | We actually need to compare:
727 | the sequence w1 w2 w3 <E> <P> <P>
728 | with masks 1 1 1 1 0 0
729 | against l1 l2 l3 l4 l5 l6
730 | (where the input was) <S> w1 w2 w3 <E> <P>

731 | """ 732 | # shape: (batch_size, num_decoding_steps) 733 | relevant_targets = targets[:, 1:].contiguous() 734 | 735 | # shape: (batch_size, num_decoding_steps) 736 | relevant_mask = target_mask[:, 1:].contiguous() 737 | 738 | return util.sequence_cross_entropy_with_logits(logits, relevant_targets, relevant_mask) 739 | 740 | @overrides 741 | def get_metrics(self, reset: bool = False) -> Dict[str, float]: 742 | all_metrics: Dict[str, float] = {} 743 | if self._bleu and not self.training: 744 | all_metrics.update(self._bleu.get_metric(reset=reset)) 745 | all_metrics.update({'acc': self._acc.get_metric(reset=reset)}) 746 | all_metrics.update({'no_result': self._no_result.get_metric(reset=reset)}) 747 | 748 | return all_metrics 749 | -------------------------------------------------------------------------------- /GeoQA+/NGS_Aux_CKPT/NGS_Aux_CKPT/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "type": "s2s_manual_reader", 4 | "tokenizer": { 5 | "word_splitter":{ 6 | "type": "just_spaces" 7 | } 8 | }, 9 | "source_token_indexer": { 10 | "tokens": { 11 | "type": "pretrained_transformer", 12 | "model_name": "data/pretrain/Roberta", 13 | "do_lowercase": false 14 | } 15 | }, 16 | "target_token_indexer": { 17 | "tokens": { 18 | "type": "single_id" 19 | } 20 | } 21 | }, 22 | 23 | "train_data_path": "data/GeoQA2.2/train.pk", 24 | "validation_data_path": "data/GeoQA2.2/dev.pk", 25 | "test_data_path" : "data/GeoQA2.2/test.pk", 26 | "model": { 27 | "type": "geo_s2s", 28 | "max_decoding_steps": 16, 29 | "beam_size": 10, 30 | 31 | "target_embedding_dim": 512, 32 | "scheduled_sampling_ratio": 0, 33 | "resnet_pretrained": "data/pretrain/best_jigsaw_model_state_dict", 34 | "knowledge_points_ratio": 0, 35 | "source_embedder": { 36 | "token_embedders": { 37 | 38 | } 39 | }, 40 | "encoder": { 41 | "input_dim": 21128, 42 | "emb_dim": 768, 43 | "hid_dim": 512, 44 | "dropout": 0.5 45 | } 46 | }, 47 | "iterator": { 48 | "type": "basic", 49 | "batch_size": 32 50 | }, 51 | "trainer": { 52 | "validation_metric": "+acc", 53 | "learning_rate_scheduler": { 54 | "type": "reduce_on_plateau", 55 | "factor": 0.5, 56 | "mode": "max", 57 | "patience": 5 58 | }, 59 | "num_epochs": 100, 60 | "grad_norm": 10.0, 61 | "cuda_device": 0, 62 | 63 | "optimizer": { 64 | "type": "adam", 65 | "lr": 1e-3, 66 | "parameter_groups": [ 67 | [["mcan", "channel_transform", "attflat_img", "attflat_lang", "decode_transform"], {"lr": 1e-5}], 68 | [["resnet"], {"lr": 1e-5}], 69 | [["source_embedder","encoder.embedding"],{"lr": 2e-5}], 70 | [[ "encoder.concat_trans", "encoder.lstm_embedding","encoder.trans", "encoder.norm", "encoder.concat_norm"],{"lr": 1e-3}] 71 | ] 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /GeoQA+/NGS_Aux_CKPT/NGS_Aux_CKPT/txt: -------------------------------------------------------------------------------- 1 | test 2 | -------------------------------------------------------------------------------- /GeoQA+/NGS_Aux_CKPT/NGS_Aux_CKPT/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /GeoQA+/NGS_Aux_CKPT/NGS_Aux_CKPT/vocabulary/tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | , 3 | N_0 4 | A 5 | B 6 | C 7 | @start@ 8 | @end@ 9 | 的 10 | D 11 | N_1 12 | = 
13 | ∠ 14 | V_0 15 | g_minus 16 | O 17 | ( 18 | ) 19 | 如 20 | 图 21 | 点 22 | 为 23 | 则 24 | E 25 | ° 26 | 是 27 | g_half 28 | N_2 29 | V_1 30 | ⊙ 31 | 于 32 | C_3 33 | g_double 34 | 在 35 | 、 36 | 度 37 | 中 38 | 线 39 | 长 40 | 上 41 | 直 42 | △ 43 | g_add 44 | 若 45 | C_2 46 | F 47 | 数 48 | P 49 | g_divide 50 | 一 51 | 径 52 | 边 53 | 形 54 | 分 55 | 交 56 | m 57 | 角 58 | 圆 59 | g_bili 60 | c 61 | g_mul 62 | 面 63 | 平 64 | ∥ 65 | 与 66 | 等 67 | V_2 68 | 接 69 | 知 70 | 已 71 | 切 72 | 别 73 | } 74 | { 75 | . 76 | gougu_minus 77 | 半 78 | 小 79 | 四 80 | 且 81 | 两 82 | 相 83 | 弦 84 | ⊥ 85 | M 86 | gougu_add 87 | 个 88 | 三 89 | 高 90 | a 91 | 1 92 | 连 93 | N_3 94 | 周 95 | g_equal 96 | 米 97 | 积 98 | 所 99 | 大 100 | 2 101 | 内 102 | 距 103 | 到 104 | 行 105 | 那 106 | 么 107 | 测 108 | C_4 109 | 延 110 | N 111 | 过 112 | 方 113 | g_sin 114 | 离 115 | 时 116 | 值 117 | 对 118 | G 119 | 和 120 | 示 121 | 得 122 | 心 123 | 地 124 | 顶 125 | t 126 | 段 127 | 垂 128 | 处 129 | \ 130 | ~ 131 | 作 132 | 影 133 | ′ 134 | 条 135 | n 136 | l 137 | 果 138 | 外 139 | 向 140 | ▱ 141 | : 142 | 量 143 | 树 144 | 坡 145 | 正 146 | r 147 | 以 148 | 将 149 | 动 150 | R 151 | b 152 | f 153 | 部 154 | 水 155 | 杆 156 | 锥 157 | 弧 158 | 这 159 | 板 160 | s 161 | ⁀ 162 | + 163 | 底 164 | g_tan 165 | N_4 166 | 沿 167 | 子 168 | 斜 169 | 使 170 | 放 171 | 最 172 | cal_circle_area 173 | 同 174 | 纸 175 | cal_cone 176 | 有 177 | 足 178 | 端 179 | 用 180 | Q 181 | 从 182 | 位 183 | H 184 | 成 185 | i 186 | 了 187 | 下 188 | 侧 189 | 不 190 | 阴 191 | 菱 192 | 后 193 | S 194 | 学 195 | 尺 196 | _ 197 | 扇 198 | 其 199 | 宽 200 | 把 201 | 转 202 | 某 203 | 都 204 | 3 205 | 置 206 | 重 207 | 好 208 | 梯 209 | 旗 210 | 间 211 | 折 212 | 楼 213 | 合 214 | 当 215 | g_cos 216 | 出 217 | 它 218 | 路 219 | 落 220 | 经 221 | 偏 222 | 旋 223 | 射 224 | 并 225 | 之 226 | N_5 227 | 明 228 | C_1 229 | 块 230 | 要 231 | 矩 232 | 东 233 | 北 234 | 恰 235 | 标 236 | 灯 237 | 结 238 | 他 239 | 此 240 | 河 241 | 竹 242 | 移 243 | o 244 | 意 245 | 绕 246 | 叠 247 | α 248 | 竿 249 | . 
250 | 比 251 | 截 252 | 计 253 | 海 254 | 片 255 | 墙 256 | 光 257 | 发 258 | 该 259 | 针 260 | 船 261 | 应 262 | 含 263 | 达 264 | 球 265 | 腰 266 | 着 267 | 棵 268 | 电 269 | 看 270 | 走 271 | 母 272 | 根 273 | 塔 274 | 轴 275 | 夹 276 | 任 277 | 定 278 | 校 279 | 开 280 | 似 281 | 器 282 | 帽 283 | 自 284 | N_6 285 | 视 286 | 镜 287 | 桌 288 | 木 289 | 里 290 | 公 291 | 均 292 | 组 293 | 称 294 | 现 295 | 再 296 | 至 297 | 航 298 | 体 299 | 4 300 | 西 301 | 设 302 | ∽ 303 | 身 304 | 山 305 | / 306 | 表 307 | 前 308 | 断 309 | 横 310 | 取 311 | 臂 312 | C_5 313 | 按 314 | 由 315 | 油 316 | 少 317 | 柱 318 | 岛 319 | 想 320 | 顺 321 | 工 322 | 第 323 | 可 324 | 画 325 | 坝 326 | 铁 327 | 邻 328 | 们 329 | 道 330 | 制 331 | 次 332 | 做 333 | C_0 334 | 管 335 | 她 336 | 短 337 | 张 338 | 需 339 | ^ 340 | 口 341 | 求 342 | 能 343 | 被 344 | 伞 345 | 池 346 | 轮 347 | 缝 348 | 围 349 | 阳 350 | 物 351 | 岸 352 | 关 353 | 仰 354 | 速 355 | 桥 356 | 升 357 | 刻 358 | 机 359 | 车 360 | 人 361 | 目 362 | 种 363 | N_7 364 | I 365 | 运 366 | 来 367 | 优 368 | 另 369 | 观 370 | 网 371 | 建 372 | ≈ 373 | 塘 374 | 摆 375 | 艘 376 | 南 377 | 劣 378 | 台 379 | 坐 380 | 起 381 | 进 382 | 互 383 | 站 384 | 约 385 | 去 386 | 钢 387 | 天 388 | N_8 389 | - 390 | 飞 391 | 俯 392 | 厘 393 | 逆 394 | 选 395 | ; 396 | 具 397 | 入 398 | 反 399 | 园 400 | 脚 401 | 右 402 | 村 403 | 0 404 | 架 405 | 五 406 | 展 407 | 忽 408 | 略 409 | 何 410 | 又 411 | 原 412 | 扶 413 | 古 414 | 多 415 | 生 416 | ' 417 | ≌ 418 | 绳 419 | 空 420 | 据 421 | 甲 422 | h 423 | 排 424 | 己 425 | 共 426 | 家 427 | 环 428 | 副 429 | 活 430 | 打 431 | 城 432 | 照 433 | 无 434 | 场 435 | " 436 | 乙 437 | 利 438 | 保 439 | 头 440 | 系 441 | x 442 | 跷 443 | L 444 | 算 445 | d 446 | 驶 447 | 立 448 | 然 449 | 区 450 | 力 451 | 各 452 | 剪 453 | 刚 454 | 二 455 | 全 456 | cal_circle_perimeter 457 | 皮 458 | 撑 459 | 准 460 | 击 461 | 支 462 | 渔 463 | 课 464 | 几 465 | 索 466 | 热 467 | 气 468 | 栋 469 | 锐 470 | 式 471 | 兴 472 | 修 473 | 满 474 | 近 475 | 固 476 | 桶 477 | 华 478 | T 479 | 座 480 | ? 
481 | $ 482 | 爬 483 | 通 484 | 而 485 | 市 486 | ” 487 | 遮 488 | 烟 489 | 囱 490 | 样 491 | 备 492 | θ 493 | 余 494 | 左 495 | 景 496 | 也 497 | 筑 498 | y 499 | 滑 500 | 每 501 | 货 502 | 株 503 | 堤 504 | 只 505 | 找 506 | 带 507 | 趣 508 | 毯 509 | “ 510 | 补 511 | 深 512 | 料 513 | 考 514 | 状 515 | 加 516 | 投 517 | 探 518 | 察 519 | N_9 520 | 倾 521 | 化 522 | 先 523 | 程 524 | 靠 525 | 门 526 | 我 527 | 布 528 | 持 529 | 法 530 | 窗 531 | 石 532 | 引 533 | K 534 | 马 535 | 单 536 | 秒 537 | 模 538 | 手 539 | 异 540 | 铺 541 | 些 542 | 钟 543 | 风 544 | 亮 545 | 凉 546 | 亭 547 | 阶 548 | 玻 549 | 璃 550 | 房 551 | 拉 552 | 爸 553 | • 554 | 隧 555 | 午 556 | 送 557 | 商 558 | 迎 559 | 绿 560 | 库 561 | 栏 562 | 降 563 | 请 564 | 你 565 | 规 566 | 框 567 | 花 568 | 拐 569 | □ 570 | 就 571 | 装 572 | 型 573 | 卷 574 | 年 575 | 竖 576 | 筒 577 | 调 578 | 颖 579 | 源 580 | 太 581 | 即 582 | 棒 583 | 王 584 | 踏 585 | 泡 586 | 读 587 | 传 588 | 棱 589 | 痕 590 | ≠ 591 | 帮 592 | β 593 | 记 594 | 草 595 | 弯 596 | 强 597 | 倒 598 | 斗 599 | 整 600 | 估 601 | 者 602 | 零 603 | 件 604 | 孔 605 | 捣 606 | 住 607 | 格 608 | 教 609 | 回 610 | 师 611 | 基 612 | 号 613 | 牧 614 | 蚂 615 | 蚁 616 | 主 617 | 细 618 | 虚 619 | 丝 620 | 变 621 | 题 622 | 实 623 | 名 624 | 游 625 | 问 626 | 会 627 | 参 628 | 冰 629 | 淇 630 | 淋 631 | 域 632 | 试 633 | 拱 634 | 较 635 | 翻 636 | ② 637 | 玲 638 | 缺 639 | 舰 640 | 登 641 | ⌒ 642 | 辆 643 | 植 644 | 隔 645 | 指 646 | 监 647 | 显 648 | Ð 649 | 童 650 | 白 651 | 拼 652 | 很 653 | 习 654 | 信 655 | 超 656 | 缘 657 | 完 658 | 盖 659 | 漏 660 | 杯 661 | 新 662 | 节 663 | 日 664 | 综 665 | 眼 666 | 睛 667 | 退 668 | 旁 669 | 案 670 | 军 671 | 划 672 | 步 673 | N_11 674 | 壁 675 | 字 676 | 残 677 | 继 678 | 续 679 | 傅 680 | ʹ 681 | 拦 682 | 挥 683 | 街 684 | 港 685 | g_asin 686 | ’ 687 | 杠 688 | 丄 689 | 但 690 | 够 691 | 始 692 | 践 693 | 双 694 | 覆 695 | 览 696 | 低 697 | 乘 698 | e 699 | 李 700 | 损 701 | 致 702 | 构 703 | 伸 704 | 缩 705 | 插 706 | 芳 707 | 拍 708 | 远 709 | 留 710 | 束 711 | 锯 712 | 千 713 | 差 714 | ① 715 | 梁 716 | 美 717 | 盆 718 | 质 719 | 民 720 | 陆 721 | 改 722 | 见 723 | 确 724 | 垫 725 | 盘 726 | g 727 | 雨 728 | 桩 729 | 险 730 | 林 731 | 庄 732 | 盒 733 | + 734 | 压 735 | ㎝ 736 | 篱 737 | 笆 738 | 总 739 | 翼 740 | 国 741 | 除 742 | 污 743 | 腿 744 | 叉 745 | 洋 746 | 丑 747 | 扫 748 | 晚 749 | 虑 750 | 彩 751 | 圣 752 | 诞 753 | 呈 754 | 梢 755 | 像 756 | 办 757 | 律 758 | 老 759 | 碎 760 | 份 761 | 槟 762 | 榔 763 | 望 764 | 挡 765 | 广 766 | 牌 767 | 厚 768 | 跳 769 | 假 770 | 星 771 | 决 772 | 室 773 | 槽 774 | 屋 775 | 菜 776 | N_10 777 | ③ 778 | 钝 779 | 给 780 | 扎 781 | 精 782 | 玩 783 | 坪 784 | 护 785 | 巡 786 | 政 787 | 府 788 | 胜 789 | 抢 790 | 餐 791 | 滚 792 | 厅 793 | 黑 794 | 终 795 | 9 796 | 户 797 | 控 798 | 5 799 | 粗 800 | 露 801 | 快 802 | 文 803 | 湖 804 | 买 805 | 闸 806 | 著 807 | 匀 808 | 红 809 | 州 810 | 操 811 | 凡 812 | 例 813 | 螺 814 | 态 815 | 农 816 | 虎 817 | 告 818 | 弹 819 | 营 820 | ○ 821 | 割 822 | 本 823 | 情 824 | 秋 825 | 伯 826 | 羊 827 | 因 828 | 联 829 | 列 830 | 吴 831 | 圈 832 | 义 833 | … 834 | 六 835 | 没 836 | 凿 837 | 蛋 838 | 8 839 | 月 840 | 铅 841 | 增 842 | 常 843 | 适 844 | 庆 845 | 金 846 | 止 847 | 刮 848 | 刷 849 | 背 850 | 员 851 | 厦 852 | 队 853 | 简 854 | 店 855 | 踩 856 | 档 857 | 必 858 | 须 859 | 证 860 | 珠 861 | 吸 862 | 还 863 | 柄 864 | 客 865 | 楔 866 | 七 867 | 巧 868 | 骨 869 | 答 870 | 喷 871 | 荆 872 | 赛 873 | 镭 874 | 聪 875 | 箱 876 | 曲 877 | 隙 878 |  879 | 息 880 | 收 881 | < 882 | 输 883 | € 884 | 裁 885 | 兰 886 | 十 887 | 艺 888 | 术 889 | 耗 890 | 复 891 | 戏 892 | 丽 893 | 薄 894 | 材 895 | 瓷 896 | 壶 897 | 或 898 | 钉 899 | 抽 900 | 采 901 | 攀 902 | 九 903 | 级 904 | 班 905 | 忙 906 | 伟 907 | 沾 908 | 碰 909 | 盏 910 | 夏 911 | 矮 912 | g_acos 913 | 伐 914 | 跨 915 | 孩 916 | 象 917 | 限 918 | 养 919 | 拴 920 | 吃 921 | 6 922 | 骤 
923 | 7 924 | 箭 925 | 院 926 | 范 927 | ☉ 928 | 墨 929 | 央 930 | 委 931 | 舒 932 | 便 933 | 越 934 | C_6 935 | > 936 | 別 937 | 究 938 | 解 939 | > 940 | 随 941 | 停 942 | 汽 943 | 境 944 | 栓 945 | 兔 946 | 套 947 | 包 948 | 居 949 | 潜 950 | 艇 951 | 失 952 | 搜 953 | 笔 954 | 礁 955 | 触 956 | V_N_0 957 | 泳 958 | 幢 959 | 鹅 960 | 岭 961 | 瞰 962 | 末 963 | 寻 964 | 疑 965 | 震 966 | 派 967 | 悬 968 | 崖 969 | 易 970 | 说 971 | 助 972 | 注 973 | 往 974 | 宾 975 | 馆 976 | 撞 977 | 袋 978 | 书 979 | 属 980 | 克 981 | 造 982 | 灰 983 | 栽 984 | 占 985 | N_21 986 | 绍 987 | 乡 988 | p 989 | 牵 990 | 饮 991 | 赶 992 | 蔬 993 | 刘 994 | 筝 995 | 雕 996 | 塑 997 | 撬 998 | 翘 999 | 依 1000 | 施 1001 | 挖 1002 | 爷 1003 | 轩 1004 | 凯 1005 | 冒 1006 | 拢 1007 | 帐 1008 | 篷 1009 | 择 1010 | 蜡 1011 | 烛 1012 | 毛 1013 | 丹 1014 | 迪 1015 | 男 1016 | 安 1017 | X 1018 | Y 1019 | 仪 1020 | 裂 1021 | 迭 1022 | ɑ 1023 | 颗 1024 | 柏 1025 | 代 1026 | 赵 1027 | 爽 1028 | 乐 1029 | 剩 1030 | w 1031 | 罐 1032 | 演 1033 | 届 1034 | 硬 1035 | 尖 1036 | 浸 1037 | 焦 1038 | 摄 1039 | 育 1040 | 突 1041 | 事 1042 | 令 1043 | 靶 1044 | 训 1045 | 练 1046 | 枪 1047 | 瞄 1048 | 轻 1049 | 微 1050 | 抖 1051 | 拿 1052 | 《 1053 | 科 1054 | 》 1055 | 散 1056 | 负 1057 | 荡 1058 | 链 1059 | 让 1060 | 真 1061 | ◎ 1062 | 鸡 1063 | 辟 1064 | 绣 1065 | 遵 1066 | 哪 1067 | 糕 1068 | 衡 1069 | 掉 1070 | 倍 1071 | 隆 1072 | 举 1073 | 阅 1074 | 兵 1075 | 崭 1076 | 貌 1077 | 激 1078 | 坚 1079 | 念 1080 | 欲 1081 | 既 1082 | 牢 1083 | 匠 1084 | N_12 1085 | 享 1086 | 验 1087 | 觉 1088 | 骑 1089 | 黄 1090 | 唯 1091 | 宣 1092 | 世 1093 | 博 1094 | 研 1095 | ≥ 1096 | z 1097 | 写 1098 | 绝 1099 | 括 1100 | 京 1101 | 奥 1102 | 声 1103 | 纳 1104 | 静 1105 | 提 1106 | 防 1107 | 洪 1108 | 业 1109 | 印 1110 | 救 1111 | 杭 1112 | 郊 1113 | 暗 1114 | 跟 1115 | 踪 1116 | 鱼 1117 | 群 1118 | 危 1119 | 惠 1120 | 雅 1121 | k 1122 | 故 1123 | 霍 1124 | 邱 1125 | 县 1126 | 纵 1127 | 浅 1128 | 早 1129 | 私 1130 | 礼 1131 | 旅 1132 | 欣 1133 | 赏 1134 | 色 1135 | 嘉 1136 | 毗 1137 | 社 1138 | 漂 1139 | 浮 1140 | 幕 1141 | 朝 1142 | 汶 1143 | 川 1144 | 砍 1145 | 敏 1146 | 供 1147 | 酒 1148 | 绑 1149 | 厂 1150 | 衣 1151 | J 1152 | 破 1153 | V 1154 | 镶 1155 | 嵌 1156 | 坏 1157 | 椅 1158 | 识 1159 | 挂 1160 | 狭 1161 | 吮 1162 | 烧 1163 | 豆 1164 | 浆 1165 | 巾 1166 | 飓 1167 | 灾 1168 | 害 1169 | 干 1170 | - 1171 | 圳 1172 | 巨 1173 | 济 1174 | 垃 1175 | 圾 1176 | 贤 1177 | 則 1178 | 辺 1179 | 及 1180 | 局 1181 | 际 1182 | 清 1183 | 顾 1184 | 食 1185 | 棚 1186 | 剖 1187 | 绸 1188 | 衔 1189 | 况 1190 | 温 1191 | 季 1192 | 凳 1193 | 阻 1194 | 吗 1195 | 叫 1196 | 翔 1197 | 迹 1198 | 枳 1199 | 牛 1200 | u 1201 | q 1202 | 雪 1203 | 春 1204 | 荒 1205 | 井 1206 | 扬 1207 | 灌 1208 | 汇 1209 | γ 1210 | 品 1211 | 查 1212 | 理 1213 | 键 1214 | 涨 1215 | 妙 1216 | 莹 1217 | 羽 1218 | 脱 1219 | 战 1220 | 曾 1221 | 眩 1222 | 勇 1223 | 夺 1224 | 冠 1225 | 檐 1226 | 卡 1227 | 坯 1228 | ⊿ 1229 | 错 1230 | ∘ 1231 | 购 1232 | 厢 1233 | 搭 1234 | 辅 1235 | 勾 1236 | 股 1237 | 善 1238 | 性 1239 | C_7 1240 | C_8 1241 | g_sqrt 1242 | -------------------------------------------------------------------------------- /GeoQA+/NGS_Aux_CKPT/NGS_Aux_CKPT/vocabulary/txt: -------------------------------------------------------------------------------- 1 | test 2 | -------------------------------------------------------------------------------- /GeoQA+/NGS_Aux_CKPT/txt: -------------------------------------------------------------------------------- 1 | test 2 | -------------------------------------------------------------------------------- /GeoQA+/NGS_Aux_test.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple 2 | 3 | import numpy 4 | from overrides import overrides 5 | import torch 6 | 
import torch.nn.functional as F 7 | import torch.nn as nn 8 | from torch.nn.modules.linear import Linear 9 | from torch.nn.modules.rnn import LSTMCell 10 | from torch.nn.modules.rnn import GRUCell 11 | from allennlp.common.checks import ConfigurationError 12 | from allennlp.common.util import START_SYMBOL, END_SYMBOL 13 | from allennlp.data.vocabulary import Vocabulary 14 | from allennlp.modules.attention import LegacyAttention 15 | from allennlp.modules import Attention, TextFieldEmbedder, Seq2SeqEncoder 16 | from allennlp.modules.similarity_functions import SimilarityFunction 17 | from allennlp.models.model import Model 18 | from allennlp.modules.token_embedders import Embedding 19 | from allennlp.nn import util 20 | from allennlp.nn.beam_search import BeamSearch 21 | from allennlp.training.metrics import BLEU 22 | 23 | from ManualProgram.eval_equ import Equations 24 | from transformers import AutoModel, AutoTokenizer 25 | 26 | import random 27 | import warnings 28 | import math 29 | warnings.filterwarnings("ignore") 30 | torch.cuda.set_device(0) 31 | 32 | no_result_id=[] 33 | right_id=[] 34 | wrong_manual=[] 35 | noresult_manual=[] 36 | from utils import * 37 | 38 | from mcan import * 39 | import json 40 | model_name = "data/pretrain/Roberta" 41 | @Model.register("MyEncoder") 42 | class Encoder(Model): 43 | def __init__(self, 44 | vocab: Vocabulary, 45 | input_dim: int, 46 | emb_dim: int, 47 | hid_dim: int, 48 | dropout: int): 49 | super(Encoder, self).__init__(vocab) 50 | self.input_dim = input_dim 51 | self.emb_dim = emb_dim 52 | self.hid_dim = hid_dim 53 | self.embedding = AutoModel.from_pretrained(model_name) 54 | self.trans = nn.Linear(emb_dim, hid_dim) 55 | self.norm = nn.LayerNorm(hid_dim) 56 | self.dropout = nn.Dropout(dropout) 57 | self.lstm_embedding = nn.Embedding(22128, hid_dim, padding_idx=0) 58 | self.lstm_dropout = nn.Dropout(0.5) 59 | self.lstm = torch.nn.LSTM(hid_dim, hid_dim, batch_first=True, bidirectional=True, num_layers=2, dropout=0.5) 60 | self.concat_trans = nn.Linear(hid_dim, hid_dim) 61 | self.concat_norm = nn.LayerNorm(hid_dim) 62 | self._encoder_output_dim = 512 63 | self._decoder_output_dim = 512 64 | 65 | @overrides 66 | def forward(self, src, source_mask): 67 | 68 | embedded = self.embedding(src, attention_mask=source_mask, return_dict=True, output_hidden_states=True) 69 | bert_output = embedded.last_hidden_state 70 | output = self.dropout(self.norm(torch.relu(self.trans(bert_output)))) 71 | lstm_embedding = self.lstm_dropout(self.lstm_embedding(src)) 72 | input_length = torch.sum(source_mask, dim=1).long().view(-1,).cpu() 73 | packed = nn.utils.rnn.pack_padded_sequence(lstm_embedding, input_length, batch_first=True, enforce_sorted=False) 74 | lstm_output, _ = self.lstm(packed) 75 | lstm_output, _ = nn.utils.rnn.pad_packed_sequence(lstm_output, batch_first=True) 76 | lstm_output = lstm_output[:, :, :self.hid_dim] + lstm_output[:, :, self.hid_dim:] 77 | output = output + lstm_output 78 | # output = torch.cat((output,lstm_output), dim=-1) 79 | output = self.concat_norm(torch.relu(self.concat_trans(output))) 80 | return output 81 | 82 | def get_output_dim(self): 83 | return self._encoder_output_dim 84 | 85 | def is_bidirectional(self) -> bool: 86 | return True 87 | 88 | @Model.register("geo_s2s") 89 | class SimpleSeq2Seq(Model): 90 | """ 91 | This ``SimpleSeq2Seq`` class is a :class:`Model` which takes a sequence, encodes it, and then 92 | uses the encoded representations to decode another sequence. 
You can use this as the basis for 93 | a neural machine translation system, an abstractive summarization system, or any other common 94 | seq2seq problem. The model here is simple, but should be a decent starting place for 95 | implementing recent models for these tasks. 96 | 97 | Parameters 98 | ---------- 99 | vocab : ``Vocabulary``, required 100 | Vocabulary containing source and target vocabularies. They may be under the same namespace 101 | (`tokens`) or the target tokens can have a different namespace, in which case it needs to 102 | be specified as `target_namespace`. 103 | source_embedder : ``TextFieldEmbedder``, required 104 | Embedder for source side sequences 105 | encoder : ``Seq2SeqEncoder``, required 106 | The encoder of the "encoder/decoder" model 107 | max_decoding_steps : ``int`` 108 | Maximum length of decoded sequences. 109 | target_namespace : ``str``, optional (default = 'tokens') 110 | If the target side vocabulary is different from the source side's, you need to specify the 111 | target's namespace here. If not, we'll assume it is "tokens", which is also the default 112 | choice for the source side, and this might cause them to share vocabularies. 113 | target_embedding_dim : ``int``, optional (default = source_embedding_dim) 114 | You can specify an embedding dimensionality for the target side. If not, we'll use the same 115 | value as the source embedder's. 116 | attention : ``Attention``, optional (default = None) 117 | If you want to use attention to get a dynamic summary of the encoder outputs at each step 118 | of decoding, this is the function used to compute similarity between the decoder hidden 119 | state and encoder outputs. 120 | attention_function: ``SimilarityFunction``, optional (default = None) 121 | This is if you want to use the legacy implementation of attention. This will be deprecated 122 | since it consumes more memory than the specialized attention modules. 123 | beam_size : ``int``, optional (default = None) 124 | Width of the beam for beam search. If not specified, greedy decoding is used. 125 | scheduled_sampling_ratio : ``float``, optional (default = 0.) 126 | At each timestep during training, we sample a random number between 0 and 1, and if it is 127 | not less than this value, we use the ground truth labels for the whole batch. Else, we use 128 | the predictions from the previous time step for the whole batch. If this value is 0.0 129 | (default), this corresponds to teacher forcing, and if it is 1.0, it corresponds to not 130 | using target side ground truth labels. See the following paper for more information: 131 | `Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks. Bengio et al., 132 | 2015 `_. 133 | use_bleu : ``bool``, optional (default = True) 134 | If True, the BLEU metric will be calculated during validation. 
135 | """ 136 | 137 | def __init__(self, 138 | vocab: Vocabulary, 139 | source_embedder: TextFieldEmbedder, 140 | encoder: Encoder, 141 | max_decoding_steps: int, 142 | knowledge_points_ratio = 0, 143 | attention: Attention = True, 144 | attention_function: SimilarityFunction = None, 145 | beam_size: int = None, 146 | target_namespace: str = "tokens", 147 | target_embedding_dim: int = None, 148 | scheduled_sampling_ratio: float = 0., 149 | resnet_pretrained = None, 150 | use_bleu: bool = True) -> None: 151 | super(SimpleSeq2Seq, self).__init__(vocab) 152 | 153 | resnet = build_model() 154 | 155 | if resnet_pretrained is not None: 156 | resnet.load_state_dict(torch.load(resnet_pretrained)) 157 | print('##### Checkpoint Loaded! #####') 158 | else: 159 | print("No Diagram Pretrain !!!") 160 | self.resnet = resnet 161 | 162 | self.channel_transform = torch.nn.Linear(1024, 512) 163 | 164 | __C = Cfgs() 165 | self.mcan = MCA_ED(__C) 166 | self.attflat_img = AttFlat(__C) 167 | self.attflat_lang = AttFlat(__C) # not use 168 | 169 | self.decode_transform = torch.nn.Linear(1024, 512) 170 | 171 | self._equ = Equations() 172 | 173 | self._target_namespace = target_namespace 174 | self._scheduled_sampling_ratio = scheduled_sampling_ratio 175 | 176 | # We need the start symbol to provide as the input at the first timestep of decoding, and 177 | # end symbol as a way to indicate the end of the decoded sequence. 178 | self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace) 179 | self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace) 180 | 181 | if use_bleu: 182 | pad_index = self.vocab.get_token_index(self.vocab._padding_token, self._target_namespace) # pylint: disable=protected-access 183 | self._bleu = BLEU(ngram_weights=(1, 0, 0, 0), exclude_indices={pad_index, self._end_index, self._start_index}) 184 | else: 185 | self._bleu = None 186 | self._acc = Average() 187 | self._no_result = Average() 188 | 189 | # remember to clear after evaluation 190 | self.new_acc = [] 191 | self.angle = [] 192 | self.length = [] 193 | self.area = [] 194 | self.other = [] 195 | self.point_acc_list = [] 196 | self.save_results = dict() 197 | 198 | # At prediction time, we use a beam search to find the most likely sequence of target tokens. 199 | beam_size = beam_size or 1 200 | self._max_decoding_steps = max_decoding_steps 201 | self._beam_search = BeamSearch(self._end_index, max_steps=max_decoding_steps, beam_size=beam_size) 202 | 203 | # Dense embedding of source vocab tokens. 204 | self._source_embedder = source_embedder 205 | 206 | # Encodes the sequence of source embeddings into a sequence of hidden states. 207 | self._encoder = encoder 208 | 209 | num_classes = self.vocab.get_vocab_size(self._target_namespace) 210 | 211 | # Attention mechanism applied to the encoder output for each step. 212 | # TODO: attention 213 | if attention: 214 | if attention_function: 215 | raise ConfigurationError("You can only specify an attention module or an " 216 | "attention function, but not both.") 217 | self._attention = LegacyAttention() 218 | elif attention_function: 219 | self._attention = LegacyAttention(attention_function) 220 | else: 221 | self._attention = None 222 | print("No Attention!") 223 | exit() 224 | 225 | # Dense embedding of vocab words in the target space. 
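# num_classes (the target-namespace vocabulary size computed above) sizes both this
# embedding and the final output projection layer; target_embedding_dim falls back
# to the source embedder's output dim when it is not set in the config.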
226 | target_embedding_dim = target_embedding_dim or source_embedder.get_output_dim() 227 | self._target_embedder = Embedding(num_classes, target_embedding_dim) 228 | 229 | # Decoder output dim needs to be the same as the encoder output dim since we initialize the 230 | # hidden state of the decoder with the final hidden state of the encoder. 231 | self._encoder_output_dim = self._encoder.get_output_dim() 232 | self._decoder_output_dim = self._encoder_output_dim 233 | 234 | if self._attention: 235 | # If using attention, a weighted average over encoder outputs will be concatenated 236 | # to the previous target embedding to form the input to the decoder at each 237 | # time step. 238 | self._decoder_input_dim = self._decoder_output_dim + target_embedding_dim 239 | else: 240 | # Otherwise, the input to the decoder is just the previous target embedding. 241 | self._decoder_input_dim = target_embedding_dim 242 | 243 | # We'll use an LSTM cell as the recurrent cell that produces a hidden state 244 | # for the decoder at each time step. 245 | self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim) 246 | #self._decoder_cell = GRUCell(self._decoder_input_dim, self._decoder_output_dim) 247 | # We project the hidden state from the decoder into the output vocabulary space 248 | # in order to get log probabilities of each target token, at each time step. 249 | self._output_projection_layer = Linear(self._decoder_output_dim, num_classes) 250 | # knowledge points 251 | self.point_ratio = knowledge_points_ratio 252 | if self.point_ratio != 0: 253 | self.points_norm = LayerNorm(__C.FLAT_OUT_SIZE) 254 | self.points_proj = nn.Linear(__C.FLAT_OUT_SIZE, 77) 255 | self.points_criterion = nn.BCELoss() 256 | 257 | def take_step(self, 258 | last_predictions: torch.Tensor, 259 | state: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: 260 | """ 261 | Take a decoding step. This is called by the beam search class. 262 | 263 | Parameters 264 | ---------- 265 | last_predictions : ``torch.Tensor`` 266 | A tensor of shape ``(group_size,)``, which gives the indices of the predictions 267 | during the last time step. 268 | state : ``Dict[str, torch.Tensor]`` 269 | A dictionary of tensors that contain the current state information 270 | needed to predict the next step, which includes the encoder outputs, 271 | the source mask, and the decoder hidden state and context. Each of these 272 | tensors has shape ``(group_size, *)``, where ``*`` can be any other number 273 | of dimensions. 274 | 275 | Returns 276 | ------- 277 | Tuple[torch.Tensor, Dict[str, torch.Tensor]] 278 | A tuple of ``(log_probabilities, updated_state)``, where ``log_probabilities`` 279 | is a tensor of shape ``(group_size, num_classes)`` containing the predicted 280 | log probability of each class for the next step, for each item in the group, 281 | while ``updated_state`` is a dictionary of tensors containing the encoder outputs, 282 | source mask, and updated decoder hidden state and context. 283 | 284 | Notes 285 | ----- 286 | We treat the inputs as a batch, even though ``group_size`` is not necessarily 287 | equal to ``batch_size``, since the group may contain multiple states 288 | for each source sentence in the batch. 
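This method is passed as the ``step`` function to ``BeamSearch.search`` in
        ``_forward_beam_search`` below.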
289 | """ 290 | # shape: (group_size, num_classes) 291 | output_projections, state = self._prepare_output_projections(last_predictions, state) 292 | 293 | # shape: (group_size, num_classes) 294 | class_log_probabilities = F.log_softmax(output_projections, dim=-1) 295 | 296 | return class_log_probabilities, state 297 | 298 | @overrides 299 | def forward(self, # type: ignore 300 | image, source_nums, choice_nums, label, type, data_id, manual_program, 301 | source_tokens: Dict[str, torch.LongTensor], 302 | point_label = None, 303 | target_tokens: Dict[str, torch.LongTensor] = None, **kwargs) -> Dict[str, torch.Tensor]: 304 | # pylint: disable=arguments-differ 305 | """ 306 | Make foward pass with decoder logic for producing the entire target sequence. 307 | 308 | Parameters 309 | ---------- 310 | source_tokens : ``Dict[str, torch.LongTensor]`` 311 | The output of `TextField.as_array()` applied on the source `TextField`. This will be 312 | passed through a `TextFieldEmbedder` and then through an encoder. 313 | target_tokens : ``Dict[str, torch.LongTensor]``, optional (default = None) 314 | Output of `Textfield.as_array()` applied on target `TextField`. We assume that the 315 | target tokens are also represented as a `TextField`. 316 | 317 | Returns 318 | ------- 319 | Dict[str, torch.Tensor] 320 | """ 321 | bs = len(label) 322 | state = self._encode(source_tokens) 323 | 324 | with torch.no_grad(): 325 | img_feats = self.resnet(image) 326 | # (N, C, 14, 14) -> (N, 196, C) 327 | img_feats = img_feats.reshape(img_feats.shape[0], img_feats.shape[1], -1).transpose(1, 2) 328 | img_mask = make_mask(img_feats) 329 | img_feats = self.channel_transform(img_feats) 330 | 331 | lang_feats = state['encoder_outputs'] 332 | # mask the digital encoding question without embedding, i.e. 
source_tokens(already index to number) 333 | lang_mask = make_mask(source_tokens['tokens'].unsqueeze(2)) 334 | 335 | _, img_feats = self.mcan(lang_feats, img_feats, lang_mask, img_mask) 336 | 337 | # (N, 308, 512) 338 | # for attention, image first and then lang, using mask 339 | state['encoder_outputs'] = torch.cat([img_feats, lang_feats], 1) 340 | 341 | # decode 342 | state = self._init_decoder_state(state, lang_feats, img_feats, img_mask) 343 | output_dict = self._forward_loop(state, target_tokens) # recurrent decoding for LSTM 344 | 345 | # knowledge points 346 | if self.point_ratio != 0: 347 | concat_feature = state["concat_feature"] 348 | point_feat = self.points_norm(concat_feature) 349 | point_feat = self.points_proj(point_feat) 350 | point_pred = torch.sigmoid(point_feat) 351 | point_loss = self.points_criterion(point_pred, point_label) * self.point_ratio 352 | output_dict["point_pred"] = point_pred 353 | output_dict["point_loss"] = point_loss 354 | output_dict["loss"] += point_loss 355 | 356 | # if testing, beam search and evaluation 357 | if not self.training: 358 | # state = self._init_decoder_state(state) 359 | state = self._init_decoder_state(state, lang_feats, img_feats, img_mask) # TODO 360 | predictions = self._forward_beam_search(state) 361 | output_dict.update(predictions) 362 | 363 | if target_tokens and self._bleu: 364 | # shape: (batch_size, beam_size, max_sequence_length) 365 | top_k_predictions = output_dict["predictions"] 366 | 367 | # execute the decode programs to calculate the accuracy 368 | suc_knt, no_knt, = 0, 0 369 | 370 | selected_programs = [] 371 | wrong_id = [] 372 | noresult_id = [] 373 | 374 | for b in range(bs): 375 | 376 | hypo = None 377 | used_hypo = None 378 | choice = None 379 | for i in range(self._beam_search.beam_size): 380 | if choice is not None: 381 | break 382 | hypo = list(top_k_predictions[b][i]) 383 | if self._end_index in list(hypo): 384 | hypo = hypo[:hypo.index(self._end_index)] 385 | hypo = [self.vocab.get_token_from_index(idx.item()) for idx in hypo] 386 | res = self._equ.excuate_equation(hypo, source_nums[b]) 387 | 388 | if res is not None and len(res) > 0: 389 | for j in range(4): 390 | if choice_nums[b][j] is not None and math.fabs(res[-1] - choice_nums[b][j]) < 0.001: 391 | choice = j 392 | used_hypo = hypo 393 | selected_programs.append([hypo]) 394 | if choice is None: 395 | no_knt += 1 396 | answer_state = 'no_result' 397 | #no_result_id.append(data_id[b]) 398 | 399 | self.new_acc.append(0) 400 | elif choice == label[b]: 401 | suc_knt += 1 402 | answer_state = 'right' 403 | self.new_acc.append(1) 404 | right_id.append(data_id[b]) 405 | else: 406 | answer_state = 'false' 407 | wrong_id.append(b) 408 | self.new_acc.append(0) 409 | 410 | self.save_results[data_id[b]] = dict(manual_program=manual_program[b], 411 | predict_program=hypo, predict_res=res, 412 | choice=choice_nums[b], right_answer=label[b], 413 | answer_state=answer_state) 414 | 415 | flag = 1 if choice == label[b] else 0 416 | if type[b] == 'angle': 417 | self.angle.append(flag) 418 | elif type[b] == 'length': 419 | self.length.append(flag) 420 | else: 421 | self.other.append(flag) 422 | 423 | # knowledge points 424 | # if self.point_ratio != 0: 425 | # point_acc = self.multi_label_evaluation(point_pred[b].unsqueeze(0), point_label[b].unsqueeze(0)) 426 | # self.point_acc_list.append(point_acc) 427 | 428 | # with open('save/test.json', 'w') as f: 429 | # json.dump(self.save_results, f) 430 | 431 | if random.random() < 0.05: 432 | print('selected_programs', 
selected_programs) 433 | """ 434 | for item in noresult_id: 435 | noresult_manual.append(selected_programs[item]) 436 | 437 | for item in wrong_id: 438 | wrong_manual.append(selected_programs[item]) 439 | 440 | print((wrong_manual),(noresult_manual)) 441 | """ 442 | # calculate BLEU 443 | best_predictions = top_k_predictions[:, 0, :] 444 | self._bleu(best_predictions, target_tokens["tokens"]) 445 | self._acc(suc_knt / bs) 446 | self._no_result(no_knt / bs) 447 | 448 | print(right_id) 449 | print(len(right_id)) 450 | return output_dict 451 | 452 | def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 453 | """ 454 | Finalize predictions. 455 | 456 | This method overrides ``Model.decode``, which gets called after ``Model.forward``, at test 457 | time, to finalize predictions. The logic for the decoder part of the encoder-decoder lives 458 | within the ``forward`` method. 459 | 460 | This method trims the output predictions to the first end symbol, replaces indices with 461 | corresponding tokens, and adds a field called ``predicted_tokens`` to the ``output_dict``. 462 | """ 463 | predicted_indices = output_dict["predictions"] 464 | if not isinstance(predicted_indices, numpy.ndarray): 465 | predicted_indices = predicted_indices.detach().cpu().numpy() 466 | all_predicted_tokens = [] 467 | for indices in predicted_indices: 468 | # Beam search gives us the top k results for each source sentence in the batch 469 | # but we just want the single best. 470 | if len(indices.shape) > 1: 471 | indices = indices[0] 472 | indices = list(indices) 473 | # Collect indices till the first end_symbol 474 | if self._end_index in indices: 475 | indices = indices[:indices.index(self._end_index)] 476 | predicted_tokens = [self.vocab.get_token_from_index(x, namespace=self._target_namespace) 477 | for x in indices] 478 | all_predicted_tokens.append(predicted_tokens) 479 | output_dict["predicted_tokens"] = all_predicted_tokens 480 | return output_dict 481 | 482 | def _encode(self, source_tokens: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 483 | # shape: (batch_size, max_input_sequence_length, encoder_input_dim) 484 | #embedded_input = self._source_embedder(source_tokens) 485 | # shape: (batch_size, max_input_sequence_length) 486 | source_mask = util.get_text_field_mask(source_tokens) 487 | 488 | img_mask = torch.ones(source_mask.shape[0], 196).long().cuda() 489 | concat_mask = torch.cat([img_mask, source_mask], 1) 490 | 491 | # shape: (batch_size, max_input_sequence_length, encoder_output_dim) 492 | #encoder_outputs = self._encoder(embedded_input, source_mask) 493 | encoder_outputs = self._encoder(source_tokens['tokens'], source_mask) 494 | 495 | return { 496 | "source_mask": source_mask, # source_mask, 497 | "concat_mask": concat_mask, 498 | "encoder_outputs": encoder_outputs, 499 | } 500 | 501 | def _init_decoder_state(self, state, lang_feats, img_feats, img_mask): 502 | 503 | batch_size = state["source_mask"].size(0) 504 | final_lang_feat = util.get_final_encoder_states( 505 | lang_feats, 506 | state["source_mask"], 507 | self._encoder.is_bidirectional()) 508 | img_feat = self.attflat_img(img_feats, img_mask) 509 | feat = torch.cat([final_lang_feat, img_feat], 1) 510 | feat = self.decode_transform(feat) 511 | state["concat_feature"] = feat 512 | 513 | state["decoder_hidden"] = feat 514 | # C0 shape: (batch_size, decoder_output_dim) 515 | state["decoder_context"] = torch.zeros(batch_size, self._decoder_output_dim).cuda() 516 | # state["decoder_context"] = 
state["encoder_outputs"].new_zeros(batch_size, self._decoder_output_dim) 517 | return state 518 | 519 | def _forward_loop(self, 520 | state: Dict[str, torch.Tensor], 521 | target_tokens: Dict[str, torch.LongTensor] = None) -> Dict[str, torch.Tensor]: 522 | """ 523 | Make forward pass during training or do greedy search during prediction. 524 | 525 | Notes 526 | ----- 527 | We really only use the predictions from the method to test that beam search 528 | with a beam size of 1 gives the same results. 529 | """ 530 | # shape: (batch_size, max_input_sequence_length) 531 | source_mask = state["source_mask"] 532 | 533 | batch_size = source_mask.size()[0] 534 | 535 | if target_tokens: 536 | # shape: (batch_size, max_target_sequence_length) 537 | targets = target_tokens["tokens"] 538 | 539 | _, target_sequence_length = targets.size() 540 | 541 | # The last input from the target is either padding or the end symbol. 542 | # Either way, we don't have to process it. 543 | num_decoding_steps = target_sequence_length - 1 544 | else: 545 | num_decoding_steps = self._max_decoding_steps 546 | 547 | # Initialize target predictions with the start index. 548 | # shape: (batch_size,) 549 | last_predictions = source_mask.new_full((batch_size,), fill_value=self._start_index) 550 | 551 | step_logits: List[torch.Tensor] = [] 552 | step_predictions: List[torch.Tensor] = [] 553 | for timestep in range(num_decoding_steps): 554 | if self.training and torch.rand(1).item() < self._scheduled_sampling_ratio: 555 | # Use gold tokens at test time and at a rate of 1 - _scheduled_sampling_ratio 556 | # during training. 557 | # shape: (batch_size,) 558 | input_choices = last_predictions 559 | elif not target_tokens: 560 | # shape: (batch_size,) 561 | input_choices = last_predictions 562 | else: 563 | # shape: (batch_size,) 564 | input_choices = targets[:, timestep] 565 | 566 | # shape: (batch_size, num_classes) 567 | # recurrent decoding 568 | output_projections, state = self._prepare_output_projections(input_choices, state) 569 | 570 | # list of tensors, shape: (batch_size, 1, num_classes) 571 | step_logits.append(output_projections.unsqueeze(1)) 572 | 573 | # shape: (batch_size, num_classes) 574 | class_probabilities = F.softmax(output_projections, dim=-1) 575 | 576 | # shape (predicted_classes): (batch_size,) 577 | _, predicted_classes = torch.max(class_probabilities, 1) 578 | 579 | # shape (predicted_classes): (batch_size,) 580 | last_predictions = predicted_classes 581 | 582 | step_predictions.append(last_predictions.unsqueeze(1)) 583 | 584 | # shape: (batch_size, num_decoding_steps) 585 | predictions = torch.cat(step_predictions, 1) 586 | 587 | output_dict = {"predictions": predictions} 588 | 589 | if target_tokens: 590 | # shape: (batch_size, num_decoding_steps, num_classes) 591 | logits = torch.cat(step_logits, 1) 592 | 593 | # Compute loss. 
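# _get_loss drops the first target position (the start symbol) and compares
# logits[t] against targets[t + 1]; see the worked example in its docstring below.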
594 | target_mask = util.get_text_field_mask(target_tokens) 595 | loss = self._get_loss(logits, targets, target_mask) 596 | output_dict["loss"] = loss 597 | 598 | return output_dict 599 | 600 | def _forward_beam_search(self, state: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 601 | """Make forward pass during prediction using a beam search.""" 602 | batch_size = state["source_mask"].size()[0] 603 | start_predictions = state["source_mask"].new_full((batch_size,), fill_value=self._start_index) 604 | 605 | # shape (all_top_k_predictions): (batch_size, beam_size, num_decoding_steps) 606 | # shape (log_probabilities): (batch_size, beam_size) 607 | all_top_k_predictions, log_probabilities = self._beam_search.search( 608 | start_predictions, state, self.take_step) 609 | 610 | output_dict = { 611 | "class_log_probabilities": log_probabilities, 612 | "predictions": all_top_k_predictions, 613 | } 614 | return output_dict 615 | 616 | def _prepare_output_projections(self, 617 | last_predictions: torch.Tensor, 618 | state: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: # pylint: disable=line-too-long 619 | """ 620 | Decode current state and last prediction to produce produce projections 621 | into the target space, which can then be used to get probabilities of 622 | each target token for the next step. 623 | 624 | Inputs are the same as for `take_step()`. 625 | """ 626 | # shape: (group_size, max_input_sequence_length, encoder_output_dim) 627 | encoder_outputs = state["encoder_outputs"] 628 | 629 | # shape: (group_size, max_input_sequence_length) 630 | # source_mask = state["source_mask"] 631 | source_mask = state["concat_mask"] 632 | 633 | # decoder_hidden and decoder_context are get from encoder_outputs in _init_decoder_state() 634 | # shape: (group_size, decoder_output_dim) 635 | decoder_hidden = state["decoder_hidden"] 636 | # shape: (group_size, decoder_output_dim) 637 | decoder_context = state["decoder_context"] 638 | 639 | # shape: (group_size, target_embedding_dim) 640 | embedded_input = self._target_embedder(last_predictions) 641 | 642 | if self._attention: 643 | # shape: (group_size, encoder_output_dim) 644 | attended_input = self._prepare_attended_input(decoder_hidden, encoder_outputs, source_mask) 645 | 646 | # shape: (group_size, decoder_output_dim + target_embedding_dim) 647 | decoder_input = torch.cat((attended_input, embedded_input), -1) 648 | 649 | else: 650 | # shape: (group_size, target_embedding_dim) 651 | decoder_input = embedded_input 652 | 653 | # shape (decoder_hidden): (batch_size, decoder_output_dim) 654 | # shape (decoder_context): (batch_size, decoder_output_dim) 655 | 656 | decoder_hidden, decoder_context = self._decoder_cell( 657 | decoder_input, 658 | (decoder_hidden, decoder_context)) 659 | 660 | state["decoder_hidden"] = decoder_hidden 661 | state["decoder_context"] = decoder_context 662 | 663 | # shape: (group_size, num_classes) 664 | output_projections = self._output_projection_layer(decoder_hidden) 665 | """ 666 | decoder_hidden = self._decoder_cell( 667 | decoder_input, 668 | (decoder_hidden)) 669 | 670 | state["decoder_hidden"] = decoder_hidden 671 | state["decoder_context"] = decoder_hidden 672 | 673 | # shape: (group_size, num_classes) 674 | output_projections = self._output_projection_layer(decoder_hidden) 675 | """ 676 | return output_projections, state 677 | 678 | def _prepare_attended_input(self, 679 | decoder_hidden_state: torch.LongTensor = None, 680 | encoder_outputs: torch.LongTensor = None, 681 | encoder_outputs_mask: 
torch.LongTensor = None) -> torch.Tensor: 682 | """Apply attention over encoder outputs and decoder state.""" 683 | # Ensure mask is also a FloatTensor. Or else the multiplication within 684 | # attention will complain. 685 | # shape: (batch_size, max_input_sequence_length) 686 | encoder_outputs_mask = encoder_outputs_mask.float() 687 | 688 | # shape: (batch_size, max_input_sequence_length) 689 | input_weights = self._attention( 690 | decoder_hidden_state, encoder_outputs, encoder_outputs_mask) 691 | 692 | # shape: (batch_size, encoder_output_dim) 693 | attended_input = util.weighted_sum(encoder_outputs, input_weights) 694 | 695 | return attended_input 696 | 697 | def multi_label_evaluation(self, input, target): 698 | one = torch.ones(target.shape).cuda() 699 | zero = torch.zeros(target.shape).cuda() 700 | res = torch.where(input > 0.5, one, zero) 701 | 702 | over = (res * target).sum(dim=1) 703 | union = res.sum(dim=1) + target.sum(dim=1) - over 704 | acc = over / union 705 | 706 | index = torch.isnan(acc) # nan appear when both pred and target are zeros, which means makes right answer 707 | acc_fix = torch.where(index, torch.ones(acc.shape).cuda(), acc) 708 | 709 | acc_sum = acc_fix.sum().item() 710 | 711 | return acc_sum 712 | 713 | @staticmethod 714 | def _get_loss(logits: torch.LongTensor, 715 | targets: torch.LongTensor, 716 | target_mask: torch.LongTensor) -> torch.Tensor: 717 | """ 718 | Compute loss. 719 | 720 | Takes logits (unnormalized outputs from the decoder) of size (batch_size, 721 | num_decoding_steps, num_classes), target indices of size (batch_size, num_decoding_steps+1) 722 | and corresponding masks of size (batch_size, num_decoding_steps+1) steps and computes cross 723 | entropy loss while taking the mask into account. 724 | 725 | The length of ``targets`` is expected to be greater than that of ``logits`` because the 726 | decoder does not need to compute the output corresponding to the last timestep of 727 | ``targets``. This method aligns the inputs appropriately to compute the loss. 728 | 729 | During training, we want the logit corresponding to timestep i to be similar to the target 730 | token from timestep i + 1. That is, the targets should be shifted by one timestep for 731 | appropriate comparison. Consider a single example where the target has 3 words, and 732 | padding is to 7 tokens. 733 | The complete sequence would correspond to w1 w2 w3
<E> <P> <P>
734 | and the mask would be                     1   1   1   1   1   0   0
 735 | and let the logits be                     l1  l2  l3  l4  l5  l6
 736 | We actually need to compare:
 737 | the sequence           w1  w2  w3  <E> <P> <P>
 738 | with masks             1   1   1   1   0   0
 739 | against                l1  l2  l3  l4  l5  l6
 740 | (where the input was)  <S> w1  w2  w3  <E> <P>
741 | """ 742 | # shape: (batch_size, num_decoding_steps) 743 | relevant_targets = targets[:, 1:].contiguous() 744 | 745 | # shape: (batch_size, num_decoding_steps) 746 | relevant_mask = target_mask[:, 1:].contiguous() 747 | 748 | return util.sequence_cross_entropy_with_logits(logits, relevant_targets, relevant_mask) 749 | 750 | @overrides 751 | def get_metrics(self, reset: bool = False) -> Dict[str, float]: 752 | all_metrics: Dict[str, float] = {} 753 | if self._bleu and not self.training: 754 | all_metrics.update(self._bleu.get_metric(reset=reset)) 755 | # all_metrics.update({'acc': self._acc.get_metric(reset=reset)}) 756 | all_metrics.update({'acc': self._acc.get_metric(reset=reset)}) 757 | if len(self.new_acc) != 0: 758 | all_metrics.update({'new_acc': sum(self.new_acc)/len(self.new_acc)}) 759 | print('Num of total, angle, len, other', len(self.new_acc), len(self.angle), len(self.length), len(self.other)) 760 | if len(self.angle) != 0: 761 | all_metrics.update({'angle_acc': sum(self.angle)/len(self.angle)}) 762 | if len(self.length) != 0: 763 | all_metrics.update({'length_acc': sum(self.length)/len(self.length)}) 764 | if len(self.other) != 0: 765 | all_metrics.update({'other_acc': sum(self.other)/len(self.other)}) 766 | all_metrics.update({'no_result': self._no_result.get_metric(reset=reset)}) 767 | 768 | # if len(self.point_acc_list) != 0: 769 | # all_metrics.update({'point_acc': sum(self.point_acc_list) / len(self.point_acc_list)}) 770 | 771 | return all_metrics 772 | -------------------------------------------------------------------------------- /GeoQA+/config/NGS_Aux.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "type": "s2s_manual_reader", 4 | "tokenizer": { 5 | "word_splitter":{ 6 | "type": "just_spaces" 7 | } 8 | }, 9 | "source_token_indexer": { 10 | "tokens": { 11 | "type": "pretrained_transformer", 12 | "model_name": "data/pretrain/Roberta", 13 | "do_lowercase": false 14 | } 15 | }, 16 | "target_token_indexer": { 17 | "tokens": { 18 | "type": "single_id" 19 | } 20 | } 21 | }, 22 | 23 | "train_data_path": "data/GeoQA2.2/train.pk", 24 | "validation_data_path": "data/GeoQA2.2/dev.pk", 25 | "test_data_path" : "data/GeoQA2.2/test.pk", 26 | "model": { 27 | "type": "geo_s2s", 28 | "max_decoding_steps": 16, 29 | "beam_size": 10, 30 | 31 | "target_embedding_dim": 512, 32 | "scheduled_sampling_ratio": 0, 33 | "resnet_pretrained": "data/pretrain/best_jigsaw_model_state_dict", 34 | "knowledge_points_ratio": 0, 35 | "source_embedder": { 36 | "token_embedders": { 37 | 38 | } 39 | }, 40 | "encoder": { 41 | "input_dim": 21128, 42 | "emb_dim": 768, 43 | "hid_dim": 512, 44 | "dropout": 0.5 45 | } 46 | }, 47 | "iterator": { 48 | "type": "basic", 49 | "batch_size": 32 50 | }, 51 | "trainer": { 52 | "validation_metric": "+acc", 53 | "learning_rate_scheduler": { 54 | "type": "reduce_on_plateau", 55 | "factor": 0.5, 56 | "mode": "max", 57 | "patience": 5 58 | }, 59 | "num_epochs": 100, 60 | "grad_norm": 10.0, 61 | "cuda_device": 0, 62 | 63 | "optimizer": { 64 | "type": "adam", 65 | "lr": 1e-3, 66 | "parameter_groups": [ 67 | [["mcan", "channel_transform", "attflat_img", "attflat_lang", "decode_transform"], {"lr": 1e-5}], 68 | [["resnet"], {"lr": 1e-5}], 69 | [["source_embedder","encoder.embedding"],{"lr": 2e-5}], 70 | [[ "encoder.concat_trans", "encoder.lstm_embedding","encoder.trans", "encoder.norm", "encoder.concat_norm"],{"lr": 1e-3}] 71 | ] 72 | } 73 | } 74 | } 75 | 
-------------------------------------------------------------------------------- /GeoQA+/config/txt: -------------------------------------------------------------------------------- 1 | test 2 | -------------------------------------------------------------------------------- /GeoQA+/data/GeoQA2.2/dev.pk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCNU203/GeoQA-Plus/0d0525766a9c22bd097554651e31030e503a8d67/GeoQA+/data/GeoQA2.2/dev.pk -------------------------------------------------------------------------------- /GeoQA+/data/GeoQA2.2/test.pk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCNU203/GeoQA-Plus/0d0525766a9c22bd097554651e31030e503a8d67/GeoQA+/data/GeoQA2.2/test.pk -------------------------------------------------------------------------------- /GeoQA+/data/pretrain/txt: -------------------------------------------------------------------------------- 1 | test 2 | -------------------------------------------------------------------------------- /GeoQA+/data/sub_dataset_dict.pk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SCNU203/GeoQA-Plus/0d0525766a9c22bd097554651e31030e503a8d67/GeoQA+/data/sub_dataset_dict.pk -------------------------------------------------------------------------------- /GeoQA+/data/tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | , 3 | . 4 | A 5 | B 6 | C 7 | D 8 | 的 9 | ° 10 | } 11 | { 12 | 0 13 | 2 14 | 1 15 | 3 16 | 5 17 | = 18 | c 19 | ) 20 | ( 21 | 点 22 | 图 23 | ∠ 24 | 为 25 | 4 26 | 如 27 | O 28 | E 29 | 则 30 | 6 31 | a 32 | 是 33 | m 34 | \ 35 | r 36 | f 37 | √ 38 | 在 39 | 于 40 | 中 41 | 8 42 | 形 43 | ⊙ 44 | 长 45 | 、 46 | 上 47 | F 48 | 线 49 | △ 50 | 一 51 | 度 52 | 边 53 | 直 54 | 7 55 | P 56 | 面 57 | 角 58 | 分 59 | 若 60 | 9 61 | 圆 62 | 径 63 | 数 64 | π 65 | 交 66 | 米 67 | 个 68 | ~ 69 | 平 70 | 与 71 | 方 72 | . 
73 | 积 74 | 等 75 | 小 76 | 别 77 | 知 78 | 半 79 | 三 80 | 接 81 | 已 82 | 正 83 | M 84 | 切 85 | ∥ 86 | + 87 | : 88 | 相 89 | 高 90 | 四 91 | 且 92 | - 93 | 两 94 | 所 95 | ⊥ 96 | 到 97 | n 98 | 值 99 | 测 100 | 示 101 | G 102 | ^ 103 | 连 104 | t 105 | 得 106 | N 107 | 弦 108 | 顶 109 | 和 110 | 坡 111 | 时 112 | 大 113 | 处 114 | 部 115 | 过 116 | 内 117 | 距 118 | 行 119 | 作 120 | 以 121 | 对 122 | 心 123 | 离 124 | 延 125 | ² 126 | s 127 | 影 128 | 动 129 | 段 130 | 周 131 | 体 132 | 地 133 | ′ 134 | 么 135 | 那 136 | 向 137 | 有 138 | 条 139 | S 140 | i 141 | _ 142 | 水 143 | 这 144 | 将 145 | l 146 | R 147 | H 148 | 垂 149 | 成 150 | 锥 151 | 最 152 | 阴 153 | 位 154 | 同 155 | 底 156 | 斜 157 | 果 158 | 不 159 | 下 160 | 弧 161 | 矩 162 | 外 163 | Q 164 | 从 165 | 视 166 | 沿 167 | 标 168 | 量 169 | ≈ 170 | 扇 171 | 树 172 | 菱 173 | 杆 174 | 楼 175 | 某 176 | 纸 177 | 何 178 | b 179 | 其 180 | 学 181 | 几 182 | 端 183 | 格 184 | 侧 185 | 放 186 | 置 187 | ⁀ 188 | 用 189 | 使 190 | 后 191 | 板 192 | ▱ 193 | α 194 | 坐 195 | 都 196 | 子 197 | 仰 198 | 据 199 | o 200 | 结 201 | 出 202 | 山 203 | 该 204 | 了 205 | 轴 206 | 足 207 | 转 208 | 重 209 | 此 210 | 间 211 | x 212 | 确 213 | 之 214 | 明 215 | 当 216 | 或 217 | 比 218 | 尺 219 | 网 220 | 把 221 | 旗 222 | 宽 223 | 定 224 | 它 225 | 路 226 | 海 227 | 好 228 | 梯 229 | 塔 230 | 旋 231 | 可 232 | 合 233 | 要 234 | 表 235 | 能 236 | 片 237 | 意 238 | 移 239 | 经 240 | 根 241 | 块 242 | 计 243 | 俯 244 | 走 245 | 折 246 | 落 247 | 达 248 | 叠 249 | 并 250 | 偏 251 | 发 252 | 射 253 | 建 254 | 运 255 | 北 256 | 第 257 | 约 258 | 里 259 | " 260 | 针 261 | 东 262 | 灯 263 | 截 264 | 前 265 | 恰 266 | 球 267 | 开 268 | 着 269 | 由 270 | y 271 | 他 272 | 次 273 | 绕 274 | 均 275 | 物 276 | 道 277 | 似 278 | 组 279 | 任 280 | 看 281 | 系 282 | 柱 283 | 按 284 | 再 285 | 河 286 | 腰 287 | 考 288 | 现 289 | 无 290 | 围 291 | 参 292 | 至 293 | 取 294 | 画 295 | 设 296 | 木 297 | 台 298 | 列 299 | 五 300 | $ 301 | 法 302 | 电 303 | 公 304 | 剪 305 | 单 306 | ; 307 | 观 308 | 筑 309 | 应 310 | 光 311 | 人 312 | 种 313 | 校 314 | 横 315 | 全 316 | 断 317 | k 318 | 每 319 | / 320 | 船 321 | 夹 322 | 关 323 | 多 324 | 称 325 | 原 326 | 墙 327 | 去 328 | 进 329 | 工 330 | 速 331 | 少 332 | h 333 | 共 334 | 器 335 | 棵 336 | 们 337 | 张 338 | 西 339 | 母 340 | 左 341 | 立 342 | 竹 343 | 桌 344 | e 345 | 铁 346 | 求 347 | 含 348 | 展 349 | 坝 350 | 身 351 | 桥 352 | 站 353 | 右 354 | 厘 355 | 竿 356 | 起 357 | 顺 358 | 油 359 | 精 360 | ” 361 | 六 362 | 棱 363 | 堤 364 | 需 365 | 管 366 | 区 367 | × 368 | 南 369 | 主 370 | 然 371 | 算 372 | 机 373 | g 374 | 自 375 | 制 376 | 来 377 | “ 378 | 被 379 | 另 380 | 秒 381 | 邻 382 | 阶 383 | 场 384 | 脚 385 | 皮 386 | 互 387 | 航 388 | 保 389 | 变 390 | 轮 391 | 车 392 | I 393 | 样 394 | 市 395 | 口 396 | 岸 397 | 飞 398 | 各 399 | 做 400 | 帽 401 | 件 402 | 天 403 | 想 404 | 入 405 | d 406 | 阳 407 | 式 408 | 升 409 | 镜 410 | 滑 411 | 副 412 | 活 413 | 步 414 | 刻 415 | 古 416 | 又 417 | ∽ 418 | 逆 419 | 园 420 | 摆 421 | 整 422 | 短 423 | 照 424 | 缝 425 | 她 426 | 气 427 | K 428 | ① 429 | ② 430 | 状 431 | 象 432 | 号 433 | 座 434 | 头 435 | 池 436 | 空 437 | 二 438 | 我 439 | ' 440 | 反 441 | 利 442 | < 443 | 甲 444 | 排 445 | 规 446 | 劣 447 | 教 448 | 乙 449 | 迎 450 | 花 451 | 留 452 | 绳 453 | 臂 454 | 程 455 | 风 456 | 门 457 | 桶 458 | 察 459 | 依 460 | 锐 461 | 深 462 | 岛 463 | 目 464 | 优 465 | 架 466 | 即 467 | 近 468 | 信 469 | 城 470 | 具 471 | 忽 472 | 略 473 | p 474 | 草 475 | 生 476 | 选 477 | L 478 | 余 479 | 家 480 | 华 481 | 景 482 | 房 483 | 装 484 | ? 
485 | 先 486 | 国 487 | 刚 488 | β 489 | 打 490 | 拉 491 | 论 492 | 加 493 | 构 494 | 字 495 | 环 496 | 实 497 | 而 498 | 塘 499 | 色 500 | T 501 | 只 502 | 广 503 | 钟 504 | 窗 505 | 仪 506 | 案 507 | 艘 508 | ③ 509 | 金 510 | ≤ 511 | 通 512 | 隧 513 | 满 514 | 化 515 | 也 516 | 靠 517 | 新 518 | 索 519 | 游 520 | 倾 521 | 兴 522 | 钢 523 | 包 524 | 盒 525 | 寸 526 | 些 527 | 记 528 | 问 529 | … 530 | > 531 | - 532 | 续 533 | 眼 534 | 年 535 | 扶 536 | 美 537 | 始 538 | • 539 | 住 540 | 完 541 | 型 542 | 热 543 | 元 544 | 师 545 | 课 546 | 料 547 | 员 548 | 伞 549 | 村 550 | 准 551 | 拱 552 | 拼 553 | ≌ 554 | 固 555 | 域 556 | 布 557 | u 558 | 趣 559 | 理 560 | 继 561 | 石 562 | 级 563 | 扫 564 | 支 565 | 黄 566 | 带 567 | 红 568 | 商 569 | 竖 570 | 棒 571 | 限 572 | 像 573 | θ 574 | ④ 575 | 杯 576 | 太 577 | 栋 578 | 名 579 | 持 580 | 湖 581 | 模 582 | 铺 583 | 星 584 | 代 585 | 手 586 | 函 587 | 读 588 | 律 589 | 题 590 | 己 591 | 李 592 | 渔 593 | 及 594 | 庆 595 | 睛 596 | 牌 597 | 异 598 | 遮 599 | 盖 600 | 会 601 | 击 602 | 说 603 | 备 604 | 千 605 | 痕 606 | 改 607 | 修 608 | 找 609 | 望 610 | 黑 611 | 零 612 | 驶 613 | 拍 614 | 跷 615 | 割 616 | 例 617 | 造 618 | 爬 619 | 停 620 | 力 621 | 八 622 | 估 623 | 艇 624 | 户 625 | 划 626 | 株 627 | 干 628 | + 629 | 请 630 | 毯 631 | 解 632 | 术 633 | 践 634 | 判 635 | 就 636 | < 637 | 壁 638 | 王 639 | 补 640 | 翻 641 | 库 642 | 室 643 | 调 644 | 注 645 | 剩 646 | 末 647 | 斗 648 | 终 649 | 止 650 | 往 651 | 亮 652 | 植 653 | 输 654 | 槽 655 | 者 656 | 丽 657 | 习 658 | 白 659 | 安 660 | ’ 661 | 弯 662 | 栏 663 | 品 664 | 较 665 | 今 666 | 井 667 | 九 668 | 民 669 | → 670 | 显 671 | 日 672 | 笔 673 | 旁 674 | 烟 675 | 囱 676 | 你 677 | 倍 678 | 错 679 | 假 680 | 贴 681 | 漏 682 | 倒 683 | 远 684 | 指 685 | 探 686 | 投 687 | q 688 | 基 689 | 筒 690 | 隔 691 | 随 692 | 框 693 | 性 694 | 十 695 | 午 696 | 盘 697 | 触 698 | 月 699 | 损 700 | 乘 701 | 效 702 | ⌒ 703 | 铅 704 | 雪 705 | 降 706 | 细 707 | 识 708 | 材 709 | 钉 710 | · 711 | 孔 712 | 情 713 | 货 714 | 引 715 | 桩 716 | 撑 717 | 冰 718 | 踏 719 | 绿 720 | 告 721 | 差 722 | 文 723 | ㎝ 724 | 著 725 | 源 726 | 林 727 | 压 728 | 丝 729 | 《 730 | 》 731 | 误 732 | 容 733 | 传 734 | 况 735 | 裁 736 | 束 737 | 换 738 | 致 739 | 搭 740 | 江 741 | 层 742 | 泡 743 | 屋 744 | 玻 745 | 璃 746 | Z 747 | 傅 748 | 给 749 | 够 750 | 述 751 | 节 752 | 马 753 | 超 754 | 范 755 | 棋 756 | 常 757 | 跳 758 | 缘 759 | 耗 760 | 办 761 | 客 762 | | 763 | 防 764 | 摩 765 | 退 766 | 港 767 | 推 768 | 箭 769 | 厚 770 | 老 771 | 七 772 | ∶ 773 | 操 774 | 见 775 | 军 776 | 亭 777 | 缩 778 | 滚 779 | 锯 780 | 幢 781 | 居 782 | 坛 783 | 综 784 | 章 785 | 钝 786 | 便 787 | 班 788 | 络 789 | 背 790 | 勾 791 | 志 792 | 强 793 | 登 794 | 受 795 | 礼 796 | 残 797 | 螺 798 | 卷 799 | 雨 800 | 彩 801 | 伸 802 | 芳 803 | 挂 804 | 雕 805 | 峰 806 | 封 807 | 救 808 | 州 809 | 本 810 | 堆 811 | □ 812 | 送 813 | 罐 814 | 丙 815 | 试 816 | 晚 817 | 颖 818 | 馆 819 | 质 820 | 朵 821 | 队 822 | 览 823 | 卡 824 | 梁 825 | 曲 826 | 首 827 | 提 828 | 岩 829 | 坪 830 | 险 831 | [ 832 | 拐 833 | 但 834 | 帮 835 | 露 836 | 回 837 | 虚 838 | 紧 839 | 快 840 | 除 841 | 涂 842 | 没 843 | 股 844 | 序 845 | 克 846 | 还 847 | 响 848 | 际 849 | 匀 850 | 虑 851 | 诞 852 | 凉 853 | 爸 854 | 拦 855 | 答 856 | 缆 857 | z 858 | v 859 | 庄 860 | 寻 861 | ≠ 862 | 买 863 | ] 864 | 蚂 865 | 蚁 866 | 很 867 | 箱 868 | 玲 869 | 剖 870 | 纵 871 | 毛 872 | 辆 873 | 幅 874 | 弹 875 | 弓 876 | 附 877 | 越 878 | 牧 879 | 菜 880 | 轨 881 | 净 882 | 拴 883 | 叫 884 | 土 885 | 街 886 | 期 887 | 鹰 888 | 塑 889 | 堂 890 | 监 891 | 鱼 892 | 搜 893 | 舰 894 | 供 895 | 坎 896 | 佛 897 | 总 898 | 境 899 | 粗 900 | ⁰ 901 | 证 902 | 既 903 | 院 904 | 临 905 | 简 906 | 择 907 | 适 908 | 助 909 | 雷 910 | 因 911 | 珠 912 | 颜 913 | 陆 914 | 覆 915 | 低 916 | 义 917 | 叉 918 | 玩 919 | 演 920 | 圣 921 | 淇 922 | 淋 923 | 农 924 | 捣 925 | 书 926 | 施 927 | 息 928 | 闭 
929 | 挡 930 | 胜 931 | 缺 932 | 秋 933 | 羊 934 | 占 935 | 幕 936 | 份 937 | V 938 | 瓶 939 | 液 940 | 悬 941 | 赏 942 | ≥ 943 | 衣 944 | 洛 945 | 薄 946 | 刷 947 | 护 948 | 伟 949 | 挥 950 | 厦 951 | 宝 952 | 购 953 | 抛 954 | 攀 955 | 梢 956 | 复 957 | X 958 | 添 959 | 增 960 | 佳 961 | 稳 962 | 荆 963 | Ð 964 | 戏 965 | 府 966 | 博 967 | 赛 968 | 暗 969 | 失 970 | γ 971 | 流 972 | 童 973 | 双 974 | 收 975 | 团 976 | 轻 977 | 呈 978 | 蛋 979 | 碎 980 | 拿 981 | 插 982 | 筝 983 | 罩 984 | 决 985 | 敏 986 | 丁 987 | ○ 988 | 丄 989 | 喷 990 | 屏 991 | 警 992 | 裂 993 | 京 994 | > 995 | 扩 996 | 丈 997 | 舞 998 | 店 999 | 堵 1000 | 绘 1001 | 业 1002 | 疑 1003 | 政 1004 | 渠 1005 | 火 1006 | 刀 1007 | 哨 1008 | 阅 1009 | 旧 1010 | 盆 1011 | 厂 1012 | 思 1013 | 杂 1014 | 项 1015 | 必 1016 | 报 1017 | 属 1018 | 礁 1019 | 巨 1020 | 盛 1021 | 碰 1022 | 乐 1023 | 采 1024 | 蔬 1025 | 毡 1026 | 洋 1027 | 士 1028 | 峙 1029 | 迹 1030 | 事 1031 | 杠 1032 | 跨 1033 | 宾 1034 | ⨀ 1035 | 写 1036 | 初 1037 | ʹ 1038 | 究 1039 | j 1040 | 晾 1041 | U 1042 | J 1043 | 膜 1044 | 餐 1045 | 亚 1046 | 央 1047 | 援 1048 | 温 1049 | 巡 1050 | 楔 1051 | 朝 1052 | 码 1053 | 勘 1054 | 鹅 1055 | 界 1056 | 巧 1057 | 梅 1058 | 括 1059 | 镇 1060 | 隙 1061 | 营 1062 | 挖 1063 | 特 1064 | 医 1065 | 吸 1066 | 拟 1067 | 须 1068 | 爱 1069 | 育 1070 | 闸 1071 | 慧 1072 | 棚 1073 | 腿 1074 | 牛 1075 | 骨 1076 | 类 1077 | 穿 1078 | 莹 1079 | 尖 1080 | 抽 1081 | 刘 1082 | 陈 1083 | 拔 1084 | 虎 1085 | 枪 1086 | 檐 1087 | 厅 1088 | 英 1089 | 早 1090 | 础 1091 | 垫 1092 | 雄 1093 | 旅 1094 | 宣 1095 | ! 1096 | 菁 1097 | 易 1098 | 载 1099 | 未 1100 | 汉 1101 | 埋 1102 | 休 1103 | 社 1104 | 栓 1105 | 浮 1106 | 漂 1107 | 洪 1108 | 晨 1109 | 配 1110 | 扎 1111 | 福 1112 | 岭 1113 | 萌 1114 | 神 1115 | 存 1116 | 碑 1117 | 瞰 1118 | 篱 1119 | 笆 1120 | 乡 1121 | 验 1122 | 聪 1123 | 食 1124 | 云 1125 | 宅 1126 | 描 1127 | 填 1128 | 科 1129 | ☉ 1130 | 嵌 1131 | 仓 1132 | 喜 1133 | 森 1134 | 令 1135 | 莲 1136 | 齐 1137 | 翼 1138 | w 1139 | 污 1140 | 丑 1141 | 感 1142 | 摄 1143 | 借 1144 | 撬 1145 | 丹 1146 | 夏 1147 | 语 1148 | 抖 1149 | 迪 1150 | 勇 1151 | 练 1152 | 矮 1153 | 轩 1154 | 伐 1155 | 伯 1156 | 联 1157 | 雅 1158 | 尾 1159 | 破 1160 | 嘉 1161 | 售 1162 | 研 1163 | 邑 1164 | 技 1165 | 醋 1166 | 柄 1167 | 闲 1168 | 灰 1169 | 艺 1170 | 钓 1171 | 侦 1172 | 司 1173 | 杭 1174 | 避 1175 | 胶 1176 | 扬 1177 | 帆 1178 | 映 1179 | 席 1180 | 震 1181 | 更 1182 | 峭 1183 | 坊 1184 | 圈 1185 | 举 1186 | 岗 1187 | 田 1188 | 套 1189 | 牙 1190 | 膏 1191 | ∁ 1192 | 妈 1193 | 控 1194 | 抢 1195 | л 1196 | 槟 1197 | 榔 1198 | 郎 1199 | 绍 1200 |  1201 | 吃 1202 | 赵 1203 | 爽 1204 | § 1205 | 获 1206 | 哪 1207 | 殊 1208 | 汽 1209 | 统 1210 | 春 1211 | 故 1212 | 饮 1213 | 炮 1214 | 凿 1215 | 非 1216 | 鼻 1217 | 镶 1218 | 圳 1219 | 奥 1220 | — 1221 | 泳 1222 | 欲 1223 | 训 1224 | 局 1225 | 突 1226 | 吹 1227 | 符 1228 | 赶 1229 | 产 1230 | 掉 1231 | 陀 1232 | 觉 1233 | 壶 1234 | 凡 1235 | 掌 1236 | 浸 1237 | 态 1238 | 翘 1239 | 蜡 1240 | 烛 1241 | 盏 1242 | 圃 1243 | 季 1244 | 瞄 1245 | 微 1246 | 吴 1247 | 辉 1248 | 预 1249 | 孩 1250 | 荡 1251 | Y 1252 | 硬 1253 | 糕 1254 | 纹 1255 | 齿 1256 | 纳 1257 | 肚 1258 | 脐 1259 | 维 1260 | 咽 1261 | 喉 1262 | 逸 1263 | 夫 1264 | 万 1265 | 百 1266 | 价 1267 | 世 1268 | 纪 1269 | 貌 1270 | ➝ 1271 | 欣 1272 | 密 1273 | 粘 1274 | 狗 1275 | 潜 1276 | 静 1277 | 融 1278 | 银 1279 | 杏 1280 | 录 1281 | 印 1282 | 查 1283 | 派 1284 | 骸 1285 | 遇 1286 | 危 1287 | 镭 1288 | 私 1289 | 砍 1290 | 墩 1291 | 寺 1292 | 竣 1293 | 激 1294 | 匣 1295 | 誉 1296 | 缓 1297 | 拥 1298 | 省 1299 | 杉 1300 | 聊 1301 | 洲 1302 | 锤 1303 | 蓬 1304 | 踩 1305 | 档 1306 | 药 1307 | 服 1308 | 务 1309 | 壳 1310 | 芦 1311 | 苇 1312 | 族 1313 | 历 1314 | 刮 1315 | 洁 1316 | 堑 1317 | ; 1318 | 乌 1319 | 颗 1320 | 漆 1321 | 酒 1322 | 职 1323 | 释 1324 | 叙 1325 | 帅 1326 | 辅 1327 | 墨 1328 | 撞 1329 | 袋 1330 | 藏 1331 | 惠 
1332 | 话 1333 | 循 1334 | 虫 1335 | O 1336 | 绩 1337 | 泥 1338 | 济 1339 | 健 1340 | 匹 1341 | 珑 1342 | 允 1343 | 许 1344 | 渡 1345 | 官 1346 | 牵 1347 | 蒙 1348 | € 1349 | 冒 1350 | 拢 1351 | 帐 1352 | 篷 1353 | 兰 1354 | 尽 1355 | 跟 1356 | 瓷 1357 | 威 1358 | 奶 1359 | 拾 1360 | 浦 1361 | 舍 1362 | 庙 1363 | 仿 1364 | 吊 1365 | 羽 1366 | 翔 1367 | 鸟 1368 | 率 1369 | 沾 1370 | 忙 1371 | 毫 1372 | 男 1373 | 曾 1374 | 散 1375 | 吗 1376 | 皆 1377 | 別 1378 | 负 1379 | 链 1380 | 养 1381 | ⑤ 1382 | 骤 1383 | 凸 1384 | 厨 1385 | 柜 1386 | 滨 1387 | 彼 1388 | 坚 1389 | 衡 1390 | 认 1391 | 真 1392 | 坏 1393 | 善 1394 | 舒 1395 | 希 1396 | 腊 1397 | 唯 1398 | 汇 1399 | 充 1400 | 莱 1401 | 批 1402 | 素 1403 | 梦 1404 | 介 1405 | 洞 1406 | 叶 1407 | 杨 1408 | 挨 1409 | 祝 1410 | 苑 1411 | 谈 1412 | 拆 1413 | 侨 1414 | 纷 1415 | 绚 1416 | 烂 1417 | 势 1418 | 伊 1419 | 朗 1420 | 委 1421 | 遭 1422 | 检 1423 | 命 1424 | 清 1425 | 煤 1426 | 炭 1427 | 厢 1428 | 猜 1429 | 椅 1430 | 栽 1431 | 返 1432 | 什 1433 | 途 1434 | 橇 1435 | 泊 1436 | 宛 1437 | 核 1438 | 芯 1439 | 趸 1440 | 友 1441 | 荐 1442 | 撼 1443 | 票 1444 | 陡 1445 | ɑ 1446 | 协 1447 | 歌 1448 | 崖 1449 | 宇 1450 | 堪 1451 | 徽 1452 | 辟 1453 | 隆 1454 | 典 1455 | 凯 1456 | 届 1457 | 议 1458 | 言 1459 | 玉 1460 | 棍 1461 | 兔 1462 | 租 1463 | 冉 1464 | 才 1465 | 念 1466 | 贤 1467 | 筷 1468 | 贝 1469 | 编 1470 | 莉 1471 | 爷 1472 | 墅 1473 | 枚 1474 | 霞 1475 | 妹 1476 | 幸 1477 | 绣 1478 | 泰 1479 | 详 1480 | 坯 1481 | 俗 1482 | 魅 1483 | 陕 1484 | 宫 1485 | 鼓 1486 | 谁 1487 | ⊿ 1488 | 蜂 1489 | 巢 1490 | ∘ 1491 | 笑 1492 | 脸 1493 | ★ 1494 | 隐 1495 | 吮 1496 | 烧 1497 | 豆 1498 | 浆 1499 | 巾 1500 | M 1501 | 飓 1502 | 灾 1503 | 害 1504 | 沙 1505 | 坑 1506 | 弋 1507 | 波 1508 | 阵 1509 | 則 1510 | 辺 1511 | 青 1512 | 伙 1513 | 伴 1514 | 守 1515 | 偶 1516 | 绸 1517 | 衔 1518 | 废 1519 | 弃 1520 | 女 1521 | 鞋 1522 | 携 1523 | 焦 1524 | 昔 1525 | 浙 1526 | 庵 1527 | 蠡 1528 | 耸 1529 | 筋 1530 | 混 1531 | 阁 1532 | 概 1533 | 句 1534 | 虽 1535 | 导 1536 | 脱 1537 | 战 1538 | 眩 1539 | 夺 1540 | 冠 1541 | 凳 1542 | 靶 1543 | 储 1544 | 阻 1545 | 扣 1546 | 渐 1547 | 让 1548 | 析 1549 | ◎ 1550 | 罗 1551 | ∴ 1552 | 戊 1553 | 销 1554 | 魔 1555 | 征 1556 | 消 1557 | 溶 1558 | 谐 1559 | 候 1560 | 享 1561 | 骑 1562 | 測 1563 | 极 1564 | 丰 1565 | 富 1566 | 斯 1567 | 鹞 1568 | 踞 1569 | 蓉 1570 | 仅 1571 | 党 1572 | 译 1573 | 专 1574 | 唐 1575 | 扳 1576 | 订 1577 | 盲 1578 | 粮 1579 | 剧 1580 | 绝 1581 | 宠 1582 | 锁 1583 | 笼 1584 | 仍 1585 | 祖 1586 | Ⅰ 1587 | Ⅱ 1588 | 谓 1589 | 畅 1590 | 宁 1591 | 违 1592 | 资 1593 | 声 1594 | 汛 1595 | 飘 1596 | 冷 1597 | 萧 1598 | 瑟 1599 | 冬 1600 | 悠 1601 | 瞬 1602 | 姿 1603 | 惊 1604 | 艳 1605 | 郊 1606 | 追 1607 | 袭 1608 | 严 1609 | 踪 1610 | 群 1611 | 缉 1612 | 逻 1613 | 锅 1614 | 炉 1615 | 趟 1616 | 廓 1617 | 顾 1618 | 霍 1619 | 邱 1620 | 县 1621 | 荒 1622 | 灌 1623 | 抗 1624 | 疾 1625 | 浏 1626 | 稍 1627 | 峡 1628 | 荫 1629 | 尊 1630 | 聚 1631 | ˚ 1632 | 楚 1633 | 锋 1634 | 浅 1635 | 松 1636 | 宜 1637 | 众 1638 | 缙 1639 | 邢 1640 | 贯 1641 | 纽 1642 | 冲 1643 | 娱 1644 | 铡 1645 | 篙 1646 | 遣 1647 | 巴 1648 | 灵 1649 | 畔 1650 | 瞭 1651 | 髙 1652 | 徒 1653 | 骇 1654 | 媒 1655 | 史 1656 | 敌 1657 | 溪 1658 | 凤 1659 | 凰 1660 | 氢 1661 | 困 1662 | 难 1663 | 鑫 1664 | 浪 1665 | 贸 1666 | 纠 1667 | 缠 1668 | 绑 1669 | 召 1670 | 阿 1671 | 词 1672 | 脑 1673 | 遵 1674 | 跑 1675 | 傍 1676 | 陪 1677 | 夜 1678 | 紫 1679 | 奔 1680 | 腾 1681 | 集 1682 | 狭 1683 | 恒 1684 | 键 1685 | 崛 1686 | 钱 1687 | 龙 1688 | 欧 1689 | 撕 1690 | 透 1691 | ℃ 1692 | 诲 1693 | 津 1694 | 音 1695 | 朋 1696 | 挺 1697 | 俩 1698 | 挤 1699 | 汶 1700 | 川 1701 | 旦 1702 | 领 1703 | 舟 1704 | 迭 1705 | 柏 1706 | 婴 1707 | 儿 1708 | 铆 1709 | 茶 1710 | ‘ 1711 | 仕 1712 | 垃 1713 | 圾 1714 | 软 1715 | 功 1716 | 涨 1717 | 妙 1718 | 戴 1719 | 疆 1720 | 哈 1721 | 萨 1722 | 苗 1723 | 尔 1724 | 鸡 1725 | 兵 1726 | 崭 
1727 | 牢 1728 | 匠 1729 | 枳 1730 | 湿 1731 | 沈 1732 | 欢 1733 | 坦 1734 | 武 1735 | 免 1736 | ∅ 1737 | 梭 1738 | 培 1739 | 彬 1740 | 魄 1741 | 渝 1742 | 湾 1743 | 碧 1744 | 煌 1745 | 桷 1746 | 垭 1747 | 毗 1748 | 喻 1749 | 晓 1750 | 谣 1751 | 昌 1752 | 皇 1753 | 皋 1754 | 宋 1755 | 奇 1756 | 扁 1757 | -------------------------------------------------------------------------------- /GeoQA+/mcan.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.nn as nn 4 | import torchvision 5 | import math 6 | 7 | #ddcddc 8 | def build_model(): 9 | cnn = getattr(torchvision.models, 'resnet101')(pretrained=True) 10 | layers = [cnn.conv1, 11 | cnn.bn1, 12 | cnn.relu, 13 | cnn.maxpool] 14 | for i in range(3): 15 | name = 'layer%d' % (i + 1) 16 | layers.append(getattr(cnn, name)) 17 | model = torch.nn.Sequential(*layers) 18 | model.cuda() 19 | model.eval() 20 | return model 21 | 22 | 23 | def make_mask(feature): 24 | return (torch.sum(torch.abs(feature),dim=-1) == 0).unsqueeze(1).unsqueeze(2) 25 | 26 | 27 | class Cfgs: 28 | def __init__(self): 29 | super(Cfgs, self).__init__() 30 | self.LAYER = 6 31 | self.HIDDEN_SIZE =512 32 | self.BBOXFEAT_EMB_SIZE = 2048 33 | self.FF_SIZE = 2048 34 | self.MULTI_HEAD = 8 35 | self.DROPOUT_R = 0.1 36 | self.FLAT_MLP_SIZE = 512 37 | self.FLAT_GLIMPSES = 1 38 | # self.FLAT_OUT_SIZE = 1024 39 | self.FLAT_OUT_SIZE = 512 40 | self.USE_AUX_FEAT = False 41 | self.USE_BBOX_FEAT = False 42 | 43 | 44 | class MCA_ED(nn.Module): 45 | def __init__(self, __C): 46 | super(MCA_ED, self).__init__() 47 | self.enc_list = nn.ModuleList([SA(__C) for _ in range(__C.LAYER)]) 48 | self.dec_list = nn.ModuleList([SGA(__C) for _ in range(__C.LAYER)]) 49 | 50 | def forward(self, lang, image, lang_mask, image_mask): # lang, image 51 | for enc in self.enc_list: 52 | lang = enc(lang, lang_mask) 53 | 54 | for dec in self.dec_list: 55 | image = dec(image, lang, image_mask, lang_mask) 56 | 57 | return lang, image 58 | 59 | 60 | class SA(nn.Module): 61 | def __init__(self, __C): 62 | super(SA, self).__init__() 63 | 64 | self.mhatt = MHAtt(__C) 65 | self.ffn = FFN(__C) 66 | 67 | self.dropout1 = nn.Dropout(__C.DROPOUT_R) 68 | self.norm1 = LayerNorm(__C.HIDDEN_SIZE) 69 | 70 | self.dropout2 = nn.Dropout(__C.DROPOUT_R) 71 | self.norm2 = LayerNorm(__C.HIDDEN_SIZE) 72 | 73 | def forward(self, y, y_mask): 74 | y = self.norm1(y + self.dropout1( 75 | self.mhatt(y, y, y, y_mask) 76 | )) 77 | 78 | y = self.norm2(y + self.dropout2( 79 | self.ffn(y) 80 | )) 81 | 82 | return y 83 | 84 | 85 | class SGA(nn.Module): 86 | def __init__(self, __C): 87 | super(SGA, self).__init__() 88 | 89 | self.mhatt1 = MHAtt(__C) 90 | self.mhatt2 = MHAtt(__C) 91 | self.ffn = FFN(__C) 92 | 93 | self.dropout1 = nn.Dropout(__C.DROPOUT_R) 94 | self.norm1 = LayerNorm(__C.HIDDEN_SIZE) 95 | 96 | self.dropout2 = nn.Dropout(__C.DROPOUT_R) 97 | self.norm2 = LayerNorm(__C.HIDDEN_SIZE) 98 | 99 | self.dropout3 = nn.Dropout(__C.DROPOUT_R) 100 | self.norm3 = LayerNorm(__C.HIDDEN_SIZE) 101 | 102 | def forward(self, x, y, x_mask, y_mask): 103 | x = self.norm1(x + self.dropout1( 104 | self.mhatt1(v=x, k=x, q=x, mask=x_mask) 105 | )) 106 | 107 | x = self.norm2(x + self.dropout2( 108 | self.mhatt2(v=y, k=y, q=x, mask=y_mask) 109 | )) 110 | 111 | x = self.norm3(x + self.dropout3( 112 | self.ffn(x) 113 | )) 114 | 115 | return x 116 | 117 | 118 | class LayerNorm(nn.Module): 119 | def __init__(self, size, eps=1e-6): 120 | super(LayerNorm, self).__init__() 121 | self.eps = eps 122 | 123 | 
self.a_2 = nn.Parameter(torch.ones(size)) 124 | self.b_2 = nn.Parameter(torch.zeros(size)) 125 | 126 | def forward(self, x): 127 | mean = x.mean(-1, keepdim=True) 128 | std = x.std(-1, keepdim=True) 129 | 130 | return self.a_2 * (x - mean) / (std + self.eps) + self.b_2 131 | 132 | 133 | class MHAtt(nn.Module): 134 | def __init__(self, __C): 135 | super(MHAtt, self).__init__() 136 | self.__C = __C 137 | 138 | self.linear_v = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE) 139 | self.linear_k = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE) 140 | self.linear_q = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE) 141 | self.linear_merge = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE) 142 | 143 | self.dropout = nn.Dropout(__C.DROPOUT_R) 144 | 145 | def forward(self, v, k, q, mask): 146 | n_batches = q.size(0) 147 | 148 | v = self.linear_v(v).view( 149 | n_batches, 150 | -1, 151 | self.__C.MULTI_HEAD, 152 | int(self.__C.HIDDEN_SIZE / self.__C.MULTI_HEAD) 153 | ).transpose(1, 2) 154 | 155 | k = self.linear_k(k).view( 156 | n_batches, 157 | -1, 158 | self.__C.MULTI_HEAD, 159 | int(self.__C.HIDDEN_SIZE / self.__C.MULTI_HEAD) 160 | ).transpose(1, 2) 161 | 162 | q = self.linear_q(q).view( 163 | n_batches, 164 | -1, 165 | self.__C.MULTI_HEAD, 166 | int(self.__C.HIDDEN_SIZE / self.__C.MULTI_HEAD) 167 | ).transpose(1, 2) 168 | 169 | atted = self.att(v, k, q, mask) 170 | atted = atted.transpose(1, 2).contiguous().view( 171 | n_batches, 172 | -1, 173 | self.__C.HIDDEN_SIZE 174 | ) 175 | 176 | atted = self.linear_merge(atted) 177 | 178 | return atted 179 | 180 | def att(self, value, key, query, mask): 181 | d_k = query.size(-1) 182 | 183 | scores = torch.matmul( 184 | query, key.transpose(-2, -1) 185 | ) / math.sqrt(d_k) 186 | 187 | if mask is not None: 188 | scores = scores.masked_fill(mask, -1e9) 189 | 190 | att_map = F.softmax(scores, dim=-1) 191 | att_map = self.dropout(att_map) 192 | 193 | return torch.matmul(att_map, value) 194 | 195 | 196 | class FFN(nn.Module): 197 | def __init__(self, __C): 198 | super(FFN, self).__init__() 199 | 200 | self.mlp = MLP( 201 | in_size=__C.HIDDEN_SIZE, 202 | mid_size=__C.FF_SIZE, 203 | out_size=__C.HIDDEN_SIZE, 204 | dropout_r=__C.DROPOUT_R, 205 | use_relu=True 206 | ) 207 | 208 | def forward(self, x): 209 | return self.mlp(x) 210 | 211 | 212 | class MLP(nn.Module): 213 | def __init__(self, in_size, mid_size, out_size, dropout_r=0., use_relu=True): 214 | super(MLP, self).__init__() 215 | 216 | self.fc = FC(in_size, mid_size, dropout_r=dropout_r, use_relu=use_relu) 217 | self.linear = nn.Linear(mid_size, out_size) 218 | 219 | def forward(self, x): 220 | return self.linear(self.fc(x)) 221 | 222 | 223 | class FC(nn.Module): 224 | def __init__(self, in_size, out_size, dropout_r=0., use_relu=True): 225 | super(FC, self).__init__() 226 | self.dropout_r = dropout_r 227 | self.use_relu = use_relu 228 | 229 | self.linear = nn.Linear(in_size, out_size) 230 | 231 | if use_relu: 232 | self.relu = nn.ReLU(inplace=True) 233 | 234 | if dropout_r > 0: 235 | self.dropout = nn.Dropout(dropout_r) 236 | 237 | def forward(self, x): 238 | x = self.linear(x) 239 | 240 | if self.use_relu: 241 | x = self.relu(x) 242 | 243 | if self.dropout_r > 0: 244 | x = self.dropout(x) 245 | 246 | return x 247 | 248 | 249 | class AttFlat(nn.Module): 250 | def __init__(self, __C): 251 | super(AttFlat, self).__init__() 252 | self.__C = __C 253 | 254 | self.mlp = MLP( 255 | in_size=__C.HIDDEN_SIZE, 256 | mid_size=__C.FLAT_MLP_SIZE, 257 | out_size=__C.FLAT_GLIMPSES, 258 | dropout_r=__C.DROPOUT_R, 259 | use_relu=True 260 | ) 
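# the MLP scores each position with FLAT_GLIMPSES attention logits; the masked,
# softmax-weighted sums are concatenated and projected to FLAT_OUT_SIZE by linear_merge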
261 | 262 | self.linear_merge = nn.Linear( 263 | __C.HIDDEN_SIZE * __C.FLAT_GLIMPSES, 264 | __C.FLAT_OUT_SIZE 265 | ) 266 | 267 | def forward(self, x, x_mask): 268 | att = self.mlp(x) 269 | att = att.masked_fill( 270 | x_mask.squeeze(1).squeeze(1).unsqueeze(2), 271 | -1e9 272 | ) 273 | att = F.softmax(att, dim=1) 274 | 275 | att_list = [] 276 | for i in range(self.__C.FLAT_GLIMPSES): 277 | att_list.append( 278 | torch.sum(att[:, :, i: i + 1] * x, dim=1) 279 | ) 280 | 281 | x_atted = torch.cat(att_list, dim=1) 282 | x_atted = self.linear_merge(x_atted) 283 | 284 | return x_atted -------------------------------------------------------------------------------- /GeoQA+/requirements.txt: -------------------------------------------------------------------------------- 1 | alabaster==0.7.12 2 | allennlp==0.9.0 3 | attrs==21.2.0 4 | Babel==2.9.1 5 | blis==0.2.4 6 | 7 | botocore==1.20.91 8 | cached-property==1.5.2 9 | certifi==2021.5.30 10 | chardet==4.0.0 11 | click==8.0.1 12 | conllu==1.3.1 13 | cycler==0.10.0 14 | cymem==2.0.5 15 | Cython==0.29.23 16 | 17 | distro==1.5.0 18 | docutils==0.17.1 19 | editdistance==0.5.3 20 | flaky==3.7.0 21 | Flask==2.0.1 22 | Flask-Cors==3.0.10 23 | ftfy==6.0.3 24 | gevent==21.1.2 25 | greenlet==1.1.0 26 | h5py==3.1.0 27 | idna==2.10 28 | imagesize==1.2.0 29 | importlib-metadata==4.5.0 30 | iniconfig==1.1.1 31 | itsdangerous==2.0.1 32 | jieba==0.42.1 33 | Jinja2==3.0.1 34 | jmespath==0.10.0 35 | joblib==1.0.1 36 | jsonnet==0.17.0 37 | jsonpickle==2.0.0 38 | kiwisolver==1.3.1 39 | MarkupSafe==2.0.1 40 | matplotlib==3.3.4 41 | murmurhash==1.0.5 42 | nltk==3.6.2 43 | numpy==1.19.5 44 | numpydoc==1.1.0 45 | opencv-python==4.2.0.32 46 | overrides==3.1.0 47 | packaging==20.9 48 | parsimonious==0.8.1 49 | Pillow==8.2.0 50 | plac==0.9.6 51 | pluggy==0.13.1 52 | preshed==2.0.1 53 | protobuf==3.17.3 54 | py==1.10.0 55 | Pygments==2.9.0 56 | pyparsing==2.4.7 57 | pytest==6.2.4 58 | python-dateutil==2.8.1 59 | pytorch-pretrained-bert==0.6.2 60 | pytorch-transformers==1.1.0 61 | pytz==2021.1 62 | regex==2021.4.4 63 | requests==2.25.1 64 | responses==0.13.3 65 | s3transfer==0.4.2 66 | scikit-build==0.11.1 67 | scikit-learn==0.24.2 68 | scipy==1.5.4 69 | sentencepiece==0.1.95 70 | six==1.16.0 71 | snowballstemmer==2.1.0 72 | spacy==2.1.9 73 | Sphinx==4.0.2 74 | sphinxcontrib-applehelp==1.0.2 75 | sphinxcontrib-devhelp==1.0.2 76 | sphinxcontrib-htmlhelp==2.0.0 77 | sphinxcontrib-jsmath==1.0.1 78 | sphinxcontrib-qthelp==1.0.3 79 | sphinxcontrib-serializinghtml==1.1.5 80 | sqlparse==0.4.1 81 | srsly==1.0.5 82 | tensorboardX==2.2 83 | thinc==7.0.8 84 | threadpoolctl==2.1.0 85 | toml==0.10.2 86 | torch==1.2.0 87 | torchvision==0.4.0 88 | tqdm==4.61.0 89 | typing-extensions==3.10.0.0 90 | typing-utils==0.1.0 91 | Unidecode==1.2.0 92 | urllib3==1.26.5 93 | wasabi==0.8.2 94 | wcwidth==0.2.5 95 | Werkzeug==2.0.1 96 | word2number==1.1 97 | zipp==3.4.1 98 | zope.event==4.5.0 99 | zope.interface==5.4.0 100 | -------------------------------------------------------------------------------- /GeoQA+/resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | #torch.backends.cudnn.enabled = False 5 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 6 | 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 7 | 'wide_resnet50_2', 'wide_resnet101_2'] 8 | 9 | 10 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 11 | """3x3 convolution with padding""" 12 | return 
nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 13 | padding=dilation, groups=groups, bias=False, dilation=dilation) 14 | 15 | 16 | def conv1x1(in_planes, out_planes, stride=1): 17 | """1x1 convolution""" 18 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 19 | 20 | 21 | class BasicBlock(nn.Module): 22 | expansion = 1 23 | __constants__ = ['downsample'] 24 | 25 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 26 | base_width=64, dilation=1, norm_layer=None): 27 | super(BasicBlock, self).__init__() 28 | if norm_layer is None: 29 | norm_layer = nn.BatchNorm2d 30 | if groups != 1 or base_width != 64: 31 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 32 | if dilation > 1: 33 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 34 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 35 | self.conv1 = conv3x3(inplanes, planes, stride) 36 | self.bn1 = norm_layer(planes) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.conv2 = conv3x3(planes, planes) 39 | self.bn2 = norm_layer(planes) 40 | self.downsample = downsample 41 | self.stride = stride 42 | 43 | def forward(self, x): 44 | identity = x 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | out = self.conv2(out) 49 | out = self.bn2(out) 50 | 51 | if self.downsample is not None: 52 | identity = self.downsample(x) 53 | 54 | out += identity 55 | out = self.relu(out) 56 | 57 | return out 58 | 59 | 60 | class Bottleneck(nn.Module): 61 | expansion = 4 62 | __constants__ = ['downsample'] 63 | 64 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 65 | base_width=64, dilation=1, norm_layer=None): 66 | super(Bottleneck, self).__init__() 67 | if norm_layer is None: 68 | norm_layer = nn.BatchNorm2d 69 | width = int(planes * (base_width / 64.)) * groups 70 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 71 | self.conv1 = conv1x1(inplanes, width) 72 | self.bn1 = norm_layer(width) 73 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 74 | self.bn2 = norm_layer(width) 75 | self.conv3 = conv1x1(width, planes * self.expansion) 76 | self.bn3 = norm_layer(planes * self.expansion) 77 | self.relu = nn.ReLU(inplace=True) 78 | self.downsample = downsample 79 | self.stride = stride 80 | 81 | def forward(self, x): 82 | identity = x 83 | 84 | out = self.conv1(x) 85 | out = self.bn1(out) 86 | out = self.relu(out) 87 | 88 | out = self.conv2(out) 89 | out = self.bn2(out) 90 | out = self.relu(out) 91 | 92 | out = self.conv3(out) 93 | out = self.bn3(out) 94 | 95 | if self.downsample is not None: 96 | identity = self.downsample(x) 97 | 98 | out += identity 99 | out = self.relu(out) 100 | 101 | return out 102 | 103 | 104 | class ResNet(nn.Module): 105 | 106 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, 107 | groups=1, width_per_group=64, replace_stride_with_dilation=None, 108 | norm_layer=None): 109 | super(ResNet, self).__init__() 110 | if norm_layer is None: 111 | norm_layer = nn.BatchNorm2d 112 | self._norm_layer = norm_layer 113 | 114 | self.inplanes = 64 115 | self.dilation = 1 116 | if replace_stride_with_dilation is None: 117 | # each element in the tuple indicates if we should replace 118 | # the 2x2 stride with a dilated convolution instead 119 | replace_stride_with_dilation = [False, False, False] 120 | if len(replace_stride_with_dilation) != 3: 121 | raise 
ValueError("replace_stride_with_dilation should be None " 122 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 123 | self.groups = groups 124 | self.base_width = width_per_group 125 | self.conv1 = nn.Conv2d(1, self.inplanes, kernel_size=7, stride=2, padding=3, 126 | bias=False) 127 | self.bn1 = norm_layer(self.inplanes) 128 | self.relu = nn.ReLU(inplace=True) 129 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 130 | self.layer1 = self._make_layer(block, 64, layers[0]) 131 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 132 | dilate=replace_stride_with_dilation[0]) 133 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 134 | dilate=replace_stride_with_dilation[1]) 135 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 136 | dilate=replace_stride_with_dilation[2]) 137 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 138 | self.fc = nn.Linear(512 * block.expansion, num_classes) 139 | 140 | for m in self.modules(): 141 | if isinstance(m, nn.Conv2d): 142 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 143 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 144 | nn.init.constant_(m.weight, 1) 145 | nn.init.constant_(m.bias, 0) 146 | 147 | # Zero-initialize the last BN in each residual branch, 148 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 149 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 150 | if zero_init_residual: 151 | for m in self.modules(): 152 | if isinstance(m, Bottleneck): 153 | nn.init.constant_(m.bn3.weight, 0) 154 | elif isinstance(m, BasicBlock): 155 | nn.init.constant_(m.bn2.weight, 0) 156 | 157 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 158 | norm_layer = self._norm_layer 159 | downsample = None 160 | previous_dilation = self.dilation 161 | if dilate: 162 | self.dilation *= stride 163 | stride = 1 164 | if stride != 1 or self.inplanes != planes * block.expansion: 165 | downsample = nn.Sequential( 166 | conv1x1(self.inplanes, planes * block.expansion, stride), 167 | norm_layer(planes * block.expansion), 168 | ) 169 | 170 | layers = [] 171 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 172 | self.base_width, previous_dilation, norm_layer)) 173 | self.inplanes = planes * block.expansion 174 | for _ in range(1, blocks): 175 | layers.append(block(self.inplanes, planes, groups=self.groups, 176 | base_width=self.base_width, dilation=self.dilation, 177 | norm_layer=norm_layer)) 178 | 179 | return nn.Sequential(*layers) 180 | 181 | def _forward_impl(self, x): 182 | # See note [TorchScript super()] 183 | x = self.conv1(x) 184 | x = self.bn1(x) 185 | x = self.relu(x) 186 | x = self.maxpool(x) 187 | 188 | x = self.layer1(x) 189 | x = self.layer2(x) 190 | x = self.layer3(x) 191 | x = self.layer4(x) 192 | 193 | x = self.avgpool(x) 194 | x = torch.flatten(x, 1) 195 | x = self.fc(x) 196 | 197 | return x 198 | 199 | def forward(self, x): 200 | return self._forward_impl(x) 201 | 202 | 203 | def _resnet(arch, block, layers, pretrained, progress, **kwargs): 204 | model = ResNet(block, layers, **kwargs) 205 | return model 206 | 207 | 208 | def resnet18(pretrained=False, progress=True, **kwargs): 209 | r"""ResNet-18 model from 210 | `"Deep Residual Learning for Image Recognition" `_ 211 | 212 | Args: 213 | pretrained (bool): If True, returns a model pre-trained on ImageNet 214 | progress (bool): If True, displays a progress bar of the 
download to stderr 215 | """ 216 | return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, 217 | **kwargs) 218 | 219 | 220 | def resnet34(pretrained=False, progress=True, **kwargs): 221 | r"""ResNet-34 model from 222 | `"Deep Residual Learning for Image Recognition" `_ 223 | 224 | Args: 225 | pretrained (bool): If True, returns a model pre-trained on ImageNet 226 | progress (bool): If True, displays a progress bar of the download to stderr 227 | """ 228 | return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, 229 | **kwargs) 230 | 231 | 232 | def resnet50(pretrained=False, progress=True, **kwargs): 233 | r"""ResNet-50 model from 234 | `"Deep Residual Learning for Image Recognition" `_ 235 | 236 | Args: 237 | pretrained (bool): If True, returns a model pre-trained on ImageNet 238 | progress (bool): If True, displays a progress bar of the download to stderr 239 | """ 240 | return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, 241 | **kwargs) 242 | 243 | 244 | def resnet101(pretrained=False, progress=True, **kwargs): 245 | r"""ResNet-101 model from 246 | `"Deep Residual Learning for Image Recognition" `_ 247 | 248 | Args: 249 | pretrained (bool): If True, returns a model pre-trained on ImageNet 250 | progress (bool): If True, displays a progress bar of the download to stderr 251 | """ 252 | return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, 253 | **kwargs) 254 | 255 | 256 | def resnet152(pretrained=False, progress=True, **kwargs): 257 | r"""ResNet-152 model from 258 | `"Deep Residual Learning for Image Recognition" `_ 259 | 260 | Args: 261 | pretrained (bool): If True, returns a model pre-trained on ImageNet 262 | progress (bool): If True, displays a progress bar of the download to stderr 263 | """ 264 | return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, 265 | **kwargs) 266 | 267 | 268 | def resnext50_32x4d(pretrained=False, progress=True, **kwargs): 269 | r"""ResNeXt-50 32x4d model from 270 | `"Aggregated Residual Transformation for Deep Neural Networks" `_ 271 | 272 | Args: 273 | pretrained (bool): If True, returns a model pre-trained on ImageNet 274 | progress (bool): If True, displays a progress bar of the download to stderr 275 | """ 276 | kwargs['groups'] = 32 277 | kwargs['width_per_group'] = 4 278 | return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], 279 | pretrained, progress, **kwargs) 280 | 281 | 282 | def resnext101_32x8d(pretrained=False, progress=True, **kwargs): 283 | r"""ResNeXt-101 32x8d model from 284 | `"Aggregated Residual Transformation for Deep Neural Networks" `_ 285 | 286 | Args: 287 | pretrained (bool): If True, returns a model pre-trained on ImageNet 288 | progress (bool): If True, displays a progress bar of the download to stderr 289 | """ 290 | kwargs['groups'] = 32 291 | kwargs['width_per_group'] = 8 292 | return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], 293 | pretrained, progress, **kwargs) 294 | 295 | 296 | def wide_resnet50_2(pretrained=False, progress=True, **kwargs): 297 | r"""Wide ResNet-50-2 model from 298 | `"Wide Residual Networks" `_ 299 | 300 | The model is the same as ResNet except for the bottleneck number of channels 301 | which is twice larger in every block. The number of channels in outer 1x1 302 | convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 303 | channels, and in Wide ResNet-50-2 has 2048-1024-2048. 
304 | 305 | Args: 306 | pretrained (bool): If True, returns a model pre-trained on ImageNet 307 | progress (bool): If True, displays a progress bar of the download to stderr 308 | """ 309 | kwargs['width_per_group'] = 64 * 2 310 | return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], 311 | pretrained, progress, **kwargs) 312 | 313 | 314 | def wide_resnet101_2(pretrained=False, progress=True, **kwargs): 315 | r"""Wide ResNet-101-2 model from 316 | `"Wide Residual Networks" `_ 317 | 318 | The model is the same as ResNet except for the bottleneck number of channels 319 | which is twice larger in every block. The number of channels in outer 1x1 320 | convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 321 | channels, and in Wide ResNet-50-2 has 2048-1024-2048. 322 | 323 | Args: 324 | pretrained (bool): If True, returns a model pre-trained on ImageNet 325 | progress (bool): If True, displays a progress bar of the download to stderr 326 | """ 327 | kwargs['width_per_group'] = 64 * 2 328 | return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], 329 | pretrained, progress, **kwargs) 330 | -------------------------------------------------------------------------------- /GeoQA+/save/test/stderr.log: -------------------------------------------------------------------------------- 1 | Traceback (most recent call last): 2 | File "D:\python37\lib\runpy.py", line 193, in _run_module_as_main 3 | "__main__", mod_spec) 4 | File "D:\python37\lib\runpy.py", line 85, in _run_code 5 | exec(code, run_globals) 6 | File "D:\python37\Scripts\allennlp.exe\__main__.py", line 7, in 7 | File "D:\python37\lib\site-packages\allennlp\run.py", line 18, in run 8 | main(prog="allennlp") 9 | File "D:\python37\lib\site-packages\allennlp\commands\__init__.py", line 102, in main 10 | args.func(args) 11 | File "D:\python37\lib\site-packages\allennlp\commands\train.py", line 124, in train_model_from_args 12 | args.cache_prefix) 13 | File "D:\python37\lib\site-packages\allennlp\commands\train.py", line 168, in train_model_from_file 14 | cache_directory, cache_prefix) 15 | File "D:\python37\lib\site-packages\allennlp\commands\train.py", line 212, in train_model 16 | check_for_gpu(cuda_device) 17 | File "D:\python37\lib\site-packages\allennlp\common\checks.py", line 81, in check_for_gpu 18 | raise ConfigurationError("Experiment specified a GPU but none is available;" 19 | allennlp.common.checks.ConfigurationError: "Experiment specified a GPU but none is available; if you want to run on CPU use the override 'trainer.cuda_device=-1' in the json config file." 
20 | 
--------------------------------------------------------------------------------
/GeoQA+/save/test/stdout.log:
--------------------------------------------------------------------------------
1 | 2022-02-23 10:50:48,193 - INFO - allennlp.common.params - random_seed = 13370
2 | 2022-02-23 10:50:48,194 - INFO - allennlp.common.params - numpy_seed = 1337
3 | 2022-02-23 10:50:48,194 - INFO - allennlp.common.params - pytorch_seed = 133
4 | 2022-02-23 10:50:48,204 - INFO - allennlp.common.checks - Pytorch version: 1.9.1+cpu
5 | 
--------------------------------------------------------------------------------
/GeoQA+/save/test/txt:
--------------------------------------------------------------------------------
1 | test
2 | 
--------------------------------------------------------------------------------
/GeoQA+/utils.py:
--------------------------------------------------------------------------------
1 | from allennlp.data.fields import *
2 | from allennlp.data.instance import Instance
3 | from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer
4 | from allennlp.nn.util import get_text_field_mask
5 | from allennlp.data.tokenizers import Token
6 | from allennlp.models import BasicClassifier, Model
7 | from allennlp.training.metrics.fbeta_measure import FBetaMeasure
8 | from allennlp.data import Vocabulary
9 | from allennlp.models.model import Model
10 | from allennlp.modules import Seq2SeqEncoder, Seq2VecEncoder, TextFieldEmbedder
11 | from allennlp.nn import InitializerApplicator, RegularizerApplicator
12 | from allennlp.training.metrics import F1Measure, Average, Metric
13 | from allennlp.common.params import Params
14 | from allennlp.commands.train import train_model
15 | from allennlp.data import Instance
16 | from allennlp.data.dataset_readers import DatasetReader
17 | from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer
18 | from allennlp.data.tokenizers import Tokenizer
19 | from allennlp.common.util import START_SYMBOL, END_SYMBOL
20 | from allennlp.training.metrics.metric import Metric
21 | from allennlp.nn import util
22 | 
23 | from typing import *
24 | from overrides import overrides
25 | import jieba
26 | import numpy as np
27 | import pickle
28 | import torch
29 | import torch.nn as nn
30 | import torch.nn.functional as F
31 | import resnet
32 | import cv2 as cv
33 | import os
34 | torch.manual_seed(123)
35 | 
36 | 
37 | def process_image(img, min_side=224):  # resize proportionally, then pad
38 |     size = img.shape
39 |     h, w = size[0], size[1]
40 |     # scale the longer side down to min_side
41 |     scale = max(w, h) / float(min_side)
42 |     new_w, new_h = int(w/scale), int(h/scale)
43 |     resize_img = cv.resize(img, (new_w, new_h))
44 |     # pad up to min_side * min_side
45 |     # padding is added on the bottom and on the right
46 |     top, bottom, left, right = 0, min_side-new_h, 0, min_side-new_w
47 | 
48 |     pad_img = cv.copyMakeBorder(resize_img, int(top), int(bottom), int(left), int(right),
49 |                                 cv.BORDER_CONSTANT, value=[255,255,255])  # number of pixels to extend from the image border on the top, bottom, left and right
50 | 
51 |     return pad_img
52 | 
53 | 
54 | 
55 | 
56 | @DatasetReader.register("s2s_manual_reader")
57 | class SeqReader(DatasetReader):
58 |     def __init__(self,
59 |                  tokenizer: Tokenizer = None,
60 |                  source_token_indexer: Dict[str, TokenIndexer] = None,
61 |                  target_token_indexer: Dict[str, TokenIndexer] = None,
62 |                  model_name: str = None) -> None:
63 |         super().__init__(lazy=False)
64 |         self._tokenizer = tokenizer
65 |         self._source_token_indexer = source_token_indexer
66 |         self._target_token_indexer = target_token_indexer
67 |         self._model_name = model_name
68 | 
69 |         sub_dict_path = "data/sub_dataset_dict.pk"  # problem types
"data/sub_dataset_dict.pk" # problems type 70 | with open(sub_dict_path, 'rb') as file: 71 | subset_dict = pickle.load(file) 72 | self.subset_dict = subset_dict 73 | 74 | self.all_points = ['切线', '垂径定理', '勾股定理', '同位角', '平行线', '三角形内角和', '三角形中位线', '平行四边形', 75 | '相似三角形', '正方形', '圆周角', '直角三角形', '距离', '邻补角', '圆心角', '圆锥的计算', '三角函数', 76 | '矩形', '旋转', '等腰三角形', '外接圆', '内错角', '菱形', '多边形', '对顶角', '三角形的外角', '角平分线', 77 | '对称', '立体图形', '三视图', '圆内接四边形', '垂直平分线', '垂线', '扇形面积', '等边三角形', '平移', 78 | '含30度角的直角三角形', '仰角', '三角形的外接圆与外心', '方向角', '坡角', '直角三角形斜边上的中线', '位似', 79 | '平行线分线段成比例', '坐标与图形性质', '圆柱的计算', '俯角', '射影定理', '黄金分割', '钟面角', '多边形内角和', '外接圆', '弦长', '长度', '中垂线', 80 | '相交线', '全等三角形', '梯形', '锐角', '补角', '比例线段', '比例角度', '圆形', '正多边形', '同旁内角', '余角', '三角形的重心', '旋转角', '中心对称', 81 | '三角形的内心', '投影', '对角线','弧长的计算' , '平移的性质' , '位似变换' ,'菱形的性质' ,'正方形的性质'] 82 | #弧长的计算 平移的性质 位似变换 菱形的 性质 正方形的性质 83 | 84 | 85 | #self.all_points = ['切线'] 86 | @overrides 87 | def _read(self, file_path: str): 88 | with open(file_path, 'rb') as f: 89 | dataset = pickle.load(f) 90 | for sample in dataset: 91 | yield self.text_to_instance(sample) 92 | 93 | @overrides 94 | def text_to_instance(self, sample) -> Instance: 95 | fields = {} 96 | 97 | image = sample['image'] 98 | image = process_image(image) 99 | image = image/255 100 | img_rgb = np.zeros((3, image.shape[0], image.shape[1])) 101 | for i in range(3): 102 | img_rgb[i, :, :] = image 103 | fields['image'] = ArrayField(img_rgb) 104 | 105 | s_token = self._tokenizer.tokenize(' '.join(sample['token_list'])) 106 | fields['source_tokens'] = TextField(s_token, self._source_token_indexer) 107 | 108 | t_token = self._tokenizer.tokenize(' '.join(sample['manual_program'])) 109 | t_token.insert(0, Token(START_SYMBOL)) 110 | t_token.append(Token(END_SYMBOL)) 111 | fields['target_tokens'] = TextField(t_token, self._target_token_indexer) 112 | fields['source_nums'] = MetadataField(sample['numbers']) 113 | fields['choice_nums'] = MetadataField(sample['choice_nums']) 114 | fields['label'] = MetadataField(sample['label']) 115 | 116 | type = self.subset_dict[sample['id']] 117 | fields['type'] = MetadataField(type) 118 | fields['data_id'] = MetadataField(sample['id']) 119 | equ_list = [] 120 | 121 | equ = sample['manual_program'] 122 | equ_token = self._tokenizer.tokenize(' '.join(equ)) 123 | equ_token.insert(0, Token(START_SYMBOL)) 124 | equ_token.append(Token(END_SYMBOL)) 125 | equ_token = TextField(equ_token, self._source_token_indexer) 126 | equ_list.append(equ_token) 127 | 128 | fields['equ_list'] = ListField(equ_list) 129 | fields['manual_program'] = MetadataField(sample['manual_program']) 130 | 131 | point_label = np.zeros(77, np.float32) 132 | exam_points = sample['formal_point'] 133 | for point in exam_points: 134 | point_id = self.all_points.index(point) 135 | point_label[point_id] = 1 136 | fields['point_label'] = ArrayField(np.array(point_label)) 137 | 138 | return Instance(fields) 139 | 140 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GeoQA-Plus 2 | 1. Download GeoQA+ benchmark [Google Drive] 3 | 2. Download the GeoQA.rar(If it need permission please apply it through the Google drive link or contact this e-mail address to get it: jiecao@m.scnu.edu.cn, You can also contact this email address for any other issues), move it to GeoQA+/data/GeoQA2.2 path, and unzip it. 
4 | 3. Replace the vocabulary file of the pre-trained model with our "vocab.txt".
5 | # Environment
6 | pip install -r requirements.txt
7 | 
8 | # Train the model
9 | cd GeoQA+
10 | 
11 | Unzip and rename the dataset.
12 | 
13 | run: allennlp train config/NGS_Aux.json --include-package NGS_Aux -s save/test
14 | 
15 | # Evaluation
16 | 
17 | run: allennlp evaluate save/test data/GeoQA2.2/test.pk --include-package NGS_Aux_test --cuda-device 0
18 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | alabaster==0.7.12
2 | allennlp==0.9.0
3 | attrs==21.2.0
4 | Babel==2.9.1
5 | blis==0.2.4
6 | 
7 | botocore==1.20.91
8 | cached-property==1.5.2
9 | certifi==2021.5.30
10 | chardet==4.0.0
11 | click==8.0.1
12 | conllu==1.3.1
13 | cycler==0.10.0
14 | cymem==2.0.5
15 | Cython==0.29.23
16 | 
17 | distro==1.5.0
18 | docutils==0.17.1
19 | editdistance==0.5.3
20 | flaky==3.7.0
21 | Flask==2.0.1
22 | Flask-Cors==3.0.10
23 | ftfy==6.0.3
24 | gevent==21.1.2
25 | greenlet==1.1.0
26 | h5py==3.1.0
27 | idna==2.10
28 | imagesize==1.2.0
29 | importlib-metadata==4.5.0
30 | iniconfig==1.1.1
31 | itsdangerous==2.0.1
32 | jieba==0.42.1
33 | Jinja2==3.0.1
34 | jmespath==0.10.0
35 | joblib==1.0.1
36 | jsonnet==0.17.0
37 | jsonpickle==2.0.0
38 | kiwisolver==1.3.1
39 | MarkupSafe==2.0.1
40 | matplotlib==3.3.4
41 | murmurhash==1.0.5
42 | nltk==3.6.2
43 | numpy==1.19.5
44 | numpydoc==1.1.0
45 | opencv-python==4.2.0.32
46 | overrides==3.1.0
47 | packaging==20.9
48 | parsimonious==0.8.1
49 | Pillow==8.2.0
50 | plac==0.9.6
51 | pluggy==0.13.1
52 | preshed==2.0.1
53 | protobuf==3.17.3
54 | py==1.10.0
55 | Pygments==2.9.0
56 | pyparsing==2.4.7
57 | pytest==6.2.4
58 | python-dateutil==2.8.1
59 | pytorch-pretrained-bert==0.6.2
60 | pytorch-transformers==1.1.0
61 | pytz==2021.1
62 | regex==2021.4.4
63 | requests==2.25.1
64 | responses==0.13.3
65 | s3transfer==0.4.2
66 | scikit-build==0.11.1
67 | scikit-learn==0.24.2
68 | scipy==1.5.4
69 | sentencepiece==0.1.95
70 | six==1.16.0
71 | snowballstemmer==2.1.0
72 | spacy==2.1.9
73 | Sphinx==4.0.2
74 | sphinxcontrib-applehelp==1.0.2
75 | sphinxcontrib-devhelp==1.0.2
76 | sphinxcontrib-htmlhelp==2.0.0
77 | sphinxcontrib-jsmath==1.0.1
78 | sphinxcontrib-qthelp==1.0.3
79 | sphinxcontrib-serializinghtml==1.1.5
80 | sqlparse==0.4.1
81 | srsly==1.0.5
82 | tensorboardX==2.2
83 | thinc==7.0.8
84 | threadpoolctl==2.1.0
85 | toml==0.10.2
86 | torch==1.2.0
87 | torchvision==0.4.0
88 | tqdm==4.61.0
89 | typing-extensions==3.10.0.0
90 | typing-utils==0.1.0
91 | Unidecode==1.2.0
92 | urllib3==1.26.5
93 | wasabi==0.8.2
94 | wcwidth==0.2.5
95 | Werkzeug==2.0.1
96 | word2number==1.1
97 | zipp==3.4.1
98 | zope.event==4.5.0
99 | zope.interface==5.4.0
100 | 
--------------------------------------------------------------------------------
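
Before training, the pickled data files the README describes can be sanity-checked with a few lines of Python. The snippet below is a minimal sketch, not a file from this repository: it assumes the working directory is GeoQA+ after unpacking, that the pickle holds an iterable of sample dicts (which is what SeqReader._read in utils.py iterates over), and it only touches the keys that SeqReader.text_to_instance actually reads; the variable names and the choice of what to print are illustrative.

# Minimal inspection sketch (illustrative only; not part of the repository).
import pickle

with open('data/GeoQA2.2/test.pk', 'rb') as f:
    dataset = pickle.load(f)          # the reader simply iterates over this object

sample = next(iter(dataset))
print(sample['id'])                    # problem id; SeqReader uses it to look up the problem type
print(' '.join(sample['token_list']))  # problem text tokens fed to the text encoder
print(sample['manual_program'])        # target program sequence for the decoder
print(sample['numbers'], sample['choice_nums'], sample['label'])  # source numbers, choice values, answer label (kept as metadata)
print(sample['formal_point'])          # knowledge points behind the 77-dim point_label vector
print(sample['image'].shape)           # diagram array; process_image() pads it to 224 x 224 before use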