├── .gitignore
├── README.md
├── allennlp-book-r-and-d.jpg
├── allennlp-book.jpeg
├── classifier-model
│   ├── configs
│   │   └── experiment.jsonnet
│   ├── datasets
│   │   └── download.sh
│   └── src
│       ├── __init__.py
│       ├── data
│       │   ├── __init__.py
│       │   └── dataset_readers
│       │       ├── __init__.py
│       │       └── ag_news_reader.py
│       └── models
│           ├── __init__.py
│           └── text_classifier.py
├── jp-classifier-model
│   ├── configs
│   │   └── experiment.jsonnet
│   ├── datasets
│   │   └── download.sh
│   └── src
│       ├── __init__.py
│       ├── data
│       │   ├── __init__.py
│       │   ├── dataset_readers
│       │   │   ├── __init__.py
│       │   │   └── livedoor_news_reader.py
│       │   └── tokenizers
│       │       ├── __init__.py
│       │       └── janome_tokenizer.py
│       └── models
│           ├── __init__.py
│           └── text_classifier.py
├── mlflow
│   ├── .gitignore
│   ├── MLproject
│   ├── README.md
│   ├── conda.yaml
│   ├── configs
│   │   ├── ner.jsonnet
│   │   └── sequence_tagging.jsonnet
│   ├── data
│   │   └── ner_data.json
│   ├── scripts
│   │   └── train.py
│   └── src
│       ├── __init__.py
│       └── training
│           ├── __init__.py
│           └── callbacks
│               ├── __init__.py
│               └── mlflow_metrics.py
├── ner-model
│   ├── configs
│   │   ├── bert-experiment.jsonnet
│   │   └── experiment.jsonnet
│   ├── datasets
│   │   └── download.sh
│   └── src
│       ├── __init__.py
│       ├── data
│       │   ├── __init__.py
│       │   └── dataset_readers
│       │       ├── __init__.py
│       │       └── conll_2003_reader.py
│       ├── models
│       │   ├── __init__.py
│       │   └── ner_tagger.py
│       ├── predictors
│       │   ├── __init__.py
│       │   └── conll_2003_predictor.py
│       └── tests
│           ├── __init__.py
│           ├── data
│           │   ├── __init__.py
│           │   └── dataset_readers
│           │       ├── __init__.py
│           │       └── conll_2003_reader_test.py
│           ├── fixtures
│           │   ├── configs
│           │   │   └── experiment.jsonnet
│           │   └── data
│           │       └── conll2003.txt
│           └── models
│               ├── __init__.py
│               └── ner_tagger_test.py
├── nli
│   ├── .gitignore
│   ├── configs
│   │   ├── esim.jsonnet
│   │   ├── san.jsonnet
│   │   └── san_test.jsonnet
│   ├── data
│   │   └── snli_test.jsonl
│   └── src
│       ├── __init__.py
│       ├── models
│       │   ├── __init__.py
│       │   └── san.py
│       └── modules
│           ├── __init__.py
│           └── full_layer_lstm.py
├── requirements.txt
└── seq2seq
    ├── .gitignore
    ├── configs
    │   ├── common.jsonnet
    │   ├── composed_seq2seq.jsonnet
    │   └── simple_seq2seq.jsonnet
    ├── data
    │   └── dataset.py
    └── decoder.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.venv_book
.vscode
__pycache__
.DS_Store
.pytest_cache

*/datasets/*
!*/datasets/download.sh
*/tmp
pretrain_bert/

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# AllenNLP入門

This repository hosts the source code for "AllenNLP入門" (Introduction to AllenNLP) by AnnanAI. The book is on sale at [Amazon](https://www.amazon.co.jp/dp/B08GLG39DF/ref=cm_sw_em_r_mt_dp_2pGsFbYJDFYJT) and [BOOTH](https://annan-ai.booth.pm/items/1881126).
--------------------------------------------------------------------------------
/seq2seq/decoder.py:
--------------------------------------------------------------------------------
           The complete sequence would correspond to <S> w1  w2  w3  <E> <P> <P>
           and the mask would be                     1   1   1   1   1   0   0
           and let the logits be                     l1  l2  l3  l4  l5  l6
        We actually need to compare:
           the sequence          w1  w2  w3  <E> <P> <P>
           with masks            1   1   1   1   0   0
           against               l1  l2  l3  l4  l5  l6
           (where the input was) <S> w1  w2  w3  <E> <P>
        """
        # shape: (batch_size, num_decoding_steps)
        relevant_targets = targets[:, 1:].contiguous()

        # shape: (batch_size, num_decoding_steps)
        relevant_mask = target_mask[:, 1:].contiguous()

        return util.sequence_cross_entropy_with_logits(
            logits, relevant_targets, relevant_mask, label_smoothing=self._label_smoothing_ratio
        )

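    # Worked toy instance of the alignment above (the ids are made up for
    # illustration: pad=0, <S>=2, <E>=3, vocabulary size 10):
    #
    #     logits      = torch.randn(1, 6, 10)                  # 6 decoding steps
    #     targets     = torch.tensor([[2, 4, 5, 6, 3, 0, 0]])  # <S> w1 w2 w3 <E> <P> <P>
    #     target_mask = (targets != 0).long()                  # 1  1  1  1  1  0  0
    #
    # ``targets[:, 1:]`` -> [[4, 5, 6, 3, 0, 0]] lines up one-to-one with the six
    # logits, and ``sequence_cross_entropy_with_logits`` zeroes out the padding.
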
    def get_output_dim(self):
        return self._decoder_net.get_output_dim()

    def take_step(
        self, last_predictions: torch.Tensor, state: Dict[str, torch.Tensor]
    ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
        """
        Take a decoding step. This is called by the beam search class.

        Parameters
        ----------
        last_predictions : ``torch.Tensor``
            A tensor of shape ``(group_size,)``, which gives the indices of the predictions
            during the last time step.
        state : ``Dict[str, torch.Tensor]``
            A dictionary of tensors that contain the current state information
            needed to predict the next step, which includes the encoder outputs,
            the source mask, and the decoder hidden state and context. Each of these
            tensors has shape ``(group_size, *)``, where ``*`` can be any other number
            of dimensions.

        Returns
        -------
        Tuple[torch.Tensor, Dict[str, torch.Tensor]]
            A tuple of ``(log_probabilities, updated_state)``, where ``log_probabilities``
            is a tensor of shape ``(group_size, num_classes)`` containing the predicted
            log probability of each class for the next step, for each item in the group,
            while ``updated_state`` is a dictionary of tensors containing the encoder outputs,
            source mask, and updated decoder hidden state and context.

        Notes
        -----
        We treat the inputs as a batch, even though ``group_size`` is not necessarily
        equal to ``batch_size``, since the group may contain multiple states
        for each source sentence in the batch.
        """
        # shape: (group_size, num_classes)
        output_projections, state = self._prepare_output_projections(last_predictions, state)

        # shape: (group_size, num_classes)
        class_log_probabilities = F.log_softmax(output_projections, dim=-1)

        return class_log_probabilities, state

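    # How this is invoked (a sketch of the upstream AllenNLP pattern; the
    # attributes ``self._beam_search`` and ``self._start_index`` and the
    # "source_mask" state entry are assumptions based on that pattern):
    #
    #     start_predictions = state["source_mask"].new_full(
    #         (batch_size,), fill_value=self._start_index
    #     )
    #     # shapes: (batch_size, beam_size, max_steps), (batch_size, beam_size)
    #     all_top_k_predictions, log_probabilities = self._beam_search.search(
    #         start_predictions, state, self.take_step
    #     )
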
    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        all_metrics: Dict[str, float] = {}
        if not self.training:
            if self._tensor_based_metric is not None:
                all_metrics.update(
                    self._tensor_based_metric.get_metric(reset=reset)  # type: ignore
                )
            if self._token_based_metric is not None:
                all_metrics.update(self._token_based_metric.get_metric(reset=reset))  # type: ignore
        return all_metrics

    @overrides
    def forward(
        self,
        encoder_out: Dict[str, torch.LongTensor],
        target_tokens: Dict[str, torch.LongTensor] = None,
    ) -> Dict[str, torch.Tensor]:
        state = encoder_out
        decoder_init_state = self._decoder_net.init_decoder_state(state)
        state.update(decoder_init_state)

        if target_tokens:
            state_forward_loss = state if self.training else {k: v.clone() for k, v in state.items()}
            output_dict = self._forward_loss(state_forward_loss, target_tokens)
        else:
            output_dict = {}

        if not self.training:
            predictions = self._forward_beam_search(state)
            output_dict.update(predictions)

            if target_tokens:
                if self._tensor_based_metric is not None:
                    # shape: (batch_size, beam_size, max_sequence_length)
                    top_k_predictions = output_dict["predictions"]
                    # shape: (batch_size, max_predicted_sequence_length)
                    best_predictions = top_k_predictions[:, 0, :]

                    self._tensor_based_metric(  # type: ignore
                        best_predictions, target_tokens["tokens"]
                    )

                if self._token_based_metric is not None:
                    output_dict = self.post_process(output_dict)
                    predicted_tokens = output_dict["predicted_tokens"]

                    self._token_based_metric(  # type: ignore
                        predicted_tokens, self.indices_to_tokens(target_tokens["tokens"][:, 1:])
                    )

        return output_dict

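    # Call-flow summary of ``forward`` (describing the branches above):
    # - training:   teacher-forced loss only; beam search and metrics are skipped.
    # - validation: the loss pass runs on a cloned state so it cannot disturb the
    #               state that beam search consumes; beam-search predictions and
    #               metric updates follow.
    # - inference:  no ``target_tokens``, so only beam-search predictions are returned.
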
    @overrides
    def post_process(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        This method trims the output predictions to the first end symbol, replaces indices with
        corresponding tokens, and adds a field called ``predicted_tokens`` to the ``output_dict``.
        """
        predicted_indices = output_dict["predictions"]
        all_predicted_tokens = self.indices_to_tokens(predicted_indices)
        output_dict["predicted_tokens"] = all_predicted_tokens
        return output_dict

    def indices_to_tokens(self, batch_indices: numpy.ndarray) -> List[List[str]]:

        if not isinstance(batch_indices, numpy.ndarray):
            batch_indices = batch_indices.detach().cpu().numpy()

        all_tokens = []
        for indices in batch_indices:
            # Beam search gives us the top k results for each source sentence in the batch
            # but we just want the single best.
            if len(indices.shape) > 1:
                indices = indices[0]
            indices = list(indices)
            # Collect indices till the first end_symbol
            if self._end_index in indices:
                indices = indices[: indices.index(self._end_index)]
            tokens = [
                self._vocab.get_token_from_index(x, namespace=self._target_namespace)
                for x in indices
            ]
            all_tokens.append(tokens)

        return all_tokens

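
# An illustrative, self-contained toy of the contract ``take_step`` must satisfy:
# ``BeamSearch`` repeatedly calls a step function with signature
# (last_predictions, state) -> (log_probs, state). The vocabulary size, indices,
# and the uniform distribution below are all made up for the demonstration.
if __name__ == "__main__":
    import math

    from allennlp.nn.beam_search import BeamSearch

    def uniform_step(
        last_predictions: torch.Tensor, state: Dict[str, torch.Tensor]
    ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
        # Uniform log-probabilities over a 10-token vocabulary; a real decoder
        # would compute these from ``state``, as ``take_step`` does above.
        group_size = last_predictions.size(0)
        log_probs = torch.full((group_size, 10), -math.log(10.0))
        return log_probs, state

    beam = BeamSearch(end_index=3, max_steps=5, beam_size=2)
    start_predictions = torch.zeros(1, dtype=torch.long)  # a batch of one sequence
    predictions, log_probabilities = beam.search(start_predictions, {}, uniform_step)
    print(predictions.shape)  # torch.Size([1, 2, 5]): (batch, beam, steps)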
--------------------------------------------------------------------------------