├── README.md
└── code
    ├── ChatYuan_mindspore.ipynb
    ├── ChatYuan_pytorch.ipynb
    ├── T5Attention模块对齐.ipynb
    ├── T5Model预训练参数加载对齐_1.ipynb
    ├── T5Model预训练参数加载对齐_2.ipynb
    ├── T5Model预训练参数转换.ipynb
    ├── T5Tokenizer.ipynb
    ├── mindnlp
        ├── __init__.py
        ├── __pycache__
        │   ├── __init__.cpython-38.pyc
        │   └── configs.cpython-38.pyc
        ├── _legacy
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-38.pyc
        │   │   ├── functional.cpython-38.pyc
        │   │   └── initializer.cpython-38.pyc
        │   ├── amp.py
        │   ├── functional.py
        │   ├── initializer.py
        │   ├── nn
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-38.pyc
        │   │   │   ├── dropout.cpython-38.pyc
        │   │   │   └── transformer.cpython-38.pyc
        │   │   ├── dropout.py
        │   │   └── transformer.py
        │   └── transforms
        │   │   ├── __init__.py
        │   │   ├── add_token.py
        │   │   └── truncate.py
        ├── abc
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-38.pyc
        │   │   ├── callback.cpython-38.pyc
        │   │   ├── metric.cpython-38.pyc
        │   │   └── register.cpython-38.pyc
        │   ├── backbones
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-38.pyc
        │   │   │   ├── base.cpython-38.pyc
        │   │   │   ├── pretrained.cpython-38.pyc
        │   │   │   ├── seq2seq.cpython-38.pyc
        │   │   │   └── seq2vec.cpython-38.pyc
        │   │   ├── base.py
        │   │   ├── pretrained.py
        │   │   ├── seq2seq.py
        │   │   └── seq2vec.py
        │   ├── callback.py
        │   ├── metric.py
        │   ├── modules
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-38.pyc
        │   │   │   ├── decoder.cpython-38.pyc
        │   │   │   ├── embedding.cpython-38.pyc
        │   │   │   └── encoder.cpython-38.pyc
        │   │   ├── decoder.py
        │   │   ├── embedding.py
        │   │   ├── encoder.py
        │   │   └── generator.py
        │   └── register.py
        ├── configs.py
        ├── dataset
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-38.pyc
        │   │   ├── process.cpython-38.pyc
        │   │   ├── register.cpython-38.pyc
        │   │   └── utils.cpython-38.pyc
        │   ├── hf_datasets
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-38.pyc
        │   │   │   └── hf_imdb.cpython-38.pyc
        │   │   └── hf_imdb.py
        │   ├── machine_translation
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-38.pyc
        │   │   │   ├── iwslt2016.cpython-38.pyc
        │   │   │   ├── iwslt2017.cpython-38.pyc
        │   │   │   └── multi30k.cpython-38.pyc
        │   │   ├── iwslt2016.py
        │   │   ├── iwslt2017.py
        │   │   └── multi30k.py
        │   ├── process.py
        │   ├── question_answer
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-38.pyc
        │   │   │   ├── squad1.cpython-38.pyc
        │   │   │   └── squad2.cpython-38.pyc
        │   │   ├── squad1.py
        │   │   └── squad2.py
        │   ├── register.py
        │   ├── sequence_tagging
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-38.pyc
        │   │   │   ├── conll2000chunking.cpython-38.pyc
        │   │   │   └── udpos.cpython-38.pyc
        │   │   ├── conll2000chunking.py
        │   │   └── udpos.py
        │   ├── text_classification
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-38.pyc
        │   │   │   ├── agnews.cpython-38.pyc
        │   │   │   ├── amazonreviewfull.cpython-38.pyc
        │   │   │   ├── amazonreviewpolarity.cpython-38.pyc
        │   │   │   ├── cola.cpython-38.pyc
        │   │   │   ├── dbpedia.cpython-38.pyc
        │   │   │   ├── imdb.cpython-38.pyc
        │   │   │   ├── mnli.cpython-38.pyc
        │   │   │   ├── mrpc.cpython-38.pyc
        │   │   │   ├── qnli.cpython-38.pyc
        │   │   │   ├── qqp.cpython-38.pyc
        │   │   │   ├── rte.cpython-38.pyc
        │   │   │   ├── sogounews.cpython-38.pyc
        │   │   │   ├── sst2.cpython-38.pyc
        │   │   │   ├── stsb.cpython-38.pyc
        │   │   │   ├── wnli.cpython-38.pyc
        │   │   │   ├── yahooanswers.cpython-38.pyc
        │   │   │   ├── yelpreviewfull.cpython-38.pyc
        │   │   │   └── yelpreviewpolarity.cpython-38.pyc
        │   │   ├── agnews.py
        │   │   ├── amazonreviewfull.py
        │   │   ├── amazonreviewpolarity.py
        │   │   ├── cola.py
        │   │   ├── dbpedia.py
        │   │   ├── imdb.py
        │   │   ├── mnli.py
        │   │   ├── mrpc.py
        │   │   ├── qnli.py
        │   │   ├── qqp.py
        │   │   ├── rte.py
        │   │   ├── sogounews.py
        │   │   ├── sst2.py
        │   │   ├── stsb.py
        │   │   ├── wnli.py
        │   │   ├── yahooanswers.py
        │   │   ├── yelpreviewfull.py
        │   │   └── yelpreviewpolarity.py
        │   ├── text_generation
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-38.pyc
        │   │   │   ├── lcsts.cpython-38.pyc
        │   │   │   ├── penntreebank.cpython-38.pyc
        │   │   │   ├── wikitext103.cpython-38.pyc
        │   │   │   └── wikitext2.cpython-38.pyc
        │   │   ├── lcsts.py
        │   │   ├── penntreebank.py
        │   │   ├── wikitext103.py
        │   │   └── wikitext2.py
        │   └── utils.py
        ├── engine
        │   ├── __init__.py
        │   ├── callbacks
        │   │   ├── __init__.py
        │   │   ├── best_model_callback.py
        │   │   ├── callback_manager.py
        │   │   ├── checkpoint_callback.py
        │   │   ├── earlystop_callback.py
        │   │   └── timer_callback.py
        │   ├── evaluator.py
        │   ├── export.py
        │   └── trainer.py
        ├── metrics
        │   ├── __init__.py
        │   ├── accuracy.py
        │   ├── bleu.py
        │   ├── confusion_matrix.py
        │   ├── distinct.py
        │   ├── em_score.py
        │   ├── f1.py
        │   ├── matthews.py
        │   ├── pearson.py
        │   ├── perplexity.py
        │   ├── precision.py
        │   ├── recall.py
        │   ├── rouge.py
        │   ├── spearman.py
        │   └── utils.py
        ├── models
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   └── __init__.cpython-38.pyc
        │   ├── bart
        │   │   ├── __init__.py
        │   │   ├── bart.py
        │   │   └── bart_config.py
        │   ├── bert
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-38.pyc
        │   │   │   ├── bert.cpython-38.pyc
        │   │   │   └── bert_config.cpython-38.pyc
        │   │   ├── bert.py
        │   │   └── bert_config.py
        │   ├── elmo
        │   │   ├── __init__.py
        │   │   └── elmo.py
        │   ├── gpt
        │   │   ├── __init__.py
        │   │   ├── gpt.py
        │   │   └── gpt_config.py
        │   ├── gpt2
        │   │   ├── __init__.py
        │   │   ├── config_gpt2.py
        │   │   └── gpt2.py
        │   ├── gpt_neo
        │   │   ├── __init__.py
        │   │   ├── gpt_neo.py
        │   │   └── gpt_neo_config.py
        │   ├── longformer
        │   │   ├── __init__.py
        │   │   ├── longformer.py
        │   │   └── longformer_config.py
        │   ├── luke
        │   │   ├── __init__.py
        │   │   ├── luke.py
        │   │   └── luke_config.py
        │   ├── megatron_bert
        │   │   ├── __init__.py
        │   │   ├── megatron_bert.py
        │   │   └── megatron_bert_config.py
        │   ├── megatron_gpt2
        │   │   ├── __init__.py
        │   │   ├── megatron_gpt2.py
        │   │   └── megatron_gpt2_config.py
        │   ├── mobilebert
        │   │   ├── __init__.py
        │   │   ├── mobilebert.py
        │   │   └── mobilebert_config.py
        │   ├── nezha
        │   │   ├── __init__.py
        │   │   ├── nezha.py
        │   │   └── nezha_config.py
        │   ├── opt
        │   │   ├── __init__.py
        │   │   ├── opt.py
        │   │   └── opt_config.py
        │   ├── pangu_alpha
        │   │   ├── __init__.py
        │   │   ├── pangu_alpha.py
        │   │   └── pangu_alpha_config.py
        │   ├── roberta
        │   │   ├── __init__.py
        │   │   ├── roberta.py
        │   │   └── roberta_config.py
        │   ├── t5
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-38.pyc
        │   │   │   ├── t5.cpython-38.pyc
        │   │   │   └── t5_config.cpython-38.pyc
        │   │   ├── t5.py
        │   │   └── t5_config.py
        │   ├── tinybert
        │   │   ├── __init__.py
        │   │   ├── tinybert.py
        │   │   └── tinybert_config.py
        │   ├── transformer
        │   │   ├── __init__.py
        │   │   ├── transformer.py
        │   │   └── transformer_config.py
        │   ├── utils
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-38.pyc
        │   │   │   ├── activations.cpython-38.pyc
        │   │   │   ├── logging.cpython-38.pyc
        │   │   │   └── mixin.cpython-38.pyc
        │   │   ├── activations.py
        │   │   ├── logging.py
        │   │   ├── mixin.py
        │   │   └── utils.py
        │   └── xlm
        │   │   ├── __init__.py
        │   │   ├── xlm.py
        │   │   └── xlm_config.py
        ├── modules
        │   ├── __init__.py
        │   ├── attentions.py
        │   ├── beam_search.py
        │   ├── crf.py
        │   ├── decoder
        │   │   ├── __init__.py
        │   │   └── rnn_decoder.py
        │   ├── embeddings
        │   │   ├── __init__.py
        │   │   ├── fasttext_embedding.py
        │   │   └── glove_embedding.py
        │   ├── encoder
        │   │   ├── __init__.py
        │   │   ├── cnn_encoder.py
        │   │   └── rnn_encoder.py
        │   ├── loss.py
        │   └── rnns.py
        ├── transforms
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-38.pyc
        │   │   ├── lookup.cpython-38.pyc
        │   │   └── pad_transform.cpython-38.pyc
        │   ├── lookup.py
        │   ├── pad_transform.py
        │   └── tokenizers
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │       ├── __init__.cpython-38.pyc
        │   │       ├── basic_tokenizer.cpython-38.pyc
        │   │       └── bert_tokenizer.cpython-38.pyc
        │   │   ├── basic_tokenizer.py
        │   │   ├── bert_tokenizer.py
        │   │   └── t5_tokenizer.py
        ├── utils
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-38.pyc
        │   │   ├── compatibility.cpython-38.pyc
        │   │   ├── decompress.cpython-38.pyc
        │   │   └── download.cpython-38.pyc
        │   ├── compatibility.py
        │   ├── decompress.py
        │   └── download.py
        ├── vocab
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-38.pyc
        │   │   └── vocab.cpython-38.pyc
        │   └── vocab.py
        └── workflow
        │   ├── __init__.py
        │   ├── __pycache__
        │       ├── __init__.cpython-38.pyc
        │       ├── work.cpython-38.pyc
        │       └── workflow.cpython-38.pyc
        │   ├── downstream
        │       ├── __init__.py
        │       ├── __pycache__
        │       │   ├── __init__.cpython-38.pyc
        │       │   └── sentiment_analysis_model.cpython-38.pyc
        │       └── sentiment_analysis_model.py
        │   ├── utils.py
        │   ├── work.py
        │   ├── workflow.py
        │   └── works
        │       ├── __init__.py
        │       ├── __pycache__
        │           ├── __init__.cpython-38.pyc
        │           └── sentiment_analysis.cpython-38.pyc
        │       ├── classification.py
        │       ├── ner.py
        │       ├── qa.py
        │       └── sentiment_analysis.py
    ├── t5-small参数名称对比.xlsx
    ├── t5_test.py
    └── utils.ipynb


/code/ChatYuan_pytorch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import os\n",
 10 |     "\n",
 11 |     "os.environ['HTTP_PROXY'] = 'http://172.26.1.159:1090'\n",
 12 |     "os.environ['HTTPS_PROXY'] = 'http://172.26.1.159:1090'\n",
 13 |     "\n",
 14 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"6\""
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "def preprocess(text):\n",
 24 |     "  text = text.replace(\"\\n\", \"\\\\n\").replace(\"\\t\", \"\\\\t\")\n",
 25 |     "  return text\n",
 26 |     "\n",
 27 |     "def postprocess(text):\n",
 28 |     "  return text.replace(\"\\\\n\", \"\\n\").replace(\"\\\\t\", \"\\t\").replace('%20','  ')"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 3,
 34 |    "metadata": {},
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "text = \"美国的首都是哪座城市\"\n",
 38 |     "text = f\"用户：{text}\\n小元：\"\n",
 39 |     "text = text.strip()\n",
 40 |     "text = preprocess(text)"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 4,
 46 |    "metadata": {},
 47 |    "outputs": [
 48 |     {
 49 |      "name": "stderr",
 50 |      "output_type": "stream",
 51 |      "text": [
 52 |       "/home/geaming/anaconda3/envs/mindspore/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
 53 |       "  from .autonotebook import tqdm as notebook_tqdm\n"
 54 |      ]
 55 |     }
 56 |    ],
 57 |    "source": [
 58 |     "# load tokenizer and model\n",
 59 |     "\n",
 60 |     "from transformers import T5Tokenizer, T5ForConditionalGeneration\n",
 61 |     "\n",
 62 |     "tokenizer = T5Tokenizer.from_pretrained(\"ClueAI/ChatYuan-large-v2\")\n",
 63 |     "model = T5ForConditionalGeneration.from_pretrained(\"ClueAI/ChatYuan-large-v2\")"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 5,
 69 |    "metadata": {},
 70 |    "outputs": [
 71 |     {
 72 |      "name": "stdout",
 73 |      "output_type": "stream",
 74 |      "text": [
 75 |       "====================encoding对齐====================\n",
 76 |       "tensor([[   12,   623,     5, 11026,  4627,    15,  1843,   939,   399,     7,\n",
 77 |       "            51,   158,     5,     1]])\n"
 78 |      ]
 79 |     }
 80 |    ],
 81 |    "source": [
 82 |     "# encoding对齐\n",
 83 |     "print(\"encoding对齐\".center(50, \"=\"))\n",
 84 |     "encoding = tokenizer(text=[text], truncation=True, padding=True, max_length=1024, return_tensors=\"pt\")\n",
 85 |     "print(encoding['input_ids'])"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 6,
 91 |    "metadata": {},
 92 |    "outputs": [
 93 |     {
 94 |      "name": "stdout",
 95 |      "output_type": "stream",
 96 |      "text": [
 97 |       "=====================model对齐======================\n"
 98 |      ]
 99 |     },
100 |     {
101 |      "name": "stdout",
102 |      "output_type": "stream",
103 |      "text": [
104 |       "tensor([[    0, 14958,    10,  4627,    15, 11646, 14376,     6,     1]])\n"
105 |      ]
106 |     }
107 |    ],
108 |    "source": [
109 |     "# model对齐\n",
110 |     "print(\"model对齐\".center(50, \"=\"))\n",
111 |     "out = model.generate(**encoding, return_dict_in_generate=True, output_scores=False, max_new_tokens=1024, num_beams=1, length_penalty=0.6)\n",
112 |     "print(out[\"sequences\"])"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 7,
118 |    "metadata": {},
119 |    "outputs": [
120 |     {
121 |      "name": "stdout",
122 |      "output_type": "stream",
123 |      "text": [
124 |       "====================decoding对齐====================\n",
125 |       "美国的首都是华盛顿特区。\n"
126 |      ]
127 |     }
128 |    ],
129 |    "source": [
130 |     "# decoding对齐\n",
131 |     "print(\"decoding对齐\".center(50, \"=\"))\n",
132 |     "out_text = tokenizer.batch_decode(out[\"sequences\"], skip_special_tokens=True)\n",
133 |     "print(out_text[0])"
134 |    ]
135 |   }
136 |  ],
137 |  "metadata": {
138 |   "kernelspec": {
139 |    "display_name": "py310",
140 |    "language": "python",
141 |    "name": "python3"
142 |   },
143 |   "language_info": {
144 |    "codemirror_mode": {
145 |     "name": "ipython",
146 |     "version": 3
147 |    },
148 |    "file_extension": ".py",
149 |    "mimetype": "text/x-python",
150 |    "name": "python",
151 |    "nbconvert_exporter": "python",
152 |    "pygments_lexer": "ipython3",
153 |    "version": "3.9.18"
154 |   },
155 |   "orig_nbformat": 4
156 |  },
157 |  "nbformat": 4,
158 |  "nbformat_minor": 2
159 | }
160 | 


--------------------------------------------------------------------------------
/code/mindnlp/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | MindNLP library.
17 | """
18 | 
19 | from mindnlp.dataset import load_dataset, process
20 | from mindnlp.utils import less_min_pynative_first
21 | from mindnlp.workflow.workflow import Workflow
22 | from mindnlp.vocab import Vocab
23 | 
24 | if less_min_pynative_first:
25 |     from mindspore import context
26 |     from mindspore import ms_function as ms_jit
27 |     context.set_context(mode=context.PYNATIVE_MODE)
28 | else:
29 |     from mindspore import jit as ms_jit
30 | 
31 | 
32 | __all__ = ['ms_jit', 'load_dataset', 'process', 'Workflow', 'Vocab']
33 | 


--------------------------------------------------------------------------------
/code/mindnlp/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/__pycache__/configs.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/__pycache__/configs.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | 
16 | 
17 | """Custom api for legacy mindspore"""
18 | 


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/_legacy/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/__pycache__/functional.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/_legacy/__pycache__/functional.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/__pycache__/initializer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/_legacy/__pycache__/initializer.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/initializer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | # pylint: disable=W0702
16 | 
17 | """Classes and functions for Initializer"""
18 | 
19 | import math
20 | import numpy as np
21 | from mindspore.common.initializer import Initializer, _calculate_fan_in_and_fan_out, _assignment
22 | try:
23 |     from mindspore._c_expression import random_normal as _random_normal
24 | except:
25 |     from mindspore._c_expression import _random_normal
26 | 
27 | def _numpy_seed():
28 |     # This produces the same value after calling numpy.random.seed with the same seed.
29 |     return np.random.randint(low=1, high=(1 << 63), dtype=np.int64)
30 | 
31 | def _init_random_normal(mean, sigma, shape):
32 |     if sigma < 0:
33 |         raise ValueError("sigma < 0")
34 |     data = np.ndarray(shape=shape, dtype=np.float32)
35 |     _random_normal(_numpy_seed(), data, mean, sigma)
36 |     return data
37 | 
38 | class XavierNormal(Initializer):
39 |     r"""
40 |     Generates an array with values sampled from Xavier normal distribution
41 |     :math:`\mathcal{N}(0, \text{std}^2)` in order to initialize a tensor, where
42 | 
43 |     .. math::
44 |         \text{std} = gain * \sqrt{\frac{2}{n_{in} + n_{out}}}
45 | 
46 |     where :math:`gain` is an optional scaling factor, :math:`n_{in}` is the number of input units in the weight tensor,
47 |     :math:`n_{out}` is the number of output units in the weight tensor.
48 | 
49 |     Args:
50 |         gain (float): An optional scaling factor. Default: 1.
51 | 
52 |     Examples:
53 |         >>> import mindspore
54 |         >>> from mindspore.common.initializer import initializer
55 |         >>> from mindnlp._legacy.initializer import XavierNormal
56 |         >>> tensor1 = initializer(XavierNormal(), [1, 2, 3], mindspore.float32)
57 |         >>> tensor2 = initializer('XavierNormal', [1, 2, 3], mindspore.float32)
58 |     """
59 |     def __init__(self, gain=1):
60 |         super().__init__(gain=gain)
61 |         self.gain = gain
62 | 
63 |     def _initialize(self, arr):
64 |         fan_in, fan_out = _calculate_fan_in_and_fan_out(arr.shape)
65 | 
66 |         std = self.gain * math.sqrt(2.0 / float(fan_in + fan_out))
67 |         data = _init_random_normal(0, std, arr.shape)
68 | 
69 |         _assignment(arr, data)
70 | 


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/nn/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | 
16 | """nn layer for legacy mindspore"""
17 | 
18 | from .transformer import Transformer, TransformerDecoder, TransformerEncoder, \
19 |     TransformerEncoderLayer, TransformerDecoderLayer, MultiheadAttention
20 | 
21 | from .dropout import Dropout
22 | __all__ = [
23 |     'Transformer', 'TransformerEncoder', 'TransformerDecoder',
24 |     'TransformerEncoderLayer', 'TransformerDecoderLayer',
25 |     'MultiheadAttention', 'Dropout']
26 | 


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/nn/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/_legacy/nn/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/nn/__pycache__/dropout.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/_legacy/nn/__pycache__/dropout.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/nn/__pycache__/transformer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/_legacy/nn/__pycache__/transformer.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/nn/dropout.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | # pylint: disable=C0103
16 | """Dropout"""
17 | 
18 | from mindspore import nn
19 | from mindspore._checkparam import Validator
20 | from mindspore.common.seed import _get_graph_seed
21 | from mindspore.ops import operations as P
22 | 
23 | class Dropout(nn.Cell):
24 |     r"""
25 |     Dropout layer for the input.
26 | 
27 |     Randomly set some elements of the input tensor to zero with probability `p` during training
28 |     using samples from a Bernoulli distribution.
29 | 
30 |     The outputs are scaled by a factor of :math:`\frac{1}{1-p}` during training so
31 |     that the output layer remains at a similar scale. During inference, this
32 |     layer returns `x` unchanged.
33 | 
34 |     This technique is proposed in paper `Dropout: A Simple Way to Prevent Neural Networks from Overfitting
35 |     <http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf>`_ and has proved effective at reducing
36 |     over-fitting and preventing neurons from co-adaptation. See more details in `Improving neural networks by
37 |     preventing co-adaptation of feature detectors
38 |     <https://arxiv.org/pdf/1207.0580.pdf>`_.
39 | 
40 |     Note:
41 |         - Each channel will be zeroed out independently on every construct call.
42 |         - Parameter `p` means the probability of the element of the input tensor to be zeroed.
43 | 
44 |     Args:
45 |         p (Union[float, int]): The dropout rate, greater than or equal to 0 and less than 1.
46 |             E.g. p=0.9, dropping out 90% of input neurons. Default: 0.5.
47 | 
48 |     Inputs:
49 |         x (Tensor): The input of Dropout with data type of float16 or float32.
50 |             The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
51 | 
52 |     Outputs:
53 |         Tensor, output tensor with the same shape as the `x`.
54 | 
55 |     Raises:
56 |         TypeError: If the dtype of `p` is not float or int.
57 |         TypeError: If dtype of `x` is neither float16 nor float32.
58 |         ValueError: If `p` is not in range [0, 1).
59 |         ValueError: If length of shape of `x` is less than 1.
60 | 
61 |     Examples:
62 |         >>> x = Tensor(np.ones([2, 2, 3]), mindspore.float32)
63 |         >>> net = Dropout(p=0.2)
64 |         >>> net.set_train()
65 |         >>> output = net(x)
66 |         >>> print(output.shape)
67 |         (2, 2, 3)
68 |     """
69 | 
70 |     def __init__(self, p=0.5):
71 |         """
72 |         Initialize Dropout.
73 |         """
74 | 
75 |         super().__init__()
76 |         Validator.check_value_type('p', p, [float, int], self.cls_name)
77 |         if p < 0 or p >= 1:
78 |             raise ValueError(f"For '{self.cls_name}', the 'p' must be a number in range [0, 1), "
79 |                              f"but got {p}.")
80 |         seed0, seed1 = _get_graph_seed(0, "dropout")
81 |         self.dropout = P.Dropout(1.0 - p, seed0, seed1)
82 |         self.p = p
83 | 
84 |     def construct(self, x):
85 |         """
86 |         Compute Dropout.
87 |         """
88 | 
89 |         if not self.training or self.p == 0:
90 |             return x
91 | 
92 |         out, _ = self.dropout(x)
93 |         return out
94 | 


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/transforms/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | 
16 | """dataset transforms for legacy mindspore"""
17 | 
18 | from mindnlp._legacy.transforms.truncate import Truncate
19 | from mindnlp._legacy.transforms.add_token import AddToken
20 | 
21 | __all__ = [
22 |     'Truncate', 'AddToken'
23 | ]
24 | 


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/transforms/add_token.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """AddToken transform"""
16 | import numpy as np
17 | from mindspore.dataset.transforms.transforms import PyTensorOperation
18 | from mindspore.dataset.text.transforms import Implementation
19 | 
20 | 
21 | class AddToken(PyTensorOperation):
22 |     """
23 |     Add token to begin or end of sequence.
24 | 
25 |     Args:
26 |         token (str): String token.
27 |         begin (bool): Whether add to begin of sequence.
28 | 
29 |     Raises:
30 |         TypeError: If `token` is not of type str.
31 | 
32 |     Examples:
33 | 
34 |     """
35 | 
36 |     def __init__(self, token, begin=True):
37 |         super().__init__()
38 |         self.token = token
39 |         self.begin = begin
40 |         self.implementation = Implementation.PY
41 | 
42 |     def __call__(self, text_input):
43 |         """
44 |         Call method for input conversion for eager mode with C++ implementation.
45 |         """
46 |         if not isinstance(text_input, np.ndarray):
47 |             raise TypeError(
48 |                 f"Input should be a text line in 1-D ndarray contains string, got {type(text_input)}.")
49 |         return super().__call__(text_input)
50 | 
51 |     def execute_py(self, text_input):
52 |         """
53 |         Execute method.
54 |         """
55 |         return self._execute_py(text_input)
56 | 
57 |     def _execute_py(self, text_input):
58 |         """
59 |         Execute method.
60 |         """
61 |         if self.begin:
62 |             token = np.array([self.token])
63 |             text_input = np.concatenate([token, text_input], 0)
64 |         else:
65 |             text_input = np.append(text_input, self.token)
66 |         return text_input
67 | 


--------------------------------------------------------------------------------
/code/mindnlp/_legacy/transforms/truncate.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | 
16 | # pylint: disable=import-outside-toplevel
17 | # pylint: disable=c-extension-no-member
18 | # pylint: disable=invalid-name
19 | # pylint: disable=too-many-boolean-expressions
20 | """
21 | Transforms to process sequence
22 | """
23 | import numpy as np
24 | from mindspore.dataset.transforms.transforms import PyTensorOperation
25 | from mindspore.dataset.text.transforms import Implementation
26 | 
27 | 
28 | class Truncate(PyTensorOperation):
29 |     """
30 |     Truncate the input sequence.
31 | 
32 |     Args:
33 |         max_seq_length (int): Maximum length required.
34 | 
35 |     Raises:
36 |         TypeError: If `max_length` is not of type int.
37 | 
38 |     Examples:
39 | 
40 |     """
41 | 
42 |     def __init__(self, max_seq_length):
43 |         super().__init__()
44 |         self.max_seq_length = max_seq_length
45 |         self.implementation = Implementation.PY
46 | 
47 |     def __call__(self, text_input):
48 |         """
49 |         Call method for input conversion for eager mode with C++ implementation.
50 |         """
51 |         if not isinstance(text_input, np.ndarray):
52 |             raise TypeError(
53 |                 f"Input should be a text line in 1-D NumPy format, got {type(text_input)}.")
54 |         return super().__call__(text_input)
55 | 
56 |     def execute_py(self, text_input):
57 |         """
58 |         Execute method.
59 |         """
60 |         return self._execute_py(text_input)
61 | 
62 |     def _execute_py(self, text_input):
63 |         """
64 |         Execute method.
65 |         """
66 |         return text_input[:self.max_seq_length]
67 | 


--------------------------------------------------------------------------------
/code/mindnlp/abc/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | 
16 | """
17 | Abstract classes.
18 | """
19 | from .backbones import *
20 | from .modules import *
21 | from .callback import Callback
22 | from .metric import Metric
23 | 


--------------------------------------------------------------------------------
/code/mindnlp/abc/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/__pycache__/callback.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/__pycache__/callback.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/__pycache__/metric.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/__pycache__/metric.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/__pycache__/register.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/__pycache__/register.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/backbones/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | 
16 | """
17 | Abstract classes for Backbones.
18 | """
19 | 
20 | from .base import BaseModel
21 | from .seq2seq import Seq2seqModel
22 | from .seq2vec import Seq2vecModel
23 | from .pretrained import PretrainedConfig, PretrainedModel
24 | 


--------------------------------------------------------------------------------
/code/mindnlp/abc/backbones/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/backbones/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/backbones/__pycache__/base.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/backbones/__pycache__/base.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/backbones/__pycache__/pretrained.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/backbones/__pycache__/pretrained.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/backbones/__pycache__/seq2seq.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/backbones/__pycache__/seq2seq.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/backbones/__pycache__/seq2vec.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/backbones/__pycache__/seq2vec.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/backbones/seq2seq.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """Sequence-to-sequence basic model"""
16 | # pylint: disable=abstract-method
17 | # pylint: disable=arguments-differ
18 | from mindnlp.abc.backbones.base import BaseModel
19 | 
20 | 
21 | class Seq2seqModel(BaseModel):
22 |     r"""
23 |     Basic class for seq2seq models
24 | 
25 |     Args:
26 |         encoder (EncoderBase): The encoder.
27 |         decoder (DecoderBase): The decoder.
28 |     """
29 | 
30 |     def __init__(self, encoder, decoder):
31 |         super().__init__()
32 |         self.encoder = encoder
33 |         self.decoder = decoder
34 | 
35 |     def construct(self, src_tokens, tgt_tokens, src_length, mask=None):
36 |         """
37 |         Construct method.
38 | 
39 |         Args:
40 |             src_tokens (Tensor): Tokens of source sentences with shape [batch, src_len].
41 |             tgt_tokens (Tensor): Tokens of targets with shape [batch, src_len].
42 |             src_length (Tensor): Lengths of each source sentence with shape [batch].
43 |             mask (Tensor): Its elements identify whether the corresponding input token is padding or not.
44 |                 If True, not padding token. If False, padding token. Defaults to None.
45 | 
46 |         Returns:
47 |             Tensor, The result vector of seq2seq model with shape [batch, max_len, vocab_size].
48 |         """
49 |         encoder_out = self.encoder(src_tokens, src_length=src_length, mask=mask)
50 | 
51 |         decoder_out = self.decoder(tgt_tokens, encoder_out=encoder_out)
52 |         return decoder_out
53 | 
54 |     def get_context(self, src_tokens, mask=None):
55 |         """
56 |         Get Context from encoder.
57 | 
58 |         Args:
59 |             src_tokens (Tensor): Tokens of source sentences with shape [batch, src_len].
60 |             mask (Tensor): Its elements identify whether the corresponding input token is padding or not.
61 |                 If True, not padding token. If False, padding token. Defaults to None.
62 | 
63 |         Returns:
64 |             Union[Tensor, tuple], the output of encoder.
65 |         """
66 |         return self.encoder(src_tokens, mask=mask)
67 | 
68 |     def extract_features(self, src_tokens, tgt_tokens, src_length):
69 |         """
70 |         Extract features of encoder output.
71 | 
72 |         Args:
73 |             src_tokens (Tensor): Tokens of source sentences with shape [batch, src_len].
74 |             tgt_tokens (Tensor): Tokens of targets with shape [batch, src_len].
75 |             src_length (Tensor): Lengths of each source sentence with shape [batch].
76 | 
77 |         Returns:
78 |             Tensor, the extracted features.
79 |         """
80 |         encoder_out = self.encoder(src_tokens, src_length=src_length)
81 |         features = self.decoder.extract_features(tgt_tokens, encoder_out=encoder_out)
82 |         return features
83 | 
84 |     def output_layer(self, features):
85 |         """
86 |         Project features to the default output size.
87 | 
88 |         Args:
89 |             features (Tensor): The extracted features.
90 | 
91 |         Returns:
92 |             Tensor, the output of decoder.
93 |         """
94 |         return self.decoder.output_layer(features)
95 | 


--------------------------------------------------------------------------------
/code/mindnlp/abc/backbones/seq2vec.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """Sequence-to-vector basic model"""
16 | # pylint: disable=abstract-method
17 | # pylint: disable=arguments-differ
18 | from mindspore import nn
19 | from mindspore import ops
20 | from mindnlp.abc.backbones.base import BaseModel
21 | 
22 | 
23 | class Seq2vecModel(BaseModel):
24 |     r"""
25 |     Basic class for seq2vec models
26 | 
27 |     Args:
28 |         encoder (EncoderBase): The encoder.
29 |         head (nn.Cell): The module to process encoder output.
30 |         dropout (float): The drop out rate, greater than 0 and less equal than 1.
31 |             If None, not dropping out input units. Drfault: None.
32 |     """
33 | 
34 |     def __init__(self, encoder, head, dropout: float = None):
35 |         super().__init__()
36 |         self.encoder = encoder
37 |         self.head = head
38 |         if dropout is None:
39 |             self.dropout = None
40 |         else:
41 |             self.dropout = nn.Dropout(p=dropout)
42 | 
43 |     def construct(self, src_tokens, mask=None):
44 |         """
45 |         Construct method.
46 | 
47 |         Args:
48 |             src_tokens (Tensor): Tokens of source sentences with shape [batch, src_len].
49 |             mask (Tensor): Its elements identify whether the corresponding input token is padding or not.
50 |                 If True, not padding token. If False, padding token. Defaults to None.
51 | 
52 |         Returns:
53 |             Tensor, the result vector of seq2vec model with shape [batch, label_num].
54 |         """
55 |         if mask is None:
56 |             mask = self._gen_mask(src_tokens)
57 | 
58 |         context = self.get_context(src_tokens, mask)
59 | 
60 |         if self.dropout is not None:
61 |             context = self.dropout(context)
62 | 
63 |         result = self.head(context)
64 |         # TODO: Whether to add reduction
65 |         return result
66 | 
67 |     def get_context(self, src_tokens, mask=None):
68 |         """
69 |         Get Context from encoder.
70 | 
71 |         Args:
72 |             src_tokens (Tensor): Tokens of source sentences with shape [batch, src_len].
73 |             mask (Tensor): Its elements identify whether the corresponding input token is padding or not.
74 |                 If True, not padding token. If False, padding token. Defaults to None.
75 | 
76 |         Returns:
77 |             Union[Tensor, tuple], the output of encoder.
78 |         """
79 |         if mask is None:
80 |             mask = self._gen_mask(src_tokens)
81 |         return self.encoder(src_tokens, mask=mask)
82 | 
83 |     def _gen_mask(self, inputs):
84 |         """Generate mask tensor"""
85 |         return ops.ones_like(inputs)
86 | 


--------------------------------------------------------------------------------
/code/mindnlp/abc/callback.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Abstract class for Callbacks
17 | """
18 | 
19 | class Callback():
20 |     """
21 |     Abstract base class used to build a callback class. Callbacks are context managers
22 |     which will be entered and exited when passing into the Model.
23 |     You can use this mechanism to do some custom operations.
24 | 
25 |     Callback functions can perform operations before and after a step or an epoch. Every hook
26 |     here takes only `run_context` and does nothing by default; to create a custom callback,
27 |     subclass Callback and override the method associated with the stage of interest.
28 |     """
29 | 
30 |     def train_begin(self, run_context):
31 |         """Called once before network training."""
32 | 
33 |     def train_end(self, run_context):
34 |         """Called once after network training."""
35 | 
36 |     def train_epoch_begin(self, run_context):
37 |         """Called before each train epoch begins."""
38 | 
39 |     def train_epoch_end(self, run_context):
40 |         """Called after each train epoch has finished."""
41 | 
42 |     def fetch_data_begin(self, run_context):
43 |         """Called before fetching each batch/ds_sink_size data."""
44 | 
45 |     def fetch_data_end(self, run_context):
46 |         """Called after fetching each batch/ds_sink_size data."""
47 | 
48 |     def train_step_begin(self, run_context):
49 |         """Called before each train step begins."""
50 | 
51 |     def train_step_end(self, run_context):
52 |         """Called after each train step has finished."""
53 | 
54 |     def forward_begin(self, run_context):
55 |         """Called before each forward begins."""
56 | 
57 |     def forward_end(self, run_context):
58 |         """Called after each forward has finished."""
59 | 
60 |     def backward_begin(self, run_context):
61 |         """Called before each backward begins."""
62 | 
63 |     def backward_end(self, run_context):
64 |         """Called after each backward has finished."""
65 | 
66 |     def ds_sink_begin(self, run_context):
67 |         """Called before each data_sink begins."""
68 | 
69 |     def ds_sink_end(self, run_context):
70 |         """Called after each data_sink has finished."""
71 | 
72 |     def load_model(self, run_context):
73 |         """Called before loading model."""
74 | 
75 |     def save_model(self, run_context):
76 |         """Called before saving model."""
77 | 
78 |     def evaluate_begin(self, run_context):
79 |         """Called before evaluating."""
80 | 
81 |     def evaluate_end(self, run_context):
82 |         """Called after evaluating."""
83 | 
84 |     def exception(self, run_context):
85 |         """Called if having exceptions."""
86 | 


--------------------------------------------------------------------------------
/code/mindnlp/abc/metric.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Abstract class for Metrics
17 | """
18 | from abc import ABCMeta, abstractmethod
19 | 
20 | class Metric(metaclass=ABCMeta):
21 |     """
22 |     Base class of all metrics. Never use this class directly, but instantiate one of
23 |     its subclasses instead.
24 | 
25 |     Functions `update` will accumulate intermediate results in the evaluation process,
26 |     `eval` will evaluate the final result, and `clear` will reinitialize the intermediate
27 |     results. Function `get_metric_name` will provide class name.
28 | 
29 |     """
30 |     def __init__(self):
31 |         pass
32 | 
33 |     @abstractmethod
34 |     def clear(self):
35 |         """
36 |         An interface describes the behavior of clearing the internal evaluation result.
37 |         All subclasses of `Metrics` must override this interface.
38 | 
39 |         Raises:
40 |             NotImplementedError: If this interface is called.
41 | 
42 |         """
43 |         raise NotImplementedError(f'Function `clear` not implemented in {self.__class__.__name__}')
44 | 
45 |     @abstractmethod
46 |     def eval(self):
47 |         """
48 |         An interface describes the behavior of computing the evaluation result.
49 |         All subclasses of `Metrics` must override this interface.
50 | 
51 |         Raises:
52 |             NotImplementedError: If this interface is called.
53 | 
54 |         """
55 |         raise NotImplementedError(f'Function `eval` not implemented in {self.__class__.__name__}')
56 | 
57 |     @abstractmethod
58 |     def update(self, *inputs):
59 |         """
60 |         An interface describes the behavior of updating the internal evaluation result.
61 |         All subclasses of `Metrics` must override this interface.
62 | 
63 |         Args:
64 |             inputs: Variable parameter list.
65 | 
66 |         Raises:
67 |             NotImplementedError: If this interface is called.
68 | 
69 |         """
70 |         raise NotImplementedError(f'Function `update` not implemented in {self.__class__.__name__}')
71 | 
72 |     @abstractmethod
73 |     def get_metric_name(self):
74 |         """
75 |         An interface returns the name of the metric. All subclasses of `Metrics` must
76 |         override this interface.
77 | 
78 |         Raises:
79 |             NotImplementedError: If this interface is called.
80 | 
81 |         """
82 |         raise NotImplementedError(f'Function `get_metric_name` not implemented '
83 |                                   f'in {self.__class__.__name__}')
84 | 


--------------------------------------------------------------------------------
/code/mindnlp/abc/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | 
16 | """
17 | Abstract classes for Modules.
18 | """
19 | 
20 | from .encoder import EncoderBase
21 | from .decoder import DecoderBase
22 | from .embedding import TokenEmbedding
23 | 


--------------------------------------------------------------------------------
/code/mindnlp/abc/modules/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/modules/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/modules/__pycache__/decoder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/modules/__pycache__/decoder.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/modules/__pycache__/embedding.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/modules/__pycache__/embedding.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/modules/__pycache__/encoder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/modules/__pycache__/encoder.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/abc/modules/decoder.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """Decoder basic model"""
16 | 
17 | from mindspore import nn
18 | 
19 | 
class DecoderBase(nn.Cell):
    r"""
    Base class for decoders.

    `construct` chains two hooks, `extract_features` and `output_layer`; both raise
    `NotImplementedError` here and are meant to be provided by subclasses.

    Args:
        embedding (Cell): The embedding layer.
    """

    def __init__(self, embedding):
        super().__init__()
        self.embedding = embedding
        # Both normalizers are created with their default arguments.
        self.softmax = nn.Softmax()
        self.log_softmax = nn.LogSoftmax()

    def construct(self, prev_output_tokens, encoder_out=None):
        """
        Run the decoder: extract features, then project them with the output layer.

        Args:
            prev_output_tokens (Tensor): Output tokens for teacher forcing with shape [batch, tgt_len].
            encoder_out (Tensor): Output of the encoder. Defaults to None.

        Returns:
            Tensor, the result of `output_layer` applied to the extracted features.
        """
        features = self.extract_features(prev_output_tokens, encoder_out)
        return self.output_layer(features)

    def extract_features(self, prev_output_tokens, encoder_out=None):
        """
        Extract decoder features; must be implemented by subclasses.

        Args:
            prev_output_tokens (Tensor): Output tokens for teacher forcing with shape [batch, tgt_len].
            encoder_out (Tensor): Output of the encoder. Defaults to None.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def output_layer(self, features):
        """
        Project features to the default output size; must be implemented by subclasses.

        Args:
            features (Tensor): The extracted features.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def get_normalized_probs(self, net_output, log_probs):
        """
        Get normalized probabilities from the net's output.

        Args:
            net_output (tuple): The net's output; only its first element (the logits) is used.
            log_probs (bool): If True, use log_softmax. If False, use softmax.

        Returns:
            Tensor, the normalized probabilities computed from the logits.
        """
        logits = net_output[0]
        if not log_probs:
            return self.softmax(logits)
        return self.log_softmax(logits)
86 | 


--------------------------------------------------------------------------------
/code/mindnlp/abc/modules/embedding.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """embedding"""
16 | 
# Names exported by a star-import of this module.
__all__ = [
    "TokenEmbedding"
]
20 | 
21 | from abc import abstractmethod
22 | from mindspore import nn
23 | from mindspore import Parameter
24 | 
25 | 
class TokenEmbedding(nn.Cell):
    r"""
    Base embedding cell whose table is initialized from a given tensor.

    The lookup itself (`construct`) is left to subclasses.

    Args:
        init_embed (Tensor): Values used directly to initialize the embedding parameter.
        requires_grad (bool): Whether the embedding parameter requires gradient updates. Default: True.
        dropout (float): Value passed as `p` to the dropout layer applied by `dropout`. Default: 0.0.

    """
    def __init__(self, init_embed, requires_grad: bool = True, dropout=0.0):
        super().__init__()

        self.embed = Parameter(init_embed, name='embed', requires_grad=requires_grad)
        self.dropout_layer = nn.Dropout(p=dropout)
        # Cached shape of the embedding parameter (the full shape, not a single dimension).
        self._embed_size = self.embed.shape

    def dropout(self, words):
        r"""
        Apply the dropout layer to embedded words.

        Args:
            words (Tensor): Tensor to apply dropout to.

        Returns:
            Tensor, the output of the dropout layer.

        """
        dropped = self.dropout_layer(words)
        return dropped

    def __len__(self):
        """
        Length of the embedding parameter, i.e. `len(self.embed)`.
        """
        table = self.embed
        return len(table)

    def embed_size(self):
        """
        Shape of the embedding parameter as recorded at construction time.
        """
        size = self._embed_size
        return size

    def num_embeddings(self):
        """
        Number of embeddings, i.e. `len(self.embed)`.
        """
        table = self.embed
        return len(table)

    @abstractmethod
    def construct(self, ids):
        r"""
        Look up embeddings for the given ids; must be overridden by subclasses.

        Args:
            ids (Tensor): Ids to query.

        Raises:
            NotImplementedError: Always, in this base class.

        """
        raise NotImplementedError(f'Function `construct` not implemented in {self.__class__.__name__}')
86 | 


--------------------------------------------------------------------------------
/code/mindnlp/abc/modules/encoder.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """Encoder basic model"""
16 | 
17 | from mindspore import nn
18 | from mindspore import ops
19 | 
20 | 
class EncoderBase(nn.Cell):
    r"""
    Base class for encoders.

    Args:
        embedding (Cell): The embedding layer.
    """

    def __init__(self, embedding):
        super().__init__()
        self.embedding = embedding

    def construct(self, src_token, src_length=None):
        """
        Encode the source tokens; must be implemented by subclasses.

        Args:
            src_token (Tensor): Tokens in the source language with shape [batch, max_len].
            src_length (Tensor): Lengths of each sentence with shape [batch]. Defaults to None.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Model must implement the construct method")

    def reorder_encoder_out(self, encoder_out, new_order):
        """
        Reorder the encoder output according to `new_order`; must be implemented by subclasses.

        Args:
            encoder_out (Union[Tensor, tuple]): The encoder's output.
            new_order (Tensor): Desired order.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def reset_parameters(self, mask=None):
        """
        Reset the model's parameters; must be implemented by subclasses.

        Args:
            mask (Tensor): Its elements identify whether the corresponding input token is padding or not.
                If True, not padding token. If False, padding token. Defaults to None.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def _gen_mask(self, inputs):
        """Generate a mask tensor of ones shaped like `inputs`."""
        mask = ops.ones_like(inputs)
        return mask
68 | 


--------------------------------------------------------------------------------
/code/mindnlp/abc/modules/generator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/abc/modules/generator.py


--------------------------------------------------------------------------------
/code/mindnlp/abc/register.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Abstract class for Register
17 | """
18 | 
19 | from functools import wraps
20 | 
class Register():
    """
    Name-to-callable registry.

    Callables are stored under the key produced by `map_rule(func)` and are
    invoked later by calling the registry with that name (case-insensitive).
    """
    def __init__(self, name, map_rule):
        # `name` labels the registry; `map_rule` derives the storage key from a callable.
        self.name = name
        self.map_rule = map_rule
        self.mem_dict = {}

    def register(self, func):
        """
        Register `func` under `map_rule(func)`; usable as a decorator.

        Returns a thin wrapper (carrying `func`'s metadata) that forwards all
        arguments to `func`. The same wrapper is what the registry stores.
        """
        @wraps(func)
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)

        key = self.map_rule(func)
        self.mem_dict[key] = wrapper
        return wrapper

    def __call__(self, name, *args, **kwargs):
        # Lookup is case-insensitive: the requested name is lowercased before matching.
        key = name.lower()
        if key in self.mem_dict:
            target = self.mem_dict[key]
            return target(*args, **kwargs)
        raise ValueError(f'{name} is not registered. Please check the dataset list.')
43 | 


--------------------------------------------------------------------------------
/code/mindnlp/configs.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Global configs
17 | """
18 | import os
19 | 
# Path of the ".mindnlp" directory under the current user's home directory.
# NOTE(review): presumably the default root for cached/downloaded files — confirm against users of this constant.
DEFAULT_ROOT = os.path.join(os.path.expanduser('~'), ".mindnlp")
21 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Dataset init
17 | """
18 | 
19 | from .text_classification import *
20 | from .machine_translation import *
21 | from .question_answer import *
22 | from .sequence_tagging import *
23 | from .text_generation import *
24 | # from .hf_datasets import *
25 | from .register import load_dataset, process
26 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/__pycache__/process.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/__pycache__/process.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/__pycache__/register.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/__pycache__/register.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/__pycache__/utils.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/hf_datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Directly load huggingface datasets
17 | """
18 | 
19 | from .hf_imdb import HF_IMDB, HF_IMDB_Process
20 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/hf_datasets/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/hf_datasets/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/hf_datasets/__pycache__/hf_imdb.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/hf_datasets/__pycache__/hf_imdb.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/machine_translation/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | MachineTranslation dataset init
17 | """
18 | 
19 | from .multi30k import Multi30k, Multi30k_Process
20 | from .iwslt2016 import IWSLT2016
21 | from .iwslt2017 import IWSLT2017, IWSLT2017_Process
22 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/machine_translation/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/machine_translation/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/machine_translation/__pycache__/iwslt2016.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/machine_translation/__pycache__/iwslt2016.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/machine_translation/__pycache__/iwslt2017.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/machine_translation/__pycache__/iwslt2017.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/machine_translation/__pycache__/multi30k.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/machine_translation/__pycache__/multi30k.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/process.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | common process
17 | """
18 | 
19 | from mindspore.dataset import text
20 | 
def common_process(dataset, column, tokenizer, vocab=None):
    '''
    Tokenize one column of a dataset and map its tokens to vocabulary ids.

    Args:
        dataset (GeneratorDataset|ZipDataset): The dataset to be processed.
        column (str): Name of the column to tokenize and look up.
        tokenizer (TextTensorOperation): Tokenizer applied to `column`.
        vocab (Vocab): The vocab to be used. Defaults to None. If None, a new vocab is
            built from the tokenized column with the special tokens "<pad>" and "<unk>".

    Returns:
        - If `vocab` is None: a tuple ``(dataset, new_vocab)`` holding the processed
          dataset and the vocab built from it.
        - Otherwise: only the processed dataset.

    '''

    # Tokenization is required on both paths, so do it once up front.
    dataset = dataset.map(tokenizer, column)

    if vocab is None:
        # No vocab supplied: derive one from the freshly tokenized column.
        new_vocab = text.Vocab.from_dataset(dataset, column, special_tokens=["<pad>", "<unk>"])
        return dataset.map(text.Lookup(new_vocab, unknown_token='<unk>'), column), new_vocab

    # Tokens missing from the vocab are looked up as '<unk>'.
    return dataset.map(text.Lookup(vocab, unknown_token='<unk>'), column)
44 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/question_answer/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | QuestionAnswer dataset init
17 | """
18 | 
19 | from .squad1 import SQuAD1, SQuAD1_Process
20 | from .squad2 import SQuAD2
21 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/question_answer/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/question_answer/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/question_answer/__pycache__/squad1.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/question_answer/__pycache__/squad1.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/question_answer/__pycache__/squad2.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/question_answer/__pycache__/squad2.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/register.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Dataset register
17 | """
18 | 
19 | from mindnlp.abc.register import Register
# Registry whose keys are the lowercased names of the registered callables.
load_dataset = Register('load', lambda x: x.__name__.lower())
# Registry whose keys are the lowercased callable names with every '_process' substring removed,
# so a callable named `X_Process` is stored under the key 'x'.
process = Register('process', lambda x: x.__name__.lower().replace('_process', ''))
22 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/sequence_tagging/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | SequenceTagging dataset init
17 | """
18 | 
19 | from .conll2000chunking import CoNLL2000Chunking,CoNLL2000Chunking_Process
20 | from .udpos import UDPOS
21 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/sequence_tagging/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/sequence_tagging/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/sequence_tagging/__pycache__/conll2000chunking.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/sequence_tagging/__pycache__/conll2000chunking.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/sequence_tagging/__pycache__/udpos.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/sequence_tagging/__pycache__/udpos.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | TextClassification dataset init
17 | """
18 | 
19 | from .agnews import AG_NEWS, AG_NEWS_Process
20 | from .cola import CoLA, CoLA_Process
21 | from .sst2 import SST2, SST2_Process
22 | from .amazonreviewfull import AmazonReviewFull, AmazonReviewFull_Process
23 | from .amazonreviewpolarity import AmazonReviewPolarity, AmazonReviewPolarity_Process
24 | from .stsb import STSB, STSB_Process
25 | from .dbpedia import DBpedia, DBpedia_Process
26 | from .imdb import IMDB
27 | from .mnli import MNLI, MNLI_Process
28 | from .mrpc import MRPC, MRPC_Process
29 | from .qnli import QNLI, QNLI_Process
30 | from .qqp import QQP, QQP_Process
31 | from .wnli import WNLI, WNLI_Process
32 | from .rte import RTE, RTE_Process
33 | from .sogounews import SogouNews
34 | from .yelpreviewpolarity import YelpReviewPolarity, YelpReviewPolarity_Process
35 | from .yelpreviewfull import YelpReviewFull, YelpReviewFull_Process
36 | from .yahooanswers import YahooAnswers, YahooAnswers_Process
37 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/agnews.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/agnews.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/amazonreviewfull.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/amazonreviewfull.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/amazonreviewpolarity.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/amazonreviewpolarity.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/cola.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/cola.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/dbpedia.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/dbpedia.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/imdb.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/imdb.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/mnli.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/mnli.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/mrpc.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/mrpc.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/qnli.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/qnli.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/qqp.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/qqp.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/rte.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/rte.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/sogounews.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/sogounews.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/sst2.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/sst2.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/stsb.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/stsb.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/wnli.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/wnli.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/yahooanswers.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/yahooanswers.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/yelpreviewfull.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/yelpreviewfull.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/__pycache__/yelpreviewpolarity.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_classification/__pycache__/yelpreviewpolarity.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_classification/sogounews.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 Huawei Technologies Co., Ltd
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | # http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ============================================================================
 15 | """
 16 | SogouNews load function
 17 | """
 18 | # pylint: disable=C0103
 19 | 
 20 | import os
 21 | import csv
 22 | from typing import Union, Tuple
 23 | from mindspore.dataset import GeneratorDataset
 24 | from mindnlp.utils.download import cache_file
 25 | from mindnlp.dataset.register import load_dataset
 26 | from mindnlp.configs import DEFAULT_ROOT
 27 | from mindnlp.utils import untar
 28 | 
 29 | csv.field_size_limit(500000)
 30 | 
 31 | URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbUkVqNEszd0pHaFE&confirm=t"
 32 | 
 33 | MD5 = "0c1700ba70b73f964dd8de569d3fd03e"
 34 | 
 35 | 
 36 | class Sogounews:
 37 |     """
 38 |     SogouNews dataset source
 39 |     """
 40 | 
 41 |     def __init__(self, path) -> None:
 42 |         self.path: str = path
 43 |         self._label, self._text = [], []
 44 |         self._load()
 45 | 
 46 |     def _load(self):
 47 |         csvfile = open(self.path, "r", encoding="utf-8")
 48 |         dict_reader = csv.reader(csvfile)
 49 |         for row in dict_reader:
 50 |             self._label.append(int(row[0]))
 51 |             self._text.append(f"{row[1]} {row[2]}")
 52 | 
 53 |     def __getitem__(self, index):
 54 |         return self._label[index], self._text[index]
 55 | 
 56 |     def __len__(self):
 57 |         return len(self._label)
 58 | 
 59 | 
 60 | @load_dataset.register
 61 | def SogouNews(
 62 |     root: str = DEFAULT_ROOT,
 63 |     split: Union[Tuple[str], str] = ("train", "test"),
 64 |     proxies=None,
 65 | ):
 66 |     r"""
 67 |     Load the SogouNews dataset
 68 | 
 69 |     Args:
 70 |         root (str): Directory where the datasets are saved.
 71 |             Default:~/.mindnlp
 72 |         split (str|Tuple[str]): Split or splits to be returned.
 73 |             Default:('train', 'test').
 74 |         proxies (dict): a dict to identify proxies,for example: {"https": "https://127.0.0.1:7890"}.
 75 | 
 76 |     Returns:
 77 |         - **datasets_list** (list) -A list of loaded datasets.
 78 |           If only one type of dataset is specified,such as 'trian',
 79 |           this dataset is returned instead of a list of datasets.
 80 | 
 81 |     Examples:
 82 |         >>> root = "~/.mindnlp"
 83 |         >>> split = ("train", "test")
 84 |         >>> dataset_train,dataset_test = SogouNews(root, split)
 85 |         >>> train_iter = dataset_train.create_tuple_iterator()
 86 |         >>> print(next(train_iter))
 87 |     """
 88 | 
 89 |     cache_dir = os.path.join(root, "datasets", "SogouNews")
 90 |     path_dict = {
 91 |         "train": "train.csv",
 92 |         "test": "test.csv",
 93 |     }
 94 |     column_names = ["label", "text"]
 95 |     path_list = []
 96 |     datasets_list = []
 97 |     path, _ = cache_file(
 98 |         None,
 99 |         cache_dir=cache_dir,
100 |         url=URL,
101 |         md5sum=MD5,
102 |         download_file_name="sogou_news_csv.tar.gz",
103 |         proxies=proxies,
104 |     )
105 | 
106 |     untar(path, cache_dir)
107 |     if isinstance(split, str):
108 |         path_list.append(os.path.join(cache_dir, "sogou_news_csv", path_dict[split]))
109 |     else:
110 |         for s in split:
111 |             path_list.append(os.path.join(cache_dir, "sogou_news_csv", path_dict[s]))
112 |     for path in path_list:
113 |         datasets_list.append(
114 |             GeneratorDataset(
115 |                 source=Sogounews(path), column_names=column_names, shuffle=False
116 |             )
117 |         )
118 |     if len(path_list) == 1:
119 |         return datasets_list[0]
120 |     return datasets_list
121 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_generation/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | TextGeneration dataset init
17 | """
18 | 
19 | from .wikitext2 import WikiText2
20 | from .wikitext103 import WikiText103
21 | from .penntreebank import PennTreebank
22 | from .lcsts import LCSTS
23 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_generation/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_generation/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_generation/__pycache__/lcsts.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_generation/__pycache__/lcsts.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_generation/__pycache__/penntreebank.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_generation/__pycache__/penntreebank.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_generation/__pycache__/wikitext103.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_generation/__pycache__/wikitext103.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_generation/__pycache__/wikitext2.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/dataset/text_generation/__pycache__/wikitext2.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_generation/lcsts.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 Huawei Technologies Co., Ltd
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | # http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ============================================================================
 15 | """
 16 | LCSTS load function
 17 | """
 18 | # pylint: disable=C0103
 19 | 
 20 | import os
 21 | import json
 22 | from typing import Union, Tuple
 23 | from mindspore.dataset import GeneratorDataset
 24 | from mindnlp.utils.download import cache_file
 25 | from mindnlp.dataset.register import load_dataset
 26 | from mindnlp.configs import DEFAULT_ROOT
 27 | 
 28 | URL = {
 29 |     "train": "https://bj.bcebos.com/paddlenlp/datasets/LCSTS_new/train.json",
 30 |     "dev": "https://bj.bcebos.com/paddlenlp/datasets/LCSTS_new/dev.json",
 31 | }
 32 | 
 33 | MD5 = {
 34 |     "train": "4e06fd1cfd5e7f0380499df8cbe17237",
 35 |     "dev": "9c39d49d25d5296bdc537409208ddc85",
 36 | }
 37 | 
 38 | 
 39 | class Lcsts:
 40 |     """
 41 |     LCSTS dataset source
 42 |     """
 43 | 
 44 |     def __init__(self, path):
 45 |         self.path = path
 46 |         self._source, self._target = [], []
 47 |         self._load()
 48 | 
 49 |     def _load(self):
 50 |         with open(self.path, 'r', encoding='utf8') as data:
 51 |             for line in data:
 52 |                 line = line.strip()
 53 |                 if not line:
 54 |                     continue
 55 |                 json_data = json.loads(line)
 56 |                 self._source.append(json_data["content"])
 57 |                 self._target.append(json_data.get("summary", ''))
 58 | 
 59 |     def __getitem__(self, index):
 60 |         return self._source[index], self._target[index]
 61 | 
 62 |     def __len__(self):
 63 |         return len(self._source)
 64 | 
 65 | @load_dataset.register
 66 | def LCSTS(root: str = DEFAULT_ROOT, split: Union[Tuple[str], str] = ('train', 'dev'), proxies=None):
 67 |     r"""
 68 |     Load the LCSTS dataset
 69 | 
 70 |     Args:
 71 |         root (str): Directory where the datasets are saved.
 72 |         split (str|Tuple[str]): Split or splits to be returned.
 73 |             Default:('train', 'dev').
 74 |         proxies (dict): a dict to identify proxies,for example: {"https": "https://127.0.0.1:7890"}.
 75 | 
 76 |     Returns:
 77 |         - **datasets_list** (list) -A list of loaded datasets.
 78 |           If only one type of dataset is specified,such as 'trian',
 79 |           this dataset is returned instead of a list of datasets.
 80 | 
 81 |     Raises:
 82 |         TypeError: If `root` is not a string.
 83 |         TypeError: If `split` is not a string or Tuple[str].
 84 | 
 85 |     Examples:
 86 |         >>> root = "~/.mindnlp"
 87 |         >>> split = ('train', 'dev')
 88 |         >>> dataset_train, dataset_dev = LCSTS(root, split)
 89 |         >>> train_iter = dataset_train.create_dict_iterator()
 90 |         >>> print(next(train_iter))
 91 |         {'source': Tensor(shape=[], dtype=String, value= '一辆小轿车，一名女司机，\
 92 |             竟造成9死24伤。日前，深圳市交警局对事故进行通报：从目前证据看，事故系司机超速行驶且操作不当导致。\
 93 |                 目前24名伤员已有6名治愈出院，其余正接受治疗，预计事故赔偿费或超一千万元。'),
 94 |         'target': Tensor(shape=[], dtype=String, value= '深圳机场9死24伤续：司机全责赔偿或超千万')}
 95 | 
 96 |     """
 97 | 
 98 |     cache_dir = os.path.join(root, "datasets", "LCSTS")
 99 |     file_list = []
100 |     datasets_list = []
101 |     if isinstance(split, str):
102 |         split = split.split()
103 |     for key in split:
104 |         path, _ = cache_file(
105 |             None, url=URL[key], cache_dir=cache_dir, md5sum=MD5[key], proxies=proxies
106 |         )
107 |         file_list.append(path)
108 | 
109 |     for _, file in enumerate(file_list):
110 |         dataset = GeneratorDataset(source=Lcsts(file), column_names=["source", "target"],
111 |                                    shuffle=False)
112 |         datasets_list.append(dataset)
113 |     if len(file_list) == 1:
114 |         return datasets_list[0]
115 |     return datasets_list
116 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_generation/penntreebank.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | PennTreebank load function
17 | """
18 | # pylint: disable=C0103
19 | 
20 | import os
21 | from typing import Union, Tuple
22 | from mindspore.dataset import PennTreebankDataset
23 | from mindnlp.utils.download import cache_file
24 | from mindnlp.dataset.register import load_dataset
25 | from mindnlp.configs import DEFAULT_ROOT
26 | 
27 | URL = {
28 |     "train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt",
29 |     "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt",
30 |     "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt",
31 | }
32 | 
33 | MD5 = {
34 |     "train": "f26c4b92c5fdc7b3f8c7cdcb991d8420",
35 |     "valid": "aa0affc06ff7c36e977d7cd49e3839bf",
36 |     "test": "8b80168b89c18661a38ef683c0dc3721",
37 | }
38 | 
39 | 
@load_dataset.register
def PennTreebank(root: str = DEFAULT_ROOT,
                 split: Union[Tuple[str], str] = ('train', 'valid', 'test'),
                 proxies=None):
    r"""
    Load the PennTreebank dataset.

    All three text files listed in ``URL`` are fetched through ``cache_file``
    into ``<root>/datasets/PennTreebank`` regardless of the requested
    splits; one ``PennTreebankDataset`` is then created per requested split.

    Args:
        root (str): Directory where the datasets are saved.
        split (str|Tuple[str]): Split or splits to be returned. A string is
            split on whitespace, so ``'train test'`` requests two splits.
            Default: ('train', 'valid', 'test').
        proxies (dict): a dict to identify proxies, for example:
            {"https": "https://127.0.0.1:7890"}.

    Returns:
        - **datasets_list** (list) - A list of loaded datasets.
          If only one split is requested, such as 'train',
          that dataset is returned instead of a list of datasets.

    Examples:
        >>> root = "~/.mindnlp"
        >>> split = ('train', 'valid', 'test')
        >>> dataset_train, dataset_valid, dataset_test = PennTreebank(root, split)
        >>> train_iter = dataset_train.create_tuple_iterator()
        >>> print(next(train_iter))

    """
    cache_dir = os.path.join(root, "datasets", "PennTreebank")

    # Every file is cached up front, not only the ones asked for.
    for name in URL:
        cache_file(None, cache_dir=cache_dir, url=URL[name], md5sum=MD5[name], proxies=proxies)

    requested = split.split() if isinstance(split, str) else split
    loaded = [
        PennTreebankDataset(dataset_dir=cache_dir, usage=usage, shuffle=False)
        for usage in requested
    ]
    return loaded[0] if len(loaded) == 1 else loaded
86 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_generation/wikitext103.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | WikiText103 load function
17 | """
18 | # pylint: disable=C0103
19 | 
20 | import os
21 | import re
22 | from typing import Union, Tuple
23 | from mindspore.dataset import TextFileDataset
24 | from mindnlp.utils.download import cache_file
25 | from mindnlp.dataset.register import load_dataset
26 | from mindnlp.configs import DEFAULT_ROOT
27 | from mindnlp.utils import unzip
28 | 
29 | URL = "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip"
30 | 
31 | MD5 = "9ddaacaf6af0710eda8c456decff7832"
32 | 
33 | 
@load_dataset.register
def WikiText103(root: str = DEFAULT_ROOT,
                split: Union[Tuple[str], str] = ('train', 'valid', 'test'),
                proxies=None):
    r"""
    Load the WikiText103 dataset.

    The zip archive is fetched through ``cache_file`` into
    ``<root>/datasets/WikiText103`` and unpacked next to the downloaded
    file. A ``TextFileDataset`` is created for every extracted file whose
    name matches a requested split.

    Args:
        root (str): Directory where the datasets are saved.
        split (str|Tuple[str]): Split or splits to be returned. A string is
            split on whitespace. Each split name is applied to the file
            names with ``re.search``, i.e. it is treated as a regular
            expression. Default: ('train', 'valid', 'test').
        proxies (dict): a dict to identify proxies, for example:
            {"https": "https://127.0.0.1:7890"}.

    Returns:
        - **datasets_list** (list) - A list of loaded datasets.
          If exactly one dataset was created, such as for 'train',
          that dataset is returned instead of a list of datasets.

    Examples:
        >>> root = "~/.mindnlp"
        >>> split = ('train', 'valid', 'test')
        >>> dataset_train, dataset_valid, dataset_test = WikiText103(root, split)
        >>> train_iter = dataset_train.create_tuple_iterator()
        >>> print(next(train_iter))

    """
    cache_dir = os.path.join(root, "datasets", "WikiText103")

    archive_path, _ = cache_file(None, cache_dir=cache_dir,
                                 url=URL, md5sum=MD5, proxies=proxies)
    extracted = unzip(archive_path, os.path.dirname(archive_path))
    # NOTE(review): the first entry returned by unzip is used as the name of
    # the extracted text directory - confirm against mindnlp.utils.unzip.
    text_dir = os.path.join(cache_dir, extracted[0])
    candidates = os.listdir(text_dir)

    requested = split.split() if isinstance(split, str) else split
    # Outer loop over splits keeps the result in the order they were asked.
    loaded = [
        TextFileDataset(os.path.join(text_dir, file_name), shuffle=False)
        for pattern in requested
        for file_name in candidates
        if re.search(pattern, file_name)
    ]
    return loaded[0] if len(loaded) == 1 else loaded
85 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/text_generation/wikitext2.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | WikiText2 load function
17 | """
18 | # pylint: disable=C0103
19 | 
20 | import os
21 | import re
22 | from typing import Union, Tuple
23 | from mindspore.dataset import TextFileDataset
24 | from mindnlp.utils.download import cache_file
25 | from mindnlp.dataset.register import load_dataset
26 | from mindnlp.configs import DEFAULT_ROOT
27 | from mindnlp.utils import unzip
28 | 
29 | URL = "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip"
30 | 
31 | MD5 = "542ccefacc6c27f945fb54453812b3cd"
32 | 
33 | 
@load_dataset.register
def WikiText2(root: str = DEFAULT_ROOT,
              split: Union[Tuple[str], str] = ('train', 'valid', 'test'),
              proxies=None):
    r"""
    Load the WikiText2 dataset.

    The zip archive is fetched through ``cache_file`` into
    ``<root>/datasets/WikiText2`` and unpacked next to the downloaded file.
    A ``TextFileDataset`` is created for every extracted file whose name
    matches a requested split.

    Args:
        root (str): Directory where the datasets are saved.
        split (str|Tuple[str]): Split or splits to be returned. A string is
            split on whitespace. Each split name is applied to the file
            names with ``re.search``, i.e. it is treated as a regular
            expression. Default: ('train', 'valid', 'test').
        proxies (dict): a dict to identify proxies, for example:
            {"https": "https://127.0.0.1:7890"}.

    Returns:
        - **datasets_list** (list) - A list of loaded datasets.
          If exactly one dataset was created, such as for 'train',
          that dataset is returned instead of a list of datasets.

    Examples:
        >>> root = "~/.mindnlp"
        >>> split = ('train', 'valid', 'test')
        >>> dataset_train, dataset_valid, dataset_test = WikiText2(root, split)
        >>> train_iter = dataset_train.create_tuple_iterator()
        >>> print(next(train_iter))

    """
    cache_dir = os.path.join(root, "datasets", "WikiText2")

    archive_path, _ = cache_file(None, cache_dir=cache_dir,
                                 url=URL, md5sum=MD5, proxies=proxies)
    extracted = unzip(archive_path, os.path.dirname(archive_path))
    # NOTE(review): the first entry returned by unzip is used as the name of
    # the extracted text directory - confirm against mindnlp.utils.unzip.
    text_dir = os.path.join(cache_dir, extracted[0])
    candidates = os.listdir(text_dir)

    requested = split.split() if isinstance(split, str) else split
    # Outer loop over splits keeps the result in the order they were asked.
    loaded = [
        TextFileDataset(os.path.join(text_dir, file_name), shuffle=False)
        for pattern in requested
        for file_name in candidates
        if re.search(pattern, file_name)
    ]
    return loaded[0] if len(loaded) == 1 else loaded
84 | 


--------------------------------------------------------------------------------
/code/mindnlp/dataset/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Dataset utils
17 | """
18 | 
def make_bucket(dataset, column_name, pad_index,
                bucket_boundaries, bucket_batch_sizes, drop_remainder):
    """
    Bucket-batch ``dataset`` on a single column.

    Forwards to ``dataset.bucket_batch_by_length`` with
    ``pad_to_bucket_boundary=True``. The length of an element is taken from
    the first entry of its ``shape``, and ``column_name`` is padded with
    ``pad_index``.

    Args:
        dataset: Object providing ``bucket_batch_by_length``.
        column_name: Name of the column used for bucketing and padding.
        pad_index: Value used to pad ``column_name``.
        bucket_boundaries: Passed through unchanged.
        bucket_batch_sizes: Passed through unchanged.
        drop_remainder: Passed through unchanged.

    Returns:
        Whatever ``dataset.bucket_batch_by_length`` returns.
    """
    def _leading_dim(elem):
        # Element length is the size of its first axis.
        return elem.shape[0]

    return dataset.bucket_batch_by_length(
        [column_name],
        element_length_function=_leading_dim,
        bucket_boundaries=bucket_boundaries,
        bucket_batch_sizes=bucket_batch_sizes,
        pad_info={column_name: ([None], pad_index)},
        pad_to_bucket_boundary=True,
        drop_remainder=drop_remainder,
    )
34 | 
def make_bucket_2cloums(dataset, column_name, pad_value1, pad_value2,
                        bucket_boundaries, bucket_batch_sizes, drop_remainder):
    """
    Bucket-batch ``dataset`` on two columns.

    Forwards to ``dataset.bucket_batch_by_length`` with
    ``pad_to_bucket_boundary=True``. The length of an element pair is taken
    from the first entry of the first element's ``shape``; the second
    element is ignored for length purposes. ``column_name[0]`` is padded
    with ``pad_value1`` and ``column_name[1]`` with ``pad_value2``.

    Args:
        dataset: Object providing ``bucket_batch_by_length``.
        column_name: Indexable holding the two column names; it is passed
            to ``bucket_batch_by_length`` as is.
        pad_value1: Padding value for ``column_name[0]``.
        pad_value2: Padding value for ``column_name[1]``.
        bucket_boundaries: Passed through unchanged.
        bucket_batch_sizes: Passed through unchanged.
        drop_remainder: Passed through unchanged.

    Returns:
        Whatever ``dataset.bucket_batch_by_length`` returns.
    """
    def _leading_dim_of_first(first, second):
        # Only the first column decides the bucket; `second` is unused.
        return first.shape[0]

    padding = {
        column_name[0]: ([None], pad_value1),
        column_name[1]: ([None], pad_value2),
    }
    return dataset.bucket_batch_by_length(
        column_name,
        element_length_function=_leading_dim_of_first,
        bucket_boundaries=bucket_boundaries,
        bucket_batch_sizes=bucket_batch_sizes,
        pad_info=padding,
        pad_to_bucket_boundary=True,
        drop_remainder=drop_remainder,
    )
50 | 


--------------------------------------------------------------------------------
/code/mindnlp/engine/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | 
16 | """
17 | Engine of text.
18 | """
19 | from .trainer import Trainer
20 | from .evaluator import Evaluator
21 | from .callbacks import *
22 | 


--------------------------------------------------------------------------------
/code/mindnlp/engine/callbacks/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Callbacks.
17 | """
18 | from .callback_manager import CallbackManager
19 | from .timer_callback import TimerCallback
20 | from .earlystop_callback import EarlyStopCallback
21 | from .checkpoint_callback import CheckpointCallback
22 | from .best_model_callback import BestModelCallback
23 | 


--------------------------------------------------------------------------------
/code/mindnlp/engine/callbacks/callback_manager.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 Huawei Technologies Co., Ltd
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | # http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ============================================================================
 15 | """
 16 | Callback Manager.
 17 | """
 18 | from mindnlp.abc import Callback
 19 | 
 20 | def _transfer(func):
 21 |     """
 22 |     Forward the call to the callback
 23 |     manager to each callback subclass.
 24 | 
 25 |     Args:
 26 |         func: callback function.
 27 |     """
 28 | 
 29 |     def wrapper(manager, *arg):
 30 |         returns = []
 31 |         for callback in manager.callbacks:
 32 |             returns.append(getattr(callback, func.__name__)(*arg))
 33 |         return returns
 34 | 
 35 |     return wrapper
 36 | 
 37 | class CallbackManager(Callback):
 38 |     """
 39 |     Callback Manager.
 40 | 
 41 |     Args:
 42 |         callbacks (Optional[list[Callback], Callback]): List of callback objects which should be executed
 43 |             while training. Default: None.
 44 | 
 45 |     """
 46 | 
 47 |     def __init__(self, callbacks):
 48 |         self.callbacks = callbacks
 49 |         if callbacks is None:
 50 |             self.callbacks = []
 51 | 
 52 |     @_transfer
 53 |     def train_begin(self, run_context):
 54 |         """Called once before the network executing."""
 55 | 
 56 |     @_transfer
 57 |     def train_end(self, run_context):
 58 |         """Called once after network training."""
 59 | 
 60 |     @_transfer
 61 |     def train_epoch_begin(self, run_context):
 62 |         """Called before each epoch beginning."""
 63 | 
 64 |     @_transfer
 65 |     def train_epoch_end(self, run_context):
 66 |         """Called after each epoch finished."""
 67 | 
 68 |     @_transfer
 69 |     def fetch_data_begin(self, run_context):
 70 |         """Called before fetch each batch/ds_sink_size data."""
 71 | 
 72 |     @_transfer
 73 |     def fetch_data_end(self, run_context):
 74 |         """Called after fetch each batch/ds_sink_size data."""
 75 | 
 76 |     @_transfer
 77 |     def train_step_begin(self, run_context):
 78 |         """Called before each train step beginning."""
 79 | 
 80 |     @_transfer
 81 |     def train_step_end(self, run_context):
 82 |         """Called after each train step finished."""
 83 | 
 84 |     @_transfer
 85 |     def forward_begin(self, run_context):
 86 |         """Called before each forward beginning."""
 87 | 
 88 |     @_transfer
 89 |     def forward_end(self, run_context):
 90 |         """Called after each step finished."""
 91 | 
 92 |     @_transfer
 93 |     def backward_begin(self, run_context):
 94 |         """Called before each forward beginning."""
 95 | 
 96 |     @_transfer
 97 |     def backward_end(self, run_context):
 98 |         """Called after each backward finished."""
 99 | 
100 |     @_transfer
101 |     def ds_sink_begin(self, run_context):
102 |         """Called before each data_sink beginning."""
103 | 
104 |     @_transfer
105 |     def ds_sink_end(self, run_context):
106 |         """Called after each data_sink finished."""
107 | 
108 |     @_transfer
109 |     def load_model(self, run_context):
110 |         """Called before loading model."""
111 | 
112 |     @_transfer
113 |     def save_model(self, run_context):
114 |         """Called before saving model."""
115 | 
116 |     @_transfer
117 |     def evaluate_begin(self, run_context):
118 |         """Called before evaluating."""
119 | 
120 |     @_transfer
121 |     def evaluate_end(self, run_context):
122 |         """Called after evaluating."""
123 | 
124 |     @_transfer
125 |     def exception(self, run_context):
126 |         """Called if having exceptions."""
127 | 
class RunContext:
    """
    Provide information about the model.

    Each key/value pair of `engine_args` becomes an attribute of the
    instance, which is then handed to the callback hooks as `run_context`.

    Args:
        engine_args (dict): Holding the related information of model.

    Raises:
        TypeError: If `engine_args` is not a dict.
    """
    def __init__(self, engine_args):
        if not isinstance(engine_args, dict):
            # The message names the actual parameter so the error is actionable.
            raise TypeError(f"The argument 'engine_args' of RunContext should be dict type, "
                            f"but got {type(engine_args)}.")
        for arg, value in engine_args.items():
            setattr(self, arg, value)
142 | 


--------------------------------------------------------------------------------
/code/mindnlp/engine/callbacks/checkpoint_callback.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Callback for saving checkpoint.
17 | """
18 | import os
19 | 
20 | import mindspore
21 | from mindnlp.abc import Callback
22 | 
23 | 
class CheckpointCallback(Callback):
    """
    Save checkpoint of the model. A checkpoint of `run_context.network` is written at the
    end of every `epochs`-th epoch and at the end of the final epoch, until
    `keep_checkpoint_max` files have been written.

    Args:
        save_path (str): Directory in which checkpoint files are written; it is created
            if it does not exist. Default: None, which falls back to the user's home
            directory.
        epochs (int): Save a checkpoint file every n epochs. Default: None, in which
            case nothing is saved.
        keep_checkpoint_max (int): Save checkpoint files at most. Default:5.

    """
    def __init__(self, save_path=None, epochs=None, keep_checkpoint_max=5):
        if save_path is None:
            # Record the fallback directory, otherwise the first save would
            # try to build a path from `None` and fail.
            save_path = os.path.expanduser('~')
        os.makedirs(save_path, exist_ok=True)
        self.save_path = save_path
        self.epochs = epochs
        self.keep_checkpoint_max = keep_checkpoint_max
        self.checkpoint_nums = 0  # number of checkpoints written so far

        # TODO: support saving every n steps (`steps`), mutually exclusive with `epochs`.

    def train_begin(self, run_context):
        """
        Notice the file saved path of checkpoints at the beginning of training.

        Args:
            run_context (RunContext): Information about the model.

        """
        if self.epochs is None:
            print('For saving checkpoints, epoch cannont be `None` !')
        print(f"\nThe train will start from the checkpoint saved in {self.save_path}.\n")

    def train_epoch_end(self, run_context):
        """
        Save checkpoint every n epochs at the end of the epoch.

        Args:
            run_context (RunContext): Information about the model. The attributes
                `cur_epoch_nums`, `epochs` and `network` are read.

        """
        if self.checkpoint_nums == self.keep_checkpoint_max:
            print('The maximum number of stored checkpoints has been reached.')
            return
        if self.epochs is None:
            return
        cur_epoch = run_context.cur_epoch_nums
        on_schedule = cur_epoch % self.epochs == 0
        is_final_epoch = cur_epoch == run_context.epochs
        if not (on_schedule or is_final_epoch):
            return
        model = run_context.network
        # File names carry `cur_epoch_nums - 1` as the epoch index.
        ckpt_name = f"{type(model).__name__}_epoch_{cur_epoch - 1}.ckpt"
        mindspore.save_checkpoint(model, os.path.join(self.save_path, ckpt_name))
        self.checkpoint_nums += 1
        print(f"Checkpoint: {ckpt_name} has been saved in epoch:{cur_epoch - 1}.")
89 | 


--------------------------------------------------------------------------------
/code/mindnlp/engine/callbacks/earlystop_callback.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Callback for Early Stop.
17 | """
18 | from mindspore import log
19 | from mindnlp.abc import Callback
20 | 
class EarlyStopCallback(Callback):
    """
    Stop training without getting better after n epochs.

    Args:
        patience (int): Number of consecutive evaluations without improvement that are
            tolerated before `run_context.earlystop` is set. Default:10.
        larger_better (bool): Whether the larger value of the metric is better. Default:True.
    """
    def __init__(self, patience=10, larger_better=True):
        self.wait = 0  # consecutive evaluations without improvement
        self.patience = patience
        self.best_metrics_values = []  # empty until the first evaluation is recorded
        self.larger_better = larger_better

    def evaluate_end(self, run_context):
        """
        Called after evaluating.

        Updates the best value seen so far and sets `run_context.earlystop = True`
        once `patience` evaluations in a row have brought no improvement.

        Args:
            run_context (RunContext): Information about the model. `metrics_values`
                is read; `metrics_names` is read only for the warning message.

        """
        metrics_values = run_context.metrics_values
        if metrics_values is None:
            return
        if self.is_better_metric_value(metrics_values):
            self.wait = 0
            self.best_metrics_values = metrics_values
        else:
            self.wait += 1
        if self.wait >= self.patience:
            run_context.earlystop = True
            log.warning(f"After {self.wait} Evaluations, no improvement for "
                        f"metric `{run_context.metrics_names}`(best value: {self.best_metrics_values})")

    def is_better_metric_value(self, metrics_values):
        """
        Compare each metrics values with the best metrics values.

        Args:
            metrics_values: metrics values used to compared with the best metrics values
                so far. NOTE(review): presumably a float or a list of floats, depending
                on what the trainer stores in `run_context.metrics_values` - confirm.

        Returns:
            True when no best value has been recorded yet, otherwise the result of
            `metrics_values > best` (or `<` when `larger_better` is False).

        """
        best = self.best_metrics_values
        # The initial state is an empty container (or None); the first evaluation
        # always counts as an improvement.
        no_best_yet = best is None or (isinstance(best, (list, tuple, dict)) and len(best) == 0)
        if no_best_yet:
            return True
        if self.larger_better:
            return metrics_values > best
        # Lower-is-better metrics (e.g. a loss) improve when the value decreases.
        return metrics_values < best
69 | 


--------------------------------------------------------------------------------
/code/mindnlp/engine/export.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | export models to other IR format
17 | """
18 | 
19 | # TODO: use `mindspore.export` api to achieve such function.
20 | 


--------------------------------------------------------------------------------
/code/mindnlp/metrics/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
 16 | Metrics.
17 | """
18 | from mindnlp.metrics import accuracy, bleu, confusion_matrix, distinct, em_score, \
19 |     f1, matthews, pearson, perplexity, precision, recall, rouge, spearman
20 | 
21 | from .perplexity import *
22 | from .bleu import *
23 | from .rouge import *
24 | from .distinct import *
25 | from .accuracy import *
26 | from .precision import *
27 | from .recall import *
28 | from .f1 import *
29 | from .matthews import *
30 | from .pearson import *
31 | from .spearman import *
32 | from .em_score import *
33 | from .confusion_matrix import *
34 | 
35 | __all__ = []
36 | __all__.extend(accuracy.__all__)
37 | __all__.extend(bleu.__all__)
38 | __all__.extend(confusion_matrix.__all__)
39 | __all__.extend(distinct.__all__)
40 | __all__.extend(em_score.__all__)
41 | __all__.extend(f1.__all__)
42 | __all__.extend(matthews.__all__)
43 | __all__.extend(pearson.__all__)
44 | __all__.extend(perplexity.__all__)
45 | __all__.extend(precision.__all__)
46 | __all__.extend(recall.__all__)
47 | __all__.extend(rouge.__all__)
48 | __all__.extend(spearman.__all__)
49 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Models init
17 | """
18 | from mindnlp.models import bert
19 | from mindnlp.models.bert import *
20 | 
21 | __all__ = []
22 | __all__.extend(bert.__all__)
23 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/models/bart/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/bart/__init__.py


--------------------------------------------------------------------------------
/code/mindnlp/models/bart/bart.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/bart/bart.py


--------------------------------------------------------------------------------
/code/mindnlp/models/bart/bart_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/bart/bart_config.py


--------------------------------------------------------------------------------
/code/mindnlp/models/bert/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Bert Model.
17 | """
18 | from mindnlp.models.bert import bert, bert_config
19 | from mindnlp.models.bert.bert import *
20 | from mindnlp.models.bert.bert_config import *
21 | 
22 | __all__ = []
23 | __all__.extend(bert.__all__)
24 | __all__.extend(bert_config.__all__)
25 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/bert/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/bert/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/models/bert/__pycache__/bert.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/bert/__pycache__/bert.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/models/bert/__pycache__/bert_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/bert/__pycache__/bert_config.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/models/bert/bert_config.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Bert Model config
17 | """
18 | from mindnlp.abc.backbones.pretrained import PretrainedConfig
19 | 
class BertConfig(PretrainedConfig):
    """
    Hyper-parameter container for BERT; the defaults are the BERT-base values.

    Every constructor argument is stored on the instance under the same name.
    Extra keyword arguments are handed to `PretrainedConfig.__init__`.
    """
    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        **kwargs
    ):
        super().__init__(**kwargs)
        # Ordered table: attributes are set one by one, top to bottom.
        settings = (
            ("vocab_size", vocab_size),
            ("hidden_size", hidden_size),
            ("num_hidden_layers", num_hidden_layers),
            ("num_attention_heads", num_attention_heads),
            ("hidden_act", hidden_act),
            ("intermediate_size", intermediate_size),
            ("hidden_dropout_prob", hidden_dropout_prob),
            ("attention_probs_dropout_prob", attention_probs_dropout_prob),
            ("max_position_embeddings", max_position_embeddings),
            ("type_vocab_size", type_vocab_size),
            ("initializer_range", initializer_range),
            ("layer_norm_eps", layer_norm_eps),
        )
        for attr_name, attr_value in settings:
            setattr(self, attr_name, attr_value)
51 | 
52 | __all__ = ['BertConfig']
53 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/elmo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/elmo/__init__.py


--------------------------------------------------------------------------------
/code/mindnlp/models/elmo/elmo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/elmo/elmo.py


--------------------------------------------------------------------------------
/code/mindnlp/models/gpt/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | GPT Model.
17 | """
18 | 
19 | from mindnlp.models.gpt import gpt, gpt_config
20 | from mindnlp.models.gpt.gpt import *
21 | from mindnlp.models.gpt.gpt_config import *
22 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/gpt/gpt_config.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """MindNLP gpt config"""
16 | 
17 | from ...abc.backbones.pretrained import PretrainedConfig
18 | 
19 | 
class GPTConfig(PretrainedConfig):
    r"""
    Hyper-parameter container for the GPT model.

    Every constructor argument is stored on the instance under the same name
    before the remaining keyword arguments are handed to
    `PretrainedConfig.__init__`. `attribute_map` lists the generic attribute
    names next to the GPT-specific names (`n_positions`, `n_embd`, `n_head`,
    `n_layer`) they correspond to.
    """
    model_type = "gpt"
    attribute_map = {
        "max_position_embeddings": "n_positions",
        "hidden_size": "n_embd",
        "num_attention_heads": "n_head",
        "num_hidden_layers": "n_layer",
    }

    def __init__(self,
                 vocab_size=40478,
                 n_positions=512,
                 n_embd=768,
                 n_layer=12,
                 n_head=12,
                 afn="gelu_new",
                 resid_pdrop=0.1,
                 embd_pdrop=0.1,
                 attn_pdrop=0.1,
                 layer_norm_epsilon=1e-5,
                 initializer_range=0.02,
                 summary_type="cls_index",
                 summary_use_proj=True,
                 summary_activation=None,
                 summary_proj_to_labels=True,
                 summary_first_dropout=0.1,
                 **kwargs):
        # Ordered table: attributes are set one by one, top to bottom,
        # and all of them before the base-class initializer runs.
        settings = (
            ("vocab_size", vocab_size),
            ("n_positions", n_positions),
            ("n_embd", n_embd),
            ("n_layer", n_layer),
            ("n_head", n_head),
            ("afn", afn),
            ("resid_pdrop", resid_pdrop),
            ("embd_pdrop", embd_pdrop),
            ("attn_pdrop", attn_pdrop),
            ("layer_norm_epsilon", layer_norm_epsilon),
            ("initializer_range", initializer_range),
            ("summary_type", summary_type),
            ("summary_use_proj", summary_use_proj),
            ("summary_activation", summary_activation),
            ("summary_first_dropout", summary_first_dropout),
            ("summary_proj_to_labels", summary_proj_to_labels),
        )
        for attr_name, attr_value in settings:
            setattr(self, attr_name, attr_value)
        super().__init__(**kwargs)
69 |         


--------------------------------------------------------------------------------
/code/mindnlp/models/gpt2/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | GPT2 Models init
17 | """
18 | import mindspore
19 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/gpt2/config_gpt2.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """MindNLP gpt2 config"""
16 | 
17 | from ...abc.backbones.pretrained import PretrainedConfig
18 | 
19 | 
class GPT2Config(PretrainedConfig):
    """
    Container for the hyper-parameters of a GPT-2 model.

    Every keyword argument named in `__init__` is stored, unchanged, as an
    instance attribute of the same name. `bos_token_id` and `eos_token_id`
    are additionally handed to `PretrainedConfig.__init__`, together with any
    extra `**kwargs`.
    """
    def __init__(
        self,
        vocab_size=50257,
        max_position_embeddings=1024,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        n_inner=None,
        activation_function="gelu_new",
        resid_pdrop=0.1,
        embd_pdrop=0.1,
        attn_pdrop=0.1,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        summary_type="cls_index",
        summary_use_proj=True,
        summary_activation=None,
        summary_proj_to_labels=True,
        summary_first_dropout=0.1,
        scale_attn_weights=True,
        use_cache=True,
        bos_token_id=50256,
        eos_token_id=50256,
        scale_attn_by_inverse_layer_idx=False,
        reorder_and_upcast_attn=False,
        **kwargs,
    ):
        """
        Record the GPT-2 settings on the instance, then defer to the base class.
        """
        # Special token ids: kept on the instance and also forwarded below.
        self.bos_token_id, self.eos_token_id = bos_token_id, eos_token_id

        # Size-related arguments.
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.n_inner = n_inner
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # Activation, the three `*_pdrop` arguments, layer norm and init range.
        self.activation_function = activation_function
        self.resid_pdrop, self.embd_pdrop, self.attn_pdrop = resid_pdrop, embd_pdrop, attn_pdrop
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range

        # The `summary_*` family of arguments.
        self.summary_type = summary_type
        self.summary_use_proj = summary_use_proj
        self.summary_activation = summary_activation
        self.summary_proj_to_labels = summary_proj_to_labels
        self.summary_first_dropout = summary_first_dropout

        # Attention and caching switches.
        self.scale_attn_weights = scale_attn_weights
        self.scale_attn_by_inverse_layer_idx = scale_attn_by_inverse_layer_idx
        self.reorder_and_upcast_attn = reorder_and_upcast_attn
        self.use_cache = use_cache

        # The base class is initialised last, exactly as before, so anything
        # it sets takes effect after the assignments above.
        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
77 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/gpt_neo/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2021 The Eleuther AI and HuggingFace Inc. team. All rights reserved.
 3 | # Copyright 2023 Huawei Technologies Co., Ltd
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | """
17 | GPT Neo Models init
18 | """
19 | from .gpt_neo_config import *
20 | from .gpt_neo import *
21 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/gpt_neo/gpt_neo_config.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2021 The Eleuther AI and HuggingFace Inc. team. All rights reserved.
 3 | # Copyright 2023 Huawei Technologies Co., Ltd
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | """ GPT Neo model configuration"""
17 | 
18 | from ...abc.backbones.pretrained import PretrainedConfig
19 | 
20 | 
class GPTNeoConfig(PretrainedConfig):
    """
    Configuration class to store the configuration of a `GPTNeoModel`.

    All constructor arguments are stored as instance attributes of the same
    name. In addition, `attention_layers` is derived from `attention_types`
    (see `expand_attention_types_params`) and must contain exactly
    `num_layers` entries, otherwise a `ValueError` is raised.
    """
    model_type = "gpt_neo"
    keys_to_ignore_at_inference = ["past_key_values"]
    # NOTE(review): presumably consumed by `PretrainedConfig` to alias the
    # generic attribute names onto the GPT-Neo specific ones -- confirm
    # against the base class.
    attribute_map = {"num_attention_heads": "num_heads", "num_hidden_layers": "num_layers"}

    def __init__(
        self,
        vocab_size=50257,
        max_position_embeddings=2048,
        hidden_size=2048,
        num_layers=24,
        attention_types=None,
        num_heads=16,
        intermediate_size=None,
        window_size=256,
        activation_function="gelu_new",
        resid_dropout=0.0,
        embed_dropout=0.0,
        attention_dropout=0.0,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        use_cache=True,
        bos_token_id=50256,
        eos_token_id=50256,
        **kwargs,
        ):
        """
        Initialization of the GPTNeo Configuration.

        Args:
            attention_types: list of `[pattern, repeat]` items, where
                `pattern` is a list of attention kinds and `repeat` is how
                many times the pattern is repeated. When `None`, it defaults
                to `[[["global", "local"], 12]]`, i.e. 24 layers.
            **kwargs: forwarded to `PretrainedConfig.__init__`.

        Every other argument is stored unchanged as an attribute of the
        same name.

        Raises:
            ValueError: if the expanded `attention_types` does not yield
                exactly `num_layers` entries.
        """
        if attention_types is None:
            attention_types = [[["global", "local"], 12]]

        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.intermediate_size = intermediate_size
        self.window_size = window_size
        self.activation_function = activation_function
        self.resid_dropout = resid_dropout
        self.embed_dropout = embed_dropout
        self.attention_dropout = attention_dropout
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range
        self.use_cache = use_cache

        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id

        self.attention_types = attention_types
        # One attention kind per layer, derived from the compact description.
        self.attention_layers = self.expand_attention_types_params(attention_types)

        if len(self.attention_layers) != self.num_layers:
            # The previous message referred to a "convolutional module",
            # which GPT-Neo does not have; the check is about attention layers.
            raise ValueError(
                "Configuration for attention layers is incorrect. "
                "It is required that `len(config.attention_layers)` == `config.num_layers` "
                f"but is `len(config.attention_layers) = {len(self.attention_layers)}`, "
                f"`config.num_layers = {self.num_layers}`. "
                "`config.attention_layers` is prepared using `config.attention_types`. "
                "Please verify the value of `config.attention_types` argument."
            )

        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)

    @staticmethod
    def expand_attention_types_params(attention_types):
        """
        Flatten `attention_types` into a list with one entry per layer.

        For each item, the sequence `item[0]` is appended `item[1]` times,
        in order. For example `[[["global", "local"], 2]]` becomes
        `["global", "local", "global", "local"]`.

        Args:
            attention_types: iterable of indexable items whose element 0 is
                an iterable of attention kinds and whose element 1 is the
                repeat count.

        Returns:
            list: the expanded attention kinds.
        """
        return [
            kind
            for item in attention_types
            for _ in range(item[1])
            for kind in item[0]
        ]
99 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/longformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/longformer/__init__.py


--------------------------------------------------------------------------------
/code/mindnlp/models/longformer/longformer_config.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2020 The Allen Institute for AI team and The HuggingFace Inc. team.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # pylint: disable=relative-beyond-top-level
16 | # pylint: disable=too-many-instance-attributes
17 | # pylint: disable=too-many-locals
18 | # pylint: disable=too-few-public-methods
19 | # pylint: disable=too-many-arguments
20 | """ Longformer configuration"""
21 | from typing import List, Union
22 | from ...abc.backbones.pretrained import PretrainedConfig
23 | 
class LongformerConfig(PretrainedConfig):
    r"""
    Holds the settings of a Longformer model.

    `pad_token_id` and any extra `**kwargs` are passed to
    `PretrainedConfig.__init__`; every other constructor argument is stored,
    unchanged, as an instance attribute of the same name.

    Example:

    ```python
    >>> from mindnlp.models.longformer.longformer_config import LongformerConfig

    >>> # Initializing a Longformer configuration with the default values
    >>> configuration = LongformerConfig()

    >>> # Reading a value back
    >>> configuration.attention_window
    512
    ```"""
    model_type = "longformer"

    def __init__(
        self,
        attention_window: Union[List[int], int] = 512,
        sep_token_id: int = 2,
        pad_token_id: int = 1,
        bos_token_id: int = 0,
        eos_token_id: int = 2,
        vocab_size: int = 30522,
        hidden_size: int = 768,
        num_hidden_layers: int = 12,
        num_attention_heads: int = 12,
        intermediate_size: int = 3072,
        hidden_act: str = "gelu",
        hidden_dropout_prob: float = 0.1,
        attention_probs_dropout_prob: float = 0.1,
        max_position_embeddings: int = 512,
        type_vocab_size: int = 2,
        initializer_range: float = 0.02,
        layer_norm_eps: float = 1e-12,
        position_embedding_type: str = "absolute",
        classifier_dropout: float = None,
        onnx_export: bool = False,
        **kwargs
    ):
        """Store the Longformer settings after initialising the base class."""
        # Only the padding id goes through the base class; the remaining
        # special-token ids are set directly on the instance afterwards.
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        # Special-token ids.
        self.sep_token_id = sep_token_id
        self.bos_token_id, self.eos_token_id = bos_token_id, eos_token_id

        # Size-related arguments.
        self.vocab_size = vocab_size
        self.type_vocab_size = type_vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # Attention window and position-embedding type.
        self.attention_window = attention_window
        self.position_embedding_type = position_embedding_type

        # Activation, dropout, normalisation and initialisation values.
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.classifier_dropout = classifier_dropout
        self.layer_norm_eps = layer_norm_eps
        self.initializer_range = initializer_range

        self.onnx_export = onnx_export
88 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/luke/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2021 The Eleuther AI and HuggingFace Inc. team. All rights reserved.
 3 | # Copyright 2023 Huawei Technologies Co., Ltd
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | # ============================================================================
17 | """
18 | LUKE Model init
19 | """
20 | from mindnlp.models.luke.luke_config import *
21 | from mindnlp.models.luke.luke import *
22 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/luke/luke_config.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2021 The Eleuther AI and HuggingFace Inc. team. All rights reserved.
 3 | # Copyright 2023 Huawei Technologies Co., Ltd
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | # ============================================================================
17 | """ LUKE configuration"""
18 | 
19 | from mindnlp.abc.backbones.pretrained import PretrainedConfig
20 | 
21 | 
class LukeConfig(PretrainedConfig):
    """
    Holds the settings of a LUKE model.

    The three special-token ids and any extra `**kwargs` are passed to
    `PretrainedConfig.__init__`; all remaining constructor arguments are
    stored, unchanged, as instance attributes of the same name.
    """
    def __init__(
            self,
            vocab_size=50267,
            entity_vocab_size=500000,
            hidden_size=768,
            entity_emb_size=256,
            num_hidden_layers=12,
            num_attention_heads=12,
            intermediate_size=3072,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=2,
            initializer_range=0.02,
            layer_norm_eps=1e-12,
            use_entity_aware_attention=True,
            classifier_dropout=None,
            pad_token_id=1,
            bos_token_id=0,
            eos_token_id=2,
            **kwargs,
    ):
        """Initialise the base class first, then store the LUKE settings."""
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs,
        )

        # Word and entity vocabulary / embedding sizes.
        self.vocab_size, self.entity_vocab_size = vocab_size, entity_vocab_size
        self.hidden_size, self.entity_emb_size = hidden_size, entity_emb_size
        self.type_vocab_size = type_vocab_size
        self.max_position_embeddings = max_position_embeddings

        # Layer, head and intermediate sizes.
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.use_entity_aware_attention = use_entity_aware_attention

        # Activation, dropout, normalisation and initialisation values.
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.classifier_dropout = classifier_dropout
        self.layer_norm_eps = layer_norm_eps
        self.initializer_range = initializer_range
71 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/megatron_bert/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/megatron_bert/__init__.py


--------------------------------------------------------------------------------
/code/mindnlp/models/megatron_bert/megatron_bert.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/megatron_bert/megatron_bert.py


--------------------------------------------------------------------------------
/code/mindnlp/models/megatron_bert/megatron_bert_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/megatron_bert/megatron_bert_config.py


--------------------------------------------------------------------------------
/code/mindnlp/models/megatron_gpt2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/megatron_gpt2/__init__.py


--------------------------------------------------------------------------------
/code/mindnlp/models/megatron_gpt2/megatron_gpt2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/megatron_gpt2/megatron_gpt2.py


--------------------------------------------------------------------------------
/code/mindnlp/models/megatron_gpt2/megatron_gpt2_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/megatron_gpt2/megatron_gpt2_config.py


--------------------------------------------------------------------------------
/code/mindnlp/models/mobilebert/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2021 The Eleuther AI and HuggingFace Inc. team. All rights reserved.
 3 | # Copyright 2023 Huawei Technologies Co., Ltd
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | # ============================================================================
17 | """
18 | MobileBert Models init
19 | """
20 | 
21 | from .mobilebert_config import *
22 | from .mobilebert import *
23 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/mobilebert/mobilebert_config.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2021 The Eleuther AI and HuggingFace Inc. team. All rights reserved.
 3 | # Copyright 2023 Huawei Technologies Co., Ltd
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | # ============================================================================
17 | """
18 | MobileBERT model configuration
19 | """
20 | 
21 | from mindnlp.abc.backbones.pretrained import PretrainedConfig
22 | 
class MobileBertConfig(PretrainedConfig):
    """
    Holds the settings of a MobileBERT model.

    `pad_token_id` and any extra `**kwargs` are passed to
    `PretrainedConfig.__init__`; the other constructor arguments are stored,
    unchanged, as instance attributes of the same name. One derived
    attribute, `true_hidden_size`, equals `intra_bottleneck_size` when
    `use_bottleneck` is truthy and `hidden_size` otherwise.
    """
    def __init__(
        self,
        vocab_size=30522,
        hidden_size=512,
        num_hidden_layers=24,
        num_attention_heads=4,
        intermediate_size=512,
        hidden_act="relu",
        hidden_dropout_prob=0.0,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        embedding_size=128,
        trigram_input=True,
        use_bottleneck=True,
        intra_bottleneck_size=128,
        use_bottleneck_attention=False,
        key_query_shared_bottleneck=True,
        num_feedforward_networks=4,
        normalization_type="no_norm",
        classifier_activation=True,
        classifier_dropout=None,
        **kwargs,
    ):
        """Initialise the base class first, then store the MobileBERT settings."""
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        # Vocabulary, embedding and position sizes.
        self.vocab_size = vocab_size
        self.type_vocab_size = type_vocab_size
        self.embedding_size = embedding_size
        self.max_position_embeddings = max_position_embeddings

        # Layer, head and intermediate sizes.
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_feedforward_networks = num_feedforward_networks

        # Activation, dropout, normalisation and initialisation values.
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.layer_norm_eps = layer_norm_eps
        self.normalization_type = normalization_type
        self.initializer_range = initializer_range

        # Input and bottleneck switches.
        self.trigram_input = trigram_input
        self.use_bottleneck = use_bottleneck
        self.intra_bottleneck_size = intra_bottleneck_size
        self.use_bottleneck_attention = use_bottleneck_attention
        self.key_query_shared_bottleneck = key_query_shared_bottleneck

        # Classifier options.
        self.classifier_activation = classifier_activation
        self.classifier_dropout = classifier_dropout

        # Derived value: read the flag back from the instance, as the
        # original did, and pick the matching width.
        self.true_hidden_size = intra_bottleneck_size if self.use_bottleneck else hidden_size
84 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/nezha/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/nezha/__init__.py


--------------------------------------------------------------------------------
/code/mindnlp/models/nezha/nezha.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/nezha/nezha.py


--------------------------------------------------------------------------------
/code/mindnlp/models/nezha/nezha_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/nezha/nezha_config.py


--------------------------------------------------------------------------------
/code/mindnlp/models/opt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/opt/__init__.py


--------------------------------------------------------------------------------
/code/mindnlp/models/opt/opt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/opt/opt.py


--------------------------------------------------------------------------------
/code/mindnlp/models/opt/opt_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/opt/opt_config.py


--------------------------------------------------------------------------------
/code/mindnlp/models/pangu_alpha/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/pangu_alpha/__init__.py


--------------------------------------------------------------------------------
/code/mindnlp/models/pangu_alpha/pangu_alpha.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/pangu_alpha/pangu_alpha.py


--------------------------------------------------------------------------------
/code/mindnlp/models/pangu_alpha/pangu_alpha_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/pangu_alpha/pangu_alpha_config.py


--------------------------------------------------------------------------------
/code/mindnlp/models/roberta/__init__.py:
--------------------------------------------------------------------------------
1 | # __init__.py
2 | # LancasterLiu
3 | # 2023/3/8
4 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/roberta/roberta.py:
--------------------------------------------------------------------------------
1 | # model
2 | # LancasterLiu
3 | # 2023/3/8
4 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/roberta/roberta_config.py:
--------------------------------------------------------------------------------
1 | # roberta_config
2 | # LancasterLiu
3 | # 2023/3/8
4 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/t5/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | T5 Model init
17 | """
18 | 
19 | from .t5_config import *
20 | from .t5 import *
21 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/t5/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/t5/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/models/t5/__pycache__/t5.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/t5/__pycache__/t5.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/models/t5/__pycache__/t5_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/t5/__pycache__/t5_config.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/models/t5/t5_config.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | T5 Model config
17 | """
18 | 
19 | from mindnlp.abc.backbones.pretrained import PretrainedConfig
class T5Config(PretrainedConfig):
    """
    Configuration for T5.

    Every constructor argument except ``pad_token_id``, ``eos_token_id``,
    ``is_encoder_decoder`` and ``**kwargs`` is stored on the instance under the
    same name. Those four are forwarded to ``PretrainedConfig.__init__``.

    Args:
        num_decoder_layers (int, optional): When ``None`` (the default) the value of
            ``num_layers`` is used, i.e. the decoder mirrors the encoder depth.
        feed_forward_proj (str): Either ``"{ACT_FN}"`` or ``"gated-{ACT_FN}"``,
            e.g. ``"relu"`` or ``"gated-gelu"``.

    Derived attributes:
        dense_act_fn (str): The part of ``feed_forward_proj`` after the last ``-``;
            forced to ``"gelu_new"`` when ``feed_forward_proj == "gated-gelu"``.
        is_gated_act (bool): ``True`` when ``feed_forward_proj`` starts with ``gated``.

    Raises:
        ValueError: If ``feed_forward_proj`` has more than two ``-`` separated parts,
            or has two parts and the first one is not ``gated``.
    """

    def __init__(
        self,
        vocab_size=32128,
        d_model=512,
        d_kv=64,
        d_ff=2048,
        num_layers=6,
        num_decoder_layers=None,
        num_heads=8,
        relative_attention_num_buckets=32,
        relative_attention_max_distance=128,
        dropout_rate=0.1,
        layer_norm_epsilon=1e-6,
        initializer_factor=1.0,
        feed_forward_proj="relu",
        is_encoder_decoder=True,
        use_cache=True,
        pad_token_id=0,
        eos_token_id=1,
        **kwargs
    ):
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.d_kv = d_kv
        self.d_ff = d_ff
        self.num_layers = num_layers
        self.num_decoder_layers = (
            num_decoder_layers if num_decoder_layers is not None else self.num_layers
        )  # default = symmetry
        self.num_heads = num_heads
        self.relative_attention_num_buckets = relative_attention_num_buckets
        self.relative_attention_max_distance = relative_attention_max_distance
        self.dropout_rate = dropout_rate
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_factor = initializer_factor
        self.feed_forward_proj = feed_forward_proj
        self.use_cache = use_cache

        # "gated-gelu" -> ["gated", "gelu"]; "relu" -> ["relu"]
        act_info = self.feed_forward_proj.split("-")
        self.dense_act_fn = act_info[-1]
        self.is_gated_act = act_info[0] == "gated"

        # Valid shapes are exactly one part, or two parts whose first part is "gated".
        if (len(act_info) > 1 and act_info[0] != "gated") or len(act_info) > 2:
            raise ValueError(
                f"`feed_forward_proj`: {feed_forward_proj} is not a valid activation function of the dense layer. "
                "Please make sure `feed_forward_proj` is of the format `gated-{ACT_FN}` or `{ACT_FN}`, e.g. "
                "'gated-gelu' or 'relu'"
            )

        # for backwards compatibility
        if feed_forward_proj == "gated-gelu":
            self.dense_act_fn = "gelu_new"

        super().__init__(
            pad_token_id=pad_token_id,
            eos_token_id=eos_token_id,
            is_encoder_decoder=is_encoder_decoder,
            **kwargs,
        )
84 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/tinybert/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | TinyBert Models init
17 | """
18 | 
19 | from .tinybert_config import *
20 | from .tinybert import *
21 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/transformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/transformer/__init__.py


--------------------------------------------------------------------------------
/code/mindnlp/models/transformer/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/transformer/transformer.py


--------------------------------------------------------------------------------
/code/mindnlp/models/transformer/transformer_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/transformer/transformer_config.py


--------------------------------------------------------------------------------
/code/mindnlp/models/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | 
16 | """
17 | Common utils for models
18 | """
19 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/utils/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/models/utils/__pycache__/activations.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/utils/__pycache__/activations.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/models/utils/__pycache__/logging.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/utils/__pycache__/logging.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/models/utils/__pycache__/mixin.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/models/utils/__pycache__/mixin.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/models/utils/activations.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """MindNLP Activations"""
16 | 
17 | from collections import OrderedDict
18 | from mindspore import nn
19 | 
20 | 
class ClassInstantier(OrderedDict):
    r"""
    Ordered mapping whose item lookup returns a freshly built instance.

    A stored value is either a callable (invoked with no arguments) or a
    ``(callable, kwargs)`` tuple (invoked as ``callable(**kwargs)``).
    """

    def __getitem__(self, key):
        entry = super().__getitem__(key)
        if isinstance(entry, tuple):
            factory, init_kwargs = entry
        else:
            factory, init_kwargs = entry, {}
        return factory(**init_kwargs)
30 | 
31 | 
# Activation registry: maps an activation name to a MindSpore activation cell.
# A value is either the cell class itself or a ``(class, kwargs)`` tuple; the
# kwargs are passed to the class constructor by ``ClassInstantier.__getitem__``.
ACT2CLS = {
    'relu': nn.ReLU,
    'gelu': (nn.GELU, {"approximate": False}),
    'gelu_new': nn.GELU,
    'gelu_approximate': (nn.GELU, {"approximate": True}),
    "swish": nn.SiLU,  # MindSpore's SiLU activation is the Swish function
    "gelu_10": nn.GELU,  # MindSpore's GELU does not support setting a max/min value
    "gelu_fast": nn.FastGelu,
    "gelu_python": nn.GELU,  # MindSpore's GELU offers no choice of a Python implementation
    # MindSpore has no Linear activation, so ReLU is used as a substitute.
    # NOTE(review): ReLU is not an identity mapping; confirm whether callers of
    # "linear" expect a pass-through before relying on this entry.
    "linear": nn.ReLU,
    "mish": nn.Mish,
    "quick_gelu": nn.FastGelu,
    "relu6": nn.ReLU6,
    "sigmoid": nn.Sigmoid,
    "silu": nn.SiLU,
    "tanh": nn.Tanh,
}
# Looking up a name in ACT2FN returns a new instance of the registered cell.
ACT2FN = ClassInstantier(ACT2CLS)
54 | 
55 | 
def get_activation(activation_string):
    """
    Look up an activation by name in ``ACT2FN``.

    :param activation_string: key of the activation in ``ACT2FN``.
    :return: the value produced by ``ACT2FN[activation_string]``.
    :raises KeyError: if ``activation_string`` is not a key of ``ACT2FN``.
    """
    if activation_string not in ACT2FN:
        raise KeyError(f"function {activation_string} not found in ACT2FN mapping {list(ACT2FN.keys())}")
    return ACT2FN[activation_string]
65 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/utils/logging.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """MindNLP Logging"""
16 | 
17 | import logging
18 | import os
19 | import sys
20 | import threading
21 | 
22 | _lock = threading.Lock()
23 | 
24 | log_levels = {
25 |     "debug": logging.DEBUG,
26 |     "info": logging.INFO,
27 |     "warning": logging.WARNING,
28 |     "error": logging.ERROR,
29 |     "critical": logging.CRITICAL,
30 | }
31 | 
32 | _DEFAULT_LOG_LEVEL = logging.WARNING
33 | 
34 | 
35 | def _get_default_handler(default_handle=None):
36 |     return default_handle
37 | 
38 | 
def _get_default_logging_level():
    """
    Resolve the default log level from the ``TRANSFORMERS_VERBOSITY`` environment variable.

    Returns the matching entry of ``log_levels`` when the variable holds a known name.
    Falls back to ``_DEFAULT_LOG_LEVEL`` when the variable is unset or empty, and also
    (after a warning on the root logger) when its value is not a known name.
    """
    requested = os.getenv("TRANSFORMERS_VERBOSITY", None)
    if not requested:
        return _DEFAULT_LOG_LEVEL
    if requested not in log_levels:
        logging.getLogger().warning("Unknown option TRANSFORMERS_VERBOSITY= %s,  has to be one of: %s", requested,
                                    ', '.join(log_levels.keys()))
        return _DEFAULT_LOG_LEVEL
    return log_levels[requested]
51 | 
52 | 
53 | def _get_library_name() -> str:
54 |     return __name__.split('.', maxsplit=1)[0]
55 | 
56 | 
def _get_library_root_logger() -> logging.Logger:
    """Return the logger named after the library's top-level package."""
    root_name = _get_library_name()
    return logging.getLogger(root_name)
59 | 
60 | 
def _configure_library_root_logger() -> None:
    """
    Attach a stderr ``StreamHandler`` to the library root logger, once.

    On the first call the handler is added, the logger level is set from
    ``_get_default_logging_level()`` and propagation is switched off. Later calls
    are no-ops: ``_get_default_handler()`` keeps no state between calls, so the
    installed handler is tagged and looked up on the logger itself. Without this
    check every call would add another handler (duplicated output) and reset the
    logger level.
    """
    with _lock:
        if _get_default_handler() is not None:
            # This library has already configured the library root logger.
            return

        library_root_logger = _get_library_root_logger()
        for existing_handler in library_root_logger.handlers:
            if getattr(existing_handler, "mindnlp_default_handler", False):
                # Our handler is already installed by a previous call.
                return

        default_handler = logging.StreamHandler()  # Set sys.stderr as stream.
        default_handler.flush = sys.stderr.flush
        # Tag the handler so subsequent calls can recognise it.
        default_handler.mindnlp_default_handler = True

        # Apply our default configuration to the library root logger.
        library_root_logger.addHandler(default_handler)
        library_root_logger.setLevel(_get_default_logging_level())
        library_root_logger.propagate = False
75 | 
76 | 
def get_logger(name=None):
    """
    Return the logger called ``name``; the library name is used when ``name`` is ``None``.

    The library root logger is configured before the logger is returned.

    This function is not supposed to be directly accessed unless you are writing a custom transformers module.
    """
    logger_name = _get_library_name() if name is None else name
    _configure_library_root_logger()
    return logging.getLogger(logger_name)
89 | 


--------------------------------------------------------------------------------
/code/mindnlp/models/xlm/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | XLM Models init
17 | """
18 | 
19 | from .xlm_config import *
20 | from .xlm import *
21 | 


--------------------------------------------------------------------------------
/code/mindnlp/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """modules init"""
16 | 
17 | from mindnlp.utils import less_min_pynative_first
18 | from mindnlp._legacy.nn import transformer
19 | from mindnlp.modules import encoder, decoder, embeddings, loss, attentions, crf, rnns
20 | from mindnlp.modules.attentions import ScaledDotAttention, SelfAttention, \
21 |     BinaryAttention, AdditiveAttention, CosineAttention, LocationAwareAttention, \
22 |     LinearAttention
23 | from mindnlp.modules.encoder import RNNEncoder, CNNEncoder
24 | from mindnlp.modules.decoder import RNNDecoder
25 | from mindnlp.modules.embeddings import Fasttext, Glove
26 | from mindnlp.modules.crf import CRF
27 | from mindnlp.modules.loss import RDropLoss, CMRC2018Loss
28 | from mindnlp.modules.rnns import *
29 | 
30 | if less_min_pynative_first:
31 |     from mindnlp._legacy.nn.transformer import Transformer, TransformerDecoder, TransformerEncoder, \
32 |         TransformerEncoderLayer, TransformerDecoderLayer, MultiheadAttention
33 | else:
34 |     from mindspore.nn import Transformer, TransformerDecoder, TransformerEncoder, \
35 |         TransformerEncoderLayer, TransformerDecoderLayer, MultiheadAttention
36 | 
# Public names of this package: each submodule's own `__all__`, concatenated in this order.
__all__ = [
    *transformer.__all__,
    *encoder.__all__,
    *decoder.__all__,
    *embeddings.__all__,
    *attentions.__all__,
    *crf.__all__,
    *loss.__all__,
    *rnns.__all__,
]
47 | 


--------------------------------------------------------------------------------
/code/mindnlp/modules/beam_search.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/modules/beam_search.py


--------------------------------------------------------------------------------
/code/mindnlp/modules/decoder/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Decoder class
17 | """
18 | 
19 | from .rnn_decoder import RNNDecoder
20 | 
21 | __all__ = ['RNNDecoder']
22 | 


--------------------------------------------------------------------------------
/code/mindnlp/modules/embeddings/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Embedding class
17 | """
18 | 
19 | from .fasttext_embedding import Fasttext
20 | from .glove_embedding import Glove
21 | 
22 | __all__ = ["Fasttext", "Glove"]
23 | 


--------------------------------------------------------------------------------
/code/mindnlp/modules/encoder/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Encoder class
17 | """
18 | 
19 | from .rnn_encoder import RNNEncoder
20 | from .cnn_encoder import CNNEncoder
21 | 
22 | __all__ = ["RNNEncoder", "CNNEncoder"]
23 | 


--------------------------------------------------------------------------------
/code/mindnlp/modules/encoder/rnn_encoder.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 Huawei Technologies Co., Ltd
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | # http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ============================================================================
 15 | """RNN encoder modules"""
 16 | # pylint: disable=abstract-method
 17 | 
 18 | from mindnlp.abc import EncoderBase
 19 | from mindnlp.modules.rnns import _RNNBase
 20 | 
 21 | class RNNEncoder(EncoderBase):
 22 |     r"""
 23 |     Stacked Elman RNN Encoder.
 24 | 
 25 |     Args:
 26 |         embedding (Cell): The embedding layer.
 27 |         rnn (Cell): The RNN Layer.
 28 | 
 29 |     Examples:
 30 |         >>> vocab_size = 1000
 31 |         >>> embedding_size = 32
 32 |         >>> hidden_size = 16
 33 |         >>> num_layers = 2
 34 |         >>> has_bias = True
 35 |         >>> dropout = 0.1
 36 |         >>> bidirectional = False
 37 |         >>> embedding = nn.Embedding(vocab_size, embedding_size)
 38 |         >>> rnn = nn.RNN(embedding_size, hidden_size, num_layers=num_layers, has_bias=has_bias,
 39 |         ...              batch_first=True, dropout=dropout, bidirectional=bidirectional)
 40 |         >>> rnn_encoder = RNNEncoder(embedding, rnn)
 41 |         >>> src_tokens = Tensor(np.ones([8, 16]), mindspore.int32)
 42 |         >>> src_length = Tensor(np.ones([8]), mindspore.int32)
 43 |         >>> mask = Tensor(np.ones([8, 16]), mindspore.int32)
 44 |         >>> output, hiddens_n, mask = rnn_encoder(src_tokens, src_length, mask=mask)
 45 |         >>> print(output.shape)
 46 |         >>> print(hiddens_n.shape)
 47 |         >>> print(mask.shape)
 48 |         (8, 16, 16)
 49 |         (2, 8, 16)
 50 |         (8, 16)
 51 |     """
 52 | 
 53 |     def __init__(self, embedding, rnn):
 54 |         super().__init__(embedding)
 55 |         self.rnn = rnn
 56 |         self.static = False
 57 |         if isinstance(rnn, _RNNBase):
 58 |             self.static = True
 59 | 
 60 |     def construct(self, src_token, src_length=None, mask=None):
 61 |         """
 62 |         Construct method.
 63 | 
 64 |         Args:
 65 |             src_token (Tensor): Tokens in the source language with shape [batch, max_len].
 66 |             src_length (Tensor): Lengths of each sentence with shape [batch].
 67 |             mask (Tensor): Its elements identify whether the corresponding input token is padding or not.
 68 |                 If the value is 1, not padding token. If the value is 0, padding token. Defaults to None.
 69 | 
 70 |         Returns:
 71 |             Tuple, a tuple contains (`output`, `hiddens_n`, `mask`).
 72 | 
 73 |             - output (Tensor): Tensor of shape (seq_len, batch_size, num_directions * `hidden_size`).
 74 |             - hiddens_n (Tensor): Tensor of shape (num_directions * `num_layers`, batch_size, `hidden_size`).
 75 |             - mask (Tensor): Mask Tensor used in decoder.
 76 |         """
 77 |         if mask is None:
 78 |             mask = self._gen_mask(src_token)
 79 |         src_token = src_token * mask
 80 |         embed = self.embedding(src_token)
 81 | 
 82 |         if self.static:
 83 |             output, hiddens_n = self.rnn(embed)
 84 |         else:
 85 |             output, hiddens_n = self.rnn(embed, seq_length=src_length)
 86 | 
 87 |         return output, hiddens_n, mask
 88 | 
 89 |     def reorder_encoder_out(self, encoder_out, new_order):
 90 |         """
 91 |         Reorder encoder output according to `new_order`.
 92 | 
 93 |         Args:
 94 |             encoder_out (Union[Tensor, tuple]): The encoder's output.
 95 |             new_order (Tensor): Desired order.
 96 | 
 97 |         Returns:
 98 |             Tuple, encoder_out rearranged according to new_order.
 99 |         """
100 |         encoder_output = encoder_out[0]
101 |         encoder_hiddens = encoder_out[1]
102 |         encoder_padding_mask = encoder_out[2]
103 | 
104 |         new_output = encoder_output.gather(new_order, 1)
105 |         new_hiddens = encoder_hiddens.gather(new_order, 1)
106 |         new_padding_mask = encoder_padding_mask.gather(new_order, 0)
107 | 
108 |         return new_output, new_hiddens, new_padding_mask
109 | 


--------------------------------------------------------------------------------
/code/mindnlp/transforms/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | # pylint: disable=C0413
16 | # pylint: disable=C0412
17 | # pylint: disable=C0411
18 | 
19 | """
20 | dataset processing transforms
21 | """
22 | 
23 | from mindnlp.utils import less_min_pynative_first
24 | 
25 | if less_min_pynative_first:
26 |     from mindnlp._legacy.transforms import Truncate, AddToken
27 | else:
28 |     from mindspore.dataset.text import Truncate, AddToken
29 | 
30 | from mindnlp.transforms.lookup import Lookup
31 | from mindnlp.transforms.tokenizers import BasicTokenizer
32 | from mindnlp.transforms.pad_transform import PadTransform
33 | 
34 | __all__ = [
35 |     'Truncate', 'AddToken', 'Lookup', 'PadTransform', 'BasicTokenizer',
36 | ]
37 | 
38 | from .tokenizers import *
39 | 


--------------------------------------------------------------------------------
/code/mindnlp/transforms/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/transforms/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/transforms/__pycache__/lookup.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/transforms/__pycache__/lookup.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/transforms/__pycache__/pad_transform.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/transforms/__pycache__/pad_transform.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/transforms/lookup.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | # pylint:disable=I1101
16 | # pylint:disable=W0212
17 | 
18 | """
19 | lookup transforms
20 | """
21 | import mindspore._c_dataengine as cde
22 | from mindspore.dataset.text.transforms import TextTensorOperation
23 | from mindspore.dataset.core.datatypes import mstype_to_detype
24 | from mindspore.common import dtype as mstype
25 | from mindspore.dataset.text import Vocab as msVocab
26 | from mindnlp.vocab import Vocab as nlpVocab
27 | 
28 | 
class Lookup(TextTensorOperation):
    """
    Look up a word into an id according to the input vocabulary table.

    Args:
        vocab (Vocab): A vocabulary object, either a `mindnlp.vocab.Vocab` or a
            `mindspore.dataset.text.Vocab`.
        unk_token (str): unknown token for OOV. This argument is required.
        return_dtype (mindspore.dtype, optional): The data type that lookup operation maps
            string to. Default: mindspore.int32.

    Raises:
        ValueError: If `vocab` is neither a `mindnlp.vocab.Vocab` nor a
            `mindspore.dataset.text.Vocab`.

    Examples:
        >>> from mindnlp import Vocab
        >>> from mindnlp.transforms import Lookup
        >>> # Load vocabulary from list
        >>> vocab = Vocab(['深', '圳', '欢', '迎', '您'])
        >>> # Use Lookup operation to map tokens to ids; `unk_token` must be supplied
        >>> lookup = Lookup(vocab, unk_token='<unk>')
        >>> text_file_dataset = text_file_dataset.map(operations=[lookup])
    """

    def __init__(self, vocab, unk_token, return_dtype=mstype.int32):
        super().__init__()
        if not isinstance(vocab, (nlpVocab, msVocab)):
            raise ValueError(f'do not support vocab type {type(vocab)}.')

        if isinstance(vocab, nlpVocab):
            # Build the C++ vocab from the mindnlp vocab's token dictionary.
            self._vocab = cde.Vocab.from_dict(vocab._token_dict)
        else:
            # A MindSpore vocab already carries its C++ counterpart.
            self._vocab = vocab.c_vocab

        self._unk_token = unk_token
        self._return_dtype = return_dtype

    def parse(self):
        """Build the `cde.LookupOperation` from the stored vocab, unknown token and dtype."""
        detype_name = str(mstype_to_detype(self._return_dtype))
        return cde.LookupOperation(self._vocab, self._unk_token, detype_name)
67 |     


--------------------------------------------------------------------------------
/code/mindnlp/transforms/pad_transform.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """PadTransform"""
16 | import numpy as np
17 | from mindspore.dataset.transforms.transforms import PyTensorOperation
18 | from mindspore.dataset.text.transforms import Implementation
19 | 
20 | 
class PadTransform(PyTensorOperation):
    """
    Truncate or pad an array along its first axis to a fixed length.

    Args:
        max_length (int): Target length; longer inputs are cut down to it.
        pad_value (int): Value appended to inputs shorter than `max_length`.
        return_length (bool): Whether to additionally return the length of the
            (already truncated) input. Default: False.

    Raises:
        TypeError: If the input given to `__call__` is not a `numpy.ndarray`.

    Supported Platforms:
        ``CPU``

    Examples:

    """

    # @check_decode
    def __init__(self, max_length: int, pad_value:int, return_length:bool = False):
        super().__init__()
        self.implementation = Implementation.PY
        self.return_length = return_length
        self.pad_value = pad_value
        self.max_length = max_length

    def __call__(self, text_input):
        """
        Validate that the input is a NumPy array, then delegate to the parent call.
        """
        if isinstance(text_input, np.ndarray):
            return super().__call__(text_input)
        raise TypeError(
            f"Input should be a text line in 1-D ndarray contains string, got {type(text_input)}.")

    def execute_py(self, text_input):
        """
        Public alias of `_execute_py`.
        """
        return self._execute_py(text_input)

    def _execute_py(self, text_input):
        """
        Truncate to `max_length`, then append `pad_value` until that length is reached.
        """
        truncated = text_input[:self.max_length]
        kept = len(truncated)

        # The filler uses the input dtype so concatenation does not change it.
        missing = self.max_length - kept
        filler = np.array([self.pad_value] * missing, truncated.dtype)
        padded = np.concatenate((truncated, filler), axis=0)

        if not self.return_length:
            return padded

        # The length is returned as a 0-d array next to the padded data.
        return padded, np.array(kept)
78 | 


--------------------------------------------------------------------------------
/code/mindnlp/transforms/tokenizers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | tokenizers init
17 | """
18 | 
19 | from .basic_tokenizer import BasicTokenizer
20 | from .bert_tokenizer import BertTokenizer
21 | from .t5_tokenizer import T5Tokenizer
22 | 


--------------------------------------------------------------------------------
/code/mindnlp/transforms/tokenizers/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/transforms/tokenizers/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/transforms/tokenizers/__pycache__/basic_tokenizer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/transforms/tokenizers/__pycache__/basic_tokenizer.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/transforms/tokenizers/__pycache__/bert_tokenizer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/transforms/tokenizers/__pycache__/bert_tokenizer.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/transforms/tokenizers/bert_tokenizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | BertTokenizer
17 | """
18 | 
19 | import numpy as np
20 | from mindspore.dataset.transforms.transforms import PyTensorOperation
21 | from mindspore.dataset.text.transforms import Implementation
22 | from tokenizers.implementations import BertWordPieceTokenizer
23 | 
class BertTokenizer(PyTensorOperation):
    """
    Tokenizer used for Bert text process.

    Args:
        vocab (Vocab): Vocabulary object whose `vocab()` result is handed to
            `BertWordPieceTokenizer` (presumably a `mindspore.dataset.text.Vocab`,
            as in the example below -- TODO confirm).
        lower_case (bool, optional): Forwarded to the word-piece tokenizer as
            `lowercase`. Default: True.
        return_token (bool, optional): If True, return the token strings instead
            of the token ids. Default: False.

    Raises:
        TypeError: If the input given to `__call__` is neither a `str` nor a
            `numpy.ndarray`.

    Examples:
        >>> from mindspore.dataset import text
        >>> from mindnlp.transforms.tokenizers import BertTokenizer
        >>> vocab_list = ["床", "前", "明", "月", "光", "疑", "是", "地", "上", "霜", "举", "头", "望", "低",
              "思", "故", "乡","繁", "體", "字", "嘿", "哈", "大", "笑", "嘻", "i", "am", "mak",
              "make", "small", "mistake", "##s", "during", "work", "##ing", "hour", "😀", "😃",
              "😄", "😁", "+", "/", "-", "=", "12", "28", "40", "16", " ", "I", "[CLS]", "[SEP]",
              "[UNK]", "[PAD]", "[MASK]", "[unused1]", "[unused10]"]
        >>> vocab = text.Vocab.from_list(vocab_list)
        >>> tokenizer_op = BertTokenizer(vocab=vocab, lower_case=True)
        >>> text = "i make a small mistake when i\'m working! 床前明月光😀"
        >>> tokenized_text = tokenizer_op(text)

    """

    # @check_decode
    def __init__(self, vocab, lower_case:bool = True, return_token = False):
        super().__init__()
        self.tokenizer = BertWordPieceTokenizer(vocab=vocab.vocab(), lowercase=lower_case)
        self.return_token = return_token
        self.implementation = Implementation.PY

    def __call__(self, text_input):
        """
        Validate the input and run the transform through the parent call.

        A plain `str` is wrapped into a 0-d NumPy array first; any other
        non-ndarray input is rejected with a TypeError.
        """
        if isinstance(text_input, str):
            text_input = np.array(text_input)
        elif not isinstance(text_input, np.ndarray):
            raise TypeError(
                f"Input should be a text line in 1-D NumPy format, got {type(text_input)}.")
        return super().__call__(text_input)

    def execute_py(self, text_input):
        """
        Public alias of `_execute_py`.
        """
        return self._execute_py(text_input)

    def _execute_py(self, text_input):
        """
        Encode the text and return either its tokens or its ids as a NumPy array.
        """
        text = self._convert_to_unicode(text_input)
        output = self.tokenizer.encode(text)
        if self.return_token is True:
            return np.array(output.tokens)
        return np.array(output.ids)

    def _convert_to_unicode(self, text_input):
        """
        Converts `text` to Unicode (if it's not already), assuming utf-8 input.

        Raises:
            ValueError: If `text_input` is not a `str`, `bytes` or `numpy.ndarray`.
        """
        if isinstance(text_input, str):
            return text_input
        if isinstance(text_input, bytes):
            return text_input.decode("utf-8", "ignore")
        if isinstance(text_input, np.ndarray):
            if text_input.dtype.type is np.bytes_:
                text_input = np.char.decode(text_input, "utf-8")
            return str(text_input)
        # Objects that reach this point usually have no `dtype`; reading it
        # directly would raise AttributeError and mask the intended ValueError.
        raise ValueError(
            f"Unsupported string type: {type(text_input)}, {getattr(text_input, 'dtype', None)}")
99 | 


--------------------------------------------------------------------------------
/code/mindnlp/transforms/tokenizers/t5_tokenizer.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 Huawei Technologies Co., Ltd
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | # http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ============================================================================
 15 | """
 16 | T5Tokenizer
 17 | """
 18 | 
 19 | import os
 20 | import numpy as np
 21 | from tokenizers import Tokenizer, models
 22 | from mindspore.dataset.transforms.transforms import PyTensorOperation
 23 | from mindspore.dataset.text.transforms import Implementation
 24 | from mindnlp.utils.download import cache_file
 25 | from mindnlp.configs import DEFAULT_ROOT
 26 | 
 27 | URL = {
 28 |     "t5-small": "https://huggingface.co/t5-small/resolve/main/tokenizer.json",
 29 |     "t5-base": "https://huggingface.co/t5-base/resolve/main/tokenizer.json",
 30 |     "t5-large": "https://huggingface.co/t5-large/resolve/main/tokenizer.json",
 31 |     "t5-3b": "https://huggingface.co/t5-3b/resolve/main/tokenizer.json",
 32 |     "t5-11b": "https://huggingface.co/t5-11b/resolve/main/tokenizer.json"
 33 | }
 34 | 
 35 | class T5Tokenizer(PyTensorOperation):
 36 |     """
 37 |     Tokenizer used for Bert text process.
 38 |     Args:
 39 |         tokenizer_file (Str): The path of the tokenizer.json 
 40 |     Examples:
 41 |         >>> from mindspore.dataset import text
 42 |         >>> from mindnlp.transforms import T5Tokenizer
 43 |         >>> text = "Believing that faith can triumph over everything is in itself the greatest belief"
 44 |         >>> tokenizer = T5Tokenizer.from_pretrained('t5-base')
 45 |         >>> tokens = tokenizer.encode(text)
 46 |     """
 47 |     def __init__(
 48 |         self,
 49 |         tokenizer_file=None,
 50 |     ):
 51 |         super().__init__()
 52 |         if tokenizer_file is not None:
 53 |             self._tokenizer = Tokenizer(models.Unigram()).from_file(tokenizer_file)
 54 |         self.implementation = Implementation.PY
 55 | 
 56 |     def __call__(self, text_input):
 57 |         if isinstance(text_input, str):
 58 |             text_input = np.array(text_input)
 59 |         elif not isinstance(text_input, np.ndarray):
 60 |             raise TypeError(
 61 |                 f"Input should be a text line in 1-D NumPy format, got {type(text_input)}.")
 62 |         return super().__call__(text_input)
 63 | 
 64 |     @classmethod
 65 |     def from_pretrained(cls, size:str):
 66 |         """load T5Tokenizer from pretrained tokenizer.json"""
 67 |         cache_dir = os.path.join(DEFAULT_ROOT, "tokenizers", size)
 68 |         path, _ = cache_file(None, url=URL[size], cache_dir=cache_dir)
 69 |         tokenizer = cls(tokenizer_file=str(path))
 70 |         return tokenizer
 71 | 
 72 |     def encode(self, text_input):
 73 |         """encode function"""
 74 |         tokens = self._tokenizer.encode(text_input)
 75 |         return tokens
 76 | 
 77 |     def decode(self, ids: list):
 78 |         """decode function"""
 79 |         return self._tokenizer.decode(ids)
 80 | 
 81 |     def execute_py(self, text_input):
 82 |         """
 83 |         Execute method.
 84 |         """
 85 |         return self._execute_py(text_input)
 86 | 
 87 |     def _execute_py(self, text_input):
 88 |         """
 89 |         Execute method.
 90 |         """
 91 |         text_input = self._convert_to_unicode(text_input)
 92 |         tokens = self._tokenizer.encode(text_input)
 93 |         return tokens.ids
 94 | 
 95 |     def _convert_to_unicode(self, text_input):
 96 |         """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
 97 |         if isinstance(text_input, str):
 98 |             return text_input
 99 |         if isinstance(text_input, bytes):
100 |             return text_input.decode("utf-8", "ignore")
101 |         if isinstance(text_input, np.ndarray):
102 |             if text_input.dtype.type is np.bytes_:
103 |                 text_input = np.char.decode(text_input, "utf-8")
104 |             return str(text_input)
105 |         raise ValueError(f"Unsupported string type: {type(text_input)}, {text_input.dtype}")


--------------------------------------------------------------------------------
/code/mindnlp/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | 
16 | """
17 | Common utils
18 | """
19 | from .decompress import unzip, untar, ungz
20 | from .download import cache_file
21 | from .compatibility import *
22 | 


--------------------------------------------------------------------------------
/code/mindnlp/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/utils/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/utils/__pycache__/compatibility.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/utils/__pycache__/compatibility.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/utils/__pycache__/decompress.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/utils/__pycache__/decompress.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/utils/__pycache__/download.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/utils/__pycache__/download.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/utils/compatibility.py:
--------------------------------------------------------------------------------
 1 | """utils for mindspore backward compatibility."""
 2 | import mindspore
 3 | from packaging import version
 4 | 
 5 | MIN_COMPATIBLE_VERSION = '1.8.1'
 6 | MAX_GRAPH_FIRST_VERSION = '1.10.0'
 7 | 
 8 | less_min_compatible = version.parse(mindspore.__version__) < version.parse(MIN_COMPATIBLE_VERSION)
 9 | less_min_pynative_first = version.parse(mindspore.__version__) <= version.parse(MAX_GRAPH_FIRST_VERSION)
10 | 
11 | __all__ = ['less_min_compatible', 'less_min_pynative_first']
12 | 


--------------------------------------------------------------------------------
/code/mindnlp/utils/decompress.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 Huawei Technologies Co., Ltd
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | # http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ============================================================================
 15 | """
 16 | Decompress functions
 17 | """
 18 | 
 19 | import os
 20 | import tarfile
 21 | import zipfile
 22 | import gzip
 23 | 
 24 | def untar(file_path: str, untar_path: str):
 25 |     r"""
 26 |     Untar tar.gz file
 27 | 
 28 |     Args:
 29 |         file_path (str): The path where the tgz file is located.
 30 |         multiple (str): The directory where the files were unzipped.
 31 | 
 32 |     Returns:
 33 |         - **names** (list) -All filenames in the tar.gz file.
 34 | 
 35 |     Raises:
 36 |         TypeError: If `file_path` is not a string.
 37 |         TypeError: If `untar_path` is not a string.
 38 | 
 39 |     Examples:
 40 |         >>> file_path = "./mindnlp/datasets/IWSLT2016/2016-01.tgz"
 41 |         >>> untar_path = "./mindnlp/datasets/IWSLT2016"
 42 |         >>> output = untar(file_path,untar_path)
 43 |         >>> print(output[0])
 44 |         '2016-01'
 45 | 
 46 |     """
 47 |     tar = tarfile.open(file_path)
 48 |     names = tar.getnames()
 49 |     for name in names:
 50 |         if os.path.exists(os.path.join(untar_path, name)):
 51 |             continue
 52 |         tar.extract(name, untar_path)
 53 |     tar.close()
 54 |     return names
 55 | 
 56 | 
 57 | def unzip(file_path: str, unzip_path: str):
 58 |     r"""
 59 |     Untar .zip file
 60 | 
 61 |     Args:
 62 |         file_path (str): The path where the .zip file is located.
 63 |         unzip_path (str): The directory where the files were unzipped.
 64 | 
 65 |     Returns:
 66 |         - **names** (list) -All filenames in the .zip file.
 67 | 
 68 |     Raises:
 69 |         TypeError: If `file_path` is not a string.
 70 |         TypeError: If `untar_path` is not a string.
 71 | 
 72 |     """
 73 |     zipf = zipfile.ZipFile(file_path, "r")
 74 |     for name in zipf.namelist():
 75 |         zipf.extract(name, unzip_path)
 76 |     zipf.close()
 77 |     return zipf.namelist()
 78 | 
 79 | def ungz(file_path: str, unzip_path: str = None):
 80 |     r"""
 81 |     Untar .gz file
 82 | 
 83 |     Args:
 84 |         file_path (str): The path where the .gz file is located.
 85 |         unzip_path (str): The directory where the files were unzipped.
 86 | 
 87 |     Returns:
 88 |         - **unzip_path** (str): The directory where the files were unzipped.
 89 | 
 90 |     Raises:
 91 |         TypeError: If `file_path` is not a string.
 92 |         TypeError: If `untar_path` is not a string.
 93 | 
 94 |     """
 95 |     if not isinstance(unzip_path,str):
 96 |         unzip_path = str(file_path)[:-3]
 97 |     with open(unzip_path,'wb') as file:
 98 |         gz_file = gzip.open(file_path, mode = 'rb')
 99 |         file.write(gz_file.read())
100 |         gz_file.close()
101 |     return unzip_path
102 | 


--------------------------------------------------------------------------------
/code/mindnlp/vocab/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | vocab init
17 | """
18 | 
19 | from .vocab import Vocab
20 | 


--------------------------------------------------------------------------------
/code/mindnlp/vocab/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/vocab/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/vocab/__pycache__/vocab.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/vocab/__pycache__/vocab.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/workflow/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | MindNLP workflow module.
17 | """
18 | 
19 | from .works import *
20 | from .work import *
21 | from .downstream import *
22 | 


--------------------------------------------------------------------------------
/code/mindnlp/workflow/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/workflow/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/workflow/__pycache__/work.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/workflow/__pycache__/work.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/workflow/__pycache__/workflow.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/workflow/__pycache__/workflow.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/workflow/downstream/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Downstream Models
17 | """
18 | 
19 | from .sentiment_analysis_model import BertForSentimentAnalysis
20 | 


--------------------------------------------------------------------------------
/code/mindnlp/workflow/downstream/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/workflow/downstream/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/workflow/downstream/__pycache__/sentiment_analysis_model.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/workflow/downstream/__pycache__/sentiment_analysis_model.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/workflow/downstream/sentiment_analysis_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | 
16 | """ Sentiment Analysis Model """
17 | 
18 | # pylint: disable=arguments-differ
19 | 
20 | from mindspore import nn
21 | from mindnlp.models import BertModel
22 | 
class BertForSentimentAnalysis(nn.Cell):
    """
    Bert Model for classification tasks.

    A `BertModel` followed by a dense layer that maps the second element of
    the Bert outputs to `config.num_labels` logits.

    Args:
        config: Configuration object providing `num_labels` and `hidden_size`;
            it is also passed to `BertModel`.
    """
    def __init__(self, config):
        # `nn.Cell.__init__` accepts (auto_prefix, flags); the model config is
        # not one of its arguments and must not be forwarded to it.
        super().__init__()
        self.num_labels = config.num_labels
        self.config = config
        self.bert = BertModel(config)
        self.classifier = nn.Dense(config.hidden_size, config.num_labels)

    def construct(self, input_ids, attention_mask=None, token_type_ids=None,
                  position_ids=None, head_mask=None):
        """Run Bert on the inputs and return the classification logits."""
        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask
        )
        # Index 1 of the Bert outputs is fed to the classifier
        # (presumably the pooled representation -- confirm against BertModel).
        pooled_output = outputs[1]
        logits = self.classifier(pooled_output)
        return logits
44 | 


--------------------------------------------------------------------------------
/code/mindnlp/workflow/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Workflow Utils
17 | """
18 | 
19 | import re
20 | 
def cut_chinese_sent(para):
    """
    Split a Chinese paragraph into sentences.

    A line break is inserted after sentence-ending punctuation, six-dot and
    double-ellipsis runs, and after a closing quote that follows such
    punctuation. The text is then right-stripped and split on those breaks.
    """
    # Every rule keeps both captured groups and puts a newline between them.
    break_between = r"\1\n\2"
    # The rules are applied one after another, in this order.
    boundary_rules = (
        r"([。！？\?])([^”’])",
        r"(\.{6})([^”’])",
        r"(\…{2})([^”’])",
        r"([。！？\?][”’])([^，。！？\?])",
    )
    for rule in boundary_rules:
        para = re.sub(rule, break_between, para)
    return para.rstrip().split("\n")
31 | 


--------------------------------------------------------------------------------
/code/mindnlp/workflow/works/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Huawei Technologies Co., Ltd
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 | """
16 | Works
17 | """
18 | 
19 | from .sentiment_analysis import SentimentAnalysisWork
20 | 


--------------------------------------------------------------------------------
/code/mindnlp/workflow/works/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/workflow/works/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/workflow/works/__pycache__/sentiment_analysis.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/workflow/works/__pycache__/sentiment_analysis.cpython-38.pyc


--------------------------------------------------------------------------------
/code/mindnlp/workflow/works/classification.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/workflow/works/classification.py


--------------------------------------------------------------------------------
/code/mindnlp/workflow/works/ner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/workflow/works/ner.py


--------------------------------------------------------------------------------
/code/mindnlp/workflow/works/qa.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/mindnlp/workflow/works/qa.py


--------------------------------------------------------------------------------
/code/t5-small参数名称对比.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/T5-Model-migration/ca31e367e580c22fe3bc193b0f5aa8b8f6cd0a47/code/t5-small参数名称对比.xlsx


--------------------------------------------------------------------------------