├── requirements.txt
├── .github
    └── workflows
    │   └── auto_mirror.yaml
├── LICENSE
├── 5-2.BERT
    ├── layers.py
    ├── BERT.py
    ├── BERT_pytorch.ipynb
    └── BERT.ipynb
├── 5-1.Transformer
    ├── layers.py
    └── Transformer.py
├── .gitignore
├── README.md
├── 3-2.TextLSTM
    ├── TextLSTM_pytorch.ipynb
    └── TextLSTM.ipynb
├── 1-1.NNLM
    ├── NNLM_pytorch.ipynb
    └── NNLM.ipynb
├── 3-3.Bi-LSTM
    ├── Bi-LSTM_pytorch.ipynb
    └── Bi-LSTM.ipynb
├── 1-2.Word2Vec
    ├── Word2Vec_pytorch.ipynb
    └── Word2Vec.ipynb
├── 3-1.TextRNN
    ├── TextRNN_pytorch.ipynb
    └── TextRNN.ipynb
├── 2-1.TextCNN
    ├── TextCNN_pytorch.ipynb
    └── TextCNN.ipynb
├── 4-3.Bi-LSTM(Attention)
    ├── Bi-LSTM-Attention_pytorch.ipynb
    └── Bi-LSTM-Attention.ipynb
├── 4-1.Seq2Seq
    ├── Seq2Seq_pytorch.ipynb
    └── Seq2Seq.ipynb
└── 4-2.Seq2Seq(Attention)
    ├── Seq2Seq-Attention_pytorch.ipynb
    └── Seq2Seq-Attention.ipynb


/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib


--------------------------------------------------------------------------------
/.github/workflows/auto_mirror.yaml:
--------------------------------------------------------------------------------
 1 | name: Mirroring
 2 | 
 3 | on: [push, delete]
 4 | 
 5 | jobs:
 6 |   sync_to_openi:
 7 |     runs-on: ubuntu-latest
 8 |     steps:                                              # <-- must use actions/checkout before mirroring!
 9 |       - uses: actions/checkout@v2
10 |         with:
11 |           fetch-depth: 0
12 |       - uses: pixta-dev/repository-mirroring-action@v1
13 |         with:
14 |           target_repo_url:
15 |             git@git.openi.org.cn:lvyufeng/mindspore_nlp_tutorial.git
16 |           ssh_private_key:                              # <-- use 'secrets' to pass credential information.
17 |             ${{ secrets.OPENI_SSH_PRIVATE_KEY }}


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 nate.river
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/5-2.BERT/layers.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | import mindspore
 3 | import mindspore.nn as nn
 4 | from mindspore import Parameter, Tensor
 5 | from mindspore.common.initializer import initializer, HeUniform, Uniform, Normal, _calculate_fan_in_and_fan_out
 6 | 
 7 | class Conv1d(nn.Conv1d):
 8 |     def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same', padding=0, dilation=1, group=1, has_bias=True):
 9 |         super().__init__(in_channels, out_channels, kernel_size, stride, pad_mode, padding, dilation, group, has_bias, weight_init='normal', bias_init='zeros')
10 |         self.reset_parameters()
11 |         
12 |     def reset_parameters(self):
13 |         self.weight.set_data(initializer(HeUniform(math.sqrt(5)), self.weight.shape))
14 |         #self.weight = Parameter(initializer(HeUniform(math.sqrt(5)), self.weight.shape), name='weight')
15 |         if self.has_bias:
16 |             fan_in, _ = _calculate_fan_in_and_fan_out(self.weight.shape)
17 |             bound = 1 / math.sqrt(fan_in)
18 |             self.bias.set_data(initializer(Uniform(bound), [self.out_channels]))
19 | 
20 | class Dense(nn.Dense):
21 |     def __init__(self, in_channels, out_channels, has_bias=True, activation=None):
22 |         super().__init__(in_channels, out_channels, weight_init='normal', bias_init='zeros', has_bias=has_bias, activation=activation)
23 |         self.reset_parameters()
24 |         
25 |     def reset_parameters(self):
26 |         self.weight.set_data(initializer(HeUniform(math.sqrt(5)), self.weight.shape))
27 |         if self.has_bias:
28 |             fan_in, _ = _calculate_fan_in_and_fan_out(self.weight.shape)
29 |             bound = 1 / math.sqrt(fan_in)
30 |             self.bias.set_data(initializer(Uniform(bound), [self.out_channels]))
31 | 
32 | class Embedding(nn.Embedding):
33 |     def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal', dtype=mindspore.float32, padding_idx=None):
34 |         if embedding_table == 'normal':
35 |             embedding_table = Normal(1.0)
36 |         super().__init__(vocab_size, embedding_size, use_one_hot, embedding_table, dtype, padding_idx)
37 |     @classmethod
38 |     def from_pretrained_embedding(cls, embeddings:Tensor, freeze=True, padding_idx=None):
39 |         rows, cols = embeddings.shape
40 |         embedding = cls(rows, cols, embedding_table=embeddings, padding_idx=padding_idx)
41 |         embedding.embedding_table.requires_grad = not freeze
42 |         return embedding


--------------------------------------------------------------------------------
/5-1.Transformer/layers.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | import mindspore
 3 | import mindspore.nn as nn
 4 | from mindspore import Parameter, Tensor
 5 | from mindspore.common.initializer import initializer, HeUniform, Uniform, Normal, _calculate_fan_in_and_fan_out
 6 | 
 7 | class Conv1d(nn.Conv1d):
 8 |     def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same', padding=0, dilation=1, group=1, has_bias=True):
 9 |         super().__init__(in_channels, out_channels, kernel_size, stride, pad_mode, padding, dilation, group, has_bias, weight_init='normal', bias_init='zeros')
10 |         self.reset_parameters()
11 |         
12 |     def reset_parameters(self):
13 |         self.weight.set_data(initializer(HeUniform(math.sqrt(5)), self.weight.shape))
14 |         #self.weight = Parameter(initializer(HeUniform(math.sqrt(5)), self.weight.shape), name='weight')
15 |         if self.has_bias:
16 |             fan_in, _ = _calculate_fan_in_and_fan_out(self.weight.shape)
17 |             bound = 1 / math.sqrt(fan_in)
18 |             self.bias.set_data(initializer(Uniform(bound), [self.out_channels]))
19 | 
20 | class Dense(nn.Dense):
21 |     def __init__(self, in_channels, out_channels, has_bias=True, activation=None):
22 |         super().__init__(in_channels, out_channels, weight_init='normal', bias_init='zeros', has_bias=has_bias, activation=activation)
23 |         self.reset_parameters()
24 |         
25 |     def reset_parameters(self):
26 |         self.weight.set_data(initializer(HeUniform(math.sqrt(5)), self.weight.shape))
27 |         if self.has_bias:
28 |             fan_in, _ = _calculate_fan_in_and_fan_out(self.weight.shape)
29 |             bound = 1 / math.sqrt(fan_in)
30 |             self.bias.set_data(initializer(Uniform(bound), [self.out_channels]))
31 | 
32 | class Embedding(nn.Embedding):
33 |     def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal', dtype=mindspore.float32, padding_idx=None):
34 |         if embedding_table == 'normal':
35 |             embedding_table = Normal(1.0)
36 |         super().__init__(vocab_size, embedding_size, use_one_hot, embedding_table, dtype, padding_idx)
37 |     @classmethod
38 |     def from_pretrained_embedding(cls, embeddings:Tensor, freeze=True, padding_idx=None):
39 |         rows, cols = embeddings.shape
40 |         embedding = cls(rows, cols, embedding_table=embeddings, padding_idx=padding_idx)
41 |         embedding.embedding_table.requires_grad = not freeze
42 |         return embedding


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | rank_0/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # mindspore-nlp-tutorial
 2 | 
 3 | <p align="center"><img width="400" src="https://raw.githubusercontent.com/mindspore-ai/mindspore/master/docs/MindSpore-logo.png" /></p>
 4 | 
 5 | `mindspore-nlp-tutorial` is a tutorial for who is studying NLP(Natural Language Processing) using **MindSpore**. This repository is migrated from [nlp-tutorial](https://github.com/graykode/nlp-tutorial). Most of the models in NLP were migrated from Pytorch version with less than **100 lines** of code.(except comments or blank lines)
 6 | 
 7 | - **Notice**: All models are tested on CPU(Linux and macOS), GPU and Ascend.
 8 | 
 9 | ## Curriculum - (Example Purpose)
10 | 
11 | #### 1. Basic Embedding Model
12 | 
13 | - 1-1. [NNLM(Neural Network Language Model)](1-1.NNLM) - **Predict Next Word**
14 |   - Paper -  [A Neural Probabilistic Language Model(2003)](http://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf)
15 | - 1-2. [Word2Vec(Skip-gram)](1-2.Word2Vec) - **Embedding Words and Show Graph**
16 |   - Paper - [Distributed Representations of Words and Phrases
17 |     and their Compositionality(2013)](https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf)
18 | <!-- - 1-3. [FastText(Application Level)](1-3.FastText) - **Sentence Classification**
19 |   - Paper - [Bag of Tricks for Efficient Text Classification(2016)](https://arxiv.org/pdf/1607.01759.pdf)
20 |   - Colab - [FastText.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/1-3.FastText/FastText.ipynb)  -->
21 | 
22 | 
23 | 
24 | #### 2. CNN(Convolutional Neural Network)
25 | 
26 | - 2-1. [TextCNN](2-1.TextCNN) - **Binary Sentiment Classification**
27 |   - Paper - [Convolutional Neural Networks for Sentence Classification(2014)](http://www.aclweb.org/anthology/D14-1181)
28 | 
29 | 
30 | 
31 | #### 3. RNN(Recurrent Neural Network)
32 | 
33 | - 3-1. [TextRNN](3-1.TextRNN) - **Predict Next Step**
34 |   - Paper - [Finding Structure in Time(1990)](http://psych.colorado.edu/~kimlab/Elman1990.pdf)
35 | - 3-2. [TextLSTM](3-2.TextLSTM) - **Autocomplete**
36 |   - Paper - [LONG SHORT-TERM MEMORY(1997)](https://www.bioinf.jku.at/publications/older/2604.pdf)
37 | - 3-3. [Bi-LSTM](3-3.Bi-LSTM) - **Predict Next Word in Long Sentence**
38 | 
39 | 
40 | #### 4. Attention Mechanism
41 | 
42 | - 4-1. [Seq2Seq](4-1.Seq2Seq) - **Change Word**
43 |   - Paper - [Learning Phrase Representations using RNN Encoder–Decoder
44 |     for Statistical Machine Translation(2014)](https://arxiv.org/pdf/1406.1078.pdf)
45 | - 4-2. [Seq2Seq with Attention](4-2.Seq2Seq(Attention)) - **Translate**
46 |   - Paper - [Neural Machine Translation by Jointly Learning to Align and Translate(2014)](https://arxiv.org/abs/1409.0473)
47 | - 4-3. [Bi-LSTM with Attention](4-3.Bi-LSTM(Attention)) - **Binary Sentiment Classification**
48 | 
49 | #### 5. Model based on Transformer
50 | 
51 | - 5-1.  [The Transformer](5-1.Transformer) - **Translate**
52 |   - Paper - [Attention Is All You Need(2017)](https://arxiv.org/abs/1706.03762)
53 | 
54 | - 5-2. [BERT](5-2.BERT) - **Classification Next Sentence & Predict Masked Tokens**
55 |   - Paper - [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding(2018)](https://arxiv.org/abs/1810.04805)
56 | 
57 | ## Dependencies
58 | 
59 | - Python >= 3.7.5
60 | - MindSpore 1.9.0
61 | - Pytorch 1.7.1(for comparation)
62 | 
63 | ## Author
64 | 
65 | - Yufeng Lyu
66 | - Author Email : lvyufeng2007@hotmail.com
67 | - Acknowledgements to [graykode](https://github.com/graykode) who opensource the Pytorch and Tensorflow version.
68 | 


--------------------------------------------------------------------------------
/3-2.TextLSTM/TextLSTM_pytorch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "collect-government",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# %%\n",
 11 |     "# code by Tae Hwan Jung @graykode\n",
 12 |     "import numpy as np\n",
 13 |     "import torch\n",
 14 |     "import torch.nn as nn\n",
 15 |     "import torch.optim as optim\n",
 16 |     "\n",
 17 |     "def make_batch():\n",
 18 |     "    input_batch, target_batch = [], []\n",
 19 |     "\n",
 20 |     "    for seq in seq_data:\n",
 21 |     "        input = [word_dict[n] for n in seq[:-1]] # 'm', 'a' , 'k' is input\n",
 22 |     "        target = word_dict[seq[-1]] # 'e' is target\n",
 23 |     "        input_batch.append(np.eye(n_class)[input])\n",
 24 |     "        target_batch.append(target)\n",
 25 |     "\n",
 26 |     "    return input_batch, target_batch\n",
 27 |     "\n",
 28 |     "class TextLSTM(nn.Module):\n",
 29 |     "    def __init__(self):\n",
 30 |     "        super(TextLSTM, self).__init__()\n",
 31 |     "\n",
 32 |     "        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)\n",
 33 |     "        self.W = nn.Linear(n_hidden, n_class, bias=False)\n",
 34 |     "        self.b = nn.Parameter(torch.ones([n_class]))\n",
 35 |     "\n",
 36 |     "    def forward(self, X):\n",
 37 |     "        input = X.transpose(0, 1)  # X : [n_step, batch_size, n_class]\n",
 38 |     "\n",
 39 |     "        hidden_state = torch.zeros(1, len(X), n_hidden)  # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
 40 |     "        cell_state = torch.zeros(1, len(X), n_hidden)     # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
 41 |     "\n",
 42 |     "        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))\n",
 43 |     "        outputs = outputs[-1]  # [batch_size, n_hidden]\n",
 44 |     "        model = self.W(outputs) + self.b  # model : [batch_size, n_class]\n",
 45 |     "        return model\n",
 46 |     "\n",
 47 |     "if __name__ == '__main__':\n",
 48 |     "    n_step = 3 # number of cells(= number of Step)\n",
 49 |     "    n_hidden = 128 # number of hidden units in one cell\n",
 50 |     "\n",
 51 |     "    char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']\n",
 52 |     "    word_dict = {n: i for i, n in enumerate(char_arr)}\n",
 53 |     "    number_dict = {i: w for i, w in enumerate(char_arr)}\n",
 54 |     "    n_class = len(word_dict)  # number of class(=number of vocab)\n",
 55 |     "\n",
 56 |     "    seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']\n",
 57 |     "\n",
 58 |     "    model = TextLSTM()\n",
 59 |     "\n",
 60 |     "    criterion = nn.CrossEntropyLoss()\n",
 61 |     "    optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
 62 |     "\n",
 63 |     "    input_batch, target_batch = make_batch()\n",
 64 |     "    input_batch = torch.FloatTensor(input_batch)\n",
 65 |     "    target_batch = torch.LongTensor(target_batch)\n",
 66 |     "\n",
 67 |     "    # Training\n",
 68 |     "    for epoch in range(1000):\n",
 69 |     "        optimizer.zero_grad()\n",
 70 |     "\n",
 71 |     "        output = model(input_batch)\n",
 72 |     "        loss = criterion(output, target_batch)\n",
 73 |     "        if (epoch + 1) % 100 == 0:\n",
 74 |     "            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
 75 |     "\n",
 76 |     "        loss.backward()\n",
 77 |     "        optimizer.step()\n",
 78 |     "\n",
 79 |     "    inputs = [sen[:3] for sen in seq_data]\n",
 80 |     "\n",
 81 |     "    predict = model(input_batch).data.max(1, keepdim=True)[1]\n",
 82 |     "    print(inputs, '->', [number_dict[n.item()] for n in predict.squeeze()])\n"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": null,
 88 |    "id": "imposed-facility",
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": []
 92 |   }
 93 |  ],
 94 |  "metadata": {
 95 |   "kernelspec": {
 96 |    "display_name": "Python 3",
 97 |    "language": "python",
 98 |    "name": "python3"
 99 |   },
100 |   "language_info": {
101 |    "codemirror_mode": {
102 |     "name": "ipython",
103 |     "version": 3
104 |    },
105 |    "file_extension": ".py",
106 |    "mimetype": "text/x-python",
107 |    "name": "python",
108 |    "nbconvert_exporter": "python",
109 |    "pygments_lexer": "ipython3",
110 |    "version": "3.7.5"
111 |   }
112 |  },
113 |  "nbformat": 4,
114 |  "nbformat_minor": 5
115 | }
116 | 


--------------------------------------------------------------------------------
/1-1.NNLM/NNLM_pytorch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "hired-pride",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# code by Tae Hwan Jung @graykode\n",
 11 |     "import torch\n",
 12 |     "import torch.nn as nn\n",
 13 |     "import torch.optim as optim\n",
 14 |     "\n",
 15 |     "def make_batch():\n",
 16 |     "    input_batch = []\n",
 17 |     "    target_batch = []\n",
 18 |     "\n",
 19 |     "    for sen in sentences:\n",
 20 |     "        word = sen.split() # space tokenizer\n",
 21 |     "        input = [word_dict[n] for n in word[:-1]] # create (1~n-1) as input\n",
 22 |     "        target = word_dict[word[-1]] # create (n) as target, We usually call this 'casual language model'\n",
 23 |     "\n",
 24 |     "        input_batch.append(input)\n",
 25 |     "        target_batch.append(target)\n",
 26 |     "\n",
 27 |     "    return input_batch, target_batch\n",
 28 |     "\n",
 29 |     "# Model\n",
 30 |     "class NNLM(nn.Module):\n",
 31 |     "    def __init__(self):\n",
 32 |     "        super(NNLM, self).__init__()\n",
 33 |     "        self.C = nn.Embedding(n_class, m)\n",
 34 |     "        self.H = nn.Linear(n_step * m, n_hidden, bias=False)\n",
 35 |     "        self.d = nn.Parameter(torch.ones(n_hidden))\n",
 36 |     "        self.U = nn.Linear(n_hidden, n_class, bias=False)\n",
 37 |     "        self.W = nn.Linear(n_step * m, n_class, bias=False)\n",
 38 |     "        self.b = nn.Parameter(torch.ones(n_class))\n",
 39 |     "\n",
 40 |     "    def forward(self, X):\n",
 41 |     "        X = self.C(X) # X : [batch_size, n_step, m]\n",
 42 |     "        X = X.view(-1, n_step * m) # [batch_size, n_step * m]\n",
 43 |     "        tanh = torch.tanh(self.d + self.H(X)) # [batch_size, n_hidden]\n",
 44 |     "        output = self.b + self.W(X) + self.U(tanh) # [batch_size, n_class]\n",
 45 |     "        return output\n",
 46 |     "\n",
 47 |     "if __name__ == '__main__':\n",
 48 |     "    n_step = 2 # number of steps, n-1 in paper\n",
 49 |     "    n_hidden = 2 # number of hidden size, h in paper\n",
 50 |     "    m = 2 # embedding size, m in paper\n",
 51 |     "\n",
 52 |     "    sentences = [\"i like dog\", \"i love coffee\", \"i hate milk\"]\n",
 53 |     "\n",
 54 |     "    word_list = \" \".join(sentences).split()\n",
 55 |     "    word_list = list(set(word_list))\n",
 56 |     "    word_dict = {w: i for i, w in enumerate(word_list)}\n",
 57 |     "    number_dict = {i: w for i, w in enumerate(word_list)}\n",
 58 |     "    n_class = len(word_dict)  # number of Vocabulary\n",
 59 |     "\n",
 60 |     "    model = NNLM()\n",
 61 |     "\n",
 62 |     "    input_batch, target_batch = make_batch()\n",
 63 |     "    input_batch = torch.LongTensor(input_batch)\n",
 64 |     "    target_batch = torch.LongTensor(target_batch)\n",
 65 |     "\n",
 66 |     "    criterion = nn.CrossEntropyLoss()\n",
 67 |     "    optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
 68 |     "\n",
 69 |     "    # Training\n",
 70 |     "    for epoch in range(5000):\n",
 71 |     "        optimizer.zero_grad()\n",
 72 |     "        output = model(input_batch)\n",
 73 |     "\n",
 74 |     "        # output : [batch_size, n_class], target_batch : [batch_size]\n",
 75 |     "        loss = criterion(output, target_batch)\n",
 76 |     "        if (epoch + 1) % 1000 == 0:\n",
 77 |     "            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
 78 |     "\n",
 79 |     "        loss.backward()\n",
 80 |     "        optimizer.step()\n",
 81 |     "\n",
 82 |     "    # Predict\n",
 83 |     "    predict = model(input_batch)\n",
 84 |     "    predict = predict.data.max(1, keepdim=True)[1]\n",
 85 |     "    # Test\n",
 86 |     "    print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": null,
 92 |    "id": "collected-tracy",
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": []
 96 |   }
 97 |  ],
 98 |  "metadata": {
 99 |   "kernelspec": {
100 |    "display_name": "Python 3",
101 |    "language": "python",
102 |    "name": "python3"
103 |   },
104 |   "language_info": {
105 |    "codemirror_mode": {
106 |     "name": "ipython",
107 |     "version": 3
108 |    },
109 |    "file_extension": ".py",
110 |    "mimetype": "text/x-python",
111 |    "name": "python",
112 |    "nbconvert_exporter": "python",
113 |    "pygments_lexer": "ipython3",
114 |    "version": "3.7.5"
115 |   }
116 |  },
117 |  "nbformat": 4,
118 |  "nbformat_minor": 5
119 | }
120 | 


--------------------------------------------------------------------------------
/3-3.Bi-LSTM/Bi-LSTM_pytorch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "qualified-shaft",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# %%\n",
 11 |     "# code by Tae Hwan Jung @graykode\n",
 12 |     "import numpy as np\n",
 13 |     "import torch\n",
 14 |     "import torch.nn as nn\n",
 15 |     "import torch.optim as optim\n",
 16 |     "\n",
 17 |     "def make_batch():\n",
 18 |     "    input_batch = []\n",
 19 |     "    target_batch = []\n",
 20 |     "\n",
 21 |     "    words = sentence.split()\n",
 22 |     "    for i, word in enumerate(words[:-1]):\n",
 23 |     "        input = [word_dict[n] for n in words[:(i + 1)]]\n",
 24 |     "        input = input + [0] * (max_len - len(input))\n",
 25 |     "        target = word_dict[words[i + 1]]\n",
 26 |     "        input_batch.append(np.eye(n_class)[input])\n",
 27 |     "        target_batch.append(target)\n",
 28 |     "\n",
 29 |     "    return input_batch, target_batch\n",
 30 |     "\n",
 31 |     "class BiLSTM(nn.Module):\n",
 32 |     "    def __init__(self):\n",
 33 |     "        super(BiLSTM, self).__init__()\n",
 34 |     "\n",
 35 |     "        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)\n",
 36 |     "        self.W = nn.Linear(n_hidden * 2, n_class, bias=False)\n",
 37 |     "        self.b = nn.Parameter(torch.ones([n_class]))\n",
 38 |     "\n",
 39 |     "    def forward(self, X):\n",
 40 |     "        input = X.transpose(0, 1)  # input : [n_step, batch_size, n_class]\n",
 41 |     "\n",
 42 |     "        hidden_state = torch.zeros(1*2, len(X), n_hidden)   # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n",
 43 |     "        cell_state = torch.zeros(1*2, len(X), n_hidden)     # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n",
 44 |     "\n",
 45 |     "        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))\n",
 46 |     "        outputs = outputs[-1]  # [batch_size, n_hidden]\n",
 47 |     "        model = self.W(outputs) + self.b  # model : [batch_size, n_class]\n",
 48 |     "        return model\n",
 49 |     "\n",
 50 |     "if __name__ == '__main__':\n",
 51 |     "    n_hidden = 5 # number of hidden units in one cell\n",
 52 |     "\n",
 53 |     "    sentence = (\n",
 54 |     "        'Lorem ipsum dolor sit amet consectetur adipisicing elit '\n",
 55 |     "        'sed do eiusmod tempor incididunt ut labore et dolore magna '\n",
 56 |     "        'aliqua Ut enim ad minim veniam quis nostrud exercitation'\n",
 57 |     "    )\n",
 58 |     "\n",
 59 |     "    word_dict = {w: i for i, w in enumerate(list(set(sentence.split())))}\n",
 60 |     "    number_dict = {i: w for i, w in enumerate(list(set(sentence.split())))}\n",
 61 |     "    n_class = len(word_dict)\n",
 62 |     "    max_len = len(sentence.split())\n",
 63 |     "\n",
 64 |     "    model = BiLSTM()\n",
 65 |     "\n",
 66 |     "    criterion = nn.CrossEntropyLoss()\n",
 67 |     "    optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
 68 |     "\n",
 69 |     "    input_batch, target_batch = make_batch()\n",
 70 |     "    input_batch = torch.FloatTensor(input_batch)\n",
 71 |     "    target_batch = torch.LongTensor(target_batch)\n",
 72 |     "    \n",
 73 |     "    print(input_batch.shape, target_batch.shape)\n",
 74 |     "    # Training\n",
 75 |     "    for epoch in range(10000):\n",
 76 |     "        optimizer.zero_grad()\n",
 77 |     "        output = model(input_batch)\n",
 78 |     "        loss = criterion(output, target_batch)\n",
 79 |     "        if (epoch + 1) % 1000 == 0:\n",
 80 |     "            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
 81 |     "\n",
 82 |     "        loss.backward()\n",
 83 |     "        optimizer.step()\n",
 84 |     "\n",
 85 |     "    predict = model(input_batch).data.max(1, keepdim=True)[1]\n",
 86 |     "    print(sentence)\n",
 87 |     "    print([number_dict[n.item()] for n in predict.squeeze()])\n"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": null,
 93 |    "id": "advance-dressing",
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": []
 97 |   }
 98 |  ],
 99 |  "metadata": {
100 |   "kernelspec": {
101 |    "display_name": "Python 3",
102 |    "language": "python",
103 |    "name": "python3"
104 |   },
105 |   "language_info": {
106 |    "codemirror_mode": {
107 |     "name": "ipython",
108 |     "version": 3
109 |    },
110 |    "file_extension": ".py",
111 |    "mimetype": "text/x-python",
112 |    "name": "python",
113 |    "nbconvert_exporter": "python",
114 |    "pygments_lexer": "ipython3",
115 |    "version": "3.7.5"
116 |   }
117 |  },
118 |  "nbformat": 4,
119 |  "nbformat_minor": 5
120 | }
121 | 


--------------------------------------------------------------------------------
/1-2.Word2Vec/Word2Vec_pytorch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "# %%\n",
 10 |     "# code by Tae Hwan Jung @graykode\n",
 11 |     "import numpy as np\n",
 12 |     "import torch\n",
 13 |     "import torch.nn as nn\n",
 14 |     "import torch.optim as optim\n",
 15 |     "import matplotlib.pyplot as plt\n",
 16 |     "\n",
 17 |     "def random_batch():\n",
 18 |     "    random_inputs = []\n",
 19 |     "    random_labels = []\n",
 20 |     "    random_index = np.random.choice(range(len(skip_grams)), batch_size, replace=False)\n",
 21 |     "\n",
 22 |     "    for i in random_index:\n",
 23 |     "        random_inputs.append(np.eye(voc_size)[skip_grams[i][0]])  # target\n",
 24 |     "        random_labels.append(skip_grams[i][1])  # context word\n",
 25 |     "\n",
 26 |     "    return random_inputs, random_labels\n",
 27 |     "\n",
 28 |     "# Model\n",
 29 |     "class Word2Vec(nn.Module):\n",
 30 |     "    def __init__(self):\n",
 31 |     "        super(Word2Vec, self).__init__()\n",
 32 |     "        # W and WT is not Traspose relationship\n",
 33 |     "        self.W = nn.Linear(voc_size, embedding_size, bias=False) # voc_size > embedding_size Weight\n",
 34 |     "        self.WT = nn.Linear(embedding_size, voc_size, bias=False) # embedding_size > voc_size Weight\n",
 35 |     "\n",
 36 |     "    def forward(self, X):\n",
 37 |     "        # X : [batch_size, voc_size]\n",
 38 |     "        hidden_layer = self.W(X) # hidden_layer : [batch_size, embedding_size]\n",
 39 |     "        output_layer = self.WT(hidden_layer) # output_layer : [batch_size, voc_size]\n",
 40 |     "        return output_layer\n",
 41 |     "\n",
 42 |     "if __name__ == '__main__':\n",
 43 |     "    batch_size = 2 # mini-batch size\n",
 44 |     "    embedding_size = 2 # embedding size\n",
 45 |     "\n",
 46 |     "    sentences = [\"apple banana fruit\", \"banana orange fruit\", \"orange banana fruit\",\n",
 47 |     "                 \"dog cat animal\", \"cat monkey animal\", \"monkey dog animal\"]\n",
 48 |     "\n",
 49 |     "    word_sequence = \" \".join(sentences).split()\n",
 50 |     "    word_list = \" \".join(sentences).split()\n",
 51 |     "    word_list = list(set(word_list))\n",
 52 |     "    word_dict = {w: i for i, w in enumerate(word_list)}\n",
 53 |     "    voc_size = len(word_list)\n",
 54 |     "\n",
 55 |     "    # Make skip gram of one size window\n",
 56 |     "    skip_grams = []\n",
 57 |     "    for i in range(1, len(word_sequence) - 1):\n",
 58 |     "        target = word_dict[word_sequence[i]]\n",
 59 |     "        context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]\n",
 60 |     "        for w in context:\n",
 61 |     "            skip_grams.append([target, w])\n",
 62 |     "\n",
 63 |     "    model = Word2Vec()\n",
 64 |     "\n",
 65 |     "    criterion = nn.CrossEntropyLoss()\n",
 66 |     "    optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
 67 |     "\n",
 68 |     "    # Training\n",
 69 |     "    for epoch in range(5000):\n",
 70 |     "        input_batch, target_batch = random_batch()\n",
 71 |     "        input_batch = torch.Tensor(input_batch)\n",
 72 |     "        target_batch = torch.LongTensor(target_batch)\n",
 73 |     "\n",
 74 |     "        optimizer.zero_grad()\n",
 75 |     "        output = model(input_batch)\n",
 76 |     "\n",
 77 |     "        # output : [batch_size, voc_size], target_batch : [batch_size] (LongTensor, not one-hot)\n",
 78 |     "        loss = criterion(output, target_batch)\n",
 79 |     "        if (epoch + 1) % 1000 == 0:\n",
 80 |     "            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
 81 |     "\n",
 82 |     "        loss.backward()\n",
 83 |     "        optimizer.step()\n",
 84 |     "\n",
 85 |     "    for i, label in enumerate(word_list):\n",
 86 |     "        W, WT = model.parameters()\n",
 87 |     "        x, y = W[0][i].item(), W[1][i].item()\n",
 88 |     "        plt.scatter(x, y)\n",
 89 |     "        plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')\n",
 90 |     "    plt.show()"
 91 |    ]
 92 |   }
 93 |  ],
 94 |  "metadata": {
 95 |   "kernelspec": {
 96 |    "display_name": "Python 3",
 97 |    "language": "python",
 98 |    "name": "python3"
 99 |   },
100 |   "language_info": {
101 |    "codemirror_mode": {
102 |     "name": "ipython",
103 |     "version": 3
104 |    },
105 |    "file_extension": ".py",
106 |    "mimetype": "text/x-python",
107 |    "name": "python",
108 |    "nbconvert_exporter": "python",
109 |    "pygments_lexer": "ipython3",
110 |    "version": "3.7.5"
111 |   }
112 |  },
113 |  "nbformat": 4,
114 |  "nbformat_minor": 4
115 | }
116 | 


--------------------------------------------------------------------------------
/3-1.TextRNN/TextRNN_pytorch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "heated-fighter",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# %%\n",
 11 |     "# code by Tae Hwan Jung @graykode\n",
 12 |     "import numpy as np\n",
 13 |     "import torch\n",
 14 |     "import torch.nn as nn\n",
 15 |     "import torch.optim as optim\n",
 16 |     "\n",
 17 |     "def make_batch():\n",
 18 |     "    input_batch = []\n",
 19 |     "    target_batch = []\n",
 20 |     "\n",
 21 |     "    for sen in sentences:\n",
 22 |     "        word = sen.split()  # space tokenizer\n",
 23 |     "        input = [word_dict[n] for n in word[:-1]]  # create (1~n-1) as input\n",
 24 |     "        target = word_dict[word[-1]]  # create (n) as target, We usually call this 'casual language model'\n",
 25 |     "\n",
 26 |     "        input_batch.append(np.eye(n_class)[input])\n",
 27 |     "        target_batch.append(target)\n",
 28 |     "\n",
 29 |     "    return input_batch, target_batch\n",
 30 |     "\n",
 31 |     "class TextRNN(nn.Module):\n",
 32 |     "    def __init__(self):\n",
 33 |     "        super(TextRNN, self).__init__()\n",
 34 |     "        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)\n",
 35 |     "        self.W = nn.Linear(n_hidden, n_class, bias=False)\n",
 36 |     "        self.b = nn.Parameter(torch.ones([n_class]))\n",
 37 |     "\n",
 38 |     "    def forward(self, hidden, X):\n",
 39 |     "        X = X.transpose(0, 1) # X : [n_step, batch_size, n_class]\n",
 40 |     "        outputs, hidden = self.rnn(X, hidden)\n",
 41 |     "        # outputs : [n_step, batch_size, num_directions(=1) * n_hidden]\n",
 42 |     "        # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
 43 |     "        outputs = outputs[-1] # [batch_size, num_directions(=1) * n_hidden]\n",
 44 |     "        model = self.W(outputs) + self.b # model : [batch_size, n_class]\n",
 45 |     "        return model\n",
 46 |     "\n",
 47 |     "if __name__ == '__main__':\n",
 48 |     "    n_step = 2 # number of cells(= number of Step)\n",
 49 |     "    n_hidden = 5 # number of hidden units in one cell\n",
 50 |     "\n",
 51 |     "    sentences = [\"i like dog\", \"i love coffee\", \"i hate milk\"]\n",
 52 |     "\n",
 53 |     "    word_list = \" \".join(sentences).split()\n",
 54 |     "    word_list = list(set(word_list))\n",
 55 |     "    word_dict = {w: i for i, w in enumerate(word_list)}\n",
 56 |     "    number_dict = {i: w for i, w in enumerate(word_list)}\n",
 57 |     "    n_class = len(word_dict)\n",
 58 |     "    batch_size = len(sentences)\n",
 59 |     "\n",
 60 |     "    model = TextRNN()\n",
 61 |     "\n",
 62 |     "    criterion = nn.CrossEntropyLoss()\n",
 63 |     "    optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
 64 |     "\n",
 65 |     "    input_batch, target_batch = make_batch()\n",
 66 |     "    input_batch = torch.FloatTensor(input_batch)\n",
 67 |     "    target_batch = torch.LongTensor(target_batch)\n",
 68 |     "\n",
 69 |     "    # Training\n",
 70 |     "    for epoch in range(5000):\n",
 71 |     "        optimizer.zero_grad()\n",
 72 |     "\n",
 73 |     "        # hidden : [num_layers * num_directions, batch, hidden_size]\n",
 74 |     "        hidden = torch.zeros(1, batch_size, n_hidden)\n",
 75 |     "        # input_batch : [batch_size, n_step, n_class]\n",
 76 |     "        output = model(hidden, input_batch)\n",
 77 |     "\n",
 78 |     "        # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)\n",
 79 |     "        loss = criterion(output, target_batch)\n",
 80 |     "        if (epoch + 1) % 1000 == 0:\n",
 81 |     "            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
 82 |     "\n",
 83 |     "        loss.backward()\n",
 84 |     "        optimizer.step()\n",
 85 |     "\n",
 86 |     "    input = [sen.split()[:2] for sen in sentences]\n",
 87 |     "\n",
 88 |     "    # Predict\n",
 89 |     "    hidden = torch.zeros(1, batch_size, n_hidden)\n",
 90 |     "    predict = model(hidden, input_batch).data.max(1, keepdim=True)[1]\n",
 91 |     "    print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "id": "informational-channel",
 98 |    "metadata": {},
 99 |    "outputs": [],
100 |    "source": []
101 |   }
102 |  ],
103 |  "metadata": {
104 |   "kernelspec": {
105 |    "display_name": "Python 3",
106 |    "language": "python",
107 |    "name": "python3"
108 |   },
109 |   "language_info": {
110 |    "codemirror_mode": {
111 |     "name": "ipython",
112 |     "version": 3
113 |    },
114 |    "file_extension": ".py",
115 |    "mimetype": "text/x-python",
116 |    "name": "python",
117 |    "nbconvert_exporter": "python",
118 |    "pygments_lexer": "ipython3",
119 |    "version": "3.7.5"
120 |   }
121 |  },
122 |  "nbformat": 4,
123 |  "nbformat_minor": 5
124 | }
125 | 


--------------------------------------------------------------------------------
/2-1.TextCNN/TextCNN_pytorch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "healthy-stationery",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# %%\n",
 11 |     "# code by Tae Hwan Jung @graykode\n",
 12 |     "import numpy as np\n",
 13 |     "import torch\n",
 14 |     "import torch.nn as nn\n",
 15 |     "import torch.optim as optim\n",
 16 |     "import torch.nn.functional as F\n",
 17 |     "\n",
 18 |     "class TextCNN(nn.Module):\n",
 19 |     "    def __init__(self):\n",
 20 |     "        super(TextCNN, self).__init__()\n",
 21 |     "        self.num_filters_total = num_filters * len(filter_sizes)\n",
 22 |     "        self.W = nn.Embedding(vocab_size, embedding_size)\n",
 23 |     "        self.Weight = nn.Linear(self.num_filters_total, num_classes, bias=False)\n",
 24 |     "        self.Bias = nn.Parameter(torch.ones([num_classes]))\n",
 25 |     "        self.filter_list = nn.ModuleList([nn.Conv2d(1, num_filters, (size, embedding_size)) for size in filter_sizes])\n",
 26 |     "\n",
 27 |     "    def forward(self, X):\n",
 28 |     "        embedded_chars = self.W(X) # [batch_size, sequence_length, sequence_length]\n",
 29 |     "        embedded_chars = embedded_chars.unsqueeze(1) # add channel(=1) [batch, channel(=1), sequence_length, embedding_size]\n",
 30 |     "\n",
 31 |     "        pooled_outputs = []\n",
 32 |     "        for i, conv in enumerate(self.filter_list):\n",
 33 |     "            # conv : [input_channel(=1), output_channel(=3), (filter_height, filter_width), bias_option]\n",
 34 |     "            h = F.relu(conv(embedded_chars))\n",
 35 |     "            # mp : ((filter_height, filter_width))\n",
 36 |     "            mp = nn.MaxPool2d((sequence_length - filter_sizes[i] + 1, 1))\n",
 37 |     "            # pooled : [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3)]\n",
 38 |     "            pooled = mp(h).permute(0, 3, 2, 1)\n",
 39 |     "            pooled_outputs.append(pooled)\n",
 40 |     "\n",
 41 |     "        h_pool = torch.cat(pooled_outputs, len(filter_sizes)) # [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3) * 3]\n",
 42 |     "        h_pool_flat = torch.reshape(h_pool, [-1, self.num_filters_total]) # [batch_size(=6), output_height * output_width * (output_channel * 3)]\n",
 43 |     "        model = self.Weight(h_pool_flat) + self.Bias # [batch_size, num_classes]\n",
 44 |     "        return model\n",
 45 |     "\n",
 46 |     "if __name__ == '__main__':\n",
 47 |     "    embedding_size = 2 # embedding size\n",
 48 |     "    sequence_length = 3 # sequence length\n",
 49 |     "    num_classes = 2 # number of classes\n",
 50 |     "    filter_sizes = [2, 2, 2] # n-gram windows\n",
 51 |     "    num_filters = 3 # number of filters\n",
 52 |     "\n",
 53 |     "    # 3 words sentences (=sequence_length is 3)\n",
 54 |     "    sentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \"i hate you\", \"sorry for that\", \"this is awful\"]\n",
 55 |     "    labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.\n",
 56 |     "\n",
 57 |     "    word_list = \" \".join(sentences).split()\n",
 58 |     "    word_list = list(set(word_list))\n",
 59 |     "    word_dict = {w: i for i, w in enumerate(word_list)}\n",
 60 |     "    vocab_size = len(word_dict)\n",
 61 |     "\n",
 62 |     "    model = TextCNN()\n",
 63 |     "\n",
 64 |     "    criterion = nn.CrossEntropyLoss()\n",
 65 |     "    optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
 66 |     "\n",
 67 |     "    inputs = torch.LongTensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences])\n",
 68 |     "    targets = torch.LongTensor([out for out in labels]) # To using Torch Softmax Loss function\n",
 69 |     "\n",
 70 |     "    # Training\n",
 71 |     "    for epoch in range(5000):\n",
 72 |     "        optimizer.zero_grad()\n",
 73 |     "        output = model(inputs)\n",
 74 |     "\n",
 75 |     "        # output : [batch_size, num_classes], target_batch : [batch_size] (LongTensor, not one-hot)\n",
 76 |     "        loss = criterion(output, targets)\n",
 77 |     "        if (epoch + 1) % 1000 == 0:\n",
 78 |     "            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
 79 |     "\n",
 80 |     "        loss.backward()\n",
 81 |     "        optimizer.step()\n",
 82 |     "\n",
 83 |     "    # Test\n",
 84 |     "    test_text = 'sorry hate you'\n",
 85 |     "    tests = [np.asarray([word_dict[n] for n in test_text.split()])]\n",
 86 |     "    test_batch = torch.LongTensor(tests)\n",
 87 |     "\n",
 88 |     "    # Predict\n",
 89 |     "    predict = model(test_batch).data.max(1, keepdim=True)[1]\n",
 90 |     "    if predict[0][0] == 0:\n",
 91 |     "        print(test_text,\"is Bad Mean...\")\n",
 92 |     "    else:\n",
 93 |     "        print(test_text,\"is Good Mean!!\")"
 94 |    ]
 95 |   }
 96 |  ],
 97 |  "metadata": {
 98 |   "kernelspec": {
 99 |    "display_name": "Python 3",
100 |    "language": "python",
101 |    "name": "python3"
102 |   },
103 |   "language_info": {
104 |    "codemirror_mode": {
105 |     "name": "ipython",
106 |     "version": 3
107 |    },
108 |    "file_extension": ".py",
109 |    "mimetype": "text/x-python",
110 |    "name": "python",
111 |    "nbconvert_exporter": "python",
112 |    "pygments_lexer": "ipython3",
113 |    "version": "3.7.5"
114 |   }
115 |  },
116 |  "nbformat": 4,
117 |  "nbformat_minor": 5
118 | }
119 | 


--------------------------------------------------------------------------------
/4-3.Bi-LSTM(Attention)/Bi-LSTM-Attention_pytorch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "code",
  5 |       "metadata": {},
  6 |       "source": [
  7 |         "# code by Tae Hwan Jung(Jeff Jung) @graykode\n",
  8 |         "# Reference : https://github.com/prakashpandey9/Text-Classification-Pytorch/blob/master/models/LSTM_Attn.py\n",
  9 |         "import numpy as np\n",
 10 |         "import torch\n",
 11 |         "import torch.nn as nn\n",
 12 |         "import torch.optim as optim\n",
 13 |         "import torch.nn.functional as F\n",
 14 |         "import matplotlib.pyplot as plt\n",
 15 |         "\n",
 16 |         "class BiLSTM_Attention(nn.Module):\n",
 17 |         "    def __init__(self):\n",
 18 |         "        super(BiLSTM_Attention, self).__init__()\n",
 19 |         "\n",
 20 |         "        self.embedding = nn.Embedding(vocab_size, embedding_dim)\n",
 21 |         "        self.lstm = nn.LSTM(embedding_dim, n_hidden, bidirectional=True)\n",
 22 |         "        self.out = nn.Linear(n_hidden * 2, num_classes)\n",
 23 |         "\n",
 24 |         "    # lstm_output : [batch_size, n_step, n_hidden * num_directions(=2)], F matrix\n",
 25 |         "    def attention_net(self, lstm_output, final_state):\n",
 26 |         "        hidden = final_state.view(-1, n_hidden * 2, 1)   # hidden : [batch_size, n_hidden * num_directions(=2), 1(=n_layer)]\n",
 27 |         "        attn_weights = torch.bmm(lstm_output, hidden).squeeze(2) # attn_weights : [batch_size, n_step]\n",
 28 |         "        soft_attn_weights = F.softmax(attn_weights, 1)\n",
 29 |         "        # [batch_size, n_hidden * num_directions(=2), n_step] * [batch_size, n_step, 1] = [batch_size, n_hidden * num_directions(=2), 1]\n",
 30 |         "        context = torch.bmm(lstm_output.transpose(1, 2), soft_attn_weights.unsqueeze(2)).squeeze(2)\n",
 31 |         "        return context, soft_attn_weights.data.numpy() # context : [batch_size, n_hidden * num_directions(=2)]\n",
 32 |         "\n",
 33 |         "    def forward(self, X):\n",
 34 |         "        input = self.embedding(X) # input : [batch_size, len_seq, embedding_dim]\n",
 35 |         "        input = input.permute(1, 0, 2) # input : [len_seq, batch_size, embedding_dim]\n",
 36 |         "\n",
 37 |         "        hidden_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n",
 38 |         "        cell_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n",
 39 |         "\n",
 40 |         "        # final_hidden_state, final_cell_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n",
 41 |         "        output, (final_hidden_state, final_cell_state) = self.lstm(input, (hidden_state, cell_state))\n",
 42 |         "        output = output.permute(1, 0, 2) # output : [batch_size, len_seq, n_hidden]\n",
 43 |         "        attn_output, attention = self.attention_net(output, final_hidden_state)\n",
 44 |         "        return self.out(attn_output), attention # model : [batch_size, num_classes], attention : [batch_size, n_step]\n",
 45 |         "\n",
 46 |         "if __name__ == '__main__':\n",
 47 |         "    embedding_dim = 2 # embedding size\n",
 48 |         "    n_hidden = 5  # number of hidden units in one cell\n",
 49 |         "    num_classes = 2  # 0 or 1\n",
 50 |         "\n",
 51 |         "    # 3 words sentences (=sequence_length is 3)\n",
 52 |         "    sentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \"i hate you\", \"sorry for that\", \"this is awful\"]\n",
 53 |         "    labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.\n",
 54 |         "\n",
 55 |         "    word_list = \" \".join(sentences).split()\n",
 56 |         "    word_list = list(set(word_list))\n",
 57 |         "    word_dict = {w: i for i, w in enumerate(word_list)}\n",
 58 |         "    vocab_size = len(word_dict)\n",
 59 |         "\n",
 60 |         "    model = BiLSTM_Attention()\n",
 61 |         "\n",
 62 |         "    criterion = nn.CrossEntropyLoss()\n",
 63 |         "    optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
 64 |         "\n",
 65 |         "    inputs = torch.LongTensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences])\n",
 66 |         "    targets = torch.LongTensor([out for out in labels])  # To using Torch Softmax Loss function\n",
 67 |         "\n",
 68 |         "    # Training\n",
 69 |         "    for epoch in range(5000):\n",
 70 |         "        optimizer.zero_grad()\n",
 71 |         "        output, attention = model(inputs)\n",
 72 |         "        loss = criterion(output, targets)\n",
 73 |         "        if (epoch + 1) % 1000 == 0:\n",
 74 |         "            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
 75 |         "\n",
 76 |         "        loss.backward()\n",
 77 |         "        optimizer.step()\n",
 78 |         "\n",
 79 |         "    # Test\n",
 80 |         "    test_text = 'sorry hate you'\n",
 81 |         "    tests = [np.asarray([word_dict[n] for n in test_text.split()])]\n",
 82 |         "    test_batch = torch.LongTensor(tests)\n",
 83 |         "\n",
 84 |         "    # Predict\n",
 85 |         "    predict, _ = model(test_batch)\n",
 86 |         "    predict = predict.data.max(1, keepdim=True)[1]\n",
 87 |         "    if predict[0][0] == 0:\n",
 88 |         "        print(test_text,\"is Bad Mean...\")\n",
 89 |         "    else:\n",
 90 |         "        print(test_text,\"is Good Mean!!\")\n",
 91 |         "\n",
 92 |         "    fig = plt.figure(figsize=(6, 3)) # [batch_size, n_step]\n",
 93 |         "    ax = fig.add_subplot(1, 1, 1)\n",
 94 |         "    ax.matshow(attention, cmap='viridis')\n",
 95 |         "    ax.set_xticklabels(['']+['first_word', 'second_word', 'third_word'], fontdict={'fontsize': 14}, rotation=90)\n",
 96 |         "    ax.set_yticklabels(['']+['batch_1', 'batch_2', 'batch_3', 'batch_4', 'batch_5', 'batch_6'], fontdict={'fontsize': 14})\n",
 97 |         "    plt.show()"
 98 |       ],
 99 |       "outputs": [],
100 |       "execution_count": null
101 |     }
102 |   ],
103 |   "metadata": {
104 |     "anaconda-cloud": {},
105 |     "kernelspec": {
106 |       "display_name": "Python 3",
107 |       "language": "python",
108 |       "name": "python3"
109 |     },
110 |     "language_info": {
111 |       "codemirror_mode": {
112 |         "name": "ipython",
113 |         "version": 3
114 |       },
115 |       "file_extension": ".py",
116 |       "mimetype": "text/x-python",
117 |       "name": "python",
118 |       "nbconvert_exporter": "python",
119 |       "pygments_lexer": "ipython3",
120 |       "version": "3.6.1"
121 |     }
122 |   },
123 |   "nbformat": 4,
124 |   "nbformat_minor": 4
125 | }


--------------------------------------------------------------------------------
/4-1.Seq2Seq/Seq2Seq_pytorch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "tested-performance",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# code by Tae Hwan Jung @graykode\n",
 11 |     "import argparse\n",
 12 |     "import numpy as np\n",
 13 |     "import torch\n",
 14 |     "import torch.nn as nn\n",
 15 |     "\n",
 16 |     "# S: Symbol that shows starting of decoding input\n",
 17 |     "# E: Symbol that shows starting of decoding output\n",
 18 |     "# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\n",
 19 |     "\n",
 20 |     "def make_batch(seq_data, num_dic, n_step):\n",
 21 |     "    input_batch, output_batch, target_batch = [], [], []\n",
 22 |     "\n",
 23 |     "    for seq in seq_data:\n",
 24 |     "        for i in range(2):\n",
 25 |     "            seq[i] = seq[i] + 'P' * (n_step - len(seq[i]))\n",
 26 |     "\n",
 27 |     "        input = [num_dic[n] for n in seq[0]]\n",
 28 |     "        output = [num_dic[n] for n in ('S' + seq[1])]\n",
 29 |     "        target = [num_dic[n] for n in (seq[1] + 'E')]\n",
 30 |     "\n",
 31 |     "        input_batch.append(np.eye(n_class)[input])\n",
 32 |     "        output_batch.append(np.eye(n_class)[output])\n",
 33 |     "        target_batch.append(target) # not one-hot\n",
 34 |     "\n",
 35 |     "    # make tensor\n",
 36 |     "    return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch)\n",
 37 |     "\n",
 38 |     "# Model\n",
 39 |     "class Seq2Seq(nn.Module):\n",
 40 |     "    def __init__(self):\n",
 41 |     "        super(Seq2Seq, self).__init__()\n",
 42 |     "\n",
 43 |     "        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n",
 44 |     "        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n",
 45 |     "        self.fc = nn.Linear(n_hidden, n_class)\n",
 46 |     "\n",
 47 |     "    def forward(self, enc_input, enc_hidden, dec_input):\n",
 48 |     "        enc_input = enc_input.transpose(0, 1) # enc_input: [max_len(=n_step, time step), batch_size, n_class]\n",
 49 |     "        dec_input = dec_input.transpose(0, 1) # dec_input: [max_len(=n_step, time step), batch_size, n_class]\n",
 50 |     "\n",
 51 |     "        # enc_states : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
 52 |     "        _, enc_states = self.enc_cell(enc_input, enc_hidden)\n",
 53 |     "        # outputs : [max_len+1(=6), batch_size, num_directions(=1) * n_hidden(=128)]\n",
 54 |     "        outputs, _ = self.dec_cell(dec_input, enc_states)\n",
 55 |     "\n",
 56 |     "        model = self.fc(outputs) # model : [max_len+1(=6), batch_size, n_class]\n",
 57 |     "        return model\n",
 58 |     "\n",
 59 |     "if __name__ == '__main__':\n",
 60 |     "    n_step = 5\n",
 61 |     "    n_hidden = 128\n",
 62 |     "\n",
 63 |     "    char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']\n",
 64 |     "    num_dic = {n: i for i, n in enumerate(char_arr)}\n",
 65 |     "    seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]\n",
 66 |     "\n",
 67 |     "    n_class = len(num_dic)\n",
 68 |     "    batch_size = len(seq_data)\n",
 69 |     "\n",
 70 |     "    model = Seq2Seq()\n",
 71 |     "\n",
 72 |     "    criterion = nn.CrossEntropyLoss()\n",
 73 |     "    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
 74 |     "\n",
 75 |     "    input_batch, output_batch, target_batch = make_batch(seq_data, num_dic, n_step)\n",
 76 |     "\n",
 77 |     "    for epoch in range(5000):\n",
 78 |     "        # make hidden shape [num_layers * num_directions, batch_size, n_hidden]\n",
 79 |     "        hidden = torch.zeros(1, batch_size, n_hidden)\n",
 80 |     "\n",
 81 |     "        optimizer.zero_grad()\n",
 82 |     "        # input_batch : [batch_size, max_len(=n_step, time step), n_class]\n",
 83 |     "        # output_batch : [batch_size, max_len+1(=n_step, time step) (becase of 'S' or 'E'), n_class]\n",
 84 |     "        # target_batch : [batch_size, max_len+1(=n_step, time step)], not one-hot\n",
 85 |     "        output = model(input_batch, hidden, output_batch)\n",
 86 |     "        # output : [max_len+1, batch_size, n_class]\n",
 87 |     "        output = output.transpose(0, 1) # [batch_size, max_len+1(=6), n_class]\n",
 88 |     "        loss = 0\n",
 89 |     "        for i in range(0, len(target_batch)):\n",
 90 |     "            # output[i] : [max_len+1, n_class, target_batch[i] : max_len+1]\n",
 91 |     "            loss += criterion(output[i], target_batch[i])\n",
 92 |     "        if (epoch + 1) % 1000 == 0:\n",
 93 |     "            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
 94 |     "        loss.backward()\n",
 95 |     "        optimizer.step()\n",
 96 |     "\n",
 97 |     "    # Test\n",
 98 |     "    def translate(word):\n",
 99 |     "        input_batch, output_batch, _ = make_batch([[word, 'P' * len(word)]],  num_dic, n_step)\n",
100 |     "        # make hidden shape [num_layers * num_directions, batch_size, n_hidden]\n",
101 |     "        hidden = torch.zeros(1, 1, n_hidden)\n",
102 |     "        output = model(input_batch, hidden, output_batch)\n",
103 |     "        # output : [max_len+1(=6), batch_size(=1), n_class]\n",
104 |     "\n",
105 |     "        predict = output.data.max(2, keepdim=True)[1] # select n_class dimension\n",
106 |     "        decoded = [char_arr[i] for i in predict]\n",
107 |     "        end = decoded.index('E')\n",
108 |     "        translated = ''.join(decoded[:end])\n",
109 |     "\n",
110 |     "        return translated.replace('P', '')\n",
111 |     "\n",
112 |     "    print('test')\n",
113 |     "    print('man ->', translate('man'))\n",
114 |     "    print('mans ->', translate('mans'))\n",
115 |     "    print('king ->', translate('king'))\n",
116 |     "    print('black ->', translate('black'))\n",
117 |     "    print('upp ->', translate('upp'))"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "id": "equivalent-preview",
124 |    "metadata": {},
125 |    "outputs": [],
126 |    "source": []
127 |   }
128 |  ],
129 |  "metadata": {
130 |   "kernelspec": {
131 |    "display_name": "Python 3",
132 |    "language": "python",
133 |    "name": "python3"
134 |   },
135 |   "language_info": {
136 |    "codemirror_mode": {
137 |     "name": "ipython",
138 |     "version": 3
139 |    },
140 |    "file_extension": ".py",
141 |    "mimetype": "text/x-python",
142 |    "name": "python",
143 |    "nbconvert_exporter": "python",
144 |    "pygments_lexer": "ipython3",
145 |    "version": "3.7.5"
146 |   }
147 |  },
148 |  "nbformat": 4,
149 |  "nbformat_minor": 5
150 | }
151 | 


--------------------------------------------------------------------------------
/3-1.TextRNN/TextRNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 3,
  6 |    "id": "conditional-growing",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import math\n",
 11 |     "import mindspore\n",
 12 |     "import numpy as np\n",
 13 |     "import mindspore.nn as nn\n",
 14 |     "import mindspore.ops as ops\n",
 15 |     "from mindspore import Tensor, Parameter, ms_function"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "markdown",
 20 |    "id": "wanted-black",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "TextRNN Model:"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 4,
 29 |    "id": "superb-decrease",
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "def make_batch(sentences, word_dict, n_class):\n",
 34 |     "    input_batch = []\n",
 35 |     "    target_batch = []\n",
 36 |     "\n",
 37 |     "    for sen in sentences:\n",
 38 |     "        word = sen.split()  # space tokenizer\n",
 39 |     "        input = [word_dict[n] for n in word[:-1]]  # create (1~n-1) as input\n",
 40 |     "        target = word_dict[word[-1]]  # create (n) as target, We usually call this 'casual language model'\n",
 41 |     "\n",
 42 |     "        input_batch.append(np.eye(n_class)[input])\n",
 43 |     "        target_batch.append(target)\n",
 44 |     "\n",
 45 |     "    return input_batch, target_batch"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 5,
 51 |    "id": "suitable-receiver",
 52 |    "metadata": {},
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "class TextRNN(nn.Cell):\n",
 56 |     "    def __init__(self, n_class, n_hidden, batch_size):\n",
 57 |     "        super(TextRNN, self).__init__()\n",
 58 |     "        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden, batch_first=True)\n",
 59 |     "        self.W = nn.Dense(n_hidden, n_class, has_bias=False)\n",
 60 |     "        self.b = Parameter(Tensor(np.ones([n_class]), mindspore.float32), 'b')\n",
 61 |     "\n",
 62 |     "    def construct(self, X):\n",
 63 |     "        X = X.swapaxes(0, 1) # X : [n_step, batch_size, n_class]\n",
 64 |     "        outputs, _ = self.rnn(X)\n",
 65 |     "        # outputs : [n_step, batch_size, num_directions(=1) * n_hidden]\n",
 66 |     "        outputs = outputs[-1] # [batch_size, num_directions(=1) * n_hidden]\n",
 67 |     "        model = self.W(outputs)# model : [batch_size, n_class]\n",
 68 |     "        \n",
 69 |     "        return model"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 6,
 75 |    "id": "greenhouse-state",
 76 |    "metadata": {},
 77 |    "outputs": [],
 78 |    "source": [
 79 |     "n_step = 2 # number of cells(= number of Step)\n",
 80 |     "n_hidden = 5 # number of hidden units in one cell\n",
 81 |     "\n",
 82 |     "sentences = [\"i like dog\", \"i love coffee\", \"i hate milk\"]\n",
 83 |     "\n",
 84 |     "word_list = \" \".join(sentences).split()\n",
 85 |     "word_list = list(set(word_list))\n",
 86 |     "word_dict = {w: i for i, w in enumerate(word_list)}\n",
 87 |     "number_dict = {i: w for i, w in enumerate(word_list)}\n",
 88 |     "n_class = len(word_dict)\n",
 89 |     "batch_size = len(sentences)"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": 7,
 95 |    "id": "quantitative-superintendent",
 96 |    "metadata": {},
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "model = TextRNN(n_class, n_hidden, batch_size)"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 8,
105 |    "id": "enabling-shore",
106 |    "metadata": {},
107 |    "outputs": [],
108 |    "source": [
109 |     "criterion = nn.CrossEntropyLoss()\n",
110 |     "optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 9,
116 |    "id": "afraid-pharmacology",
117 |    "metadata": {},
118 |    "outputs": [],
119 |    "source": [
120 |     "input_batch, target_batch = make_batch(sentences, word_dict, n_class)\n",
121 |     "input_batch = Tensor(input_batch, mindspore.float32)\n",
122 |     "target_batch = Tensor(target_batch, mindspore.int32)"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": 10,
128 |    "id": "e443bf76",
129 |    "metadata": {},
130 |    "outputs": [],
131 |    "source": [
132 |     "def forward(inputs, targets):\n",
133 |     "    logits = model(inputs)\n",
134 |     "    loss = criterion(logits, targets)\n",
135 |     "    return loss"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": 11,
141 |    "id": "8d489907",
142 |    "metadata": {},
143 |    "outputs": [],
144 |    "source": [
145 |     "grad_fn = ops.value_and_grad(forward, None, optimizer.parameters)"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": 12,
151 |    "id": "9dd3d835",
152 |    "metadata": {},
153 |    "outputs": [],
154 |    "source": [
155 |     "@ms_function\n",
156 |     "def train_step(inputs, targets):\n",
157 |     "    loss, grads = grad_fn(inputs, targets)\n",
158 |     "    optimizer(grads)\n",
159 |     "    return loss"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 13,
165 |    "id": "banner-backup",
166 |    "metadata": {},
167 |    "outputs": [
168 |     {
169 |      "name": "stdout",
170 |      "output_type": "stream",
171 |      "text": [
172 |       "Epoch: 1000 cost = 0.141270\n",
173 |       "Epoch: 2000 cost = 0.025611\n",
174 |       "Epoch: 3000 cost = 0.010544\n",
175 |       "Epoch: 4000 cost = 0.005329\n",
176 |       "Epoch: 5000 cost = 0.002940\n"
177 |      ]
178 |     }
179 |    ],
180 |    "source": [
181 |     "model.set_train()\n",
182 |     "\n",
183 |     "# Training\n",
184 |     "for epoch in range(5000):\n",
185 |     "    # hidden : [num_layers * num_directions, batch, hidden_size]\n",
186 |     "    loss = train_step(input_batch, target_batch)\n",
187 |     "    if (epoch + 1) % 1000 == 0:\n",
188 |     "        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.asnumpy()))"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": 14,
194 |    "id": "established-solid",
195 |    "metadata": {},
196 |    "outputs": [
197 |     {
198 |      "name": "stdout",
199 |      "output_type": "stream",
200 |      "text": [
201 |       "[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']\n"
202 |      ]
203 |     }
204 |    ],
205 |    "source": [
206 |     "# Predict\n",
207 |     "predict = model(input_batch).asnumpy().argmax(1)\n",
208 |     "print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])"
209 |    ]
210 |   }
211 |  ],
212 |  "metadata": {
213 |   "kernelspec": {
214 |    "display_name": "Python 3.7.13 ('ms1.8')",
215 |    "language": "python",
216 |    "name": "python3"
217 |   },
218 |   "language_info": {
219 |    "codemirror_mode": {
220 |     "name": "ipython",
221 |     "version": 3
222 |    },
223 |    "file_extension": ".py",
224 |    "mimetype": "text/x-python",
225 |    "name": "python",
226 |    "nbconvert_exporter": "python",
227 |    "pygments_lexer": "ipython3",
228 |    "version": "3.7.13"
229 |   },
230 |   "vscode": {
231 |    "interpreter": {
232 |     "hash": "bd0943702584cdb580f8947884f31a9fb49482f77f8c89ed6532de3aa180e7ba"
233 |    }
234 |   }
235 |  },
236 |  "nbformat": 4,
237 |  "nbformat_minor": 5
238 | }
239 | 


--------------------------------------------------------------------------------
/2-1.TextCNN/TextCNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 11,
  6 |    "id": "confidential-attendance",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import numpy as np\n",
 11 |     "import mindspore\n",
 12 |     "import mindspore.nn as nn\n",
 13 |     "import mindspore.ops as ops\n",
 14 |     "from mindspore import Parameter, Tensor, ms_function"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 12,
 20 |    "id": "promotional-smart",
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "class TextCNN(nn.Cell):\n",
 25 |     "    def __init__(self, embedding_size, sequence_length, num_classes, filter_sizes, num_filters, vocab_size):\n",
 26 |     "        super(TextCNN, self).__init__()\n",
 27 |     "        self.num_filters_total = num_filters * len(filter_sizes)\n",
 28 |     "        self.filter_sizes = filter_sizes\n",
 29 |     "        self.sequence_length = sequence_length\n",
 30 |     "        self.W = nn.Embedding(vocab_size, embedding_size)\n",
 31 |     "        self.Weight = nn.Dense(self.num_filters_total, num_classes, has_bias=False)\n",
 32 |     "        self.Bias = Parameter(Tensor(np.ones(num_classes), mindspore.float32), name='bias')\n",
 33 |     "        self.filter_list = nn.CellList()\n",
 34 |     "        for size in filter_sizes:\n",
 35 |     "            seq_cell = nn.SequentialCell([\n",
 36 |     "                nn.Conv2d(1, num_filters, (size, embedding_size), pad_mode='valid'),\n",
 37 |     "                nn.ReLU(),\n",
 38 |     "                nn.MaxPool2d(kernel_size=(sequence_length - size + 1, 1))\n",
 39 |     "            ])\n",
 40 |     "            self.filter_list.append(seq_cell)\n",
 41 |     "\n",
 42 |     "    def construct(self, X):\n",
 43 |     "        embedded_chars = self.W(X)\n",
 44 |     "        embedded_chars = embedded_chars.expand_dims(1)\n",
 45 |     "        pooled_outputs = []\n",
 46 |     "        for conv in self.filter_list:\n",
 47 |     "            pooled = conv(embedded_chars)\n",
 48 |     "            pooled = pooled.transpose((0, 3, 2, 1))\n",
 49 |     "            pooled_outputs.append(pooled)\n",
 50 |     "            \n",
 51 |     "        h_pool = ops.concat(pooled_outputs, len(self.filter_sizes))\n",
 52 |     "        h_pool_flat = h_pool.view(-1, self.num_filters_total)\n",
 53 |     "        model = self.Weight(h_pool_flat) + self.Bias\n",
 54 |     "        return model"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 13,
 60 |    "id": "prime-lindsay",
 61 |    "metadata": {},
 62 |    "outputs": [],
 63 |    "source": [
 64 |     "\n",
 65 |     "embedding_size = 2\n",
 66 |     "sequence_length = 3\n",
 67 |     "num_classes = 2\n",
 68 |     "filter_sizes = [2, 2, 2]\n",
 69 |     "num_filters = 3\n",
 70 |     "\n",
 71 |     "sentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \" i hate you\", \"sorry for that\", \"this is awful\"]\n",
 72 |     "labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.\n",
 73 |     "\n",
 74 |     "word_list = \" \".join(sentences).split()\n",
 75 |     "word_list = list(set(word_list))\n",
 76 |     "word_dict = {w: i for i, w in enumerate(word_list)}\n",
 77 |     "vocab_size = len(word_dict)\n",
 78 |     "\n",
 79 |     "model = TextCNN(embedding_size, sequence_length, num_classes, filter_sizes, num_filters, vocab_size)"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 14,
 85 |    "id": "gorgeous-weekly",
 86 |    "metadata": {},
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "criterion = nn.CrossEntropyLoss()\n",
 90 |     "optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 15,
 96 |    "id": "instructional-scheduling",
 97 |    "metadata": {},
 98 |    "outputs": [],
 99 |    "source": [
100 |     "inputs = Tensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences], mindspore.int32)\n",
101 |     "targets = Tensor([out for out in labels], mindspore.int32) "
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 16,
107 |    "id": "hundred-conclusion",
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "def forward(inputs, targets):\n",
112 |     "    logits = model(inputs)\n",
113 |     "    loss = criterion(logits, targets)\n",
114 |     "    return loss"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 17,
120 |    "id": "8bf68174",
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "grad_fn = ops.value_and_grad(forward, None, optimizer.parameters)"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 18,
130 |    "id": "c4ee2913",
131 |    "metadata": {},
132 |    "outputs": [],
133 |    "source": [
134 |     "@ms_function\n",
135 |     "def train_step(inputs, targets):\n",
136 |     "    loss, grads = grad_fn(inputs, targets)\n",
137 |     "    optimizer(grads)\n",
138 |     "    return loss"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": 19,
144 |    "id": "interesting-worthy",
145 |    "metadata": {},
146 |    "outputs": [
147 |     {
148 |      "name": "stdout",
149 |      "output_type": "stream",
150 |      "text": [
151 |       "Epoch: 1000 cost = 0.002617\n",
152 |       "Epoch: 2000 cost = 0.000449\n",
153 |       "Epoch: 3000 cost = 0.000152\n",
154 |       "Epoch: 4000 cost = 0.000066\n",
155 |       "Epoch: 5000 cost = 0.000031\n"
156 |      ]
157 |     }
158 |    ],
159 |    "source": [
160 |     "model.set_train()\n",
161 |     "\n",
162 |     "epoch = 5000\n",
163 |     "for step in range(epoch):\n",
164 |     "    loss = train_step(inputs, targets)\n",
165 |     "    \n",
166 |     "    if (step + 1) % 1000 == 0:\n",
167 |     "        print('Epoch:', '%04d' % (step + 1), 'cost =', '{:.6f}'.format(loss.asnumpy()))"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 20,
173 |    "id": "decent-breakdown",
174 |    "metadata": {},
175 |    "outputs": [
176 |     {
177 |      "name": "stdout",
178 |      "output_type": "stream",
179 |      "text": [
180 |       "sorry hate you is Bad Mean...\n"
181 |      ]
182 |     }
183 |    ],
184 |    "source": [
185 |     "test_text = 'sorry hate you'\n",
186 |     "tests = [np.asarray([word_dict[n] for n in test_text.split()])]\n",
187 |     "test_batch = Tensor(tests, mindspore.int32)\n",
188 |     "\n",
189 |     "# Predict\n",
190 |     "predict = model(test_batch).asnumpy().argmax(1)\n",
191 |     "if predict[0] == 0:\n",
192 |     "    print(test_text,\"is Bad Mean...\")\n",
193 |     "else:\n",
194 |     "    print(test_text,\"is Good Mean!!\")"
195 |    ]
196 |   }
197 |  ],
198 |  "metadata": {
199 |   "kernelspec": {
200 |    "display_name": "Python 3.7.13 ('ms1.8')",
201 |    "language": "python",
202 |    "name": "python3"
203 |   },
204 |   "language_info": {
205 |    "codemirror_mode": {
206 |     "name": "ipython",
207 |     "version": 3
208 |    },
209 |    "file_extension": ".py",
210 |    "mimetype": "text/x-python",
211 |    "name": "python",
212 |    "nbconvert_exporter": "python",
213 |    "pygments_lexer": "ipython3",
214 |    "version": "3.7.13"
215 |   },
216 |   "vscode": {
217 |    "interpreter": {
218 |     "hash": "bd0943702584cdb580f8947884f31a9fb49482f77f8c89ed6532de3aa180e7ba"
219 |    }
220 |   }
221 |  },
222 |  "nbformat": 4,
223 |  "nbformat_minor": 5
224 | }
225 | 


--------------------------------------------------------------------------------
/3-2.TextLSTM/TextLSTM.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "dying-communications",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import numpy as np\n",
 11 |     "import mindspore\n",
 12 |     "import mindspore.nn as nn\n",
 13 |     "import mindspore.ops as ops\n",
 14 |     "from mindspore import Parameter, Tensor, ms_function"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "id": "suited-southeast",
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "def make_batch(seq_data, word_dict,vocab_size):\n",
 25 |     "    input_batch, target_batch = [], []\n",
 26 |     "\n",
 27 |     "    for seq in seq_data:\n",
 28 |     "        input = [word_dict[n] for n in seq[:-1]] # 'm', 'a' , 'k' is input\n",
 29 |     "        target = word_dict[seq[-1]] # 'e' is target\n",
 30 |     "        input_batch.append(np.eye(vocab_size)[input])\n",
 31 |     "        target_batch.append(target)\n",
 32 |     "\n",
 33 |     "    return input_batch, target_batch"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 3,
 39 |    "id": "saving-print",
 40 |    "metadata": {},
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "class TextLSTM(nn.Cell):\n",
 44 |     "    def __init__(self, batch_size, vocab_size, hidden_size):\n",
 45 |     "        super(TextLSTM,self).__init__()\n",
 46 |     "        self.lstm = nn.LSTM(input_size=vocab_size, hidden_size=hidden_size)\n",
 47 |     "        self.W = nn.Dense(hidden_size, vocab_size, has_bias=False)\n",
 48 |     "        self.b = Parameter(Tensor(np.ones(vocab_size), mindspore.float32), 'b')\n",
 49 |     "        \n",
 50 |     "        self.n_steps = n_steps\n",
 51 |     "\n",
 52 |     "    def construct(self, X):\n",
 53 |     "        input = X.transpose((1, 0, 2))  \n",
 54 |     "        outputs, (_, _) = self.lstm(input)\n",
 55 |     "        outputs = outputs[-1] \n",
 56 |     "        model = self.W(outputs) + self.b  \n",
 57 |     "        return model"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 4,
 63 |    "id": "changed-facial",
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "n_steps = 3 \n",
 68 |     "hidden_size = 128 \n",
 69 |     "\n",
 70 |     "char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']\n",
 71 |     "word_dict = {n: i for i, n in enumerate(char_arr)}\n",
 72 |     "number_dict = {i: w for i, w in enumerate(char_arr)}\n",
 73 |     "vocab_size = len(word_dict)  \n",
 74 |     "\n",
 75 |     "seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": 5,
 81 |    "id": "growing-stroke",
 82 |    "metadata": {},
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "input_batch, target_batch = make_batch(seq_data, word_dict, vocab_size)\n",
 86 |     "input_batch = Tensor(input_batch, mindspore.float32)\n",
 87 |     "target_batch = Tensor(target_batch, mindspore.int32)"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 6,
 93 |    "id": "happy-medline",
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": [
 97 |     "batch_size = len(input_batch)"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": 7,
103 |    "id": "fancy-detroit",
104 |    "metadata": {},
105 |    "outputs": [],
106 |    "source": [
107 |     "model = TextLSTM(batch_size, vocab_size, hidden_size)"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 8,
113 |    "id": "electronic-mirror",
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "criterion = nn.CrossEntropyLoss()\n",
118 |     "optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": 9,
124 |    "id": "suspended-shuttle",
125 |    "metadata": {},
126 |    "outputs": [],
127 |    "source": [
128 |     "def forward(inputs, targets):\n",
129 |     "    logits = model(inputs)\n",
130 |     "    loss = criterion(logits, targets)\n",
131 |     "    return loss"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 10,
137 |    "id": "6d65e68d",
138 |    "metadata": {},
139 |    "outputs": [],
140 |    "source": [
141 |     "grad_fn = ops.value_and_grad(forward, None, optimizer.parameters)"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 11,
147 |    "id": "da270df2",
148 |    "metadata": {},
149 |    "outputs": [],
150 |    "source": [
151 |     "@ms_function\n",
152 |     "def train_step(inputs, targets):\n",
153 |     "    loss, grads = grad_fn(inputs, targets)\n",
154 |     "    optimizer(grads)\n",
155 |     "    return loss"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": 12,
161 |    "id": "sublime-exercise",
162 |    "metadata": {},
163 |    "outputs": [
164 |     {
165 |      "name": "stdout",
166 |      "output_type": "stream",
167 |      "text": [
168 |       "Epoch: 0100 cost =  1.123684\n",
169 |       "Epoch: 0200 cost =  0.125290\n",
170 |       "Epoch: 0300 cost =  0.027129\n",
171 |       "Epoch: 0400 cost =  0.010317\n",
172 |       "Epoch: 0500 cost =  0.005415\n",
173 |       "Epoch: 0600 cost =  0.003387\n",
174 |       "Epoch: 0700 cost =  0.002342\n",
175 |       "Epoch: 0800 cost =  0.001727\n",
176 |       "Epoch: 0900 cost =  0.001330\n",
177 |       "Epoch: 1000 cost =  0.001058\n"
178 |      ]
179 |     }
180 |    ],
181 |    "source": [
182 |     "model.set_train()\n",
183 |     "# Training\n",
184 |     "epoch = 1000\n",
185 |     "for step in range(epoch):\n",
186 |     "    loss = train_step(input_batch, target_batch)\n",
187 |     "    if (step + 1) % 100 == 0:\n",
188 |     "        print('Epoch:', '%04d' % (step + 1), 'cost = ', '{:.6f}'.format(loss.asnumpy()))"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": 13,
194 |    "id": "seven-tunisia",
195 |    "metadata": {},
196 |    "outputs": [],
197 |    "source": [
198 |     "inputs = [sen[:3] for sen in seq_data]"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": 14,
204 |    "id": "norwegian-mounting",
205 |    "metadata": {},
206 |    "outputs": [
207 |     {
208 |      "name": "stdout",
209 |      "output_type": "stream",
210 |      "text": [
211 |       "['mak', 'nee', 'coa', 'wor', 'lov', 'hat', 'liv', 'hom', 'has', 'sta'] -> ['e', 'd', 'l', 'd', 'e', 'e', 'e', 'e', 'h', 'r']\n"
212 |      ]
213 |     }
214 |    ],
215 |    "source": [
216 |     "predict = model(input_batch).asnumpy().argmax(axis=1)\n",
217 |     "print(inputs, '->', [number_dict[n.item()] for n in predict.squeeze()])"
218 |    ]
219 |   }
220 |  ],
221 |  "metadata": {
222 |   "kernelspec": {
223 |    "display_name": "Python 3.7.13 ('ms1.8')",
224 |    "language": "python",
225 |    "name": "python3"
226 |   },
227 |   "language_info": {
228 |    "codemirror_mode": {
229 |     "name": "ipython",
230 |     "version": 3
231 |    },
232 |    "file_extension": ".py",
233 |    "mimetype": "text/x-python",
234 |    "name": "python",
235 |    "nbconvert_exporter": "python",
236 |    "pygments_lexer": "ipython3",
237 |    "version": "3.7.13"
238 |   },
239 |   "vscode": {
240 |    "interpreter": {
241 |     "hash": "bd0943702584cdb580f8947884f31a9fb49482f77f8c89ed6532de3aa180e7ba"
242 |    }
243 |   }
244 |  },
245 |  "nbformat": 4,
246 |  "nbformat_minor": 5
247 | }
248 | 


--------------------------------------------------------------------------------
/1-1.NNLM/NNLM.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 50,
  6 |    "id": "celtic-passenger",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import numpy as np\n",
 11 |     "import mindspore\n",
 12 |     "import mindspore.nn as nn\n",
 13 |     "import mindspore.ops as ops\n",
 14 |     "from mindspore import Parameter, Tensor, ms_function"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 51,
 20 |    "id": "lined-travel",
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "def make_batch(sentences, word_dict):\n",
 25 |     "    input_batch = []\n",
 26 |     "    target_batch = []\n",
 27 |     "    \n",
 28 |     "    for sent in sentences:\n",
 29 |     "        word = sent.split()\n",
 30 |     "        inp = [word_dict[n] for n in word[:-1]]\n",
 31 |     "        tgt = word_dict[word[-1]]\n",
 32 |     "        \n",
 33 |     "        input_batch.append(inp)\n",
 34 |     "        target_batch.append(tgt)\n",
 35 |     "    return input_batch, target_batch"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 52,
 41 |    "id": "opened-guinea",
 42 |    "metadata": {},
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "class NNLM(nn.Cell):\n",
 46 |     "    def __init__(self, n_steps, vocab_size, embed_size, hidden_size):\n",
 47 |     "        super().__init__()\n",
 48 |     "        self.C = nn.Embedding(vocab_size, embed_size)\n",
 49 |     "        self.H = nn.Dense(n_steps * embed_size, hidden_size, has_bias=False)\n",
 50 |     "        self.d = Parameter(Tensor(np.ones(hidden_size), mindspore.float32), name='d')\n",
 51 |     "        self.U = nn.Dense(hidden_size, vocab_size, has_bias=False)\n",
 52 |     "        self.W = nn.Dense(n_steps * embed_size, vocab_size, has_bias=False)\n",
 53 |     "        self.b = Parameter(Tensor(np.ones(vocab_size), mindspore.float32), name='b')\n",
 54 |     "        self.n_steps = n_steps\n",
 55 |     "        self.embed_size = embed_size\n",
 56 |     "        self.tanh = nn.Tanh()\n",
 57 |     "\n",
 58 |     "    def construct(self, X):\n",
 59 |     "        X = self.C(X)\n",
 60 |     "        X = X.view(-1, self.n_steps * self.embed_size)\n",
 61 |     "        tanh = self.tanh(self.d + self.H(X))\n",
 62 |     "        output = self.b + self.W(X) + self.U(tanh)\n",
 63 |     "        return output\n",
 64 |     "        "
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 53,
 70 |    "id": "boolean-outline",
 71 |    "metadata": {},
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "n_steps = 2\n",
 75 |     "hidden_size = 2\n",
 76 |     "embed_size = 2\n",
 77 |     "\n",
 78 |     "sentences = [\"i like dog\", \"i love coffee\", \"i hate milk\"]\n",
 79 |     "\n",
 80 |     "word_list = \" \".join(sentences).split()\n",
 81 |     "word_list = list(set(word_list))\n",
 82 |     "word_dict = {w: i for i, w in enumerate(word_list)}\n",
 83 |     "number_dict = {i: w for i, w in enumerate(word_list)}\n",
 84 |     "vocab_size = len(word_dict)"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": 54,
 90 |    "id": "vocational-adaptation",
 91 |    "metadata": {},
 92 |    "outputs": [
 93 |     {
 94 |      "data": {
 95 |       "text/plain": [
 96 |        "Tensor(shape=[3], dtype=Int32, value= [1, 6, 3])"
 97 |       ]
 98 |      },
 99 |      "execution_count": 54,
100 |      "metadata": {},
101 |      "output_type": "execute_result"
102 |     }
103 |    ],
104 |    "source": [
105 |     "input_batch, target_batch = make_batch(sentences, word_dict)\n",
106 |     "input_batch = Tensor(input_batch, mindspore.int32)\n",
107 |     "target_batch = Tensor(target_batch, mindspore.int32)\n",
108 |     "target_batch"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 55,
114 |    "id": "certain-spouse",
115 |    "metadata": {},
116 |    "outputs": [],
117 |    "source": [
118 |     "model = NNLM(n_steps, vocab_size, embed_size, hidden_size)"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": 56,
124 |    "id": "municipal-hypothetical",
125 |    "metadata": {},
126 |    "outputs": [],
127 |    "source": [
128 |     "criterion = nn.CrossEntropyLoss()\n",
129 |     "optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 57,
135 |    "id": "f1a65d23",
136 |    "metadata": {},
137 |    "outputs": [],
138 |    "source": [
139 |     "def forward(inputs, targets):\n",
140 |     "    logits = model(inputs)\n",
141 |     "    loss = criterion(logits, targets)\n",
142 |     "    return loss"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 58,
148 |    "id": "6121b71e",
149 |    "metadata": {},
150 |    "outputs": [],
151 |    "source": [
152 |     "grad_fn = ops.value_and_grad(forward, None, optimizer.parameters)"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 59,
158 |    "id": "ff2c4e89",
159 |    "metadata": {},
160 |    "outputs": [],
161 |    "source": [
162 |     "@ms_function\n",
163 |     "def train_step(inputs, targets):\n",
164 |     "    loss, grads = grad_fn(inputs, targets)\n",
165 |     "    optimizer(grads)\n",
166 |     "    return loss"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 60,
172 |    "id": "efficient-slope",
173 |    "metadata": {},
174 |    "outputs": [
175 |     {
176 |      "name": "stdout",
177 |      "output_type": "stream",
178 |      "text": [
179 |       "Epoch: 1000 cost =  0.159208\n",
180 |       "Epoch: 2000 cost =  0.016804\n",
181 |       "Epoch: 3000 cost =  0.005246\n",
182 |       "Epoch: 4000 cost =  0.002221\n",
183 |       "Epoch: 5000 cost =  0.001076\n"
184 |      ]
185 |     }
186 |    ],
187 |    "source": [
188 |     "model.set_train()\n",
189 |     "\n",
190 |     "epoch = 5000\n",
191 |     "for step in range(epoch):\n",
192 |     "    loss = train_step(input_batch, target_batch)\n",
193 |     "    if (step + 1) % 1000 == 0:\n",
194 |     "        print('Epoch:', '%04d' % (step + 1), 'cost = ', '{:.6f}'.format(loss.asnumpy()))"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 61,
200 |    "id": "hourly-senegal",
201 |    "metadata": {},
202 |    "outputs": [
203 |     {
204 |      "name": "stdout",
205 |      "output_type": "stream",
206 |      "text": [
207 |       "[1 6 3]\n",
208 |       "[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']\n"
209 |      ]
210 |     }
211 |    ],
212 |    "source": [
213 |     "model.set_train(False)\n",
214 |     "predict = model(input_batch).asnumpy().argmax(axis=1)\n",
215 |     "print(predict)\n",
216 |     "print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict])"
217 |    ]
218 |   }
219 |  ],
220 |  "metadata": {
221 |   "kernelspec": {
222 |    "display_name": "Python 3.7.13 ('ms1.8')",
223 |    "language": "python",
224 |    "name": "python3"
225 |   },
226 |   "language_info": {
227 |    "codemirror_mode": {
228 |     "name": "ipython",
229 |     "version": 3
230 |    },
231 |    "file_extension": ".py",
232 |    "mimetype": "text/x-python",
233 |    "name": "python",
234 |    "nbconvert_exporter": "python",
235 |    "pygments_lexer": "ipython3",
236 |    "version": "3.7.13"
237 |   },
238 |   "vscode": {
239 |    "interpreter": {
240 |     "hash": "bd0943702584cdb580f8947884f31a9fb49482f77f8c89ed6532de3aa180e7ba"
241 |    }
242 |   }
243 |  },
244 |  "nbformat": 4,
245 |  "nbformat_minor": 5
246 | }
247 | 


--------------------------------------------------------------------------------
/3-3.Bi-LSTM/Bi-LSTM.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "handled-script",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import numpy as np\n",
 11 |     "import mindspore\n",
 12 |     "import mindspore.nn as nn\n",
 13 |     "import mindspore.ops as ops\n",
 14 |     "from mindspore import Parameter, Tensor, ms_function"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "id": "worthy-samba",
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "def make_batch(sentence, word_dict, n_class, max_len):\n",
 25 |     "    input_batch = []\n",
 26 |     "    target_batch = []\n",
 27 |     "\n",
 28 |     "    words = sentence.split()\n",
 29 |     "    for i, word in enumerate(words[:-1]):\n",
 30 |     "        input = [word_dict[n] for n in words[:(i + 1)]]\n",
 31 |     "        input = input + [0] * (max_len - len(input))\n",
 32 |     "        target = word_dict[words[i + 1]]\n",
 33 |     "        input_batch.append(np.eye(n_class)[input])\n",
 34 |     "        target_batch.append(target)\n",
 35 |     "\n",
 36 |     "    return input_batch, target_batch"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 3,
 42 |    "id": "religious-portland",
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "class BiLSTM(nn.Cell):\n",
 47 |     "    def __init__(self, n_class, n_hidden, batch_size):\n",
 48 |     "        super().__init__()\n",
 49 |     "        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)\n",
 50 |     "        self.W = nn.Dense(n_hidden * 2, n_class, has_bias=False)\n",
 51 |     "        self.b = Parameter(Tensor(np.ones([n_class], dtype=np.float32), mindspore.float32), 'b')\n",
 52 |     "\n",
 53 |     "    def construct(self, X):\n",
 54 |     "        input = X.transpose((1, 0, 2))\n",
 55 |     "        output, (_, _) = self.lstm(input)\n",
 56 |     "        outputs = output[-1]\n",
 57 |     "        model = self.W(outputs) + self.b\n",
 58 |     "        \n",
 59 |     "        return model"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 4,
 65 |    "id": "imposed-waters",
 66 |    "metadata": {},
 67 |    "outputs": [],
 68 |    "source": [
 69 |     "n_hidden = 5 # number of hidden units in one cell\n",
 70 |     "\n",
 71 |     "sentence = (\n",
 72 |     "    'Lorem ipsum dolor sit amet consectetur adipisicing elit '\n",
 73 |     "    'sed do eiusmod tempor incididunt ut labore et dolore magna '\n",
 74 |     "    'aliqua Ut enim ad minim veniam quis nostrud exercitation'\n",
 75 |     ")\n",
 76 |     "\n",
 77 |     "word_dict = {w: i for i, w in enumerate(list(set(sentence.split())))}\n",
 78 |     "number_dict = {i: w for i, w in enumerate(list(set(sentence.split())))}\n",
 79 |     "n_class = len(word_dict)\n",
 80 |     "max_len = len(sentence.split())\n",
 81 |     "vocab_size = len(word_dict)"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 5,
 87 |    "id": "generic-vessel",
 88 |    "metadata": {},
 89 |    "outputs": [
 90 |     {
 91 |      "name": "stdout",
 92 |      "output_type": "stream",
 93 |      "text": [
 94 |       "(26, 27, 27) (26,)\n"
 95 |      ]
 96 |     }
 97 |    ],
 98 |    "source": [
 99 |     "input_batch, target_batch = make_batch(sentence, word_dict, n_class, max_len)\n",
100 |     "# print(input_batch, target_batch)\n",
101 |     "input_batch = Tensor(input_batch, mindspore.float32)\n",
102 |     "target_batch = Tensor(target_batch, mindspore.int32)\n",
103 |     "print(input_batch.shape, target_batch.shape)\n",
104 |     "\n",
105 |     "batch_size = len(input_batch)"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 6,
111 |    "id": "random-entertainment",
112 |    "metadata": {},
113 |    "outputs": [],
114 |    "source": [
115 |     "model = BiLSTM(n_class, n_hidden, batch_size)"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 7,
121 |    "id": "southwest-baltimore",
122 |    "metadata": {},
123 |    "outputs": [],
124 |    "source": [
125 |     "criterion = nn.CrossEntropyLoss()\n",
126 |     "optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": 8,
132 |    "id": "conventional-munich",
133 |    "metadata": {},
134 |    "outputs": [],
135 |    "source": [
136 |     "def forward(inputs, targets):\n",
137 |     "    logits = model(inputs)\n",
138 |     "    loss = criterion(logits, targets)\n",
139 |     "    return loss"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 9,
145 |    "id": "b9633c3c",
146 |    "metadata": {},
147 |    "outputs": [],
148 |    "source": [
149 |     "grad_fn = ops.value_and_grad(forward, None, optimizer.parameters)"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": 10,
155 |    "id": "9b5a2242",
156 |    "metadata": {},
157 |    "outputs": [],
158 |    "source": [
159 |     "@ms_function\n",
160 |     "def train_step(inputs, targets):\n",
161 |     "    loss, grads = grad_fn(inputs, targets)\n",
162 |     "    optimizer(grads)\n",
163 |     "    return loss"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": 11,
169 |    "id": "accredited-manual",
170 |    "metadata": {},
171 |    "outputs": [
172 |     {
173 |      "name": "stdout",
174 |      "output_type": "stream",
175 |      "text": [
176 |       "Epoch: 1000 cost =  2.585795\n",
177 |       "Epoch: 2000 cost =  2.581421\n",
178 |       "Epoch: 3000 cost =  2.569982\n",
179 |       "Epoch: 4000 cost =  2.311544\n",
180 |       "Epoch: 5000 cost =  1.974983\n",
181 |       "Epoch: 6000 cost =  1.053331\n",
182 |       "Epoch: 7000 cost =  0.681154\n",
183 |       "Epoch: 8000 cost =  0.568491\n",
184 |       "Epoch: 9000 cost =  0.448840\n",
185 |       "Epoch: 10000 cost =  0.375638\n"
186 |      ]
187 |     }
188 |    ],
189 |    "source": [
190 |     "model.set_train()\n",
191 |     "\n",
192 |     "epoch = 10000\n",
193 |     "for step in range(epoch):\n",
194 |     "    loss = train_step(input_batch, target_batch)\n",
195 |     "    if (step + 1) % 1000 == 0:\n",
196 |     "        print('Epoch:', '%04d' % (step + 1), 'cost = ', '{:.6f}'.format(loss.asnumpy()))"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": 12,
202 |    "id": "revised-description",
203 |    "metadata": {},
204 |    "outputs": [
205 |     {
206 |      "name": "stdout",
207 |      "output_type": "stream",
208 |      "text": [
209 |       "Lorem ipsum dolor sit amet consectetur adipisicing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation\n",
210 |       "['dolor', 'dolor', 'sit', 'amet', 'consectetur', 'adipisicing', 'elit', 'sed', 'sed', 'eiusmod', 'tempor', 'incididunt', 'ut', 'labore', 'et', 'dolore', 'magna', 'aliqua', 'ad', 'ad', 'ad', 'minim', 'veniam', 'quis', 'nostrud', 'exercitation']\n"
211 |      ]
212 |     }
213 |    ],
214 |    "source": [
215 |     "model.set_train(False)\n",
216 |     "predict = model(input_batch).asnumpy().argmax(axis=1)\n",
217 |     "print(sentence)\n",
218 |     "print([number_dict[n.item()] for n in predict.squeeze()])"
219 |    ]
220 |   }
221 |  ],
222 |  "metadata": {
223 |   "kernelspec": {
224 |    "display_name": "Python 3.7.13 ('ms1.8')",
225 |    "language": "python",
226 |    "name": "python3"
227 |   },
228 |   "language_info": {
229 |    "codemirror_mode": {
230 |     "name": "ipython",
231 |     "version": 3
232 |    },
233 |    "file_extension": ".py",
234 |    "mimetype": "text/x-python",
235 |    "name": "python",
236 |    "nbconvert_exporter": "python",
237 |    "pygments_lexer": "ipython3",
238 |    "version": "3.7.13"
239 |   },
240 |   "vscode": {
241 |    "interpreter": {
242 |     "hash": "bd0943702584cdb580f8947884f31a9fb49482f77f8c89ed6532de3aa180e7ba"
243 |    }
244 |   }
245 |  },
246 |  "nbformat": 4,
247 |  "nbformat_minor": 5
248 | }
249 | 


--------------------------------------------------------------------------------
/4-2.Seq2Seq(Attention)/Seq2Seq-Attention_pytorch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "code",
  5 |       "metadata": {},
  6 |       "source": [
  7 |         "# code by Tae Hwan Jung @graykode\n",
  8 |         "# Reference : https://github.com/hunkim/PyTorchZeroToAll/blob/master/14_2_seq2seq_att.py\n",
  9 |         "import numpy as np\n",
 10 |         "import torch\n",
 11 |         "import torch.nn as nn\n",
 12 |         "import torch.nn.functional as F\n",
 13 |         "import matplotlib.pyplot as plt\n",
 14 |         "\n",
 15 |         "# S: Symbol that shows starting of decoding input\n",
 16 |         "# E: Symbol that shows starting of decoding output\n",
 17 |         "# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\n",
 18 |         "\n",
 19 |         "def make_batch():\n",
 20 |         "    input_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[0].split()]]]\n",
 21 |         "    output_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[1].split()]]]\n",
 22 |         "    target_batch = [[word_dict[n] for n in sentences[2].split()]]\n",
 23 |         "\n",
 24 |         "    # make tensor\n",
 25 |         "    return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch)\n",
 26 |         "\n",
 27 |         "class Attention(nn.Module):\n",
 28 |         "    def __init__(self):\n",
 29 |         "        super(Attention, self).__init__()\n",
 30 |         "        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n",
 31 |         "        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n",
 32 |         "\n",
 33 |         "        # Linear for attention\n",
 34 |         "        self.attn = nn.Linear(n_hidden, n_hidden)\n",
 35 |         "        self.out = nn.Linear(n_hidden * 2, n_class)\n",
 36 |         "\n",
 37 |         "    def forward(self, enc_inputs, hidden, dec_inputs):\n",
 38 |         "        enc_inputs = enc_inputs.transpose(0, 1)  # enc_inputs: [n_step(=n_step, time step), batch_size, n_class]\n",
 39 |         "        dec_inputs = dec_inputs.transpose(0, 1)  # dec_inputs: [n_step(=n_step, time step), batch_size, n_class]\n",
 40 |         "\n",
 41 |         "        # enc_outputs : [n_step, batch_size, num_directions(=1) * n_hidden], matrix F\n",
 42 |         "        # enc_hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
 43 |         "        enc_outputs, enc_hidden = self.enc_cell(enc_inputs, hidden)\n",
 44 |         "\n",
 45 |         "        trained_attn = []\n",
 46 |         "        hidden = enc_hidden\n",
 47 |         "        n_step = len(dec_inputs)\n",
 48 |         "        model = torch.empty([n_step, 1, n_class])\n",
 49 |         "\n",
 50 |         "        for i in range(n_step):  # each time step\n",
 51 |         "            # dec_output : [n_step(=1), batch_size(=1), num_directions(=1) * n_hidden]\n",
 52 |         "            # hidden : [num_layers(=1) * num_directions(=1), batch_size(=1), n_hidden]\n",
 53 |         "            dec_output, hidden = self.dec_cell(dec_inputs[i].unsqueeze(0), hidden)\n",
 54 |         "            attn_weights = self.get_att_weight(dec_output, enc_outputs)  # attn_weights : [1, 1, n_step]\n",
 55 |         "            trained_attn.append(attn_weights.squeeze().data.numpy())\n",
 56 |         "\n",
 57 |         "            # matrix-matrix product of matrices [1,1,n_step] x [1,n_step,n_hidden] = [1,1,n_hidden]\n",
 58 |         "            context = attn_weights.bmm(enc_outputs.transpose(0, 1))\n",
 59 |         "            dec_output = dec_output.squeeze(0)  # dec_output : [batch_size(=1), num_directions(=1) * n_hidden]\n",
 60 |         "            context = context.squeeze(1)  # [1, num_directions(=1) * n_hidden]\n",
 61 |         "            model[i] = self.out(torch.cat((dec_output, context), 1))\n",
 62 |         "\n",
 63 |         "        # make model shape [n_step, n_class]\n",
 64 |         "        return model.transpose(0, 1).squeeze(0), trained_attn\n",
 65 |         "\n",
 66 |         "    def get_att_weight(self, dec_output, enc_outputs):  # get attention weight one 'dec_output' with 'enc_outputs'\n",
 67 |         "        n_step = len(enc_outputs)\n",
 68 |         "        attn_scores = torch.zeros(n_step)  # attn_scores : [n_step]\n",
 69 |         "\n",
 70 |         "        for i in range(n_step):\n",
 71 |         "            attn_scores[i] = self.get_att_score(dec_output, enc_outputs[i])\n",
 72 |         "\n",
 73 |         "        # Normalize scores to weights in range 0 to 1\n",
 74 |         "        return F.softmax(attn_scores).view(1, 1, -1)\n",
 75 |         "\n",
 76 |         "    def get_att_score(self, dec_output, enc_output):  # enc_outputs [batch_size, num_directions(=1) * n_hidden]\n",
 77 |         "        score = self.attn(enc_output)  # score : [batch_size, n_hidden]\n",
 78 |         "        return torch.dot(dec_output.view(-1), score.view(-1))  # inner product make scalar value\n",
 79 |         "\n",
 80 |         "if __name__ == '__main__':\n",
 81 |         "    n_step = 5 # number of cells(= number of Step)\n",
 82 |         "    n_hidden = 128 # number of hidden units in one cell\n",
 83 |         "\n",
 84 |         "    sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\n",
 85 |         "\n",
 86 |         "    word_list = \" \".join(sentences).split()\n",
 87 |         "    word_list = list(set(word_list))\n",
 88 |         "    word_dict = {w: i for i, w in enumerate(word_list)}\n",
 89 |         "    number_dict = {i: w for i, w in enumerate(word_list)}\n",
 90 |         "    n_class = len(word_dict)  # vocab list\n",
 91 |         "\n",
 92 |         "    # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
 93 |         "    hidden = torch.zeros(1, 1, n_hidden)\n",
 94 |         "\n",
 95 |         "    model = Attention()\n",
 96 |         "    criterion = nn.CrossEntropyLoss()\n",
 97 |         "    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
 98 |         "\n",
 99 |         "    input_batch, output_batch, target_batch = make_batch()\n",
100 |         "\n",
101 |         "    # Train\n",
102 |         "    for epoch in range(2000):\n",
103 |         "        optimizer.zero_grad()\n",
104 |         "        output, _ = model(input_batch, hidden, output_batch)\n",
105 |         "\n",
106 |         "        loss = criterion(output, target_batch.squeeze(0))\n",
107 |         "        if (epoch + 1) % 400 == 0:\n",
108 |         "            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
109 |         "\n",
110 |         "        loss.backward()\n",
111 |         "        optimizer.step()\n",
112 |         "\n",
113 |         "    # Test\n",
114 |         "    test_batch = [np.eye(n_class)[[word_dict[n] for n in 'SPPPP']]]\n",
115 |         "    test_batch = torch.FloatTensor(test_batch)\n",
116 |         "    predict, trained_attn = model(input_batch, hidden, test_batch)\n",
117 |         "    predict = predict.data.max(1, keepdim=True)[1]\n",
118 |         "    print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\n",
119 |         "\n",
120 |         "    # Show Attention\n",
121 |         "    fig = plt.figure(figsize=(5, 5))\n",
122 |         "    ax = fig.add_subplot(1, 1, 1)\n",
123 |         "    ax.matshow(trained_attn, cmap='viridis')\n",
124 |         "    ax.set_xticklabels([''] + sentences[0].split(), fontdict={'fontsize': 14})\n",
125 |         "    ax.set_yticklabels([''] + sentences[2].split(), fontdict={'fontsize': 14})\n",
126 |         "    plt.show()"
127 |       ],
128 |       "outputs": [],
129 |       "execution_count": null
130 |     }
131 |   ],
132 |   "metadata": {
133 |     "anaconda-cloud": {},
134 |     "kernelspec": {
135 |       "display_name": "Python 3",
136 |       "language": "python",
137 |       "name": "python3"
138 |     },
139 |     "language_info": {
140 |       "codemirror_mode": {
141 |         "name": "ipython",
142 |         "version": 3
143 |       },
144 |       "file_extension": ".py",
145 |       "mimetype": "text/x-python",
146 |       "name": "python",
147 |       "nbconvert_exporter": "python",
148 |       "pygments_lexer": "ipython3",
149 |       "version": "3.6.1"
150 |     }
151 |   },
152 |   "nbformat": 4,
153 |   "nbformat_minor": 4
154 | }


--------------------------------------------------------------------------------
/4-1.Seq2Seq/Seq2Seq.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 12,
  6 |    "id": "metropolitan-married",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import mindspore\n",
 11 |     "import numpy as np\n",
 12 |     "import mindspore.nn as nn\n",
 13 |     "import mindspore.ops as ops\n",
 14 |     "from mindspore import Tensor, ms_function"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 13,
 20 |    "id": "internal-covering",
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "def make_batch(seq_data, num_dic, n_step):\n",
 25 |     "    input_batch, output_batch, target_batch = [], [], []\n",
 26 |     "\n",
 27 |     "    for seq in seq_data:\n",
 28 |     "        for i in range(2):\n",
 29 |     "            seq[i] = seq[i] + 'P' * (n_step - len(seq[i]))\n",
 30 |     "\n",
 31 |     "        input = [num_dic[n] for n in seq[0]]\n",
 32 |     "        output = [num_dic[n] for n in ('S' + seq[1])]\n",
 33 |     "        target = [num_dic[n] for n in (seq[1] + 'E')]\n",
 34 |     "\n",
 35 |     "        input_batch.append(np.eye(n_class)[input])\n",
 36 |     "        output_batch.append(np.eye(n_class)[output])\n",
 37 |     "        target_batch.append(target) # not one-hot\n",
 38 |     "\n",
 39 |     "    # make tensor\n",
 40 |     "    return Tensor(input_batch, mindspore.float32), Tensor(output_batch, mindspore.float32), Tensor(target_batch, mindspore.int32)"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 14,
 46 |    "id": "compressed-resort",
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "# Model\n",
 51 |     "class Seq2Seq(nn.Cell):\n",
 52 |     "    def __init__(self, n_class, n_hidden, dropout):\n",
 53 |     "        super(Seq2Seq, self).__init__()\n",
 54 |     "\n",
 55 |     "        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=dropout)\n",
 56 |     "        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=dropout)\n",
 57 |     "        self.fc = nn.Dense(n_hidden, n_class)\n",
 58 |     "        \n",
 59 |     "        \n",
 60 |     "    def construct(self, enc_input, dec_input):\n",
 61 |     "        enc_input = enc_input.transpose((1, 0, 2)) # enc_input: [max_len(=n_step, time step), batch_size, n_class]\n",
 62 |     "        dec_input = dec_input.transpose((1, 0, 2)) # dec_input: [max_len(=n_step, time step), batch_size, n_class]\n",
 63 |     "\n",
 64 |     "        # enc_states : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
 65 |     "        _, enc_states = self.enc_cell(enc_input)\n",
 66 |     "        # outputs : [max_len+1(=6), batch_size, num_directions(=1) * n_hidden(=128)]\n",
 67 |     "        outputs, _ = self.dec_cell(dec_input, enc_states)\n",
 68 |     "\n",
 69 |     "        model = self.fc(outputs) # model : [max_len+1(=6), batch_size, n_class]\n",
 70 |     "        return model"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 15,
 76 |    "id": "impaired-treat",
 77 |    "metadata": {},
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "n_step = 5\n",
 81 |     "n_hidden = 128\n",
 82 |     "dropout = 0.5\n",
 83 |     "char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']\n",
 84 |     "num_dic = {n: i for i, n in enumerate(char_arr)}\n",
 85 |     "seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]\n",
 86 |     "\n",
 87 |     "n_class = len(num_dic)\n",
 88 |     "batch_size = len(seq_data)"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 16,
 94 |    "id": "structured-external",
 95 |    "metadata": {},
 96 |    "outputs": [
 97 |     {
 98 |      "name": "stderr",
 99 |      "output_type": "stream",
100 |      "text": [
101 |       "[WARNING] ME(257088:139675582087424,MainProcess):2022-08-12-21:11:28.654.667 [mindspore/nn/layer/rnns.py:392] dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1\n",
102 |       "[WARNING] ME(257088:139675582087424,MainProcess):2022-08-12-21:11:28.663.657 [mindspore/nn/layer/rnns.py:392] dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1\n"
103 |      ]
104 |     }
105 |    ],
106 |    "source": [
107 |     "model = Seq2Seq(n_class, n_hidden, dropout)"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 17,
113 |    "id": "japanese-platform",
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "criterion = nn.CrossEntropyLoss()\n",
118 |     "optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": 18,
124 |    "id": "governmental-narrative",
125 |    "metadata": {},
126 |    "outputs": [],
127 |    "source": [
128 |     "input_batch, output_batch, target_batch = make_batch(seq_data, num_dic, n_step)"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 19,
134 |    "id": "20affee4",
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "def forward(enc_input, dec_input, target):\n",
139 |     "    output = model(enc_input, dec_input)\n",
140 |     "    output = output.transpose((1, 0, 2))\n",
141 |     "    return criterion(output.view(-1, output.shape[-1]), target.view(-1))"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 20,
147 |    "id": "0727166e",
148 |    "metadata": {},
149 |    "outputs": [],
150 |    "source": [
151 |     "grad_fn = ops.value_and_grad(forward, None, optimizer.parameters)"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 21,
157 |    "id": "bd24c88f",
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": [
161 |     "@ms_function\n",
162 |     "def train_step(enc_input, dec_input, target):\n",
163 |     "    loss, grads = grad_fn(enc_input, dec_input, target)\n",
164 |     "    optimizer(grads)\n",
165 |     "    return loss"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": 22,
171 |    "id": "celtic-variety",
172 |    "metadata": {},
173 |    "outputs": [
174 |     {
175 |      "name": "stdout",
176 |      "output_type": "stream",
177 |      "text": [
178 |       "Epoch: 1000 cost = 0.000974\n",
179 |       "Epoch: 2000 cost = 0.000262\n",
180 |       "Epoch: 3000 cost = 0.000112\n",
181 |       "Epoch: 4000 cost = 0.000056\n",
182 |       "Epoch: 5000 cost = 0.000030\n"
183 |      ]
184 |     }
185 |    ],
186 |    "source": [
187 |     "model.set_train()\n",
188 |     "\n",
189 |     "for epoch in range(5000):\n",
190 |     "    # input_batch : [batch_size, max_len(=n_step, time step), n_class]\n",
191 |     "    # output_batch : [batch_size, max_len+1(=n_step, time step) (becase of 'S' or 'E'), n_class]\n",
192 |     "    # target_batch : [batch_size, max_len+1(=n_step, time step)], not one-hot\n",
193 |     "    loss = train_step(input_batch, output_batch, target_batch)\n",
194 |     "    if (epoch + 1) % 1000 == 0:\n",
195 |     "        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.asnumpy()))"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": 23,
201 |    "id": "resident-debate",
202 |    "metadata": {},
203 |    "outputs": [
204 |     {
205 |      "name": "stdout",
206 |      "output_type": "stream",
207 |      "text": [
208 |       "test\n",
209 |       "man -> women\n",
210 |       "mans -> women\n",
211 |       "king -> queen\n",
212 |       "black -> white\n",
213 |       "upp -> down\n"
214 |      ]
215 |     }
216 |    ],
217 |    "source": [
218 |     "model.set_train(False)\n",
219 |     "# Test\n",
220 |     "def translate(word):\n",
221 |     "    input_batch, output_batch, _ = make_batch([[word, 'P' * len(word)]],  num_dic, n_step)\n",
222 |     "    output = model(input_batch, output_batch)\n",
223 |     "    # output : [max_len+1(=6), batch_size(=1), n_class]\n",
224 |     "\n",
225 |     "    predict = output.asnumpy().argmax(2) # select n_class dimension\n",
226 |     "    decoded = [char_arr[i[0]] for i in predict]\n",
227 |     "    end = decoded.index('E')\n",
228 |     "    translated = ''.join(decoded[:end])\n",
229 |     "\n",
230 |     "    return translated.replace('P', '')\n",
231 |     "\n",
232 |     "print('test')\n",
233 |     "print('man ->', translate('man'))\n",
234 |     "print('mans ->', translate('mans'))\n",
235 |     "print('king ->', translate('king'))\n",
236 |     "print('black ->', translate('black'))\n",
237 |     "print('upp ->', translate('upp'))"
238 |    ]
239 |   }
240 |  ],
241 |  "metadata": {
242 |   "kernelspec": {
243 |    "display_name": "Python 3.7.13 ('ms1.8')",
244 |    "language": "python",
245 |    "name": "python3"
246 |   },
247 |   "language_info": {
248 |    "codemirror_mode": {
249 |     "name": "ipython",
250 |     "version": 3
251 |    },
252 |    "file_extension": ".py",
253 |    "mimetype": "text/x-python",
254 |    "name": "python",
255 |    "nbconvert_exporter": "python",
256 |    "pygments_lexer": "ipython3",
257 |    "version": "3.7.13"
258 |   },
259 |   "vscode": {
260 |    "interpreter": {
261 |     "hash": "bd0943702584cdb580f8947884f31a9fb49482f77f8c89ed6532de3aa180e7ba"
262 |    }
263 |   }
264 |  },
265 |  "nbformat": 4,
266 |  "nbformat_minor": 5
267 | }
268 | 


--------------------------------------------------------------------------------
/5-2.BERT/BERT.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # In[1]:
  5 | 
  6 | 
  7 | import re
  8 | from random import *
  9 | import mindspore
 10 | import mindspore.nn as nn
 11 | import mindspore.ops as ops
 12 | import mindspore.numpy as mnp
 13 | from layers import Dense, Embedding
 14 | 
 15 | 
 16 | # In[2]:
 17 | 
 18 | 
 19 | # sample IsNext and NotNext to be same in small batch size
 20 | def make_batch():
 21 |     batch = []
 22 |     positive = negative = 0
 23 |     while positive != batch_size/2 or negative != batch_size/2:
 24 |         tokens_a_index, tokens_b_index= randrange(len(sentences)), randrange(len(sentences)) # sample random index in sentences
 25 |         tokens_a, tokens_b= token_list[tokens_a_index], token_list[tokens_b_index]
 26 |         input_ids = [word_dict['[CLS]']] + tokens_a + [word_dict['[SEP]']] + tokens_b + [word_dict['[SEP]']]
 27 |         segment_ids = [0] * (1 + len(tokens_a) + 1) + [1] * (len(tokens_b) + 1)
 28 | 
 29 |         # MASK LM
 30 |         n_pred =  min(max_pred, max(1, int(round(len(input_ids) * 0.15)))) # 15 % of tokens in one sentence
 31 |         cand_maked_pos = [i for i, token in enumerate(input_ids)
 32 |                           if token != word_dict['[CLS]'] and token != word_dict['[SEP]']]
 33 |         shuffle(cand_maked_pos)
 34 |         masked_tokens, masked_pos = [], []
 35 |         for pos in cand_maked_pos[:n_pred]:
 36 |             masked_pos.append(pos)
 37 |             masked_tokens.append(input_ids[pos])
 38 |             if random() < 0.8:  # 80%
 39 |                 input_ids[pos] = word_dict['[MASK]'] # make mask
 40 |             elif random() < 0.5:  # 10%
 41 |                 index = randint(0, vocab_size - 1) # random index in vocabulary
 42 |                 input_ids[pos] = word_dict[number_dict[index]] # replace
 43 | 
 44 |         # Zero Paddings
 45 |         n_pad = maxlen - len(input_ids)
 46 |         input_ids.extend([0] * n_pad)
 47 |         segment_ids.extend([0] * n_pad)
 48 | 
 49 |         # Zero Padding (100% - 15%) tokens
 50 |         if max_pred > n_pred:
 51 |             n_pad = max_pred - n_pred
 52 |             masked_tokens.extend([0] * n_pad)
 53 |             masked_pos.extend([0] * n_pad)
 54 | 
 55 |         if tokens_a_index + 1 == tokens_b_index and positive < batch_size/2:
 56 |             batch.append([input_ids, segment_ids, masked_tokens, masked_pos, 1]) # IsNext
 57 |             positive += 1
 58 |         elif tokens_a_index + 1 != tokens_b_index and negative < batch_size/2:
 59 |             batch.append([input_ids, segment_ids, masked_tokens, masked_pos, 0]) # NotNext
 60 |             negative += 1
 61 |     return batch
 62 | # Proprecessing Finished
 63 | 
 64 | 
 65 | # In[3]:
 66 | 
 67 | 
 68 | def get_attn_pad_mask(seq_q, seq_k):
 69 |     batch_size, len_q = seq_q.shape
 70 |     batch_size, len_k = seq_k.shape
 71 |     
 72 |     pad_attn_mask = ops.equal(seq_k, 0)
 73 |     pad_attn_mask = pad_attn_mask.expand_dims(1) # batch_size x 1 x len_k(=len_q), one is masking
 74 | 
 75 |     return ops.broadcast_to(pad_attn_mask, (batch_size, len_q, len_k)) # batch_size x len_q x len_k
 76 | 
 77 | 
 78 | # In[4]:
 79 | 
 80 | 
 81 | class BertEmbedding(nn.Cell):
 82 |     def __init__(self):
 83 |         super(BertEmbedding, self).__init__()
 84 |         self.tok_embed = Embedding(vocab_size, d_model)  # token embedding
 85 |         self.pos_embed = Embedding(maxlen, d_model)  # position embedding
 86 |         self.seg_embed = Embedding(n_segments, d_model)  # segment(token type) embedding
 87 |         self.norm = nn.LayerNorm([d_model,])
 88 | 
 89 |     def construct(self, x, seg):
 90 |         seq_len = x.shape[1]
 91 |         pos = ops.arange(seq_len, dtype=mindspore.int64)
 92 |         pos = pos.expand_dims(0).expand_as(x)  # (seq_len,) -> (batch_size, seq_len)
 93 |         embedding = self.tok_embed(x) + self.pos_embed(pos) + self.seg_embed(seg)
 94 |         return self.norm(embedding)
 95 | 
 96 | 
 97 | # In[5]:
 98 | 
 99 | 
100 | class ScaledDotProductAttention(nn.Cell):
101 |     def __init__(self):
102 |         super(ScaledDotProductAttention, self).__init__()
103 |         self.softmax = nn.Softmax(axis=-1)
104 | 
105 |     def construct(self, Q, K, V, attn_mask):
106 |         scores = ops.matmul(Q, K.swapaxes(-1, -2)) / ops.sqrt(ops.scalar_to_tensor(d_k)) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]
107 |         scores = scores.masked_fill(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.
108 |         attn = self.softmax(scores)
109 |         context = ops.matmul(attn, V)
110 |         return context, attn
111 | 
112 | 
113 | # In[6]:
114 | 
115 | 
116 | class MultiHeadAttention(nn.Cell):
117 |     def __init__(self):
118 |         super(MultiHeadAttention, self).__init__()
119 |         self.W_Q = Dense(d_model, d_k * n_heads)
120 |         self.W_K = Dense(d_model, d_k * n_heads)
121 |         self.W_V = Dense(d_model, d_v * n_heads)
122 |         self.attn = ScaledDotProductAttention()
123 |         self.out_fc = Dense(n_heads * d_v, d_model)
124 |         self.norm = nn.LayerNorm([d_model,])
125 | 
126 |     def construct(self, Q, K, V, attn_mask):
127 |         # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]
128 |         residual, batch_size = Q, Q.shape[0]
129 |         # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)
130 |         q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).swapaxes(1,2)  # q_s: [batch_size x n_heads x len_q x d_k]
131 |         k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).swapaxes(1,2)  # k_s: [batch_size x n_heads x len_k x d_k]
132 |         v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).swapaxes(1,2)  # v_s: [batch_size x n_heads x len_k x d_v]
133 | 
134 |         attn_mask = attn_mask.expand_dims(1)
135 |         attn_mask = ops.tile(attn_mask, (1, n_heads, 1, 1))
136 |         
137 |         # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]
138 |         context, attn = self.attn(q_s, k_s, v_s, attn_mask)
139 |         context = context.swapaxes(1, 2).view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]
140 |         output = self.out_fc(context)
141 |         return self.norm(output + residual), attn # output: [batch_size x len_q x d_model]
142 | 
143 | 
144 | # In[7]:
145 | 
146 | 
147 | class PoswiseFeedForwardNet(nn.Cell):
148 |     def __init__(self):
149 |         super(PoswiseFeedForwardNet, self).__init__()
150 |         self.fc1 = Dense(d_model, d_ff)
151 |         self.fc2 = Dense(d_ff, d_model)
152 |         self.activation = nn.GELU(False)
153 | 
154 |     def construct(self, x):
155 |         # (batch_size, len_seq, d_model) -> (batch_size, len_seq, d_ff) -> (batch_size, len_seq, d_model)
156 |         return self.fc2(self.activation(self.fc1(x)))
157 | 
158 | 
159 | # In[8]:
160 | 
161 | 
162 | class EncoderLayer(nn.Cell):
163 |     def __init__(self):
164 |         super(EncoderLayer, self).__init__()
165 |         self.enc_self_attn = MultiHeadAttention()
166 |         self.pos_ffn = PoswiseFeedForwardNet()
167 | 
168 |     def construct(self, enc_inputs, enc_self_attn_mask):
169 |         enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V
170 |         enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]
171 |         return enc_outputs, attn
172 | 
173 | 
174 | # In[9]:
175 | 
176 | 
177 | class BERT(nn.Cell):
178 |     def __init__(self):
179 |         super(BERT, self).__init__()
180 |         self.embedding = BertEmbedding()
181 |         self.layers = nn.CellList([EncoderLayer() for _ in range(n_layers)])
182 |         self.fc = Dense(d_model, d_model)
183 |         self.activ1 = nn.Tanh()
184 |         self.linear = Dense(d_model, d_model)
185 |         self.activ2 = nn.GELU(False)
186 |         self.norm = nn.LayerNorm([d_model,])
187 |         self.classifier = Dense(d_model, 2)
188 |         # decoder is shared with embedding layer
189 |         embed_weight = self.embedding.tok_embed.embedding_table
190 |         n_vocab, n_dim = embed_weight.shape
191 |         self.decoder = Dense(n_dim, n_vocab, has_bias=False)
192 |         self.decoder.weight = embed_weight
193 |         self.decoder_bias = mindspore.Parameter(ops.zeros(n_vocab), 'decoder_bias')
194 | 
195 |     def construct(self, input_ids, segment_ids, masked_pos):
196 |         output = self.embedding(input_ids, segment_ids)
197 |         enc_self_attn_mask = get_attn_pad_mask(input_ids, input_ids)
198 |         for layer in self.layers:
199 |             output, enc_self_attn = layer(output, enc_self_attn_mask)
200 |         # output : [batch_size, len, d_model], attn : [batch_size, n_heads, d_mode, d_model]
201 |         # it will be decided by first token(CLS)
202 |         h_pooled = self.activ1(self.fc(output[:, 0])) # [batch_size, d_model]
203 |         logits_clsf = self.classifier(h_pooled) # [batch_size, 2]
204 | 
205 |         masked_pos = ops.tile(masked_pos[:, :, None], (1, 1, output.shape[-1])) # [batch_size, max_pred, d_model]
206 |         # get masked position from final output of transformer.
207 |         h_masked = ops.gather_d(output, 1, masked_pos) # masking position [batch_size, max_pred, d_model]
208 |         h_masked = self.norm(self.activ2(self.linear(h_masked)))
209 |         logits_lm = self.decoder(h_masked) + self.decoder_bias # [batch_size, max_pred, n_vocab]
210 | 
211 |         return logits_lm, logits_clsf
212 | 
213 | 
214 | # In[10]:
215 | 
216 | 
217 | # BERT Parameters
218 | maxlen = 30 # maximum of length
219 | batch_size = 6
220 | max_pred = 5  # max tokens of prediction
221 | n_layers = 6 # number of Encoder of Encoder Layer
222 | n_heads = 12 # number of heads in Multi-Head Attention
223 | d_model = 768 # Embedding Size
224 | d_ff = 768 * 4  # 4*d_model, FeedForward dimension
225 | d_k = d_v = 64  # dimension of K(=Q), V
226 | n_segments = 2
227 | 
228 | 
229 | # In[11]:
230 | 
231 | 
232 | text = (
233 |     'Hello, how are you? I am Romeo.\n'
234 |     'Hello, Romeo My name is Juliet. Nice to meet you.\n'
235 |     'Nice meet you too. How are you today?\n'
236 |     'Great. My baseball team won the competition.\n'
237 |     'Oh Congratulations, Juliet\n'
238 |     'Thanks you Romeo'
239 | )
240 | sentences = re.sub("[.,!?\\-]", '', text.lower()).split('\n')  # filter '.', ',', '?', '!'
241 | word_list = list(set(" ".join(sentences).split()))
242 | word_dict = {'[PAD]': 0, '[CLS]': 1, '[SEP]': 2, '[MASK]': 3}
243 | for i, w in enumerate(word_list):
244 |     word_dict[w] = i + 4
245 | number_dict = {i: w for i, w in enumerate(word_dict)}
246 | vocab_size = len(word_dict)
247 | 
248 | token_list = list()
249 | for sentence in sentences:
250 |     arr = [word_dict[s] for s in sentence.split()]
251 |     token_list.append(arr)
252 | 
253 | 
254 | # In[12]:
255 | 
256 | 
257 | model = BERT()
258 | criterion = nn.CrossEntropyLoss()
259 | optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)
260 | 
261 | 
262 | # In[13]:
263 | 
264 | 
265 | def forward(input_ids, segment_ids, masked_pos, masked_tokens, isNext):
266 |     logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)
267 |     loss_lm = criterion(logits_lm.swapaxes(1, 2), masked_tokens.astype(mindspore.int32))
268 |     loss_lm = loss_lm.mean()
269 |     loss_clsf = criterion(logits_clsf, isNext.astype(mindspore.int32))
270 | 
271 |     return loss_lm + loss_clsf
272 | 
273 | 
274 | # In[14]:
275 | 
276 | 
277 | grad_fn = ops.value_and_grad(forward, None, optimizer.parameters)
278 | 
279 | 
280 | # In[15]:
281 | 
282 | 
283 | @mindspore.jit
284 | def train_step(input_ids, segment_ids, masked_pos, masked_tokens, isNext):
285 |     loss, grads = grad_fn(input_ids, segment_ids, masked_pos, masked_tokens, isNext)
286 |     optimizer(grads)
287 |     return loss
288 | 
289 | 
290 | # In[16]:
291 | 
292 | 
293 | batch = make_batch()
294 | input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(mindspore.Tensor, zip(*batch))
295 | 
296 | model.set_train()
297 | for epoch in range(100):
298 |     loss = train_step(input_ids, segment_ids, masked_pos, masked_tokens, isNext) # for sentence classification
299 |     if (epoch + 1) % 10 == 0:
300 |         print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.asnumpy()))
301 | 
302 | 
303 | # In[ ]:
304 | 
305 | 
306 | # Predict mask tokens ans isNext
307 | input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(mindspore.Tensor, zip(batch[0]))
308 | print(text)
309 | print([number_dict[int(w.asnumpy())] for w in input_ids[0] if number_dict[int(w.asnumpy())] != '[PAD]'])
310 | 
311 | logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)
312 | logits_lm = logits_lm.argmax(2)[0].asnumpy()
313 | print('masked tokens list : ',[pos for pos in masked_tokens[0] if pos != 0])
314 | print('predict masked tokens list : ',[pos for pos in logits_lm if pos != 0])
315 | 
316 | logits_clsf = logits_clsf.argmax(1).asnumpy()[0]
317 | print('isNext : ', True if isNext else False)
318 | print('predict isNext : ',True if logits_clsf else False)
319 | 
320 | 


--------------------------------------------------------------------------------
/5-1.Transformer/Transformer.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # In[1]:
  5 | 
  6 | 
  7 | import mindspore
  8 | import numpy as np
  9 | import mindspore.nn as nn
 10 | import mindspore.ops as ops
 11 | from mindspore import Tensor
 12 | import matplotlib.pyplot as plt
 13 | from layers import Dense, Embedding, Conv1d
 14 | # S: Symbol that shows starting of decoding input
 15 | # E: Symbol that shows starting of decoding output
 16 | # P: Symbol that will fill in blank sequence if current batch data size is short than time steps
 17 | 
 18 | 
 19 | # In[2]:
 20 | 
 21 | 
 22 | def make_batch(sentences, src_vocab, tgt_vocab):
 23 |     input_batch = [[src_vocab[n] for n in sentences[0].split()]]
 24 |     output_batch = [[tgt_vocab[n] for n in sentences[1].split()]]
 25 |     target_batch = [[tgt_vocab[n] for n in sentences[2].split()]]
 26 |     return Tensor(input_batch, mindspore.int32), Tensor(output_batch, mindspore.int32), Tensor(target_batch, mindspore.int32)
 27 | 
 28 | 
 29 | # In[3]:
 30 | 
 31 | 
 32 | def get_sinusoid_encoding_table(n_position, d_model):
 33 |     def cal_angle(position, hid_idx):
 34 |         return position / np.power(10000, 2 * (hid_idx // 2) / d_model)
 35 |     def get_posi_angle_vec(position):
 36 |         return [cal_angle(position, hid_j) for hid_j in range(d_model)]
 37 | 
 38 |     sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)])
 39 |     sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])  # dim 2i
 40 |     sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])  # dim 2i+1
 41 |     return Tensor(sinusoid_table, mindspore.float32)
 42 | 
 43 | 
 44 | # In[4]:
 45 | 
 46 | 
 47 | def get_attn_pad_mask(seq_q, seq_k):
 48 |     batch_size, len_q = seq_q.shape
 49 |     batch_size, len_k = seq_k.shape
 50 |     
 51 |     pad_attn_mask = seq_k.eq(0).unsqueeze(1)  # batch_size x 1 x len_k(=len_q), one is masking
 52 |     return pad_attn_mask.broadcast_to((batch_size, len_q, len_k))  # batch_size x len_q x len_k
 53 | 
 54 | 
 55 | # In[5]:
 56 | 
 57 | 
 58 | def get_attn_subsequent_mask(seq):
 59 |     attn_shape = [seq.shape[0], seq.shape[1], seq.shape[1]]
 60 |     subsequent_mask = np.triu(np.ones(attn_shape), k=1)
 61 |     subsequent_mask = Tensor.from_numpy(subsequent_mask).to(mindspore.uint8)
 62 |     return subsequent_mask
 63 | 
 64 | 
 65 | # In[6]:
 66 | 
 67 | 
 68 | class ScaledDotProductAttention(nn.Cell):
 69 |     def __init__(self, d_k):
 70 |         super().__init__()
 71 |         self.softmax = nn.Softmax(axis=-1)
 72 |         self.d_k = Tensor(d_k, mindspore.float32)
 73 |         
 74 |     def construct(self, Q, K, V, attn_mask):
 75 |         scores = ops.matmul(Q, K.swapaxes(-1, -2)) / ops.sqrt(self.d_k)# scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]
 76 |         scores = scores.masked_fill(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.
 77 |         attn = ops.softmax(scores)
 78 |         context = ops.matmul(attn, V)
 79 |         return context, attn
 80 | 
 81 | 
 82 | # In[7]:
 83 | 
 84 | 
 85 | class MultiHeadAttention(nn.Cell):
 86 |     def __init__(self, d_model, d_k, d_v, n_heads):
 87 |         super().__init__()
 88 |         self.d_k = d_k
 89 |         self.d_v = d_v
 90 |         self.n_heads = n_heads
 91 |         self.W_Q = Dense(d_model, d_k * n_heads)
 92 |         self.W_K = Dense(d_model, d_k * n_heads)
 93 |         self.W_V = Dense(d_model, d_v * n_heads)
 94 |         self.linear = Dense(n_heads * d_v, d_model)
 95 |         self.layer_norm = nn.LayerNorm((d_model, ), epsilon=1e-5)
 96 |         self.attention = ScaledDotProductAttention(d_k)
 97 |         
 98 |     def construct(self, Q, K, V, attn_mask):
 99 |         # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]
100 |         residual, batch_size = Q, Q.shape[0]
101 |         # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)
102 |         q_s = self.W_Q(Q).view(batch_size, -1, self.n_heads, self.d_k).swapaxes(1,2)  # q_s: [batch_size x n_heads x len_q x d_k]
103 |         k_s = self.W_K(K).view(batch_size, -1, self.n_heads, self.d_k).swapaxes(1,2)  # k_s: [batch_size x n_heads x len_k x d_k]
104 |         v_s = self.W_V(V).view(batch_size, -1, self.n_heads, self.d_v).swapaxes(1,2)  # v_s: [batch_size x n_heads x len_k x d_v]
105 | 
106 |         attn_mask = attn_mask.unsqueeze(1).tile((1, n_heads, 1, 1)) # attn_mask : [batch_size x n_heads x len_q x len_k]
107 | 
108 |         # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]
109 |         context, attn = self.attention(q_s, k_s, v_s, attn_mask)
110 |         context = context.swapaxes(1, 2).view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]
111 |         output = self.linear(context)
112 |         return self.layer_norm(output + residual), attn # output: [batch_size x len_q x d_model]
113 | 
114 | 
115 | # In[8]:
116 | 
117 | 
118 | class PoswiseFeedForward(nn.Cell):
119 |     def __init__(self, d_ff, d_model):
120 |         super().__init__()
121 |         self.conv1 = Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
122 |         self.conv2 = Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
123 |         self.layer_norm = nn.LayerNorm((d_model, ), epsilon=1e-5)
124 |         self.relu = nn.ReLU()
125 |         
126 |     def construct(self, inputs):
127 |         residual = inputs # inputs : [batch_size, len_q, d_model]
128 |         output = self.relu(self.conv1(inputs.swapaxes(1, 2)))
129 |         output = self.conv2(output).swapaxes(1, 2)
130 |         return self.layer_norm(output + residual)
131 | 
132 | 
133 | # In[9]:
134 | 
135 | 
136 | class EncoderLayer(nn.Cell):
137 |     def __init__(self, d_model, d_k, d_v, n_heads, d_ff):
138 |         super().__init__()
139 |         self.enc_self_attn = MultiHeadAttention(d_model, d_k, d_v, n_heads)
140 |         self.pos_ffn = PoswiseFeedForward(d_ff, d_model)
141 |         
142 |     def construct(self, enc_inputs, enc_self_attn_mask):
143 |         enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V
144 |         enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]
145 |         return enc_outputs, attn
146 | 
147 | 
148 | # In[10]:
149 | 
150 | 
151 | class DecoderLayer(nn.Cell):
152 |     def __init__(self, d_model, d_k, d_v, n_heads, d_ff):
153 |         super().__init__()
154 |         self.dec_self_attn = MultiHeadAttention(d_model, d_k, d_v, n_heads)
155 |         self.dec_enc_attn = MultiHeadAttention(d_model, d_k, d_v, n_heads)
156 |         self.pos_ffn = PoswiseFeedForward(d_ff, d_model)
157 |         
158 |     def construct(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):
159 |         dec_outputs, dec_self_attn = self.dec_self_attn(dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask)
160 |         dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)
161 |         dec_outputs = self.pos_ffn(dec_outputs)
162 |         return dec_outputs, dec_self_attn, dec_enc_attn
163 | 
164 | 
165 | # In[11]:
166 | 
167 | 
168 | class Encoder(nn.Cell):
169 |     def __init__(self, src_vocab_size, d_model, d_k, d_v, n_heads, d_ff, n_layers, src_len):
170 |         super().__init__()
171 |         self.src_emb = Embedding(src_vocab_size, d_model)
172 |         self.pos_emb = Embedding.from_pretrained_embedding(get_sinusoid_encoding_table(src_len+1, d_model), freeze=True)
173 |         self.layers = nn.CellList([EncoderLayer(d_model, d_k, d_v, n_heads, d_ff) for _ in range(n_layers)])
174 |         # temp positional indexes
175 |         self.pos = Tensor([[1, 2, 3, 4, 0]])
176 |         
177 |     def construct(self, enc_inputs):
178 |         # enc_inputs : [batch_size x source_len]
179 |         enc_outputs = self.src_emb(enc_inputs) + self.pos_emb(self.pos)
180 |         enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)
181 |         enc_self_attns = []
182 |         for layer in self.layers:
183 |             enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)
184 |             enc_self_attns.append(enc_self_attn)
185 |         return enc_outputs, enc_self_attns
186 | 
187 | 
188 | # In[12]:
189 | 
190 | 
191 | class Decoder(nn.Cell):
192 |     def __init__(self, tgt_vocab_size, d_model, d_k, d_v, n_heads, d_ff, n_layers, tgt_len):
193 |         super().__init__()
194 |         self.tgt_emb = Embedding(tgt_vocab_size, d_model)
195 |         self.pos_emb = Embedding.from_pretrained_embedding(get_sinusoid_encoding_table(tgt_len+1, d_model), freeze=True)
196 |         self.layers = nn.CellList([DecoderLayer(d_model, d_k, d_v, n_heads, d_ff) for _ in range(n_layers)])
197 | 
198 |     def construct(self, dec_inputs, enc_inputs, enc_outputs):
199 |         # dec_inputs : [batch_size x target_len]
200 |         dec_outputs = self.tgt_emb(dec_inputs) + self.pos_emb(Tensor([[5,1,2,3,4]]))
201 |         dec_self_attn_pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs)
202 |         dec_self_attn_subsequent_mask = get_attn_subsequent_mask(dec_inputs)
203 |         dec_self_attn_mask = ops.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)
204 |         
205 |         dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs)
206 | 
207 |         dec_self_attns, dec_enc_attns = [], []
208 |         for layer in self.layers:
209 |             dec_outputs, dec_self_attn, dec_enc_attn = layer(dec_outputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask)
210 |             dec_self_attns.append(dec_self_attn)
211 |             dec_enc_attns.append(dec_enc_attn)
212 |         return dec_outputs, dec_self_attns, dec_enc_attns
213 | 
214 | 
215 | # In[13]:
216 | 
217 | 
218 | class Transformer(nn.Cell):
219 |     def __init__(self, d_model, d_k, d_v, n_heads, d_ff, n_layers, src_vocab_size, tgt_vocab_size, src_len, tgt_len):
220 |         super(Transformer, self).__init__()
221 |         self.encoder = Encoder(src_vocab_size, d_model, d_k, d_v, n_heads, d_ff, n_layers, src_len)
222 |         self.decoder = Decoder(tgt_vocab_size, d_model, d_k, d_v, n_heads, d_ff, n_layers, tgt_len)
223 |         self.projection = Dense(d_model, tgt_vocab_size, has_bias=False)
224 | 
225 |     def construct(self, enc_inputs, dec_inputs):
226 |         enc_outputs, enc_self_attns = self.encoder(enc_inputs)
227 |         dec_outputs, dec_self_attns, dec_enc_attns = self.decoder(dec_inputs, enc_inputs, enc_outputs)
228 |         dec_logits = self.projection(dec_outputs) # dec_logits : [batch_size x src_vocab_size x tgt_vocab_size]
229 |         return dec_logits.view((-1, dec_logits.shape[-1])), enc_self_attns, dec_self_attns, dec_enc_attns
230 | 
231 | 
232 | # In[14]:
233 | 
234 | 
235 | sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']
236 | 
237 | # Transformer Parameters
238 | # Padding Should be Zero
239 | src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4}
240 | src_vocab_size = len(src_vocab)
241 | 
242 | tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6}
243 | number_dict = {i: w for i, w in enumerate(tgt_vocab)}
244 | tgt_vocab_size = len(tgt_vocab)
245 | 
246 | src_len = 6 # length of source
247 | tgt_len = 5 # length of target
248 | 
249 | d_model = 512  # Embedding Size
250 | d_ff = 2048  # FeedForward dimension
251 | d_k = d_v = 64  # dimension of K(=Q), V
252 | n_layers = 6  # number of Encoder of Decoder Layer
253 | n_heads = 8  # number of heads in Multi-Head Attention
254 | 
255 | 
256 | # In[15]:
257 | 
258 | 
259 | model = Transformer(d_model, d_k, d_v, n_heads, d_ff, n_layers, src_vocab_size, tgt_vocab_size, src_len, tgt_len)
260 | 
261 | 
262 | # In[16]:
263 | 
264 | 
265 | criterion = nn.CrossEntropyLoss()
266 | optimizer = nn.Adam(model.trainable_params(), learning_rate=0.0001)
267 | # print(model.trainable_params())
268 | enc_inputs, dec_inputs, target_batch = make_batch(sentences, src_vocab, tgt_vocab)
269 | 
270 | 
271 | # In[17]:
272 | 
273 | 
274 | def forward(enc_inputs, dec_inputs, target_batch):
275 |     outputs, _, _, _, = model(enc_inputs, dec_inputs)
276 |     loss = criterion(outputs, target_batch)
277 | 
278 |     return loss
279 | 
280 | 
281 | # In[18]:
282 | 
283 | 
284 | grad_fn = ops.value_and_grad(forward, None, optimizer.parameters)
285 | 
286 | 
287 | # In[19]:
288 | 
289 | 
290 | @mindspore.jit
291 | def train_step(enc_inputs, dec_inputs, target_batch):
292 |     loss, grads = grad_fn(enc_inputs, dec_inputs, target_batch)
293 |     optimizer(grads)
294 |     return loss
295 | 
296 | 
297 | # In[20]:
298 | 
299 | 
300 | model.set_train()
301 | 
302 | # Training
303 | for epoch in range(20):
304 |     # hidden : [num_layers * num_directions, batch, hidden_size]
305 |     loss = train_step(enc_inputs, dec_inputs, target_batch.view(-1))
306 |     print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.asnumpy()))
307 | 
308 | 
309 | # In[21]:
310 | 
311 | 
312 | # Test
313 | predict, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)
314 | predict = predict.asnumpy().argmax(1)
315 | print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])
316 | 
317 | 
318 | # In[22]:
319 | 
320 | 
321 | def showgraph(attn):
322 |     attn = attn[-1].squeeze(0)[0]
323 |     attn = attn.asnumpy()
324 |     fig = plt.figure(figsize=(n_heads, n_heads)) # [n_heads, n_heads]
325 |     ax = fig.add_subplot(1, 1, 1)
326 |     ax.matshow(attn, cmap='viridis')
327 |     ax.set_xticklabels(['']+sentences[0].split(), fontdict={'fontsize': 14}, rotation=90)
328 |     ax.set_yticklabels(['']+sentences[2].split(), fontdict={'fontsize': 14})
329 |     plt.show()
330 | 
331 | 
332 | # In[ ]:
333 | 
334 | 
335 | print('first head of last state enc_self_attns')
336 | showgraph(enc_self_attns)
337 | 
338 | print('first head of last state dec_self_attns')
339 | showgraph(dec_self_attns)
340 | 
341 | print('first head of last state dec_enc_attns')
342 | showgraph(dec_enc_attns)
343 | 
344 | 


--------------------------------------------------------------------------------
/5-2.BERT/BERT_pytorch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "# code by Tae Hwan Jung(Jeff Jung) @graykode\n",
 10 |     "# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\n",
 11 |     "#           https://github.com/JayParks/transformer, https://github.com/dhlee347/pytorchic-bert\n",
 12 |     "import math\n",
 13 |     "import re\n",
 14 |     "from random import *\n",
 15 |     "import numpy as np\n",
 16 |     "import torch\n",
 17 |     "import torch.nn as nn\n",
 18 |     "import torch.optim as optim\n",
 19 |     "\n",
 20 |     "# sample IsNext and NotNext to be same in small batch size\n",
 21 |     "def make_batch():\n",
 22 |     "    batch = []\n",
 23 |     "    positive = negative = 0\n",
 24 |     "    while positive != batch_size/2 or negative != batch_size/2:\n",
 25 |     "        tokens_a_index, tokens_b_index= randrange(len(sentences)), randrange(len(sentences)) # sample random index in sentences\n",
 26 |     "        tokens_a, tokens_b= token_list[tokens_a_index], token_list[tokens_b_index]\n",
 27 |     "        input_ids = [word_dict['[CLS]']] + tokens_a + [word_dict['[SEP]']] + tokens_b + [word_dict['[SEP]']]\n",
 28 |     "        segment_ids = [0] * (1 + len(tokens_a) + 1) + [1] * (len(tokens_b) + 1)\n",
 29 |     "\n",
 30 |     "        # MASK LM\n",
 31 |     "        n_pred =  min(max_pred, max(1, int(round(len(input_ids) * 0.15)))) # 15 % of tokens in one sentence\n",
 32 |     "        cand_maked_pos = [i for i, token in enumerate(input_ids)\n",
 33 |     "                          if token != word_dict['[CLS]'] and token != word_dict['[SEP]']]\n",
 34 |     "        shuffle(cand_maked_pos)\n",
 35 |     "        masked_tokens, masked_pos = [], []\n",
 36 |     "        for pos in cand_maked_pos[:n_pred]:\n",
 37 |     "            masked_pos.append(pos)\n",
 38 |     "            masked_tokens.append(input_ids[pos])\n",
 39 |     "            if random() < 0.8:  # 80%\n",
 40 |     "                input_ids[pos] = word_dict['[MASK]'] # make mask\n",
 41 |     "            elif random() < 0.5:  # 10%\n",
 42 |     "                index = randint(0, vocab_size - 1) # random index in vocabulary\n",
 43 |     "                input_ids[pos] = word_dict[number_dict[index]] # replace\n",
 44 |     "\n",
 45 |     "        # Zero Paddings\n",
 46 |     "        n_pad = maxlen - len(input_ids)\n",
 47 |     "        input_ids.extend([0] * n_pad)\n",
 48 |     "        segment_ids.extend([0] * n_pad)\n",
 49 |     "\n",
 50 |     "        # Zero Padding (100% - 15%) tokens\n",
 51 |     "        if max_pred > n_pred:\n",
 52 |     "            n_pad = max_pred - n_pred\n",
 53 |     "            masked_tokens.extend([0] * n_pad)\n",
 54 |     "            masked_pos.extend([0] * n_pad)\n",
 55 |     "\n",
 56 |     "        if tokens_a_index + 1 == tokens_b_index and positive < batch_size/2:\n",
 57 |     "            batch.append([input_ids, segment_ids, masked_tokens, masked_pos, True]) # IsNext\n",
 58 |     "            positive += 1\n",
 59 |     "        elif tokens_a_index + 1 != tokens_b_index and negative < batch_size/2:\n",
 60 |     "            batch.append([input_ids, segment_ids, masked_tokens, masked_pos, False]) # NotNext\n",
 61 |     "            negative += 1\n",
 62 |     "    return batch\n",
 63 |     "# Proprecessing Finished\n",
 64 |     "\n",
 65 |     "def get_attn_pad_mask(seq_q, seq_k):\n",
 66 |     "    batch_size, len_q = seq_q.size()\n",
 67 |     "    batch_size, len_k = seq_k.size()\n",
 68 |     "    # eq(zero) is PAD token\n",
 69 |     "    pad_attn_mask = seq_k.data.eq(0).unsqueeze(1)  # batch_size x 1 x len_k(=len_q), one is masking\n",
 70 |     "    return pad_attn_mask.expand(batch_size, len_q, len_k)  # batch_size x len_q x len_k\n",
 71 |     "\n",
 72 |     "def gelu(x):\n",
 73 |     "    \"Implementation of the gelu activation function by Hugging Face\"\n",
 74 |     "    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))\n",
 75 |     "\n",
 76 |     "class Embedding(nn.Module):\n",
 77 |     "    def __init__(self):\n",
 78 |     "        super(Embedding, self).__init__()\n",
 79 |     "        self.tok_embed = nn.Embedding(vocab_size, d_model)  # token embedding\n",
 80 |     "        self.pos_embed = nn.Embedding(maxlen, d_model)  # position embedding\n",
 81 |     "        self.seg_embed = nn.Embedding(n_segments, d_model)  # segment(token type) embedding\n",
 82 |     "        self.norm = nn.LayerNorm(d_model)\n",
 83 |     "\n",
 84 |     "    def forward(self, x, seg):\n",
 85 |     "        seq_len = x.size(1)\n",
 86 |     "        pos = torch.arange(seq_len, dtype=torch.long)\n",
 87 |     "        pos = pos.unsqueeze(0).expand_as(x)  # (seq_len,) -> (batch_size, seq_len)\n",
 88 |     "        embedding = self.tok_embed(x) + self.pos_embed(pos) + self.seg_embed(seg)\n",
 89 |     "        return self.norm(embedding)\n",
 90 |     "\n",
 91 |     "class ScaledDotProductAttention(nn.Module):\n",
 92 |     "    def __init__(self):\n",
 93 |     "        super(ScaledDotProductAttention, self).__init__()\n",
 94 |     "\n",
 95 |     "    def forward(self, Q, K, V, attn_mask):\n",
 96 |     "        scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n",
 97 |     "        scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\n",
 98 |     "        attn = nn.Softmax(dim=-1)(scores)\n",
 99 |     "        context = torch.matmul(attn, V)\n",
100 |     "        return context, attn\n",
101 |     "\n",
102 |     "class MultiHeadAttention(nn.Module):\n",
103 |     "    def __init__(self):\n",
104 |     "        super(MultiHeadAttention, self).__init__()\n",
105 |     "        self.W_Q = nn.Linear(d_model, d_k * n_heads)\n",
106 |     "        self.W_K = nn.Linear(d_model, d_k * n_heads)\n",
107 |     "        self.W_V = nn.Linear(d_model, d_v * n_heads)\n",
108 |     "    def forward(self, Q, K, V, attn_mask):\n",
109 |     "        # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\n",
110 |     "        residual, batch_size = Q, Q.size(0)\n",
111 |     "        # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\n",
112 |     "        q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # q_s: [batch_size x n_heads x len_q x d_k]\n",
113 |     "        k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # k_s: [batch_size x n_heads x len_k x d_k]\n",
114 |     "        v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2)  # v_s: [batch_size x n_heads x len_k x d_v]\n",
115 |     "\n",
116 |     "        attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k]\n",
117 |     "\n",
118 |     "        # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n",
119 |     "        context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)\n",
120 |     "        context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\n",
121 |     "        output = nn.Linear(n_heads * d_v, d_model)(context)\n",
122 |     "        return nn.LayerNorm(d_model)(output + residual), attn # output: [batch_size x len_q x d_model]\n",
123 |     "\n",
124 |     "class PoswiseFeedForwardNet(nn.Module):\n",
125 |     "    def __init__(self):\n",
126 |     "        super(PoswiseFeedForwardNet, self).__init__()\n",
127 |     "        self.fc1 = nn.Linear(d_model, d_ff)\n",
128 |     "        self.fc2 = nn.Linear(d_ff, d_model)\n",
129 |     "\n",
130 |     "    def forward(self, x):\n",
131 |     "        # (batch_size, len_seq, d_model) -> (batch_size, len_seq, d_ff) -> (batch_size, len_seq, d_model)\n",
132 |     "        return self.fc2(gelu(self.fc1(x)))\n",
133 |     "\n",
134 |     "class EncoderLayer(nn.Module):\n",
135 |     "    def __init__(self):\n",
136 |     "        super(EncoderLayer, self).__init__()\n",
137 |     "        self.enc_self_attn = MultiHeadAttention()\n",
138 |     "        self.pos_ffn = PoswiseFeedForwardNet()\n",
139 |     "\n",
140 |     "    def forward(self, enc_inputs, enc_self_attn_mask):\n",
141 |     "        enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\n",
142 |     "        enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\n",
143 |     "        return enc_outputs, attn\n",
144 |     "\n",
145 |     "class BERT(nn.Module):\n",
146 |     "    def __init__(self):\n",
147 |     "        super(BERT, self).__init__()\n",
148 |     "        self.embedding = Embedding()\n",
149 |     "        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\n",
150 |     "        self.fc = nn.Linear(d_model, d_model)\n",
151 |     "        self.activ1 = nn.Tanh()\n",
152 |     "        self.linear = nn.Linear(d_model, d_model)\n",
153 |     "        self.activ2 = gelu\n",
154 |     "        self.norm = nn.LayerNorm(d_model)\n",
155 |     "        self.classifier = nn.Linear(d_model, 2)\n",
156 |     "        # decoder is shared with embedding layer\n",
157 |     "        embed_weight = self.embedding.tok_embed.weight\n",
158 |     "        n_vocab, n_dim = embed_weight.size()\n",
159 |     "        self.decoder = nn.Linear(n_dim, n_vocab, bias=False)\n",
160 |     "        self.decoder.weight = embed_weight\n",
161 |     "        self.decoder_bias = nn.Parameter(torch.zeros(n_vocab))\n",
162 |     "\n",
163 |     "    def forward(self, input_ids, segment_ids, masked_pos):\n",
164 |     "        output = self.embedding(input_ids, segment_ids)\n",
165 |     "        enc_self_attn_mask = get_attn_pad_mask(input_ids, input_ids)\n",
166 |     "        for layer in self.layers:\n",
167 |     "            output, enc_self_attn = layer(output, enc_self_attn_mask)\n",
168 |     "        # output : [batch_size, len, d_model], attn : [batch_size, n_heads, d_mode, d_model]\n",
169 |     "        # it will be decided by first token(CLS)\n",
170 |     "        h_pooled = self.activ1(self.fc(output[:, 0])) # [batch_size, d_model]\n",
171 |     "        logits_clsf = self.classifier(h_pooled) # [batch_size, 2]\n",
172 |     "\n",
173 |     "        masked_pos = masked_pos[:, :, None].expand(-1, -1, output.size(-1)) # [batch_size, max_pred, d_model]\n",
174 |     "        # get masked position from final output of transformer.\n",
175 |     "        h_masked = torch.gather(output, 1, masked_pos) # masking position [batch_size, max_pred, d_model]\n",
176 |     "        h_masked = self.norm(self.activ2(self.linear(h_masked)))\n",
177 |     "        logits_lm = self.decoder(h_masked) + self.decoder_bias # [batch_size, max_pred, n_vocab]\n",
178 |     "\n",
179 |     "        return logits_lm, logits_clsf\n",
180 |     "\n",
181 |     "if __name__ == '__main__':\n",
182 |     "    # BERT Parameters\n",
183 |     "    maxlen = 30 # maximum of length\n",
184 |     "    batch_size = 6\n",
185 |     "    max_pred = 5  # max tokens of prediction\n",
186 |     "    n_layers = 6 # number of Encoder of Encoder Layer\n",
187 |     "    n_heads = 12 # number of heads in Multi-Head Attention\n",
188 |     "    d_model = 768 # Embedding Size\n",
189 |     "    d_ff = 768 * 4  # 4*d_model, FeedForward dimension\n",
190 |     "    d_k = d_v = 64  # dimension of K(=Q), V\n",
191 |     "    n_segments = 2\n",
192 |     "\n",
193 |     "    text = (\n",
194 |     "        'Hello, how are you? I am Romeo.\\n'\n",
195 |     "        'Hello, Romeo My name is Juliet. Nice to meet you.\\n'\n",
196 |     "        'Nice meet you too. How are you today?\\n'\n",
197 |     "        'Great. My baseball team won the competition.\\n'\n",
198 |     "        'Oh Congratulations, Juliet\\n'\n",
199 |     "        'Thanks you Romeo'\n",
200 |     "    )\n",
201 |     "    sentences = re.sub(\"[.,!?\\\\-]\", '', text.lower()).split('\\n')  # filter '.', ',', '?', '!'\n",
202 |     "    word_list = list(set(\" \".join(sentences).split()))\n",
203 |     "    word_dict = {'[PAD]': 0, '[CLS]': 1, '[SEP]': 2, '[MASK]': 3}\n",
204 |     "    for i, w in enumerate(word_list):\n",
205 |     "        word_dict[w] = i + 4\n",
206 |     "    number_dict = {i: w for i, w in enumerate(word_dict)}\n",
207 |     "    vocab_size = len(word_dict)\n",
208 |     "\n",
209 |     "    token_list = list()\n",
210 |     "    for sentence in sentences:\n",
211 |     "        arr = [word_dict[s] for s in sentence.split()]\n",
212 |     "        token_list.append(arr)\n",
213 |     "\n",
214 |     "    model = BERT()\n",
215 |     "    criterion = nn.CrossEntropyLoss()\n",
216 |     "    optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
217 |     "\n",
218 |     "    batch = make_batch()\n",
219 |     "    input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(torch.LongTensor, zip(*batch))\n",
220 |     "\n",
221 |     "    for epoch in range(100):\n",
222 |     "        optimizer.zero_grad()\n",
223 |     "        logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)\n",
224 |     "        loss_lm = criterion(logits_lm.transpose(1, 2), masked_tokens) # for masked LM\n",
225 |     "        loss_lm = (loss_lm.float()).mean()\n",
226 |     "        loss_clsf = criterion(logits_clsf, isNext) # for sentence classification\n",
227 |     "        loss = loss_lm + loss_clsf\n",
228 |     "        if (epoch + 1) % 10 == 0:\n",
229 |     "            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
230 |     "        loss.backward()\n",
231 |     "        optimizer.step()\n",
232 |     "\n",
233 |     "    # Predict mask tokens ans isNext\n",
234 |     "    input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(torch.LongTensor, zip(batch[0]))\n",
235 |     "    print(text)\n",
236 |     "    print([number_dict[w.item()] for w in input_ids[0] if number_dict[w.item()] != '[PAD]'])\n",
237 |     "\n",
238 |     "    logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)\n",
239 |     "    logits_lm = logits_lm.data.max(2)[1][0].data.numpy()\n",
240 |     "    print('masked tokens list : ',[pos.item() for pos in masked_tokens[0] if pos.item() != 0])\n",
241 |     "    print('predict masked tokens list : ',[pos for pos in logits_lm if pos != 0])\n",
242 |     "\n",
243 |     "    logits_clsf = logits_clsf.data.max(1)[1].data.numpy()[0]\n",
244 |     "    print('isNext : ', True if isNext else False)\n",
245 |     "    print('predict isNext : ',True if logits_clsf else False)\n"
246 |    ]
247 |   }
248 |  ],
249 |  "metadata": {
250 |   "anaconda-cloud": {},
251 |   "kernelspec": {
252 |    "display_name": "Python 3 (ipykernel)",
253 |    "language": "python",
254 |    "name": "python3"
255 |   },
256 |   "language_info": {
257 |    "codemirror_mode": {
258 |     "name": "ipython",
259 |     "version": 3
260 |    },
261 |    "file_extension": ".py",
262 |    "mimetype": "text/x-python",
263 |    "name": "python",
264 |    "nbconvert_exporter": "python",
265 |    "pygments_lexer": "ipython3",
266 |    "version": "3.9.18"
267 |   }
268 |  },
269 |  "nbformat": 4,
270 |  "nbformat_minor": 4
271 | }
272 | 


--------------------------------------------------------------------------------
/1-2.Word2Vec/Word2Vec.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import mindspore\n",
 11 |     "import mindspore.nn as nn\n",
 12 |     "import mindspore.ops as ops\n",
 13 |     "import matplotlib.pyplot as plt\n",
 14 |     "from mindspore import Tensor, ms_function"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "def random_batch():\n",
 24 |     "    random_inputs = []\n",
 25 |     "    random_labels = []\n",
 26 |     "    random_index = np.random.choice(range(len(skip_grams)), batch_size, replace=False)\n",
 27 |     "\n",
 28 |     "    for i in random_index:\n",
 29 |     "        random_inputs.append(np.eye(voc_size)[skip_grams[i][0]])  # target\n",
 30 |     "        random_labels.append(skip_grams[i][1])  # context word\n",
 31 |     "\n",
 32 |     "    return random_inputs, random_labels"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 3,
 38 |    "metadata": {},
 39 |    "outputs": [],
 40 |    "source": [
 41 |     "class Word2Vec(nn.Cell):\n",
 42 |     "    def __init__(self, voc_size, embed_size):\n",
 43 |     "        super(Word2Vec, self).__init__()\n",
 44 |     "        # W and WT is not Traspose relationship\n",
 45 |     "        self.W = nn.Dense(voc_size, embed_size, has_bias=False) # voc_size > embedding_size Weight\n",
 46 |     "        self.WT = nn.Dense(embed_size, voc_size, has_bias=False) # embedding_size > voc_size Weight\n",
 47 |     "        \n",
 48 |     "    def construct(self, X):\n",
 49 |     "        # X : [batch_size, voc_size]\n",
 50 |     "        hidden_layer = self.W(X) # hidden_layer : [batch_size, embedding_size]\n",
 51 |     "        output_layer = self.WT(hidden_layer) # output_layer : [batch_size, voc_size]\n",
 52 |     "        return output_layer"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 4,
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "batch_size = 2 # mini-batch size\n",
 62 |     "embed_size = 2 # embedding size\n",
 63 |     "\n",
 64 |     "sentences = [\"apple banana fruit\", \"banana orange fruit\", \"orange banana fruit\",\n",
 65 |     "             \"dog cat animal\", \"cat monkey animal\", \"monkey dog animal\"]\n",
 66 |     "\n",
 67 |     "word_sequence = \" \".join(sentences).split()\n",
 68 |     "word_list = \" \".join(sentences).split()\n",
 69 |     "word_list = list(set(word_list))\n",
 70 |     "word_dict = {w: i for i, w in enumerate(word_list)}\n",
 71 |     "voc_size = len(word_list)"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 5,
 77 |    "metadata": {},
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "# Make skip gram of one size window\n",
 81 |     "skip_grams = []\n",
 82 |     "for i in range(1, len(word_sequence) - 1):\n",
 83 |     "    target = word_dict[word_sequence[i]]\n",
 84 |     "    context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]\n",
 85 |     "    for w in context:\n",
 86 |     "        skip_grams.append([target, w])"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 6,
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "model = Word2Vec(voc_size, embed_size)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 7,
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": [
104 |     "criterion = nn.CrossEntropyLoss()\n",
105 |     "optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 8,
111 |    "metadata": {},
112 |    "outputs": [],
113 |    "source": [
114 |     "def forward(inputs, targets):\n",
115 |     "    logits = model(inputs)\n",
116 |     "    loss = criterion(logits, targets)\n",
117 |     "    return loss"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 9,
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "grad_fn = ops.value_and_grad(forward, None, optimizer.parameters)"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": 10,
132 |    "metadata": {},
133 |    "outputs": [],
134 |    "source": [
135 |     "@ms_function\n",
136 |     "def train_step(inputs, targets):\n",
137 |     "    loss, grads = grad_fn(inputs, targets)\n",
138 |     "    optimizer(grads)\n",
139 |     "    return loss"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 11,
145 |    "metadata": {},
146 |    "outputs": [
147 |     {
148 |      "name": "stdout",
149 |      "output_type": "stream",
150 |      "text": [
151 |       "Epoch: 1000 cost =  1.538420\n",
152 |       "Epoch: 2000 cost =  1.268426\n",
153 |       "Epoch: 3000 cost =  1.192565\n",
154 |       "Epoch: 4000 cost =  1.204078\n",
155 |       "Epoch: 5000 cost =  1.086928\n"
156 |      ]
157 |     }
158 |    ],
159 |    "source": [
160 |     "model.set_train()\n",
161 |     "\n",
162 |     "epoch = 5000\n",
163 |     "for step in range(epoch):\n",
164 |     "    input_batch, target_batch = random_batch()\n",
165 |     "    input_batch = Tensor(input_batch, mindspore.float32)\n",
166 |     "    target_batch = Tensor(target_batch, mindspore.int32)\n",
167 |     "    loss = train_step(input_batch, target_batch)\n",
168 |     "    if (step + 1) % 1000 == 0:\n",
169 |     "        print('Epoch:', '%04d' % (step + 1), 'cost = ', '{:.6f}'.format(loss.asnumpy()))"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": 12,
175 |    "metadata": {},
176 |    "outputs": [
177 |     {
178 |      "data": {
179 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAe7klEQVR4nO3de3RU5b3/8fc3IQQFDVZAqFy1QCEXIASkcjGCVRArQvGuWD2IglS0rb/SxakGz7LLVls5VD0WDipaLFIUhUqLyqWCghI0plwr0silCOF+EZCQ7++PDDlccnUmMxP257XWrOz97Gee5zuzyIedfZkxd0dERM58CbEuQEREokOBLyISEAp8EZGAUOCLiASEAl9EJCAU+CIiARF24JtZCzNbaGarzWyVmY0po4+Z2UQzW29m+WaWGe68IiJSPXUiMEYR8FN3/9jMzgFWmNk77r76hD4DgLahxyXA/4R+iohIlIS9h+/uW93949DyfmANcOEp3QYBL3mJZUBDM2sW7twi8cbMWpvZyir0yzazS6NRk8hxkdjDL2VmrYEuwIenbLoQ2HTC+uZQ29aKxmvUqJG3bt06ghWK1Ky0tDTWr19PVlZWhbewN2vWjISEBLKysqJVmgTEihUrdrh747K2RSzwzawB8BrwgLvvC2OcEcAIgJYtW5KbmxuhCkVOVlBQQP/+/enRowcffPAB3bp148477+SRRx5h+/btTJs2je985zvcddddbNiwgbPPPptJkyaRkZFBTk4OGzduZMOGDWzcuJEHHniA+++/n9/+9reMGzeOo0ePctFFF/Hpp59y9tlnU1xczLZt27jwwgtJTk7m6NGjJCUlkZ+fz7x587j88svZt28fnTp14p///CdJSUmxfnukljKzL8rbFpHAN7MkSsJ+mru/XkaXLUCLE9abh9pO4+6TgElApXtJIuFav349f/7zn3n++efp1q0br7zyCkuWLGH27Nn86le/okWLFnTp0oU33niDBQsWMGzYMPLy8gBYu3YtCxcuZP/+/bRv354+ffrw9NNP07p1a2bMmMHQoUOZOnUqvXr14oorruDWW29l165d3HDDDdx0003cd999rFq1ir179wIwffp0hgwZorCXGhN24JuZAVOANe7+u3K6zQZGm9l0Sk7W7nX3Cg/niERDmzZtSE9PByA1NZV+/fphZqSnp1NQUMAXX3zBa6+9BkDfvn3ZuXMn+/aV/AE7cOBAkpOTSU5OpkmTJsyZM4err76amTNnMmjQIF5//XWOHTvGFVdcwcKFC3n//fepW7cu8+fPZ//+/QAMHz6c3/zmN1x33XW88MILTJ48OTZvhARCJK7D7wncDvQ1s7zQ42ozu9fM7g31mQtsANYDk4FREZhXJGzJycmlywkJCaXrCQkJFBUVVfm5iYmJHDt2DICUlBRatmzJkiVL+PGPf8zdd9/NBRdcwN/+9jcyMzPJy8tj9OjRAPTs2ZOCggIWLVrEsWPHSEtLi/RLFCkV9h6+uy8BrJI+DtwX7lwi0da7d2+mTZvGL3/5SxYtWkSjRo0499xzy+x76aWXcs8995CcnMysWbPo27cv27Zto23btrRp04bx48cD4O7s37+f4x9NPmzYMG655RZ++ctfRu11STDpTluRCuTk5LBixQoyMjIYO3YsU6dOLbdvu3btGD16NAUFBVx66aVcfPHF1KlTh4EDB7J//34KCgrIzc0lNTWVo0ePMmvWLDp37szFF1/M7t27ufnmm6P4yiSILJ6/ACUrK8t1lY6cKf754ZcsffNzDuw6QoNvJfO9QRfT7pKmzJw5kzfffJOXX3451iXKGcDMVrh7mdf7ag9fJAr++eGXLJy2lgO7jgBwYNcR5k/JY/C3v8tPhw3jgZ49Y1yhBIECXyQKlr75OUVfF5/UVpxQl4FX/hd/a9mK+s/9gb1z5sSoOgkKBb5IFBzfsz/VkeRvAeCHD7P9qQlRrEiCSIEvEgUNvpVcZnvykV2ly0VbdWuK1CwFvkgUfG/QxdSpe/KvW8KxI1y8YXbpep1m+jxBqVkR/fA0ESlbu0uaAoSu0jlMvSO7uejzN2m6veQqNKtXjyYPPhDDCiUIFPgiUdLukqalwb93zhy2P/Vvisyo06wZTR58gJQf/CDGFcqZToEvEgMpP/iBAl6iTsfwRUQCQoEvIhIQCnwRkYBQ4IuIBIQCX0QkIBT4IiIBocAXEQkIBb6ISEAo8EVEAkKBLyISEBEJfDN73sy2m9nKcrZnm9leM8sLPR6OxLwiIlJ1kfosnReBp4GXKuiz2N2vidB8IiJSTRHZw3f394BdlXYUEZGYieYx/O+Z2adm9lczSy2vk5mNMLNcM8stLCyMYnkiIme2aAX+x0Ard+8E/B54o7yO7j7J3bPcPatx48ZRKk9E5MwXlcB3933ufiC0PBdIMrNG0ZhbRERKRCXwzaypmVlouXto3p3RmFtEREpE5CodM/sTkA00MrPNwCNAEoC7PwcMBUaaWRFwCLjJ3T0Sc4uISNVEJPDd/eZKtj9NyWWbIiISI7rTVkQkIBT4IiIBocAXEQkIBb6ISEAo8EVEAkKBLyISEAp8EZGAUOCLiASEAl9EJCAU+CIiAaHAFxEJCAW+iEhAKPBFRAJCgS8iEhAKfBGRgFDgi4gEhAJfRCQgFPgiIgGhwBcRCYiIBL6ZPW9m281sZTnbzcwmmtl6M8s3s8xIzCsiIlUXqT38F4H+FWwfALQNPUYA/xOheUVEpIoiEvju/h6wq4Iug4CXvMQyoKGZNYvE3CIiUjXROoZ/IbDphPXNobbTmNkIM8s1s9zCwsKoFCciEgRxd9LW3Se5e5a7ZzVu3DjW5YiInDGiFfhbgBYnrDcPtYmISJREK/BnA8NCV+v0APa6+9YozS0iIkCdSAxiZn8CsoFGZrYZeARIAnD354C5wNXAeuAr4M5IzCsiIlUXkcB395sr2e7AfZGYS0REvpm4O2krIiI1Q4EvIhIQCnwRkYBQ4IuIBIQCX0QkIBT4IiIBocAXEQkIBb6ISEAo8EVEAkKBLyISEAp8EZGAUOCLiASEAl9EJCAU+CIiAaHAFxEJCAW+iEhAKPBFRAJCgS8iEhAKfBGRgIhI4JtZfzNbZ2brzWxsGdt/ZGaFZpYXegyPxLwiIlJ1YX+JuZklAs8A3wc2A8vNbLa7rz6l66vuPjrc+URE5JuJxB5+d2C9u29w96+B6cCgCIwrIiIRFInAvxDYdML65lDbqX5oZvlmNtPMWkRgXhERqYZonbSdA7R29wzgHWBqeR3NbISZ5ZpZbmFhYZTKExE580Ui8LcAJ+6xNw+1lXL3ne5+JLT6v0DX8gZz90nunuXuWY0bN45AeSIiApEJ/OVAWzNrY2Z1gZuA2Sd2MLNmJ6xeC6yJwLwiIlINYV+l4+5FZjYamAckAs+7+yozexTIdffZwP1mdi1QBOwCfhTuvCIiUj3m7rGuoVxZWVmem5sb6zJERGoNM1vh7lllbdOdtiIiEfbcc8/x0ksvRWSs1q1bs2PHjoiMFfYhHREROdm9994b6xLKpD18EZEquO666+jatSupqalMmjQJgAYNGjBu3Dg6depEjx492LZtGwA5OTk8+eSTAGRnZ/Pggw+SlZVFhw4dWL58OUOGDKFt27b853/+Z4XjR5oCX0SkCp5//nlWrFhBbm4uEydOZOfOnRw8eJAePXrw6aef0qdPHyZPnlzmc+vWrUtubi733nsvgwYN4plnnmHlypW8+OKL7Ny5s9zxI02BLyJSBRMnTizdk9+0aROfffYZdevW5ZprrgGga9euFBQUlPnca6+9FoD09HRSU1Np1qwZycnJXHTRRWzatKnc8SNNx/BFRCqxaNEi3n33XZYuXcrZZ59NdnY2hw8fJikpCTMDIDExkaKiojKfn5ycDEBCQkLp8vH1oqKicsePNO3hi4hUYu/evZx33nmcffbZrF27lmXLltWq8Y9T4IuIVKJ///4UFRXRoUMHxo4dS48ePWrV+MfpxisRkThx8JPt7JtXwLE9R0hsmMy5V7Wmfpcm1Rqj1t54tX37djp06MCtt95a5edcffXV7Nmzhz179vDss8/WYHUiIpFz8JPt7Hn9M47tKfmcyWN7jrDn9c84+Mn2iM0R13v49erV8/Xr19O8efPStqKiIurUqfxcc0FBAddccw0rV66syRJFRCJi6+MflYb9iRIbJtNsbPcqj1Mr9/DN7Lmvv/6aAQMGkJKSwu23307Pnj25/fbbefHFFxk9+v++LfGaa65h0aJFwP/dhjx27Fg+//xzOnfuzEMPPRSjVyGxcuq/EZF4V1bYV9T+TcTtZZnufm9ycvI9Cxcu5Omnn2bOnDksWbKEs846ixdffLHS5z/++OOsXLmSvLy8Gq9VRCRciQ2Ty93Dj5S43cM/1bXXXstZZ50V6zIkCsq7hf3BBx8kNTWVfv36cfzb0LKzsxkzZgydO3cmLS2Njz766LTxCgsL+eEPf0i3bt3o1q0b77//flRfj0hVnHtVayzp5Ei2pATOvap1xOaoNYFfv3790uU6depQXFxcul4TNyhI7JR3C3tWVharVq3isssuY/z48aX9v/rqK/Ly8nj22We56667ThtvzJgxPPjggyxfvpzXXnuN4cOHR/PliFRJ/S5NaDikbekefWLDZBoOaVvtq3QqEreHdCrSunVrnn32WYqLi9myZUuZe3XnnHMO+/fvj0F1Eq6JEycya9YsgNJbzBMSErjxxhsBuO222xgyZEhp/5tvvhmAPn36sG/fPvbs2XPSeO+++y6rV68uXd+3bx8HDhygQYMGNfxKak5OTg4NGjTgZz/7WaxLkQiq36VJRAP+VLUy8Hv27EmbNm3o2LEjHTp0IDMz87Q+559/Pj179iQtLY0BAwbwxBNPxKBSqa6q3mJ+/Hb2U5fLWi8uLmbZsmXUq1evZooWqSXi+pBOeno6jRo1Iicn56Q9GTNj2rRprF27llmzZrFo0SKys7MBeGbBM9yy6BYypmawY8gOfj371wr7WqS8W8yLi4uZOXMmAK+88gq9evUqfc6rr74KwJIlS0hJSSElJeWkMa+88kp+//vfl67X1hP5jz32GO3ataNXr16sW7cOKHktPXr0ICMjg8GDB7N7924Ali9fTkZGRulVamlpabEsXeJEXAd+db214S1yPshh68GtOM7Wg1vJ+SCHtza8FevSpIrKu8W8fv36fPTRR6SlpbFgwQIefvjh0ufUq1ePLl26cO+99zJlypTTxpw4cSK5ublkZGTQsWNHnnvuuai9nkhZsWIF06dPJy8vj7lz57J8+XIAhg0bxq9//Wvy8/NJT08vPbdx55138oc//IG8vDwSExNjWbrEkbi+8aq6H61w5cwr2Xpw62ntzeo34+2hb0eyNImyBg0acODAgdPas7OzefLJJ8nKKvM+E8ifAfMfhb2bIaU59HsYMm6o4Wojb8KECezatYtHH30UgJ/85CekpKQwZcoUNm7cCMDnn3/O9ddfz4IFC+jUqRNffPEFAPn5+dxyyy26CTEgavzGKzPrb2brzGy9mY0tY3uymb0a2v6hmbWOxLyn+vLgl9VqlzNc/gyYcz/s3QR4yc8595e0iwRQ2IFvZonAM8AAoCNws5l1PKXbfwC73f07wFPAr8OdtyxN6zetVrvUHmXt3UPJSd5y9+7nPwpHD53cdvRQSXst06dPH9544w0OHTrE/v37mTNnDvXr1+e8885j8eLFALz88stcdtllNGzYkHPOOYcPP/wQgOnTp8eydIkjkdjD7w6sd/cN7v41MB0YdEqfQcDU0PJMoJ+deilFBIzJHEO9xJOvxKiXWI8xmWMiPZXUBns3V689jmVmZnLjjTfSqVMnBgwYQLdu3QCYOnUqDz30EBkZGeTl5ZWe25gyZQp33303nTt35uDBg6edyJZgisRlmRcCm05Y3wxcUl4fdy8ys73A+cCOUwczsxHACICWLVtWq5CBFw0E4L8//m++PPglTes3ZUzmmNJ2CZiU5qHDOWW010Ljxo1j3Lhxp7WX9WUZqamp5OfnAyUfM1LuX0ESKHF3Hb67TwImQclJ2+o+f+BFAxXwUqLfwyXH7E88rJN0Vkn7GWzN4oX87r/GM3f5J5CQwHfatWfm7DmxLkviQCQCfwvQ4oT15qG2svpsNrM6QAoQ+a9kFznR8atxzoCrdKpqzeKFvD3pab57XgO+e2VvAOrUTWbH2pU0bnx5jKuTWItE4C8H2ppZG0qC/SbgllP6zAbuAJYCQ4EFHs/Xg8qZI+OGMzrgT7V4+ksUfX3yJy4WfX2ExdNfokNvBX7QhR34oWPyo4F5QCLwvLuvMrNHgVx3nw1MAV42s/XALkr+UxCRCNu/87TTYhW2S7BE5Bi+u88F5p7S9vAJy4eB6yMxl4iU75zzG7F/R2GZ7SJn1EcriARd75uGUafuyV+YUaduMr1vGhajiiSexN1VOiLyzR0/Tr94+kvs37mDc85vRO+bhun4vQAKfJEzTofelyvgpUw6pCMiEhAKfBGRgFDgi4gEhAJfRCQgFPgiIgGhwBcRCQgFvohIQCjwRUQCQoEvIhIQCnwRkYBQ4IuIBIQCX0QkIBT4IiIBocAXEQkIBb6ISEAo8EVEAiKswDezb5nZO2b2WejneeX0O2ZmeaHH7HDmFBGRbybcPfyxwHx3bwvMD62X5ZC7dw49rg1zThER+QbCDfxBwNTQ8lTgujDHExGRGhJu4F/g7ltDy18CF5TTr56Z5ZrZMjO7rqIBzWxEqG9uYWFhmOWJiMhxlX6JuZm9CzQtY9O4E1fc3c3MyxmmlbtvMbOLgAVm9g93/7ysju4+CZgEkJWVVd54IiJSTZUGvrtfUd42M9tmZs3cfauZNQO2lzPGltDPDWa2COgClBn4IiJSM8I9pDMbuCO0fAfw5qkdzOw8M0sOLTcCegKrw5xXRESqKdzAfxz4vpl9BlwRWsfMsszsf0N9OgC5ZvYpsBB43N0V+CIiUVbpIZ2KuPtOoF8Z7bnA8NDyB0B6OPOIiEj4dKetiEhAKPBFRAJCgS8iEhAKfBGRgFDgi4gEhAJfRCQgFPgiIgGhwBcRCQgFvohIQCjwRUQCQoEvIhIQCnwRkYBQ4IuIBIQCX0QkIBT4IiIBocAXEQkIBb6ISEAo8EVEAkKBLyISEGEFvpldb2arzKzYzLIq6NffzNaZ2XozGxvOnCIi8s2Eu4e/EhgCvFdeBzNLBJ4BBgAdgZvNrGOY84qISDXVCefJ7r4GwMwq6tYdWO/uG0J9pwODgNXhzC0iItUTjWP4FwKbTljfHGoTEZEoqnQP38zeBZqWsWmcu78Z6YLMbAQwAqBly5aRHl5EJLAqDXx3vyLMObYALU5Ybx5qK2++ScAkgKysLA9zbhERCYnGIZ3lQFsza2NmdYGbgNlRmFdERE4Q7mWZg81sM/A94C0zmxdq/7aZzQVw9yJgNDAPWAPMcPdV4ZUtIiLVFe5VOrOAWWW0/xu4+oT1ucDccOYSEZHw6E5bEZGAUOCLiASEAl9EJCAU+CIiAaHAFxEJCAW+iEhAKPBFRAJCgS8iEhAKfBGRgFDgi4gEhAJfRCQgFPgiIgGhwBcRCQgFvohIQCjwRUQCQoEvIhIQCnwRkYBQ4IuIBIQCX0QkIBT4IiIBEVbgm9n1ZrbKzIrNLKuCfgVm9g8zyzOz3HDmlG+uoKCAtLS0WJchIjFSJ8znrwSGAH+oQt/L3X1HmPOJiMg3FNYevruvcfd1kSommtyd4uLiWJcRdUVFRdx666106NCBoUOH8tVXX/Hoo4/SrVs30tLSGDFiBO4OQHZ2Nj//+c/p3r077dq1Y/HixUDJXwq9e/cmMzOTzMxMPvjgAwAWLVpEdnY2Q4cO5bvf/S633npr6VjlzSEi0ROtY/gOvG1mK8xsREUdzWyEmeWaWW5hYWFYk/7ud78jLS2NtLQ0JkyYQEFBAe3bt2fYsGGkpaWxadMmRo4cSVZWFqmpqTzyyCOlz23dujWPPPIImZmZpKens3btWgAKCwv5/ve/T2pqKsOHD6dVq1bs2FHyh8sf//hHunfvTufOnbnnnns4duxYWPXXhHXr1jFq1CjWrFnDueeey7PPPsvo0aNZvnw5K1eu5NChQ/zlL38p7V9UVMRHH33EhAkTGD9+PABNmjThnXfe4eOPP+bVV1/l/vvvL+3/ySefMGHCBFavXs2GDRt4//33ASqcQ0Sio9LAN7N3zWxlGY9B1Zinl7tnAgOA+8ysT3kd3X2Su2e5e1bjxo2rMcXJVqxYwQsvvMCHH37IsmXLmDx5Mrt37+azzz5j1KhRrFq1ilatWvHYY4+Rm5tLfn4+f//738nPzy8do1GjRnz88ceMHDmSJ598EoDx48fTt29fVq1axdChQ9m4cSMAa9as4dVXX+X9998nLy+PxMREpk2b9o3rryktWrSgZ8+eANx2220sWbKEhQsXcskll5Cens6CBQtYtWpVaf8hQ4YA0LVrVwoKCgA4evQod999N+np6Vx//fWsXr26tH/37t1p3rw5CQkJdO7cufQ5Fc0hItFR6TF8d78i3EncfUvo53YzmwV0B94Ld9yKLFmyhMGDB1O/fn2gJLgWL15Mq1at6NGjR2m/GTNmMGnSJIqKiti6dSurV68mIyOj9DlQEnavv/566bizZs0CoH///px33nkAzJ8/nxUrVtCtWzcADh06RJMmTWryJX4jZnba+qhRo8jNzaVFixbk5ORw+PDh0u3JyckAJCYmUlRUBMBTTz3FBRdcwKeffkpxcTH16tU7rf+Jzzl8+HCFc4hIdNT4IR0zq29m5xxfBq6k5GRvTBz/DwDgX//6F08++STz588nPz+fgQMHVhp25XF37rjjDvLy8sjLy2PdunXk5OTUyGsIx8aNG1m6dCkAr7zyCr169QJK/po5cOAAM2fOrHSMvXv30qxZMxISEnj55ZcrPXR1/D2tzhwiEnnhXpY52Mw2A98D3jKzeaH2b5vZ3FC3C4AlZvYp8BHwlrv/LZx5q6J379688cYbfPXVVxw8eJBZs2bRu3fvk/rs27eP+vXrk5KSwrZt2/jrX/9a6bg9e/ZkxowZALz99tvs3r0bgH79+jFz5ky2b98OwK5du/jiiy8i/KrC1759e5555hk6dOjA7t27GTlyJHfffTdpaWlcddVVpX+hVGTUqFFMnTqVTp06sXbt2pP+Ey1Lw4YNqz2HiNQAd4/bR9euXT0cv/3tbz01NdVTU1P9qaee8n/961+empp6Up877rjD27Zt63379vXBgwf7Cy+84O7urVq18sLCQnd3X758uV922WXu7r5t2zbv27evp6am+vDhw71p06Z++PBhd3efPn26d+rUydPT0z0zM9OXLl0aVv0iItUF5Ho5mWoex5fHZWVleW5ufN2ndeTIERITE6lTpw5Lly5l5MiR5OXlkZ+fz/z589m7dy8pKSn069ev9FxA0L3xyRaemLeOf+85xLcbnsVDV7Xnui4XxroskTOSma1w9zJvhA33xqvA2bhxIzfccAPFxcXUrVuXyZMnk5+fz5w5czh69ChQcox7zpw5AIEP/Tc+2cIvXv8Hh46WHOffsucQv3j9HwAKfZEoU+BXU9u2bfnkk09OanvqqadKw/64o0ePMn/+/MAH/hPz1pWG/XGHjh7jiXnrFPgiUaYPT4uAvXv3Vqs9SP6951C12kWk5ijwIyAlJaVa7UHy7YZnVatdRGqOAj8C+vXrR1JS0kltSUlJ9OvXL0YVxY+HrmrPWUmJJ7WdlZTIQ1e1j1FFIsGlY/gRcPw4va7SOd3x4/S6Skck9nRZpojIGaSiyzJ1SEdEJCAU+CIiAaHAFxEJCAW+iEhAKPBFRAIirq/SMbP9QK38ztwoagToy+Erp/epavQ+VS7e36NW7l7m1wXG+3X468q7vEhKmFmu3qPK6X2qGr1PlavN75EO6YiIBIQCX0QkIOI98CfFuoBaQO9R1eh9qhq9T5Wrte9RXJ+0FRGRyIn3PXwREYmQuA58M3vCzNaaWb6ZzTKzhrGuKR6Z2fVmtsrMis2sVl49UJPMrL+ZrTOz9WY2Ntb1xCMze97MtpvZyljXEq/MrIWZLTSz1aHftzGxrqm64jrwgXeANHfPAP4J/CLG9cSrlcAQ4L1YFxJvzCwReAYYAHQEbjazjrGtKi69CPSPdRFxrgj4qbt3BHoA99W2f0txHfju/ra7F4VWlwHNY1lPvHL3Ne6uG9TK1h1Y7+4b3P1rYDowKMY1xR13fw/YFes64pm7b3X3j0PL+4E1QK36Yoe4DvxT3AX8NdZFSK1zIbDphPXN1LJfUok/ZtYa6AJ8GONSqiXmd9qa2btA0zI2jXP3N0N9xlHy59S0aNYWT6ryPolIzTOzBsBrwAPuvi/W9VRHzAPf3a+oaLuZ/Qi4BujnAb6GtLL3Scq1BWhxwnrzUJtItZlZEiVhP83dX491PdUV14d0zKw/8P+Aa939q1jXI7XScqCtmbUxs7rATcDsGNcktZCZGTAFWOPuv4t1Pd9EXAc+8DRwDvCOmeWZ2XOxLigemdlgM9sMfA94y8zmxbqmeBE66T8amEfJSbYZ7r4qtlXFHzP7E7AUaG9mm83sP2JdUxzqCdwO9A3lUZ6ZXR3roqpDd9qKiAREvO/hi4hIhCjwRUQCQoEvIhIQCnwRkYBQ4IuIBIQCX0QkIBT4IiIBocAXEQmI/w81nm1VM/4ougAAAABJRU5ErkJggg==",
180 |       "text/plain": [
181 |        "<Figure size 432x288 with 1 Axes>"
182 |       ]
183 |      },
184 |      "metadata": {
185 |       "needs_background": "light"
186 |      },
187 |      "output_type": "display_data"
188 |     }
189 |    ],
190 |    "source": [
191 |     "for i, label in enumerate(word_list):\n",
192 |     "    W, WT = model.get_parameters()\n",
193 |     "    x, y = W[0][i].asnumpy(), W[1][i].asnumpy()\n",
194 |     "    plt.scatter(x, y)\n",
195 |     "    plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')\n",
196 |     "plt.show()"
197 |    ]
198 |   }
199 |  ],
200 |  "metadata": {
201 |   "kernelspec": {
202 |    "display_name": "Python 3.7.13 ('ms1.8')",
203 |    "language": "python",
204 |    "name": "python3"
205 |   },
206 |   "language_info": {
207 |    "codemirror_mode": {
208 |     "name": "ipython",
209 |     "version": 3
210 |    },
211 |    "file_extension": ".py",
212 |    "mimetype": "text/x-python",
213 |    "name": "python",
214 |    "nbconvert_exporter": "python",
215 |    "pygments_lexer": "ipython3",
216 |    "version": "3.7.13"
217 |   },
218 |   "vscode": {
219 |    "interpreter": {
220 |     "hash": "bd0943702584cdb580f8947884f31a9fb49482f77f8c89ed6532de3aa180e7ba"
221 |    }
222 |   }
223 |  },
224 |  "nbformat": 4,
225 |  "nbformat_minor": 4
226 | }
227 | 


--------------------------------------------------------------------------------
/4-2.Seq2Seq(Attention)/Seq2Seq-Attention.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 103,
  6 |    "id": "8a0766e3-3816-4303-af74-2dfc50ba5c62",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import numpy as np\n",
 11 |     "import mindspore\n",
 12 |     "import mindspore.nn as nn\n",
 13 |     "import mindspore.ops as ops\n",
 14 |     "import mindspore.numpy as mnp\n",
 15 |     "import matplotlib.pyplot as plt\n",
 16 |     "from mindspore import ms_function"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 104,
 22 |    "id": "f4888e33-7ea9-437a-be7d-3151891ed97d",
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "# S: Symbol that shows starting of decoding input\n",
 27 |     "# E: Symbol that shows starting of decoding output\n",
 28 |     "# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\n",
 29 |     "\n",
 30 |     "def make_batch():\n",
 31 |     "    input_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[0].split()]]]\n",
 32 |     "    output_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[1].split()]]]\n",
 33 |     "    target_batch = [[word_dict[n] for n in sentences[2].split()]]\n",
 34 |     "\n",
 35 |     "    # make tensor\n",
 36 |     "    return mindspore.Tensor(input_batch), mindspore.Tensor(output_batch), \\\n",
 37 |     "        mindspore.Tensor(target_batch, mindspore.int32)"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 105,
 43 |    "id": "0242cf52-c02a-4670-ab34-e376cb140744",
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "class Attention(nn.Cell):\n",
 48 |     "    def __init__(self):\n",
 49 |     "        super(Attention, self).__init__()\n",
 50 |     "        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n",
 51 |     "        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n",
 52 |     "\n",
 53 |     "        # Linear for attention\n",
 54 |     "        self.attn = nn.Dense(n_hidden, n_hidden)\n",
 55 |     "        self.out = nn.Dense(n_hidden * 2, n_class)\n",
 56 |     "\n",
 57 |     "    def construct(self, enc_inputs, dec_inputs):\n",
 58 |     "        enc_inputs = enc_inputs.swapaxes(0, 1)  # enc_inputs: [n_step(=n_step, time step), batch_size, n_class]\n",
 59 |     "        dec_inputs = dec_inputs.swapaxes(0, 1)  # dec_inputs: [n_step(=n_step, time step), batch_size, n_class]\n",
 60 |     "\n",
 61 |     "        # enc_outputs : [n_step, batch_size, num_directions(=1) * n_hidden], matrix F\n",
 62 |     "        # enc_hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
 63 |     "        enc_outputs, enc_hidden = self.enc_cell(enc_inputs)\n",
 64 |     "\n",
 65 |     "        trained_attn = []\n",
 66 |     "        hidden = enc_hidden\n",
 67 |     "        n_step = len(dec_inputs)\n",
 68 |     "        model = []\n",
 69 |     "\n",
 70 |     "        for i in range(n_step):  # each time step\n",
 71 |     "            # dec_output : [n_step(=1), batch_size(=1), num_directions(=1) * n_hidden]\n",
 72 |     "            # hidden : [num_layers(=1) * num_directions(=1), batch_size(=1), n_hidden]\n",
 73 |     "            dec_output, hidden = self.dec_cell(dec_inputs[i].expand_dims(0), hidden)\n",
 74 |     "            attn_weights = self.get_att_weight(dec_output, enc_outputs)  # attn_weights : [1, 1, n_step]\n",
 75 |     "            trained_attn.append(attn_weights.squeeze())\n",
 76 |     "\n",
 77 |     "            # matrix-matrix product of matrices [1,1,n_step] x [1,n_step,n_hidden] = [1,1,n_hidden]\n",
 78 |     "            context = ops.matmul(attn_weights, enc_outputs.swapaxes(0, 1))\n",
 79 |     "            dec_output = dec_output.squeeze(0)  # dec_output : [batch_size(=1), num_directions(=1) * n_hidden]\n",
 80 |     "            context = context.squeeze(1)  # [1, num_directions(=1) * n_hidden]\n",
 81 |     "            out = self.out(ops.concat((dec_output, context), 1))\n",
 82 |     "            model.append(out)\n",
 83 |     "        \n",
 84 |     "        model = ops.stack(model)\n",
 85 |     "\n",
 86 |     "        # make model shape [n_step, n_class]\n",
 87 |     "        return model.swapaxes(0, 1).squeeze(0), trained_attn\n",
 88 |     "\n",
 89 |     "    def get_att_weight(self, dec_output, enc_outputs):  # get attention weight one 'dec_output' with 'enc_outputs'\n",
 90 |     "        n_step = len(enc_outputs)\n",
 91 |     "        attn_scores = ops.zeros(n_step, mindspore.float32)  # attn_scores : [n_step]\n",
 92 |     "\n",
 93 |     "        for i in range(n_step):\n",
 94 |     "            attn_scores[i] = self.get_att_score(dec_output, enc_outputs[i])\n",
 95 |     "\n",
 96 |     "        # Normalize scores to weights in range 0 to 1\n",
 97 |     "        return ops.Softmax()(attn_scores).view(1, 1, -1)\n",
 98 |     "\n",
 99 |     "    def get_att_score(self, dec_output, enc_output):  # enc_outputs [batch_size, num_directions(=1) * n_hidden]\n",
100 |     "        score = self.attn(enc_output)  # score : [batch_size, n_hidden]\n",
101 |     "        return mnp.dot(dec_output.view(-1), score.view(-1))  # inner product make scalar valuek"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 106,
107 |    "id": "ad18877e-322a-42af-97bf-f13b561760d3",
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "n_step = 5 # number of cells(= number of Step)\n",
112 |     "n_hidden = 128 # number of hidden units in one cell\n",
113 |     "\n",
114 |     "sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\n",
115 |     "\n",
116 |     "word_list = \" \".join(sentences).split()\n",
117 |     "word_list = list(set(word_list))\n",
118 |     "word_dict = {w: i for i, w in enumerate(word_list)}\n",
119 |     "number_dict = {i: w for i, w in enumerate(word_list)}\n",
120 |     "n_class = len(word_dict)  # vocab list"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 107,
126 |    "id": "2ef50a8e-f4c0-44c8-82cd-750cc7ba3d5a",
127 |    "metadata": {},
128 |    "outputs": [
129 |     {
130 |      "name": "stderr",
131 |      "output_type": "stream",
132 |      "text": [
133 |       "[WARNING] ME(258161:139710830719232,MainProcess):2022-08-12-21:41:32.952.304 [mindspore/nn/layer/rnns.py:392] dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1\n",
134 |       "[WARNING] ME(258161:139710830719232,MainProcess):2022-08-12-21:41:32.961.000 [mindspore/nn/layer/rnns.py:392] dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1\n"
135 |      ]
136 |     }
137 |    ],
138 |    "source": [
139 |     "model = Attention()\n",
140 |     "criterion = nn.CrossEntropyLoss()\n",
141 |     "optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 108,
147 |    "id": "612ce97a-4fc3-4953-b641-bcd9f800f700",
148 |    "metadata": {},
149 |    "outputs": [],
150 |    "source": [
151 |     "input_batch, output_batch, target_batch = make_batch()"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 109,
157 |    "id": "002ae643",
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": [
161 |     "def forward(enc_input, dec_input, target):\n",
162 |     "    output, attn = model(enc_input, dec_input)\n",
163 |     "    loss = criterion(output, target.squeeze(0))\n",
164 |     "    return loss, attn"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 110,
170 |    "id": "47904591",
171 |    "metadata": {},
172 |    "outputs": [],
173 |    "source": [
174 |     "grad_fn = ops.value_and_grad(forward, None, optimizer.parameters, has_aux=True)"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": 111,
180 |    "id": "843167f2",
181 |    "metadata": {},
182 |    "outputs": [],
183 |    "source": [
184 |     "@ms_function\n",
185 |     "def train_step(enc_input, dec_input, target):\n",
186 |     "    (loss, _), grads = grad_fn(enc_input, dec_input, target)\n",
187 |     "    optimizer(grads)\n",
188 |     "    return loss"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": 112,
194 |    "id": "5f192fcb-4bad-4b97-9611-e24c8a0d966d",
195 |    "metadata": {},
196 |    "outputs": [
197 |     {
198 |      "name": "stdout",
199 |      "output_type": "stream",
200 |      "text": [
201 |       "Epoch: 0400 cost = 0.000808\n",
202 |       "Epoch: 0800 cost = 0.000266\n",
203 |       "Epoch: 1200 cost = 0.000135\n",
204 |       "Epoch: 1600 cost = 0.000080\n",
205 |       "Epoch: 2000 cost = 0.000053\n"
206 |      ]
207 |     }
208 |    ],
209 |    "source": [
210 |     "# Train\n",
211 |     "for epoch in range(2000):\n",
212 |     "    loss = train_step(input_batch, output_batch, target_batch)\n",
213 |     "    if (epoch + 1) % 400 == 0:\n",
214 |     "        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.asnumpy()))"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 113,
220 |    "id": "fa47c242-103d-471d-983d-2480830b7d6c",
221 |    "metadata": {},
222 |    "outputs": [
223 |     {
224 |      "name": "stdout",
225 |      "output_type": "stream",
226 |      "text": [
227 |       "ich mochte ein bier P -> ['i', 'want', 'a', 'beer', 'E']\n"
228 |      ]
229 |     }
230 |    ],
231 |    "source": [
232 |     "# Test\n",
233 |     "test_batch = [np.eye(n_class)[[word_dict[n] for n in 'SPPPP']]]\n",
234 |     "test_batch = mindspore.Tensor(test_batch)\n",
235 |     "predict, trained_attn = model(input_batch, test_batch)\n",
236 |     "predict = predict.argmax(1)\n",
237 |     "print(sentences[0], '->', [number_dict[int(n.asnumpy())] for n in predict])"
238 |    ]
239 |   },
240 |   {
241 |    "cell_type": "code",
242 |    "execution_count": 114,
243 |    "id": "fc60f4c8-461b-4640-97f2-b50fb6da5f19",
244 |    "metadata": {},
245 |    "outputs": [
246 |     {
247 |      "name": "stderr",
248 |      "output_type": "stream",
249 |      "text": [
250 |       "/home/lvyufeng/miniconda3/envs/ms1.8/lib/python3.7/site-packages/ipykernel_launcher.py:5: UserWarning: FixedFormatter should only be used together with FixedLocator\n",
251 |       "  \"\"\"\n",
252 |       "/home/lvyufeng/miniconda3/envs/ms1.8/lib/python3.7/site-packages/ipykernel_launcher.py:6: UserWarning: FixedFormatter should only be used together with FixedLocator\n",
253 |       "  \n"
254 |      ]
255 |     },
256 |     {
257 |      "data": {
258 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUcAAAE2CAYAAADyN1APAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAARXUlEQVR4nO3de9BcdX3H8fcHEkK5RBvFQUFAwRtV6UC4eCUdaKF17HSU0dFCFWcMaK2oeGlrUTvqpN4GrCiayhidYkcdqVS8U8mgU67aVjFYUEHuN7kTSAJ++8ee6LL+EvI8ye5ZnrxfMzvJc/bsnu959nneOedskidVhSTpobbpewBJmkbGUZIajKMkNRhHSWowjpLUYBwlqcE4DkmyIsnZm7DeXkkqyeJJzNWHbv+O6nuOzfVI2o8kK5OcOtv7tWXN63uAKXMCkL6HeCRIshdwJXBgVV3S8zgb83jg9r6H2EJeAqzre4hxSLICeFX34QPANcCZwLur6t4+ZjKOQ6rqzr5n0JZVVTf2PcOWUlW3be5zJJlfVdMa2HOAY4D5wAuATwM7Aq/rYxhPq4cMn1Zn4MQkVyRZk+TaJMtGHrJnku8kWZ1kVZI/HtNcK5OcluQjSW5LckuSE5IsSPLxJHckuTrJMUOPeVaSc5Lc1z1mRZJHjTzvq5L8uNu/m5J8dmTTi5J8Kcm9SX6R5Oih+67sfr24O3VdOfS8x3afj/uTXJ7kzUnG8rXWvU5vT/Lzbl9/PDzn8Gn10OWQl07idZuleUk+muT27vah9Z+70dPqJNsl+UD3tbk6ycVJjhi6f0m3v3+W5KIka4EjGtucFmuq6saquqaqPg+cAfxFb9NUlbfuBqwAzu5+vwy4A3gNsA/wHOD13X17AQX8FHgx8BTgs8CvgJ3GMNdK4C7gPd22Tuy2/w0GlwL2Ad4LrGFwGrkjcD3wFeBZwKHA5cCXh57zOOB+4C3A04ADgLcN3V/AtcDR3fMvA9YCe3T3H9itcwSwK7CoW/5a4AbgKOBJ3efnRuANY3rN3g/8H3Bkt71XAvcCLxraj6P6eN1m+TrfDXwMeDrwMuBO4C1D9586tP4ZwAXAC4EnA2/oXqP9uvuXdPv7Y+BPunV26Xs/H+57b2jZPwO39jZT35+Uabqtf4GAnbpwHL+B9dZ/kx03tGy3btnzxzDXSuD8oY8D3AL8x9Cy+d03xlFdoO4Edh66f/03yj7dx9cC/7SRbRawbOjjecBq4OiRz8HikcddDRwzsuxNwKoxfF52BO4DXjCy/BTg60P7MRrHibxus3ydLwcytOwfgGuH7j+1+/3ewK/p/rAaWv8rwCdGXvOX9r1vm7DvD4kjcBBwK/CFvmbymmPbvsAC4D8fZr0fDf3++u7Xx41loqFtVVUluZnBEcH6ZeuS3N5tfx/gR1V199Dj/4vBN9O+Se5iEIVN3r+qeiDJLWxk/5LsAjwR+FSS04bumsd43ujaF9ge+GaS4f9BZT5w1UYeN8nXbaYuqK4OnfOB9yZZOLLe/gw+p6uSh3xqFwDfHVl3mt8wG3ZkknsYfL3MB84C/qavYYzj5vnNhe0uWDC+67ijF9FrA8sebvsz+W+YZvr86+87nkGMx2399l7M4Ih12MbedJjk6zYu2zB4PQ7kd/f1vpGPe3m3dxbOA5Yy2J/rq+c3joxj22UMrt8dBlzR8yyzcRnwmiQ7Dx09PpfBN9RlVXVzkusY7N93ZrmNtd2v265fUFU3Jbke2LuqPjfL552JVQxepz2ravRo6ZHq4CQZOno8hEEo7ho5QvxvBkeOu1bVuZMeckxWV9XP+h5iPePYUFV3J/kosCzJGgZ/oj0GOKCqTtv4o6fCGcA/Ap9L8i7g94FPAWcOffG9Hzg5yU3A14AdgMOq6iObuI2bGRyhHJHkKuD+GvxVqHcDH0tyB/B1BqdH+wO7VdXou/2bpXudPgx8OINynMfgevEhwK+ravmW3N6EPAE4JcknGLyZ9jbgfaMrVdXlSc4AViQ5EfghsIjBdcZfVNWZkxt5bjKOG/Z3DP7y8EnA7sBNwCSOhjZbVa3u/krHKcBFDN5cOovBO9vr1zmt+6sdJwIfAG5jELNN3cYDSd4IvItBEL8HLKmqTye5l8E39TIGAf0JMK5/2XESg9fmrcBpDN7V/x/gg2Pa3ridweBo/EIGp82nAydvYN1jgXcy2NfdGbyGFwFz5UiyV3notV9JEjzyLkJL0kQYR0lqMI6S1GAcJanBOEpSg3GUpAbjOENJlvY9wzjM1f2Cubtv7td4GceZm4oXbgzm6n7B3N0392uMjKMkNcyJfyGzXRbU9uw4kW2tYw3zWTCRbU3SXN0vmOy+5WnzJ7IdgLV33Md2j/69iW1v53n3T2Q79962lh0XbTeRbQFc95O7bq2qXUaXz4l/W709O3JwDut7DM3ENts+/DqPQPOXT8t/C7nlHfrYy/seYSz+9g++9cvWck+rJanBOEpSg3GUpAbjKEkNxlGSGoyjJDUYR0lqMI6S1GAcJanBOEpSg3GUpAbjKEkNxlGSGoyjJDUYR0lqMI6S1GAcJanBOEpSg3GUpAbjKEkNUx3HJCuSnN33HJK2PtP+0wdPANL3EJK2PlMdx6q6s+8ZJG2dPK2WpIapjqMk9WWqT6s3JslSYCnA9uzQ8zSS5ppH7JFjVS2vqsVVtXg+C/oeR9Ic84iNoySNk3GUpAbjKEkNxlGSGqb63eqqenXfM0jaOnnkKEkNxlGSGoyjJDUYR0lqMI6S1GAcJanBOEpSg3GUpAbjKEkNxlGSGoyjJDUYR0lqMI6S1GAcJanBOEpSg3GUpAbjKEkNxlGSGoyjJDVM9c+Q0dyV+XPzS2+7bR7oe4SxufOBHfoeYaI8cpSkBuMoSQ3GUZIajKMkNRhHSWowjpLUYBwlqcE4SlKDcZSkBuMoSQ3GUZIajKMkNRhHSWowjpLUYBwlqcE4SlKDcZSkBuMoSQ3GUZIajKMkNRhHSWowjpLUYBwlqWEq45hkZZJT+55D0tZrKuMoSX172DgmOTLJ3UnmdR/vk6SSfHJonfclOSfJtklOT3JlkvuSXJHk7Um2GVp3RZKzk5yQ5Loktyf5TJId1t8PHAr8dbedSrLXlt5xSdqYeZuwzveB7YHFwAXAEuDW7tf1lgDfZBDb64CXAbcABwHLgV8Bpw+t/wLgBuBw4InAF4HLgWXACcBTgZ8Cf9+tf8vMdkuSNs/DHjlW1T3AD4A/6hYtAU4F9kzy+O6I70BgZVWtq6p3VdXFVXVVVX0R+CTwipGnvQs4vqouq6pvA18CDuu2dyewFlhdVTd2twdH50qyNMklSS5Zx5rZ7LskbdCmXnNcyW+PFA8FvgFc2C17LvAAcBFAkuO7aN2S5B7gzcAeI8+3aiR41wOPm8ngVbW8qhZX1eL5LJjJQyXpYc0kjs9L8gxgIYMjyZUMjiaXAOdX1dokLwdOAVYARwB/CHwC2G7k+daNfFwzmEWSxm5TrjnC4LrjAuDtwPer6sEkK4F/AW5icL0R4PnAhVX1m7+Gk2TvWcy1Fth2Fo+TpC1ik47Whq47Hg2c2y2+ANgdOITBUSQM3lTZP8mfJnlKkpMYnIbP1FXAQUn2SvLY4Xe7JWkSZhKdlQyONFcCVNX9DK47rqG73gh8isE7z58HLgb2Aj4yi7k+zODocRWDd6pHr1lK0lilqvqeYbMtzKI6OIf1PYZmIAvm5ptoO3xnYd8jjM2+C2/se4SxWLbfv/+gqhaPLvd0VZIajKMkNRhHSWowjpLUYBwlqcE4SlKDcZSkBuMoSQ3GUZIajKMkNRhHSWowjpLUYBwlqcE4SlKDcZSkBuMoSQ3GUZIajKMkNRhHSWowjpLUsKk/t1raorZZODd/ENW96+bmDw4DuO/B+X2PMFEeOUpSg3GUpAbjKEkNxlGSGoyjJDUYR0lqMI6S1GAcJanBOEpSg3GUpAbjKEkNxlGSGoyjJDUYR0lqMI6S1GAcJanBOEpSg3GUpAbjKEkNxlGSGoyjJDUYR0lqMI6S1GAcJanBOEpSw1TFMcmRSb6X5PYktyX5VpJn9D2XpK3PVMUR2BE4BTgIWALcCXw1yXajKyZZmuSSJJesY81Eh5Q0983re4BhVfXl4Y+THAvcxSCW3x9ZdzmwHGBhFtWkZpS0dZiqI8ckeyf5fJKfJ7kLuInBjHv0PJqkrcxUHTkCZwPXAscB1wEPAKuA3zmtlqRxmpo4JnkM8HTg9VV1brdsf6ZoRklbj2kKz+3ArcBrk1wD7AZ8iMHRoyRN1NRcc6yqXwMvB54NXAp8HDgJfCta0uRN05EjVfVd4Jkji3fqYxZJW7epOXKUpGliHCWpwThKUoNxlKQG4yhJDcZRkhqMoyQ1GEdJajCOktRgHCWpwThKUoNxlKQG4yhJDcZRkhqMoyQ1GEdJajCOktRgHCWpwThKUsNU/QyZ2Vqzxw5c/s6D+h5ji7vyz5f3PcLYHLH7AX2PMB6HPdj3BGNzad8DTJhHjpLUYBwlqcE4SlKDcZSkBuMoSQ3GUZIajKMkNRhHSWowjpLUYBwlqcE4SlKDcZSkBuMoSQ3GUZIajKMkNRhHSWowjpLUYBwlqcE4SlKDcZSkBuMoSQ3GUZIaZhTHJCuTnDquYSRpWnjkKEkNUx/HJNv1PYOkrc9s4jgvyUeT3N7dPpRkGxiELMkHklybZHWSi5McMfzgJPsm+VqSu5PcnOTfkuw6dP+KJGcneUeSa4FrN28XJWnmZhPHv+we9xzgOGAp8Kbuvs8AhwKvBJ4JfBb4apL9AJI8HjgPuBQ4CDgc2Ak4a31gO4cCzwaOBA6bxYyStFnmzeIxNwBvrKoCfprkqcBbkpwFvALYq6qu7tY9NcnhDCL6euB1wP9W1TvWP1mSvwJuAxYDF3WL7wdeU1VrNjREkqUMwsy2ix49i92QpA2bzZHjBV0Y1zsf2A14PhBgVZJ71t+AFwF7d+seALxw5P5ruvv2HnrOSzcWRoCqWl5Vi6tq8bY77TiL3ZCkDZvNkePGFHAgsG5k+X3dr9sAXwPe2njsTUO/v3cLzyVJMzKbOB6cJENHj4cA1zM4ggywa1Wdu4HH/hB4GfDLqhoNqCRNjdmcVj8BOCXJ05IcBbwNOLmqLgfOAFYkOSrJk5MsTvLWJC/pHvtx4FHAF5Ic3K1zeJLlSXbeInskSVvAbI4czwC2BS5kcBp9OnByd9+xwDuBDwK7M3ij5SLgXICquj7J84BlwDeB7YGrgW8DG73GKEmTNKM4VtWSoQ/f0Lh/HfCe7rah57gCOGoj9796JjNJ0jhM/b+QkaQ+GEdJajCOktRgHCWpwThKUoNxlKQG4yhJDcZRkhqMoyQ1GEdJajCOktRgHCWpwThKUoNxlKQG4yhJDcZRkhqMoyQ1GEdJajCOktSQ3/6E1UeuhVlUB+ewvsfQTCR9TzAW15/5jL5HGJtP7vevfY8wFi980i9+UFWLR5d75ChJDcZRkhqMoyQ1GEdJajCOktRgHCWpwThKUoNxlKQG4yhJDcZRkhqMoyQ1GEdJajCOktRgHCWpwThKUoNxlKQG4yhJDcZRkhqMoyQ1GEdJajCOktRgHCWpwThKUsPUxDHJiiTVuF3Q92yStj7z+h5gxDnAMSPL1vYxiKSt27TFcU1V3dj3EJI0NafVkjRNpi2ORya5Z+T2gdaKSZYmuSTJJetYM+k5Jc1x03ZafR6wdGTZHa0Vq2o5sBxgYRbVeMeStLWZtjiurqqf9T2EJE3babUkTYVpO3JckGTXkWUPVtUtvUwjaas1bXE8HLhhZNl1wO49zCJpKzY1p9VV9eqqSuNmGCVN3NTEUZKmiXGUpAbjKEkNxlGSGoyjJDUYR0lqMI6S1GAcJanBOEpSg3GUpAbjKEkNxlGSGoyjJDUYR0lqMI6S1GAcJanBOEpSg3GUpAbjKEkNxlGSGlJVfc+w2ZLcAvxyQpt7LHDrhLY1SXN1v2Du7pv7tWXsWVW7jC6cE3GcpCSXVNXivufY0ubqfsHc3Tf3a7w8rZakBuMoSQ3GceaW9z3AmMzV/YK5u2/u1xh5zVGSGjxylKQG4yhJDcZRkhqMoyQ1GEdJavh/Q8WGqu6Ak48AAAAASUVORK5CYII=",
259 |       "text/plain": [
260 |        "<Figure size 360x360 with 1 Axes>"
261 |       ]
262 |      },
263 |      "metadata": {
264 |       "needs_background": "light"
265 |      },
266 |      "output_type": "display_data"
267 |     }
268 |    ],
269 |    "source": [
270 |     "# Show Attention\n",
271 |     "fig = plt.figure(figsize=(5, 5))\n",
272 |     "ax = fig.add_subplot(1, 1, 1)\n",
273 |     "ax.matshow([attn.asnumpy() for attn in trained_attn], cmap='viridis')\n",
274 |     "ax.set_xticklabels([''] + sentences[0].split(), fontdict={'fontsize': 14})\n",
275 |     "ax.set_yticklabels([''] + sentences[2].split(), fontdict={'fontsize': 14})\n",
276 |     "plt.show()"
277 |    ]
278 |   }
279 |  ],
280 |  "metadata": {
281 |   "kernelspec": {
282 |    "display_name": "Python 3.7.13 ('ms1.8')",
283 |    "language": "python",
284 |    "name": "python3"
285 |   },
286 |   "language_info": {
287 |    "codemirror_mode": {
288 |     "name": "ipython",
289 |     "version": 3
290 |    },
291 |    "file_extension": ".py",
292 |    "mimetype": "text/x-python",
293 |    "name": "python",
294 |    "nbconvert_exporter": "python",
295 |    "pygments_lexer": "ipython3",
296 |    "version": "3.7.13"
297 |   },
298 |   "vscode": {
299 |    "interpreter": {
300 |     "hash": "bd0943702584cdb580f8947884f31a9fb49482f77f8c89ed6532de3aa180e7ba"
301 |    }
302 |   }
303 |  },
304 |  "nbformat": 4,
305 |  "nbformat_minor": 5
306 | }
307 | 


--------------------------------------------------------------------------------
/5-2.BERT/BERT.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "0d87f0c5-ea59-4411-8e02-1b338a2dee30",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import re\n",
 11 |     "from random import *\n",
 12 |     "import mindspore\n",
 13 |     "import mindspore.nn as nn\n",
 14 |     "import mindspore.ops as ops\n",
 15 |     "import mindspore.numpy as mnp\n",
 16 |     "from layers import Dense, Embedding"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "id": "cd4a9097-c20d-45d3-910c-cd1bb7f32a6a",
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "# sample IsNext and NotNext to be same in small batch size\n",
 27 |     "def make_batch():\n",
 28 |     "    batch = []\n",
 29 |     "    positive = negative = 0\n",
 30 |     "    while positive != batch_size/2 or negative != batch_size/2:\n",
 31 |     "        tokens_a_index, tokens_b_index= randrange(len(sentences)), randrange(len(sentences)) # sample random index in sentences\n",
 32 |     "        tokens_a, tokens_b= token_list[tokens_a_index], token_list[tokens_b_index]\n",
 33 |     "        input_ids = [word_dict['[CLS]']] + tokens_a + [word_dict['[SEP]']] + tokens_b + [word_dict['[SEP]']]\n",
 34 |     "        segment_ids = [0] * (1 + len(tokens_a) + 1) + [1] * (len(tokens_b) + 1)\n",
 35 |     "\n",
 36 |     "        # MASK LM\n",
 37 |     "        n_pred =  min(max_pred, max(1, int(round(len(input_ids) * 0.15)))) # 15 % of tokens in one sentence\n",
 38 |     "        cand_maked_pos = [i for i, token in enumerate(input_ids)\n",
 39 |     "                          if token != word_dict['[CLS]'] and token != word_dict['[SEP]']]\n",
 40 |     "        shuffle(cand_maked_pos)\n",
 41 |     "        masked_tokens, masked_pos = [], []\n",
 42 |     "        for pos in cand_maked_pos[:n_pred]:\n",
 43 |     "            masked_pos.append(pos)\n",
 44 |     "            masked_tokens.append(input_ids[pos])\n",
 45 |     "            if random() < 0.8:  # 80%\n",
 46 |     "                input_ids[pos] = word_dict['[MASK]'] # make mask\n",
 47 |     "            elif random() < 0.5:  # 10%\n",
 48 |     "                index = randint(0, vocab_size - 1) # random index in vocabulary\n",
 49 |     "                input_ids[pos] = word_dict[number_dict[index]] # replace\n",
 50 |     "\n",
 51 |     "        # Zero Paddings\n",
 52 |     "        n_pad = maxlen - len(input_ids)\n",
 53 |     "        input_ids.extend([0] * n_pad)\n",
 54 |     "        segment_ids.extend([0] * n_pad)\n",
 55 |     "\n",
 56 |     "        # Zero Padding (100% - 15%) tokens\n",
 57 |     "        if max_pred > n_pred:\n",
 58 |     "            n_pad = max_pred - n_pred\n",
 59 |     "            masked_tokens.extend([0] * n_pad)\n",
 60 |     "            masked_pos.extend([0] * n_pad)\n",
 61 |     "\n",
 62 |     "        if tokens_a_index + 1 == tokens_b_index and positive < batch_size/2:\n",
 63 |     "            batch.append([input_ids, segment_ids, masked_tokens, masked_pos, 1]) # IsNext\n",
 64 |     "            positive += 1\n",
 65 |     "        elif tokens_a_index + 1 != tokens_b_index and negative < batch_size/2:\n",
 66 |     "            batch.append([input_ids, segment_ids, masked_tokens, masked_pos, 0]) # NotNext\n",
 67 |     "            negative += 1\n",
 68 |     "    return batch\n",
 69 |     "# Proprecessing Finished"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 3,
 75 |    "id": "f024fd22-1226-40c1-a99c-cf1eb89471be",
 76 |    "metadata": {},
 77 |    "outputs": [],
 78 |    "source": [
 79 |     "def get_attn_pad_mask(seq_q, seq_k):\n",
 80 |     "    batch_size, len_q = seq_q.shape\n",
 81 |     "    batch_size, len_k = seq_k.shape\n",
 82 |     "    \n",
 83 |     "    pad_attn_mask = ops.equal(seq_k, 0)\n",
 84 |     "    pad_attn_mask = pad_attn_mask.expand_dims(1) # batch_size x 1 x len_k(=len_q), one is masking\n",
 85 |     "\n",
 86 |     "    return ops.broadcast_to(pad_attn_mask, (batch_size, len_q, len_k)) # batch_size x len_q x len_k"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 4,
 92 |    "id": "55ddeca3-e465-4873-9677-f27a21e335e9",
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "class BertEmbedding(nn.Cell):\n",
 97 |     "    def __init__(self):\n",
 98 |     "        super(BertEmbedding, self).__init__()\n",
 99 |     "        self.tok_embed = Embedding(vocab_size, d_model)  # token embedding\n",
100 |     "        self.pos_embed = Embedding(maxlen, d_model)  # position embedding\n",
101 |     "        self.seg_embed = Embedding(n_segments, d_model)  # segment(token type) embedding\n",
102 |     "        self.norm = nn.LayerNorm([d_model,])\n",
103 |     "\n",
104 |     "    def construct(self, x, seg):\n",
105 |     "        seq_len = x.shape[1]\n",
106 |     "        pos = ops.arange(seq_len, dtype=mindspore.int64)\n",
107 |     "        pos = pos.expand_dims(0).expand_as(x)  # (seq_len,) -> (batch_size, seq_len)\n",
108 |     "        embedding = self.tok_embed(x) + self.pos_embed(pos) + self.seg_embed(seg)\n",
109 |     "        return self.norm(embedding)"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": 5,
115 |    "id": "f824473a-a320-42f1-827d-cb340b92a7c0",
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "class ScaledDotProductAttention(nn.Cell):\n",
120 |     "    def __init__(self):\n",
121 |     "        super(ScaledDotProductAttention, self).__init__()\n",
122 |     "        self.softmax = nn.Softmax(axis=-1)\n",
123 |     "\n",
124 |     "    def construct(self, Q, K, V, attn_mask):\n",
125 |     "        scores = ops.matmul(Q, K.swapaxes(-1, -2)) / ops.sqrt(ops.scalar_to_tensor(d_k)) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n",
126 |     "        scores = scores.masked_fill(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\n",
127 |     "        attn = self.softmax(scores)\n",
128 |     "        context = ops.matmul(attn, V)\n",
129 |     "        return context, attn"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 6,
135 |    "id": "ef49a217-d48b-4a89-babd-ee2722745316",
136 |    "metadata": {},
137 |    "outputs": [],
138 |    "source": [
139 |     "class MultiHeadAttention(nn.Cell):\n",
140 |     "    def __init__(self):\n",
141 |     "        super(MultiHeadAttention, self).__init__()\n",
142 |     "        self.W_Q = Dense(d_model, d_k * n_heads)\n",
143 |     "        self.W_K = Dense(d_model, d_k * n_heads)\n",
144 |     "        self.W_V = Dense(d_model, d_v * n_heads)\n",
145 |     "        self.attn = ScaledDotProductAttention()\n",
146 |     "        self.out_fc = Dense(n_heads * d_v, d_model)\n",
147 |     "        self.norm = nn.LayerNorm([d_model,])\n",
148 |     "\n",
149 |     "    def construct(self, Q, K, V, attn_mask):\n",
150 |     "        # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\n",
151 |     "        residual, batch_size = Q, Q.shape[0]\n",
152 |     "        # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\n",
153 |     "        q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).swapaxes(1,2)  # q_s: [batch_size x n_heads x len_q x d_k]\n",
154 |     "        k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).swapaxes(1,2)  # k_s: [batch_size x n_heads x len_k x d_k]\n",
155 |     "        v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).swapaxes(1,2)  # v_s: [batch_size x n_heads x len_k x d_v]\n",
156 |     "\n",
157 |     "        attn_mask = attn_mask.expand_dims(1)\n",
158 |     "        attn_mask = ops.tile(attn_mask, (1, n_heads, 1, 1))\n",
159 |     "        \n",
160 |     "        # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n",
161 |     "        context, attn = self.attn(q_s, k_s, v_s, attn_mask)\n",
162 |     "        context = context.swapaxes(1, 2).view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\n",
163 |     "        output = self.out_fc(context)\n",
164 |     "        return self.norm(output + residual), attn # output: [batch_size x len_q x d_model]"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 7,
170 |    "id": "f3ac0741-2515-413c-8aec-67a6ab654f44",
171 |    "metadata": {},
172 |    "outputs": [],
173 |    "source": [
174 |     "class PoswiseFeedForwardNet(nn.Cell):\n",
175 |     "    def __init__(self):\n",
176 |     "        super(PoswiseFeedForwardNet, self).__init__()\n",
177 |     "        self.fc1 = Dense(d_model, d_ff)\n",
178 |     "        self.fc2 = Dense(d_ff, d_model)\n",
179 |     "        self.activation = nn.GELU(False)\n",
180 |     "\n",
181 |     "    def construct(self, x):\n",
182 |     "        # (batch_size, len_seq, d_model) -> (batch_size, len_seq, d_ff) -> (batch_size, len_seq, d_model)\n",
183 |     "        return self.fc2(self.activation(self.fc1(x)))"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": 8,
189 |    "id": "cac523ae-53a0-4678-a205-6f51d2e4f4a6",
190 |    "metadata": {},
191 |    "outputs": [],
192 |    "source": [
193 |     "class EncoderLayer(nn.Cell):\n",
194 |     "    def __init__(self):\n",
195 |     "        super(EncoderLayer, self).__init__()\n",
196 |     "        self.enc_self_attn = MultiHeadAttention()\n",
197 |     "        self.pos_ffn = PoswiseFeedForwardNet()\n",
198 |     "\n",
199 |     "    def construct(self, enc_inputs, enc_self_attn_mask):\n",
200 |     "        enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\n",
201 |     "        enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\n",
202 |     "        return enc_outputs, attn"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "code",
207 |    "execution_count": 9,
208 |    "id": "2891fc39-ccf0-4f8c-875a-821ad85ec029",
209 |    "metadata": {},
210 |    "outputs": [],
211 |    "source": [
212 |     "class BERT(nn.Cell):\n",
213 |     "    def __init__(self):\n",
214 |     "        super(BERT, self).__init__()\n",
215 |     "        self.embedding = BertEmbedding()\n",
216 |     "        self.layers = nn.CellList([EncoderLayer() for _ in range(n_layers)])\n",
217 |     "        self.fc = Dense(d_model, d_model)\n",
218 |     "        self.activ1 = nn.Tanh()\n",
219 |     "        self.linear = Dense(d_model, d_model)\n",
220 |     "        self.activ2 = nn.GELU(False)\n",
221 |     "        self.norm = nn.LayerNorm([d_model,])\n",
222 |     "        self.classifier = Dense(d_model, 2)\n",
223 |     "        # decoder is shared with embedding layer\n",
224 |     "        embed_weight = self.embedding.tok_embed.embedding_table\n",
225 |     "        n_vocab, n_dim = embed_weight.shape\n",
226 |     "        self.decoder = Dense(n_dim, n_vocab, has_bias=False)\n",
227 |     "        self.decoder.weight = embed_weight\n",
228 |     "        self.decoder_bias = mindspore.Parameter(ops.zeros(n_vocab), 'decoder_bias')\n",
229 |     "\n",
230 |     "    def construct(self, input_ids, segment_ids, masked_pos):\n",
231 |     "        output = self.embedding(input_ids, segment_ids)\n",
232 |     "        enc_self_attn_mask = get_attn_pad_mask(input_ids, input_ids)\n",
233 |     "        for layer in self.layers:\n",
234 |     "            output, enc_self_attn = layer(output, enc_self_attn_mask)\n",
235 |     "        # output : [batch_size, len, d_model], attn : [batch_size, n_heads, d_mode, d_model]\n",
236 |     "        # it will be decided by first token(CLS)\n",
237 |     "        h_pooled = self.activ1(self.fc(output[:, 0])) # [batch_size, d_model]\n",
238 |     "        logits_clsf = self.classifier(h_pooled) # [batch_size, 2]\n",
239 |     "\n",
240 |     "        masked_pos = ops.tile(masked_pos[:, :, None], (1, 1, output.shape[-1])) # [batch_size, max_pred, d_model]\n",
241 |     "        # get masked position from final output of transformer.\n",
242 |     "        h_masked = ops.gather_d(output, 1, masked_pos) # masking position [batch_size, max_pred, d_model]\n",
243 |     "        h_masked = self.norm(self.activ2(self.linear(h_masked)))\n",
244 |     "        logits_lm = self.decoder(h_masked) + self.decoder_bias # [batch_size, max_pred, n_vocab]\n",
245 |     "\n",
246 |     "        return logits_lm, logits_clsf"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": 10,
252 |    "id": "9391e0d9-f019-4d3e-9c6c-fb57a3b6a8e6",
253 |    "metadata": {},
254 |    "outputs": [],
255 |    "source": [
256 |     "# BERT Parameters\n",
257 |     "maxlen = 30 # maximum of length\n",
258 |     "batch_size = 6\n",
259 |     "max_pred = 5  # max tokens of prediction\n",
260 |     "n_layers = 6 # number of Encoder of Encoder Layer\n",
261 |     "n_heads = 12 # number of heads in Multi-Head Attention\n",
262 |     "d_model = 768 # Embedding Size\n",
263 |     "d_ff = 768 * 4  # 4*d_model, FeedForward dimension\n",
264 |     "d_k = d_v = 64  # dimension of K(=Q), V\n",
265 |     "n_segments = 2"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "code",
270 |    "execution_count": 11,
271 |    "id": "24608b11-440c-4fb6-b070-45ff3d82c014",
272 |    "metadata": {},
273 |    "outputs": [],
274 |    "source": [
275 |     "text = (\n",
276 |     "    'Hello, how are you? I am Romeo.\\n'\n",
277 |     "    'Hello, Romeo My name is Juliet. Nice to meet you.\\n'\n",
278 |     "    'Nice meet you too. How are you today?\\n'\n",
279 |     "    'Great. My baseball team won the competition.\\n'\n",
280 |     "    'Oh Congratulations, Juliet\\n'\n",
281 |     "    'Thanks you Romeo'\n",
282 |     ")\n",
283 |     "sentences = re.sub(\"[.,!?\\\\-]\", '', text.lower()).split('\\n')  # filter '.', ',', '?', '!'\n",
284 |     "word_list = list(set(\" \".join(sentences).split()))\n",
285 |     "word_dict = {'[PAD]': 0, '[CLS]': 1, '[SEP]': 2, '[MASK]': 3}\n",
286 |     "for i, w in enumerate(word_list):\n",
287 |     "    word_dict[w] = i + 4\n",
288 |     "number_dict = {i: w for i, w in enumerate(word_dict)}\n",
289 |     "vocab_size = len(word_dict)\n",
290 |     "\n",
291 |     "token_list = list()\n",
292 |     "for sentence in sentences:\n",
293 |     "    arr = [word_dict[s] for s in sentence.split()]\n",
294 |     "    token_list.append(arr)"
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "code",
299 |    "execution_count": 12,
300 |    "id": "fe4e30ab-9e7d-4868-893f-b160cf090959",
301 |    "metadata": {},
302 |    "outputs": [],
303 |    "source": [
304 |     "model = BERT()\n",
305 |     "criterion = nn.CrossEntropyLoss()\n",
306 |     "optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)"
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "code",
311 |    "execution_count": 13,
312 |    "id": "eaef3603-8154-495b-9e00-5916c65c9f3c",
313 |    "metadata": {},
314 |    "outputs": [],
315 |    "source": [
316 |     "def forward(input_ids, segment_ids, masked_pos, masked_tokens, isNext):\n",
317 |     "    logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)\n",
318 |     "    loss_lm = criterion(logits_lm.swapaxes(1, 2), masked_tokens.astype(mindspore.int32))\n",
319 |     "    loss_lm = loss_lm.mean()\n",
320 |     "    loss_clsf = criterion(logits_clsf, isNext.astype(mindspore.int32))\n",
321 |     "\n",
322 |     "    return loss_lm + loss_clsf"
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "code",
327 |    "execution_count": 14,
328 |    "id": "0c1c152d-d4f0-4a66-b3e2-5cbf76d15d48",
329 |    "metadata": {},
330 |    "outputs": [],
331 |    "source": [
332 |     "grad_fn = ops.value_and_grad(forward, None, optimizer.parameters)"
333 |    ]
334 |   },
335 |   {
336 |    "cell_type": "code",
337 |    "execution_count": 15,
338 |    "id": "e2cd05a5-b034-46cd-980e-dfb15e7b6155",
339 |    "metadata": {},
340 |    "outputs": [],
341 |    "source": [
342 |     "@mindspore.jit\n",
343 |     "def train_step(input_ids, segment_ids, masked_pos, masked_tokens, isNext):\n",
344 |     "    loss, grads = grad_fn(input_ids, segment_ids, masked_pos, masked_tokens, isNext)\n",
345 |     "    optimizer(grads)\n",
346 |     "    return loss"
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "code",
351 |    "execution_count": 16,
352 |    "id": "81bf550b-8239-440d-9fda-c556dee4552c",
353 |    "metadata": {},
354 |    "outputs": [
355 |     {
356 |      "name": "stderr",
357 |      "output_type": "stream",
358 |      "text": [
359 |       "[ERROR] CORE(1267049,7f74549fd4c0,python):2024-04-16-15:56:16.580.126 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_1267049/3083615623.py]\n",
360 |       "[ERROR] CORE(1267049,7f74549fd4c0,python):2024-04-16-15:56:16.580.172 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_1267049/3083615623.py]\n"
361 |      ]
362 |     },
363 |     {
364 |      "name": "stdout",
365 |      "output_type": "stream",
366 |      "text": [
367 |       "Epoch: 0010 cost = 46.552399\n",
368 |       "Epoch: 0020 cost = 19.055964\n",
369 |       "Epoch: 0030 cost = 15.114850\n",
370 |       "Epoch: 0040 cost = 9.543916\n",
371 |       "Epoch: 0050 cost = 6.100155\n",
372 |       "Epoch: 0060 cost = 2.962293\n",
373 |       "Epoch: 0070 cost = 3.004694\n",
374 |       "Epoch: 0080 cost = 2.631464\n",
375 |       "Epoch: 0090 cost = 2.321460\n",
376 |       "Epoch: 0100 cost = 2.230808\n"
377 |      ]
378 |     }
379 |    ],
380 |    "source": [
381 |     "batch = make_batch()\n",
382 |     "input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(mindspore.Tensor, zip(*batch))\n",
383 |     "\n",
384 |     "model.set_train()\n",
385 |     "for epoch in range(100):\n",
386 |     "    loss = train_step(input_ids, segment_ids, masked_pos, masked_tokens, isNext) # for sentence classification\n",
387 |     "    if (epoch + 1) % 10 == 0:\n",
388 |     "        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.asnumpy()))"
389 |    ]
390 |   },
391 |   {
392 |    "cell_type": "code",
393 |    "execution_count": null,
394 |    "id": "f7da833d-9efb-475f-9aa1-93be15e3ea73",
395 |    "metadata": {},
396 |    "outputs": [],
397 |    "source": [
398 |     "# Predict mask tokens ans isNext\n",
399 |     "input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(mindspore.Tensor, zip(batch[0]))\n",
400 |     "print(text)\n",
401 |     "print([number_dict[int(w.asnumpy())] for w in input_ids[0] if number_dict[int(w.asnumpy())] != '[PAD]'])\n",
402 |     "\n",
403 |     "logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)\n",
404 |     "logits_lm = logits_lm.argmax(2)[0].asnumpy()\n",
405 |     "print('masked tokens list : ',[pos for pos in masked_tokens[0] if pos != 0])\n",
406 |     "print('predict masked tokens list : ',[pos for pos in logits_lm if pos != 0])\n",
407 |     "\n",
408 |     "logits_clsf = logits_clsf.argmax(1).asnumpy()[0]\n",
409 |     "print('isNext : ', True if isNext else False)\n",
410 |     "print('predict isNext : ',True if logits_clsf else False)"
411 |    ]
412 |   }
413 |  ],
414 |  "metadata": {
415 |   "kernelspec": {
416 |    "display_name": "Python 3 (ipykernel)",
417 |    "language": "python",
418 |    "name": "python3"
419 |   },
420 |   "language_info": {
421 |    "codemirror_mode": {
422 |     "name": "ipython",
423 |     "version": 3
424 |    },
425 |    "file_extension": ".py",
426 |    "mimetype": "text/x-python",
427 |    "name": "python",
428 |    "nbconvert_exporter": "python",
429 |    "pygments_lexer": "ipython3",
430 |    "version": "3.9.18"
431 |   },
432 |   "vscode": {
433 |    "interpreter": {
434 |     "hash": "bd0943702584cdb580f8947884f31a9fb49482f77f8c89ed6532de3aa180e7ba"
435 |    }
436 |   }
437 |  },
438 |  "nbformat": 4,
439 |  "nbformat_minor": 5
440 | }
441 | 


--------------------------------------------------------------------------------
/4-3.Bi-LSTM(Attention)/Bi-LSTM-Attention.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "444b3f0a-f9a8-48bc-8aa9-eb6970040e2a",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import numpy as np\n",
 11 |     "import mindspore\n",
 12 |     "import mindspore.nn as nn\n",
 13 |     "import mindspore.ops as ops\n",
 14 |     "import mindspore.numpy as mnp\n",
 15 |     "import matplotlib.pyplot as plt\n",
 16 |     "from mindspore import ms_function"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "id": "4956c4aa-bf84-44c0-b271-58a26a26ba87",
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "class BiLSTM_Attention(nn.Cell):\n",
 27 |     "    def __init__(self):\n",
 28 |     "        super(BiLSTM_Attention, self).__init__()\n",
 29 |     "\n",
 30 |     "        self.embedding = nn.Embedding(vocab_size, embedding_dim)\n",
 31 |     "        self.lstm = nn.LSTM(embedding_dim, n_hidden, bidirectional=True)\n",
 32 |     "        self.out = nn.Dense(n_hidden * 2, num_classes)\n",
 33 |     "\n",
 34 |     "    # lstm_output : [batch_size, n_step, n_hidden * num_directions(=2)], F matrix\n",
 35 |     "    def attention_net(self, lstm_output, final_state):\n",
 36 |     "        hidden = final_state.view(-1, n_hidden * 2, 1)   # hidden : [batch_size, n_hidden * num_directions(=2), 1(=n_layer)]\n",
 37 |     "        attn_weights = ops.matmul(lstm_output, hidden).squeeze(2) # attn_weights : [batch_size, n_step]\n",
 38 |     "        soft_attn_weights = ops.Softmax(1)(attn_weights)\n",
 39 |     "        # [batch_size, n_hidden * num_directions(=2), n_step] * [batch_size, n_step, 1] = [batch_size, n_hidden * num_directions(=2), 1]\n",
 40 |     "        context = ops.matmul(lstm_output.swapaxes(1, 2), soft_attn_weights.expand_dims(2)).squeeze(2)\n",
 41 |     "        return context, soft_attn_weights # context : [batch_size, n_hidden * num_directions(=2)]\n",
 42 |     "\n",
 43 |     "    def construct(self, X):\n",
 44 |     "        input = self.embedding(X) # input : [batch_size, len_seq, embedding_dim]\n",
 45 |     "        input = input.transpose(1, 0, 2) # input : [len_seq, batch_size, embedding_dim]\n",
 46 |     "\n",
 47 |     "        # final_hidden_state, final_cell_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n",
 48 |     "        output, (final_hidden_state, final_cell_state) = self.lstm(input)\n",
 49 |     "        output = output.transpose(1, 0, 2) # output : [batch_size, len_seq, n_hidden]\n",
 50 |     "        attn_output, attention = self.attention_net(output, final_hidden_state)\n",
 51 |     "        return self.out(attn_output), attention # model : [batch_size, num_classes], attention : [batch_size, n_step]"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 3,
 57 |    "id": "ab7e4fe2-0dd5-4473-bbd4-67f2115bd181",
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "embedding_dim = 2 # embedding size\n",
 62 |     "n_hidden = 5  # number of hidden units in one cell\n",
 63 |     "num_classes = 2  # 0 or 1\n",
 64 |     "\n",
 65 |     "# 3 words sentences (=sequence_length is 3)\n",
 66 |     "sentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \"i hate you\", \"sorry for that\", \"this is awful\"]\n",
 67 |     "labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.\n",
 68 |     "\n",
 69 |     "word_list = \" \".join(sentences).split()\n",
 70 |     "word_list = list(set(word_list))\n",
 71 |     "word_dict = {w: i for i, w in enumerate(word_list)}\n",
 72 |     "vocab_size = len(word_dict)"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 4,
 78 |    "id": "f0d10505-d45e-481f-a406-e66c42b15775",
 79 |    "metadata": {},
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "model = BiLSTM_Attention()\n",
 83 |     "criterion = nn.CrossEntropyLoss()\n",
 84 |     "optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": 5,
 90 |    "id": "82c877f4-d0ab-48d5-a724-bff9acc7527a",
 91 |    "metadata": {},
 92 |    "outputs": [],
 93 |    "source": [
 94 |     "inputs = mindspore.Tensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences])\n",
 95 |     "targets = mindspore.Tensor([out for out in labels], mindspore.int32)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 6,
101 |    "id": "dcad76d3",
102 |    "metadata": {},
103 |    "outputs": [],
104 |    "source": [
105 |     "def forward(input, target):\n",
106 |     "    output, attn = model(input)\n",
107 |     "    loss = criterion(output, target)\n",
108 |     "    return loss, attn"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 7,
114 |    "id": "758a3ab5",
115 |    "metadata": {},
116 |    "outputs": [],
117 |    "source": [
118 |     "grad_fn = ops.value_and_grad(forward, None, optimizer.parameters, has_aux=True)"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": 8,
124 |    "id": "f0bffb1c",
125 |    "metadata": {},
126 |    "outputs": [],
127 |    "source": [
128 |     "@ms_function\n",
129 |     "def train_step(input, target):\n",
130 |     "    (loss, _), grads = grad_fn(input, target)\n",
131 |     "    optimizer(grads)\n",
132 |     "    return loss"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 9,
138 |    "id": "79044130-6d93-41cc-b4a4-faf67858be4c",
139 |    "metadata": {},
140 |    "outputs": [
141 |     {
142 |      "name": "stdout",
143 |      "output_type": "stream",
144 |      "text": [
145 |       "Epoch: 1000 cost = 0.004177\n",
146 |       "Epoch: 2000 cost = 0.000893\n",
147 |       "Epoch: 3000 cost = 0.000332\n",
148 |       "Epoch: 4000 cost = 0.000157\n",
149 |       "Epoch: 5000 cost = 0.000082\n"
150 |      ]
151 |     }
152 |    ],
153 |    "source": [
154 |     "model.set_train()\n",
155 |     "# Training\n",
156 |     "for epoch in range(5000):\n",
157 |     "    loss = train_step(inputs, targets)\n",
158 |     "    if (epoch + 1) % 1000 == 0:\n",
159 |     "        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.asnumpy()))"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 10,
165 |    "id": "c29d2248-796b-44f5-890e-2ec9267a4de5",
166 |    "metadata": {},
167 |    "outputs": [
168 |     {
169 |      "name": "stdout",
170 |      "output_type": "stream",
171 |      "text": [
172 |       "sorry hate you is Bad Mean...\n"
173 |      ]
174 |     }
175 |    ],
176 |    "source": [
177 |     "# Test\n",
178 |     "test_text = 'sorry hate you'\n",
179 |     "tests = [np.asarray([word_dict[n] for n in test_text.split()])]\n",
180 |     "test_batch = mindspore.Tensor(tests)\n",
181 |     "\n",
182 |     "# Predict\n",
183 |     "predict, attention = model(test_batch)\n",
184 |     "predict = predict.argmax(1)\n",
185 |     "\n",
186 |     "if predict[0] == 0:\n",
187 |     "    print(test_text,\"is Bad Mean...\")\n",
188 |     "else:\n",
189 |     "    print(test_text,\"is Good Mean!!\")"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": 11,
195 |    "id": "da2e17c5-2f00-4673-a2f7-b91cfa7e4c39",
196 |    "metadata": {},
197 |    "outputs": [
198 |     {
199 |      "name": "stderr",
200 |      "output_type": "stream",
201 |      "text": [
202 |       "/home/lvyufeng/miniconda3/envs/ms1.8/lib/python3.7/site-packages/ipykernel_launcher.py:4: UserWarning: FixedFormatter should only be used together with FixedLocator\n",
203 |       "  after removing the cwd from sys.path.\n",
204 |       "/home/lvyufeng/miniconda3/envs/ms1.8/lib/python3.7/site-packages/ipykernel_launcher.py:5: UserWarning: FixedFormatter should only be used together with FixedLocator\n",
205 |       "  \"\"\"\n"
206 |      ]
207 |     },
208 |     {
209 |      "data": {
210 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZsAAADjCAYAAABTnrngAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAfO0lEQVR4nO3de7hcVZnn8e+PQACNmR4ICARCREQJYmIIAXRAkEGxoWkEcZ6RANqtAVvkKjpkuqFHhIhcQmjQGORmNwPTPIiI3ZrGNAEZQ0hQGhkSrpIQAiQhaBIDIcA7f6x1TJ3KOTn7XKp21dm/z/PUU1V77ctbTyXnrXXZaykiMDMza6Qtyg7AzMwGPycbMzNrOCcbMzNrOCcbMzNrOCcbMzNrOCcbMzNrOCcbMzNrOCcbMzNrOCcbMzNruC3LDsDMNiXpbaDQ9B4RMaTB4Zj1m5ONWWv6LBuTzbuBbwJ3AnPztoOAY4ELmx6ZWR/Ic6OZtTZJPwHujojr6rZ/CTg2Io4qJzKz4pxszFqcpLXAuIh4um77nsB/RMQ7y4nMrDgPEDBrfSuBz3Sx/TPAiibHYtYn7rMxa30XADdKOoyNfTYHAv8V+OvSojLrBTejmbUBSROBM4G986aFwNURMa+8qMyKc7Ixa2GStgL+CZgSEc+UHY9ZX7nPxqyFRcQG4BMUvOfGrFU52Zi1vh8Bx5UdhFl/eICAWetbAvytpIOBBcAfawsj4spSojLrBffZmLU4Sb/bTHFExB5NC8asj5xszMys4dxnY9ZGJA2T5BkDrO042Zi1AUlfkbQE+AOwWtJiSX9TdlxmRXmAgFmLkzQFOB+4HHggbz4Y+Lak4RHx7dKCMyvIfTZmLS7XaL4REbfWbT8RuCQidi8nMrPiXLMxa307AvO72P4Qaa0bayF59GDRhe8qM5LQycas9T0JfI60gFqtzwFPND8c68E1Na+HAeeQfhjULnw3EbiiyXGVys1oZi1O0nHAPwNzgP+bN38U+BhwQkT8uJzIrCeSbgKejIhL6rafD+wTEZNKCawETjZmbUDSfsDZdJ71+YqI+E15UVlPJK0Gxnez8N2vI2J4OZE1n5vRzNpARDwMVOZX8CDyR+BQ4Om67YcC65odTJmcbAYpSfdSvJPy4w0Ox/pB0kzgXmBORLxYdjzWK9OAayVNAB7M2w4ETgH+vqygyuBkM3g9VvN6CHAi8BLQsdjWRGBn0lop1treAVwKjJT0DKnvZg4p+SwrMS7rQUR8R9JzpIXvPps3LwROiYh/Li2wErjPpgIkTSMlnDOj5guXdBXp38CZZcVmxeV2/o+RmmAOAXYFno6I95cZl3UtL3x3MXBtRCwuO56yOdlUgKRXgIMi4sm67XsBD0bEduVEZr0haQtgf+DjwGGkpPNCRLynzLise5LWAh+MiOfKjqVsnhutGgTs28X2rrZZi5H0dUn/CvweuBXYC7gFeJ8TTcubRfpxUHnus6mGG4AfSHofnTspvw7cWFpUVtS3gRXARcBNEbGi5HisuNnAJZI+BDzMpgvf/aiUqErgZrQKyM0vXyN1Uu6cN78ITCfdq/FWWbFZzyQdTmoyOxSYQBpGO4c0Qu2+iHilrNhs8yS9vZniiIghTQumZE42g5ykLYHJwI8jYpmk4QARsbrcyKwvJG0LfIQ0uvBEYIuI2KrcqMx65mRTAZL+CIzxiJj2JWlHNg4KOIzUb/MSqWbz30sMzawQDxCohgeB/coOwvpG0kJSs+c04M/y894RsYsTTeuTdJSk+yWtlLRC0n2S/rzsuJrNAwSq4Trgckmj6LqT8telRGVFXUW6gdMzPLcZSV8EvksaPXhz3nwwcKekL0fEDaUF12RuRqsAd1JWQ570cVxEPFt2LJZIegqYHhHX1G3/KvDViNirnMiazzWbavC9GNWgsgOwTYwCft7F9p+RlvmuDCebCvDAALPSLAGOYNNZnz8BVOr/pZNNReSbyr4GjCHNBv04cFlEPLbZA82sPy4H/kHSeOBXedtHgZOAr5YWVQmcbCpA0jHAj4BfkqrvAP8F+I2k4yLi7tKCMxvEIuL7kpYD5wLH5c0Lgc9GxF3lRdZ8HiBQAZIeBe6MiAvrtn8T+MuIGFtOZDaQPEDAWpnvs6mGvYB/7GL7PwKenn7w8ACBFiNpiqSD8kweleZkUw3L6fqmzv2Al5scizXOp4AXyg7COvkUaQ67VyX9W04+H6li8qncB66o64Dv58W3ajspvwZcVlpU1i1JhW/2i4i/ys8PNC4i64uIODjPZ/dR0sJ3nwL+DnhT0q8i4pOlBthE7rOpAEkCziJ1Uu6SNy8jJZqrw/8IWo6k+kEbhwBvA7/N7z9Iapm4PyKOaWZs1jeS3k1a2+Yo0hLRb0bEO8qNqnmcbCpG0rsAImJN2bFYMZLOBz4MfCEi/pi3vRO4HvhtRFxcZnzWPUmfZePkqaOAecB9pCUiHoyI9aUF12RONhUg6XPAvRHxYtmxWO9JehE4PCIer9u+DzA7InYqJzLrSZ4qagXpfptrI2JdySGVxgMEquHbwFJJT0qaKelzknbp8ShrFcPY2PxZa2egMs0wbWoy8G+kGziXSbpb0rmSxufm7cpwzaYi8uCAQ0mdlB8DRgLPkGo8p5YYmvVA0k3A4cB5dF7W+1LS9/f5ciKz3pD0XtL/wSOATwNrI2L7UoNqIiebipE0BJgIfAmYBAzxrM+tLY9mugL4K6BjVc43SX02X6ty00w7yMuy709KNB8njUwbCjwcEQeVGFpTOdlUgKSJbOyk/Ciwko2dlHM8UWd7yIMC3pvfPtMxWMBal6SfkZbx3pa0ltSc/Higat+fk00F1HVS/p+IWFJySGaVIGkqFU0u9ZxsKkDSt0j9NPuTpjq/l421mldKDM0KkLQNcCap32ZH6gb2RMSHyojLBo6k3wJ/HhHPlx1LozjZVEhu+/8IGwcKTASe8EScrS3PJvBp4HbSzbid/tNGxP8qIy4bOJLWAGMH8ySqnq6mWoYDI0i/jncidVKOKDUiK+JY4ISI+EXZgZj1le+zqQBJ35P0OOlX8TRS0rkC2DsiRpYanBWxDhi0zStWDa7ZVMOfAdNJfTRPlByL9d53gHMkneZ57Kxduc/G/kTSvwBf9LQ2rSVPynkw8AfSct4bass9EWf7c5+NVc0hpPsBrLWsBO4sOwiz/nCyMWtxEfGFsmOwhjuVQb6QoZONWZuQtAcwhjT0eeFgbnJpZ5IuKLpvRHwzP//vxkXUGtxnY39ShXbjdiRpOGketONJC6gBCLgD+GuvTdRa8g2atXYnzc69LL/fhTTC8Lkq3ZDroc9mrW868CHS3Hbb5sfhedtV5YVlXYmIfTsewJWkOdH2iIhRETEK2AOYT8W+O9ds7E9cs2lNkl4Bjo2IX9ZtPwS4s0rT1LcbSb8jfXf/Ubd9HHBXROxeSmAlcM2mAiQdImmT/jlJW+Y/WB0uAVY1LzIraFugqznsVgHbNDkW65130/UIz22o2OwdrtlUgKS3gJ0jYnnd9u2B5V7PprVJugdYDZzUsXZNXm7gh8DwiDiizPise5LuIjWbfYnUdBakOQm/D/wuIo4tL7rm8mi0ahB1kzdm2wOVnva8TZwNzAJekPRo3rYvqZP5k6VFZUV8EbgZ+BXwVt62Ben7/FJZQZXBNZtBTNJP8sujgF8A62uKhwAfJA2hPbLZsVnvSHoHcCLwgbxpIXBLRLxWXlS2OXmFzg8AS4Cdgb1z0aKIeLK0wErims3g1tHOL+BVoPYP0xvAA8B1zQ7Kei83n/m7ai8BPAKMiYingKfKDadcTjaDWMed55KeAy6v+kqB7UrSxcDzETGjbvtpwMiI+LtyIrPNiYiQ9ASwA2nRwkrzaLRquIiaWo2knSR9UdJHSozJijsJ+E0X2x8GTm5yLNY7XwculzROksoOpkzus6kAST8Dfh4R0yUNAxYB7wSGke5A/2GpAdpmSXqd1BTzbN32PYDHI8LDn1tUvndtG9IP+zfp3G9KRAwvI64yuBmtGiaQfmEBHEcaRvseUofz10hDaK11LSEtMVB/s+0hwNLmh2O9cHrZAbQKJ5tqGAb8Pr/+BOmu8w2S/h24trSorKjvA9MkDQX+PW87HJgKXFpaVNajiLi57BhahZNNNSwBPpoX4fokcELevh3pXg1rYRFxhaQRwNXA0Lz5DWB6RHynvMisK5K2i4hVHa83t2/HflXgPpsKkHQqcA2wFlgMjI+ItyWdQZq36eOlBmiF5FkDxuS3CyNibZnxWNdqZ+yQ9DZd31At0oC1ysze4ZpNBUTE9yUtAEYB90RExzT1zwAeNts+tiV1ND8SEet72tlK83E2zjF4WJmBtBLXbAY5SVuRbt48OSKeKDse6z1J7wJuIK1nE8D7IuJZSTOAlyLi78uMz6wI32czyEXEBtLIM/+qaF+XkhbcGk/nWSB+Cny6lIisVyTtku+1GV/7KDuuZnIzWjXcTJr077yyA7E+OQb4dEQ8Iqn2R8NC0ozC1qIkfRj4J9IcafU3dQZpjsJKcLKphncCJ0o6gnTXeadpayLijFKisqL+M12vZ/MuNs4kbK1pJvA86cfeMircwuBkUw17A7/Or+t/CVf2H38bmU+q3VyV33d8Z6eSpq631jUG+HAVZ3mu52RTARHhETHtbQowS9I+pP+z5+TXB5BmFrDW9VtgJ6Dyycaj0czagKQPkvrc9iMN7HkY+E5E/LbUwGwTdTdyjiMtt/63pMSzoXZf39RpbS8vnDYpIlbnmQO6/aIj4pjmRWa9JWkM8FbH0HVJnyDN9vz/SAnH/TYtpIsbOTsGBtRv802dNih8kI3/uFeWGYj12w2k/ponJO0G3AncB3wFGA6cX15o1oXaZuvRpAEC9T8ItiDdZF0ZrtkMUvnX1U55yoxngf0joqsRTdbiJP0emBgRT0o6GzgmIg6TdBhwY0SMLjVA61bt1DV127cHllepZuObOgevVaSbOSH9uvJ33b6GkCbehDTb87/m188A7y4lIitKdN2EPQx4vcmxlMrNaIPXHcB9kl4k/WNfkH9lbSIifGNga3sM+LKkn5KSTUez2UjcRNqSJF2dXwYwVVLt7OpDgInAI82Oq0xONoPXacBPgPcBVwI3AmtKjcj66hvAj0kL3d1cMwLtGOChsoKyzdo3P4t0n9sbNWVvkO57u7zZQZXJfTYVIOlG4IyIcLJpU5KGAMMj4tWabaOBdfX9AdY68v+9MyNiddmxlM3JxszMGs6dxmZm1nBONmZm1nBONhUkaXLZMVjf+ftrb1X9/pxsqqmS/9gHEX9/7a2S35+TjZmZNZxHo3VjxHZDYvRuW5UdRkOseOUtdth+cM+S8eTT2/W8U5va8OY6ttryHWWH0TjrBveN9RtYz1ZsXXYYDfE6f+SNWF+/Iingmzq7NXq3rXho1m5lh2F9dOQxk8oOwfooFjxWdgjWR/NidrdlbkYzM7OGc7IxM7OGc7IxM7OGc7IxM7OG6zHZSJoj6ZpmBFNzzdGSQtKEZl7XzMwao+E1G0mH5sQxotHXqrvudEkLJL0u6blmXtvMzDobzM1oWwA3Az8sOxAzs6ormmy2zDWFV/PjMklbAEiaJGm+pDWSlku6XdLIXDYauDefY0Wu4dyUyyTpXElPSVovaamkqXXX3V3SPZLWSXpc0hFFP1hEfDUi/gF4sugxZmbWGEWTzYl534OAU0lz+5yVy4YCFwJjgaOBEcCtuex54Pj8eh9gZ+DM/P4S4O+AqbnshLx/rYuBq/O55wO3SRpWMGYzM2sRRWcQeJG00mMAiyTtBZwDXBkRN9Ts96ykLwMLJe0aEUslrcplyyNiJUBOGGcDZ9Uc/zQwt+660yLi7nzMFOBkYBzwQK8+ZUF5NtbJAKNGenIFM7OBUrRm82B0nkRtLjBS0nBJ4yXdJWmxpDXAgrzPqM2cbwywNdD93AbJozWvl+XnHQvG3GsRMTMiJkTEhME+d5iZWTP1d4CAgFnAOuAkYH/gyFw2tJ/nBtjQ8aIm2Q3mQQ1mZoNS0T/cB0iqncnzQFJNY09SH82UiLg/Ihaxac3jjfxcW1VYCKwHDu99yGZm1m6KJptdgKskvV/SZ4DzgGnAElLSOF3SHpKOAi6qO3YxEMBRknaQNCwi1gDTgamSviDpvZIm5v6eASFpT0njcuxDJY3Lj4GocZmZWS8U7QW/hVQzmUdKHNeTOu/fknQKaWTZV0h9LOcAP+84MCJekHQhaWTZD0j3vXweOB94lTQibVfgZQb2npgfAB+ref+b/Pwe4LkBvI6ZmfXAi6d1Y8LYbcLr2bQvr2fTvryeTfuaF7NZHau6XDzNne1mZtZwbZlsJM2QtLabx4yy4zMzs87a9c7FC4DLuylb3cxAzMysZ22ZbCJiObC87DjMzKyYtmxGMzOz9uJkY2ZmDedkY2ZmDedkY2ZmDedkY2ZmDedkY2ZmDedkY2ZmDddjspE0R9I1zQim5pqjJYWkCc28rpmZNUbDazaSDs2JY0Sjr1VzzbGSbpX0vKTXJD0h6euSXJMzMytBW84gUMB+wArS6qFLgInAdaTPe0mJcZmZVVLRX/pbSpou6dX8uKyjliBpkqT5ktZIWi7pdkkjc9lo4N58jhW5hnNTLpOkcyU9JWm9pKWSptZdd3dJ90haJ+lxSUcUCTYiboiIMyJiTkQ8GxG3Ad8Dji/4ec3MbAAVTTYn5n0PAk4FJgNn5bKhwIXAWOBo0jLRt+ay59n4B34fYGfgzPz+EtLCaVNz2Ql5/1oXA1fnc88HbpM0rGDM9YaTFmvrlqTJkhZIWrDilbf6eBkzM6tXtBntReCMSCutLZK0F2lFzisj4oaa/Z7NSzsvlLRrRCyVtCqXLY+IlQA5YZwNnFVz/NPA3LrrTouIu/MxU4CTgXHAA735kJLGk1YHPXFz+0XETGAmpMXTenMNMzPrXtGazYPReUnPucBIScMljZd0l6TFktYAC/I+ozZzvjHA1sDsHq77aM3rZfl5x4IxAyDp/cC/AFdFxB29OdbMzAZGf0dnCZgFrCN1xu8PHJnLhvbz3AAbOl7UJLvCMUv6ADAHuC0i/scAxGNmZn1Q9A/3AZJq15U+kFTT2JPURzMlIu6PiEVsWvN4Iz8Pqdm2EFgPHN77kIuRNIaUaG6PiLMbdR0zM+tZ0WSzC3CVpPdL+gxwHjCNNKx4PXC6pD0kHQVcVHfsYiCAoyTtIGlYRKwBpgNTJX1B0nslTcz9Pf0maR/SKLg5wCWSdup4DMT5zcysd4omm1tINZN5pPtVrid13q8ATgGOBR4njUo7p/bAiHghb78YeBnomI3gfOBS0oi0hcAdwK59/yidnECqYf030uCG2oeZmTWZOvf7W4cJY7eJh2btVnYY1kdHHjOp7BCsj2LBY2WHYH00L2azOlapqzJP32JmZg3XlslG0gxJa7t5zCg7PjMz66xd50a7ALi8m7LVzQzEzMx61pbJJiKWA8vLjsPMzIppy2Y0MzNrL042ZmbWcE42ZmbWcE42ZmbWcE42ZmbWcE42ZmbWcD0mG0lzJF3T034DSdLovIT0hGZe18zMGqPhNRtJh+bEMaLR16q55g6SZklaJmm9pOclXSvpPzUrBjMz22iwNqO9DdwJ/AWwF2lJ6MNJM1abmVmTFU02W0qaLunV/LhM0hYAkiZJmi9pjaTlkm6XNDKXjSatKwOwItdwbsplknSupKdy7WOppKl1191d0j2S1kl6XNIRRYKNiFciYkZEPBwRiyNiNvBd4OCCn9fMzAZQ0WRzYt73IOBUYDJwVi4bSlqvZixwNGnlzltz2fPA8fn1PsDOwJn5/SWktWym5rIT8v61LgauzueeD9wmaVjBmP9E0i7AccB9vT3WzMz6r+jcaC8CZ0Ra/GaRpL1Ii6RdGRE31Oz3bF5tc6GkXSNiqaRVuWx5RKwEyAnjbOCsmuOfBubWXXdaRNydj5kCnAyMAx4oErSkW4G/BLYFfgp8oYf9J5MSKaNGtuW0cWZmLalozebB6LzK2lxgpKThksZLukvSYklrgAV5n1GbOd8YYGtgdg/XfbTm9bL8vGPBmCEltPGkhLMHcNXmdo6ImRExISIm7LD9kF5cxszMNqe/P98FzAJ+AZxEmol5BPBLUvNaf23oeBERIQl6MaghIl4CXiLVxlYBv5T0rYiob64zM7MGKvqH+wDlv/TZgaSaxp6k5DIlIu6PiEVsWvN4Iz/XVhUWAutJI8SapeOzbt3Ea5qZGcVrNrsAV0n6LrAvcB7wLWAJKWmcLulaYG/gorpjFwMBHCXpbuC1iFgjaTowVdJ64H5ge2C/iPhefz+UpKPz+R4G1pIGIFxGag58ur/nNzOz3imabG4h1UzmkRLH9aTO+7cknUIaWfYVUh/LOcDPOw6MiBckXUgaWfYD4Iek+17OB14ljUjbFXg5lw2E14HTSMlva9IotzuBbw/Q+c3MrBfUud/fOkwYu008NGu3ssOwPjrymEllh2B9FAseKzsE66N5MZvVsUpdlQ3WGQTMzKyFtGWykTRD0tpuHjPKjs/MzDpr1zsXLwAu76ZsdTMDMTOznrVlsomI5aR7eszMrA20ZTOamZm1FycbMzNrOCcbMzNrOCcbMzNrOCcbMzNrOCcbMzNruB6TjaQ5kq5pRjA11xydl5Ce0MzrmplZYzS8ZiPp0Jw4RjT6Wt1cf4SkF8qMwcys6qrQjHYj8EjZQZiZVVnRZLOlpOmSXs2PyyRtASBpkqT5ktZIWi7pdkkjc9lo4N58jhW5dnFTLpOkcyU9JWm9pKWSptZdd3dJ90haJ+lxSUf05sNJOhN4B3BFb44zM7OBVTTZnJj3PQg4FZgMnJXLhgIXAmOBo0krd96ay54Hjs+v9wF2Bs7M7y8hrWUzNZedkPevdTFwdT73fOA2ScOKBCzpw8A3gJOBtwt9SjMza4iic6O9CJwRafGbRZL2Ii2SdmVE3FCz37OSvgwslLRrRCyVtCqXLY+IlQA5YZwNnFVz/NPA3LrrTouIu/MxU0iJYxzwwOaClfRO4Dbgq3nxtvcV+ZCSJpMSKaNGtuW0cWZmLalozebB6LzK2lxgpKThksZLukvSYklrgAV5n1GbOd8Y0gqas3u47qM1r5fl5x0LxHs18EBE3FFg3z+JiJkRMSEiJuyw/ZDeHGpmZpvR3wECAmYB64CTgP2BI3PZ0H6eG2BDx4uaZFck5sOBz0t6U9KbbExqL0m6eADiMjOzXijaVnSAJNX8wT+QVNPYk9RHMyUifgcg6bi6Y9/Iz7VVhYXAelJSeKovgffgE3ROdvsDNwCHNuh6Zma2GUWTzS7AVZK+C+wLnAd8C1hCShqnS7oW2Bu4qO7YxUAAR0m6G3gtItZImg5MlbQeuB/YHtgvIr7X3w8VEU/Wvq+5v2ZRR7+RmZk1T9FmtFtINZN5wHXA9aTO+xXAKcCxwOOkUWnn1B4YES/k7RcDLwMdsxGcD1xKGpG2ELgD2LXvH8XMzFqVOvf7W4cJY7eJh2btVnYY1kdHHjOp7BCsj2LBY2WHYH00L2azOlapq7IqzCBgZmYla8tkI2mGpLXdPGaUHZ+ZmXXWrncuXgBc3k3Z6mYGYmZmPWvLZBMRy4HlZcdhZmbFtGUzmpmZtRcnGzMzazgnGzMzazgnGzMzazgnGzMzazgnGzMzazgnGzMza7gek42kOZKu6Wm/gSRptKSQNKGZ1zUzs8ZoeM1G0qE5cYzoee8BvW508TitmTGYmVnSljMI9MKXgJ/WvP9DWYGYmVVZ0ZrNlpKmS3o1Py6TtAWApEmS5ktaI2m5pNsljcxlo4F78zlW5NrFTblMks6V9JSk9ZKWSppad93dJd0jaZ2kxyUd0cvP9/uIeKnm8VovjzczswFQNNmcmPc9CDgVmAyclcuGkhZHGwscTVom+tZc9jxwfH69D7AzcGZ+fwlp4bSpueyEvH+ti4Gr87nnA7dJGlYwZoDpklbmZHhaR4LsjqTJkhZIWrDilbd6cRkzM9ucos1oLwJnRFppbZGkvUgrcl4ZETfU7PespC8DCyXtGhFLJa3KZcs7lmTOCeNs4Kya458G5tZdd1pE3J2PmQKcDIwDHigQ8wWkWtVa4HDgClIi/FZ3B0TETGAmpMXTClzDzMwKKJpsHozOS3rOBS6SNBzYk1SzGQdsB3Ss0jYKWNrN+cYAWwOze7juozWvl+XnHYsEHBEX1bx9RNIQ4H+ymWRjZmaN0d/RaAJmAeuAk4D9gSNz2dB+nhtgQ8eLmmTX15jnAcMlvbvfUZmZWa8U/cN9gKTadaUPJNU09iQ1TU2JiPsjYhGb1jzeyM9DarYtBNaTmreaZRzwOvD7Jl7TzMwo3oy2C3CVpO8C+wLnkZqjlpCSxumSrgX2Bi6qO3YxEMBRku4GXouINZKmA1MlrQfuB7YH9ouI7/X3Q0n6C2AnUnPfa8BhwDeBmRGxvr/nNzOz3imabG4h1UzmkRLH9aTO+7cknUIaWfYVUh/LOcDPOw6MiBckXUgaWfYD4IfA54HzgVdJI9J2BV7OZQNhA/A3wJWk2tuzpAED1w7Q+c3MrBfUud/fOkwYu008NGu3ssOwPjrymEllh2B9FAseKzsE66N5MZvVsUpdlXkiTjMza7i2TDaSZkha281jRtnxmZlZZ+06N9oFwOXdlK1uZiBmZtYz99l0Q9IK0ki6wWgEsLLsIKzP/P21t8H8/e0eETt0VeBkU0GSFkSE1wpqU/7+2ltVv7+27LMxM7P24mRjZmYN52RTTTPLDsD6xd9fe6vk9+c+GzMzazjXbMzMrOGcbMzMrOGcbMzMrOGcbMzMrOGcbMzMrOH+P9ckWoaiGI1RAAAAAElFTkSuQmCC",
211 |       "text/plain": [
212 |        "<Figure size 432x216 with 1 Axes>"
213 |       ]
214 |      },
215 |      "metadata": {
216 |       "needs_background": "light"
217 |      },
218 |      "output_type": "display_data"
219 |     }
220 |    ],
221 |    "source": [
222 |     "fig = plt.figure(figsize=(6, 3)) # [batch_size, n_step]\n",
223 |     "ax = fig.add_subplot(1, 1, 1)\n",
224 |     "ax.matshow(attention.asnumpy(), cmap='viridis')\n",
225 |     "ax.set_xticklabels(['']+['first_word', 'second_word', 'third_word'], fontdict={'fontsize': 14}, rotation=90)\n",
226 |     "ax.set_yticklabels(['']+['batch_1', 'batch_2', 'batch_3', 'batch_4', 'batch_5', 'batch_6'], fontdict={'fontsize': 14})\n",
227 |     "plt.show()"
228 |    ]
229 |   }
230 |  ],
231 |  "metadata": {
232 |   "kernelspec": {
233 |    "display_name": "Python 3.7.13 ('ms1.8')",
234 |    "language": "python",
235 |    "name": "python3"
236 |   },
237 |   "language_info": {
238 |    "codemirror_mode": {
239 |     "name": "ipython",
240 |     "version": 3
241 |    },
242 |    "file_extension": ".py",
243 |    "mimetype": "text/x-python",
244 |    "name": "python",
245 |    "nbconvert_exporter": "python",
246 |    "pygments_lexer": "ipython3",
247 |    "version": "3.7.13"
248 |   },
249 |   "vscode": {
250 |    "interpreter": {
251 |     "hash": "bd0943702584cdb580f8947884f31a9fb49482f77f8c89ed6532de3aa180e7ba"
252 |    }
253 |   }
254 |  },
255 |  "nbformat": 4,
256 |  "nbformat_minor": 5
257 | }
258 | 


--------------------------------------------------------------------------------