├── LICENSE
├── luna_transformer
│   ├── __init__.py
│   ├── feed_forward.py
│   ├── embedding.py
│   ├── mask.py
│   ├── encoder.py
│   ├── model.py
│   └── attention.py
├── setup.py
├── .gitignore
└── README.md

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2021 Soohwan Kim

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/luna_transformer/__init__.py:
--------------------------------------------------------------------------------
# MIT License, Copyright (c) 2021 Soohwan Kim. See LICENSE for the full text.

from .model import LunaTransformerEncoder
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
# MIT License, Copyright (c) 2021 Soohwan Kim. See LICENSE for the full text.

from setuptools import setup, find_packages

setup(
    name='luna-transformer',
    packages=find_packages(),
    version='0.1.0',  # placeholder; the original value 'latest' is not a valid PEP 440 version
    description='Luna: Linear Unified Nested Attention',
    author='Soohwan Kim',
    author_email='sh951011@gmail.com',
    url='https://github.com/sooftware/luna-transformer',
    install_requires=[
        'torch>=1.4.0',
        'numpy',
    ],
    python_requires='>=3.7',
)
--------------------------------------------------------------------------------
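Since setup.py declares only torch and numpy as dependencies and `__init__.py` re-exports the model class, an editable install can be sanity-checked in one import. This snippet is an illustration added by the editor, not part of the repository; it assumes you have run `pip install -e .` from the repository root:

```python
# Verify that the package installed and exposes its public entry point.
import importlib

pkg = importlib.import_module("luna_transformer")
# __init__.py does `from .model import LunaTransformerEncoder`, so this exists.
print(pkg.LunaTransformerEncoder)
```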
/luna_transformer/feed_forward.py:
--------------------------------------------------------------------------------
# MIT License, Copyright (c) 2021 Soohwan Kim. See LICENSE for the full text.

import torch
import torch.nn as nn


class PositionwiseFeedForwardNetwork(nn.Module):
    """
    Position-wise feed-forward network proposed in "Attention Is All You Need".
    A fully connected feed-forward network that is applied to each position
    separately and identically. It consists of two linear transformations with
    a ReLU activation in between; equivalently, it can be described as two
    convolutions with kernel size 1.
    """
    def __init__(self, d_model: int = 512, d_ff: int = 2048, dropout_p: float = 0.3) -> None:
        super(PositionwiseFeedForwardNetwork, self).__init__()
        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.Dropout(dropout_p),
            nn.ReLU(),
            nn.Linear(d_ff, d_model),
            nn.Dropout(dropout_p),
        )

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        return self.feed_forward(inputs)
--------------------------------------------------------------------------------
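One detail worth noting: the first `nn.Dropout` here sits before the `nn.ReLU`, whereas the paper applies the activation first. For ReLU the two orderings are exactly equivalent, because dropout multiplies each element by a non-negative factor (0, or 1/(1 - p)) and relu(c * x) = c * relu(x) for any c >= 0. A quick shape check, added as an illustration and not part of the repository:

```python
import torch
from luna_transformer.feed_forward import PositionwiseFeedForwardNetwork

ffn = PositionwiseFeedForwardNetwork(d_model=512, d_ff=2048, dropout_p=0.1)
ffn.eval()  # disable dropout for a deterministic check

x = torch.randn(2, 10, 512)  # (batch, sequence_length, d_model)
y = ffn(x)
print(y.shape)               # torch.Size([2, 10, 512]); shape is preserved
```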
/luna_transformer/embedding.py:
--------------------------------------------------------------------------------
# MIT License, Copyright (c) 2021 Soohwan Kim. See LICENSE for the full text.

import math
import torch
import torch.nn as nn
from torch import Tensor


class PositionalEncoding(nn.Module):
    """
    Positional encoding proposed in "Attention Is All You Need".
    Since the transformer contains no recurrence and no convolution, the model
    needs explicit positional information in order to make use of the order of
    the sequence.

    "Attention Is All You Need" uses sine and cosine functions of different frequencies:
        PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
        PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
    """
    def __init__(self, d_model: int = 80, max_length: int = 5000) -> None:
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_length, d_model, requires_grad=False)
        position = torch.arange(0, max_length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, length: int) -> Tensor:
        return self.pe[:, :length]
--------------------------------------------------------------------------------
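The buffer holds one precomputed (1, max_length, d_model) table and `forward` simply slices it, so the module adds no trainable parameters. A small check of the advertised sine/cosine layout, added as an illustration and not part of the repository:

```python
import torch
from luna_transformer.embedding import PositionalEncoding

pos_enc = PositionalEncoding(d_model=80, max_length=5000)
pe = pos_enc(length=100)  # slice of the precomputed table
print(pe.shape)           # torch.Size([1, 100, 80])

# Even channels hold sines, odd channels cosines; at position 0,
# sin(0) = 0 and cos(0) = 1 for every frequency.
print(pe[0, 0, 0::2].abs().max().item())  # 0.0
print(pe[0, 0, 1::2].min().item())        # 1.0
```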
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
*.DS_Store
.DS_Store
.idea/*
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------
/luna_transformer/mask.py:
--------------------------------------------------------------------------------
# MIT License, Copyright (c) 2021 Soohwan Kim. See LICENSE for the full text.

import torch
from torch import Tensor


def get_attn_pad_mask(inputs, input_lengths, expand_length):
    """ Masked (padded) positions are set to 1 (True). """

    def get_transformer_non_pad_mask(inputs: Tensor, input_lengths: Tensor) -> Tensor:
        """ Padding positions are set to 0, based on input_lengths. """
        batch_size = inputs.size(0)

        if len(inputs.size()) == 2:
            non_pad_mask = inputs.new_ones(inputs.size())  # B x T
        elif len(inputs.size()) == 3:
            non_pad_mask = inputs.new_ones(inputs.size()[:-1])  # B x T
        else:
            raise ValueError(f"Unsupported input shape {inputs.size()}")

        for i in range(batch_size):
            non_pad_mask[i, input_lengths[i]:] = 0

        return non_pad_mask

    non_pad_mask = get_transformer_non_pad_mask(inputs, input_lengths)
    pad_mask = non_pad_mask.lt(1)
    attn_pad_mask = pad_mask.unsqueeze(1).expand(-1, expand_length, -1)
    return attn_pad_mask


def get_attn_subsequent_mask(seq):
    assert seq.dim() == 2
    attn_shape = [seq.size(0), seq.size(1), seq.size(1)]
    subsequent_mask = torch.triu(torch.ones(attn_shape), diagonal=1)

    if seq.is_cuda:
        subsequent_mask = subsequent_mask.cuda()

    return subsequent_mask
--------------------------------------------------------------------------------
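`get_attn_pad_mask` marks padded key positions with True and repeats that row `expand_length` times, so the result can be broadcast against an attention score matrix of shape (batch, expand_length, key_length). A small demonstration, added as an illustration and not part of the repository:

```python
import torch
from luna_transformer.mask import get_attn_pad_mask

# Two sequences of length 5; the second has 2 padded positions at the end.
inputs = torch.tensor([[7, 4, 9, 2, 5],
                       [3, 8, 1, 0, 0]])
input_lengths = torch.tensor([5, 3])

mask = get_attn_pad_mask(inputs, input_lengths, expand_length=5)
print(mask.shape)  # torch.Size([2, 5, 5])
print(mask[1, 0])  # tensor([False, False, False,  True,  True])
```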
/luna_transformer/encoder.py:
--------------------------------------------------------------------------------
# MIT License, Copyright (c) 2021 Soohwan Kim. See LICENSE for the full text.

import torch
import torch.nn as nn

from luna_transformer.attention import LinearUnifiedNestedAttention
from luna_transformer.feed_forward import PositionwiseFeedForwardNetwork


class LunaTransformerEncoderLayer(nn.Module):
    def __init__(
            self,
            d_model: int = 512,
            num_attention_heads: int = 8,
            d_ff: int = 2048,
            dropout_p: float = 0.3,
    ) -> None:
        super(LunaTransformerEncoderLayer, self).__init__()
        self.luna_attention = LinearUnifiedNestedAttention(d_model, num_attention_heads)
        self.feed_forward = PositionwiseFeedForwardNetwork(d_model, d_ff, dropout_p)
        self.packed_context_layer_norm = nn.LayerNorm(d_model)
        self.unpacked_context_layer_norm = nn.LayerNorm(d_model)
        self.feed_forward_layer_norm = nn.LayerNorm(d_model)

    def forward(
            self,
            inputs: torch.FloatTensor,
            p: torch.FloatTensor,
            attention_padding_mask: torch.FloatTensor = None,
    ):
        unpacked_context, packed_context = self.luna_attention(
            query=inputs,
            key=inputs,
            value=inputs,
            p=p,
            attention_padding_mask=attention_padding_mask,
        )

        packed_context = self.packed_context_layer_norm(packed_context + p)
        unpacked_context = self.unpacked_context_layer_norm(unpacked_context + inputs)

        outputs = self.feed_forward(unpacked_context)
        outputs = self.feed_forward_layer_norm(outputs + unpacked_context)

        return outputs, packed_context
--------------------------------------------------------------------------------
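attention.py and model.py are not included in this dump, so the behaviour of `LinearUnifiedNestedAttention` is assumed from how the layer uses it: the residual connections above only type-check if the unpacked context matches the input's shape and the packed context matches the projected sequence `p`. Under that assumption, a minimal sketch of driving one layer (illustrative, not from the repository):

```python
import torch
from luna_transformer.encoder import LunaTransformerEncoderLayer

batch_size, seq_length, proj_length, d_model = 2, 50, 16, 512

layer = LunaTransformerEncoderLayer(
    d_model=d_model, num_attention_heads=8, d_ff=2048, dropout_p=0.1,
)

inputs = torch.randn(batch_size, seq_length, d_model)  # token representations
p = torch.randn(batch_size, proj_length, d_model)      # projected (packed) sequence

# Assumed output shapes, based on the residual connections in forward():
# outputs matches inputs, packed_context matches p.
outputs, packed_context = layer(inputs, p)
print(outputs.shape)         # torch.Size([2, 50, 512])
print(packed_context.shape)  # torch.Size([2, 16, 512])
```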
/README.md:
--------------------------------------------------------------------------------
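The README in this snapshot contains only blank lines. For orientation, here is an end-to-end sketch that wires together only the modules shown above; it is an editor's illustration, not taken from the repository. In the full model, the projected sequence p would presumably be a learned parameter owned by LunaTransformerEncoder in model.py (not shown), so it is created by hand here:

```python
import torch
import torch.nn as nn

from luna_transformer.embedding import PositionalEncoding
from luna_transformer.encoder import LunaTransformerEncoderLayer

vocab_size, d_model, proj_length = 100, 512, 16
batch_size, seq_length = 2, 50

embedding = nn.Embedding(vocab_size, d_model)
pos_encoding = PositionalEncoding(d_model=d_model, max_length=5000)
layer = LunaTransformerEncoderLayer(
    d_model=d_model, num_attention_heads=8, d_ff=2048, dropout_p=0.1,
)

# Hand-made projected sequence, shared across the batch (assumption: the full
# model owns something similar and expands it per batch).
p = nn.Parameter(torch.randn(proj_length, d_model))

tokens = torch.randint(0, vocab_size, (batch_size, seq_length))
x = embedding(tokens) + pos_encoding(seq_length)  # add positional information
outputs, packed = layer(x, p.unsqueeze(0).expand(batch_size, -1, -1))
print(outputs.shape, packed.shape)  # torch.Size([2, 50, 512]) torch.Size([2, 16, 512])
```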