├── .nojekyll ├── docs ├── .nojekyll ├── build │ ├── html │ │ ├── objects.inv │ │ ├── _static │ │ │ ├── up.png │ │ │ ├── down.png │ │ │ ├── file.png │ │ │ ├── plus.png │ │ │ ├── comment.png │ │ │ ├── minus.png │ │ │ ├── ajax-loader.gif │ │ │ ├── down-pressed.png │ │ │ ├── up-pressed.png │ │ │ ├── comment-bright.png │ │ │ ├── comment-close.png │ │ │ ├── fonts │ │ │ │ ├── Lato │ │ │ │ │ ├── lato-bold.eot │ │ │ │ │ ├── lato-bold.ttf │ │ │ │ │ ├── lato-bold.woff │ │ │ │ │ ├── lato-bold.woff2 │ │ │ │ │ ├── lato-italic.eot │ │ │ │ │ ├── lato-italic.ttf │ │ │ │ │ ├── lato-italic.woff │ │ │ │ │ ├── lato-italic.woff2 │ │ │ │ │ ├── lato-regular.eot │ │ │ │ │ ├── lato-regular.ttf │ │ │ │ │ ├── lato-regular.woff │ │ │ │ │ ├── lato-bolditalic.eot │ │ │ │ │ ├── lato-bolditalic.ttf │ │ │ │ │ ├── lato-bolditalic.woff │ │ │ │ │ ├── lato-regular.woff2 │ │ │ │ │ └── lato-bolditalic.woff2 │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ ├── fontawesome-webfont.woff2 │ │ │ │ └── RobotoSlab │ │ │ │ │ ├── roboto-slab-v7-bold.eot │ │ │ │ │ ├── roboto-slab-v7-bold.ttf │ │ │ │ │ ├── roboto-slab-v7-bold.woff │ │ │ │ │ ├── roboto-slab-v7-bold.woff2 │ │ │ │ │ ├── roboto-slab-v7-regular.eot │ │ │ │ │ ├── roboto-slab-v7-regular.ttf │ │ │ │ │ ├── roboto-slab-v7-regular.woff │ │ │ │ │ └── roboto-slab-v7-regular.woff2 │ │ │ ├── documentation_options.js │ │ │ ├── css │ │ │ │ └── badge_only.css │ │ │ └── js │ │ │ │ └── theme.js │ │ ├── .buildinfo │ │ └── _sources │ │ │ ├── modules.rst.txt │ │ │ ├── block_zoo.embedding.rst.txt │ │ │ ├── block_zoo.normalizations.rst.txt │ │ │ ├── index.rst.txt │ │ │ ├── block_zoo.transformer.rst.txt │ │ │ ├── block_zoo.encoder_decoder.rst.txt │ │ │ ├── block_zoo.op.rst.txt │ │ │ ├── block_zoo.math.rst.txt │ │ │ ├── block_zoo.attentions.rst.txt │ │ │ └── block_zoo.rst.txt │ └── doctrees │ │ ├── index.doctree │ │ ├── losses.doctree │ │ ├── modules.doctree │ │ ├── block_zoo.doctree │ │ ├── overview.doctree │ │ ├── block_zoo.op.doctree │ │ ├── environment.pickle │ │ ├── block_zoo.math.doctree │ │ ├── block_zoo.embedding.doctree │ │ ├── block_zoo.attentions.doctree │ │ ├── block_zoo.transformer.doctree │ │ ├── block_zoo.normalizations.doctree │ │ └── block_zoo.encoder_decoder.doctree ├── index.html └── source │ ├── index.rst │ └── layers.encoder_decoder.rst ├── .gitignore ├── core ├── __init__.py ├── LRScheduler.py ├── ChineseTokenizer.py ├── EnglishTextPreprocessor.py ├── EnglishTokenizer.py ├── EnglishPOSTagger.py ├── StreamingRecorder.py └── Stopwords.py ├── metrics └── __init__.py ├── utils ├── __init__.py ├── ProcessorsScheduler.py ├── exceptions.py ├── DocInherit.py └── BPEEncoder.py ├── preparation └── __init__.py ├── requirements.txt ├── optimizers └── __init__.py ├── block_zoo ├── normalizations │ ├── __init__.py │ └── LayerNorm.py ├── embedding │ └── __init__.py ├── transformer │ ├── __init__.py │ ├── MLP.py │ └── MultiHeadAttention.py ├── encoder_decoder │ └── __init__.py ├── op │ ├── __init__.py │ ├── Concat2D.py │ └── Concat3D.py ├── math │ ├── __init__.py │ ├── Add2D.py │ ├── Add3D.py │ ├── ElementWisedMultiply2D.py │ ├── Minus2D.py │ ├── ElementWisedMultiply3D.py │ ├── Minus3D.py │ └── MatrixMultiply.py ├── attentions │ ├── __init__.py │ ├── BilinearAttention.py │ ├── Attention.py │ ├── MatchAttention.py │ └── LinearAttention.py ├── Flatten.py ├── __init__.py ├── Dropout.py ├── BiLSTMLast.py ├── BiLSTM.py ├── BiGRU.py ├── HighwayLinear.py ├── BiGRULast.py └── Pooling.py ├── dataset ├── get_glove.sh ├── 
get_WikiQACorpus.py ├── get_QQP.py ├── get_QNLI.py └── get_SST-2.py ├── losses ├── __init__.py ├── BaseLossConf.py ├── FocalLoss.py └── Loss.py ├── LICENSE ├── model_visualizer ├── server │ ├── main.py │ └── templates │ │ └── model_visualizer.html └── README.md ├── Contributing.md ├── settings.py ├── tools └── calculate_AUC.py ├── model_zoo ├── nlp_tasks │ ├── slot_tagging │ │ └── conf_slot_tagging_encoder_decoder.json │ ├── text_classification │ │ ├── conf_text_classification_cnn.json │ │ └── conf_text_classification_bilstm_attn.json │ ├── sentiment_analysis │ │ ├── conf_sentiment_analysis_bilstm.json │ │ ├── conf_sentiment_analysis_bilstm_cnn.json │ │ └── conf_sentiment_analysis_bilstm_attn.json │ ├── chinese_sentiment_analysis │ │ ├── conf_chinese_sentiment_analysis_bilstm.json │ │ └── conf_chinese_sentiment_analysis_bilstm_emb.json │ ├── question_pairs │ │ └── conf_question_pairs_bigru.json │ ├── question_answer_matching │ │ ├── conf_question_answer_matching_cnn.json │ │ └── conf_question_answer_matching_bilstm.json │ └── question_nli │ │ └── conf_qnli_bilstm.json └── demo │ └── conf.json └── data_encoding.py /.nojekyll: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *~ 3 | *.pyc 4 | *.cache* 5 | dataset/GloVe/ 6 | models/ 7 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. -------------------------------------------------------------------------------- /docs/build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/objects.inv -------------------------------------------------------------------------------- /docs/build/html/_static/up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/up.png -------------------------------------------------------------------------------- /metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
-------------------------------------------------------------------------------- /docs/build/html/_static/down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/down.png -------------------------------------------------------------------------------- /docs/build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/file.png -------------------------------------------------------------------------------- /docs/build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/plus.png -------------------------------------------------------------------------------- /preparation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. -------------------------------------------------------------------------------- /docs/build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/losses.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/losses.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/modules.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/modules.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/comment.png -------------------------------------------------------------------------------- /docs/build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/minus.png -------------------------------------------------------------------------------- /docs/build/doctrees/block_zoo.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/block_zoo.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/overview.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/overview.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/block_zoo.op.doctree: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/block_zoo.op.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/build/html/_static/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/ajax-loader.gif -------------------------------------------------------------------------------- /docs/build/html/_static/down-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/down-pressed.png -------------------------------------------------------------------------------- /docs/build/html/_static/up-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/up-pressed.png -------------------------------------------------------------------------------- /docs/build/doctrees/block_zoo.math.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/block_zoo.math.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/comment-bright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/comment-bright.png -------------------------------------------------------------------------------- /docs/build/html/_static/comment-close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/comment-close.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nltk==3.4.1 2 | gensim==3.7.2 3 | tqdm==4.31.1 4 | numpy==1.16.3 5 | scikit-learn==0.20.3 6 | ftfy==5.5.1 7 | jieba==0.39 8 | -------------------------------------------------------------------------------- /docs/build/doctrees/block_zoo.embedding.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/block_zoo.embedding.doctree -------------------------------------------------------------------------------- /optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | from torch.optim import * -------------------------------------------------------------------------------- /docs/build/doctrees/block_zoo.attentions.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/block_zoo.attentions.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/block_zoo.transformer.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/block_zoo.transformer.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /docs/build/doctrees/block_zoo.normalizations.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/block_zoo.normalizations.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/build/doctrees/block_zoo.encoder_decoder.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/doctrees/block_zoo.encoder_decoder.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-bolditalic.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- 
/docs/build/html/_static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2 -------------------------------------------------------------------------------- /block_zoo/normalizations/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | from .LayerNorm import LayerNorm, LayerNormConf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- 
/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/NeuronBlocks/master/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /block_zoo/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | from .CNNCharEmbedding import CNNCharEmbeddingConf, CNNCharEmbedding -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /block_zoo/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | from .MLP import MLP,MLPConf 4 | from .MultiHeadAttention import MultiHeadAttention,MultiHeadAttentionConf -------------------------------------------------------------------------------- /block_zoo/encoder_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | from .SLUEncoder import SLUEncoder, SLUEncoderConf 4 | from .SLUDecoder import SLUDecoder, SLUDecoderConf 5 | -------------------------------------------------------------------------------- /docs/build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: 1c1cb8107c2e24351b88cfa0b613e113 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /block_zoo/op/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | from .Concat2D import Concat2D, Concat2DConf 4 | from .Concat3D import Concat3D, Concat3DConf 5 | from .Combination import Combination, CombinationConf -------------------------------------------------------------------------------- /dataset/get_glove.sh: -------------------------------------------------------------------------------- 1 | preprocess_exec="sed -f tokenizer.sed" 2 | 3 | glovepath='http://nlp.stanford.edu/data/glove.840B.300d.zip' 4 | 5 | ZIPTOOL="unzip" 6 | 7 | # GloVe 8 | echo $glovepath 9 | mkdir GloVe 10 | curl -LO $glovepath 11 | $ZIPTOOL glove.840B.300d.zip -d GloVe/ 12 | rm glove.840B.300d.zip 13 | 14 | -------------------------------------------------------------------------------- /docs/build/html/_sources/modules.rst.txt: -------------------------------------------------------------------------------- 1 | NeuronBlocks 2 | ============ 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | LearningMachine 8 | Model 9 | ModelConf 10 | block_zoo 11 | core 12 | data_encoding 13 | losses 14 | metrics 15 | optimizers 16 | predict 17 | preparation 18 | problem 19 | settings 20 | test 21 | train 22 | utils 23 | -------------------------------------------------------------------------------- /docs/build/html/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '1.1.0', 4 | LANGUAGE: 'None', 5 | COLLAPSE_INDEX: false, 6 | FILE_SUFFIX: '.html', 7 | HAS_SOURCE: true, 8 | SOURCELINK_SUFFIX: '.txt', 9 | NAVIGATION_WITH_KEYS: false, 10 | }; -------------------------------------------------------------------------------- /losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | from .FocalLoss import FocalLoss 4 | from .Loss import Loss 5 | from torch.nn import CrossEntropyLoss, L1Loss, MSELoss, NLLLoss, PoissonNLLLoss, NLLLoss2d, KLDivLoss, BCELoss, BCEWithLogitsLoss, MarginRankingLoss, HingeEmbeddingLoss, MultiLabelMarginLoss, SmoothL1Loss, SoftMarginLoss, MultiLabelSoftMarginLoss, CosineEmbeddingLoss, MultiMarginLoss, TripletMarginLoss -------------------------------------------------------------------------------- /block_zoo/math/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | from .Add2D import Add2D, Add2DConf 4 | from .Add3D import Add3D, Add3DConf 5 | from .ElementWisedMultiply2D import ElementWisedMultiply2D, ElementWisedMultiply2DConf 6 | from .ElementWisedMultiply3D import ElementWisedMultiply3D, ElementWisedMultiply3DConf 7 | from .Minus2D import Minus2D, Minus2DConf 8 | from .Minus3D import Minus3D, Minus3DConf 9 | from .MatrixMultiply import MatrixMultiply, MatrixMultiplyConf -------------------------------------------------------------------------------- /docs/build/html/_sources/block_zoo.embedding.rst.txt: -------------------------------------------------------------------------------- 1 | block\_zoo.embedding package 2 | ============================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | block\_zoo.embedding.CNNCharEmbedding module 8 | -------------------------------------------- 9 | 10 | .. 
automodule:: block_zoo.embedding.CNNCharEmbedding 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: block_zoo.embedding 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /block_zoo/attentions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | from .FullAttention import FullAttention, FullAttentionConf 4 | from .Seq2SeqAttention import Seq2SeqAttention, Seq2SeqAttentionConf 5 | from .LinearAttention import LinearAttention, LinearAttentionConf 6 | from .BiAttFlow import BiAttFlow, BiAttFlowConf 7 | from .MatchAttention import MatchAttention, MatchAttentionConf 8 | from .Attention import Attention, AttentionConf 9 | from .BilinearAttention import BilinearAttention, BilinearAttentionConf -------------------------------------------------------------------------------- /docs/build/html/_sources/block_zoo.normalizations.rst.txt: -------------------------------------------------------------------------------- 1 | block\_zoo.normalizations package 2 | ================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | block\_zoo.normalizations.LayerNorm module 8 | ------------------------------------------ 9 | 10 | .. automodule:: block_zoo.normalizations.LayerNorm 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: block_zoo.normalizations 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /dataset/get_WikiQACorpus.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import os 5 | import urllib.request 6 | import zipfile 7 | 8 | data_path = 'https://download.microsoft.com/download/E/5/F/E5FCFCEE-7005-4814-853D-DAA7C66507E0/WikiQACorpus.zip' 9 | data_file = 'WikiQACorpus.zip' 10 | print("Downloading and extracting %s..." % data_file) 11 | urllib.request.urlretrieve(data_path, data_file) 12 | with zipfile.ZipFile(data_file) as zip_ref: 13 | zip_ref.extractall() 14 | os.remove(data_file) 15 | print("Completed!") 16 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. NeuronBlocks documentation master file, created by 2 | sphinx-quickstart on Sat Mar 10 04:36:25 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to NLP DNN Toolkit's documentation! 7 | ================================================ 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | overview.rst 14 | 15 | 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /docs/build/html/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. 
NeuronBlocks documentation master file, created by 2 | sphinx-quickstart on Sat Mar 10 04:36:25 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to NLP DNN Toolkit's documentation! 7 | ================================================ 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | overview.rst 14 | 15 | 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /dataset/get_QQP.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import os 5 | import urllib.request 6 | import zipfile 7 | 8 | data_path = 'https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FQQP.zip?alt=media&token=700c6acf-160d-4d89-81d1-de4191d02cb5' 9 | data_file = 'QQP.zip' 10 | print("Downloading and extracting %s..." % data_file) 11 | urllib.request.urlretrieve(data_path, data_file) 12 | with zipfile.ZipFile(data_file) as zip_ref: 13 | zip_ref.extractall() 14 | os.remove(data_file) 15 | print("Completed!") 16 | -------------------------------------------------------------------------------- /dataset/get_QNLI.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import os 5 | import urllib.request 6 | import zipfile 7 | 8 | data_path = 'https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FQNLIv2.zip?alt=media&token=6fdcf570-0fc5-4631-8456-9505272d1601' 9 | data_file = 'QNLI.zip' 10 | print("Downloading and extracting %s..." % data_file) 11 | urllib.request.urlretrieve(data_path, data_file) 12 | with zipfile.ZipFile(data_file) as zip_ref: 13 | zip_ref.extractall() 14 | os.remove(data_file) 15 | print("Completed!") 16 | -------------------------------------------------------------------------------- /dataset/get_SST-2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import os 5 | import urllib.request 6 | import zipfile 7 | 8 | data_path = 'https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FSST-2.zip?alt=media&token=aabc5f6b-e466-44a2-b9b4-cf6337f84ac8' 9 | data_file = 'SST-2.zip' 10 | print("Downloading and extracting %s..." % data_file) 11 | urllib.request.urlretrieve(data_path, data_file) 12 | with zipfile.ZipFile(data_file) as zip_ref: 13 | zip_ref.extractall() 14 | os.remove(data_file) 15 | print("Completed!") 16 | -------------------------------------------------------------------------------- /docs/build/html/_sources/block_zoo.transformer.rst.txt: -------------------------------------------------------------------------------- 1 | block\_zoo.transformer package 2 | ============================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | block\_zoo.transformer.MLP module 8 | --------------------------------- 9 | 10 | .. 
automodule:: block_zoo.transformer.MLP 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | block\_zoo.transformer.MultiHeadAttention module 16 | ------------------------------------------------ 17 | 18 | .. automodule:: block_zoo.transformer.MultiHeadAttention 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: block_zoo.transformer 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/layers.encoder_decoder.rst: -------------------------------------------------------------------------------- 1 | layers\.encoder\_decoder package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | layers\.encoder\_decoder\.SLUDecoder module 8 | ------------------------------------------- 9 | 10 | .. automodule:: layers.encoder_decoder.SLUDecoder 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | layers\.encoder\_decoder\.SLUEncoder module 16 | ------------------------------------------- 17 | 18 | .. automodule:: layers.encoder_decoder.SLUEncoder 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: layers.encoder_decoder 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/build/html/_sources/block_zoo.encoder_decoder.rst.txt: -------------------------------------------------------------------------------- 1 | block\_zoo.encoder\_decoder package 2 | =================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | block\_zoo.encoder\_decoder.SLUDecoder module 8 | --------------------------------------------- 9 | 10 | .. automodule:: block_zoo.encoder_decoder.SLUDecoder 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | block\_zoo.encoder\_decoder.SLUEncoder module 16 | --------------------------------------------- 17 | 18 | .. automodule:: block_zoo.encoder_decoder.SLUEncoder 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: block_zoo.encoder_decoder 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/build/html/_sources/block_zoo.op.rst.txt: -------------------------------------------------------------------------------- 1 | block\_zoo.op package 2 | ===================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | block\_zoo.op.Combination module 8 | -------------------------------- 9 | 10 | .. automodule:: block_zoo.op.Combination 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | block\_zoo.op.Concat2D module 16 | ----------------------------- 17 | 18 | .. automodule:: block_zoo.op.Concat2D 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | block\_zoo.op.Concat3D module 24 | ----------------------------- 25 | 26 | .. automodule:: block_zoo.op.Concat3D 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. 
automodule:: block_zoo.op 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /model_visualizer/server/main.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | # -*- coding:utf-8 -*- 5 | import web 6 | import json 7 | from web import form 8 | from mv import json2graph 9 | 10 | render = web.template.render('templates/') 11 | urls = ( 12 | '/', 'index', 13 | '/mv', 'model_visualizer' 14 | ) 15 | 16 | mv_form = form.Form( 17 | form.Textarea("json", description="config_json"), 18 | form.Textarea("output", description="output"), 19 | form.Button("submit", type="submit", description="Submit"), 20 | ) 21 | 22 | 23 | class index: 24 | def GET(self): 25 | raise web.seeother('/mv') 26 | 27 | 28 | class model_visualizer: 29 | def GET(self): 30 | f = mv_form() 31 | status = False 32 | return render.model_visualizer(f, status) 33 | 34 | 35 | def POST(self): 36 | f = mv_form() 37 | post_value = web.input(json=None) 38 | f['json'].value = post_value.json 39 | json2graph(post_value.json) 40 | status = True 41 | return render.model_visualizer(f, status) 42 | 43 | if __name__ == "__main__": 44 | app = web.application(urls, globals()) 45 | app.run() 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /utils/ProcessorsScheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | import multiprocessing 5 | from multiprocessing import cpu_count 6 | import math 7 | 8 | class ProcessorsScheduler(object): 9 | process_num = cpu_count() 10 | 11 | def __init__(self, cpu_num_workers=None): 12 | if cpu_num_workers != None and cpu_num_workers > 0: 13 | self.process_num = cpu_num_workers 14 | 15 | def run_data_parallel(self, func, func_args): 16 | data, rest_args = func_args[0], func_args[1:] 17 | res = [] 18 | # logging.info("multiprocess enabled, process num: %d" % (self.process_num)) 19 | process_p = multiprocessing.Pool(self.process_num) 20 | data_length = len(data) 21 | size = math.ceil(data_length/ self.process_num) 22 | 23 | for i in range(self.process_num): 24 | start = size * i 25 | end = (i + 1) * size if (i + 1) * size < data_length else data_length 26 | args = (data[start:end], ) + rest_args 27 | res.append((i, process_p.apply_async(func, args=args))) 28 | process_p.close() 29 | process_p.join() 30 | res = sorted(res, key=lambda x:x[0]) 31 | return res 32 | -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributing 3 | 4 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 5 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 6 | the rights to use your contribution. For details, visit https://cla.microsoft.com. 7 | 8 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide 9 | a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions 10 | provided by the bot. You will only need to do this once across all repos using our CLA. 11 | 12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 13 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 14 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 15 | 16 | 17 | # How to Contribute 18 | - Contribute Model to **Model Zoo** 19 | - We encourage everyone to contribute their NLP models (namely JSON configuration files). Please follow the structure in model_zoo to create a pull request. 20 | - Contribute Block to **Block Zoo** 21 | - We encourage everyone to improve this toolkit by contributing code, such as customized Blocks. So other users can further benefit from these new Blocks. 22 | -------------------------------------------------------------------------------- /utils/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | import sys 5 | 6 | 7 | class BaseError(RuntimeError): 8 | """ Error base class 9 | 10 | """ 11 | def __init__(self, arg, err_id=None): 12 | self.arg = arg 13 | self.err_id = err_id 14 | 15 | def __str__(self): 16 | if self.err_id is None: 17 | return self.arg 18 | else: 19 | return "error=%d, %s" % (self.err_id, self.arg) 20 | 21 | 22 | class LayerConfigUndefinedError(BaseError): 23 | """ Errors occur when the corresponding configuration class of a layer is not defined 24 | 25 | """ 26 | pass 27 | 28 | 29 | class LayerUndefinedError(BaseError): 30 | """ Errors occur when some undefined layers are used 31 | 32 | """ 33 | pass 34 | 35 | 36 | class LayerDefineError(BaseError): 37 | """ (For developers) Errors occur when there are problems with the defined layers 38 | 39 | """ 40 | pass 41 | 42 | 43 | class ConfigurationError(BaseError): 44 | """ Errors occur when the model configuration is invalid 45 | 46 | """ 47 | pass 48 | 49 | 50 | class InputError(BaseError): 51 | """ Errors occur when the input to the model is wrong 52 | 53 | """ 54 | pass 55 | 56 | 57 | class PreprocessError(BaseError): 58 | """ Errors occur during data preprocessing 59 | 60 | """ 61 | pass 62 | 63 | -------------------------------------------------------------------------------- /model_visualizer/server/templates/model_visualizer.html: -------------------------------------------------------------------------------- 1 | $def with(form, status) 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |

Model Visualizer

26 | $if status == True:
30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /core/LRScheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import numpy as np 5 | 6 | class LRScheduler(): 7 | def __init__(self, optimizer, decay_rate=1, minimum_lr=0, epoch_start_decay=1): 8 | """ 9 | 10 | Args: 11 | optimizer: 12 | decay_rate: 13 | minimum_lr: if lr < minimum_lr, stop lr decay 14 | """ 15 | self.optimizer = optimizer 16 | self.decay_rate = decay_rate 17 | self.minimum_lr = minimum_lr 18 | self.epoch_cnt = 0 19 | self.epoch_start_decay = epoch_start_decay 20 | 21 | def step(self): 22 | """ adjust learning rate 23 | 24 | Args: 25 | optimizer: 26 | decay_rate: 27 | minimum_lr: 28 | 29 | Returns: 30 | None 31 | 32 | """ 33 | self.epoch_cnt += 1 34 | 35 | if self.epoch_cnt >= self.epoch_start_decay: 36 | for param_group in self.optimizer.param_groups: 37 | if param_group['lr'] * self.decay_rate >= self.minimum_lr: 38 | param_group['lr'] = param_group['lr'] * self.decay_rate 39 | else: 40 | param_group['lr'] = self.minimum_lr 41 | 42 | 43 | def get_lr(self): 44 | """ get average learning rate of optimizer.param_groups 45 | 46 | Args: 47 | optimizer: 48 | 49 | Returns: 50 | 51 | """ 52 | lr_total = [] 53 | for param_group in self.optimizer.param_groups: 54 | lr_total.append(param_group['lr']) 55 | return np.mean(lr_total) 56 | -------------------------------------------------------------------------------- /core/ChineseTokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | import jieba 5 | import logging 6 | jieba.setLogLevel(logging.INFO) 7 | from nltk.tokenize.util import align_tokens 8 | from .Stopwords import Stopwords 9 | 10 | class ChineseTokenizer(object): 11 | def __init__(self, tokenizer='jieba', remove_stopwords=False): 12 | self.__tokenizer = tokenizer 13 | self.__remove_stopwords = remove_stopwords 14 | if self.__remove_stopwords: 15 | self.__stop_words = Stopwords.chinese_stopwords 16 | else: 17 | self.__stop_words = None 18 | 19 | def tokenize(self, string): 20 | if self.__tokenizer == 'jieba': 21 | tokens = list(jieba.cut(string)) 22 | 23 | if self.__remove_stopwords: 24 | tokens = [word for word in tokens if word not in self.__stop_words] 25 | return tokens 26 | 27 | def span_tokenize(self, string): 28 | if self.__tokenizer == 'jieba': 29 | tokens = self.tokenize(string) 30 | spans = align_tokens(tokens, string) 31 | return spans 32 | 33 | 34 | if __name__ == '__main__': 35 | import os 36 | # nltk.data.path.append(r"C:\Users\wutlin\OneDrive - Microsoft\workspace\DNNMatchingToolkit\dataset\nltk_data") 37 | tokenizer = ChineseTokenizer(tokenizer='jieba', remove_stopwords=True) 38 | print(tokenizer.tokenize("我爱北京天安门,天安门上太阳升。")) 39 | print(tokenizer.span_tokenize("我爱北京天安门,天安门上太阳升。")) 40 | print(tokenizer.tokenize("给每一条河每一座山取一个温暖的名字;陌生人,我也为你祝福;愿你有一个灿烂的前程;愿你有情人终成眷属;愿你在尘世获得幸福;我只愿面朝大海,春暖花开。")) 41 | -------------------------------------------------------------------------------- /docs/build/html/_sources/block_zoo.math.rst.txt: -------------------------------------------------------------------------------- 1 | block\_zoo.math package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | block\_zoo.math.Add2D module 8 | ---------------------------- 9 | 10 | .. automodule:: block_zoo.math.Add2D 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | block\_zoo.math.Add3D module 16 | ---------------------------- 17 | 18 | .. automodule:: block_zoo.math.Add3D 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | block\_zoo.math.ElementWisedMultiply2D module 24 | --------------------------------------------- 25 | 26 | .. automodule:: block_zoo.math.ElementWisedMultiply2D 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | block\_zoo.math.ElementWisedMultiply3D module 32 | --------------------------------------------- 33 | 34 | .. automodule:: block_zoo.math.ElementWisedMultiply3D 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | block\_zoo.math.MatrixMultiply module 40 | ------------------------------------- 41 | 42 | .. automodule:: block_zoo.math.MatrixMultiply 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | block\_zoo.math.Minus2D module 48 | ------------------------------ 49 | 50 | .. automodule:: block_zoo.math.Minus2D 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | block\_zoo.math.Minus3D module 56 | ------------------------------ 57 | 58 | .. automodule:: block_zoo.math.Minus3D 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | 64 | Module contents 65 | --------------- 66 | 67 | .. automodule:: block_zoo.math 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | -------------------------------------------------------------------------------- /core/EnglishTextPreprocessor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | import ftfy 5 | 6 | 7 | class EnglishTextPreprocessor(object): 8 | def __init__(self, DBC2SBC=True, unicode_fix=True): 9 | self.__DBC2SBC = DBC2SBC 10 | self.__unicode_fix = unicode_fix 11 | 12 | def preprocess(self, string): 13 | if self.__unicode_fix: 14 | string = ftfy.fix_text(string) 15 | if self.__DBC2SBC: 16 | string = self.DBC2SBC(string) 17 | return string 18 | 19 | def DBC2SBC(self, ustring): 20 | """ DBC characters to SBC 21 | 22 | Args: 23 | ustring: 24 | 25 | Returns: 26 | 27 | """ 28 | rstring = "" 29 | for uchar in ustring: 30 | inside_code = ord(uchar) 31 | if inside_code == 0x3000: 32 | inside_code = 0x0020 33 | else: 34 | inside_code -= 0xfee0 35 | if not (0x0021 <= inside_code and inside_code <= 0x7e): 36 | rstring += uchar 37 | continue 38 | rstring += chr(inside_code) 39 | return rstring 40 | 41 | def SBC2DBC(self, ustring): 42 | """ SBC to DBC 43 | 44 | Returns: 45 | 46 | """ 47 | rstring = "" 48 | for uchar in ustring: 49 | inside_code = ord(uchar) 50 | if inside_code == 0x0020: 51 | inside_code = 0x3000 52 | else: 53 | if not (0x0021 <= inside_code and inside_code <= 0x7e): 54 | rstring += uchar 55 | continue 56 | inside_code += 0xfee0 57 | rstring += chr(inside_code) 58 | return rstring -------------------------------------------------------------------------------- /core/EnglishTokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import nltk 5 | import re 6 | from nltk.tokenize.util import align_tokens 7 | from .Stopwords import Stopwords 8 | 9 | class EnglishTokenizer(object): 10 | def __init__(self, tokenizer='nltk', remove_stopwords=False): 11 | self.__tokenizer = tokenizer 12 | self.__remove_stopwords = remove_stopwords 13 | if self.__remove_stopwords: 14 | self.__stop_words = Stopwords.english_stopwords 15 | else: 16 | self.__stop_words = None 17 | 18 | def tokenize(self, string): 19 | if self.__tokenizer == 'nltk': 20 | tokens = nltk.word_tokenize(string) 21 | 22 | if self.__remove_stopwords: 23 | tokens = [word for word in tokens if word not in self.__stop_words] 24 | return tokens 25 | 26 | def span_tokenize(self, string): 27 | if self.__tokenizer == 'nltk': 28 | raw_tokens = nltk.word_tokenize(string) 29 | if ('"' in string) or ("''" in string): 30 | matched = [m.group() for m in re.finditer(r"``|'{2}|\"", string)] 31 | tokens = [matched.pop(0) if tok in ['"', "``", "''"] else tok for tok in raw_tokens] 32 | else: 33 | tokens = raw_tokens 34 | spans = align_tokens(tokens, string) 35 | return spans 36 | 37 | 38 | if __name__ == '__main__': 39 | import os 40 | # nltk.data.path.append(r"C:\Users\wutlin\OneDrive - Microsoft\workspace\DNNMatchingToolkit\dataset\nltk_data") 41 | tokenizer = EnglishTokenizer(tokenizer='nltk', remove_stopwords=True) 42 | print(tokenizer.span_tokenize("""What singer did Beyonce record a song with for the movie, ''The Best Man"?""")) -------------------------------------------------------------------------------- /losses/BaseLossConf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | import torch 5 | from torch.nn import CrossEntropyLoss 6 | import copy 7 | import logging 8 | 9 | class BaseLossConf(object): 10 | @staticmethod 11 | def get_conf(**kwargs): 12 | # judge loss num & setting 13 | kwargs['multiLoss'] = True if len(kwargs['losses']) > 1 else False 14 | # loss = copy.deepcopy(kwargs['losses']) 15 | if kwargs['multiLoss']: 16 | if kwargs.get('multi_loss_op', '') is None: 17 | kwargs['multi_loss_op'] = 'weighted_sum' 18 | logging.info('model has multi-loss but no multi_loss_op, we set default option {0}.'.format('weighted_sum')) 19 | if kwargs.get('weights', None) is None: 20 | kwargs['weights'] = [1] * len(kwargs['losses']) 21 | logging.warning("MultiLoss have no weights, set the weights to 1.") 22 | assert len(kwargs['weights']) == len(kwargs['losses']), "The number of loss is inconsistent with loss weights!" 23 | 24 | 25 | # IF NEEDED, TRANSFORM SOME INT OR FLOAT, OR NUMPY ARRAY TO TENSORS. 26 | for single_loss in kwargs['losses']: 27 | if 'inputs' not in single_loss: 28 | raise Exception("Each loss must have inputs") 29 | if not isinstance(single_loss['inputs'], list): 30 | raise Exception('The inputs of loss must be list') 31 | if len(single_loss['inputs']) != 2: 32 | raise Exception('The length of loss inputs must be 2') 33 | if 'weight' in single_loss['conf']: 34 | single_loss['conf']['weight'] = torch.FloatTensor(single_loss['conf']['weight']) 35 | 36 | return kwargs 37 | 38 | 39 | -------------------------------------------------------------------------------- /utils/DocInherit.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | from functools import wraps 5 | 6 | class DocInherit(object): 7 | """ 8 | Docstring inheriting method descriptor 9 | The class itself is also used as a decorator doc_inherit decorator 10 | 11 | """ 12 | 13 | def __init__(self, mthd): 14 | self.mthd = mthd 15 | self.name = mthd.__name__ 16 | 17 | def __get__(self, obj, cls): 18 | if obj: 19 | return self.get_with_inst(obj, cls) 20 | else: 21 | return self.get_no_inst(cls) 22 | 23 | def get_with_inst(self, obj, cls): 24 | #overridden = getattr(super(cls, obj), self.name, None) 25 | for parent in cls.__mro__[1:]: 26 | overridden = getattr(parent, self.name, None) 27 | if overridden: 28 | break 29 | 30 | @wraps(self.mthd, assigned=('__name__', '__module__', '__doc__')) 31 | def f(*args, **kwargs): 32 | return self.mthd(obj, *args, **kwargs) 33 | 34 | return self.use_parent_doc(f, overridden) 35 | 36 | def get_no_inst(self, cls): 37 | for parent in cls.__mro__[1:]: 38 | overridden = getattr(parent, self.name, None) 39 | if overridden: 40 | break 41 | 42 | @wraps(self.mthd, assigned=('__name__', '__module__', '__doc__')) 43 | def f(*args, **kwargs): 44 | return self.mthd(*args, **kwargs) 45 | 46 | return self.use_parent_doc(f, overridden) 47 | 48 | def use_parent_doc(self, func, source): 49 | if source is None: 50 | raise NameError("Can't find '%s' in parents" % self.name) 51 | if func.__doc__ is None: 52 | func.__doc__ = source.__doc__ 53 | return func 54 | 55 | -------------------------------------------------------------------------------- /model_visualizer/README.md: -------------------------------------------------------------------------------- 1 | # ***NeuronBlocks*** Model Visualizer 2 | 3 | In ***NeuronBlocks***, a model visualizer is provided for visualization and configuration correctness checking, 4 | which helps users to 
visualize the model architecture easily during debugging. 5 | 6 | ## Installation 7 | Two libraries are needed for the ***NeuronBlocks*** Model Visualizer: graphviz and web.py. 8 | 9 | You can install them via pip: 10 | 11 | ```bash 12 | pip install graphviz 13 | pip install web.py==0.40.dev0 14 | ``` 15 | 16 | *Note: For Windows, please visit [Graphviz Windows Packages](https://graphviz.gitlab.io/_pages/Download/Download_windows.html) to 17 | download graphviz, then install it and set the PATH variable.* 18 | 19 | ## Usage 20 | 21 | Model Visualizer has two modes: 22 | - **Command Line Mode**: View the model architecture via the command line. 23 | - **Browser Mode**: First start a Model Visualizer server, then view the model architecture in a browser. 24 | 25 | ### Command Line Mode 26 | 27 | View the model architecture from the command line by running: 28 | ```bash 29 | python get_model_graph.py --conf_path ../model_zoo/demo/conf.json --graph_path ./graph 30 | ``` 31 | Arguments: 32 | ```bash 33 | --conf_path: [necessary] Path of the input JSON config file. 34 | --graph_path: [optional, default: './graph'] Path of the output model graph file. 35 | ``` 36 | You will get two files: *graph.gv* and *graph.gv.svg*. 37 | Open *graph.gv.svg* to view the model architecture. 38 | 39 | ### Browser Mode 40 | 41 | First, start the Model Visualizer server: 42 | 43 | ```bash 44 | cd server/ 45 | python main.py 8080 46 | ``` 47 | Then you can access the Model Visualizer in your browser. 48 | For Linux, visit http://<server-ip>:8080 . 49 | For Windows, visit http://localhost:8080 . 50 | 51 | Finally, input the JSON config in the *Config Json* field, click the *Submit* button, 52 | and get the model architecture. 53 | -------------------------------------------------------------------------------- /docs/build/html/_sources/block_zoo.attentions.rst.txt: -------------------------------------------------------------------------------- 1 | block\_zoo.attentions package 2 | ============================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | block\_zoo.attentions.Attention module 8 | -------------------------------------- 9 | 10 | .. automodule:: block_zoo.attentions.Attention 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | block\_zoo.attentions.BiAttFlow module 16 | -------------------------------------- 17 | 18 | .. automodule:: block_zoo.attentions.BiAttFlow 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | block\_zoo.attentions.BilinearAttention module 24 | ---------------------------------------------- 25 | 26 | .. automodule:: block_zoo.attentions.BilinearAttention 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | block\_zoo.attentions.FullAttention module 32 | ------------------------------------------ 33 | 34 | .. automodule:: block_zoo.attentions.FullAttention 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | block\_zoo.attentions.LinearAttention module 40 | -------------------------------------------- 41 | 42 | .. automodule:: block_zoo.attentions.LinearAttention 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | block\_zoo.attentions.MatchAttention module 48 | ------------------------------------------- 49 | 50 | .. automodule:: block_zoo.attentions.MatchAttention 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | block\_zoo.attentions.Seq2SeqAttention module 56 | --------------------------------------------- 57 | 58 | ..
automodule:: block_zoo.attentions.Seq2SeqAttention 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | 64 | Module contents 65 | --------------- 66 | 67 | .. automodule:: block_zoo.attentions 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | -------------------------------------------------------------------------------- /block_zoo/normalizations/LayerNorm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from block_zoo.BaseLayer import BaseLayer,BaseConf 8 | from utils.DocInherit import DocInherit 9 | import copy 10 | 11 | class LayerNormConf(BaseConf): 12 | """ Configuration of LayerNorm Layer 13 | 14 | """ 15 | def __init__(self,**kwargs): 16 | super(LayerNormConf, self).__init__(**kwargs) 17 | 18 | # @DocInherit 19 | # def default(self): 20 | 21 | @DocInherit 22 | def declare(self): 23 | self.num_of_inputs = 1 24 | self.input_ranks = [3] 25 | 26 | @DocInherit 27 | def inference(self): 28 | self.output_dim = copy.deepcopy(self.input_dims[0]) 29 | super(LayerNormConf, self).inference() 30 | 31 | @DocInherit 32 | def verify(self): 33 | super(LayerNormConf, self).verify() 34 | 35 | class LayerNorm(nn.Module): 36 | """ LayerNorm layer 37 | 38 | Args: 39 | layer_conf (LayerNormConf): configuration of a layer 40 | 41 | """ 42 | def __init__(self,layer_conf): 43 | super(LayerNorm, self).__init__() 44 | self.layer_conf = layer_conf 45 | self.g = nn.Parameter(torch.ones(self.layer_conf.input_dims[0][-1])) 46 | self.b = nn.Parameter(torch.zeros(self.layer_conf.input_dims[0][-1])) 47 | self.e = 1e-5 48 | 49 | def forward(self, string, string_len): 50 | """ process input 51 | 52 | Args: 53 | string, string_len 54 | e.g. string (Tensor): [batch_size, seq_len, dim], string_len (Tensor): [batch_size] 55 | 56 | Returns: 57 | Tensor: [batch_size, seq_len, output_dim], [batch_size] 58 | """ 59 | u = string.mean(-1,keepdim=True) 60 | s = (string - u).pow(2).mean(-1,keepdim=True) 61 | string = (string - u)/torch.sqrt(s+self.e) 62 | return self.g * string + self.b, string_len -------------------------------------------------------------------------------- /block_zoo/Flatten.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
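# Note on the conf contract used throughout block_zoo (LayerNormConf above, FlattenConf
# below): declare() fixes how many inputs a layer takes and their tensor ranks, inference()
# derives output_dim after the framework has filled in input_dims, and verify() runs sanity
# checks. For FlattenConf, for example, input_dims of [[-1, 10, 128]] (batch, fixed seq_len,
# dim) would yield output_dim [-1, 1280]; the shapes here are illustrative, and these calls
# are normally driven by the framework rather than invoked by hand.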
3 | 4 | import torch.nn as nn 5 | import logging 6 | 7 | from block_zoo.BaseLayer import BaseConf 8 | from utils.DocInherit import DocInherit 9 | from utils.exceptions import ConfigurationError 10 | import copy 11 | 12 | class FlattenConf(BaseConf): 13 | """Configuration of Flatten layer 14 | 15 | """ 16 | 17 | #init the args 18 | def __init__(self, **kwargs): 19 | super(FlattenConf, self).__init__(**kwargs) 20 | 21 | #set default params 22 | #@DocInherit 23 | #def default(self): 24 | 25 | @DocInherit 26 | def declare(self): 27 | self.num_of_inputs = 1 28 | self.input_ranks = [3] 29 | 30 | @DocInherit 31 | def inference(self): 32 | self.output_dim = [] 33 | if self.input_dims[0][1] == -1: 34 | raise ConfigurationError("For Flatten layer, the sequence length should be fixed") 35 | else: 36 | self.output_dim.append(self.input_dims[0][0]) 37 | self.output_dim.append(self.input_dims[0][1]*self.input_dims[0][-1]) 38 | 39 | super(FlattenConf, self).inference() 40 | 41 | @DocInherit 42 | def verify(self): 43 | super(FlattenConf, self).verify() 44 | 45 | class Flatten(nn.Module): 46 | """ Flatten layer to flatten the input from [bsatch_size, seq_len, dim] to [batch_size, seq_len*dim] 47 | 48 | Args: 49 | layer_conf(FlattenConf): configuration of a layer 50 | """ 51 | 52 | def __init__(self, layer_conf): 53 | super(Flatten, self).__init__() 54 | self.layer_conf = layer_conf 55 | 56 | def forward(self, string, string_len): 57 | """ process input 58 | 59 | Args: 60 | *args: (Tensor): string,string_len 61 | e.g. string (Tensor): [batch_size, seq_len, dim], string_len (Tensor): [batch_size] 62 | Returns: 63 | Tensor: [batch_size, seq_len*dim], [batch_size] 64 | """ 65 | return string.view(string.shape[0], -1), string_len 66 | 67 | 68 | -------------------------------------------------------------------------------- /block_zoo/attentions/BilinearAttention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import torch.nn as nn 5 | import copy 6 | 7 | from block_zoo.BaseLayer import BaseLayer, BaseConf 8 | from utils.DocInherit import DocInherit 9 | 10 | class BilinearAttentionConf(BaseConf): 11 | """Configuration for Bilinear attention layer 12 | 13 | """ 14 | def __init__(self, **kwargs): 15 | super(BilinearAttentionConf, self).__init__(**kwargs) 16 | 17 | @DocInherit 18 | def default(self): 19 | pass 20 | 21 | @DocInherit 22 | def declare(self): 23 | self.num_of_inputs = 2 24 | self.input_ranks = [3, 2] 25 | 26 | @DocInherit 27 | def inference(self): 28 | self.output_dim = copy.deepcopy(self.input_dims[0]) 29 | self.output_dim[-1] = 1 30 | super(BilinearAttentionConf, self).inference() # PUT THIS LINE AT THE END OF inference() 31 | 32 | @DocInherit 33 | def verify(self): 34 | super(BilinearAttentionConf, self).verify() 35 | 36 | 37 | class BilinearAttention(BaseLayer): 38 | """ BilinearAttention layer for DrQA 39 | [paper] https://arxiv.org/abs/1704.00051 40 | Args: 41 | layer_conf (BilinearAttentionConf): configuration of a layer 42 | 43 | """ 44 | def __init__(self, layer_conf): 45 | super(BilinearAttention, self).__init__(layer_conf) 46 | self.linear = nn.Linear(layer_conf.input_dims[1][-1], layer_conf.input_dims[0][-1]) 47 | 48 | def forward(self, x, x_len, y, y_len): 49 | """ process inputs 50 | 51 | Args: 52 | x (Tensor): [batch_size, x_len, x_dim]. 53 | x_len (Tensor): [batch_size], default is None. 54 | y (Tensor): [batch_size, y_dim]. 
55 | y_len (Tensor): [batch_size], default is None. 56 | Returns: 57 | output: [batch_size, x_len, 1]. 58 | x_len: 59 | 60 | """ 61 | 62 | Wy = self.linear(y) # [batch_size, x_dim] 63 | xWy = x.bmm(Wy.unsqueeze(2)) # [batch_size, x_len, 1] 64 | 65 | return xWy, x_len 66 | 67 | 68 | -------------------------------------------------------------------------------- /block_zoo/transformer/MLP.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import torch 5 | import torch.nn as nn 6 | import math 7 | 8 | from block_zoo.BaseLayer import BaseLayer,BaseConf 9 | from utils.DocInherit import DocInherit 10 | import copy 11 | 12 | class MLPConf(BaseConf): 13 | """ Configuration of MLP layer 14 | 15 | Args: 16 | dropout (float): the dropout of MLP layer 17 | 18 | """ 19 | def __init__(self, **kwargs): 20 | super(MLPConf, self).__init__(**kwargs) 21 | 22 | @DocInherit 23 | def default(self): 24 | self.dropout = 0.1 25 | 26 | @DocInherit 27 | def declare(self): 28 | self.num_of_inputs = 1 29 | self.input_ranks = [3] 30 | 31 | @DocInherit 32 | def inference(self): 33 | self.output_dim = copy.deepcopy(self.input_dims[0]) 34 | super(MLPConf, self).inference() 35 | 36 | @DocInherit 37 | def verify(self): 38 | super(MLPConf, self).verify() 39 | 40 | class MLP(nn.Module): 41 | """ MLP layer 42 | 43 | Args: 44 | layer_conf (MLPConf): configuration of a layer 45 | 46 | """ 47 | def __init__(self, layer_conf): 48 | super(MLP, self).__init__() 49 | self.layer_conf = layer_conf 50 | self.n_state = self.layer_conf.input_dims[0][-1] 51 | self.c_fc = nn.Linear(self.layer_conf.input_dims[0][-1], 4*self.n_state) 52 | self.c_proj = nn.Linear(4*self.n_state, self.layer_conf.input_dims[0][-1]) 53 | 54 | def gelu(self,x): 55 | return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) 56 | 57 | def forward(self, string, string_len): 58 | """ process input 59 | 60 | Args: 61 | string, string_len 62 | e.g. string (Tensor): [batch_size, seq_len, dim], string_len (Tensor): [batch_size] 63 | 64 | Returns: 65 | Tensor: [batch_size, seq_len, output_dim], [batch_size] 66 | 67 | """ 68 | h = self.gelu(self.c_fc(string)) 69 | h2 = self.c_proj(h) 70 | return nn.Dropout(self.layer_conf.dropout)(h2), string_len 71 | -------------------------------------------------------------------------------- /block_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
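# block_zoo re-exports every layer together with its *Conf class from one namespace, which
# appears to be how the "layer" names used in the model_zoo JSON configs (e.g. "BiLSTM",
# "ConvPooling", "EncoderDecoder") are resolved; a new block is therefore typically a layer
# class plus a conf class, both exported here.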
3 | from .Embedding import Embedding, EmbeddingConf 4 | from .BiLSTM import BiLSTM, BiLSTMConf 5 | from .BiLSTMLast import BiLSTMLast, BiLSTMLastConf 6 | from .BiGRU import BiGRU, BiGRUConf 7 | from .BiGRULast import BiGRULast, BiGRULastConf 8 | from .Linear import Linear, LinearConf 9 | from .BaseLayer import BaseLayer, BaseConf 10 | from .BiLSTMAtt import BiLSTMAtt, BiLSTMAttConf 11 | from .BiQRNN import BiQRNN, BiQRNNConf 12 | from .Conv import Conv, ConvConf 13 | from .Pooling import Pooling, PoolingConf 14 | from .ConvPooling import ConvPooling, ConvPoolingConf 15 | from .Flatten import Flatten, FlattenConf 16 | 17 | from .Dropout import Dropout, DropoutConf 18 | 19 | from .embedding import CNNCharEmbedding, CNNCharEmbeddingConf 20 | 21 | from .attentions import FullAttention, FullAttentionConf 22 | from .attentions import Seq2SeqAttention, Seq2SeqAttentionConf 23 | from .attentions import LinearAttention, LinearAttentionConf # The output rank of this layer can be either unchanged or reduced 24 | from .attentions import BiAttFlow, BiAttFlowConf 25 | from .attentions import MatchAttention, MatchAttentionConf 26 | from .attentions import Attention, AttentionConf 27 | from .attentions import BilinearAttention, BilinearAttentionConf 28 | 29 | # Combination classes 30 | from .op import Concat3D, Concat3DConf 31 | from .op import Concat2D, Concat2DConf 32 | from .op import Combination, CombinationConf 33 | 34 | # Math operations 35 | from .math import Add2D, Add2DConf 36 | from .math import Add3D, Add3DConf 37 | from .math import Minus2D, Minus2DConf 38 | from .math import Minus3D, Minus3DConf 39 | from .math import ElementWisedMultiply2D, ElementWisedMultiply2DConf 40 | from .math import ElementWisedMultiply3D, ElementWisedMultiply3DConf 41 | from .math import MatrixMultiply, MatrixMultiplyConf 42 | 43 | # Transformer layer 44 | from .Transformer import Transformer, TransformerConf 45 | 46 | # Encoder Decoder classes 47 | from .EncoderDecoder import EncoderDecoder, EncoderDecoderConf 48 | 49 | from .normalizations import LayerNorm, LayerNormConf 50 | 51 | from .HighwayLinear import HighwayLinear, HighwayLinearConf -------------------------------------------------------------------------------- /settings.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | # add the project root to python path 5 | import os 6 | import sys 7 | sys.path.append(sys.path[0]) 8 | from enum import Enum 9 | import nltk 10 | 11 | 12 | version = '1.1.0' 13 | 14 | # Supported languages 15 | LanguageTypes = Enum('LanguageTypes', ('english', 'chinese')) 16 | 17 | # Supported problems 18 | ProblemTypes = Enum('ProblemTypes', ('sequence_tagging', 'classification', 'regression', 'mrc')) 19 | 20 | # Supported sequence tagging scheme 21 | TaggingSchemes = Enum('TaggingSchemes', ('BIO')) 22 | 23 | # supported metrics 24 | SupportedMetrics = { 25 | ProblemTypes.sequence_tagging: set(['seq_tag_f1', 'accuracy']), 26 | ProblemTypes.classification: set(['auc', 'accuracy', 'f1', 'macro_f1', 'macro_precision', 'macro_recall', 'micro_f1', 'micro_precision', 'micro_recall', 'weighted_f1', 'weighted_precision', 'weighted_recall']), 27 | # In addition, for auc in multi-type classification, 28 | # if there is a type named 1, auc@1 means use 1 as the positive label 29 | # auc@average means enumerate all the types as the positive label and obtain the average auc. 
30 | ProblemTypes.regression: set(['MSE', 'RMSE']), 31 | ProblemTypes.mrc: set(['f1', 'em']), 32 | } 33 | 34 | # Supported prediction types 35 | PredictionTypes = { 36 | ProblemTypes.sequence_tagging: set(['prediction']), 37 | ProblemTypes.classification: set(['prediction', 'confidence']), # In addition, if there is a type named positive, confidence@positive means the confidence of positive 38 | ProblemTypes.regression: set(['prediction']), 39 | ProblemTypes.mrc: set(['prediction']), 40 | } 41 | 42 | # Supported multi_loss operation 43 | LossOperationType = Enum('LossOperationType', ('weighted_sum')) 44 | 45 | # If prediction_field is not defined, use the default fields below 46 | DefaultPredictionFields = { 47 | ProblemTypes.sequence_tagging: ['prediction'], 48 | ProblemTypes.classification: ['prediction', 'confidence'], 49 | ProblemTypes.regression: ['prediction'], 50 | ProblemTypes.mrc: ['prediction'], 51 | } 52 | 53 | # nltk's models 54 | nltk.data.path.append(os.path.join(os.getcwd(), 'dataset', 'nltk_data')) 55 | 56 | -------------------------------------------------------------------------------- /tools/calculate_AUC.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import argparse 5 | from sklearn.metrics import roc_auc_score 6 | 7 | def read_tsv(params): 8 | prediction, label = [], [] 9 | predict_index, label_index = int(params.predict_index), int(params.label_index) 10 | min_column_num = max(predict_index, label_index) + 1 11 | with open(params.input_file, mode='r', encoding='utf-8') as f: 12 | for index, line in enumerate(f): 13 | if params.header and index == 0: 14 | continue 15 | line = line.rstrip() 16 | # skip empty line 17 | if not line: 18 | continue 19 | line = line.split('\t') 20 | if len(line) < min_column_num: 21 | print("at line:%s, %s"%(predict_index, line)) 22 | raise Exception("the given index of predict or label is exceed the index of the column") 23 | prediction.append(float(line[predict_index])) 24 | label.append(int(line[label_index])) 25 | return prediction, label 26 | 27 | def calculate_AUC(prediction, label): 28 | return roc_auc_score(label, prediction) 29 | 30 | def main(params): 31 | prediction, label = read_tsv(params) 32 | auc = calculate_AUC(prediction, label) 33 | print("AUC is ", auc) 34 | return auc 35 | 36 | if __name__ == "__main__": 37 | parser = argparse.ArgumentParser(description="AUC") 38 | parser.add_argument("--input_file", type=str, help="tsv file") 39 | parser.add_argument("--predict_index", type=str, help="the column index of prediction of model, start from 0") 40 | parser.add_argument("--label_index", type=str, help="the column index of label, start from 0") 41 | parser.add_argument("--header", action='store_true', default=False, help="whether contains header row or not, default is False") 42 | 43 | params, _ = parser.parse_known_args() 44 | 45 | assert params.input_file, 'Please specify a input file via --input_file' 46 | assert params.predict_index, 'Please specify the column index of prediction via --predict_index' 47 | assert params.label_index, 'Please specify the column index of label via --label_index' 48 | main(params) -------------------------------------------------------------------------------- /block_zoo/Dropout.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 
2 | # Licensed under the MIT license. 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from collections import OrderedDict 8 | import copy 9 | 10 | from block_zoo.BaseLayer import BaseLayer, BaseConf 11 | from utils.DocInherit import DocInherit 12 | 13 | class DropoutConf(BaseConf): 14 | """ Configuration for Dropout 15 | 16 | Args: 17 | dropout (float): dropout rate, probability of an element to be zeroed 18 | 19 | Returns: 20 | 21 | """ 22 | def __init__(self, **kwargs): 23 | 24 | super(DropoutConf, self).__init__(**kwargs) 25 | 26 | @DocInherit 27 | def default(self): 28 | self.dropout = 0.5 29 | 30 | @DocInherit 31 | def declare(self): 32 | self.num_of_inputs = 1 33 | self.input_ranks = [-1] 34 | 35 | @DocInherit 36 | def inference(self): 37 | self.output_dim = copy.deepcopy(self.input_dims[0]) 38 | 39 | super(DropoutConf, self).inference() # PUT THIS LINE AT THE END OF inference() 40 | 41 | @DocInherit 42 | def verify(self): 43 | super(DropoutConf, self).verify() 44 | 45 | necessary_attrs_for_user = ['dropout'] 46 | for attr in necessary_attrs_for_user: 47 | self.add_attr_exist_assertion_for_user(attr) 48 | 49 | range_checks = [('dropout', (0, 1), (True, True))] 50 | for attr, ranges, bound_legal in range_checks: 51 | self.add_attr_range_assertion(attr, ranges, bound_legal) 52 | 53 | 54 | class Dropout(BaseLayer): 55 | """ Dropout 56 | 57 | Args: 58 | layer_conf (DropoutConf): configuration of a layer 59 | """ 60 | def __init__(self, layer_conf): 61 | super(Dropout, self).__init__(layer_conf) 62 | self.dropout_layer = nn.Dropout(layer_conf.dropout) 63 | 64 | def forward(self, string, string_len=None): 65 | """ process inputs 66 | 67 | Args: 68 | string (Tensor): any shape. 69 | string_len (Tensor): [batch_size], default is None. 70 | 71 | Returns: 72 | Tensor: has the same shape as string. 73 | """ 74 | string_out = self.dropout_layer(string) 75 | return string_out, string_len 76 | 77 | -------------------------------------------------------------------------------- /core/EnglishPOSTagger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license.
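# Illustrative usage (the tags shown are typical Penn Treebank outputs of NLTK's
# PerceptronTagger and are not guaranteed):
#
#   tagger = EnglishPOSTagger()
#   tagger.postag(['show', 'me', 'the', 'flights'])              # e.g. ['VB', 'PRP', 'DT', 'NNS']
#   tagger.postag_multi([['hello', 'world'], ['nice', 'day']])   # one tag list per sentence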
3 | 4 | import os 5 | ''' 6 | if not ("STANFORD_MODELS" in os.environ and "STANFORD_POSTAGGER_PATH" in os.environ \ 7 | and "CLASSPATH" in os.environ \ 8 | and os.environ['CLASSPATH'].find('stanford-postagger.jar') != -1): 9 | raise Exception("To use Stanford POS tagger, please set the corresponding environment " 10 | "variables first") 11 | from nltk.tag import StanfordPOSTagger 12 | ''' 13 | #from nltk.tag import pos_tag, pos_tag_sents 14 | from nltk.tag.perceptron import PerceptronTagger 15 | import nltk 16 | class EnglishPOSTagger(object): 17 | def __init__(self, model_type='english-bidirectional-distsim.tagger'): 18 | """ 19 | Args: 20 | model: model available in $STANFORD_MODELS: 21 | english-bidirectional-distsim.tagger 22 | english-caseless-left3words-distsim.tagger 23 | english-left3words-distsim.tagger 24 | """ 25 | #self.eng_tagger = StanfordPOSTagger(model_type, java_options='-mx16000m') 26 | self.eng_tagger = PerceptronTagger() 27 | 28 | def postag(self, word_list): 29 | """ 30 | Args: 31 | word_list: word list 32 | Returns: 33 | pos tag list 34 | """ 35 | #word_pos_pairs = self.eng_tagger.tag(word_list) 36 | 37 | #word_pos_pairs = pos_tag(word_list) 38 | word_pos_pairs = nltk.tag._pos_tag(word_list, None, self.eng_tagger) 39 | pos_list = [pos for (word, pos) in word_pos_pairs] 40 | return pos_list 41 | 42 | def postag_multi(self, multi_sentence): 43 | """ tag multiple sentences one time 44 | RECOMMAND! Because the efficiency of stanford pos tagger in NLTK is too slow. 45 | Args: 46 | multi_sentence: [[token1, token2], ..., [...]] 47 | Returns: 48 | """ 49 | #word_pos_pairs_multi_sent = self.eng_tagger.tag_sents(multi_sentence) 50 | ''' 51 | word_pos_pairs_multi_sent = pos_tag_sents(multi_sentence) 52 | pos_lists = [] 53 | for word_pos_pairs in word_pos_pairs_multi_sent: 54 | pos_lists.append([pos for (word, pos) in word_pos_pairs]) 55 | return pos_lists 56 | ''' 57 | return [self.postag(sent) for sent in multi_sentence] 58 | 59 | -------------------------------------------------------------------------------- /losses/FocalLoss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | class FocalLoss(nn.Module): 9 | """ Focal loss 10 | reference: Lin T Y, Goyal P, Girshick R, et al. Focal loss for dense object detection[J]. arXiv preprint arXiv:1708.02002, 2017. 11 | Args: 12 | gamma (float): gamma >= 0. 13 | alpha (float): 0 <= alpha <= 1 14 | size_average (bool, optional): By default, the losses are averaged over observations for each minibatch. However, if the field size_average is set to False, the losses are instead summed for each minibatch. Default is True 15 | 16 | """ 17 | def __init__(self, **kwargs): 18 | super(FocalLoss, self).__init__() 19 | 20 | # default parameters 21 | self.gamma = 0 22 | self.alpha = 0.5 23 | self.size_average = True 24 | 25 | for key in kwargs: 26 | setattr(self, key, kwargs[key]) 27 | 28 | # varification 29 | assert self.alpha <= 1 and self.alpha >= 0, "The parameter alpha in Focal Loss must be in range [0, 1]." 
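        # The forward() below implements FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t):
        # gamma down-weights well-classified examples, and a scalar alpha is expanded into
        # per-class weights [alpha, 1 - alpha], which assumes a binary (two-class) target.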
30 | if self.alpha is not None: 31 | self.alpha = torch.Tensor([self.alpha, 1 - self.alpha]) 32 | 33 | def forward(self, input, target): 34 | """ Get focal loss 35 | 36 | Args: 37 | input (Variable): the prediction with shape [batch_size, number of classes] 38 | target (Variable): the answer with shape [batch_size, number of classes] 39 | 40 | Returns: 41 | Variable (float): loss 42 | """ 43 | if input.dim()>2: 44 | input = input.view(input.size(0),input.size(1),-1) # N,C,H,W => N,C,H*W 45 | input = input.transpose(1,2) # N,C,H*W => N,H*W,C 46 | input = input.contiguous().view(-1,input.size(2)) # N,H*W,C => N*H*W,C 47 | target = target.view(-1, 1) 48 | 49 | logpt = F.log_softmax(input) 50 | logpt = logpt.gather(1,target) 51 | logpt = logpt.view(-1) 52 | pt = logpt.data.exp() 53 | 54 | if self.alpha is not None: 55 | if self.alpha.type() != input.data.type(): 56 | self.alpha = self.alpha.type_as(input.data) 57 | at = self.alpha.gather(0, target.data.view(-1)) 58 | logpt = logpt * at 59 | 60 | loss = -1 * (1-pt)**self.gamma * logpt 61 | 62 | if self.size_average: 63 | return loss.mean() 64 | else: 65 | return loss.sum() 66 | -------------------------------------------------------------------------------- /block_zoo/attentions/Attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import torch 5 | import numpy as np 6 | import torch.nn as nn 7 | import copy 8 | 9 | from block_zoo.BaseLayer import BaseLayer, BaseConf 10 | from utils.DocInherit import DocInherit 11 | 12 | 13 | class AttentionConf(BaseConf): 14 | """Configuration for Attention layer 15 | 16 | """ 17 | def __init__(self, **kwargs): 18 | super(AttentionConf, self).__init__(**kwargs) 19 | 20 | @DocInherit 21 | def default(self): 22 | pass 23 | 24 | @DocInherit 25 | def declare(self): 26 | self.num_of_inputs = 2 27 | self.input_ranks = [3, 3] 28 | 29 | @DocInherit 30 | def inference(self): 31 | self.output_dim = copy.deepcopy(self.input_dims[0]) 32 | super(AttentionConf, self).inference() # PUT THIS LINE AT THE END OF inference() 33 | 34 | @DocInherit 35 | def verify(self): 36 | super(AttentionConf, self).verify() 37 | 38 | 39 | class Attention(BaseLayer): 40 | """ Attention layer 41 | 42 | Given sequences X and Y, match sequence Y to each element in X. 43 | 44 | Args: 45 | layer_conf (AttentionConf): configuration of a layer 46 | 47 | """ 48 | def __init__(self, layer_conf): 49 | 50 | super(Attention, self).__init__(layer_conf) 51 | assert layer_conf.input_dims[0][-1] == layer_conf.input_dims[1][-1] 52 | self.softmax = nn.Softmax(dim=-1) 53 | 54 | def forward(self, x, x_len, y, y_len): 55 | """ 56 | 57 | Args: 58 | x (Tensor): [batch_size, x_max_len, dim]. 59 | x_len (Tensor): [batch_size], default is None. 60 | y (Tensor): [batch_size, y_max_len, dim]. 61 | y_len(Tensor): [batch_size], default is None. 62 | 63 | Returns: 64 | output: has the same shape as x. 
65 | 66 | """ 67 | 68 | scores = x.bmm(y.transpose(2, 1)) # [batch_size, x_max_len, y_max_len] 69 | 70 | batch_size, y_max_len, _ = y.size() 71 | y_length = y_len.cpu().numpy() 72 | y_mask = np.ones((batch_size, y_max_len)) 73 | for i, single_len in enumerate(y_length): 74 | y_mask[i][:single_len] = 0 75 | y_mask = torch.from_numpy(y_mask).byte().to(scores.device) 76 | y_mask = y_mask.unsqueeze(1).expand(scores.size()) 77 | scores.data.masked_fill_(y_mask.data, float('-inf')) 78 | 79 | alpha = self.softmax(scores) # [batch_size, x_max_len, y_len] 80 | output = alpha.bmm(y) # [batch_size, x_max_len, dim] 81 | 82 | return output, x_len 83 | 84 | 85 | -------------------------------------------------------------------------------- /block_zoo/BiLSTMLast.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import numpy as np 9 | from block_zoo.BaseLayer import BaseLayer, BaseConf 10 | from utils.DocInherit import DocInherit 11 | 12 | class BiLSTMLastConf(BaseConf): 13 | """ Configuration of BiLSTMLast 14 | 15 | Args: 16 | hidden_dim (int): dimension of hidden state 17 | dropout (float): dropout rate 18 | num_layers (int): number of BiLSTM layers 19 | """ 20 | def __init__(self, **kwargs): 21 | super(BiLSTMLastConf, self).__init__(**kwargs) 22 | 23 | @DocInherit 24 | def default(self): 25 | self.hidden_dim = 128 26 | self.dropout = 0.0 27 | self.num_layers = 1 28 | 29 | @DocInherit 30 | def declare(self): 31 | self.num_of_inputs = 1 32 | self.input_ranks = [3] 33 | 34 | @DocInherit 35 | def inference(self): 36 | self.output_dim = [-1] 37 | self.output_dim.append(2 * self.hidden_dim) 38 | 39 | super(BiLSTMLastConf, self).inference() # PUT THIS LINE AT THE END OF inference() 40 | 41 | @DocInherit 42 | def verify(self): 43 | super(BiLSTMLastConf, self).verify() 44 | 45 | necessary_attrs_for_user = ['hidden_dim', 'dropout', 'num_layers'] 46 | for attr in necessary_attrs_for_user: 47 | self.add_attr_exist_assertion_for_user(attr) 48 | 49 | 50 | class BiLSTMLast(BaseLayer): 51 | """ get last hidden states of Bidrectional LSTM 52 | 53 | Args: 54 | layer_conf (BiLSTMConf): configuration of a layer 55 | """ 56 | def __init__(self, layer_conf): 57 | super(BiLSTMLast, self).__init__(layer_conf) 58 | self.lstm = nn.LSTM(layer_conf.input_dims[0][-1], layer_conf.hidden_dim, layer_conf.num_layers, bidirectional=True, 59 | dropout=layer_conf.dropout, batch_first=True) 60 | 61 | def forward(self, string, string_len): 62 | """ process inputs 63 | 64 | Args: 65 | string (Tensor): [batch_size, seq_len, dim] 66 | string_len (Tensor): [batch_size] 67 | 68 | Returns: 69 | Tensor: [batch_size, 2 * hidden_dim] 70 | 71 | """ 72 | str_len, idx_sort = (-string_len).sort() 73 | str_len = -str_len 74 | idx_unsort = idx_sort.sort()[1] 75 | 76 | string = string.index_select(0, idx_sort) 77 | 78 | # Handling padding in Recurrent Networks 79 | string_packed = nn.utils.rnn.pack_padded_sequence(string, str_len, batch_first=True) 80 | self.lstm.flatten_parameters() 81 | string_output, (hn, cn) = self.lstm(string_packed) # seqlen x batch x 2*nhid 82 | 83 | emb = torch.cat((hn[0], hn[1]), 1) # batch x 2*nhid 84 | emb = emb.index_select(0, idx_unsort) 85 | 86 | return emb, string_len 87 | -------------------------------------------------------------------------------- 
/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_encoder_decoder.json: -------------------------------------------------------------------------------- 1 | { 2 | "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.", 3 | "tool_version": "1.1.0", 4 | "model_description": "This model is used for slot tagging task. It achieved a f1-score of 94.66 on the dataset below", 5 | "inputs": { 6 | "use_cache": true, 7 | "dataset_type": "sequence_tagging", 8 | "tagging_scheme": "BIO", 9 | "data_paths": { 10 | "train_data_path": "./dataset/atis/atis.train.iob", 11 | "test_data_path": "./dataset/atis/atis.test.iob", 12 | "pre_trained_emb": "./dataset/GloVe/glove.840B.300d.txt" 13 | }, 14 | "add_start_end_for_seq": true, 15 | "file_header": { 16 | "word": 0, 17 | "tag": 1 18 | }, 19 | "model_inputs": { 20 | "words": ["word"] 21 | }, 22 | "target": ["tag"] 23 | }, 24 | "outputs":{ 25 | "save_base_dir": "./models/slot_tagging_encoder_decoder/", 26 | "model_name": "model_debug.nb", 27 | "train_log_name": "train_debug.log", 28 | "test_log_name": "test_debug.log", 29 | "predict_log_name": "predict_debug.log", 30 | "predict_fields": ["prediction"], 31 | "predict_output_name": "predict_debug.tsv", 32 | "cache_dir": ".cache.atis/" 33 | }, 34 | "training_params": { 35 | "vocabulary": { 36 | "min_word_frequency": 1 37 | }, 38 | "optimizer": { 39 | "name": "Adam", 40 | "params": { 41 | "lr": 0.001 42 | } 43 | }, 44 | "lr_decay": 1, 45 | "minimum_lr": 0.0001, 46 | "epoch_start_lr_decay": 1, 47 | "use_gpu": true, 48 | "batch_size": 32, 49 | "batch_num_to_show_results": 50, 50 | "max_epoch": 20, 51 | "valid_times_per_epoch": 1 52 | }, 53 | "architecture":[ 54 | { 55 | "layer": "Embedding", 56 | "conf": { 57 | "word": { 58 | "cols": ["word"], 59 | "dim": 300 60 | } 61 | } 62 | }, 63 | { 64 | "output_layer_flag": true, 65 | "layer_id": "encoder_decoder_1", 66 | "layer": "EncoderDecoder", 67 | "conf": { 68 | "encoder": "SLUEncoder", 69 | "encoder_conf": { 70 | "hidden_dim": 128, 71 | "dropout": 0, 72 | "num_layers": 1 73 | }, 74 | "decoder": "SLUDecoder", 75 | "decoder_conf": { 76 | "hidden_dim": 256, 77 | "dropout": 0, 78 | "num_layers": 1, 79 | "decoder_emb_dim": 30 80 | } 81 | }, 82 | "inputs": ["words"] 83 | } 84 | ], 85 | "loss": { 86 | "losses": [ 87 | { 88 | "type": "CrossEntropyLoss", 89 | "conf": { 90 | "size_average": true 91 | }, 92 | "inputs": ["encoder_decoder_1","tag"] 93 | } 94 | ] 95 | }, 96 | "metrics": ["seq_tag_f1"] 97 | } -------------------------------------------------------------------------------- /block_zoo/op/Concat2D.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import torch 5 | import torch.nn as nn 6 | import logging 7 | 8 | from ..BaseLayer import BaseConf,BaseLayer 9 | from utils.DocInherit import DocInherit 10 | from utils.exceptions import ConfigurationError 11 | import copy 12 | 13 | class Concat2DConf(BaseConf): 14 | """ Configuration of Concat2D Layer 15 | 16 | Args: 17 | concat2D_axis(int): which axis to conduct concat2D, default is 1. 
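        Example (illustrative shapes): inputs of shape [batch_size, d1] and [batch_size, d2]
        concatenated along axis 1 yield an output of shape [batch_size, d1 + d2].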
18 | """ 19 | 20 | # init the args 21 | def __init__(self,**kwargs): 22 | super(Concat2DConf, self).__init__(**kwargs) 23 | 24 | # set default params 25 | @DocInherit 26 | def default(self): 27 | self.concat2D_axis = 1 28 | 29 | @DocInherit 30 | def declare(self): 31 | self.num_of_inputs = -1 32 | self.input_ranks = [2] 33 | 34 | @DocInherit 35 | def inference(self): 36 | self.output_dim = copy.deepcopy(self.input_dims[0]) 37 | self.output_dim[-1] = 0 38 | self.output_dim[-1] += sum([input_dim[-1] for input_dim in self.input_dims]) 39 | 40 | super(Concat2DConf, self).inference() 41 | 42 | @DocInherit 43 | def verify(self): 44 | super(Concat2DConf, self).verify() 45 | 46 | # to check if the ranks of all the inputs are equal 47 | rank_equal_flag = True 48 | for i in range(len(self.input_ranks)): 49 | if self.input_ranks[i] != self.input_ranks[0]: 50 | rank_equal_flag = False 51 | break 52 | if rank_equal_flag == False: 53 | raise ConfigurationError("For layer Concat2D, the ranks of each inputs should be equal!") 54 | 55 | # to check if the concat2D_axis is legal 56 | if self.concat2D_axis != 1: 57 | raise ConfigurationError("For layer Concat2D, the concat axis must be 1!") 58 | 59 | class Concat2D(nn.Module): 60 | """ Concat2D layer to merge sum of sequences(2D representation) 61 | 62 | Args: 63 | layer_conf (Concat2DConf): configuration of a layer 64 | """ 65 | def __init__(self, layer_conf): 66 | super(Concat2D, self).__init__() 67 | self.layer_conf = layer_conf 68 | 69 | logging.warning("The length Concat2D layer returns is the length of first input") 70 | 71 | def forward(self, *args): 72 | """ process inputs 73 | 74 | Args: 75 | *args: (Tensor): string, string_len, string2, string2_len, ... 76 | e.g. string (Tensor): [batch_size, dim], string_len (Tensor): [batch_size] 77 | 78 | Returns: 79 | Tensor: [batch_size, output_dim], [batch_size] 80 | 81 | """ 82 | result = [] 83 | for idx, input in enumerate(args): 84 | if idx % 2 == 0: 85 | result.append(input) 86 | return torch.cat(result,self.layer_conf.concat2D_axis), args[1] -------------------------------------------------------------------------------- /block_zoo/math/Add2D.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | import torch 5 | import torch.nn as nn 6 | import logging 7 | 8 | from ..BaseLayer import BaseConf 9 | from utils.DocInherit import DocInherit 10 | from utils.exceptions import ConfigurationError 11 | import copy 12 | 13 | class Add2DConf(BaseConf): 14 | """ Configuration of Add2D layer 15 | 16 | """ 17 | 18 | #init the args 19 | def __init__(self, **kwargs): 20 | super(Add2DConf, self).__init__(**kwargs) 21 | 22 | #set default params 23 | #@DocInherit 24 | #def default(self): 25 | 26 | @DocInherit 27 | def declare(self): 28 | self.num_of_inputs = 2 29 | self.input_ranks = [2,2] 30 | 31 | @DocInherit 32 | def inference(self): 33 | self.output_dim = copy.deepcopy(self.input_dims[0]) 34 | if self.input_dims[0][1] != 1: 35 | self.output_dim[-1] = self.input_dims[0][1] 36 | else: 37 | self.output_dim[-1] = self.input_dims[1][1] 38 | 39 | super(Add2DConf, self).inference() 40 | 41 | @DocInherit 42 | def verify(self): 43 | super(Add2DConf, self).verify() 44 | 45 | # # to check if the ranks of all the inputs are equal 46 | # rank_equal_flag = True 47 | # for i in range(len(self.input_ranks)): 48 | # if self.input_ranks[i] != self.input_ranks[0]: 49 | # rank_equal_flag = False 50 | # break 51 | # if rank_equal_flag == False: 52 | # raise ConfigurationError("For layer Add2D, the ranks of each inputs should be equal!") 53 | 54 | # to check if the dimensions of all the inputs are equal or is 1 55 | dim_flag = True 56 | input_dims = list(self.input_dims) 57 | for i in range(len(input_dims)): 58 | if input_dims[i][1] != input_dims[0][1] and input_dims[i][1] != 1 and input_dims[0][1] != 1: 59 | dim_flag = False 60 | break 61 | if dim_flag == False: 62 | raise ConfigurationError("For layer Add2D, the dimensions of each inputs should be equal or 1") 63 | 64 | class Add2D(nn.Module): 65 | """ Add2D layer to get sum of two sequences(2D representation) 66 | 67 | Args: 68 | layer_conf (Add2DConf): configuration of a layer 69 | 70 | """ 71 | def __init__(self, layer_conf): 72 | super(Add2D, self).__init__() 73 | self.layer_conf = layer_conf 74 | 75 | logging.warning("The length Add2D layer returns is the length of first input") 76 | 77 | def forward(self, *args): 78 | """ process input 79 | 80 | Args: 81 | *args: (Tensor): string, string_len, string2, string2_len 82 | e.g. string (Tensor): [batch_size, dim], string_len (Tensor): [batch_size] 83 | 84 | Returns: 85 | Tensor: [batch_size, output_dim], [batch_size] 86 | """ 87 | return torch.add(args[0],args[2]),args[1] -------------------------------------------------------------------------------- /block_zoo/attentions/MatchAttention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
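# Sketch of the computation in MatchAttention.forward() below: both inputs go through the
# same Linear + ReLU projection, scores are the dot products of the projections, and every
# position of x receives a softmax-weighted sum of y:
#
#   x_proj = relu(W x);  y_proj = relu(W y)
#   alpha  = softmax(x_proj @ y_proj^T)   # softmax over the y positions
#   output = alpha @ y                    # same shape as x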
3 | 4 | import torch 5 | import numpy as np 6 | import torch.nn as nn 7 | import copy 8 | 9 | from block_zoo.BaseLayer import BaseLayer, BaseConf 10 | from utils.DocInherit import DocInherit 11 | 12 | 13 | class MatchAttentionConf(BaseConf): 14 | """Configuration for MatchAttention layer 15 | 16 | """ 17 | def __init__(self, **kwargs): 18 | super(MatchAttentionConf, self).__init__(**kwargs) 19 | 20 | @DocInherit 21 | def default(self): 22 | pass 23 | 24 | @DocInherit 25 | def declare(self): 26 | self.num_of_inputs = 2 27 | self.input_ranks = [3, 3] 28 | 29 | @DocInherit 30 | def inference(self): 31 | self.output_dim = copy.deepcopy(self.input_dims[0]) 32 | super(MatchAttentionConf, self).inference() # PUT THIS LINE AT THE END OF inference() 33 | 34 | @DocInherit 35 | def verify(self): 36 | super(MatchAttentionConf, self).verify() 37 | 38 | 39 | class MatchAttention(BaseLayer): 40 | """ MatchAttention layer for DrQA 41 | [paper] https://arxiv.org/abs/1704.00051 42 | 43 | Given sequences X and Y, match sequence Y to each element in X. 44 | 45 | Args: 46 | layer_conf (MatchAttentionConf): configuration of a layer 47 | 48 | """ 49 | def __init__(self, layer_conf): 50 | 51 | super(MatchAttention, self).__init__(layer_conf) 52 | assert layer_conf.input_dims[0][-1] == layer_conf.input_dims[1][-1] 53 | self.linear = nn.Linear(layer_conf.input_dims[0][-1], layer_conf.input_dims[0][-1]) 54 | self.relu = nn.ReLU() 55 | self.softmax = nn.Softmax(dim=-1) 56 | 57 | def forward(self, x, x_len, y, y_len): 58 | """ 59 | 60 | Args: 61 | x: [batch_size, x_max_len, dim]. 62 | x_len: [batch_size], default is None. 63 | y: [batch_size, y_max_len, dim]. 64 | y_len: [batch_size], default is None. 65 | 66 | Returns: 67 | output: has the same shape as x. 68 | 69 | """ 70 | 71 | x_proj = self.relu(self.linear(x)) # [batch_size, x_max_len, dim] 72 | y_proj = self.relu(self.linear(y)) # [batch_size, y_max_len, dim] 73 | scores = x_proj.bmm(y_proj.transpose(2, 1)) # [batch_size, x_max_len, y_max_len] 74 | 75 | # batch_size, y_max_len, _ = y.size() 76 | # y_length = y_len.cpu().numpy() 77 | # y_mask = np.ones((batch_size, y_max_len)) 78 | # for i, single_len in enumerate(y_length): 79 | # y_mask[i][:single_len] = 0 80 | # y_mask = torch.from_numpy(y_mask).byte().to(scores.device) 81 | # y_mask = y_mask.unsqueeze(1).expand(scores.size()) 82 | # scores.data.masked_fill_(y_mask.data, float('-inf')) 83 | 84 | alpha = self.softmax(scores) # [batch_size, x_max_len, y_len] 85 | output = alpha.bmm(y) # [batch_size, x_max_len, dim] 86 | 87 | return output, x_len 88 | 89 | 90 | -------------------------------------------------------------------------------- /block_zoo/BiLSTM.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
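# The recurrent blocks here and in BiLSTMLast above share the same variable-length recipe:
# sort the batch by descending length, run pack_padded_sequence through the LSTM (BiLSTM then
# pads the packed output back to the full sequence length), and finally restore the original
# batch order with idx_unsort so the outputs line up with the incoming lengths.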
3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import numpy as np 9 | from block_zoo.BaseLayer import BaseLayer, BaseConf 10 | from utils.DocInherit import DocInherit 11 | import copy 12 | 13 | class BiLSTMConf(BaseConf): 14 | """ Configuration of BiLSTM 15 | 16 | Args: 17 | hidden_dim (int): dimension of hidden state 18 | dropout (float): dropout rate 19 | num_layers (int): number of BiLSTM layers 20 | """ 21 | def __init__(self, **kwargs): 22 | super(BiLSTMConf, self).__init__(**kwargs) 23 | 24 | @DocInherit 25 | def default(self): 26 | self.hidden_dim = 128 27 | self.dropout = 0.0 28 | self.num_layers = 1 29 | 30 | @DocInherit 31 | def declare(self): 32 | self.num_of_inputs = 1 33 | self.input_ranks = [3] 34 | 35 | @DocInherit 36 | def inference(self): 37 | self.output_dim = copy.deepcopy(self.input_dims[0]) 38 | self.output_dim[-1] = 2 * self.hidden_dim 39 | 40 | super(BiLSTMConf, self).inference() # PUT THIS LINE AT THE END OF inference() 41 | 42 | @DocInherit 43 | def verify(self): 44 | super(BiLSTMConf, self).verify() 45 | 46 | necessary_attrs_for_user = ['hidden_dim', 'dropout', 'num_layers'] 47 | for attr in necessary_attrs_for_user: 48 | self.add_attr_exist_assertion_for_user(attr) 49 | 50 | 51 | class BiLSTM(BaseLayer): 52 | """ Bidrectional LSTM 53 | 54 | Args: 55 | layer_conf (BiLSTMConf): configuration of a layer 56 | """ 57 | def __init__(self, layer_conf): 58 | super(BiLSTM, self).__init__(layer_conf) 59 | self.lstm = nn.LSTM(layer_conf.input_dims[0][-1], layer_conf.hidden_dim, layer_conf.num_layers, bidirectional=True, 60 | dropout=layer_conf.dropout, batch_first=True) 61 | 62 | def forward(self, string, string_len): 63 | """ process inputs 64 | 65 | Args: 66 | string (Tensor): [batch_size, seq_len, dim] 67 | string_len (Tensor): [batch_size] 68 | 69 | Returns: 70 | Tensor: [batch_size, seq_len, 2 * hidden_dim] 71 | 72 | """ 73 | padded_seq_len = string.shape[1] 74 | 75 | # Sort by length (keep idx) 76 | str_len, idx_sort = (-string_len).sort() 77 | str_len = -str_len 78 | idx_unsort = idx_sort.sort()[1] 79 | 80 | string = string.index_select(0, idx_sort) 81 | 82 | # Handling padding in Recurrent Networks 83 | string_packed = nn.utils.rnn.pack_padded_sequence(string, str_len, batch_first=True) 84 | self.lstm.flatten_parameters() 85 | string_output = self.lstm(string_packed)[0] # seqlen x batch x 2*nhid 86 | string_output = nn.utils.rnn.pad_packed_sequence(string_output, batch_first=True, total_length=padded_seq_len)[0] 87 | 88 | # Un-sort by length 89 | string_output = string_output.index_select(0, idx_unsort) 90 | 91 | return string_output, string_len 92 | -------------------------------------------------------------------------------- /model_zoo/nlp_tasks/text_classification/conf_text_classification_cnn.json: -------------------------------------------------------------------------------- 1 | { 2 | "license": "Copyright (c) Microsoft Corporation. All rights reserved. 
Licensed under the MIT license.", 3 | "tool_version": "1.1.0", 4 | "model_description": "this config is used for text classification use TextCNN, use 20 newsgroup dataset and achieved acc: 0.9610", 5 | "inputs": { 6 | "use_cache": true, 7 | "dataset_type": "classification", 8 | "data_paths": { 9 | "train_data_path": "./dataset/20_newsgroups/train.tsv", 10 | "valid_data_path": "./dataset/20_newsgroups/test.tsv", 11 | "test_data_path": "./dataset/20_newsgroups/test.tsv" 12 | }, 13 | "file_with_col_header": false, 14 | "add_start_end_for_seq": true, 15 | "file_header": { 16 | "label": 0, 17 | "sentence_text": 1 18 | }, 19 | "model_inputs": { 20 | "sentence": ["sentence_text"] 21 | }, 22 | "target": ["label"] 23 | }, 24 | "outputs":{ 25 | "save_base_dir": "./models/20_newsgroups_cnn/", 26 | "model_name": "model.nb", 27 | "train_log_name": "train.log", 28 | "test_log_name": "test.log", 29 | "predict_log_name": "predict.log", 30 | "predict_fields": ["prediction", "confidence"], 31 | "predict_output_name": "predict.tsv", 32 | "cache_dir": ".cache.20_newsgroups/" 33 | }, 34 | "training_params": { 35 | "vocabulary": { 36 | "min_word_frequency": 1 37 | }, 38 | "optimizer": { 39 | "name": "Adam", 40 | "params": { 41 | 42 | } 43 | }, 44 | "use_gpu": true, 45 | "batch_size": 128, 46 | "batch_num_to_show_results": 30, 47 | "max_epoch": 20, 48 | "valid_times_per_epoch": 2, 49 | "fixed_lengths":{ 50 | "sentence": 500 51 | } 52 | }, 53 | "architecture":[ 54 | { 55 | "layer": "Embedding", 56 | "conf": { 57 | "word": { 58 | "cols": ["sentence_text"], 59 | "dim": 128 60 | } 61 | } 62 | }, 63 | { 64 | "layer_id": "hierarchical_cnn", 65 | "layer": "ConvPooling", 66 | "conf": { 67 | "stride": 1, 68 | "window_sizes": [1,2,3,4,5], 69 | "output_channel_num": 100, 70 | "batch_norm": true, 71 | "activation": "ReLU", 72 | "pool_type": "max", 73 | "pool_axis": 1 74 | }, 75 | "inputs": ["sentence"] 76 | }, 77 | { 78 | "output_layer_flag": true, 79 | "layer_id": "output", 80 | "layer": "Linear", 81 | "conf": { 82 | "hidden_dim": [128,20], 83 | "activation": "PReLU", 84 | "batch_norm": true, 85 | "last_hidden_activation": false 86 | }, 87 | "inputs": ["hierarchical_cnn"] 88 | } 89 | ], 90 | "loss": { 91 | "losses":[ 92 | { 93 | "type": "CrossEntropyLoss", 94 | "conf": { 95 | "size_average": true 96 | }, 97 | "inputs": ["output","label"] 98 | } 99 | ] 100 | }, 101 | "metrics": ["accuracy"] 102 | } -------------------------------------------------------------------------------- /block_zoo/math/Add3D.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | import torch 5 | import torch.nn as nn 6 | import logging 7 | 8 | from ..BaseLayer import BaseConf 9 | from utils.DocInherit import DocInherit 10 | from utils.exceptions import ConfigurationError 11 | import copy 12 | 13 | class Add3DConf(BaseConf): 14 | """ Configuration of Add3D layer 15 | 16 | """ 17 | 18 | #init the args 19 | def __init__(self, **kwargs): 20 | super(Add3DConf, self).__init__(**kwargs) 21 | 22 | #set default params 23 | #@DocInherit 24 | #def default(self): 25 | 26 | @DocInherit 27 | def declare(self): 28 | self.num_of_inputs = 2 29 | self.input_ranks = [3,3] 30 | 31 | @DocInherit 32 | def inference(self): 33 | self.output_dim = copy.deepcopy(self.input_dims[0]) 34 | if self.input_dims[0][-1] != 1: 35 | self.output_dim[-1] = self.input_dims[0][-1] 36 | else: 37 | self.output_dim[-1] = self.input_dims[1][-1] 38 | 39 | super(Add3DConf, self).inference() 40 | 41 | @DocInherit 42 | def verify(self): 43 | super(Add3DConf, self).verify() 44 | 45 | # # to check if the ranks of all the inputs are equal 46 | # rank_equal_flag = True 47 | # for i in range(len(self.input_ranks)): 48 | # if self.input_ranks[i] != self.input_ranks[0]: 49 | # rank_equal_flag = False 50 | # break 51 | # if rank_equal_flag == False: 52 | # raise ConfigurationError("For layer Add3D, the ranks of each inputs should be equal!") 53 | 54 | class Add3D(nn.Module): 55 | """ Add3D layer to get sum of two sequences(3D representation) 56 | 57 | Args: 58 | layer_conf (Add3DConf): configuration of a layer 59 | 60 | """ 61 | def __init__(self, layer_conf): 62 | super(Add3D, self).__init__() 63 | self.layer_conf = layer_conf 64 | 65 | logging.warning("The length Add3D layer returns is the length of first input") 66 | 67 | def forward(self, *args): 68 | """ process input 69 | 70 | Args: 71 | *args: (Tensor): string, string_len, string2, string2_len 72 | e.g. string (Tensor): [batch_size, seq_len, dim], string_len (Tensor): [batch_size] 73 | 74 | Returns: 75 | Tensor: [batch_size, seq_len, output_dim], [batch_size] 76 | """ 77 | dim_flag = True 78 | input_dims = list(self.layer_conf.input_dims) 79 | if (args[0].shape[1] * args[0].shape[2]) != (args[2].shape[1] * args[2].shape[2]): 80 | if args[0].shape[1] == args[2].shape[1] and (input_dims[1][-1] == 1 or input_dims[0][-1] == 1): 81 | dim_flag = True 82 | else: 83 | dim_flag = False 84 | 85 | if dim_flag == False: 86 | raise ConfigurationError("For layer Add3D, the dimensions of each inputs should be equal or 1 ,or the elements number of two inputs (expect for the first dimension) should be equal") 87 | 88 | 89 | return torch.add(args[0],args[2]),args[1] -------------------------------------------------------------------------------- /utils/BPEEncoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import nltk 5 | 6 | def get_pairs(word): 7 | """ Return set of symbol pairs in a word. 
8 | word is represented as tuple of symbols (symbols being variable-length strings) 9 | """ 10 | pairs = set() 11 | prev_char = word[0] 12 | for char in word[1:]: 13 | pairs.add((prev_char, char)) 14 | prev_char = char 15 | return pairs 16 | 17 | class BPEEncoder(object): 18 | """ Byte Pair Encoding 19 | """ 20 | def __init__(self, bpe_path): 21 | merges = open(bpe_path, encoding='utf-8').read().split('\n')[1:-1] 22 | merges = [tuple(merge.split()) for merge in merges] 23 | self.bpe_ranks = dict(zip(merges, range(len(merges)))) 24 | self.cache = dict() 25 | 26 | def encode(self, sentence): 27 | tokens = nltk.word_tokenize(sentence) 28 | bpe_tokens = [] 29 | for token in tokens: 30 | bpe_tokens.extend(self.bpe(token)) 31 | 32 | return bpe_tokens 33 | 34 | def bpe(self, token): 35 | """ 36 | 37 | Args: 38 | token (string): a word token 39 | 40 | Returns: 41 | list: byte pair encodings 42 | 43 | """ 44 | word = tuple(token[:-1]) + (token[-1] + '</w>',) 45 | if token in self.cache: 46 | return self.cache[token] 47 | pairs = get_pairs(word) 48 | 49 | if not pairs: 50 | return token+'</w>' 51 | 52 | while True: 53 | bigram = min(pairs, key = lambda pair: self.bpe_ranks.get(pair, float('inf'))) 54 | if bigram not in self.bpe_ranks: 55 | break 56 | first, second = bigram 57 | new_word = [] 58 | i = 0 59 | while i < len(word): 60 | try: 61 | j = word.index(first, i) 62 | new_word.extend(word[i:j]) 63 | i = j 64 | except: 65 | new_word.extend(word[i:]) 66 | break 67 | 68 | if word[i] == first and i < len(word)-1 and word[i+1] == second: 69 | new_word.append(first+second) 70 | i += 2 71 | else: 72 | new_word.append(word[i]) 73 | i += 1 74 | new_word = tuple(new_word) 75 | word = new_word 76 | if len(word) == 1: 77 | break 78 | else: 79 | pairs = get_pairs(word) 80 | word = ' '.join(word) 81 | if word == '\n </w>': 82 | word = '\n</w>' 83 | self.cache[token] = word 84 | return word.split(' ') 85 | 86 | 87 | if __name__ == '__main__': 88 | sentences = 'trip cost to beijing' 89 | import nltk 90 | tokens = nltk.word_tokenize(sentences) 91 | bpe_encoder = BPEEncoder('../dataset/bpe/vocab_40000.bpe') 92 | bpe_tokens = [] 93 | for token in tokens: 94 | print(token) 95 | bpe_tokens.extend(bpe_encoder.bpe(token)) 96 | print(bpe_tokens) 97 | -------------------------------------------------------------------------------- /model_zoo/nlp_tasks/text_classification/conf_text_classification_bilstm_attn.json: -------------------------------------------------------------------------------- 1 | { 2 | "license": "Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT license.", 3 | "tool_version": "1.1.0", 4 | "model_description": "this config is used for text classification use LSTM with selfAttn, use 20 newsgroup dataset and achieved acc: 0.9707", 5 | "inputs": { 6 | "use_cache": true, 7 | "dataset_type": "classification", 8 | "data_paths": { 9 | "train_data_path": "./dataset/20_newsgroups/train.tsv", 10 | "valid_data_path": "./dataset/20_newsgroups/test.tsv", 11 | "test_data_path": "./dataset/20_newsgroups/test.tsv" 12 | }, 13 | "file_with_col_header": false, 14 | "add_start_end_for_seq": true, 15 | "file_header": { 16 | "label": 0, 17 | "sentence_text": 1 18 | }, 19 | "model_inputs": { 20 | "sentence": ["sentence_text"] 21 | }, 22 | "target": ["label"] 23 | }, 24 | "outputs":{ 25 | "save_base_dir": "./models/20_newsgroup_bilstm_attn/", 26 | "model_name": "model.nb", 27 | "train_log_name": "train.log", 28 | "test_log_name": "test.log", 29 | "predict_log_name": "predict.log", 30 | "predict_fields": ["prediction", "confidence"], 31 | "predict_output_name": "predict.tsv", 32 | "cache_dir": ".cache.20_newsgroup/" 33 | }, 34 | "training_params": { 35 | "vocabulary": { 36 | "min_word_frequency": 1 37 | }, 38 | "optimizer": { 39 | "name": "Adam", 40 | "params": { 41 | 42 | } 43 | }, 44 | "use_gpu": true, 45 | "batch_size": 128, 46 | "batch_num_to_show_results": 30, 47 | "max_epoch": 15, 48 | "valid_times_per_epoch": 2, 49 | "fixed_lengths":{ 50 | "sentence": 500 51 | } 52 | }, 53 | "architecture":[ 54 | { 55 | "layer": "Embedding", 56 | "conf": { 57 | "word": { 58 | "cols": ["sentence_text"], 59 | "dim": 128 60 | } 61 | } 62 | }, 63 | { 64 | "layer_id": "sentence_1", 65 | "layer": "BiLSTM", 66 | "conf": { 67 | "hidden_dim": 128, 68 | "dropout": 0, 69 | "num_layers": 2 70 | }, 71 | "inputs": ["sentence"] 72 | }, 73 | { 74 | "layer_id": "linearAttn", 75 | "layer": "LinearAttention", 76 | "conf": { 77 | "keep_dim": false 78 | }, 79 | "inputs": ["sentence_1"] 80 | }, 81 | { 82 | "output_layer_flag": true, 83 | "layer_id": "output", 84 | "layer": "Linear", 85 | "conf": { 86 | "hidden_dim": [128,20], 87 | "activation": "PReLU", 88 | "batch_norm": true, 89 | "last_hidden_activation": false 90 | }, 91 | "inputs": ["linearAttn"] 92 | } 93 | ], 94 | "loss": { 95 | "losses":[ 96 | { 97 | "type": "CrossEntropyLoss", 98 | "conf": { 99 | "size_average": true 100 | }, 101 | "inputs": ["output","label"] 102 | } 103 | ] 104 | }, 105 | "metrics": ["accuracy"] 106 | } -------------------------------------------------------------------------------- /block_zoo/math/ElementWisedMultiply2D.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
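# In short, this layer returns the element-wise product of two [batch_size, dim] inputs (one
# side may have dim 1 and broadcast). The torch.addcmul(zeros, 1, a, b) form used in forward()
# is equivalent to a plain a * b, e.g.
#
#   a = torch.rand(8, 16); b = torch.rand(8, 16)
#   out = a * b    # [8, 16]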
3 | 4 | import torch 5 | import torch.nn as nn 6 | import logging 7 | 8 | from ..BaseLayer import BaseConf 9 | from utils.DocInherit import DocInherit 10 | from utils.exceptions import ConfigurationError 11 | import copy 12 | 13 | class ElementWisedMultiply2DConf(BaseConf): 14 | """ Configuration of ElementWisedMultiply2D layer 15 | 16 | """ 17 | 18 | # init the args 19 | def __init__(self,**kwargs): 20 | super(ElementWisedMultiply2DConf, self).__init__(**kwargs) 21 | 22 | #set default params 23 | #@DocInherit 24 | #def default(self): 25 | 26 | @DocInherit 27 | def declare(self): 28 | self.num_of_inputs = 2 29 | self.input_ranks = [2,2] 30 | 31 | @DocInherit 32 | def inference(self): 33 | self.output_dim = copy.deepcopy(self.input_dims[0]) 34 | if self.input_dims[0][1] != 1: 35 | self.output_dim[-1] = self.input_dims[0][1] 36 | else: 37 | self.output_dim[-1] = self.input_dims[1][1] 38 | 39 | super(ElementWisedMultiply2DConf, self).inference() 40 | 41 | @DocInherit 42 | def verify(self): 43 | super(ElementWisedMultiply2DConf, self).verify() 44 | 45 | # # to check if the ranks of all the inputs are equal 46 | # rank_equal_flag = True 47 | # for i in range(len(self.input_ranks)): 48 | # if self.input_ranks[i] != self.input_ranks[0]: 49 | # rank_equal_flag = False 50 | # break 51 | # if rank_equal_flag == False: 52 | # raise ConfigurationError("For layer ElementWisedMultiply2D, the ranks of each inputs should be equal!") 53 | 54 | # to check if the dimensions of all the inputs are equal or is 1 55 | dim_flag = True 56 | input_dims = list(self.input_dims) 57 | for i in range(len(input_dims)): 58 | if input_dims[i][1] != input_dims[0][1] and input_dims[i][1] != 1 and input_dims[0][1] != 1: 59 | dim_flag = False 60 | break 61 | if dim_flag == False: 62 | raise ConfigurationError("For layer ElementWisedMultiply2D, the dimensions of each inputs should be equal or 1") 63 | 64 | class ElementWisedMultiply2D(nn.Module): 65 | """ ElementWisedMultiply2D layer to do Element-Wised Multiply of two sequences(2D representation) 66 | 67 | Args: 68 | layer_conf (ElementWisedMultiply2DConf): configuration of a layer 69 | 70 | """ 71 | def __init__(self, layer_conf): 72 | super(ElementWisedMultiply2D, self).__init__() 73 | self.layer_conf = layer_conf 74 | 75 | logging.warning("The length ElementWisedMultiply2D layer returns is the length of first input") 76 | 77 | def forward(self, *args): 78 | """ process input 79 | 80 | Args: 81 | *args: (Tensor): string, string_len, string2, string2_len 82 | e.g.
string (Tensor): [batch_size, dim], string_len (Tensor): [batch_size] 83 | 84 | 85 | Returns: 86 | Tensor: [batch_size, output_dim], [batch_size] 87 | """ 88 | return torch.addcmul(torch.zeros(args[0].size()).to('cuda'),1,args[0],args[2]),args[1] 89 | -------------------------------------------------------------------------------- /docs/build/html/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-weight:normal;font-style:normal;src:url("../fonts/fontawesome-webfont.eot");src:url("../fonts/fontawesome-webfont.eot?#iefix") format("embedded-opentype"),url("../fonts/fontawesome-webfont.woff") format("woff"),url("../fonts/fontawesome-webfont.ttf") format("truetype"),url("../fonts/fontawesome-webfont.svg#FontAwesome") format("svg")}.fa:before{display:inline-block;font-family:FontAwesome;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa{display:inline-block;text-decoration:inherit}li .fa{display:inline-block}li .fa-large:before,li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-0.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before,ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before{content:""}.icon-book:before{content:""}.fa-caret-down:before{content:""}.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.icon-caret-up:before{content:""}.fa-caret-left:before{content:""}.icon-caret-left:before{content:""}.fa-caret-right:before{content:""}.icon-caret-right:before{content:""}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book{float:left}.rst-versions.rst-badge.shift-up 
.rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} 2 | -------------------------------------------------------------------------------- /block_zoo/math/Minus2D.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | 5 | import torch 6 | import torch.nn as nn 7 | import logging 8 | 9 | from ..BaseLayer import BaseConf 10 | from utils.DocInherit import DocInherit 11 | from utils.exceptions import ConfigurationError 12 | import copy 13 | 14 | class Minus2DConf(BaseConf): 15 | """ Configuration of Minus2D layer 16 | 17 | Args: 18 | abs_flag: if the result of the Minus2D is abs, default is False 19 | 20 | """ 21 | 22 | #init the args 23 | def __init__(self,**kwargs): 24 | super(Minus2DConf, self).__init__(**kwargs) 25 | 26 | #set default params 27 | @DocInherit 28 | def default(self): 29 | self.abs_flag = False 30 | 31 | @DocInherit 32 | def declare(self): 33 | self.num_of_inputs = 2 34 | self.input_ranks = [2,2] 35 | 36 | @DocInherit 37 | def inference(self): 38 | self.output_dim = copy.deepcopy(self.input_dims[0]) 39 | if self.input_dims[0][1] != 1: 40 | self.output_dim[-1] = self.input_dims[0][1] 41 | else: 42 | self.output_dim[-1] = self.input_dims[1][1] 43 | 44 | super(Minus2DConf, self).inference() 45 | 46 | @DocInherit 47 | def verify(self): 48 | super(Minus2DConf, self).verify() 49 | 50 | # # to check if the ranks of all the inputs are equal 51 | # rank_equal_flag = True 52 | # for i in range(len(self.input_ranks)): 53 | # if self.input_ranks[i] != self.input_ranks[0]: 54 | # rank_equal_flag = False 55 | # break 56 | # if rank_equal_flag == False: 57 | # raise ConfigurationError("For layer Minus2D, the ranks of each inputs should be equal!") 58 | 59 | # to check if the dimensions of all the inputs are equal or is 1 60 | dim_flag = True 61 | input_dims = list(self.input_dims) 62 | for i in range(len(input_dims)): 63 | if input_dims[i][1] != input_dims[0][1] and input_dims[i][1] != 1 and input_dims[0][1] != 1: 64 | dim_flag = False 65 | break 66 | if dim_flag == False: 67 | raise ConfigurationError("For layer Minus2D, the dimensions of each inputs should be equal or 1") 68 | 69 | class Minus2D(nn.Module): 70 | """Minus2D layer to get subtraction of two sequences(2D representation) 71 | 72 | Args: 73 | layer_conf (Minus2DConf): configuration of a layer 74 | 75 | """ 76 | def __init__(self, layer_conf): 77 | super(Minus2D, self).__init__() 78 | self.layer_conf = layer_conf 79 | 80 | logging.warning("The length Minus2D layer returns is the length of first input") 81 | 82 | def forward(self, *args): 83 | """ process inputs 84 | 85 | Args: 86 | *args: (Tensor): string, string_len, string2, string2_len 87 | e.g. 
string (Tensor): [batch_size, dim], string_len (Tensor): [batch_size] 88 | 89 | Returns: 90 | Tensor: [batch_size, output_dim], [batch_size] 91 | 92 | """ 93 | if self.layer_conf.abs_flag == False: 94 | return (args[0] - args[2]), args[1] 95 | if self.layer_conf.abs_flag == True: 96 | return torch.abs(args[0] - args[2]),args[1] 97 | 98 | 99 | -------------------------------------------------------------------------------- /block_zoo/BiGRU.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import numpy as np 8 | import copy 9 | 10 | from block_zoo.BaseLayer import BaseLayer, BaseConf 11 | from utils.DocInherit import DocInherit 12 | from utils.common_utils import transfer_to_gpu 13 | 14 | 15 | class BiGRUConf(BaseConf): 16 | """Configuration of BiGRU 17 | 18 | Args: 19 | hidden_dim (int): dimension of hidden state 20 | dropout (float): dropout rate 21 | 22 | """ 23 | def __init__(self, **kwargs): 24 | super(BiGRUConf, self).__init__(**kwargs) 25 | 26 | @DocInherit 27 | def default(self): 28 | self.hidden_dim = 128 29 | self.dropout = 0.0 30 | 31 | @DocInherit 32 | def declare(self): 33 | self.num_of_inputs = 1 34 | self.input_ranks = [3] 35 | 36 | @DocInherit 37 | def inference(self): 38 | self.output_dim = copy.deepcopy(self.input_dims[0]) 39 | self.output_dim[-1] = 2 * self.hidden_dim 40 | super(BiGRUConf, self).inference() # PUT THIS LINE AT THE END OF inference() 41 | 42 | @DocInherit 43 | def verify(self): 44 | super(BiGRUConf, self).verify() 45 | assert hasattr(self, 'hidden_dim'), "Please define hidden_dim attribute of BiGRUConf in default() or the configuration file" 46 | assert hasattr(self, 'dropout'), "Please define dropout attribute of BiGRUConf in default() or the configuration file" 47 | 48 | 49 | class BiGRU(BaseLayer): 50 | """Bidirectional GRU 51 | 52 | Args: 53 | layer_conf (BiGRUConf): configuration of a layer 54 | """ 55 | def __init__(self, layer_conf): 56 | super(BiGRU, self).__init__(layer_conf) 57 | self.GRU = nn.GRU(layer_conf.input_dims[0][-1], layer_conf.hidden_dim, 1, bidirectional=True, 58 | dropout=layer_conf.dropout, batch_first=True) 59 | 60 | def forward(self, string, string_len): 61 | """ process inputs 62 | 63 | Args: 64 | string (Tensor): [batch_size, seq_len, dim] 65 | string_len (Tensor): [batch_size] 66 | 67 | Returns: 68 | Tensor: [batch_size, seq_len, 2 * hidden_dim] 69 | 70 | """ 71 | 72 | padded_seq_len = string.shape[1] 73 | self.init_GRU = torch.FloatTensor(2, string.size(0), self.layer_conf.hidden_dim).zero_() 74 | if self.is_cuda(): 75 | self.init_GRU = transfer_to_gpu(self.init_GRU) 76 | 77 | # Sort by length (keep idx) 78 | str_len, idx_sort = (-string_len).sort() 79 | str_len = -str_len 80 | idx_unsort = idx_sort.sort()[1] 81 | 82 | string = string.index_select(0, idx_sort) 83 | 84 | # Handling padding in Recurrent Networks 85 | string_packed = nn.utils.rnn.pack_padded_sequence(string, str_len, batch_first=True) 86 | self.GRU.flatten_parameters() 87 | string_output, hn = self.GRU(string_packed, self.init_GRU) # seqlen x batch x 2*nhid 88 | string_output = nn.utils.rnn.pad_packed_sequence(string_output, batch_first=True, total_length=padded_seq_len)[0] 89 | 90 | # Un-sort by length 91 | string_output = string_output.index_select(0, idx_unsort) 92 | 93 | return string_output, string_len 94 | 95 | 
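# Illustrative usage sketch (an addition for clarity, not part of the original file; shapes and values
# are made up): a standalone, minimal run of the sort -> pack -> GRU -> unpad -> unsort pattern used in
# BiGRU.forward above, so the index bookkeeping is easier to follow.
if __name__ == "__main__":
    _gru = nn.GRU(input_size=8, hidden_size=16, num_layers=1, bidirectional=True, batch_first=True)
    _x = torch.randn(4, 10, 8)                     # [batch, padded_seq_len, dim]
    _len = torch.tensor([10, 3, 7, 5])             # true length of each sample
    _sorted_len, _idx_sort = (-_len).sort()        # sort by descending length
    _sorted_len = -_sorted_len
    _idx_unsort = _idx_sort.sort()[1]              # permutation that restores the original order
    _packed = nn.utils.rnn.pack_padded_sequence(_x[_idx_sort], _sorted_len, batch_first=True)
    _out, _hn = _gru(_packed)
    _out = nn.utils.rnn.pad_packed_sequence(_out, batch_first=True, total_length=10)[0]
    _out = _out[_idx_unsort]                       # [batch, padded_seq_len, 2 * hidden]
    print(_out.shape)                              # torch.Size([4, 10, 32])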
-------------------------------------------------------------------------------- /data_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | # add the project root to python path 5 | import os 6 | from settings import ProblemTypes, version 7 | 8 | import argparse 9 | import logging 10 | 11 | from ModelConf import ModelConf 12 | from problem import Problem 13 | from utils.common_utils import log_set, dump_to_pkl, load_from_pkl 14 | 15 | def main(params, data_path, save_path): 16 | conf = ModelConf("cache", params.conf_path, version, params) 17 | 18 | if ProblemTypes[conf.problem_type] == ProblemTypes.sequence_tagging: 19 | problem = Problem(conf.problem_type, conf.input_types, conf.answer_column_name, 20 | source_with_start=True, source_with_end=True, source_with_unk=True, source_with_pad=True, 21 | target_with_start=True, target_with_end=True, target_with_unk=True, target_with_pad=True, same_length=True, 22 | with_bos_eos=conf.add_start_end_for_seq, tagging_scheme=conf.tagging_scheme, tokenizer=conf.tokenizer, 23 | remove_stopwords=conf.remove_stopwords, DBC2SBC=conf.DBC2SBC, unicode_fix=conf.unicode_fix) 24 | elif ProblemTypes[conf.problem_type] == ProblemTypes.classification \ 25 | or ProblemTypes[conf.problem_type] == ProblemTypes.regression: 26 | problem = Problem(conf.problem_type, conf.input_types, conf.answer_column_name, 27 | source_with_start=True, source_with_end=True, source_with_unk=True, source_with_pad=True, 28 | target_with_start=False, target_with_end=False, target_with_unk=False, target_with_pad=False, same_length=True, 29 | with_bos_eos=conf.add_start_end_for_seq, tokenizer=conf.tokenizer, remove_stopwords=conf.remove_stopwords, 30 | DBC2SBC=conf.DBC2SBC, unicode_fix=conf.unicode_fix) 31 | 32 | if os.path.isfile(conf.problem_path): 33 | problem.load_problem(conf.problem_path) 34 | logging.info("Cache loaded!") 35 | logging.debug("Cache loaded from %s" % conf.problem_path) 36 | else: 37 | raise Exception("Cache does not exist!") 38 | 39 | data, length, target = problem.encode(data_path, conf.file_columns, conf.input_types, conf.file_with_col_header, 40 | conf.object_inputs, conf.answer_column_name, conf.min_sentence_len, 41 | extra_feature=conf.extra_feature,max_lengths=conf.max_lengths, file_format='tsv', 42 | cpu_num_workers=conf.cpu_num_workers) 43 | if not os.path.isdir(os.path.dirname(save_path)): 44 | os.makedirs(os.path.dirname(save_path)) 45 | dump_to_pkl({'data': data, 'length': length, 'target': target}, save_path) 46 | 47 | if __name__ == "__main__": 48 | parser = argparse.ArgumentParser(description='Data encoding') 49 | parser.add_argument("data_path", type=str) 50 | parser.add_argument("save_path", type=str) 51 | parser.add_argument("--conf_path", type=str, default='conf.json', help="configuration path") 52 | parser.add_argument("--debug", type=bool, default=False) 53 | parser.add_argument("--force", type=bool, default=False) 54 | 55 | log_set('encoding_data.log') 56 | 57 | params, _ = parser.parse_known_args() 58 | 59 | if params.debug is True: 60 | import debugger 61 | main(params, params.data_path, params.save_path) -------------------------------------------------------------------------------- /block_zoo/HighwayLinear.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
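# Illustrative sketch (an addition for clarity, not part of the original module; the helper name is made up):
# one highway step written out directly as y = g * x + (1 - g) * f(A(x)) with g = sigmoid(B(x)). In the
# HighwayLinear layer below, A and B are fused into a single nn.Linear that maps dim -> 2 * dim and whose
# output is chunked into the nonlinear part and the gate.
def _highway_step_sketch(x, fused_linear, activation):
    # x: [batch_size, seq_len, dim]; fused_linear: a linear map dim -> 2 * dim; activation: e.g. a PReLU module
    nonlinear, gate = fused_linear(x).chunk(2, dim=-1)
    gate = gate.sigmoid()
    return gate * x + (1 - gate) * activation(nonlinear)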
3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import numpy as np 9 | import copy 10 | from block_zoo.BaseLayer import BaseLayer, BaseConf 11 | from utils.DocInherit import DocInherit 12 | 13 | 14 | class HighwayLinearConf(BaseConf): 15 | """ Configuration of BiLSTM 16 | 17 | Args: 18 | hidden_dim (int): dimension of hidden state 19 | dropout (float): dropout rate 20 | num_layers (int): number of BiLSTM layers 21 | """ 22 | def __init__(self, **kwargs): 23 | super(HighwayLinearConf, self).__init__(**kwargs) 24 | 25 | @DocInherit 26 | def default(self): 27 | self.num_layers = 1 28 | self.activation = 'PReLU' 29 | 30 | @DocInherit 31 | def declare(self): 32 | self.num_of_inputs = 1 33 | self.input_ranks = [-1] 34 | 35 | @DocInherit 36 | def inference(self): 37 | self.output_dim = copy.deepcopy(self.input_dims[0]) 38 | super(HighwayLinearConf, self).inference() # PUT THIS LINE AT THE END OF inference() 39 | 40 | @DocInherit 41 | def verify(self): 42 | super(HighwayLinearConf, self).verify() 43 | 44 | necessary_attrs_for_user = ['num_layers', 'activation'] 45 | for attr in necessary_attrs_for_user: 46 | self.add_attr_exist_assertion_for_user(attr) 47 | 48 | 49 | class HighwayLinear(BaseLayer): 50 | """ A `Highway layer `_ does a gated combination of a linear 51 | transformation and a non-linear transformation of its input. :math:`y = g * x + (1 - g) * 52 | f(A(x))`, where :math:`A` is a linear transformation, :math:`f` is an element-wise 53 | non-linearity, and :math:`g` is an element-wise gate, computed as :math:`sigmoid(B(x))`. 54 | This module will apply a fixed number of highway layers to its input, returning the final 55 | result. 56 | 57 | Args: 58 | layer_conf (HighwayLinearConf): configuration of a layer 59 | """ 60 | def __init__(self, layer_conf): 61 | super(HighwayLinear, self).__init__(layer_conf) 62 | self.layer_conf = layer_conf 63 | self.layers = torch.nn.ModuleList([torch.nn.Linear(layer_conf.input_dims[0][-1], layer_conf.input_dims[0][-1] * 2) for _ in range(layer_conf.num_layers)]) 64 | self.activation = eval("nn." + layer_conf.activation)() 65 | 66 | def forward(self, string, string_len): 67 | """ process inputs 68 | 69 | Args: 70 | string (Tensor): [batch_size, seq_len, dim] 71 | string_len (Tensor): [batch_size] 72 | 73 | Returns: 74 | Tensor: [batch_size, seq_len, 2 * hidden_dim] 75 | 76 | """ 77 | current_input = string 78 | for layer in self.layers: 79 | projected_input = layer(current_input) 80 | linear_part = current_input 81 | # NOTE: if you modify this, think about whether you should modify the initialization above, too. 82 | nonlinear_part, gate = projected_input.chunk(2, dim=-1) 83 | nonlinear_part = self.activation(nonlinear_part) 84 | gate = torch.sigmoid(gate) 85 | current_input = gate * linear_part + (1 - gate) * nonlinear_part 86 | return current_input, string_len 87 | 88 | -------------------------------------------------------------------------------- /block_zoo/math/ElementWisedMultiply3D.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
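# Illustrative sketch (an addition, not part of the original layer; the helper name is made up): the 3D
# element-wise multiply below accepts inputs whose last dimensions are equal, or where one of them is 1 and
# broadcasts, e.g. [batch, seq_len, dim] * [batch, seq_len, 1] -> [batch, seq_len, dim].
def _elementwise_multiply_3d_sketch(a, a_len, b):
    # a: [batch_size, seq_len, dim_a], b: [batch_size, seq_len, dim_b]; dim_a == dim_b, or either of them is 1
    return a * b, a_len   # the sequence lengths of the first input are passed through unchanged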
3 | 4 | import torch 5 | import torch.nn as nn 6 | import logging 7 | 8 | from ..BaseLayer import BaseConf 9 | from utils.DocInherit import DocInherit 10 | from utils.exceptions import ConfigurationError 11 | import copy 12 | 13 | class ElementWisedMultiply3DConf(BaseConf): 14 | """ Configuration of ElementWisedMultiply3D layer 15 | 16 | """ 17 | 18 | # init the args 19 | def __init__(self,**kwargs): 20 | super(ElementWisedMultiply3DConf, self).__init__(**kwargs) 21 | 22 | #set default params 23 | #@DocInherit 24 | #def default(self): 25 | 26 | @DocInherit 27 | def declare(self): 28 | self.num_of_inputs = 2 29 | self.input_ranks = [3,3] 30 | 31 | @DocInherit 32 | def inference(self): 33 | self.output_dim = copy.deepcopy(self.input_dims[0]) 34 | if self.input_dims[0][-1] != 1: 35 | self.output_dim[-1] = self.input_dims[0][-1] 36 | else: 37 | self.output_dim[-1] = self.input_dims[1][-1] 38 | 39 | super(ElementWisedMultiply3DConf, self).inference() 40 | 41 | @DocInherit 42 | def verify(self): 43 | super(ElementWisedMultiply3DConf, self).verify() 44 | 45 | # # to check if the ranks of all the inputs are equal 46 | # rank_equal_flag = True 47 | # for i in range(len(self.input_ranks)): 48 | # if self.input_ranks[i] != self.input_ranks[0]: 49 | # rank_equal_flag = False 50 | # break 51 | # if rank_equal_flag == False: 52 | # raise ConfigurationError("For layer ElementWisedMultiply3D, the ranks of each inputs should be equal!") 53 | 54 | class ElementWisedMultiply3D(nn.Module): 55 | """ ElementWisedMultiply3D layer to do Element-Wised Multiply of two sequences(3D representation) 56 | 57 | Args: 58 | layer_conf (ElementWisedMultiply3DConf): configuration of a layer 59 | 60 | """ 61 | def __init__(self, layer_conf): 62 | super(ElementWisedMultiply3D, self).__init__() 63 | self.layer_conf = layer_conf 64 | 65 | logging.warning("The length ElementWisedMultiply3D layer returns is the length of first input") 66 | 67 | def forward(self, *args): 68 | """ process input 69 | 70 | Args: 71 | *args: (Tensor): string, string_len, string2, string2_len 72 | e.g. string (Tensor): [batch_size, seq_len, dim], string_len (Tensor): [batch_size] 73 | 74 | 75 | Returns: 76 | Tensor: [batch_size, seq_len, output_dim], [batch_size] 77 | """ 78 | dim_flag = True 79 | input_dims = list(self.layer_conf.input_dims) 80 | if (args[0].shape[1] * args[0].shape[2]) != (args[2].shape[1] * args[2].shape[2]): 81 | if args[0].shape[1] == args[2].shape[1] and (input_dims[1][-1] == 1 or input_dims[0][-1] == 1): 82 | dim_flag = True 83 | else: 84 | dim_flag = False 85 | if dim_flag == False: 86 | raise ConfigurationError("For layer ElementWisedMultiply3D, the dimensions of each inputs should be equal or 1 ,or the elements number of two inputs (expect for the first dimension) should be equal") 87 | return torch.addcmul(torch.zeros(args[0].size()).to('cuda'),1,args[0],args[2]),args[1] 88 | -------------------------------------------------------------------------------- /block_zoo/BiGRULast.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
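# Illustrative sketch (an addition, not part of the original layer; the helper name is made up): for a
# one-layer bidirectional GRU, hn has shape [2, batch_size, hidden_dim]; hn[0] is the final hidden state of
# the forward direction and hn[1] of the backward direction, so concatenating them along dim 1 gives the
# [batch_size, 2 * hidden_dim] summary that BiGRULast.forward below returns.
def _bigru_last_sketch(gru, x):
    # gru: nn.GRU(..., num_layers=1, bidirectional=True, batch_first=True); x: [batch_size, seq_len, dim]
    _, hn = gru(x)
    return torch.cat((hn[0], hn[1]), dim=1)   # relies on the module's `import torch` below, resolved at call time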
3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import numpy as np 9 | 10 | from block_zoo.BaseLayer import BaseLayer, BaseConf 11 | from utils.DocInherit import DocInherit 12 | from utils.common_utils import transfer_to_gpu 13 | 14 | class BiGRULastConf(BaseConf): 15 | """ Configuration of the layer BiGRULast 16 | 17 | Args: 18 | hidden_dim (int): dimension of hidden state 19 | dropout (float): dropout rate 20 | """ 21 | def __init__(self, **kwargs): 22 | 23 | super(BiGRULastConf, self).__init__(**kwargs) 24 | 25 | @DocInherit 26 | def default(self): 27 | self.hidden_dim = 128 28 | self.dropout = 0.0 29 | 30 | @DocInherit 31 | def declare(self): 32 | self.num_of_inputs = 1 33 | self.input_ranks = [3] 34 | 35 | @DocInherit 36 | def inference(self): 37 | self.output_dim = [-1] 38 | self.output_dim.append(2 * self.hidden_dim) 39 | 40 | super(BiGRULastConf, self).inference() # PUT THIS LINE AT THE END OF inference() 41 | 42 | @DocInherit 43 | def verify_before_inference(self): 44 | super(BiGRULastConf, self).verify_before_inference() 45 | necessary_attrs_for_user = ['hidden_dim'] 46 | for attr in necessary_attrs_for_user: 47 | self.add_attr_exist_assertion_for_user(attr) 48 | 49 | @DocInherit 50 | def verify(self): 51 | super(BiGRULastConf, self).verify() 52 | necessary_attrs_for_user = ['hidden_dim', 'dropout'] 53 | for attr in necessary_attrs_for_user: 54 | self.add_attr_exist_assertion_for_user(attr) 55 | 56 | 57 | class BiGRULast(BaseLayer): 58 | """ Get the last hidden state of Bi GRU 59 | 60 | Args: 61 | layer_conf (BiGRULastConf): configuration of a layer 62 | """ 63 | def __init__(self, layer_conf): 64 | super(BiGRULast, self).__init__(layer_conf) 65 | self.GRU = nn.GRU(layer_conf.input_dims[0][-1], layer_conf.hidden_dim, 1, bidirectional=True, 66 | dropout=layer_conf.dropout, batch_first=True) 67 | 68 | def forward(self, string, string_len): 69 | """ process inputs 70 | 71 | Args: 72 | string (Tensor): [batch_size, seq_len, dim] 73 | string_len (Tensor): [batch_size] 74 | 75 | Returns: 76 | Tensor: [batch_size, 2 * hidden_dim] 77 | """ 78 | #string = string.permute([1, 0, 2]) 79 | self.init_GRU = torch.FloatTensor(2, string.size(0), self.layer_conf.hidden_dim).zero_() 80 | if self.is_cuda(): 81 | self.init_GRU = transfer_to_gpu(self.init_GRU) 82 | 83 | # Sort by length (keep idx) 84 | str_len, idx_sort = (-string_len).sort() 85 | str_len = -str_len 86 | idx_unsort = idx_sort.sort()[1] 87 | 88 | string = string.index_select(0, idx_sort) 89 | 90 | # Handling padding in Recurrent Networks 91 | string_packed = nn.utils.rnn.pack_padded_sequence(string, str_len, batch_first=True) 92 | self.GRU.flatten_parameters() 93 | string_output, hn = self.GRU(string_packed, self.init_GRU) # seqlen x batch x 2*nhid 94 | 95 | emb = torch.cat((hn[0], hn[1]), 1) # batch x 2*nhid 96 | 97 | emb = emb.index_select(0, idx_unsort) 98 | return emb, string_len 99 | -------------------------------------------------------------------------------- /model_zoo/nlp_tasks/sentiment_analysis/conf_sentiment_analysis_bilstm.json: -------------------------------------------------------------------------------- 1 | { 2 | "license": "Copyright (c) Microsoft Corporation. All rights reserved. 
Licensed under the MIT license.", 3 | "tool_version": "1.1.0", 4 | "model_description": "This config is used for sentiment analysis task with SST-2 dataset, it achieved acc: 0.876147 on dev set", 5 | "inputs": { 6 | "use_cache": true, 7 | "dataset_type": "classification", 8 | "data_paths": { 9 | "train_data_path": "./dataset/SST-2/train.tsv", 10 | "valid_data_path": "./dataset/SST-2/dev.tsv", 11 | "test_data_path": "./dataset/SST-2/dev.tsv", 12 | "predict_data_path": "./dataset/SST-2/dev.tsv", 13 | "pre_trained_emb": "./dataset/GloVe/glove.840B.300d.txt" 14 | }, 15 | "file_with_col_header": true, 16 | "add_start_end_for_seq": true, 17 | "file_header": { 18 | "sentence_col": 0, 19 | "label": 1 20 | }, 21 | "model_inputs": { 22 | "sentence": ["sentence_col"] 23 | }, 24 | "target": ["label"] 25 | }, 26 | "outputs":{ 27 | "save_base_dir": "./models/sentiment_analysis_bilstm/", 28 | "model_name": "model.nb", 29 | "train_log_name": "train.log", 30 | "test_log_name": "test.log", 31 | "predict_log_name": "predict.log", 32 | "predict_fields": ["prediction", "confidence"], 33 | "predict_output_name": "predict.tsv", 34 | "cache_dir": ".cache.SST-2/" 35 | }, 36 | "training_params": { 37 | "optimizer": { 38 | "name": "Adam", 39 | "params": { 40 | } 41 | }, 42 | "vocabulary": { 43 | "min_word_frequency": 1 44 | }, 45 | "lr_decay": 0.95, 46 | "minimum_lr": 0.0001, 47 | "epoch_start_lr_decay": 1, 48 | "use_gpu": true, 49 | "batch_size": 128, 50 | "batch_num_to_show_results": 300, 51 | "max_epoch": 30, 52 | "valid_times_per_epoch": 2 53 | }, 54 | "architecture":[ 55 | { 56 | "layer": "Embedding", 57 | "conf": { 58 | "word": { 59 | "cols": ["sentence_col"], 60 | "dim": 300 61 | } 62 | } 63 | }, 64 | { 65 | "layer_id": "sentence_dropout", 66 | "layer": "Dropout", 67 | "conf": { 68 | "dropout": 0.2 69 | }, 70 | "inputs": ["sentence"] 71 | }, 72 | { 73 | "layer_id": "sentence_bilstm", 74 | "layer": "BiLSTM", 75 | "conf": { 76 | "hidden_dim": 256, 77 | "dropout": 0.2, 78 | "num_layers": 2 79 | }, 80 | "inputs": ["sentence_dropout"] 81 | }, 82 | { 83 | "layer_id": "sentence_pooling", 84 | "layer": "Pooling", 85 | "conf": { 86 | "pool_axis": 1, 87 | "pool_type": "max" 88 | }, 89 | "inputs": ["sentence_bilstm"] 90 | }, 91 | { 92 | "output_layer_flag": true, 93 | "layer_id": "output", 94 | "layer": "Linear", 95 | "conf": { 96 | "hidden_dim": [128, 2], 97 | "activation": "PReLU", 98 | "batch_norm": true, 99 | "last_hidden_activation": false 100 | }, 101 | "inputs": ["sentence_pooling"] 102 | } 103 | ], 104 | "loss": { 105 | "losses": [ 106 | { 107 | "type": "CrossEntropyLoss", 108 | "conf": { 109 | "size_average": true 110 | }, 111 | "inputs": ["output","label"] 112 | } 113 | ] 114 | }, 115 | "metrics": ["accuracy"] 116 | } -------------------------------------------------------------------------------- /block_zoo/Pooling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import numpy as np 9 | 10 | from block_zoo.BaseLayer import BaseLayer, BaseConf 11 | from utils.DocInherit import DocInherit 12 | 13 | 14 | class PoolingConf(BaseConf): 15 | """ 16 | 17 | Args: 18 | pool_type (str): 'max' or 'mean', default is 'max'. 19 | pool_axis (int): which axis to conduct pooling, default is 1. 
20 | """ 21 | def __init__(self, **kwargs): 22 | super(PoolingConf, self).__init__(**kwargs) 23 | 24 | @DocInherit 25 | def default(self): 26 | #self.input_dim = 128 27 | self.pool_type = 'max' # Supported: ['max', mean'] 28 | self.pool_axis = 1 29 | 30 | @DocInherit 31 | def declare(self): 32 | self.num_of_inputs = 1 33 | self.input_ranks = [-1] 34 | 35 | @DocInherit 36 | def inference(self): 37 | self.output_dim = [] 38 | for idx, dim in enumerate(self.input_dims[0]): 39 | if idx != self.pool_axis: 40 | self.output_dim.append(dim) 41 | 42 | # DON'T MODIFY THIS 43 | self.output_rank = len(self.output_dim) 44 | 45 | @DocInherit 46 | def verify(self): 47 | super(PoolingConf, self).verify() 48 | 49 | necessary_attrs_for_user = ['pool_type', 'pool_axis'] 50 | for attr in necessary_attrs_for_user: 51 | self.add_attr_exist_assertion_for_user(attr) 52 | 53 | self.add_attr_value_assertion('pool_type', ['max', 'mean']) 54 | 55 | assert all([input_rank >= 3 for input_rank in self.input_ranks]), "Cannot apply a pooling layer on a tensor of which the rank is less than 3. Usually, a tensor whose rank is at least 3, e.g. [batch size, sequence length, feature]" 56 | 57 | assert self.output_dim[-1] != -1, "Pooling on the axis %d while the input shape is %s requires that the sequence lengths should be fixed! Please set it on conf/training_params/fixed_lengths" % (self.pool_axis, str(self.input_dims[0])) 58 | 59 | class Pooling(BaseLayer): 60 | """ Pooling layer 61 | 62 | Args: 63 | layer_conf (PoolingConf): configuration of a layer 64 | """ 65 | def __init__(self, layer_conf): 66 | super(Pooling, self).__init__(layer_conf) 67 | 68 | def forward(self, string, string_len=None): 69 | """ process inputs 70 | 71 | Args: 72 | string (Tensor): any shape. 73 | string_len (Tensor): [batch_size], default is None. 74 | 75 | Returns: 76 | Tensor: Pooling result of string 77 | 78 | """ 79 | if self.layer_conf.pool_type == "mean": 80 | assert not string_len is None, "Parameter string_len should not be None!" 81 | string = torch.sum(string, self.layer_conf.pool_axis).squeeze(self.layer_conf.pool_axis) 82 | if not torch.is_tensor(string_len): 83 | string_len = torch.FloatTensor(string_len).unsqueeze(1) 84 | if self.is_cuda(): 85 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 86 | string_len = string_len.to(device) 87 | string_len = string_len.unsqueeze(1) 88 | output = string / string_len.expand_as(string).float() 89 | elif self.layer_conf.pool_type == "max": 90 | output = torch.max(string, self.layer_conf.pool_axis)[0] 91 | 92 | return output, string_len 93 | 94 | 95 | -------------------------------------------------------------------------------- /block_zoo/math/Minus3D.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
3 | 4 | import torch 5 | import torch.nn as nn 6 | import logging 7 | 8 | from ..BaseLayer import BaseConf 9 | from utils.DocInherit import DocInherit 10 | from utils.exceptions import ConfigurationError 11 | import copy 12 | 13 | class Minus3DConf(BaseConf): 14 | """ Configuration of Minus3D layer 15 | 16 | Args: 17 | abs_flag: if the result of the Minus3D is abs, default is False 18 | 19 | """ 20 | 21 | # init the args 22 | def __init__(self, **kwargs): 23 | super(Minus3DConf, self).__init__(**kwargs) 24 | 25 | # set default params 26 | @DocInherit 27 | def default(self): 28 | self.abs_flag = False 29 | 30 | @DocInherit 31 | def declare(self): 32 | self.num_of_inputs = 2 33 | self.input_ranks = [3,3] 34 | 35 | @DocInherit 36 | def inference(self): 37 | self.output_dim = copy.deepcopy(self.input_dims[0]) 38 | if self.input_dims[0][-1] != 1: 39 | self.output_dim[-1] = self.input_dims[0][-1] 40 | else: 41 | self.output_dim[-1] = self.input_dims[1][-1] 42 | 43 | super(Minus3DConf, self).inference() 44 | 45 | @DocInherit 46 | def verify(self): 47 | super(Minus3DConf, self).verify() 48 | 49 | # # to check if the ranks of all the inputs are equal 50 | # rank_equal_flag = True 51 | # for i in range(len(self.input_ranks)): 52 | # if self.input_ranks[i] != self.input_ranks[0]: 53 | # rank_equal_flag = False 54 | # break 55 | # if rank_equal_flag == False: 56 | # raise ConfigurationError("For layer Minus3D, the ranks of each inputs should be equal!") 57 | 58 | class Minus3D(nn.Module): 59 | """ Minus3D layer to get subtraction of two sequences(3D representation) 60 | 61 | Args: 62 | layer_conf (Minus3DConf): configuration of a layer 63 | 64 | """ 65 | def __init__(self, layer_conf): 66 | super(Minus3D, self).__init__() 67 | self.layer_conf = layer_conf 68 | 69 | logging.warning("The length Minus3D layer returns is the length of first input") 70 | 71 | def forward(self, *args): 72 | """ process input 73 | 74 | Args: 75 | *args: (Tensor): string, string_len, string2, string2_len 76 | e.g. string (Tensor): [batch_size, seq_len, dim], string_len (Tensor): [batch_size] 77 | 78 | Returns: 79 | Tensor: [batch_size, seq_len, output_dim], [batch_size] 80 | """ 81 | 82 | # to check if the dimensions of all the inputs are legal for the Minus3D 83 | dim_flag = True 84 | input_dims = list(self.layer_conf.input_dims) 85 | if (args[0].shape[1] * args[0].shape[2]) != (args[2].shape[1] * args[2].shape[2]): 86 | if args[0].shape[1] == args[2].shape[1] and (input_dims[1][-1] == 1 or input_dims[0][-1] == 1): 87 | dim_flag = True 88 | else: 89 | dim_flag = False 90 | if dim_flag == False: 91 | raise ConfigurationError("For layer Minus3D, the dimensions of each inputs should be equal or 1 ,or the elements number of two inputs (expect for the first dimension) should be equal") 92 | 93 | 94 | if self.layer_conf.abs_flag == False: 95 | return (args[0] - args[2]), args[1] 96 | if self.layer_conf.abs_flag == True: 97 | return torch.abs(args[0] - args[2]),args[1] -------------------------------------------------------------------------------- /model_zoo/nlp_tasks/sentiment_analysis/conf_sentiment_analysis_bilstm_cnn.json: -------------------------------------------------------------------------------- 1 | { 2 | "license": "Copyright (c) Microsoft Corporation. All rights reserved. 
Licensed under the MIT license.", 3 | "tool_version": "1.1.0", 4 | "model_description": "this config is used for sentiment analysis task with SST-2 dataset, it achieved acc:0.8497; auc:0.9148 on dev set.", 5 | "inputs": { 6 | "use_cache": true, 7 | "dataset_type": "classification", 8 | "data_paths": { 9 | "train_data_path": "./dataset/SST-2/train.tsv", 10 | "valid_data_path": "./dataset/SST-2/dev.tsv", 11 | "test_data_path": "./dataset/SST-2/dev.tsv", 12 | "pre_trained_emb": "./dataset/GloVe/glove.840B.300d.txt" 13 | }, 14 | "file_with_col_header": true, 15 | "add_start_end_for_seq": true, 16 | "file_header": { 17 | "sentence_text": 0, 18 | "label": 1 19 | }, 20 | "model_inputs": { 21 | "sentence": ["sentence_text"] 22 | }, 23 | "target": ["label"] 24 | }, 25 | "outputs":{ 26 | "save_base_dir": "./models/sentiment_analysis_bilstm_cnn/", 27 | "model_name": "model.nb", 28 | "train_log_name": "train.log", 29 | "test_log_name": "test.log", 30 | "predict_log_name": "predict.log", 31 | "predict_fields": ["prediction", "confidence"], 32 | "predict_output_name": "predict.tsv", 33 | "cache_dir": ".cache.SST-2/" 34 | }, 35 | "training_params": { 36 | "vocabulary": { 37 | "min_word_frequency": 1 38 | }, 39 | "optimizer": { 40 | "name": "SGD", 41 | "params": { 42 | "lr": 0.05, 43 | "momentum": 0.9, 44 | "nesterov": true 45 | } 46 | }, 47 | "lr_decay": 0.95, 48 | "minimum_lr": 0.005, 49 | "epoch_start_lr_decay": 1, 50 | "use_gpu": true, 51 | "batch_size": 64, 52 | "batch_num_to_show_results": 100, 53 | "max_epoch": 20, 54 | "valid_times_per_epoch": 3 55 | }, 56 | "architecture":[ 57 | { 58 | "layer": "Embedding", 59 | "conf": { 60 | "word": { 61 | "cols": ["sentence_text"], 62 | "dim": 300 63 | } 64 | } 65 | }, 66 | { 67 | "layer_id": "sentence_1", 68 | "layer": "BiLSTM", 69 | "conf": { 70 | "hidden_dim": 256, 71 | "dropout": 0, 72 | "num_layers": 2 73 | }, 74 | "inputs": ["sentence"] 75 | }, 76 | { 77 | "layer_id": "hierarchical_cnn", 78 | "layer": "ConvPooling", 79 | "conf": { 80 | "stride": 1, 81 | "window_size": [1,2,3,4,5], 82 | "output_channel_num": 128, 83 | "batch_norm": true, 84 | "activation": "ReLU", 85 | "pool_type": "max", 86 | "pool_axis": 1 87 | }, 88 | "inputs": ["sentence_1"] 89 | }, 90 | { 91 | "output_layer_flag": true, 92 | "layer_id": "output", 93 | "layer": "Linear", 94 | "conf": { 95 | "hidden_dim": [128, 2], 96 | "activation": "PReLU", 97 | "batch_norm": true, 98 | "last_hidden_activation": false 99 | }, 100 | "inputs": ["hierarchical_cnn"] 101 | } 102 | ], 103 | "loss": { 104 | "losses":[ 105 | { 106 | "type": "CrossEntropyLoss", 107 | "conf": { 108 | "size_average": true 109 | }, 110 | "inputs": ["output","label"] 111 | } 112 | ] 113 | }, 114 | "metrics": ["accuracy", "auc"] 115 | } -------------------------------------------------------------------------------- /core/StreamingRecorder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 3 | 4 | import numpy as np 5 | 6 | class StreamingRecorder(): 7 | def __init__(self, names): 8 | """ 9 | 10 | Args: 11 | names: ['prediction', ... 
] 12 | """ 13 | self.__names = names 14 | self.__operators = dict() 15 | self.__recorder = dict() 16 | for name in names: 17 | self.__recorder[name] = [] 18 | 19 | def record(self, name, values, keep_dim=False): 20 | """ insert a col of multiple values 21 | 22 | Args: 23 | name: 24 | values: 25 | 26 | Returns: 27 | 28 | """ 29 | if isinstance(values, list) or isinstance(values, np.ndarray): 30 | if keep_dim is False: 31 | self.__recorder[name].extend(values) 32 | else: 33 | self.__recorder[name].append(values) 34 | else: 35 | self.__recorder[name].append(values) 36 | 37 | def record_one_row(self, values, keep_dim=False): 38 | """ insert a whole row 39 | 40 | Args: 41 | values: [col1, col2, col3, ...], each element can be either a list or a single number 42 | 43 | Returns: 44 | 45 | """ 46 | assert len(self.__names) == len(values) 47 | for name, value in zip(self.__names, values): 48 | self.record(name, value, keep_dim) 49 | 50 | def get(self, name, operator=None): 51 | """ 52 | 53 | Args: 54 | name: 55 | operator: has the same shape with names, supported operations: 56 | None or 'origin': return the original values 57 | 'mean': return mean of the values 58 | 'sum': return sum of the values 59 | 'min': return min of the values 60 | 'max': return max of the values 61 | 'distribution': return 0%, 10%, 20%, ..., 90%, 100% of values, from min to max 62 | 63 | Returns: 64 | 65 | """ 66 | 67 | if operator is None or operator == 'origin': 68 | return self.__recorder[name] 69 | elif operator == 'mean': 70 | return np.mean(self.__recorder[name]) 71 | elif operator == 'sum': 72 | return np.sum(self.__recorder[name]) 73 | elif operator == 'min': 74 | return np.min(self.__recorder[name]) 75 | elif operator == 'max': 76 | return np.max(self.__recorder[name]) 77 | elif operator == 'distribution': 78 | data_sorted = np.sort(self.__recorder[name]) 79 | distribution = [] 80 | for i in np.linspace(0, 1, 11): 81 | if i != 1: 82 | distribution.append(data_sorted[int(i * len(data_sorted))]) 83 | else: 84 | distribution.append(data_sorted[-1]) 85 | return distribution 86 | 87 | def clear_records(self, name=None): 88 | if name is None: 89 | for name in self.__names: 90 | self.__recorder[name] = [] 91 | else: 92 | self.__recorder[name] = [] 93 | 94 | 95 | 96 | 97 | if __name__ == "__main__": 98 | streaming_recorder = StreamingRecorder(['prediction']) 99 | streaming_recorder.record('prediction', [1, 2, 3]) 100 | streaming_recorder.record('prediction', [4, 5, 6]) 101 | print(streaming_recorder.get('prediction', 'origin')) 102 | print(streaming_recorder.get('prediction', 'distribution')) 103 | 104 | -------------------------------------------------------------------------------- /model_zoo/nlp_tasks/chinese_sentiment_analysis/conf_chinese_sentiment_analysis_bilstm.json: -------------------------------------------------------------------------------- 1 | { 2 | "license": "Copyright (c) Microsoft Corporation. All rights reserved. 
Licensed under the MIT license.", 3 | "tool_version": "1.1.0", 4 | "model_description": "This config is used for Chinese sentiment analysis task.", 5 | "language": "Chinese", 6 | "inputs": { 7 | "use_cache": true, 8 | "dataset_type": "classification", 9 | "data_paths": { 10 | "train_data_path": "./dataset/chinese_sentiment_analysis/train.tsv", 11 | "valid_data_path": "./dataset/chinese_sentiment_analysis/dev.tsv", 12 | "test_data_path": "./dataset/chinese_sentiment_analysis/test.tsv", 13 | "predict_data_path": "./dataset/chinese_sentiment_analysis/test.tsv" 14 | }, 15 | "file_with_col_header": false, 16 | "add_start_end_for_seq": true, 17 | "file_header": { 18 | "sentence_col": 0, 19 | "label": 1 20 | }, 21 | "model_inputs": { 22 | "sentence": ["sentence_col"] 23 | }, 24 | "target": ["label"] 25 | }, 26 | "outputs":{ 27 | "save_base_dir": "./models/chinese_sentiment_analysis_bilstm/", 28 | "model_name": "model.nb", 29 | "train_log_name": "train.log", 30 | "test_log_name": "test.log", 31 | "predict_log_name": "predict.log", 32 | "predict_fields": ["prediction", "confidence"], 33 | "predict_output_name": "predict.tsv", 34 | "cache_dir": ".cache.chinese_sentiment_analysis/" 35 | }, 36 | "training_params": { 37 | "optimizer": { 38 | "name": "Adam", 39 | "params": { 40 | } 41 | }, 42 | "vocabulary": { 43 | "min_word_frequency": 1 44 | }, 45 | "lr_decay": 0.95, 46 | "minimum_lr": 0.0001, 47 | "epoch_start_lr_decay": 1, 48 | "use_gpu": true, 49 | "cpu_num_workers": -1, 50 | "batch_size": 32, 51 | "batch_num_to_show_results": 300, 52 | "max_epoch": 30, 53 | "valid_times_per_epoch": 2, 54 | "tokenizer": "jieba" 55 | }, 56 | "architecture":[ 57 | { 58 | "layer": "Embedding", 59 | "conf": { 60 | "word": { 61 | "cols": ["sentence_col"], 62 | "dim": 300 63 | } 64 | } 65 | }, 66 | { 67 | "layer_id": "sentence_dropout", 68 | "layer": "Dropout", 69 | "conf": { 70 | "dropout": 0.2 71 | }, 72 | "inputs": ["sentence"] 73 | }, 74 | { 75 | "layer_id": "sentence_bilstm", 76 | "layer": "BiLSTM", 77 | "conf": { 78 | "hidden_dim": 256, 79 | "dropout": 0.2, 80 | "num_layers": 2 81 | }, 82 | "inputs": ["sentence_dropout"] 83 | }, 84 | { 85 | "layer_id": "sentence_pooling", 86 | "layer": "Pooling", 87 | "conf": { 88 | "pool_axis": 1, 89 | "pool_type": "max" 90 | }, 91 | "inputs": ["sentence_bilstm"] 92 | }, 93 | { 94 | "output_layer_flag": true, 95 | "layer_id": "output", 96 | "layer": "Linear", 97 | "conf": { 98 | "hidden_dim": [128, 2], 99 | "activation": "PReLU", 100 | "batch_norm": true, 101 | "last_hidden_activation": false, 102 | "last_hidden_softmax": false 103 | }, 104 | "inputs": ["sentence_pooling"] 105 | } 106 | ], 107 | "loss": { 108 | "losses": [ 109 | { 110 | "type": "CrossEntropyLoss", 111 | "conf": { 112 | "size_average": true 113 | }, 114 | "inputs": ["output","label"] 115 | } 116 | ] 117 | }, 118 | "metrics": ["accuracy"] 119 | } -------------------------------------------------------------------------------- /model_zoo/nlp_tasks/sentiment_analysis/conf_sentiment_analysis_bilstm_attn.json: -------------------------------------------------------------------------------- 1 | { 2 | "license": "Copyright (c) Microsoft Corporation. All rights reserved. 
Licensed under the MIT license.", 3 | "tool_version": "1.1.0", 4 | "model_description": "This config is used for sentiment analysis task with SST-2 dataset, it achieved acc: 0.883028 on dev set", 5 | "inputs": { 6 | "use_cache": true, 7 | "dataset_type": "classification", 8 | "data_paths": { 9 | "train_data_path": "./dataset/SST-2/train.tsv", 10 | "valid_data_path": "./dataset/SST-2/dev.tsv", 11 | "test_data_path": "./dataset/SST-2/dev.tsv", 12 | "pre_trained_emb": "./dataset/GloVe/glove.840B.300d.txt" 13 | }, 14 | "file_with_col_header": true, 15 | "add_start_end_for_seq": true, 16 | "file_header": { 17 | "sentence_col": 0, 18 | "label": 1 19 | }, 20 | "model_inputs": { 21 | "sentence": ["sentence_col"] 22 | }, 23 | "target": ["label"] 24 | }, 25 | "outputs":{ 26 | "save_base_dir": "./models/sentiment_analysis_bilstm_attn/", 27 | "model_name": "model.nb", 28 | "train_log_name": "train.log", 29 | "test_log_name": "test.log", 30 | "predict_log_name": "predict.log", 31 | "predict_fields": ["prediction", "confidence"], 32 | "predict_output_name": "predict.tsv", 33 | "cache_dir": ".cache.SST-2/" 34 | }, 35 | "training_params": { 36 | "optimizer": { 37 | "name": "Adam", 38 | "params": { 39 | } 40 | }, 41 | "vocabulary": { 42 | "min_word_frequency": 1 43 | }, 44 | "lr_decay": 0.95, 45 | "minimum_lr": 0.0001, 46 | "epoch_start_lr_decay": 1, 47 | "use_gpu": true, 48 | "batch_size": 128, 49 | "batch_num_to_show_results": 300, 50 | "max_epoch": 30, 51 | "valid_times_per_epoch": 2 52 | }, 53 | "architecture":[ 54 | { 55 | "layer": "Embedding", 56 | "conf": { 57 | "word": { 58 | "cols": ["sentence_col"], 59 | "dim": 300 60 | } 61 | } 62 | }, 63 | { 64 | "layer_id": "sentence_dropout", 65 | "layer": "Dropout", 66 | "conf": { 67 | "dropout": 0.2 68 | }, 69 | "inputs": ["sentence"] 70 | }, 71 | { 72 | "layer_id": "sentence_bilstm", 73 | "layer": "BiLSTM", 74 | "conf": { 75 | "hidden_dim": 256, 76 | "dropout": 0.2, 77 | "num_layers": 2 78 | }, 79 | "inputs": ["sentence_dropout"] 80 | }, 81 | { 82 | "layer_id": "sentence_attn", 83 | "layer": "Attention", 84 | "conf": { 85 | }, 86 | "inputs": ["sentence_bilstm","sentence_bilstm"] 87 | }, 88 | { 89 | "layer_id": "sentence_pooling", 90 | "layer": "Pooling", 91 | "conf": { 92 | "pool_axis": 1, 93 | "pool_type": "max" 94 | }, 95 | "inputs": ["sentence_attn"] 96 | }, 97 | { 98 | "output_layer_flag": true, 99 | "layer_id": "output", 100 | "layer": "Linear", 101 | "conf": { 102 | "hidden_dim": [128, 2], 103 | "activation": "PReLU", 104 | "batch_norm": true, 105 | "last_hidden_activation": false 106 | }, 107 | "inputs": ["sentence_pooling"] 108 | } 109 | ], 110 | "loss": { 111 | "losses": [ 112 | { 113 | "type": "CrossEntropyLoss", 114 | "conf": { 115 | "size_average": true 116 | }, 117 | "inputs": ["output","label"] 118 | } 119 | ] 120 | }, 121 | "metrics": ["accuracy"] 122 | } -------------------------------------------------------------------------------- /docs/build/html/_sources/block_zoo.rst.txt: -------------------------------------------------------------------------------- 1 | block\_zoo package 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | block_zoo.attentions 10 | block_zoo.embedding 11 | block_zoo.encoder_decoder 12 | block_zoo.math 13 | block_zoo.normalizations 14 | block_zoo.op 15 | block_zoo.transformer 16 | 17 | Submodules 18 | ---------- 19 | 20 | block\_zoo.BaseLayer module 21 | --------------------------- 22 | 23 | .. 
automodule:: block_zoo.BaseLayer 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | block\_zoo.BiGRU module 29 | ----------------------- 30 | 31 | .. automodule:: block_zoo.BiGRU 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | block\_zoo.BiGRULast module 37 | --------------------------- 38 | 39 | .. automodule:: block_zoo.BiGRULast 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | block\_zoo.BiLSTM module 45 | ------------------------ 46 | 47 | .. automodule:: block_zoo.BiLSTM 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | 52 | block\_zoo.BiLSTMAtt module 53 | --------------------------- 54 | 55 | .. automodule:: block_zoo.BiLSTMAtt 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | 60 | block\_zoo.BiLSTMLast module 61 | ---------------------------- 62 | 63 | .. automodule:: block_zoo.BiLSTMLast 64 | :members: 65 | :undoc-members: 66 | :show-inheritance: 67 | 68 | block\_zoo.BiQRNN module 69 | ------------------------ 70 | 71 | .. automodule:: block_zoo.BiQRNN 72 | :members: 73 | :undoc-members: 74 | :show-inheritance: 75 | 76 | block\_zoo.Conv module 77 | ---------------------- 78 | 79 | .. automodule:: block_zoo.Conv 80 | :members: 81 | :undoc-members: 82 | :show-inheritance: 83 | 84 | block\_zoo.ConvPooling module 85 | ----------------------------- 86 | 87 | .. automodule:: block_zoo.ConvPooling 88 | :members: 89 | :undoc-members: 90 | :show-inheritance: 91 | 92 | block\_zoo.Dropout module 93 | ------------------------- 94 | 95 | .. automodule:: block_zoo.Dropout 96 | :members: 97 | :undoc-members: 98 | :show-inheritance: 99 | 100 | block\_zoo.Embedding module 101 | --------------------------- 102 | 103 | .. automodule:: block_zoo.Embedding 104 | :members: 105 | :undoc-members: 106 | :show-inheritance: 107 | 108 | block\_zoo.EncoderDecoder module 109 | -------------------------------- 110 | 111 | .. automodule:: block_zoo.EncoderDecoder 112 | :members: 113 | :undoc-members: 114 | :show-inheritance: 115 | 116 | block\_zoo.Flatten module 117 | ------------------------- 118 | 119 | .. automodule:: block_zoo.Flatten 120 | :members: 121 | :undoc-members: 122 | :show-inheritance: 123 | 124 | block\_zoo.HighwayLinear module 125 | ------------------------------- 126 | 127 | .. automodule:: block_zoo.HighwayLinear 128 | :members: 129 | :undoc-members: 130 | :show-inheritance: 131 | 132 | block\_zoo.Linear module 133 | ------------------------ 134 | 135 | .. automodule:: block_zoo.Linear 136 | :members: 137 | :undoc-members: 138 | :show-inheritance: 139 | 140 | block\_zoo.Pooling module 141 | ------------------------- 142 | 143 | .. automodule:: block_zoo.Pooling 144 | :members: 145 | :undoc-members: 146 | :show-inheritance: 147 | 148 | block\_zoo.Transformer module 149 | ----------------------------- 150 | 151 | .. automodule:: block_zoo.Transformer 152 | :members: 153 | :undoc-members: 154 | :show-inheritance: 155 | 156 | 157 | Module contents 158 | --------------- 159 | 160 | .. automodule:: block_zoo 161 | :members: 162 | :undoc-members: 163 | :show-inheritance: 164 | -------------------------------------------------------------------------------- /model_zoo/demo/conf.json: -------------------------------------------------------------------------------- 1 | { 2 | "license": "Copyright (c) Microsoft Corporation. All rights reserved. 
Licensed under the MIT license.", 3 | "tool_version": "1.1.0", 4 | "model_description": "This example shows how to train/test/predict.", 5 | "inputs": { 6 | "use_cache": true, 7 | "dataset_type": "classification", 8 | "data_paths": { 9 | "train_data_path": "./dataset/demo/train.tsv", 10 | "valid_data_path": "./dataset/demo/valid.tsv", 11 | "test_data_path": "./dataset/demo/test.tsv", 12 | "predict_data_path": "./dataset/demo/predict.tsv" 13 | }, 14 | "file_with_col_header": false, 15 | "add_start_end_for_seq": true, 16 | "file_header": { 17 | "question_text": 0, 18 | "answer_text": 1, 19 | "label": 2 20 | }, 21 | "predict_file_header": { 22 | "question_text": 0, 23 | "answer_text": 1 24 | }, 25 | "model_inputs": { 26 | "question": ["question_text"], 27 | "answer": ["answer_text"] 28 | }, 29 | "target": ["label"] 30 | }, 31 | "outputs": { 32 | "save_base_dir": "./models/demo/", 33 | "model_name": "model.nb", 34 | "train_log_name": "train.log", 35 | "test_log_name": "test.log", 36 | "predict_log_name": "predict.log", 37 | "predict_fields": ["prediction", "confidence"], 38 | "predict_output_name": "predict.tsv", 39 | "cache_dir": ".cache.demo/" 40 | }, 41 | "training_params": { 42 | "optimizer": { 43 | "name": "Adam", 44 | "params": { 45 | "lr": 0.001 46 | } 47 | }, 48 | "use_gpu": false, 49 | "batch_size": 30, 50 | "batch_num_to_show_results": 10, 51 | "max_epoch": 3, 52 | "valid_times_per_epoch": 1, 53 | "max_lengths": { 54 | "question_text": 30, 55 | "answer_text": 100 56 | } 57 | }, 58 | "architecture": [ 59 | { 60 | "layer": "Embedding", 61 | "conf": { 62 | "word": { 63 | "cols": ["question_text", "answer_text"], 64 | "dim": 300 65 | } 66 | } 67 | }, 68 | { 69 | "layer_id": "question_1", 70 | "layer": "BiLSTM", 71 | "conf": { 72 | "hidden_dim": 64, 73 | "dropout": 0.2, 74 | "num_layers": 2 75 | }, 76 | "inputs": ["question"] 77 | }, 78 | { 79 | "layer_id": "answer_1", 80 | "layer": "BiLSTM", 81 | "conf": { 82 | "hidden_dim": 64, 83 | "dropout": 0.2, 84 | "num_layers": 2 85 | }, 86 | "inputs": ["answer"] 87 | }, 88 | { 89 | "layer_id": "question_2", 90 | "layer": "Pooling", 91 | "conf": { 92 | "pool_axis": 1, 93 | "pool_type": "max" 94 | }, 95 | "inputs": ["question_1"] 96 | }, 97 | { 98 | "layer_id": "answer_2", 99 | "layer": "question_2", 100 | "inputs": ["answer_1"] 101 | }, 102 | { 103 | "layer_id": "comb", 104 | "layer": "Combination", 105 | "conf": { 106 | "operations": ["origin", "difference", "dot_multiply"] 107 | }, 108 | "inputs": ["question_2", "answer_2"] 109 | }, 110 | { 111 | "output_layer_flag": true, 112 | "layer_id": "output", 113 | "layer": "Linear", 114 | "conf": { 115 | "hidden_dim": [128, 2], 116 | "activation": "PReLU", 117 | "batch_norm": true, 118 | "last_hidden_activation": false 119 | }, 120 | "inputs": ["comb"] 121 | } 122 | ], 123 | "loss": { 124 | "losses": [ 125 | { 126 | "type": "CrossEntropyLoss", 127 | "conf": { 128 | "size_average": true 129 | }, 130 | "inputs": ["output", "label"] 131 | } 132 | ] 133 | }, 134 | "metrics": ["auc", "accuracy"] 135 | } -------------------------------------------------------------------------------- /model_zoo/nlp_tasks/chinese_sentiment_analysis/conf_chinese_sentiment_analysis_bilstm_emb.json: -------------------------------------------------------------------------------- 1 | { 2 | "license": "Copyright (c) Microsoft Corporation. All rights reserved. 
Licensed under the MIT license.", 3 | "tool_version": "1.1.0", 4 | "model_description": "This config is used for Chinese sentiment analysis task.", 5 | "language": "Chinese", 6 | "inputs": { 7 | "use_cache": true, 8 | "dataset_type": "classification", 9 | "data_paths": { 10 | "train_data_path": "./dataset/chinese_sentiment_analysis/train.tsv", 11 | "valid_data_path": "./dataset/chinese_sentiment_analysis/dev.tsv", 12 | "test_data_path": "./dataset/chinese_sentiment_analysis/test.tsv", 13 | "predict_data_path": "./dataset/chinese_sentiment_analysis/test.tsv", 14 | "pre_trained_emb": "./dataset/chinese_word_vectors/sgns.merge.word" 15 | }, 16 | "file_with_col_header": false, 17 | "add_start_end_for_seq": true, 18 | "file_header": { 19 | "sentence_col": 0, 20 | "label": 1 21 | }, 22 | "model_inputs": { 23 | "sentence": ["sentence_col"] 24 | }, 25 | "target": ["label"] 26 | }, 27 | "outputs":{ 28 | "save_base_dir": "./models/chinese_sentiment_analysis_bilstm_emb/", 29 | "model_name": "model.nb", 30 | "train_log_name": "train.log", 31 | "test_log_name": "test.log", 32 | "predict_log_name": "predict.log", 33 | "predict_fields": ["prediction", "confidence"], 34 | "predict_output_name": "predict.tsv", 35 | "cache_dir": ".cache.chinese_sentiment_analysis/" 36 | }, 37 | "training_params": { 38 | "optimizer": { 39 | "name": "Adam", 40 | "params": { 41 | } 42 | }, 43 | "vocabulary": { 44 | "min_word_frequency": 1 45 | }, 46 | "lr_decay": 0.95, 47 | "minimum_lr": 0.0001, 48 | "epoch_start_lr_decay": 1, 49 | "use_gpu": true, 50 | "cpu_num_workers": -1, 51 | "batch_size": 32, 52 | "batch_num_to_show_results": 300, 53 | "max_epoch": 30, 54 | "valid_times_per_epoch": 2, 55 | "tokenizer": "jieba" 56 | }, 57 | "architecture":[ 58 | { 59 | "layer": "Embedding", 60 | "conf": { 61 | "word": { 62 | "cols": ["sentence_col"], 63 | "dim": 300, 64 | "fix_weight": true 65 | } 66 | } 67 | }, 68 | { 69 | "layer_id": "sentence_dropout", 70 | "layer": "Dropout", 71 | "conf": { 72 | "dropout": 0.2 73 | }, 74 | "inputs": ["sentence"] 75 | }, 76 | { 77 | "layer_id": "sentence_bilstm", 78 | "layer": "BiLSTM", 79 | "conf": { 80 | "hidden_dim": 256, 81 | "dropout": 0.2, 82 | "num_layers": 2 83 | }, 84 | "inputs": ["sentence_dropout"] 85 | }, 86 | { 87 | "layer_id": "sentence_pooling", 88 | "layer": "Pooling", 89 | "conf": { 90 | "pool_axis": 1, 91 | "pool_type": "max" 92 | }, 93 | "inputs": ["sentence_bilstm"] 94 | }, 95 | { 96 | "output_layer_flag": true, 97 | "layer_id": "output", 98 | "layer": "Linear", 99 | "conf": { 100 | "hidden_dim": [128, 2], 101 | "activation": "PReLU", 102 | "batch_norm": true, 103 | "last_hidden_activation": false, 104 | "last_hidden_softmax": false 105 | }, 106 | "inputs": ["sentence_pooling"] 107 | } 108 | ], 109 | "loss": { 110 | "losses": [ 111 | { 112 | "type": "CrossEntropyLoss", 113 | "conf": { 114 | "size_average": true 115 | }, 116 | "inputs": ["output","label"] 117 | } 118 | ] 119 | }, 120 | "metrics": ["accuracy"] 121 | } -------------------------------------------------------------------------------- /block_zoo/transformer/MultiHeadAttention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT license. 
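# Illustrative sketch (an addition for clarity, not part of the original layer; the helper name is made up):
# the core of MultiHeadAttention._attn below is masked, scaled dot-product attention. After split_heads,
# q is [batch, n_head, seq_len, d_k], k is [batch, n_head, d_k, seq_len] and v is [batch, n_head, seq_len, d_k];
# the mask is a lower-triangular [1, 1, seq_len, seq_len] tensor, so each position can only attend to itself
# and earlier positions.
def _masked_scaled_dot_product_attention_sketch(q, k, v, mask):
    w = q.matmul(k) / (v.size(-1) ** 0.5)      # attention scores, scaled by sqrt(d_k)
    w = w * mask + -1e9 * (1 - mask)           # push masked (future) positions towards -inf
    w = w.softmax(dim=-1)                      # attention dropout is omitted in this sketch
    return w.matmul(v)                         # [batch, n_head, seq_len, d_k]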
3 | 4 | import torch 5 | import torch.nn as nn 6 | import math 7 | 8 | from block_zoo.BaseLayer import BaseLayer,BaseConf 9 | from utils.DocInherit import DocInherit 10 | import copy 11 | 12 | class MultiHeadAttentionConf(BaseConf): 13 | """ Configuration of MultiHeadAttention Layer 14 | 15 | Args: 16 | n_head (int): the head number of attention 17 | scale (bool): if need to scale 18 | attn_dropout (float): the dropout of attention layer 19 | resid_dropout (float): the dropout of last Linear 20 | """ 21 | 22 | def __init__(self,**kwargs): 23 | 24 | super(MultiHeadAttentionConf, self).__init__(**kwargs) 25 | 26 | @DocInherit 27 | def default(self): 28 | self.n_head = 12 29 | self.scale = True 30 | self.attn_dropout = 0.1 31 | self.resid_dropout = 0.1 32 | 33 | @DocInherit 34 | def declare(self): 35 | self.num_of_inputs = 1 36 | self.input_ranks = [3] 37 | 38 | @DocInherit 39 | def inference(self): 40 | self.output_dim = copy.deepcopy(self.input_dims[0]) 41 | super(MultiHeadAttentionConf, self).inference() 42 | 43 | @DocInherit 44 | def verify(self): 45 | super(MultiHeadAttentionConf, self).verify() 46 | 47 | class MultiHeadAttention(nn.Module): 48 | """ MultiHeadAttention Layer 49 | 50 | Args: 51 | layer_conf (MultiHeadAttentionConf): configuration of a layer 52 | 53 | """ 54 | def __init__(self, layer_conf): 55 | super(MultiHeadAttention, self).__init__() 56 | self.layer_conf = layer_conf 57 | self.split_size = self.layer_conf.input_dims[0][-1] 58 | self.n_state = self.layer_conf.input_dims[0][-1] 59 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 60 | assert self.n_state % self.layer_conf.n_head == 0 61 | 62 | self.c_attn = nn.Linear(self.layer_conf.input_dims[0][-1],self.n_state * 3) 63 | self.c_proj = nn.Linear(self.layer_conf.input_dims[0][-1],self.n_state) 64 | 65 | def _attn(self, q, k, v): 66 | w = torch.matmul(q, k).to(self.device) 67 | if self.layer_conf.scale: 68 | w = w / math.sqrt(v.size(-1)) 69 | w = w * self.b + -1e9 * (1 - self.b) 70 | w = nn.Softmax(dim=-1)(w) 71 | w = nn.Dropout(self.layer_conf.attn_dropout)(w) 72 | return torch.matmul(w, v) 73 | 74 | def merge_heads(self, x): 75 | x = x.permute(0, 2, 1, 3).contiguous() 76 | new_x_shape = x.size()[:-2] + (x.size(-2) * x.size(-1),) 77 | return x.view(*new_x_shape) 78 | 79 | def split_heads(self, x, k=False): 80 | new_x_shape = x.size()[:-1] + (self.layer_conf.n_head, x.size(-1) // self.layer_conf.n_head) 81 | x = x.view(*new_x_shape) 82 | if k: 83 | return x.permute(0, 2, 3, 1) 84 | else: 85 | return x.permute(0, 2, 1, 3) 86 | 87 | def forward(self, string, string_len): 88 | """ process input 89 | 90 | Args: 91 | string, string_len 92 | e.g. 
            string (Tensor): [batch_size, seq_len, dim]
            string_len (Tensor): [batch_size]

        Returns:
            Tensor: [batch_size, seq_len, output_dim]
            Tensor: [batch_size], the unchanged string_len
        """
        # build the causal (lower-triangular) mask for the current sequence length
        # on the input's device, rather than registering a new buffer on every call
        seq_len = string.shape[1]
        mask = torch.tril(torch.ones(seq_len, seq_len, device=string.device)).view(1, 1, seq_len, seq_len)

        x = self.c_attn(string)
        query, key, value = x.split(self.split_size, dim=2)
        query = self.split_heads(query)
        key = self.split_heads(key, k=True)
        value = self.split_heads(value)
        a = self._attn(query, key, value, mask)
        a = self.merge_heads(a)
        a = self.c_proj(a)
        a = self.resid_drop(a)
        return a, string_len
-------------------------------------------------------------------------------- /model_zoo/nlp_tasks/question_pairs/conf_question_pairs_bigru.json: --------------------------------------------------------------------------------
1 | { 2 | "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.", 3 | "tool_version": "1.1.0", 4 | "model_description": "This model is used for the question pairs (QQP) task. It achieved acc: 0.8324 on the dev set.", 5 | "inputs": { 6 | "use_cache": true, 7 | "dataset_type": "classification", 8 | "data_paths": { 9 | "train_data_path": "./dataset/QQP/train.tsv", 10 | "valid_data_path": "./dataset/QQP/dev.tsv", 11 | "test_data_path": "./dataset/QQP/dev.tsv", 12 | "pre_trained_emb": "./dataset/GloVe/glove.840B.300d.txt" 13 | }, 14 | "file_with_col_header": true, 15 | "add_start_end_for_seq": true, 16 | "file_header": { 17 | "id": 0, 18 | "qid1": 1, 19 | "qid2": 2, 20 | "question1_text": 3, 21 | "question2_text": 4, 22 | "label": 5 23 | }, 24 | "model_inputs": { 25 | "question1": ["question1_text"], 26 | "question2": ["question2_text"] 27 | }, 28 | "target": ["label"] 29 | }, 30 | "outputs":{ 31 | "save_base_dir": "./models/question_pairs_bigru/", 32 | "model_name": "model.nb", 33 | "train_log_name": "train.log", 34 | "test_log_name": "test.log", 35 | "predict_log_name": "predict.log", 36 | "predict_fields": ["prediction", "confidence"], 37 | "predict_output_name": "predict.tsv", 38 | "cache_dir": ".cache.QQP/" 39 | }, 40 | "training_params": { 41 | "vocabulary": { 42 | "min_word_frequency": 1 43 | }, 44 | "optimizer": { 45 | "name": "SGD", 46 | "params": { 47 | "lr": 0.15, 48 | "momentum": 0.9, 49 | "nesterov": true 50 | } 51 | }, 52 | "lr_decay": 0.95, 53 | "minimum_lr": 0.005, 54 | "epoch_start_lr_decay": 1, 55 | "use_gpu": true, 56 | "batch_size": 32, 57 | "batch_num_to_show_results": 300, 58 | "max_epoch": 30, 59 | "valid_times_per_epoch": 5 60 | }, 61 | "architecture":[ 62 | { 63 | "layer": "Embedding", 64 | "conf": { 65 | "word": { 66 | "cols": ["question1_text", "question2_text"], 67 | "dim": 300 68 | } 69 | } 70 | }, 71 | { 72 | "layer_id": "question_1", 73 | "layer": "BiGRU", 74 | "conf": { 75 | "hidden_dim": 128, 76 | "dropout": 0, 77 | "num_layers": 2 78 | }, 79 | "inputs": ["question1"] 80 | }, 81 | { 82 | "layer_id": "question_2", 83 | "layer": "BiGRU", 84 | "conf": { 85 | "hidden_dim": 128, 86 | "dropout": 0, 87 | "num_layers": 2 88 | }, 89 | "inputs": ["question2"] 90 | }, 91 | { 92 | "layer_id": "pooling_1", 93 | "layer": "Pooling", 94 | "conf": { 95 | "pool_axis": 1, 96 | "pool_type": "max" 97 | }, 98 | "inputs": ["question_1"] 99 | }, 100 | { 101 | "layer_id": "pooling_2", 102 | "layer": "Pooling", 103 | "conf": { 104 | "pool_axis": 1, 105 | "pool_type": "max" 106 | }, 107 | "inputs": ["question_2"] 108 | }, 109 | { 110 | 
"layer_id": "comb_qq", 111 | "layer": "Combination", 112 | "conf": { 113 | "operations": ["origin", "difference", "dot_multiply"] 114 | }, 115 | "inputs": ["pooling_1", "pooling_2"] 116 | }, 117 | { 118 | "output_layer_flag": true, 119 | "layer_id": "output", 120 | "layer": "Linear", 121 | "conf": { 122 | "hidden_dim": [256, 2], 123 | "activation": "PReLU", 124 | "batch_norm": true, 125 | "last_hidden_activation": false 126 | }, 127 | "inputs": ["comb_qq"] 128 | } 129 | ], 130 | "loss": { 131 | "losses": [ 132 | { 133 | "type": "CrossEntropyLoss", 134 | "conf": { 135 | "size_average": true 136 | }, 137 | "inputs": ["output","label"] 138 | } 139 | ] 140 | }, 141 | "metrics": ["accuracy","micro_f1","macro_f1"] 142 | } -------------------------------------------------------------------------------- /model_zoo/nlp_tasks/question_answer_matching/conf_question_answer_matching_cnn.json: -------------------------------------------------------------------------------- 1 | { 2 | "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.", 3 | "tool_version": "1.1.0", 4 | "model_description": "This model is used for question answer matching task, and it achieved auc: 0.7479 in WikiQACorpus test set.", 5 | "inputs": { 6 | "use_cache": true, 7 | "dataset_type": "classification", 8 | "data_paths": { 9 | "train_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv", 10 | "valid_data_path": "./dataset/WikiQACorpus/WikiQA-dev.tsv", 11 | "test_data_path": "./dataset/WikiQACorpus/WikiQA-test.tsv", 12 | "pre_trained_emb": "./dataset/GloVe/glove.840B.300d.txt" 13 | }, 14 | "file_with_col_header": true, 15 | "add_start_end_for_seq": true, 16 | "file_header": { 17 | "question_id": 0, 18 | "question_text": 1, 19 | "document_id": 2, 20 | "document_title": 3, 21 | "passage_id": 4, 22 | "passage_text": 5, 23 | "label": 6 24 | }, 25 | "model_inputs": { 26 | "question": ["question_text"], 27 | "passage": ["passage_text"] 28 | }, 29 | "target": ["label"] 30 | }, 31 | "outputs":{ 32 | "save_base_dir": "./models/wikiqa_cnn/", 33 | "model_name": "model.nb", 34 | "train_log_name": "train.log", 35 | "test_log_name": "test.log", 36 | "predict_log_name": "predict.log", 37 | "predict_fields": ["prediction"], 38 | "predict_output_name": "predict.tsv", 39 | "cache_dir": ".cache.wikiqa/" 40 | }, 41 | "training_params": { 42 | "optimizer": { 43 | "name": "Adam", 44 | "params": { 45 | } 46 | }, 47 | "vocabulary": { 48 | "min_word_frequency": 1 49 | }, 50 | "use_gpu": true, 51 | "batch_size": 20, 52 | "batch_num_to_show_results": 500, 53 | "max_epoch": 10, 54 | "valid_times_per_epoch": 1 55 | }, 56 | "architecture":[ 57 | { 58 | "layer": "Embedding", 59 | "conf": { 60 | "word": { 61 | "cols": ["question_text", "passage_text"], 62 | "dim": 300, 63 | "fix_weight": true 64 | } 65 | } 66 | }, 67 | { 68 | "layer_id": "question_1", 69 | "layer": "ConvPooling", 70 | "conf": { 71 | "stride": 1, 72 | "padding": 0, 73 | "window_sizes": [2], 74 | "input_channel_num": 1, 75 | "output_channel_num": 128, 76 | "batch_norm": true, 77 | "activation": "ReLU", 78 | "pool_type": "max", 79 | "pool_axis": 1 80 | }, 81 | "inputs": ["question"] 82 | }, 83 | { 84 | "layer_id": "passage_1", 85 | "layer": "ConvPooling", 86 | "conf": { 87 | "stride": 1, 88 | "padding": 0, 89 | "window_sizes": [2], 90 | "input_channel_num": 1, 91 | "output_channel_num": 128, 92 | "batch_norm": true, 93 | "activation": "ReLU", 94 | "pool_type": "max", 95 | "pool_axis": 1 96 | }, 97 | "inputs": ["passage"] 98 | }, 99 | { 100 | "layer_id": 
"comb_qp", 101 | "layer": "Combination", 102 | "conf": { 103 | "operations": ["origin"] 104 | }, 105 | "inputs": ["question_1", "passage_1"] 106 | }, 107 | { 108 | "output_layer_flag": true, 109 | "layer_id": "output", 110 | "layer": "Linear", 111 | "conf": { 112 | "hidden_dim": [128,2], 113 | "activation": "PReLU", 114 | "last_hidden_activation": false 115 | }, 116 | "inputs": ["comb_qp"] 117 | } 118 | ], 119 | "loss": { 120 | "losses": [ 121 | { 122 | "type": "CrossEntropyLoss", 123 | "conf": { 124 | "weight": [0.1,0.9], 125 | "size_average": true 126 | }, 127 | "inputs": ["output","label"] 128 | } 129 | ] 130 | }, 131 | "metrics": ["auc","accuracy"] 132 | } -------------------------------------------------------------------------------- /docs/build/html/_static/js/theme.js: -------------------------------------------------------------------------------- 1 | /* sphinx_rtd_theme version 0.4.3 | MIT license */ 2 | /* Built 20190212 16:02 */ 3 | require=function r(s,a,l){function c(e,n){if(!a[e]){if(!s[e]){var i="function"==typeof require&&require;if(!n&&i)return i(e,!0);if(u)return u(e,!0);var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}var o=a[e]={exports:{}};s[e][0].call(o.exports,function(n){return c(s[e][1][n]||n)},o,o.exports,r,s,a,l)}return a[e].exports}for(var u="function"==typeof require&&require,n=0;n"),i("table.docutils.footnote").wrap("
"),i("table.docutils.citation").wrap("
"),i(".wy-menu-vertical ul").not(".simple").siblings("a").each(function(){var e=i(this);expand=i(''),expand.on("click",function(n){return t.toggleCurrent(e),n.stopPropagation(),!1}),e.prepend(expand)})},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),i=e.find('[href="'+n+'"]');if(0===i.length){var t=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(i=e.find('[href="#'+t.attr("id")+'"]')).length&&(i=e.find('[href="#"]'))}0this.docHeight||(this.navBar.scrollTop(i),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",function(){this.linkScroll=!1})},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:e.exports.ThemeNav,StickyNav:e.exports.ThemeNav}),function(){for(var r=0,n=["ms","moz","webkit","o"],e=0;e