├── requirements.txt ├── test ├── __init__.py └── core │ ├── __init__.py │ └── test_fastHan.py ├── fastHan ├── model │ ├── __init__.py │ ├── camrModel.py │ ├── multitask_metric_base.py │ ├── weight_manager.py │ ├── UserDict.py │ ├── camr_finetune_dataloader.py │ ├── metrics.py │ ├── dependency_parsing_model.py │ ├── finetune_dataloader.py │ ├── baseModel.py │ └── camr_restore.py └── __init__.py ├── docs ├── requirements.txt ├── build │ ├── html │ │ ├── objects.inv │ │ ├── _static │ │ │ ├── file.png │ │ │ ├── minus.png │ │ │ ├── plus.png │ │ │ ├── fonts │ │ │ │ ├── FontAwesome.otf │ │ │ │ ├── lato-bold.woff │ │ │ │ ├── lato-bold.woff2 │ │ │ │ ├── lato-normal.woff │ │ │ │ ├── Lato │ │ │ │ │ ├── lato-bold.eot │ │ │ │ │ ├── lato-bold.ttf │ │ │ │ │ ├── lato-bold.woff │ │ │ │ │ ├── lato-bold.woff2 │ │ │ │ │ ├── lato-italic.eot │ │ │ │ │ ├── lato-italic.ttf │ │ │ │ │ ├── lato-italic.woff │ │ │ │ │ ├── lato-regular.eot │ │ │ │ │ ├── lato-regular.ttf │ │ │ │ │ ├── lato-italic.woff2 │ │ │ │ │ ├── lato-regular.woff │ │ │ │ │ ├── lato-regular.woff2 │ │ │ │ │ ├── lato-bolditalic.eot │ │ │ │ │ ├── lato-bolditalic.ttf │ │ │ │ │ ├── lato-bolditalic.woff │ │ │ │ │ └── lato-bolditalic.woff2 │ │ │ │ ├── lato-normal.woff2 │ │ │ │ ├── Roboto-Slab-Bold.woff │ │ │ │ ├── Roboto-Slab-Thin.woff │ │ │ │ ├── lato-bold-italic.woff │ │ │ │ ├── Roboto-Slab-Bold.woff2 │ │ │ │ ├── Roboto-Slab-Light.woff │ │ │ │ ├── Roboto-Slab-Light.woff2 │ │ │ │ ├── Roboto-Slab-Thin.woff2 │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ ├── lato-bold-italic.woff2 │ │ │ │ ├── lato-normal-italic.woff │ │ │ │ ├── Roboto-Slab-Regular.woff │ │ │ │ ├── Roboto-Slab-Regular.woff2 │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ ├── fontawesome-webfont.woff2 │ │ │ │ ├── lato-normal-italic.woff2 │ │ │ │ └── RobotoSlab │ │ │ │ │ ├── roboto-slab-v7-bold.eot │ │ │ │ │ ├── roboto-slab-v7-bold.ttf │ │ │ │ │ ├── roboto-slab-v7-bold.woff │ │ │ │ │ ├── roboto-slab-v7-bold.woff2 │ │ │ │ │ ├── 
roboto-slab-v7-regular.eot │ │ │ │ │ ├── roboto-slab-v7-regular.ttf │ │ │ │ │ ├── roboto-slab-v7-regular.woff │ │ │ │ │ └── roboto-slab-v7-regular.woff2 │ │ │ ├── css │ │ │ │ ├── fonts │ │ │ │ │ ├── lato-bold.woff │ │ │ │ │ ├── lato-bold.woff2 │ │ │ │ │ ├── lato-normal.woff │ │ │ │ │ ├── lato-normal.woff2 │ │ │ │ │ ├── Roboto-Slab-Bold.woff │ │ │ │ │ ├── Roboto-Slab-Bold.woff2 │ │ │ │ │ ├── lato-bold-italic.woff │ │ │ │ │ ├── lato-bold-italic.woff2 │ │ │ │ │ ├── Roboto-Slab-Regular.woff │ │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ │ ├── lato-normal-italic.woff │ │ │ │ │ ├── lato-normal-italic.woff2 │ │ │ │ │ ├── Roboto-Slab-Regular.woff2 │ │ │ │ │ └── fontawesome-webfont.woff2 │ │ │ │ └── badge_only.css │ │ │ ├── documentation_options.js │ │ │ ├── js │ │ │ │ ├── badge_only.js │ │ │ │ ├── html5shiv.min.js │ │ │ │ ├── html5shiv-printshiv.min.js │ │ │ │ └── theme.js │ │ │ ├── translations.js │ │ │ └── pygments.css │ │ ├── _images │ │ │ └── procedures.PNG │ │ ├── _sources │ │ │ ├── fastHan.FastModel.rst.txt │ │ │ ├── fastHan.model.bert.rst.txt │ │ │ ├── fastHan.model.model.rst.txt │ │ │ ├── fastHan.model.utils.rst.txt │ │ │ ├── fastHan.model.UserDict.rst.txt │ │ │ ├── fastHan.model.CharParser.rst.txt │ │ │ ├── fastHan.model.BertCharParser.rst.txt │ │ │ ├── fastHan.model.old_fastNLP_bert.rst.txt │ │ │ ├── fastHan.model.bert_encoder_theseus.rst.txt │ │ │ ├── fastHan.rst.txt │ │ │ ├── index.rst.txt │ │ │ ├── fastHan.model.rst.txt │ │ │ └── user │ │ │ │ └── example.rst.txt │ │ ├── .buildinfo │ │ ├── search.html │ │ ├── _modules │ │ │ └── index.html │ │ ├── index.html │ │ ├── fastHan.model.html │ │ ├── fastHan.model.utils.html │ │ ├── py-modindex.html │ │ ├── fastHan.model.model.html │ │ ├── searchindex.js │ │ └── fastHan.model.UserDict.html │ └── doctrees │ │ ├── index.doctree │ │ ├── fastHan.doctree │ │ ├── environment.pickle │ │ ├── fastHan.model.doctree │ │ ├── user │ │ └── example.doctree │ │ 
├── fastHan.FastModel.doctree │ │ ├── fastHan.model.bert.doctree │ │ ├── fastHan.model.model.doctree │ │ ├── fastHan.model.utils.doctree │ │ ├── fastHan.model.UserDict.doctree │ │ ├── fastHan.model.CharParser.doctree │ │ ├── fastHan.model.BertCharParser.doctree │ │ ├── fastHan.model.old_fastNLP_bert.doctree │ │ └── fastHan.model.bert_encoder_theseus.doctree ├── source │ ├── modules.rst │ ├── figures │ │ └── procedures.PNG │ ├── fastHan.FastModel.rst │ ├── fastHan.model.bert.rst │ ├── fastHan.model.model.rst │ ├── fastHan.model.utils.rst │ ├── fastHan.model.UserDict.rst │ ├── fastHan.model.CharParser.rst │ ├── fastHan.model.BertCharParser.rst │ ├── fastHan.model.old_fastNLP_bert.rst │ ├── fastHan.model.bert_encoder_theseus.rst │ ├── fastHan.rst │ ├── user │ │ ├── version.rst │ │ ├── installation.rst │ │ ├── example.rst │ │ └── quickstart.rst │ ├── fastHan.model.rst │ ├── index.rst │ └── conf.py ├── Makefile └── doc_utils.py ├── .vscode └── settings.json ├── setup.py ├── .Jenkinsfile └── .gitignore /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fastHan/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fastHan/model/camrModel.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: 
-------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme -------------------------------------------------------------------------------- /fastHan/__init__.py: -------------------------------------------------------------------------------- 1 | from .FastModel import FastHan 2 | from .FastCamrModel import FastCAMR -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "C:\\Users\\gzc\\Anaconda3\\python.exe" 3 | } -------------------------------------------------------------------------------- /docs/build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/objects.inv -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | fastHan 2 | ======= 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | fastHan 8 | -------------------------------------------------------------------------------- /docs/build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/file.png -------------------------------------------------------------------------------- /docs/build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/minus.png -------------------------------------------------------------------------------- /docs/build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/plus.png -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.doctree -------------------------------------------------------------------------------- /docs/source/figures/procedures.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/source/figures/procedures.PNG -------------------------------------------------------------------------------- /docs/build/doctrees/environment.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/build/html/_images/procedures.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_images/procedures.PNG -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/user/example.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/user/example.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.FastModel.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.FastModel.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.bert.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.bert.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/FontAwesome.otf 
-------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-normal.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-normal.woff -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.model.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.model.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.utils.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.utils.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-bold.woff -------------------------------------------------------------------------------- 
/docs/build/html/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-normal.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-normal.woff2 -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.UserDict.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.UserDict.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-normal.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-normal.woff -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-normal.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-normal.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.eot: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Thin.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Thin.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-bold-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-bold-italic.woff -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.CharParser.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.CharParser.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Light.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Light.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Light.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Light.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Thin.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Thin.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-bold-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-bold-italic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-normal-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-normal-italic.woff -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.BertCharParser.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.BertCharParser.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-bold-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-bold-italic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-bold-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-bold-italic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bolditalic.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.woff: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Regular.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-normal-italic.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-normal-italic.woff2 -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.old_fastNLP_bert.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.old_fastNLP_bert.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- 
/docs/build/html/_static/css/fonts/lato-normal-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-normal-italic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-normal-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-normal-italic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.bert_encoder_theseus.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.bert_encoder_theseus.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot 
-------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /docs/source/fastHan.FastModel.rst: -------------------------------------------------------------------------------- 1 | fastHan.FastModel module 2 | ======================== 3 | 4 | .. automodule:: fastHan.FastModel 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.bert.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.bert module 2 | ========================= 3 | 4 | .. automodule:: fastHan.model.bert 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.model.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.model module 2 | ========================== 3 | 4 | .. automodule:: fastHan.model.model 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.utils.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.utils module 2 | ========================== 3 | 4 | .. 
automodule:: fastHan.model.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.FastModel.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.FastModel module 2 | ======================== 3 | 4 | .. automodule:: fastHan.FastModel 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.UserDict.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.UserDict module 2 | ============================= 3 | 4 | .. automodule:: fastHan.model.UserDict 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.bert.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.bert module 2 | ========================= 3 | 4 | .. automodule:: fastHan.model.bert 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.model.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.model module 2 | ========================== 3 | 4 | .. automodule:: fastHan.model.model 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.utils.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.utils module 2 | ========================== 3 | 4 | .. 
automodule:: fastHan.model.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.CharParser.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.CharParser module 2 | =============================== 3 | 4 | .. automodule:: fastHan.model.CharParser 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.UserDict.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.UserDict module 2 | ============================= 3 | 4 | .. automodule:: fastHan.model.UserDict 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.CharParser.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.CharParser module 2 | =============================== 3 | 4 | .. automodule:: fastHan.model.CharParser 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.BertCharParser.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.BertCharParser module 2 | =================================== 3 | 4 | .. automodule:: fastHan.model.BertCharParser 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.old_fastNLP_bert.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.old\_fastNLP\_bert module 2 | ======================================= 3 | 4 | .. 
automodule:: fastHan.model.old_fastNLP_bert 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.BertCharParser.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.BertCharParser module 2 | =================================== 3 | 4 | .. automodule:: fastHan.model.BertCharParser 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: d710ae17477069bdfb78e0364e07f8a0 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.old_fastNLP_bert.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.old\_fastNLP\_bert module 2 | ======================================= 3 | 4 | .. automodule:: fastHan.model.old_fastNLP_bert 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.bert_encoder_theseus.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.bert\_encoder\_theseus module 2 | =========================================== 3 | 4 | .. 
automodule:: fastHan.model.bert_encoder_theseus 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.bert_encoder_theseus.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.bert\_encoder\_theseus module 2 | =========================================== 3 | 4 | .. automodule:: fastHan.model.bert_encoder_theseus 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.rst: -------------------------------------------------------------------------------- 1 | fastHan package 2 | =============== 3 | 4 | .. automodule:: fastHan 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Subpackages 10 | ----------- 11 | 12 | .. toctree:: 13 | 14 | fastHan.model 15 | 16 | Submodules 17 | ---------- 18 | 19 | .. toctree:: 20 | 21 | fastHan.FastModel 22 | 23 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan package 2 | =============== 3 | 4 | .. automodule:: fastHan 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Subpackages 10 | ----------- 11 | 12 | .. toctree:: 13 | 14 | fastHan.model 15 | 16 | Submodules 17 | ---------- 18 | 19 | .. 
toctree:: 20 | 21 | fastHan.FastModel 22 | 23 | -------------------------------------------------------------------------------- /docs/build/html/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '0.5.0', 4 | LANGUAGE: 'zh_CN', 5 | COLLAPSE_INDEX: false, 6 | FILE_SUFFIX: '.html', 7 | HAS_SOURCE: true, 8 | SOURCELINK_SUFFIX: '.txt', 9 | NAVIGATION_WITH_KEYS: false, 10 | }; -------------------------------------------------------------------------------- /docs/source/user/version.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | 版本更新 3 | =============== 4 | 5 | .. contents:: 6 | :local: 7 | 8 | fastHan的版本修正如下问题:: 9 | 10 | 1.1版本的fastHan与0.5.5版本的fastNLP会导致importerror。如果使用1.1版本的fastHan,请使用0.5.0版本的fastNLP。 11 | 1.2版本的fastHan修复了fastNLP版本兼容问题。小于等于1.2版本的fastHan在输入句子的首尾包含空格、换行符时会产生BUG。如果字符串首尾包含上述字符,请使用strip函数处理输入字符串。 12 | 1.3版本的fastHan自动对输入字符串做strip函数处理。 13 | 1.4版本的fastHan加入用户词典功能(仅限于分词任务) -------------------------------------------------------------------------------- /docs/source/user/installation.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | 安装指南 3 | =============== 4 | 5 | .. contents:: 6 | :local: 7 | 8 | fastHan 依赖如下包:: 9 | 10 | torch>=1.0.0 11 | fastNLP>=0.5.0 12 | 13 | .. note:: 14 | 15 | 其中torch的安装可能与操作系统及 CUDA 的版本相关,请参见 `PyTorch 官网 `_ 。 16 | 此外,如果使用0.5.0版本的fastNLP,建议使用1.0.0版本的torch,否则在解码阶段会有bug影响准确率。如果使用高版本的torch,请使用0.5.5版本的fastNLP。 17 | 18 | .. 
code:: shell 19 | 20 | >>> pip install fastHan 21 | -------------------------------------------------------------------------------- /docs/build/html/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | demo 中文文档 2 | ===================== 3 | 4 | 5 | 用户手册 6 | ---------------- 7 | 8 | .. toctree:: 9 | :maxdepth: 1 10 | 11 | 语法样例 12 | 13 | API 文档 14 | ------------- 15 | 16 | 除了用户手册之外,你还可以通过查阅 API 文档来找到你所需要的工具。 17 | 18 | .. toctree:: 19 | :titlesonly: 20 | :maxdepth: 2 21 | 22 | fastHan 23 | 24 | 25 | 索引与搜索 26 | ================== 27 | 28 | * :ref:`genindex` 29 | * :ref:`modindex` 30 | * :ref:`search` 31 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.rst: -------------------------------------------------------------------------------- 1 | fastHan.model package 2 | ===================== 3 | 4 | .. automodule:: fastHan.model 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Submodules 10 | ---------- 11 | 12 | .. toctree:: 13 | 14 | fastHan.model.BertCharParser 15 | fastHan.model.CharParser 16 | fastHan.model.UserDict 17 | fastHan.model.bert 18 | fastHan.model.bert_encoder_theseus 19 | fastHan.model.model 20 | fastHan.model.old_fastNLP_bert 21 | fastHan.model.utils 22 | 23 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model package 2 | ===================== 3 | 4 | .. automodule:: fastHan.model 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Submodules 10 | ---------- 11 | 12 | .. 
toctree:: 13 | 14 | fastHan.model.BertCharParser 15 | fastHan.model.CharParser 16 | fastHan.model.UserDict 17 | fastHan.model.bert 18 | fastHan.model.bert_encoder_theseus 19 | fastHan.model.model 20 | fastHan.model.old_fastNLP_bert 21 | fastHan.model.utils 22 | 23 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | fastHan 中文文档 2 | ===================== 3 | 4 | 5 | 用户手册 6 | ---------------- 7 | 8 | .. toctree:: 9 | :maxdepth: 1 10 | 11 | 安装指南 12 | 版本更新 13 | 快速入门 14 | 15 | API 文档 16 | ------------- 17 | 18 | 除了用户手册之外,你还可以通过查阅 API 文档来找到你所需要的工具。 19 | 20 | .. toctree:: 21 | :titlesonly: 22 | :maxdepth: 2 23 | 24 | fastHan 25 | 26 | 27 | 索引与搜索 28 | ================== 29 | 30 | * :ref:`genindex` 31 | * :ref:`modindex` 32 | * :ref:`search` 33 | 34 | 该项目在github的地址为:https://github.com/fastnlp/fastHan -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | from setuptools import setup, find_packages 5 | 6 | with open('README.md', encoding='utf-8') as f: 7 | readme = f.read() 8 | 9 | with open('requirements.txt', encoding='utf-8') as f: 10 | reqs = f.read() 11 | 12 | pkgs = [p for p in find_packages()] 13 | print(pkgs) 14 | 15 | setup( 16 | name='fastHan', 17 | version='2.0', 18 | url='https://github.com/fastnlp/fastHan', 19 | description=( 20 | '使用深度学习联合模型,解决中文分词、词性标注、命名实体识别、依存分析任务。' 21 | ), 22 | long_description=readme, 23 | long_description_content_type='text/markdown', 24 | author='王鹏宇', 25 | license='Apache License', 26 | python_requires='>=3.6', 27 | packages=pkgs, 28 | install_requires=reqs.strip().split('\n'), 29 | ) 30 | 31 | -------------------------------------------------------------------------------- /docs/build/html/_static/js/badge_only.js: 
-------------------------------------------------------------------------------- 1 | !function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}}); -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXAPIDOC = sphinx-apidoc 7 | SPHINXBUILD = sphinx-build 8 | SPHINXPROJ = fastHan 9 | SOURCEDIR = source 10 | BUILDDIR = build 11 | 12 | # Put it first so that "make" without argument is like "make help". 
import unittest

from fastHan import FastHan, FastCAMR


class TestFastHan(unittest.TestCase):
    """Smoke tests for FastHan / FastCAMR.

    Inherits from unittest.TestCase so the standard unittest machinery
    (discovery, assertions) is available.
    """

    def test_init(self):
        """Models should initialize both from the default source and a local path."""
        model = FastHan()
        camr_model = FastCAMR()

        # NOTE(review): these paths are machine-specific; the test will only
        # pass on hosts where the finetuned checkpoints exist at these paths.
        model = FastHan(url="/remote-home/pywang/finetuned_model")
        camr_model = FastCAMR(url="/remote-home/pywang/finetuned_camr_model")

    def test_call(self):
        """Running every supported target over a batch of sentences should not raise."""
        sentence = [
            '一行人下得山来,走不多时,忽听前面猛兽大吼之声一阵阵的传来。',
            '韩宝驹一提缰,胯下黄马向前窜出,奔了一阵,忽地立定,不论如何催迫,黄马只是不动。',
            # BUG FIX: a missing trailing comma here previously caused implicit
            # string concatenation, silently merging this sentence with the
            # next one and shrinking the batch from 4 sentences to 3.
            '韩宝驹心知有异,远远望去,只见前面围了一群人,有几头猎豹在地上乱抓乱扒。',
            '他知坐骑害怕豹子,跃下马来,抽出金龙鞭握在手中。',
        ]

        targets = ['CWS', 'POS', 'CWS-guwen', 'POS-guwen', 'NER', 'Parsing']
        model = FastHan()
        for target in targets:
            model(sentence, target)

        model = FastCAMR()
        # NOTE(review): hard-codes a GPU device; fails on CPU-only hosts.
        model.set_device('cuda:0')

        sentence = "这样 的 活动 还 有 什么 意义 呢 ?"
        answer = model(sentence)
        print(answer)
        for ans in answer:
            print(ans)
from fastNLP.core.metrics import Metric
from fastNLP import SpanFPreRecMetric
import torch

from .metrics import SegAppCharParseF1Metric, CWSMetric


class MultiTaskMetric(Metric):
    """Aggregate metric that dispatches each batch to a per-task sub-metric.

    Sequence-labelling tasks are scored with fastNLP's SpanFPreRecMetric.
    The single biaffine dependency-parsing task (``biaffine_task``) is
    scored with SegAppCharParseF1Metric, plus an auxiliary CWSMetric for
    the word segmentation implied by the parse.

    :param all_tasks: ordered list of task names; batch task ids index into it.
    :param task_vocab_map: mapping from task name to its label vocabulary.
    :param biaffine_task: name of the dependency-parsing task.
    """

    def __init__(self, all_tasks, task_vocab_map, biaffine_task='Parsing-ctb9'):
        super().__init__()
        self.all_tasks = all_tasks
        self.task_vocab_map = task_vocab_map
        self.biaffine_task = biaffine_task
        self.metrics = dict()

        for task in all_tasks:
            if task == biaffine_task:
                # 'punct' arcs are excluded from the parsing F1 computation.
                self.metrics[task] = SegAppCharParseF1Metric(
                    pun_index=task_vocab_map[self.biaffine_task].word2idx['punct'])
                continue
            self.metrics[task] = SpanFPreRecMetric(tag_vocab=task_vocab_map[task])

        self.parsing_cws_metric = CWSMetric()

        # Tasks that have received at least one batch since the last reset.
        self.tasks_flag = set()

    def update(self, task, seq_len, labels, pred=None, heads=None,
               head_preds=None, label_preds=None):
        """Route one batch to the sub-metric of its task.

        ``task`` arrives as a tensor whose entries are all the same task id;
        only the first element is used.
        """
        task = task.tolist()[0]
        task = self.all_tasks[task]
        self.tasks_flag.add(task)
        if task == self.biaffine_task:
            assert heads is not None
            assert head_preds is not None
            assert label_preds is not None
            self.metrics[task].update(labels, heads, head_preds, label_preds, seq_len)
            self.parsing_cws_metric.update(labels, label_preds, seq_len)
        else:
            assert pred is not None
            # labels[:, 0] is the task token prepended to every sequence;
            # drop it before span comparison.
            self.metrics[task].update(pred=pred, target=labels[:, 1:], seq_len=seq_len)

    def reset(self):
        """Clear all accumulated state so a fresh evaluation round can start."""
        for task in self.metrics:
            self.metrics[task].reset()
        # BUG FIX: the auxiliary CWS metric and the seen-task set were
        # previously left untouched here, leaking state into the next
        # evaluation round whenever get_metric(reset=True) was not called.
        # hasattr guard: CWSMetric's siblings expose reset(); confirm it does too.
        if hasattr(self.parsing_cws_metric, 'reset'):
            self.parsing_cws_metric.reset()
        self.tasks_flag = set()

    def get_metric(self, reset=True):
        """Return nested ``{macro_task: {corpus: scores}}`` plus averaged F1.

        Also adds ``avg_f`` (macro-average F over tasks), ``all_f`` (the
        per-task averages), and — when the parsing task ran — the implied
        segmentation score under ``scores['Parsing']['ctb9-cws']``.
        """
        scores = dict()
        for task in self.tasks_flag:
            # Task names follow the '<macro_task>-<corpus>' convention.
            macro_task, corpus = task.split('-')
            if macro_task not in scores:
                scores[macro_task] = dict()
            scores[macro_task][corpus] = self.metrics[task].get_metric()

        # Macro-average F over corpora within each task, then over tasks.
        all_f = []
        for macro_task in scores:
            ave_f = sum(scores[macro_task][corpus]['f']
                        for corpus in scores[macro_task]) / len(scores[macro_task])
            all_f.append(ave_f)

        scores['avg_f'] = sum(all_f) / len(all_f)
        if self.biaffine_task in self.tasks_flag:
            scores['Parsing']['ctb9-cws'] = self.parsing_cws_metric.get_metric(reset=reset)

        scores['all_f'] = all_f
        if reset:
            self.tasks_flag = set()
        return scores
Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof 
a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document); -------------------------------------------------------------------------------- /docs/build/html/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li 
.fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version 
.fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} -------------------------------------------------------------------------------- /docs/source/user/example.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | 大标题 3 | ====== 4 | 5 | .. note:: 6 | 中文标题需要符号的数量至少是中文字数的两倍 7 | 8 | .. warning:: 9 | 符号的数量只可以多,不可以少。 10 | 11 | 小标题1 12 | ########### 13 | 14 | 小标题2 15 | ********* 16 | 17 | 小标题3(正常使用) 18 | ======================== 19 | 20 | 小标题4 21 | ------------------- 22 | 23 | 推荐使用大标题、小标题3和小标题4 24 | 25 | 官方文档 http://docutils.sourceforge.net/docs/user/rst/quickref.html 26 | 27 | `熟悉markdown的同学推荐参考这篇文章 `_ 28 | 29 | \<\>内表示的是链接地址,\<\>外的是显示到外面的文字 30 | 31 | 常见语法 32 | ============ 33 | 34 | *emphasis* 35 | 36 | **strong** 37 | 38 | `text` 39 | 40 | ``inline literal`` 41 | 42 | http://docutils.sf.net/ 孤立的网址会自动生成链接 43 | 44 | 显示为特定的文字的链接 `sohu `_ 45 | 46 | 突出显示的 47 | 上面文字 48 | 49 | 正常缩进 50 | 51 | 形成锻炼 52 | 53 | 54 | 55 | 特殊模块 56 | ============ 57 | 58 | 选项会自动识别 59 | 60 | -v An option 61 | -o file Same with value 62 | --delta A long option 63 | --delta=len Same with value 64 | 65 | 66 | 图片 67 | 68 | .. image:: ../figures/procedures.PNG 69 | :height: 200 70 | :width: 560 71 | :scale: 50 72 | :alt: alternate text 73 | :align: center 74 | 75 | 显示一个冒号的代码块:: 76 | 77 | 中间要空一行 78 | 79 | :: 80 | 81 | 不显示冒号的代码块 82 | 83 | .. code-block:: python 84 | 85 | :linenos: 86 | :emphasize-lines: 1,3 87 | 88 | print("专业的代码块") 89 | print("") 90 | print("有行号和高亮") 91 | 92 | 数学块 93 | ========== 94 | 95 | .. 
math:: 96 | 97 | H_2O + Na = NaOH + H_2 \uparrow 98 | 99 | 复杂表格 100 | ========== 101 | 102 | +------------------------+------------+----------+----------+ 103 | | Header row, column 1 | Header 2 | Header 3 | Header 4 | 104 | | (header rows optional) | | | | 105 | +========================+============+==========+==========+ 106 | | body row 1, column 1 | column 2 | column 3 | column 4 | 107 | +------------------------+------------+----------+----------+ 108 | | body row 2 | Cells may span columns. | 109 | +------------------------+------------+---------------------+ 110 | | body row 3 | Cells may | - Table cells | 111 | +------------------------+ span rows. | - contain | 112 | | body row 4 | | - body elements. | 113 | +------------------------+------------+---------------------+ 114 | 115 | 简易表格 116 | ========== 117 | 118 | ===== ===== ====== 119 | Inputs Output 120 | ------------ ------ 121 | A B A or B 122 | ===== ===== ====== 123 | False False False 124 | True True True 125 | ===== ===== ====== 126 | 127 | csv 表格 128 | ============ 129 | 130 | .. csv-table:: 131 | :header: sentence, target 132 | 133 | This is the first instance ., 0 134 | Second instance ., 1 135 | Third instance ., 1 136 | ..., ... 137 | 138 | 139 | 140 | [重要]各种链接 141 | =================== 142 | 143 | 各种链接帮助我们连接到fastNLP文档的各个位置 144 | 145 | \<\>内表示的是链接地址,\<\>外的是显示到外面的文字 146 | 147 | :doc:`根据文件名链接 ` 148 | 149 | :mod:`~fastNLP.core.batch` 150 | 151 | :class:`~fastNLP.Batch` 152 | 153 | ~表示只显示最后一项 154 | 155 | :meth:`fastNLP.DataSet.apply` 156 | 157 | 下面这个代码是不可行的,必须要用 r""" 才行: 158 | 159 | .. code:: 160 | 161 | :param float beta: f_beta分数, :math:`f_{beta} = \frac{(1 + {beta}^{2})*(pre*rec)}{({beta}^{2}*pre + rec)}` . 
常用为 `beta=0.5, 1, 2` 若为0.5则精确率的权重高于召回率;若为1,则两者平等;若为2,则召回率权重高于精确率。 162 | 163 | -------------------------------------------------------------------------------- /docs/build/html/_sources/user/example.rst.txt: -------------------------------------------------------------------------------- 1 | ====== 2 | 大标题 3 | ====== 4 | 5 | .. note:: 6 | 中文标题需要符号的数量至少是中文字数的两倍 7 | 8 | .. warning:: 9 | 符号的数量只可以多,不可以少。 10 | 11 | 小标题1 12 | ########### 13 | 14 | 小标题2 15 | ********* 16 | 17 | 小标题3(正常使用) 18 | ======================== 19 | 20 | 小标题4 21 | ------------------- 22 | 23 | 推荐使用大标题、小标题3和小标题4 24 | 25 | 官方文档 http://docutils.sourceforge.net/docs/user/rst/quickref.html 26 | 27 | `熟悉markdown的同学推荐参考这篇文章 `_ 28 | 29 | \<\>内表示的是链接地址,\<\>外的是显示到外面的文字 30 | 31 | 常见语法 32 | ============ 33 | 34 | *emphasis* 35 | 36 | **strong** 37 | 38 | `text` 39 | 40 | ``inline literal`` 41 | 42 | http://docutils.sf.net/ 孤立的网址会自动生成链接 43 | 44 | 显示为特定的文字的链接 `sohu `_ 45 | 46 | 突出显示的 47 | 上面文字 48 | 49 | 正常缩进 50 | 51 | 形成锻炼 52 | 53 | 54 | 55 | 特殊模块 56 | ============ 57 | 58 | 选项会自动识别 59 | 60 | -v An option 61 | -o file Same with value 62 | --delta A long option 63 | --delta=len Same with value 64 | 65 | 66 | 图片 67 | 68 | .. image:: ../figures/procedures.PNG 69 | :height: 200 70 | :width: 560 71 | :scale: 50 72 | :alt: alternate text 73 | :align: center 74 | 75 | 显示一个冒号的代码块:: 76 | 77 | 中间要空一行 78 | 79 | :: 80 | 81 | 不显示冒号的代码块 82 | 83 | .. code-block:: python 84 | 85 | :linenos: 86 | :emphasize-lines: 1,3 87 | 88 | print("专业的代码块") 89 | print("") 90 | print("有行号和高亮") 91 | 92 | 数学块 93 | ========== 94 | 95 | .. 
math:: 96 | 97 | H_2O + Na = NaOH + H_2 \uparrow 98 | 99 | 复杂表格 100 | ========== 101 | 102 | +------------------------+------------+----------+----------+ 103 | | Header row, column 1 | Header 2 | Header 3 | Header 4 | 104 | | (header rows optional) | | | | 105 | +========================+============+==========+==========+ 106 | | body row 1, column 1 | column 2 | column 3 | column 4 | 107 | +------------------------+------------+----------+----------+ 108 | | body row 2 | Cells may span columns. | 109 | +------------------------+------------+---------------------+ 110 | | body row 3 | Cells may | - Table cells | 111 | +------------------------+ span rows. | - contain | 112 | | body row 4 | | - body elements. | 113 | +------------------------+------------+---------------------+ 114 | 115 | 简易表格 116 | ========== 117 | 118 | ===== ===== ====== 119 | Inputs Output 120 | ------------ ------ 121 | A B A or B 122 | ===== ===== ====== 123 | False False False 124 | True True True 125 | ===== ===== ====== 126 | 127 | csv 表格 128 | ============ 129 | 130 | .. csv-table:: 131 | :header: sentence, target 132 | 133 | This is the first instance ., 0 134 | Second instance ., 1 135 | Third instance ., 1 136 | ..., ... 137 | 138 | 139 | 140 | [重要]各种链接 141 | =================== 142 | 143 | 各种链接帮助我们连接到fastNLP文档的各个位置 144 | 145 | \<\>内表示的是链接地址,\<\>外的是显示到外面的文字 146 | 147 | :doc:`根据文件名链接 ` 148 | 149 | :mod:`~fastNLP.core.batch` 150 | 151 | :class:`~fastNLP.Batch` 152 | 153 | ~表示只显示最后一项 154 | 155 | :meth:`fastNLP.DataSet.apply` 156 | 157 | 下面这个代码是不可行的,必须要用 r""" 才行: 158 | 159 | .. code:: 160 | 161 | :param float beta: f_beta分数, :math:`f_{beta} = \frac{(1 + {beta}^{2})*(pre*rec)}{({beta}^{2}*pre + rec)}` . 
def convert_cws_macro(all_tasks):
    """Collapse every 'CWS*' task name onto the shared key 'CWS'.

    All other task names map to themselves, so the result can be used as a
    ``key_mapper`` that pools the losses of every CWS corpus into one entry.
    """
    return {
        task: 'CWS' if task.startswith('CWS') else task
        for task in all_tasks
    }


class WeightManagerBase(object):
    """Common interface for per-task loss-weight bookkeeping.

    ``key_mapper`` translates an incoming task name into the internal key the
    weights are stored under (e.g. pooling all CWS corpora onto 'CWS'); when
    omitted, the identity mapping over ``all_tasks`` is used.

    Subclasses are expected to define ``self.weight`` (a dict keyed by the
    mapped task names) before ``get`` is called.
    """

    def __init__(self, all_tasks, key_mapper=None):
        self.all_tasks = all_tasks
        # Fall back to the identity mapping when no mapper is supplied.
        if key_mapper is None:
            key_mapper = {task: task for task in all_tasks}
        self.key_mapper = key_mapper

    def get(self, task):
        """Return the stored weight for *task* (after key mapping)."""
        return self.weight.get(self.key_mapper.get(task))

    def update(self, task, loss):
        """Observe a loss value; the base manager ignores it."""
        pass


class FixedWeightManager(WeightManagerBase):
    """Manager whose weights are supplied up front and never change."""

    def __init__(self, all_tasks, weight, key_mapper=None):
        super().__init__(all_tasks=all_tasks, key_mapper=key_mapper)
        # ``weight`` must be keyed by the mapped task names.
        self.weight = weight


class QueueWeightManager(WeightManagerBase):
    """Collects the first ``max_steps`` losses per task, then freezes the
    weight at their mean.

    Until a task's queue fills up, its weight stays at the initial value 1;
    losses arriving after the queue is full are ignored.

    NOTE(review): ``norm_number`` is validated against ``max_steps`` but never
    used afterwards — presumably it was meant to size the normalisation
    window; confirm with the original author.
    """

    def __init__(self,
                 all_tasks,
                 max_steps=1000,
                 norm_number=1000,
                 key_mapper=None):
        assert norm_number <= max_steps

        super().__init__(all_tasks=all_tasks, key_mapper=key_mapper)
        self.max_steps = max_steps
        self.norm_number = norm_number

        internal_keys = list(self.key_mapper.values())
        self.weight = {key: 1 for key in internal_keys}
        self.task_loss = {key: [] for key in internal_keys}

    def update(self, task, loss):
        key = self.key_mapper.get(task)
        history = self.task_loss.get(key)
        if len(history) >= self.max_steps:
            # Queue already full: further losses are ignored.
            return
        history.append(loss)
        if len(history) == self.max_steps:
            # Queue just filled up: freeze the weight at the mean loss.
            self.weight[key] = sum(history) / self.max_steps


class MomentumWeightManager(WeightManagerBase):
    """Bias-corrected exponential moving average of the observed loss.

    ``get`` divides the raw EMA by ``1 - beta**steps`` (Adam-style bias
    correction), so early readings are not dragged toward the zero init.
    """

    def __init__(self, all_tasks, beta=0.9, key_mapper=None):
        super().__init__(all_tasks=all_tasks, key_mapper=key_mapper)

        self.beta = beta
        internal_keys = list(self.key_mapper.values())
        self.weight = {key: 0 for key in internal_keys}
        # Track beta**steps directly instead of keeping a step counter.
        self.step_norm = {key: 1 for key in internal_keys}

    def update(self, task, loss):
        key = self.key_mapper.get(task)
        self.step_norm[key] = self.step_norm[key] * self.beta
        self.weight[key] = self.beta * self.weight[key] + (1 - self.beta) * loss

    def get(self, task):
        key = self.key_mapper.get(task)
        # NOTE(review): raises ZeroDivisionError when queried before any
        # update (step_norm is still 1) — callers appear to update first.
        return self.weight.get(key) / (1 - self.step_norm[key])


class EnsembledWeightManagers(object):
    """Combines several managers multiplicatively.

    ``managers`` is a sequence of ``(manager, exponent)`` pairs; the ensemble
    weight for a task is the product of each ``manager.get(task)**exponent``.
    """

    def __init__(self, managers):
        for entry in managers:
            assert isinstance(entry[0], WeightManagerBase)
        self.managers = managers

    def get(self, task):
        combined = 1
        for manager, exponent in self.managers:
            combined = combined * (manager.get(task) ** exponent)
        return combined

    def update(self, task, loss):
        # Broadcast the observed loss to every member manager.
        for manager, _exponent in self.managers:
            manager.update(task, loss)
code-block:: python 29 | 30 | sentence="郭靖是金庸笔下的一名男主。" 31 | answer=model(sentence,target="Parsing") 32 | print(answer) 33 | answer=model(sentence,target="NER") 34 | print(answer) 35 | 36 | 模型将会输出如下信息: 37 | 38 | .. code-block:: text 39 | 40 | [[['郭靖', 2, 'top', 'NR'], ['是', 0, 'root', 'VC'], ['金庸', 4, 'nn', 'NR'], ['笔', 5, 'lobj', 'NN'], ['下', 10, 'assmod', 'LC'], ['的', 5, 'assm', 'DEG'], ['一', 8, 'nummod', 'CD'], ['名', 10, 'clf', 'M'], ['男', 10, 'amod', 'JJ'], ['主', 2, 'attr', 'NN'], ['。', 2, 'punct', 'PU']]] 41 | [[['郭靖', 'NR'], ['金庸', 'NR']]] 42 | 43 | 此外,模型拥有如下这些功能: 44 | 45 | 任务选择 46 | ~~~~~~~~~~~~ 47 | 48 | target参数可在'Parsing'、'CWS'、'POS'、'NER'四个选项中取值,模型将分别进行依存分析、分词、词性标注、命名实体识别任务,模型默认进行CWS任务。其中词性标注任务包含了分词的信息,而依存分析任务又包含了词性标注任务的信息。命名实体识别任务相较其他任务独立。 49 | 50 | 如果分别运行CWS、POS、Parsing任务,模型输出的分词结果等可能存在冲突。如果想获得不冲突的各类信息,请直接运行包含全部所需信息的那项任务。 51 | 52 | 模型的POS、Parsing任务均使用CTB标签集。NER使用msra标签集。 53 | 54 | 55 | 分词风格 56 | ~~~~~~~~~~~~ 57 | 分词风格,指的是训练模型中文分词模块的10个语料库,模型可以区分这10个语料库,设置分词style为S即令模型认为现在正在处理S语料库的分词。所以分词style实际上是与语料库的覆盖面、分词粒度相关的。如本模型默认的CTB语料库分词粒度较细。如果想切换不同的粒度,可以使用模型的set_cws_style函数,例子如下: 58 | 59 | .. code-block:: python 60 | 61 | sentence="一个苹果。" 62 | print(model(sentence,'CWS')) 63 | model.set_cws_style('cnc') 64 | print(model(sentence,'CWS')) 65 | 66 | 模型将输出如下内容: 67 | 68 | .. code-block:: text 69 | 70 | [['一', '个', '苹果', '。']] 71 | [['一个', '苹果', '。']] 72 | 73 | 对语料库的选取参考了下方CWS SOTA模型的论文,共包括:SIGHAN 2005的 MSR、PKU、AS、CITYU 语料库,由山西大学发布的 SXU 语料库,由斯坦福的CoreNLP 发布的 CTB6 语料库,由国家语委公布的 CNC 语料库,由王威廉先生公开的微博树库 WTB,由张梅山先生公开的诛仙语料库 ZX,Universal Dependencies 项目的 UD 语料库。 74 | 75 | 输入与输出 76 | ~~~~~~~~~~~~ 77 | 输入模型的可以是单独的字符串,也可是由字符串组成的列表。如果输入的是列表,模型将一次性处理所有输入的字符串,所以请自行控制 batch size。 78 | 79 | 模型的输出是在fastHan模块中定义的sentence与token类。模型将输出一个由sentence组成的列表,而每个sentence又由token组成。每个token本身代表一个被分好的词,有pos、head、head_label、ner四项属性,代表了该词的词性、依存关系、命名实体识别信息。 80 | 81 | 一则输入输出的例子如下所示: 82 | 83 | .. 
sentence=["我爱踢足球。","林丹是冠军!"]
code-block:: text 145 | 146 | [['奥利奥利奥']] 147 | [['奥利', '奥利奥']] 148 | -------------------------------------------------------------------------------- /docs/build/html/_static/js/html5shiv-printshiv.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var 
b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark 
meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document); -------------------------------------------------------------------------------- /docs/doc_utils.py: -------------------------------------------------------------------------------- 1 | r""" 2 | 用于检测 Python 包的文档是否符合规范的脚本。 3 | 4 | 用法 ``python doc_utils.py `` 5 | 6 | 样例 ``python doc_utils.py ../../fastDemo`` 7 | 8 | .. csv-table:: 9 | :header: "错误代号", "错误类型" 10 | 11 | 0, "项目结构错误" 12 | 1, "模块缺少 __doc__" 13 | 2, "模块缺少 __all__" 14 | 3, "__all__ 中导出的函数/类不应以下划线开头" 15 | 4, "__all__ 中没有导出全部定义的函数/类等" 16 | 5, "__all__ 中存在没有定义的函数/类" 17 | 6, "函数/类中缺少 __doc__" 18 | 7, "类的方法中缺少 __doc__" 19 | 20 | """ 21 | 22 | __all__ = [ 23 | "check", 24 | "check_module", 25 | "check_obj" 26 | ] 27 | 28 | from typing import List, Any 29 | import inspect 30 | import importlib 31 | import sys 32 | import os 33 | 34 | 35 | class ModuleType: 36 | __name__: str 37 | __all__: List[str] 38 | 39 | 40 | def _colored_string(string: str, color: str or int) -> str: 41 | r"""在终端中显示一串有颜色的文字 42 | 43 | :param string: 在终端中显示的文字 44 | :param color: 文字的颜色 45 | :return: 46 | """ 47 | if isinstance(color, str): 48 | color = { 49 | "black": 30, 50 | "red": 31, 51 | "green": 32, 52 | "yellow": 33, 53 | "blue": 34, 54 | "purple": 35, 55 | "cyan": 36, 56 | "white": 37 57 | }[color] 58 | return "\033[%dm%s\033[0m" % (color, string) 59 | 60 | 61 | def 
_alert(code: int, msg: str, color: str = 'red'): 62 | print(_colored_string("[ERROR-{}] {}".format(code, msg.strip()), color)) 63 | 64 | 65 | def check(path: str): 66 | r"""检查该项目目录下的所实现的包内的文档 67 | 68 | :param path: 项目目录 69 | :return: 70 | """ 71 | path = os.path.abspath(path) 72 | print("Package path:", path) 73 | package_name = str(path.split(os.sep)[-1]) 74 | print("Package name:", package_name) 75 | if not os.path.isdir(os.path.join(path, package_name)): 76 | _alert(0, "Package structure is wrong.") 77 | return 78 | sys.path.insert(0, path) 79 | importlib.import_module(package_name) 80 | module = sys.modules[package_name] 81 | check_module(module, package_name) 82 | 83 | 84 | def check_module(module: ModuleType, base_name: str): 85 | r"""递归检查每个模块中对象是否有文档 86 | 87 | :param module: 模块对象 88 | :param base_name: 根模块的名称 89 | :return: 90 | """ 91 | print("\n[M]", module.__name__) 92 | print([e for e in dir(module) if not e.startswith("_")]) 93 | if module.__doc__ is None: 94 | _alert(1, f"""Module '{module.__name__}' don't have __doc__""") 95 | if "__all__" not in dir(module): 96 | _alert(2, f"""'{module.__name__}' don't have __all__""") 97 | else: 98 | set_all = set(module.__all__) 99 | for name, obj in inspect.getmembers(module): 100 | if inspect.ismodule(obj) and obj.__name__.startswith(base_name): 101 | check_module(obj, base_name) 102 | if inspect.isclass(obj) or inspect.isfunction(obj): 103 | if name.startswith("_"): 104 | continue 105 | if name not in set_all: 106 | _alert(4, f"""'{obj.__name__}' not in __all__ of '{module.__name__}' """) 107 | else: 108 | check_obj(obj, module.__name__) 109 | set_all.remove(name) 110 | for obj_name in set_all: 111 | if obj_name.startswith("_"): 112 | _alert(3, f"""'{obj_name}' in '{module.__name__}' should not start with '_'""") 113 | else: 114 | _alert(5, f""" '{obj_name}' in __all__ of '{module.__name__}' does not exist""") 115 | 116 | print("\n") 117 | 118 | 119 | def check_obj(checked_obj: Any, module_name: str): 120 | 
r"""检查某个函数或者类的文档 121 | 122 | .. todo: 123 | 124 | 增加对函数的注释中是否介绍了参数的检查 125 | 126 | :param checked_obj: 检查是否有文档的函数或者类 127 | :param module_name: 函数或者类所在地模块 128 | :return: 129 | """ 130 | if inspect.isclass(checked_obj): 131 | for name, obj in inspect.getmembers(checked_obj): 132 | if inspect.isfunction(obj) and not obj.__name__.startswith("_"): 133 | if obj.__doc__ is None: 134 | _alert(7, f""" '{checked_obj.__name__}.{obj.__name__}' in '{module_name}' does not have __doc__""") 135 | elif checked_obj.__doc__ is None: 136 | _alert(6, f""" '{checked_obj.__name__}' in '{module_name}' does not have __doc__""") 137 | 138 | 139 | if __name__ == "__main__": 140 | check(sys.argv[1]) 141 | -------------------------------------------------------------------------------- /docs/build/html/_static/translations.js: -------------------------------------------------------------------------------- 1 | Documentation.addTranslations({"locale": "zh_Hans_CN", "messages": {"%(filename)s — %(docstitle)s": "%(filename)s — %(docstitle)s", "© Copyright %(copyright)s.": "© \u7248\u6743\u6240\u6709 %(copyright)s.", "© Copyright %(copyright)s.": "© \u7248\u6743\u6240\u6709 %(copyright)s.", ", in ": "\uff0c\u5728", "About these documents": "\u5173\u4e8e\u8fd9\u4e9b\u6587\u6863", "Automatically generated list of changes in version %(version)s": "\u81ea\u52a8\u751f\u6210\u7684 %(version)s \u7248\u672c\u4e2d\u7684\u66f4\u6539\u5217\u8868", "C API changes": "C API \u66f4\u6539", "Changes in Version %(version)s — %(docstitle)s": "\u66f4\u6539\u53d1\u751f\u5728\u7248\u672c %(version)s— %(docstitle)s", "Collapse sidebar": "\u6298\u53e0\u8fb9\u680f", "Complete Table of Contents": "\u5b8c\u6574\u7684\u5185\u5bb9\u8868", "Contents": "\u76ee\u5f55", "Copyright": "\u7248\u6743\u6240\u6709", "Created using Sphinx %(sphinx_version)s.": "\u7531 Sphinx %(sphinx_version)s \u521b\u5efa\u3002", "Expand sidebar": "\u5c55\u5f00\u8fb9\u680f", "From here you can search these documents. 
Enter your search\n words into the box below and click \"search\". Note that the search\n function will automatically search for all of the words. Pages\n containing fewer words won't appear in the result list.": "\u5728\u8fd9\u513f\uff0c\u4f60\u53ef\u4ee5\u5bf9\u8fd9\u4e9b\u6587\u6863\u8fdb\u884c\u641c\u7d22\u3002\u5411\u641c\u7d22\u6846\u4e2d\u8f93\u5165\u4f60\u6240\u8981\u641c\u7d22\u7684\u5173\u952e\u5b57\u5e76\u70b9\u51fb\u201c\u641c\u7d22\u201d\u3002\u6ce8\u610f\uff1a\u641c\u7d22\u5f15\u64ce\u4f1a\u81ea\u52a8\u641c\u7d22\u6240\u6709\u7684\u5173\u952e\u5b57\u3002\u5c06\u4e0d\u4f1a\u641c\u7d22\u5230\u90e8\u5206\u5173\u952e\u5b57\u7684\u9875\u9762.", "Full index on one page": "\u4e00\u9875\u7684\u5168\u90e8\u7d22\u5f15", "General Index": "\u603b\u76ee\u5f55", "Global Module Index": "\u5168\u5c40\u6a21\u5757\u7d22\u5f15", "Go": "\u8f6c\u5411", "Hide Search Matches": "\u9690\u85cf\u641c\u7d22\u7ed3\u679c", "Index": "\u7d22\u5f15", "Index – %(key)s": "\u7d22\u5f15 – %(key)s", "Index pages by letter": "\u6309\u7167\u5b57\u6bcd\u7684\u7d22\u5f15\u9875", "Indices and tables:": "\u7d22\u5f15\u548c\u8868\u683c\uff1a", "Last updated on %(last_updated)s.": "\u6700\u540e\u66f4\u65b0\u4e8e %(last_updated)s.", "Library changes": "\u5e93\u66f4\u6539", "Navigation": "\u5bfc\u822a", "Next topic": "\u4e0b\u4e00\u4e2a\u4e3b\u9898", "Other changes": "\u5176\u4ed6\u66f4\u6539", "Overview": "\u6982\u8ff0", "Permalink to this definition": "\u6c38\u4e45\u94fe\u63a5\u81f3\u76ee\u6807", "Permalink to this headline": "\u6c38\u4e45\u94fe\u63a5\u81f3\u6807\u9898", "Please activate JavaScript to enable the search\n functionality.": "\u8bf7\u6fc0\u6d3b JavaScript \u4ee5\u5f00\u542f\u641c\u7d22\u529f\u80fd", "Preparing search...": "\u51c6\u5907\u641c\u7d22\u2026\u2026", "Previous topic": "\u4e0a\u4e00\u4e2a\u4e3b\u9898", "Quick search": "\u5feb\u901f\u641c\u7d22", "Search": "\u641c\u7d22", "Search Page": "\u641c\u7d22\u9875\u9762", "Search Results": "\u641c\u7d22\u7ed3\u679c", "Search 
finished, found %s page(s) matching the search query.": "\u641c\u7d22\u5b8c\u6210\uff0c\u6709 %s \u4e2a\u9875\u9762\u5339\u914d\u3002", "Search within %(docstitle)s": "\u5728 %(docstitle)s \u4e2d\u641c\u7d22", "Searching": "\u641c\u7d22\u4e2d", "Show Source": "\u663e\u793a\u6e90\u4ee3\u7801", "Table of Contents": "", "This Page": "\u672c\u9875", "Welcome! This is": "\u6b22\u8fce\uff01\u8fd9\u662f", "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories.": "\u6ca1\u6709\u4efb\u4f55\u6587\u6863\u5339\u914d\u60a8\u7684\u641c\u7d22\u3002\u8bf7\u786e\u4fdd\u4f60\u8f93\u5165\u7684\u8bcd\u62fc\u5199\u6b63\u786e\u5e76\u9009\u62e9\u4e86\u5408\u9002\u7684\u5206\u7c7b\u3002", "all functions, classes, terms": "\u6240\u7684\u51fd\u6570\uff0c\u7c7b\uff0c\u672f\u8bed", "can be huge": "\u53ef\u80fd\u4f1a\u5f88\u591a", "last updated": "\u6700\u540e\u66f4\u65b0\u4e8e", "lists all sections and subsections": "\u5217\u51fa\u6240\u6709\u7684\u7ae0\u8282\u548c\u90e8\u5206", "next chapter": "\u4e0b\u4e00\u7ae0", "previous chapter": "\u4e0a\u4e00\u7ae0", "quick access to all modules": "\u5feb\u901f\u67e5\u770b\u6240\u6709\u7684\u6a21\u5757", "search": "\u641c\u7d22", "search this documentation": "\u641c\u7d22\u6587\u6863", "the documentation for": "\u8fd9\u4efd\u6587\u6863\u662f"}, "plural_expr": "0"}); -------------------------------------------------------------------------------- /docs/build/html/_static/pygments.css: -------------------------------------------------------------------------------- 1 | .highlight .hll { background-color: #ffffcc } 2 | .highlight { background: #eeffcc; } 3 | .highlight .c { color: #408090; font-style: italic } /* Comment */ 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 5 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */ 6 | .highlight .o { color: #666666 } /* Operator */ 7 | .highlight .ch { color: #408090; font-style: italic } /* 
Comment.Hashbang */ 8 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ 9 | .highlight .cp { color: #007020 } /* Comment.Preproc */ 10 | .highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */ 11 | .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */ 12 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ 13 | .highlight .gd { color: #A00000 } /* Generic.Deleted */ 14 | .highlight .ge { font-style: italic } /* Generic.Emph */ 15 | .highlight .gr { color: #FF0000 } /* Generic.Error */ 16 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 17 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 18 | .highlight .go { color: #333333 } /* Generic.Output */ 19 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ 20 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 21 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 22 | .highlight .gt { color: #0044DD } /* Generic.Traceback */ 23 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ 24 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ 25 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ 26 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */ 27 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ 28 | .highlight .kt { color: #902000 } /* Keyword.Type */ 29 | .highlight .m { color: #208050 } /* Literal.Number */ 30 | .highlight .s { color: #4070a0 } /* Literal.String */ 31 | .highlight .na { color: #4070a0 } /* Name.Attribute */ 32 | .highlight .nb { color: #007020 } /* Name.Builtin */ 33 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ 34 | .highlight .no { color: #60add5 } /* Name.Constant */ 35 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ 36 | .highlight 
.ni { color: #d55537; font-weight: bold } /* Name.Entity */ 37 | .highlight .ne { color: #007020 } /* Name.Exception */ 38 | .highlight .nf { color: #06287e } /* Name.Function */ 39 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ 40 | .highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ 41 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ 42 | .highlight .nv { color: #bb60d5 } /* Name.Variable */ 43 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ 44 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 45 | .highlight .mb { color: #208050 } /* Literal.Number.Bin */ 46 | .highlight .mf { color: #208050 } /* Literal.Number.Float */ 47 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */ 48 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */ 49 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */ 50 | .highlight .sa { color: #4070a0 } /* Literal.String.Affix */ 51 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ 52 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */ 53 | .highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */ 54 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ 55 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */ 56 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ 57 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ 58 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ 59 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */ 60 | .highlight .sr { color: #235388 } /* Literal.String.Regex */ 61 | .highlight .s1 { color: #4070a0 } /* Literal.String.Single */ 62 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */ 63 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ 64 | .highlight .fm { color: #06287e } /* Name.Function.Magic */ 65 | 
.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ 66 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ 67 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ 68 | .highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */ 69 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/build/html/_static/js/theme.js: -------------------------------------------------------------------------------- 1 | !function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 
0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("
"),n("table.docutils.footnote").wrap("
"),n("table.docutils.citation").wrap("
"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(''),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}t.length>0&&($(".wy-menu-vertical .current").removeClass("current"),t.addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l1").parent().addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l2").addClass("current"),t.closest("li.toctree-l3").addClass("current"),t.closest("li.toctree-l4").addClass("current"),t.closest("li.toctree-l5").addClass("current"),t[0].scrollIntoView())}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t') 88 | word_num = 0 89 | idx = 0 90 | tag_sequence = [] 91 | word = '' 92 | #last_idx,上一个匹配到的词的末字符的索引 93 | last_idx = -1 94 | while idx < len(sentence): 95 | word = word + sentence[idx] 96 
    def __call__(self, sentence):
        """Segment *sentence* against the user dictionary, trying both scan
        directions and keeping the better result.

        Runs :meth:`process_sentence` once forward and once reversed, then
        returns whichever direction matched more dictionary words (the
        forward pass wins ties).

        :param sentence: the raw character string to tag.
        :return: tuple ``(word_num, tag_sequence)`` -- the number of matched
            dictionary words and the corresponding b/m/e/s/o tag sequence
            produced by ``process_sentence``.
        """
        word_num, tag_sequence = self.process_sentence(sentence)
        word_num_reverse, tag_sequence_reverse = self.process_sentence(
            sentence, reverse=True)
        if word_num >= word_num_reverse:
            return word_num, tag_sequence
        return word_num_reverse, tag_sequence_reverse
49 | 50 | 101 | 102 |
103 | 104 | 105 | 111 | 112 | 113 |
114 | 115 |
116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 |
134 | 135 |
    136 | 137 |
  • »
  • 138 | 139 |
  • 搜索
  • 140 | 141 | 142 |
  • 143 | 144 | 145 | 146 |
  • 147 | 148 |
149 | 150 | 151 |
152 |
153 |
154 |
155 | 156 | 163 | 164 | 165 |
166 | 167 |
168 | 169 |
170 | 171 |
172 |
173 | 174 | 175 |
176 | 177 |
178 |

179 | 180 | © 版权所有 2020, fastHan 181 | 182 |

183 |
184 | 185 | 186 | 187 | Built with Sphinx using a 188 | 189 | theme 190 | 191 | provided by Read the Docs. 192 | 193 |
194 | 195 |
196 |
197 | 198 |
199 | 200 |
import re
import torch
import pandas as pd

from datasets import Dataset
from .camr_to_tuples import CAMR

# Hard limit (in subword tokens) for both encoder and decoder sequences.
MAX_LEN = 300


def FastCAMR_Parsing_Loader(data_path, tokenizer):
    """Build a seq2seq CAMR-parsing dataset from a raw CAMR annotation file.

    Sentences whose linearized AMR exceeds ``MAX_LEN`` subword tokens are
    discarded; the parallel bookkeeping lists are pruned too, so they stay
    aligned with the rows of the returned dataset.

    :param data_path: path to a CAMR annotation file.
    :param tokenizer: HuggingFace tokenizer (callable returning
        ``input_ids`` / ``attention_mask``).
    :return: a ``datasets.Dataset`` with columns ``input_ids``,
        ``attention_mask``, ``decoder_input_ids`` and ``labels``.
    """
    sid_list, sent_list, id_token_list, amr_list, convert_amr_list = var_free_camrs(
        data_path)
    input_ids, attention_mask, decoder_input_ids, labels = [], [], [], []
    discard_index = []
    for idx, sid in enumerate(sid_list):
        # Cheap unpadded length check on the target side only.
        tokenize_result = tokenizer(convert_amr_list[idx])
        if len(tokenize_result['input_ids']) > MAX_LEN:
            discard_index.append(idx)
            continue
        sent_tokenize_result = tokenizer(' '.join(sent_list[idx]),
                                         max_length=MAX_LEN,
                                         padding='max_length',
                                         truncation=True)
        amr_tokenize_result = tokenizer(convert_amr_list[idx],
                                        max_length=MAX_LEN,
                                        padding='max_length',
                                        truncation=True)
        input_ids.append(sent_tokenize_result['input_ids'])
        attention_mask.append(sent_tokenize_result['attention_mask'])
        decode_ids = amr_tokenize_result['input_ids']
        # Teacher forcing: decoder input is the target shifted right by one.
        decoder_input_ids.append(decode_ids[:-1])
        labels.append(decode_ids[1:])
    # Pop from the back so earlier indices stay valid while removing.
    for idx in reversed(discard_index):
        sid_list.pop(idx)
        sent_list.pop(idx)
        id_token_list.pop(idx)
        amr_list.pop(idx)
        convert_amr_list.pop(idx)
    amr_data = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "decoder_input_ids": decoder_input_ids,
        "labels": labels
    }
    amr_data = pd.DataFrame(amr_data)
    dataset = Dataset.from_pandas(amr_data, preserve_index=False)
    return dataset


def data_collator(amr_data):
    """Collate a list of per-example feature dicts into batched tensors.

    Tensor-valued features are stacked; list/scalar features are converted
    with ``torch.tensor``.
    """
    first = amr_data[0]
    batch = {}
    for k, v in first.items():
        if isinstance(v, torch.Tensor):
            batch[k] = torch.stack([f[k] for f in amr_data])
        else:
            batch[k] = torch.tensor([f[k] for f in amr_data])
    return batch


def var_free_camrs(input_file):
    """Read a CAMR file and derive a variable-free single-line AMR per sentence.

    :param input_file: path to the raw CAMR annotation file (UTF-8).
    :return: ``(sid_list, sent_list, id_token_list, amr_list,
        convert_amr_list)`` — parallel lists, one entry per sentence.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    sid_list, sent_list, id_token_list, amr_list = read_raw_camrs(lines)
    convert_amr_list = [
        delete_camr_variables(convert_camr_to_single_line(amr),
                              id_token_list[idx])
        for idx, amr in enumerate(amr_list)
    ]
    return sid_list, sent_list, id_token_list, amr_list, convert_amr_list


def read_raw_camrs(lines):
    """Split raw annotation lines into per-sentence ids, tokens and AMR blocks.

    Sentences are separated by blank lines. ``# ::id`` lines supply the
    sentence id, ``# ::wid`` lines supply ``xN_token`` word entries, and all
    non-comment lines are collected as the (multi-line) AMR body.
    """
    sid_list, sent_list, id_token_list, amr_list = [], [], [], []
    # Iterate over every sentence in the input file.
    cur_sent, cur_amr = [], []
    id_token_dict = {}
    has_content = False
    for line in lines:
        line = line.strip()
        # Strip BOM / zero-width characters that survive in the annotations.
        if '\ufeff' in line:
            line = line.replace('\ufeff', '')
        if '\u200b' in line:
            line = line.replace('\u200b', '')
        if line == "":
            if has_content:  # end of current CAMR
                sent_list.append(cur_sent)
                id_token_list.append(id_token_dict)
                amr_list.append(cur_amr)
                cur_sent, cur_amr = [], []
                id_token_dict = {}
                has_content = False
            continue
        if line.startswith("#"):
            if '::id' in line:
                sid = re.findall(r'# ::id export_amr\.(.*?)\s*::', line)[0]
                sid_list.append(sid)
            elif '::wid' in line:
                wid = line[len('# ::wid '):].strip().split(' ')
                for i in wid:
                    token_id, token = i.split('_')
                    if token != '':
                        # key: id number, value: token (e.g. "1":"我")
                        cur_sent.append(token)
                        id_token_dict[int(token_id[1:])] = token
            else:
                continue
        else:
            has_content = True
            cur_amr.append(line)
    # Flush the trailing sentence if the file lacks a final blank line.
    if has_content:
        sent_list.append(cur_sent)
        id_token_list.append(id_token_dict)
        amr_list.append(cur_amr)
    return sid_list, sent_list, id_token_list, amr_list


def convert_camr_to_single_line(amr):
    """Collapse a multi-line AMR block into one whitespace-free line."""
    return "".join([line.strip() for line in amr])


def delete_camr_variables(amr, id_token_dict):
    """Remove variable names (``xN /``) from a single-line CAMR string.

    Coreference pairs ``xi / xj`` are rewritten as ``tok_i^tok_j`` using the
    node table produced by :class:`CAMR`; remaining ``var /`` prefixes are
    simply dropped.
    """
    result_amr, coref_dict = CAMR.parse_AMR_line(amr, id_token_dict)
    node_dict = dict(zip(result_amr.nodes, result_amr.node_values))
    # A variable looks like x1, x1_2, or a chained form x1_x2_3 ...
    var = r'x\d+(?:_\d+)*(?:_x\d+(?:_\d+)*)*'
    coref_vars = '(' + var + r'\s*/\s*' + var + ')'
    coref_var_list = re.findall(coref_vars, amr)
    for coref_v in coref_var_list:
        var0 = coref_v.split('/')[0].strip()
        var1 = coref_v.split('/')[1].strip()
        # node_dict is keyed by the variable id without the leading 'x'.
        var0 = node_dict[var0[1:]]
        var1 = node_dict[var1[1:]]
        # Plain string replacement: the matched text must not be re-parsed as
        # a regex pattern, nor may the tokens act as a regex replacement
        # template (fixes corruption when tokens contain metacharacters).
        amr = amr.replace(coref_v, var0 + '^' + var1)
    normal_var = var + r'\s*/\s*'
    amr = re.sub(normal_var, '', amr)
    return amr
47 | 48 | 99 | 100 |
101 | 102 | 103 | 109 | 110 | 111 |
112 | 113 |
114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 |
132 | 133 |
    134 | 135 |
  • »
  • 136 | 137 |
  • 概览:模块代码
  • 138 | 139 | 140 |
  • 141 | 142 |
  • 143 | 144 |
145 | 146 | 147 |
148 |
149 | 167 |
168 | 169 | 170 |
171 | 172 |
173 |

174 | 175 | © 版权所有 2020, fastHan 176 | 177 |

178 |
179 | 180 | 181 | 182 | Built with Sphinx using a 183 | 184 | theme 185 | 186 | provided by Read the Docs. 187 | 188 |
189 | 190 |
191 |
192 | 193 |
194 | 195 |
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

# Make the fastHan package importable for autodoc (repo root is two levels up).
sys.path.insert(0, os.path.abspath('../../'))

# -- Project information -----------------------------------------------------

project = 'fastHan'
copyright = '2020, fastHan'
author = 'fastHan'

# The short X.Y version
version = '1.4'
# The full version, including alpha/beta/rc tags
release = '1.4'

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.viewcode',
    'sphinx.ext.autosummary',
    'sphinx.ext.mathjax',
    'sphinx.ext.todo'
]

# Keep members in source order and always document __init__.
autodoc_default_options = {
    'member-order': 'bysource',
    'special-members': '__init__',
    'undoc-members': False,
}

autoclass_content = "class"

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# template_bridge
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = "zh_CN"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path .
exclude_patterns = ['modules.rst']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
    'collapse_navigation': False,
    'titles_only': True
}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'fastHan'

# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
# Empty: no LaTeX/PDF output is built for this project.
latex_documents = []

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'fastHan', 'fastHan Documentation',
     [author], 1)
]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'fastHan', 'fastHan Documentation',
     author, 'fastHan', 'One line description of project.',
     'Miscellaneous'),
]


# -- Extension configuration -------------------------------------------------
def maybe_skip_member(app, what, name, obj, skip, options):
    """autodoc-skip-member hook: keep ``__init__``, skip other underscore names."""
    if name == "__init__":
        return False
    if name.startswith("_"):
        return True
    return False


def setup(app):
    """Sphinx extension entry point: register the autodoc skip hook."""
    app.connect('autodoc-skip-member', maybe_skip_member)
48 | 49 | 100 | 101 |
102 | 103 | 104 | 110 | 111 | 112 |
113 | 114 |
115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 |
133 | 134 |
    135 | 136 |
  • »
  • 137 | 138 |
  • demo 中文文档
  • 139 | 140 | 141 |
  • 142 | 143 | 144 | View page source 145 | 146 | 147 |
  • 148 | 149 |
150 | 151 | 152 |
153 |
154 |
155 |
156 | 157 |
158 |

demo 中文文档

159 |
160 |

用户手册

161 |
162 | 165 |
166 |
167 |
168 |

API 文档

169 |

除了用户手册之外,你还可以通过查阅 API 文档来找到你所需要的工具。

170 |
171 |
172 |
173 |
174 |
175 |

索引与搜索

176 | 181 |
182 | 183 | 184 |
185 | 186 |
187 |
188 | 189 | 195 | 196 | 197 |
198 | 199 |
200 |

201 | 202 | © 版权所有 2020, fastHan 203 | 204 |

205 |
206 | 207 | 208 | 209 | Built with Sphinx using a 210 | 211 | theme 212 | 213 | provided by Read the Docs. 214 | 215 |
216 | 217 |
218 |
219 | 220 |
221 | 222 |
223 | 224 | 225 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | -------------------------------------------------------------------------------- /docs/build/html/fastHan.model.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | fastHan.model package — fastHan 0.5.0 文档 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
47 | 48 | 99 | 100 |
101 | 102 | 103 | 109 | 110 | 111 |
112 | 113 |
114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 |
132 | 133 |
    134 | 135 |
  • »
  • 136 | 137 |
  • fastHan.model package
  • 138 | 139 | 140 |
  • 141 | 142 | 143 | View page source 144 | 145 | 146 |
  • 147 | 148 |
149 | 150 | 151 |
152 |
153 |
154 |
155 | 156 | 174 | 175 | 176 |
177 | 178 |
179 |
180 | 181 | 182 |
183 | 184 |
185 |

186 | 187 | © 版权所有 2020, fastHan 188 | 189 |

190 |
191 | 192 | 193 | 194 | Built with Sphinx using a 195 | 196 | theme 197 | 198 | provided by Read the Docs. 199 | 200 |
201 | 202 |
203 |
204 | 205 |
206 | 207 |
from fastNLP.core.metrics import Metric
import torch


# modified from https://github.com/yhcc/JointCwsParser
class SegAppCharParseF1Metric(Metric):
    """Word-level UAS/LAS-style F1 for joint segmentation + dependency parsing.

    Characters are grouped into words via the 'app' label (``app_index``);
    single-character punctuation words (``pun_index``) are excluded from
    scoring.
    """

    def __init__(self, pun_index, app_index=0):
        super().__init__()
        self.app_index = app_index
        self.pun_index = pun_index

        # Running counts, cleared by get_metric(reset=True).
        self.parse_head_tp = 0   # unlabeled (head-only) true positives
        self.parse_label_tp = 0  # labeled true positives
        self.rec_tol = 0         # total gold arcs
        self.pre_tol = 0         # total predicted arcs

    def get_word_pairs(self, head_preds, label_preds, seq_lens, pun_masks):
        """Convert char-level heads/labels into word-level arc tuples."""
        # Drop the artificial root position.
        head_preds = head_preds[:, 1:].tolist()
        label_preds = label_preds[:, 1:].tolist()
        seq_lens = (seq_lens - 1).tolist()

        head_dep_tuples = []
        head_label_dep_tuples = []

        for b in range(len(head_preds)):
            seq_len = seq_lens[b]
            head_pred = head_preds[b][:seq_len]
            label_pred = label_preds[b][:seq_len]

            words = []   # [word_start, word_end) char spans, root excluded
            heads = []
            labels = []
            ranges = []  # word index of each char
            word_idx = 0
            word_start_idx = 0
            for idx, (label, head) in enumerate(zip(label_pred, head_pred)):
                ranges.append(word_idx)
                if label == self.app_index:
                    # 'app' means this char continues the current word.
                    pass
                else:
                    labels.append(label)
                    heads.append(head)
                    words.append((word_start_idx, idx + 1))
                    word_start_idx = idx + 1
                    word_idx += 1

            head_dep_tuple = []  # (head, dependent) — head first
            head_label_dep_tuple = []
            for idx, head in enumerate(heads):
                span = words[idx]
                if span[0] == span[1] - 1 and pun_masks[b, span[0]]:
                    continue  # exclude punctuation
                if head == 0:
                    head_dep_tuple.append(('root', words[idx]))
                    head_label_dep_tuple.append(
                        ('root', labels[idx], words[idx]))
                else:
                    head_word_idx = ranges[head - 1]
                    head_word_span = words[head_word_idx]
                    head_dep_tuple.append((head_word_span, words[idx]))
                    head_label_dep_tuple.append(
                        (head_word_span, labels[idx], words[idx]))
            head_dep_tuples.append(head_dep_tuple)
            head_label_dep_tuples.append(head_label_dep_tuple)

        return head_dep_tuples, head_label_dep_tuples

    def update(self, labels, heads, head_preds, label_preds, seq_len):
        """Accumulate arc statistics for one batch.

        max_len counts characters without the root position.
        :param labels: gold char labels, batch_size x max_len (incl. root slot)
        :param heads: gold char heads, batch_size x max_len (incl. root slot)
        :param head_preds: predicted heads, batch_size x max_len
        :param label_preds: predicted labels, batch_size x max_len
        :param seq_len: sequence lengths, batch_size
        """
        pun_masks = (labels == self.pun_index).long()
        pun_masks = pun_masks[:, 1:]

        head_dep_tuples, head_label_dep_tuples = self.get_word_pairs(
            head_preds, label_preds, seq_len, pun_masks)
        gold_head_dep_tuples, gold_head_label_dep_tuples = self.get_word_pairs(
            heads, labels, seq_len, pun_masks)

        for b in range(seq_len.shape[0]):
            head_dep_tuple = head_dep_tuples[b]
            head_label_dep_tuple = head_label_dep_tuples[b]
            gold_head_dep_tuple = gold_head_dep_tuples[b]
            gold_head_label_dep_tuple = gold_head_label_dep_tuples[b]

            for head_dep, head_label_dep in zip(head_dep_tuple,
                                                head_label_dep_tuple):
                if head_dep in gold_head_dep_tuple:
                    self.parse_head_tp += 1
                if head_label_dep in gold_head_label_dep_tuple:
                    self.parse_label_tp += 1
            self.pre_tol += len(head_dep_tuple)
            self.rec_tol += len(gold_head_dep_tuple)

    def get_metric(self, reset=True):
        """Return rounded precision/recall/F1 dicts for UAS and LAS."""
        # +1e-6 guards against ZeroDivisionError on an empty evaluation,
        # consistent with CWSMetric.get_metric below.
        u_p = self.parse_head_tp / (self.pre_tol + 1e-6)
        u_r = self.parse_head_tp / (self.rec_tol + 1e-6)
        u_f = 2 * u_p * u_r / (1e-6 + u_p + u_r)
        l_p = self.parse_label_tp / (self.pre_tol + 1e-6)
        l_r = self.parse_label_tp / (self.rec_tol + 1e-6)
        l_f = 2 * l_p * l_r / (1e-6 + l_p + l_r)

        if reset:
            self.parse_head_tp = 0
            self.parse_label_tp = 0
            self.rec_tol = 0
            self.pre_tol = 0

        return {'u_f1': round(u_f, 4), 'u_p': round(u_p, 4),
                'u_r/uas': round(u_r, 4),
                'f': round(l_f, 4), 'l_p': round(l_p, 4),
                'l_r/las': round(l_r, 4)}


class CWSMetric(Metric):
    """Word-segmentation F1 computed from per-char 'app'/boundary labels."""

    def __init__(self, app_index=0):
        super().__init__()
        self.app_index = app_index
        self.pre = 0  # predicted word count
        self.rec = 0  # gold word count
        self.tp = 0   # correctly segmented words

    def label_to_seg(self, labels, seq_lens):
        """Encode each word as (end position -> preceding 'app' run length)."""
        segs = torch.zeros_like(labels)[:, 1:]
        masks = torch.zeros_like(labels)[:, 1:]

        seq_lens = (seq_lens - 1).tolist()
        # [:, 1:] drops the root position.
        for idx, label in enumerate(labels[:, 1:].tolist()):
            seq_len = seq_lens[idx]
            label = label[:seq_len]
            word_len = 0

            for i, l in enumerate(label):
                if l == self.app_index and i != len(label) - 1:
                    word_len += 1
                else:
                    segs[idx, i] = word_len
                    masks[idx, i] = 1
                    word_len = 0
        return segs, masks

    def update(self, labels, label_preds, seq_len):
        """Accumulate segmentation statistics for one batch.

        :param labels: gold labels, batch_size x max_len
        :param label_preds: predicted labels, batch_size x max_len
        :param seq_len: batch_size
        """
        seg_targets, seg_masks = self.label_to_seg(labels, seq_len)
        pred_segs, pred_masks = self.label_to_seg(label_preds, seq_len)

        right_mask = seg_targets.eq(pred_segs)  # same word length predicted
        self.rec += seg_masks.sum().item()
        self.pre += pred_masks.sum().item()
        # ... and pred/target both end a word at the same position.
        self.tp += (right_mask.__and__(
            pred_masks.bool().__and__(seg_masks.bool()))).sum().item()

    def get_metric(self, reset=True):
        """Return rounded segmentation precision/recall/F1."""
        res = {}
        res['rec'] = round(self.tp / (self.rec + 1e-6), 4)
        res['pre'] = round(self.tp / (self.pre + 1e-6), 4)
        res['f1'] = round(2 * res['rec'] * res['pre'] /
                          (res['pre'] + res['rec'] + 1e-6), 4)

        if reset:
            self.pre = 0
            self.rec = 0
            self.tp = 0

        return res
47 | 48 | 99 | 100 |
101 | 102 | 103 | 109 | 110 | 111 |
112 | 113 |
114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 |
132 | 133 |
    134 | 135 |
  • »
  • 136 | 137 |
  • fastHan.model.utils module
  • 138 | 139 | 140 |
  • 141 | 142 | 143 | View page source 144 | 145 | 146 |
  • 147 | 148 |
149 | 150 | 151 |
152 |
153 |
154 |
155 | 156 |
157 |

fastHan.model.utils module

158 |
159 |
160 | fastHan.model.utils.check_dataloader_paths(paths: Union[str, Dict[str, str]]) → Dict[str, str][源代码]
161 |

检查传入dataloader的文件的合法性。如果为合法路径,将返回至少包含’train’这个key的dict。类似于下面的结果 162 | {

163 |
164 |
‘train’: ‘/some/path/to/’, # 一定包含,建词表应该在这上面建立,剩下的其它文件应该只需要处理并index。 165 | ‘test’: ‘xxx’ # 可能有,也可能没有 166 | …
167 |

} 168 | 如果paths为不合法的,将直接进行raise相应的错误

169 | 170 | 171 | 172 | 173 | 175 | 176 | 177 | 178 | 179 |
参数:paths – 路径. 可以为一个文件路径(则认为该文件就是train的文件); 可以为一个文件目录,将在该目录下寻找train(文件名 174 | 中包含train这个字段), test.txt, dev.txt; 可以为一个dict, 则key是用户自定义的某个文件的名称,value是这个文件的路径。
返回:
180 |
181 | 182 |
183 |
184 | fastHan.model.utils.get_tokenizer()[源代码]
185 |
186 | 187 |
188 | 189 | 190 |
191 | 192 |
193 |
194 | 195 | 196 |
197 | 198 |
199 |

200 | 201 | © 版权所有 2020, fastHan 202 | 203 |

204 |
205 | 206 | 207 | 208 | Built with Sphinx using a 209 | 210 | theme 211 | 212 | provided by Read the Docs. 213 | 214 |
215 | 216 |
217 |
218 | 219 |
220 | 221 |
import torch
import torch.nn.functional as F
from fastNLP.models.torch.biaffine_parser import (ArcBiaffine, BiaffineParser,
                                                  LabelBilinear)
from fastNLP.modules.torch.dropout import TimestepDropout
from torch import nn
from transformers import PreTrainedModel


# modified from https://github.com/yhcc/JointCwsParser/blob/master/models/BertParser.py
class BertParser(BiaffineParser):
    """Biaffine dependency-parsing head operating on BERT character features."""

    def __init__(self,
                 num_label,
                 embed_size=768,
                 arc_mlp_size=500,
                 label_mlp_size=100,
                 dropout=0.1,
                 use_greedy_infer=False,
                 app_index=0):
        # Deliberately skips BiaffineParser.__init__ (initializes the
        # grandparent nn.Module directly) so none of the parent's own
        # submodules are built — only the layers defined below exist.
        super(BiaffineParser, self).__init__()

        self.embed_size = embed_size
        # One shared MLP producing arc-dep / arc-head / label-dep / label-head
        # feature slices, split apart in forward().
        self.mlp = nn.Sequential(
            nn.Linear(self.embed_size, arc_mlp_size * 2 + label_mlp_size * 2),
            nn.LeakyReLU(0.1),
            TimestepDropout(p=dropout),
        )
        self.arc_mlp_size = arc_mlp_size
        self.label_mlp_size = label_mlp_size
        self.arc_predictor = ArcBiaffine(arc_mlp_size, bias=True)
        self.label_predictor = LabelBilinear(label_mlp_size,
                                             label_mlp_size,
                                             num_label,
                                             bias=True)
        self.use_greedy_infer = use_greedy_infer
        self.reset_parameters()

        self.app_index = app_index
        self.num_label = num_label
        # The app-masking logic in forward() assumes label index 0 is 'app'.
        if self.app_index != 0:
            raise ValueError("现在app_index必须等于0")

        self.dropout = nn.Dropout(dropout)

    def reset_parameters(self):
        # Modules that define their own reset/init hooks are left alone;
        # everything else gets xavier-normal (matrices) / uniform (vectors).
        for name, m in self.named_modules():
            if hasattr(m, 'reset_parameters') or hasattr(m, 'init_param'):
                pass
            else:
                for p in m.parameters():
                    if len(p.size()) > 1:
                        nn.init.xavier_normal_(p, gain=0.1)
                    else:
                        nn.init.uniform_(p, -0.1, 0.1)

    def forward(self, feats, mask, gold_heads=None, char_labels=None):
        """Run the biaffine parser over character features.

        Training (``self.training``) requires ``gold_heads``/``char_labels``
        and returns a loss dict; eval returns head/label predictions.
        """
        # This mask differs from attention_mask: [SEP] and [CLS] are removed.

        batch_size = feats.shape[0]
        max_len = feats.shape[1]

        feats = self.dropout(feats)
        feats = self.mlp(feats)
        arc_sz, label_sz = self.arc_mlp_size, self.label_mlp_size
        arc_dep, arc_head = feats[:, :, :arc_sz], feats[:, :,
                                                        arc_sz:2 * arc_sz]
        label_dep, label_head = feats[:, :, 2 * arc_sz:2 * arc_sz +
                                      label_sz], feats[:, :,
                                                       2 * arc_sz + label_sz:]

        arc_pred = self.arc_predictor(arc_head, arc_dep)  # [N, L, L]

        if self.training:
            # Teacher forcing: score labels against the gold tree.
            assert gold_heads is not None
            head_pred = None
            heads = gold_heads
        else:
            heads = self.mst_decoder(arc_pred, mask)
            head_pred = heads

        # Replace the -100 padding with -1 so heads can safely be used as a
        # matrix index below.
        padded_heads = torch.clone(heads)
        padded_heads[padded_heads == -100] = -1

        batch_range = torch.arange(start=0,
                                   end=batch_size,
                                   dtype=torch.long,
                                   device=mask.device).unsqueeze(1)
        label_head = label_head[batch_range, padded_heads].contiguous()
        label_pred = self.label_predictor(label_head,
                                          label_dep)  # [N, max_len, num_label]
        # Restriction: the 'app' label may only be predicted when the head is
        # the immediately following character.
        arange_index = torch.arange(1, max_len+1, dtype=torch.long, device=mask.device).unsqueeze(0)\
            .repeat(batch_size, 1)  # batch_size x max_len

        app_masks = heads.ne(
            arange_index)  # batch_size x max_len; 1 = 'app' forbidden here
        app_masks = app_masks.unsqueeze(2).repeat(1, 1, self.num_label)
        app_masks[:, :, 1:] = 0  # mask applies to label 0 ('app') only
        label_pred = label_pred.masked_fill(app_masks, float('-inf'))

        if self.training:
            arc_loss, label_loss = self.loss(arc_pred, label_pred, gold_heads,
                                             char_labels, mask)
            res_dict = {
                'loss': arc_loss + label_loss,
                'arc_loss': arc_loss,
                'label_loss': label_loss
            }
        else:
            res_dict = {
                'label_preds': label_pred.max(2)[1],
                'head_preds': head_pred
            }

        return res_dict

    @staticmethod
    def loss(arc_pred, label_pred, arc_true, label_true, mask):
        """
        Compute loss.
        :param arc_pred: [batch_size, seq_len, seq_len]
        :param label_pred: [batch_size, seq_len, n_tags]
        :param arc_true: [batch_size, seq_len]
        :param label_true: [batch_size, seq_len]
        :param mask: [batch_size, seq_len]
        :return: (arc_nll, label_nll) cross-entropy loss pair
        """

        batch_size, seq_len, _ = arc_pred.shape
        flip_mask = (mask == 0)
        # _arc_pred = arc_pred.clone()
        # Padded head positions can never be selected as heads.
        _arc_pred = arc_pred.masked_fill(flip_mask.unsqueeze(1), -float('inf'))

        # NOTE(review): these in-place fills mutate the callers' gold tensors
        # (excluding the root slot via ignore_index=-100) — confirm callers do
        # not reuse arc_true/label_true afterwards.
        arc_true.data[:, 0].fill_(-100)
        label_true.data[:, 0].fill_(-100)

        arc_nll = F.cross_entropy(_arc_pred.view(-1, seq_len),
                                  arc_true.view(-1),
                                  ignore_index=-100)
        label_nll = F.cross_entropy(label_pred.view(-1, label_pred.size(-1)),
                                    label_true.view(-1),
                                    ignore_index=-100)

        return arc_nll, label_nll


class DependencyParsingModel(PreTrainedModel):
    """HuggingFace wrapper: BERT-style encoder + BertParser head."""

    def __init__(self, encoder, config, labels):
        super().__init__(config)

        label_num = len(labels)

        self.num_labels = label_num

        self.encoder = encoder
        self.parser = BertParser(num_label=self.num_labels,
                                 embed_size=config.hidden_size,
                                 app_index=labels.index('app'))

    def forward(self,
                input_ids=None,
                attention_mask=None,
                labels=None,
                heads=None):
        # Build a mask that excludes [CLS] and [SEP].
        seq_len = attention_mask.sum(dim=-1) - 2
        broad_cast_seq_len = torch.arange(attention_mask.shape[1] - 2).expand(
            attention_mask.shape[0], -1).to(seq_len.device)
        mask = broad_cast_seq_len < seq_len.unsqueeze(1)

        outputs = self.encoder(input_ids, attention_mask=attention_mask)

        feats = outputs[0]
        # Drop the [CLS]/[SEP] feature positions to match the mask above.
        feats = feats[:, 1:-1]

        return self.parser.forward(feats=feats,
                                   mask=mask,
                                   gold_heads=heads,
                                   char_labels=labels)
| -------------------------------------------------------------------------------- /docs/build/html/py-modindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Python 模块索引 — fastHan 0.5.0 文档 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 |
50 | 51 | 102 | 103 |
104 | 105 | 106 | 112 | 113 | 114 |
115 | 116 |
117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 |
135 | 136 |
    137 | 138 |
  • »
  • 139 | 140 |
  • Python 模块索引
  • 141 | 142 | 143 |
  • 144 | 145 |
  • 146 | 147 |
148 | 149 | 150 |
151 |
152 |
153 |
154 | 155 | 156 |

Python 模块索引

157 | 158 |
159 | f 160 |
161 | 162 | 163 | 164 | 166 | 167 | 169 | 172 | 173 | 174 | 177 | 178 | 179 | 182 | 183 | 184 | 187 | 188 | 189 | 192 | 193 | 194 | 197 | 198 | 199 | 202 | 203 | 204 | 207 | 208 | 209 | 212 | 213 | 214 | 217 | 218 | 219 | 222 |
 
165 | f
170 | fastHan 171 |
    175 | fastHan.FastModel 176 |
    180 | fastHan.model 181 |
    185 | fastHan.model.bert 186 |
    190 | fastHan.model.bert_encoder_theseus 191 |
    195 | fastHan.model.BertCharParser 196 |
    200 | fastHan.model.CharParser 201 |
    205 | fastHan.model.model 206 |
    210 | fastHan.model.old_fastNLP_bert 211 |
    215 | fastHan.model.UserDict 216 |
    220 | fastHan.model.utils 221 |
223 | 224 | 225 |
226 | 227 |
228 |
229 | 230 | 231 |
232 | 233 |
234 |

235 | 236 | © 版权所有 2020, fastHan 237 | 238 |

239 |
240 | 241 | 242 | 243 | Built with Sphinx using a 244 | 245 | theme 246 | 247 | provided by Read the Docs. 248 | 249 |
250 | 251 |
252 |
253 | 254 |
255 | 256 |
257 | 258 | 259 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | -------------------------------------------------------------------------------- /docs/build/html/fastHan.model.model.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | fastHan.model.model module — fastHan 0.5.0 文档 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
47 | 48 | 99 | 100 |
101 | 102 | 103 | 109 | 110 | 111 |
112 | 113 |
114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 |
132 | 133 |
    134 | 135 |
  • »
  • 136 | 137 |
  • fastHan.model.model module
  • 138 | 139 | 140 |
  • 141 | 142 | 143 | View page source 144 | 145 | 146 |
  • 147 | 148 |
149 | 150 | 151 |
152 |
153 |
154 |
155 | 156 |
157 |

fastHan.model.model module

158 |
159 |
160 | class fastHan.model.model.CharModel(embed, label_vocab, pos_idx=31, Parsing_rnn_layers=3, Parsing_arc_mlp_size=500, Parsing_label_mlp_size=100, Parsing_use_greedy_infer=False, encoding_type='bmeso', embedding_dim=768, dropout=0.1, use_pos_embedding=True, use_average=True)[源代码]
161 |

基类:torch.nn.modules.module.Module

162 |
163 |
164 | __init__(embed, label_vocab, pos_idx=31, Parsing_rnn_layers=3, Parsing_arc_mlp_size=500, Parsing_label_mlp_size=100, Parsing_use_greedy_infer=False, encoding_type='bmeso', embedding_dim=768, dropout=0.1, use_pos_embedding=True, use_average=True)[源代码]
165 |

Initializes internal Module state, shared by both nn.Module and ScriptModule.

166 |
167 | 168 |
169 |
170 | forward(chars, seq_len, task_class, target, seq_len_for_wordlist=None, dep_head=None, dep_label=None, pos=None, word_lens=None)[源代码]
171 |
172 | 173 |
174 |
175 | predict(chars, seq_len, task_class, tag_seqs=None)[源代码]
176 |
177 | 178 |
179 | 180 |
181 | 182 | 183 |
184 | 185 |
186 |
187 | 188 | 189 |
190 | 191 |
192 |

193 | 194 | © 版权所有 2020, fastHan 195 | 196 |

197 |
198 | 199 | 200 | 201 | Built with Sphinx using a 202 | 203 | theme 204 | 205 | provided by Read the Docs. 206 | 207 |
208 | 209 |
210 |
211 | 212 |
213 | 214 |
import itertools

import numpy as np

from fastNLP import DataSet

# Maximum number of characters kept per sentence; longer sentences are truncated.
MAX_LEN = 300


def _char_tags(word, label=None):
    """Return per-character BMES tags for *word*.

    Without *label* the plain CWS tags ``s``/``b``/``m``/``e`` are produced;
    with *label* each tag gets a ``-label`` suffix (POS/guwen style), e.g.
    ``b-nn`` / ``m-nn`` / ``e-nn`` or ``s-nn`` for a single-char word.
    """
    suffix = '' if label is None else '-' + label
    if len(word) == 1:
        return ['s' + suffix]
    return ['b' + suffix] + ['m' + suffix] * (len(word) - 2) + ['e' + suffix]


def _append_example(data, words, labels, label_to_index, tokenizer):
    """Truncate, index and tokenize one sentence; append it to *data*.

    ``labels`` get -100 sentinels for the [CLS]/[SEP] positions so the loss
    ignores them. Sentences whose tokenization does not align one-to-one
    with their units (one wordpiece per unit plus the two special tokens)
    are silently skipped, matching the original behaviour.
    """
    words = words[:MAX_LEN]
    labels = labels[:MAX_LEN]
    labels = [-100] + [label_to_index[x] for x in labels] + [-100]

    tokenize_result = tokenizer(words, is_split_into_words=True)
    if len(tokenize_result['input_ids']) != len(labels):
        return

    data['input_ids'].append(tokenize_result['input_ids'])
    data['attention_mask'].append(tokenize_result['attention_mask'])
    data['labels'].append(labels)
    data['seq_len'].append(len(labels) - 2)


def fastHan_CWS_Loader(lines, label_to_index, tokenizer):
    """Build a DataSet for Chinese word segmentation.

    Each element of *lines* is one sentence of space-separated words.
    """
    data = {
        'input_ids': [],
        'attention_mask': [],
        'labels': [],
        'seq_len': [],
    }

    for line in lines:
        line = line.strip()
        if len(line) <= 1:
            continue

        words = []
        labels = []
        for word in line.split(' '):
            if len(word) == 0:
                continue
            words.extend(word)
            labels.extend(_char_tags(word))

        _append_example(data, words, labels, label_to_index, tokenizer)

    return DataSet(data)


def fastHan_POS_loader(lines, label_to_index, tokenizer):
    """Build a DataSet for POS tagging from CoNLL-style *lines*.

    Column 1 is the word, column 3 the POS tag. Sentences are separated by
    blank lines; a trailing sentence without a final blank line is flushed
    too (the original silently dropped it).
    """
    data = {
        'input_ids': [],
        'attention_mask': [],
        'labels': [],
        'seq_len': [],
    }

    words = []
    labels = []
    for line in lines:
        if line == '\n':
            # Sentence boundary; tolerate consecutive blank lines.
            if words:
                _append_example(data, words, labels, label_to_index,
                                tokenizer)
                words = []
                labels = []
            continue

        cols = line.strip().split('\t')
        word = cols[1]
        label = cols[3].lower()
        words.extend(word)
        labels.extend(_char_tags(word, label))

    if words:
        _append_example(data, words, labels, label_to_index, tokenizer)

    return DataSet(data)


def fastHan_POS_guwen_loader(lines, label_to_index, tokenizer):
    """Build a DataSet for classical-Chinese (guwen) POS tagging.

    Each line holds space-separated ``word/tag`` items.
    """
    data = {'input_ids': [], 'attention_mask': [], 'labels': [], 'seq_len': []}

    for line in lines:
        line = line.strip()
        if len(line) <= 1:
            continue

        words = []
        labels = []
        for word_label in line.split(' '):
            if len(word_label) == 0:
                continue
            word, label = word_label.split('/')
            words.extend(word)
            labels.extend(_char_tags(word, label))

        _append_example(data, words, labels, label_to_index, tokenizer)

    return DataSet(data)


def fastHan_NER_Loader(lines, label_to_index, tokenizer):
    """Build a DataSet for NER from two-column ``token tag`` lines.

    Sentences are separated by blank lines; a trailing sentence is flushed
    even when the file has no final blank line.
    """
    data = {
        'input_ids': [],
        'attention_mask': [],
        'labels': [],
        'seq_len': [],
    }

    words = []
    labels = []
    for line in lines:
        if line == '\n':
            if words:
                _append_example(data, words, labels, label_to_index,
                                tokenizer)
                words = []
                labels = []
            continue

        word, label = line.strip().split(' ')
        words.append(word)
        labels.append(label.lower())

    if words:
        _append_example(data, words, labels, label_to_index, tokenizer)

    return DataSet(data)


def fastHan_Parsing_Loader(lines, label_to_index, tokenizer):
    """Build a DataSet for character-level dependency parsing (CoNLL lines).

    Word-level trees are converted to character level: inside a word every
    character points at the next one with the pseudo label ``app``; the
    last character carries the word's real head and label.
    """
    data = {
        'input_ids': [],
        'attention_mask': [],
        'heads': [],
        'labels': [],
        'seq_len': []
    }

    words = []
    heads = []
    labels = []

    skip_1 = 0  # sentences skipped: longer than MAX_LEN characters
    skip_2 = 0  # sentences skipped: tokenizer output misaligned with chars

    def _flush():
        nonlocal skip_1, skip_2
        char_words = list(itertools.chain(*words))
        # Was a hard-coded 300 — use MAX_LEN for consistency (same value).
        if len(char_words) > MAX_LEN:
            skip_1 += 1
            return
        tokenize_result = tokenizer(char_words, is_split_into_words=True)
        if len(tokenize_result['input_ids']) - 2 != len(char_words):
            skip_2 += 1
            return

        # Insert a pseudo root token right after [CLS].
        # NOTE(review): assumes token id 1 is a safe placeholder for the
        # root position in this vocabulary — confirm against the tokenizer.
        tokenize_result['input_ids'].insert(1, 1)
        tokenize_result['attention_mask'].insert(1, 1)

        # head_end_indexes[k] is the 1-based index of the last character of
        # word k; the trailing 0 makes head==0 (root) resolve to index 0.
        head_end_indexes = np.cumsum(list(map(len, words))).tolist() + [0]
        char_index = 1

        char_heads = []
        char_labels = []
        for word, head, label in zip(words, heads, labels):
            for _ in range(len(word) - 1):
                char_index += 1
                char_heads.append(char_index)
                char_labels.append('app')
            char_index += 1
            char_heads.append(head_end_indexes[head - 1])
            char_labels.append(label)

        # The root position is ignored by the loss (-100).
        sent_labels = [-100] + [label_to_index[x] for x in char_labels]
        char_heads = [-100] + char_heads

        data['input_ids'].append(tokenize_result['input_ids'])
        data['attention_mask'].append(tokenize_result['attention_mask'])
        data['labels'].append(sent_labels)
        data['heads'].append(char_heads)
        data['seq_len'].append(len(sent_labels))

    for line in lines:
        if line == '\n':
            if words:
                _flush()
                words = []
                heads = []
                labels = []
            continue

        cols = line.strip().split('\t')
        words.append(cols[1])
        heads.append(int(cols[6]))
        labels.append(cols[7].lower())

    if words:  # flush a trailing sentence with no final blank line
        _flush()

    return DataSet(data)
# from multitask_model_normloss2
# Per-task loss weights are managed through the weight manager ("wm").

import torch
import torch.nn as nn
import torch.nn.functional as F
from fastNLP.modules.torch import MLP, ConditionalRandomField, allowed_transitions
from torch.nn import CrossEntropyLoss
from transformers import PreTrainedModel

from .dependency_parsing_model import BertParser


# modified from https://github.com/THUDM/P-tuning-v2
class PrefixEncoder(torch.nn.Module):
    r'''
    The torch.nn model to encode the prefix
    Input shape: (batch-size, prefix-length)
    Output shape: (batch-size, prefix-length, 2*layers*hidden)
    '''
    def __init__(self,
                 config,
                 num_tokens,
                 prefix_projection,
                 pre_seq_len,
                 prefix_hidden_size=500):
        # NOTE(review): ``pre_seq_len`` is accepted but never used in this
        # constructor — confirm whether it can be dropped from the signature.
        super().__init__()
        self.prefix_projection = prefix_projection
        if self.prefix_projection:
            # Use a two-layer MLP to encode the prefix
            self.embedding = torch.nn.Embedding(num_tokens, config.hidden_size)
            self.trans = torch.nn.Sequential(
                torch.nn.Linear(config.hidden_size, prefix_hidden_size),
                torch.nn.Tanh(),
                torch.nn.Linear(
                    prefix_hidden_size,
                    config.num_hidden_layers * 2 * config.hidden_size))
        else:
            # Direct lookup: one embedding row already holds the key/value
            # vectors for every layer (2 * num_hidden_layers * hidden_size).
            self.embedding = torch.nn.Embedding(
                num_tokens, config.num_hidden_layers * 2 * config.hidden_size)

    def forward(self, prefix: torch.Tensor):
        """Map prefix token ids to the flat past-key-value vectors."""
        if self.prefix_projection:
            prefix_tokens = self.embedding(prefix)
            past_key_values = self.trans(prefix_tokens)
        else:
            past_key_values = self.embedding(prefix)
        return past_key_values


class MultiTaskModel(PreTrainedModel):
    """Multi-task model: one shared encoder plus per-task prefix tuning.

    Sequence-labeling tasks get an MLP classifier and a CRF used for
    Viterbi decoding at inference time; the single dependency-parsing task
    (``biaffine_task``) is handled by a biaffine ``BertParser`` head.
    Training losses are rescaled per task via ``ensembledWeightManager``.
    """
    def __init__(self,
                 encoder,
                 task_label_map,
                 all_tasks,
                 ensembledWeightManager,
                 prefix_projection=False,
                 pre_seq_len=6,
                 biaffine_task='Parsing-ctb9'):
        super().__init__(encoder.config)

        self.all_tasks = all_tasks
        self.task_label_map = task_label_map
        self.ensembledWeightManager = ensembledWeightManager
        # sequence_labeling
        self.biaffine_task = biaffine_task
        self.seq_label_classifier = nn.ModuleDict()
        self.crf = nn.ModuleDict()

        # One MLP classifier + CRF per sequence-labeling task; the parsing
        # task is excluded here and gets the biaffine parser below.
        for task in task_label_map:
            if task == self.biaffine_task:
                continue

            label_num = len(task_label_map[task])
            labels = {i: task_label_map[task][i] for i in range(label_num)}

            self.seq_label_classifier[task] = MLP(
                [encoder.config.hidden_size, 512, label_num])
            self.crf[task] = ConditionalRandomField(
                num_tags=label_num,
                allowed_transitions=allowed_transitions(labels))
            # Start from zero transition scores.
            self.crf[task].trans_m.data *= 0

        self.parser = BertParser(
            num_label=len(task_label_map[self.biaffine_task]),
            embed_size=encoder.config.hidden_size,
            app_index=task_label_map[self.biaffine_task].index('app'))

        self.dropout = nn.Dropout(encoder.config.hidden_dropout_prob)
        self.encoder = encoder

        # prefix tuning
        self.pre_seq_len = pre_seq_len
        self.build_prefix_map()
        self.prefix_encoder = PrefixEncoder(
            config=encoder.config,
            prefix_projection=prefix_projection,
            num_tokens=self.num_tokens,
            pre_seq_len=self.pre_seq_len)

    def build_prefix_map(self):
        """Assign prefix-token ids to every task.

        Each task name looks like ``"<macro>-<corpus>"`` (e.g. ``CWS-pku``).
        The first half of a task's prefix ids is shared by all tasks with
        the same macro task; the second half is unique to the task.
        Sets ``self.prefix_map`` (task -> LongTensor of ids) and
        ``self.num_tokens`` (total ids allocated).
        """
        length = self.pre_seq_len // 2
        idx = 0
        prefix_map = dict()
        macro_map = dict()
        for task in self.all_tasks:
            macro_task, _ = task.split('-')
            if macro_task not in macro_map:
                macro_map[macro_task] = [idx + i for i in range(length)]
                idx += length

        for task in self.all_tasks:
            macro_task, _ = task.split('-')
            prefix_map[task] = torch.LongTensor(
                macro_map[macro_task] + [idx + i for i in range(length)])
            idx += length
        self.prefix_map = prefix_map
        self.num_tokens = idx

    def get_prompt(self, task, batch_size):
        """Build ``past_key_values`` for *task*, expanded to *batch_size*.

        Reshapes the flat prefix vectors to
        (batch, pre_seq_len, 2*layers, heads, head_dim), permutes to put the
        layer axis first, and splits into per-layer (key, value) pairs as
        expected by the encoder's ``past_key_values`` argument.
        """
        prefix_tokens = self.prefix_map[task]
        prefix_tokens = prefix_tokens.unsqueeze(0).expand(batch_size, -1).to(
            self.encoder.device)
        past_key_values = self.prefix_encoder(prefix_tokens)

        past_key_values = past_key_values.view(
            batch_size, self.pre_seq_len,
            self.encoder.config.num_hidden_layers * 2,
            self.encoder.config.num_attention_heads,
            self.encoder.config.hidden_size //
            self.encoder.config.num_attention_heads)
        past_key_values = self.dropout(past_key_values)
        past_key_values = past_key_values.permute([2, 0, 3, 1, 4]).split(2)
        return past_key_values

    def __get_ud_diff(self, feats, tag_seqs, user_dict_weight):
        """Compute the user-dictionary bonus added to emission scores.

        The bonus per position is (max - mean) over the tag axis, applied
        to the positions marked in *tag_seqs* and scaled by
        *user_dict_weight*.
        """
        diff = torch.max(feats, dim=2)[0] - torch.mean(feats, dim=2)
        diff = diff.unsqueeze(dim=-1)
        diff = diff.expand(-1, -1, tag_seqs.size()[-1])
        diff = tag_seqs * diff * user_dict_weight
        return diff

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        task=None,
        labels=None,
        heads=None,
        tag_seqs=None,
        user_dict_weight=0.05,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Labels for computing the token classification loss. Indices should be in ``[0, ..., config.num_labels -
            1]``.
        """

        # ``task`` arrives as a tensor; every element of the batch belongs
        # to the same task, so the first index selects the task name.
        task = task.tolist()[0]
        task = self.all_tasks[task]

        batch_size = input_ids.shape[0]
        past_key_values = self.get_prompt(task=task, batch_size=batch_size)
        # The prefix positions must be attendable, so prepend ones.
        prefix_attention_mask = torch.ones(batch_size, self.pre_seq_len).to(
            self.encoder.device)
        prefix_attention_mask = torch.cat(
            (prefix_attention_mask, attention_mask), dim=1)

        outputs = self.encoder(input_ids,
                               attention_mask=prefix_attention_mask,
                               past_key_values=past_key_values)

        feats = outputs[0]
        feats = self.dropout(feats)

        # Build a mask that excludes the [CLS] and [SEP] positions.
        #if task==self.biaffine_task:
        seq_len_diff = 2
        #else:
        #    seq_len_diff=1
        seq_len = attention_mask.sum(dim=-1) - seq_len_diff
        broad_cast_seq_len = torch.arange(attention_mask.shape[1] -
                                          seq_len_diff).expand(
                                              attention_mask.shape[0],
                                              -1).to(seq_len.device)
        mask = broad_cast_seq_len < seq_len.unsqueeze(1)

        # Dependency parsing: the [CLS]/[SEP] positions must be stripped
        # before feeding the biaffine parser.
        if task == self.biaffine_task:
            feats = feats[:, 1:-1]
            output = self.parser.forward(feats=feats,
                                         mask=mask,
                                         gold_heads=heads,
                                         char_labels=labels)
        # Other (sequence-labeling) tasks.
        else:
            logits = self.seq_label_classifier[task](feats)
            if self.training:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(
                    logits.view(-1, len(self.task_label_map[task])),
                    labels.view(-1))
                output = {
                    'loss': loss,
                    'logits': logits,
                }
            else:
                # Inference: decode with the Viterbi algorithm built into
                # the CRF module (optionally biased by the user dictionary).
                probs = logits[:, 1:-1]
                if tag_seqs is not None:
                    diff = self.__get_ud_diff(probs, tag_seqs, user_dict_weight)
                    probs = probs + diff
                paths, scores = self.crf[task].viterbi_decode(logits=probs,
                                                              mask=mask)
                # Padded positions are marked -100 so callers can skip them.
                paths[mask == 0] = -100
                output = {
                    'pred': paths,
                    'logits': logits,
                }

        # Rescale the training loss by the ensembled per-task weight.
        # NOTE(review): assumes the parser's output dict contains both
        # 'label_loss' and 'loss' keys — confirm in BertParser.forward.
        if self.training:
            if task == self.biaffine_task:
                loss_weight = output['label_loss']
            else:
                loss_weight = output['loss']
            self.ensembledWeightManager.update(task=task,
                                               loss=float(loss_weight))
            weight = self.ensembledWeightManager.get(task)
            output['loss'] = output['loss'] * weight

        return output
""]},"fastHan.FastModel":{FastHan:[1,1,1,""],Sentence:[1,1,1,""],Token:[1,1,1,""]},"fastHan.FastModel.FastHan":{__init__:[1,2,1,""],add_user_dict:[1,2,1,""],remove_user_dict:[1,2,1,""],set_cws_style:[1,2,1,""],set_device:[1,2,1,""],set_user_dict_weight:[1,2,1,""]},"fastHan.FastModel.Sentence":{__init__:[1,2,1,""]},"fastHan.FastModel.Token":{__init__:[1,2,1,""]},"fastHan.Sentence":{__init__:[0,2,1,""]},"fastHan.Token":{__init__:[0,2,1,""]},"fastHan.model":{BertCharParser:[3,0,0,"-"],CharParser:[4,0,0,"-"],UserDict:[5,0,0,"-"],bert:[6,0,0,"-"],bert_encoder_theseus:[7,0,0,"-"],model:[8,0,0,"-"],old_fastNLP_bert:[9,0,0,"-"],utils:[10,0,0,"-"]},"fastHan.model.BertCharParser":{BertCharParser:[3,1,1,""],CharBiaffineParser:[3,1,1,""],drop_input_independent:[3,4,1,""]},"fastHan.model.BertCharParser.BertCharParser":{__init__:[3,2,1,""],forward:[3,2,1,""],predict:[3,2,1,""]},"fastHan.model.BertCharParser.CharBiaffineParser":{__init__:[3,2,1,""],forward:[3,2,1,""],loss:[3,3,1,""],reset_parameters:[3,2,1,""]},"fastHan.model.CharParser":{CharBiaffineParser:[4,1,1,""],CharParser:[4,1,1,""],drop_input_independent:[4,4,1,""]},"fastHan.model.CharParser.CharBiaffineParser":{__init__:[4,2,1,""],forward:[4,2,1,""],loss:[4,3,1,""],predict:[4,2,1,""],reset_parameters:[4,2,1,""]},"fastHan.model.CharParser.CharParser":{__init__:[4,2,1,""],forward:[4,2,1,""],predict:[4,2,1,""]},"fastHan.model.UserDict":{Trie:[5,1,1,""],UserDict:[5,1,1,""]},"fastHan.model.UserDict.Trie":{__init__:[5,2,1,""],insert:[5,2,1,""],search:[5,2,1,""],startsWith:[5,2,1,""]},"fastHan.model.UserDict.UserDict":{__init__:[5,2,1,""],load_file:[5,2,1,""],load_list:[5,2,1,""],process_sentence:[5,2,1,""]},"fastHan.model.bert":{BertEmbedding:[6,1,1,""],BertWordPieceEncoder:[6,1,1,""]},"fastHan.model.bert.BertEmbedding":{__init__:[6,2,1,""],drop_word:[6,2,1,""],forward:[6,2,1,""]},"fastHan.model.bert.BertWordPieceEncoder":{__init__:[6,2,1,""],drop_word:[6,2,1,""],embed_size:[6,5,1,""],embedding_dim:[6,5,1,""],forward:[6,2,1,""]
,index_datasets:[6,2,1,""],num_embedding:[6,5,1,""]},"fastHan.model.bert_encoder_theseus":{BertModel:[7,1,1,""]},"fastHan.model.bert_encoder_theseus.BertModel":{__init__:[7,2,1,""],forward:[7,2,1,""],from_pretrained:[7,6,1,""],init_bert_weights:[7,2,1,""]},"fastHan.model.model":{CharModel:[8,1,1,""]},"fastHan.model.model.CharModel":{__init__:[8,2,1,""],forward:[8,2,1,""],predict:[8,2,1,""]},"fastHan.model.old_fastNLP_bert":{BertModel:[9,1,1,""]},"fastHan.model.old_fastNLP_bert.BertModel":{__init__:[9,2,1,""],forward:[9,2,1,""],from_pretrained:[9,6,1,""],init_bert_weights:[9,2,1,""]},"fastHan.model.utils":{check_dataloader_paths:[10,4,1,""],get_tokenizer:[10,4,1,""]},fastHan:{FastHan:[0,1,1,""],FastModel:[1,0,0,"-"],Sentence:[0,1,1,""],Token:[0,1,1,""],model:[2,0,0,"-"]}},objnames:{"0":["py","module","Python \u6a21\u5757"],"1":["py","class","Python \u7c7b"],"2":["py","method","Python \u65b9\u6cd5"],"3":["py","staticmethod","Python \u9759\u6001\u65b9\u6cd5"],"4":["py","function","Python \u51fd\u6570"],"5":["py","attribute","Python \u5c5e\u6027"],"6":["py","classmethod","Python 
\u7c7b\u65b9\u6cd5"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:staticmethod","4":"py:function","5":"py:attribute","6":"py:classmethod"},terms:{"02":[7,9],"05":[0,1],"10":[0,1,6],"100":[3,4,8],"12":[6,7,9],"2304":6,"30522":[7,9],"3072":[7,9],"31":8,"400":4,"430":6,"500":[3,4,8],"510":6,"512":[6,7,9],"768":[6,7,8,9],"800":4,"char":[3,4,8],"class":[0,1,3,4,5,6,7,8,9],"default":[3,4],"float":[0,1,6,7,9,12],"for":[0,1,5,6],"if":[3,4,5],"import":6,"in":[5,6],"int":[0,1,7,9],"long":12,"return":[3,4,5],"static":[3,4],"true":[6,7,8,9,12],"var":[3,4],"void":5,"with":[5,12],__init__:[0,1,3,4,5,6,7,8,9],accur:[0,1,5],add_cls_sep:6,add_user_dict:[0,1],add_word_lst:6,an:12,and:[3,4,7,8,9],ani:5,answer_list:[0,1],app_index:4,appli:12,arc_mlp_siz:[3,4],arc_pr:[3,4],arc_tru:[3,4],as:[0,1],attent:[7,9],attention_mask:[7,9],attention_probs_dropout_prob:[7,9],auto_trunc:6,avg:6,base:[0,1,6,7,9],batch:12,batch_siz:[3,4,6],bert:[0,1,2,7,9],bert_encoder_theseu:[0,2],bertcharpars:[0,2],bertembed:6,bertmodel:[7,9],bertpool:6,bertwordpieceencod:6,beta:12,biaffine_pars:[3,4],biaffinepars:[3,4],bidirect:[7,9],bigram:4,bigram_vocab_s:4,bin:6,bmeso:8,bodi:12,bool:[5,6],both:[3,4,7,8,9],by:[3,4,7,8,9],cell:12,char_head:[3,4],char_label:[3,4],char_vocab_s:4,charbiaffinepars:[3,4],charmodel:[0,1,8],charpars:[0,2],check_dataloader_path:10,cityu:[0,1],classmethod:[7,9],cls:6,cnc:[0,1],column:12,com:[7,9],comput:[3,4],concat:6,config:[7,9],contain:12,contextual_embed:6,contextualembed:6,core:6,corpu:[0,1],cpu:[0,1],ctb:[0,1],cuda:[0,1],cws:[0,1],data:5,dataload:10,dataset:[6,12],delta:12,dep_head:8,dep_label:8,depend:[0,1],dev:[0,1,10],devic:[0,1],device_id:[0,1],dic:[0,1],dict:[3,4,10],doc:12,docutil:12,drop_input_independ:[3,4],drop_word:6,dropout:[3,4,6,7,8,9],dropout_emb:[3,4],element:12,emb:[3,4,6,8],emb_dim:4,embed:[3,4,6,7,9],embed_s:6,embedding_dim:[3,4,6,8],emphas:12,emphasi:12,en:6,encod:[3,4,5,6],encoding_typ:8,english:[7,9],exampl:6,f_:12,f_beta:12,fals:[3,4,5,6,8,1
2],fastmodel:0,fastnlp:[3,4,6,12],feat:3,ffn:[7,9],field_nam:6,file:12,first:[6,12],floattensor:6,forward:[3,4,6,7,8,9],frac:12,from:[6,7,9],from_pretrain:[7,9],gelu:[7,9],get_token:10,github:[7,9],given:5,gold_head:[3,4],good:6,gpu:[0,1],gradient:6,h_2:12,h_2o:12,head:[0,1,3,4],head_label:[0,1],head_pr:[3,4],header:12,help:[0,1,5],here:5,hidden_act:[7,9],hidden_dropout_prob:[7,9],hidden_s:[7,9],html:12,http:[7,9,12],huggingfac:[7,9],include_cls_sep:6,index:10,index_dataset:6,infer:[3,4],init_bert_weight:[7,9],initi:[0,1,3,4,5,7,8,9],initializer_rang:[7,9],inlin:12,input:[6,7,9,12],input_id:[7,9],insert:5,instanc:12,intermediate_s:[7,9],intern:[3,4,7,8,9],into:5,is:[3,4,5,6,12],json:6,kei:10,kwarg:[7,9],label_mlp_s:[3,4],label_pr:[3,4],label_tru:[3,4],label_vocab:8,larg:[0,1],last:6,layer:6,layer_num:[6,7],layers_cut:9,len:[6,12],line:12,lineno:12,list:[0,1,6],liter:12,load_fil:5,load_list:5,longtensor:6,loss:[3,4],lstm:[3,4],mai:12,markdown:12,mask:[3,4],math:12,max:6,max_len:[3,4,6],max_position_embed:[7,9],mlp:[3,4],model:0,model_dir_or_nam:[6,7,9],model_typ:[0,1],modul:[0,2],msr:[0,1],mst:[3,4],n_tag:[3,4],na:12,naoh:12,ner:[0,1],net:12,ngram:[3,4],ngram_per_cha:4,ngram_per_char:[3,4],nn:[3,4,6,7,8,9],none:[0,1,3,4,6,7,8,9],not:[3,4],num_attention_head:[7,9],num_embed:[3,4,6],num_hidden_lay:[7,9],num_label:[3,4],object:[0,1,5],of:[3,4],old_fastnlp_bert:[0,2],option:12,or:12,output:[6,12],output_all_encoded_lay:[7,9],pad:6,param:[0,1,3,4,12],pars:[0,1,3,4],parsing_arc_mlp_s:8,parsing_label_mlp_s:8,parsing_rnn_lay:8,parsing_use_greedy_inf:8,part:[3,4],path:[5,10],piec:6,pku:[0,1],pool_method:6,pooled_cl:6,pos:[0,1,8],pos_emb_dim:[3,4],pos_idx:8,pos_vocab_s:[3,4],pre:12,pre_bigram:4,pre_bigrams_emb:4,pre_char:[3,4],pre_chars_emb:4,pre_ngram:[3,4],pre_trigram:4,pre_trigrams_emb:4,predict:[3,4,8],prefix:5,pretrain:[7,9],print:12,process_sent:5,provid:[3,4],pytorch:[7,9],quickref:12,rais:10,rec:12,regular:6,remove_user_dict:[0,1],represent:[7,9],requires_grad:6,reset_
paramet:[3,4],result:[3,4],revers:5,rnn:[3,4],rnn_hidden_s:[3,4],rnn_layer:[3,4],root:[0,1,3,4],row:12,rst:12,rtype:5,same:12,scriptmodul:[3,4,7,8,9],search:5,second:12,see:[0,1,5],segment:[7,9],self:[0,1,5,6],sentenc:[0,1,5,12],sep:6,seq_len:[3,4,8],seq_len_for_wordlist:8,set:[0,1],set_cws_styl:[0,1],set_devic:[0,1],set_user_dict_weight:[0,1],sf:12,share:[3,4,7,8,9],signatur:[0,1,5],size:6,sohu:12,some:10,sourceforg:12,span:12,speech:[3,4],split:6,stackembed:6,start:5,startswith:5,state:[3,4,7,8,9],str:[0,1,5,6,7,9,10],strong:12,structur:5,style:[0,1],sxu:[0,1],tabl:12,tag:6,tag_seq:8,target:[0,1,8,12],task_class:8,test:[0,1,10],text:12,that:5,the:[3,4,5,6,7,9,12],there:5,thi:12,third:12,to:10,to_index:6,token:[0,1,6],token_type_id:[6,7,9],torch:[0,1,3,4,6,7,8,9],train:10,transform:[3,4,7,9],trie:5,trigram:4,trigram_vocab_s:4,tupl:[3,4],txt:[6,10],type:[0,1,5],type_vocab_s:[7,9],udc:[0,1],uncas:[6,7,9],undocu:[7,9],union:10,unk:6,unknown_index:6,uparrow:12,use_averag:8,use_greedy_inf:[3,4],use_pos_embed:8,user:12,userdict:[0,2],utf:5,util:[0,2],valu:[3,4,6,10,12],vector_s:3,vocab:6,vocab_s:[7,9],vocabulari:6,weight:[0,1,7,9],whether:6,word:[0,1,5,6],word_dropout:6,word_embed:[3,4],word_len:8,word_list:5,word_piec:6,wtb:[0,1],xxx:10,your:5,zx:[0,1]},titles:["fastHan package","fastHan.FastModel module","fastHan.model package","fastHan.model.BertCharParser module","fastHan.model.CharParser module","fastHan.model.UserDict module","fastHan.model.bert module","fastHan.model.bert_encoder_theseus module","fastHan.model.model module","fastHan.model.old_fastNLP_bert module","fastHan.model.utils module","demo \u4e2d\u6587\u6587\u6863","\u5927\u6807\u9898"],titleterms:{api:11,bert:6,bert_encoder_theseu:7,bertcharpars:3,charpars:4,csv:12,demo:11,fasthan:[0,1,2,3,4,5,6,7,8,9,10],fastmodel:1,model:[2,3,4,5,6,7,8,9,10],modul:[1,3,4,5,6,7,8,9,10],old_fastnlp_bert:9,packag:[0,2],submodul:[0,2],subpackag:0,userdict:5,util:10}}) 
import re


def restore_camr(line, id_token_list):
    """Restore a linearized CAMR prediction *line* into a token list.

    *id_token_list* maps 1-based positions to the sentence tokens; node
    variables are emitted as ``x<idx>/<concept>``. Returns the list of
    string fragments (brackets, node values, relations) that, joined,
    form the restored AMR.
    """
    # Normalize punctuation before parsing.
    # NOTE(review): the first three substitutions replace full-width
    # Chinese punctuation （ ） ： with their ASCII forms; the characters
    # were mangled in the dump this was recovered from — confirm against
    # the upstream source. (An ASCII '(' pattern here would raise
    # re.error, so full-width is the only consistent reading.)
    line = re.sub('（', '(', line)
    line = re.sub('）', ')', line)
    line = re.sub('：', ':', line)
    # Times like "12:30" keep a full-width colon so ':' stays an AMR marker.
    time_list = re.findall(r'\d+\s*\:\s*\d+', line)
    line = re.sub('"', '', line)
    for s in time_list:
        ss = re.sub(':', '：', s)
        line = re.sub(s, ss, line)

    idx = 0
    amr_list = []

    new_mark = len(id_token_list) + 2  # next free index for unseen concepts
    node_dict = {}
    node_name_list = []

    def convert_node_value(node_value, is_bracket1=False):
        """Turn a concept string into an ``x<idx>/<value>`` node label."""
        nonlocal id_token_list, node_name_list
        if not node_value:
            return node_value
        if '^' not in node_value:
            node_name = search_mark(node_value.split('-')[0])
            if not is_bracket1:
                node_name_list.append(node_name)
            return node_name + '/' + node_value
        else:
            # "a^b" encodes a re-entrant edge: variable of a, concept of b.
            node_value1, node_value2 = node_value.split('^')[0:2]
            node_name1 = search_mark(node_value1.split('-')[0])
            if int(node_name1[1:]) > len(id_token_list):
                node_name1 = 'x1'
            node_name2 = search_mark(node_value2.split('-')[0])
            if node_name2 not in node_name_list:
                node_name2 = node_name_list[0]
            return node_name1 + '/' + node_name2

    def search_mark(node_value):
        """Find (or allocate) the sentence index for *node_value*."""
        nonlocal id_token_list, node_dict, new_mark
        node_name = node_dict.get(node_value, 0)
        if node_name > len(id_token_list):
            # Already mapped to an out-of-sentence concept: allocate anew.
            node_name = new_mark
            new_mark += 1
            node_dict[node_value] = node_name
            return 'x' + str(node_name)

        # Search forward from the last known position.
        i = node_name + 1
        while i <= len(id_token_list):
            if id_token_list[i] == node_value:
                break
            i += 1
        if i > len(id_token_list):
            # Not found ahead: wrap around and search from the start.
            # NOTE(review): indentation of this fallback was ambiguous in
            # the recovered source — this reading keeps `j` defined
            # whenever it is evaluated; verify against upstream.
            j = 1
            while j <= node_name:
                if id_token_list[j] == node_value:
                    break
                j += 1
            if node_name >= 1 and j <= node_name:
                node_name = j
            else:
                node_name = i

        if node_name <= 0:
            node_name = new_mark
            new_mark += 1

        node_dict[node_value] = node_name
        return 'x' + str(node_name)

    # --- recursive linearization handlers ---
    def bracket1():
        """Consume an optional alignment group "(...)" before a node."""
        nonlocal idx
        nonlocal amr_list
        cur_charseq = []
        cur_idx = idx
        has_content = False
        while True:
            if cur_idx >= len(line):
                break
            if line[cur_idx] == '(' and not has_content:
                has_content = True
            if has_content:
                if line[cur_idx] == ':':
                    # A relation marker means there was no group: rewind.
                    cur_idx = idx
                    cur_charseq = []
                    break
                if line[cur_idx] == '(' and cur_charseq:
                    break
                cur_charseq.append(line[cur_idx])
                if cur_charseq[-1] == ')':
                    has_content = False
                    cur_idx += 1
                    break
            cur_idx += 1

        idx = cur_idx
        if not cur_charseq:
            amr_list.append('(')
            amr_list.append(')')
            cur_node_value = ''
        elif cur_charseq[-1] != ')':
            cur_node_value = get_seq_value(cur_charseq[1:])
            amr_list.append('(')
            amr_list.append(convert_node_value(cur_node_value, True))
            amr_list.append(')')
        else:
            cur_node_value = get_seq_value(cur_charseq[1:-1])
            amr_list.append('(')
            amr_list.append(convert_node_value(cur_node_value, True))
            amr_list.append(')')

    def bracket2():
        """Consume a node "(<concept> ...)" and its outgoing relations."""
        nonlocal idx
        nonlocal amr_list
        nonlocal id_token_list
        cur_charseq = []
        cur_idx = idx

        # Find the opening bracket.
        while cur_idx < len(line):
            if line[cur_idx] == '(':
                break
            cur_idx += 1
        if cur_idx < len(line):
            amr_list.append('(')
        else:
            # No node left: emit a placeholder and stop.
            amr_list.append('(')
            amr_list.append(convert_node_value('-'))
            amr_list.append(')')
            idx = cur_idx
            return

        # Read the concept/variable name.
        cur_idx += 1
        while cur_idx < len(line):
            if line[cur_idx] == ')' or line[cur_idx] == ':':
                break
            if line[cur_idx] == '(':
                break
            cur_charseq.append(line[cur_idx])
            cur_idx += 1
        cur_node_value = get_seq_value(cur_charseq)
        if not cur_node_value:
            cur_node_value = '-'
        amr_list.append(convert_node_value(cur_node_value))
        cur_charseq.clear()

        if cur_node_value == 'name':
            # Special-case named entities: collect the :opN children and
            # rewrite the node as "<x1>_<x2>.../name".
            name_idx = len(amr_list) - 1
            n_value_list = []
            while cur_idx < len(line) and line[cur_idx] != ')':
                cur_charseq.append(line[cur_idx])
                cur_idx += 1
            cur_charseq.append(':')
            l = r = 0
            r_value = ''
            for i, ch in enumerate(cur_charseq):
                if ch == ':':
                    l = i
                    n_value = get_seq_value(cur_charseq[r:l])
                    if not n_value:
                        n_value = '-'
                    if r_value:
                        amr_list.append(' ')
                        amr_list.append(r_value)
                    amr_list.append(' ')
                    n_value = convert_node_value(n_value, True)
                    if n_value.split('/')[-1].startswith('x'):
                        # Replace an unresolved concept by the sentence token.
                        n_value = n_value.split(
                            '/')[0] + '/' + id_token_list[int(
                                n_value.split('/')[0][1:])]
                    amr_list.append(n_value)
                    n_value_list.append(n_value.split('/')[0])
                    n_value = r_value = ""
                elif ch.isspace() and i > 0 and cur_charseq[i - 1].isdigit():
                    r = i
                    r_value = get_seq_value(cur_charseq[l:r])
                    if not r_value:
                        r_value = 'op1'
                    r_value = ':' + r_value
            cur_idx += 1
            amr_list.append(')')
            idx = cur_idx
            if n_value_list:
                amr_list[name_idx] = '_'.join(n_value_list) + '/' + 'name'
            return

        def relation():
            """Consume one ":rel (...)(...)" child; False when exhausted."""
            nonlocal cur_idx, idx
            cur_relationseq = []
            relation_content = False
            while cur_idx < len(line):
                if line[cur_idx] == ')':
                    return relation_content
                if line[cur_idx] == ':':
                    break
                cur_idx += 1

            if cur_idx >= len(line):
                return relation_content

            while cur_idx < len(line):
                if line[cur_idx] == ':':
                    relation_content = True
                    cur_relationseq = []
                if relation_content:
                    if line[cur_idx] == '(':
                        cur_relation_value = get_seq_value(cur_relationseq)
                        amr_list.append(' ')
                        amr_list.append(':' + cur_relation_value)
                        idx = cur_idx
                        bracket1()
                        bracket2()
                        cur_idx = idx
                        break
                    elif line[cur_idx] == ')':
                        # Relation with a missing child: emit a placeholder.
                        cur_relation_value = get_seq_value(cur_relationseq)
                        amr_list.append(' ')
                        amr_list.append(':' + cur_relation_value)
                        amr_list.append('()(')
                        amr_list.append(convert_node_value('-'))
                        amr_list.append(')')
                        cur_idx += 1
                        break
                    cur_relationseq.append(line[cur_idx])
                cur_idx += 1
            idx = cur_idx
            return relation_content

        while True:
            if not relation():
                break

        # Skip to (and past) this node's closing bracket.
        while cur_idx < len(line):
            if line[cur_idx] == ')':
                break
            cur_idx += 1
        amr_list.append(')')
        cur_idx = cur_idx + 1
        idx = cur_idx
        return

    bracket2()
    return amr_list


def get_seq_value(cur_charseq):
    """Join a character buffer into a clean token.

    Drops whitespace, ':', '(' and ')'. Mutates *cur_charseq* in place
    (whitespace entries are blanked), matching the original behaviour.
    """
    for idx, ch in enumerate(cur_charseq):
        if ch.isspace():
            cur_charseq[idx] = ''
    s = ''.join(cur_charseq)
    # Raw strings: '\(' / '\)' are invalid str escapes (SyntaxWarning 3.12+).
    s = re.sub(':', '', s)
    s = re.sub(r'\(', '', s)
    s = re.sub(r'\)', '', s)
    return s


def convert_camr_to_lines(amr):
    """Pretty-print a flat AMR string: one relation per line, tab-indented
    by bracket depth, ending with a newline."""
    amr_list = amr.split(':')
    for i, line in enumerate(amr_list):
        if i == 0:
            continue
        amr_list[i] = ':' + line

    # Indent each bracket-opening segment by the current nesting depth.
    num = 0
    for i, line in enumerate(amr_list):
        if '(' not in line:
            continue
        amr_list[i] = '\t' * num + line
        num = num + line.count('(') - line.count(')')

    for i, line in enumerate(amr_list):
        if i == 0:
            continue
        if '(' in line:
            amr_list[i] = '\n' + line

    return ''.join(amr_list) + '\n'
fastHan.model.UserDict module — fastHan 0.5.0 文档 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
47 | 48 | 99 | 100 |
101 | 102 | 103 | 109 | 110 | 111 |
112 | 113 |
114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 |
132 | 133 |
    134 | 135 |
  • »
  • 136 | 137 |
  • fastHan.model.UserDict module
  • 138 | 139 | 140 |
  • 141 | 142 | 143 | View page source 144 | 145 | 146 |
  • 147 | 148 |
149 | 150 | 151 |
152 |
153 |
154 |
155 | 156 |
157 |

fastHan.model.UserDict module

158 |
159 |
160 | class fastHan.model.UserDict.Trie[源代码]
161 |

基类:object

162 |
163 |
164 | __init__()[源代码]
165 |

Initialize your data structure here.

166 |
167 | 168 |
169 |
170 | insert(word)[源代码]
171 |

Inserts a word into the trie. 172 | :type word: str 173 | :rtype: void

174 |
175 | 176 |
177 |
178 | search(word)[源代码]
179 |

Returns if the word is in the trie. 180 | :type word: str 181 | :rtype: bool

182 |
183 | 184 |
185 |
186 | startsWith(prefix)[源代码]
187 |

Returns if there is any word in the trie that starts with the given prefix. 188 | :type prefix: str 189 | :rtype: bool

190 |
191 | 192 |
193 | 194 |
195 |
196 | class fastHan.model.UserDict.UserDict[源代码]
197 |

基类:object

198 |
199 |
200 | __init__()[源代码]
201 |

Initialize self. See help(type(self)) for accurate signature.

202 |
203 | 204 |
205 |
206 | load_file(path, encoding='UTF-8')[源代码]
207 |
208 | 209 |
210 |
211 | load_list(word_list)[源代码]
212 |
213 | 214 |
215 |
216 | process_sentence(sentence, reverse=False)[源代码]
217 |
218 | 219 |
220 | 221 |
222 | 223 | 224 |
225 | 226 |
227 |
228 | 229 | 230 |
231 | 232 |
233 |

234 | 235 | © 版权所有 2020, fastHan 236 | 237 |

238 |
239 | 240 | 241 | 242 | Built with Sphinx using a 243 | 244 | theme 245 | 246 | provided by Read the Docs. 247 | 248 |
249 | 250 |
251 |
252 | 253 |
254 | 255 |
256 | 257 | 258 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | --------------------------------------------------------------------------------