├── requirements.txt ├── test ├── __init__.py └── core │ ├── __init__.py │ └── test_fastHan.py ├── fastHan ├── model │ ├── __init__.py │ ├── camrModel.py │ ├── multitask_metric_base.py │ ├── weight_manager.py │ ├── UserDict.py │ ├── camr_finetune_dataloader.py │ ├── metrics.py │ ├── dependency_parsing_model.py │ ├── finetune_dataloader.py │ ├── baseModel.py │ └── camr_restore.py └── __init__.py ├── docs ├── requirements.txt ├── build │ ├── html │ │ ├── objects.inv │ │ ├── _static │ │ │ ├── file.png │ │ │ ├── minus.png │ │ │ ├── plus.png │ │ │ ├── fonts │ │ │ │ ├── FontAwesome.otf │ │ │ │ ├── lato-bold.woff │ │ │ │ ├── lato-bold.woff2 │ │ │ │ ├── lato-normal.woff │ │ │ │ ├── Lato │ │ │ │ │ ├── lato-bold.eot │ │ │ │ │ ├── lato-bold.ttf │ │ │ │ │ ├── lato-bold.woff │ │ │ │ │ ├── lato-bold.woff2 │ │ │ │ │ ├── lato-italic.eot │ │ │ │ │ ├── lato-italic.ttf │ │ │ │ │ ├── lato-italic.woff │ │ │ │ │ ├── lato-regular.eot │ │ │ │ │ ├── lato-regular.ttf │ │ │ │ │ ├── lato-italic.woff2 │ │ │ │ │ ├── lato-regular.woff │ │ │ │ │ ├── lato-regular.woff2 │ │ │ │ │ ├── lato-bolditalic.eot │ │ │ │ │ ├── lato-bolditalic.ttf │ │ │ │ │ ├── lato-bolditalic.woff │ │ │ │ │ └── lato-bolditalic.woff2 │ │ │ │ ├── lato-normal.woff2 │ │ │ │ ├── Roboto-Slab-Bold.woff │ │ │ │ ├── Roboto-Slab-Thin.woff │ │ │ │ ├── lato-bold-italic.woff │ │ │ │ ├── Roboto-Slab-Bold.woff2 │ │ │ │ ├── Roboto-Slab-Light.woff │ │ │ │ ├── Roboto-Slab-Light.woff2 │ │ │ │ ├── Roboto-Slab-Thin.woff2 │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ ├── lato-bold-italic.woff2 │ │ │ │ ├── lato-normal-italic.woff │ │ │ │ ├── Roboto-Slab-Regular.woff │ │ │ │ ├── Roboto-Slab-Regular.woff2 │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ ├── fontawesome-webfont.woff2 │ │ │ │ ├── lato-normal-italic.woff2 │ │ │ │ └── RobotoSlab │ │ │ │ │ ├── roboto-slab-v7-bold.eot │ │ │ │ │ ├── roboto-slab-v7-bold.ttf │ │ │ │ │ ├── roboto-slab-v7-bold.woff │ │ │ │ │ ├── roboto-slab-v7-bold.woff2 │ │ │ │ │ ├── 
roboto-slab-v7-regular.eot │ │ │ │ │ ├── roboto-slab-v7-regular.ttf │ │ │ │ │ ├── roboto-slab-v7-regular.woff │ │ │ │ │ └── roboto-slab-v7-regular.woff2 │ │ │ ├── css │ │ │ │ ├── fonts │ │ │ │ │ ├── lato-bold.woff │ │ │ │ │ ├── lato-bold.woff2 │ │ │ │ │ ├── lato-normal.woff │ │ │ │ │ ├── lato-normal.woff2 │ │ │ │ │ ├── Roboto-Slab-Bold.woff │ │ │ │ │ ├── Roboto-Slab-Bold.woff2 │ │ │ │ │ ├── lato-bold-italic.woff │ │ │ │ │ ├── lato-bold-italic.woff2 │ │ │ │ │ ├── Roboto-Slab-Regular.woff │ │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ │ ├── lato-normal-italic.woff │ │ │ │ │ ├── lato-normal-italic.woff2 │ │ │ │ │ ├── Roboto-Slab-Regular.woff2 │ │ │ │ │ └── fontawesome-webfont.woff2 │ │ │ │ └── badge_only.css │ │ │ ├── documentation_options.js │ │ │ ├── js │ │ │ │ ├── badge_only.js │ │ │ │ ├── html5shiv.min.js │ │ │ │ ├── html5shiv-printshiv.min.js │ │ │ │ └── theme.js │ │ │ ├── translations.js │ │ │ └── pygments.css │ │ ├── _images │ │ │ └── procedures.PNG │ │ ├── _sources │ │ │ ├── fastHan.FastModel.rst.txt │ │ │ ├── fastHan.model.bert.rst.txt │ │ │ ├── fastHan.model.model.rst.txt │ │ │ ├── fastHan.model.utils.rst.txt │ │ │ ├── fastHan.model.UserDict.rst.txt │ │ │ ├── fastHan.model.CharParser.rst.txt │ │ │ ├── fastHan.model.BertCharParser.rst.txt │ │ │ ├── fastHan.model.old_fastNLP_bert.rst.txt │ │ │ ├── fastHan.model.bert_encoder_theseus.rst.txt │ │ │ ├── fastHan.rst.txt │ │ │ ├── index.rst.txt │ │ │ ├── fastHan.model.rst.txt │ │ │ └── user │ │ │ │ └── example.rst.txt │ │ ├── .buildinfo │ │ ├── search.html │ │ ├── _modules │ │ │ └── index.html │ │ ├── index.html │ │ ├── fastHan.model.html │ │ ├── fastHan.model.utils.html │ │ ├── py-modindex.html │ │ ├── fastHan.model.model.html │ │ ├── searchindex.js │ │ └── fastHan.model.UserDict.html │ └── doctrees │ │ ├── index.doctree │ │ ├── fastHan.doctree │ │ ├── environment.pickle │ │ ├── fastHan.model.doctree │ │ ├── user │ │ └── example.doctree │ │ 
├── fastHan.FastModel.doctree │ │ ├── fastHan.model.bert.doctree │ │ ├── fastHan.model.model.doctree │ │ ├── fastHan.model.utils.doctree │ │ ├── fastHan.model.UserDict.doctree │ │ ├── fastHan.model.CharParser.doctree │ │ ├── fastHan.model.BertCharParser.doctree │ │ ├── fastHan.model.old_fastNLP_bert.doctree │ │ └── fastHan.model.bert_encoder_theseus.doctree ├── source │ ├── modules.rst │ ├── figures │ │ └── procedures.PNG │ ├── fastHan.FastModel.rst │ ├── fastHan.model.bert.rst │ ├── fastHan.model.model.rst │ ├── fastHan.model.utils.rst │ ├── fastHan.model.UserDict.rst │ ├── fastHan.model.CharParser.rst │ ├── fastHan.model.BertCharParser.rst │ ├── fastHan.model.old_fastNLP_bert.rst │ ├── fastHan.model.bert_encoder_theseus.rst │ ├── fastHan.rst │ ├── user │ │ ├── version.rst │ │ ├── installation.rst │ │ ├── example.rst │ │ └── quickstart.rst │ ├── fastHan.model.rst │ ├── index.rst │ └── conf.py ├── Makefile └── doc_utils.py ├── .vscode └── settings.json ├── setup.py ├── .Jenkinsfile └── .gitignore /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fastHan/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fastHan/model/camrModel.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: 
-------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme -------------------------------------------------------------------------------- /fastHan/__init__.py: -------------------------------------------------------------------------------- 1 | from .FastModel import FastHan 2 | from .FastCamrModel import FastCAMR -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "C:\\Users\\gzc\\Anaconda3\\python.exe" 3 | } -------------------------------------------------------------------------------- /docs/build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/objects.inv -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | fastHan 2 | ======= 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | fastHan 8 | -------------------------------------------------------------------------------- /docs/build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/file.png -------------------------------------------------------------------------------- /docs/build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/minus.png -------------------------------------------------------------------------------- /docs/build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/plus.png -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.doctree -------------------------------------------------------------------------------- /docs/source/figures/procedures.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/source/figures/procedures.PNG -------------------------------------------------------------------------------- /docs/build/doctrees/environment.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/build/html/_images/procedures.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_images/procedures.PNG -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/user/example.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/user/example.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.FastModel.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.FastModel.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.bert.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.bert.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/FontAwesome.otf 
-------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-normal.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-normal.woff -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.model.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.model.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.utils.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.utils.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-bold.woff -------------------------------------------------------------------------------- 
/docs/build/html/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-normal.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-normal.woff2 -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.UserDict.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.UserDict.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-normal.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-normal.woff -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-normal.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-normal.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.eot: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Thin.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Thin.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-bold-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-bold-italic.woff -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.CharParser.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.CharParser.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Light.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Light.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Light.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Light.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Thin.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Thin.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-bold-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-bold-italic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-normal-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-normal-italic.woff -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.BertCharParser.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.BertCharParser.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-bold-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-bold-italic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-bold-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-bold-italic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bolditalic.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.woff: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Roboto-Slab-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/Roboto-Slab-Regular.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/lato-normal-italic.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/lato-normal-italic.woff2 -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.old_fastNLP_bert.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.old_fastNLP_bert.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- 
/docs/build/html/_static/css/fonts/lato-normal-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-normal-italic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/lato-normal-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/lato-normal-italic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/css/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/css/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/build/doctrees/fastHan.model.bert_encoder_theseus.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/doctrees/fastHan.model.bert_encoder_theseus.doctree -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot 
-------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastnlp/fastHan/HEAD/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /docs/source/fastHan.FastModel.rst: -------------------------------------------------------------------------------- 1 | fastHan.FastModel module 2 | ======================== 3 | 4 | .. automodule:: fastHan.FastModel 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.bert.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.bert module 2 | ========================= 3 | 4 | .. automodule:: fastHan.model.bert 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.model.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.model module 2 | ========================== 3 | 4 | .. automodule:: fastHan.model.model 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.utils.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.utils module 2 | ========================== 3 | 4 | .. 
automodule:: fastHan.model.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.FastModel.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.FastModel module 2 | ======================== 3 | 4 | .. automodule:: fastHan.FastModel 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.UserDict.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.UserDict module 2 | ============================= 3 | 4 | .. automodule:: fastHan.model.UserDict 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.bert.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.bert module 2 | ========================= 3 | 4 | .. automodule:: fastHan.model.bert 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.model.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.model module 2 | ========================== 3 | 4 | .. automodule:: fastHan.model.model 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.utils.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.utils module 2 | ========================== 3 | 4 | .. 
automodule:: fastHan.model.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.CharParser.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.CharParser module 2 | =============================== 3 | 4 | .. automodule:: fastHan.model.CharParser 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.UserDict.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.UserDict module 2 | ============================= 3 | 4 | .. automodule:: fastHan.model.UserDict 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.CharParser.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.CharParser module 2 | =============================== 3 | 4 | .. automodule:: fastHan.model.CharParser 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.BertCharParser.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.BertCharParser module 2 | =================================== 3 | 4 | .. automodule:: fastHan.model.BertCharParser 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.old_fastNLP_bert.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.old\_fastNLP\_bert module 2 | ======================================= 3 | 4 | .. 
automodule:: fastHan.model.old_fastNLP_bert 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.BertCharParser.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.BertCharParser module 2 | =================================== 3 | 4 | .. automodule:: fastHan.model.BertCharParser 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: d710ae17477069bdfb78e0364e07f8a0 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.old_fastNLP_bert.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.old\_fastNLP\_bert module 2 | ======================================= 3 | 4 | .. automodule:: fastHan.model.old_fastNLP_bert 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.bert_encoder_theseus.rst: -------------------------------------------------------------------------------- 1 | fastHan.model.bert\_encoder\_theseus module 2 | =========================================== 3 | 4 | .. 
automodule:: fastHan.model.bert_encoder_theseus 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.bert_encoder_theseus.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model.bert\_encoder\_theseus module 2 | =========================================== 3 | 4 | .. automodule:: fastHan.model.bert_encoder_theseus 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/fastHan.rst: -------------------------------------------------------------------------------- 1 | fastHan package 2 | =============== 3 | 4 | .. automodule:: fastHan 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Subpackages 10 | ----------- 11 | 12 | .. toctree:: 13 | 14 | fastHan.model 15 | 16 | Submodules 17 | ---------- 18 | 19 | .. toctree:: 20 | 21 | fastHan.FastModel 22 | 23 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan package 2 | =============== 3 | 4 | .. automodule:: fastHan 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Subpackages 10 | ----------- 11 | 12 | .. toctree:: 13 | 14 | fastHan.model 15 | 16 | Submodules 17 | ---------- 18 | 19 | .. 
toctree:: 20 | 21 | fastHan.FastModel 22 | 23 | -------------------------------------------------------------------------------- /docs/build/html/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '0.5.0', 4 | LANGUAGE: 'zh_CN', 5 | COLLAPSE_INDEX: false, 6 | FILE_SUFFIX: '.html', 7 | HAS_SOURCE: true, 8 | SOURCELINK_SUFFIX: '.txt', 9 | NAVIGATION_WITH_KEYS: false, 10 | }; -------------------------------------------------------------------------------- /docs/source/user/version.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | 版本更新 3 | =============== 4 | 5 | .. contents:: 6 | :local: 7 | 8 | fastHan的版本修正如下问题:: 9 | 10 | 1.1版本的fastHan与0.5.5版本的fastNLP会导致importerror。如果使用1.1版本的fastHan,请使用0.5.0版本的fastNLP。 11 | 1.2版本的fastHan修复了fastNLP版本兼容问题。小于等于1.2版本的fastHan在输入句子的首尾包含空格、换行符时会产生BUG。如果字符串首尾包含上述字符,请使用strip函数处理输入字符串。 12 | 1.3版本的fastHan自动对输入字符串做strip函数处理。 13 | 1.4版本的fastHan加入用户词典功能(仅限于分词任务) -------------------------------------------------------------------------------- /docs/source/user/installation.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | 安装指南 3 | =============== 4 | 5 | .. contents:: 6 | :local: 7 | 8 | fastHan 依赖如下包:: 9 | 10 | torch>=1.0.0 11 | fastNLP>=0.5.0 12 | 13 | .. note:: 14 | 15 | 其中torch的安装可能与操作系统及 CUDA 的版本相关,请参见 `PyTorch 官网 `_ 。 16 | 此外,如果使用0.5.0版本的fastNLP,建议使用1.0.0版本的torch,否则在解码阶段会有bug影响准确率。如果使用高版本的torch,请使用0.5.5版本的fastNLP。 17 | 18 | .. 
code:: shell 19 | 20 | >>> pip install fastHan 21 | -------------------------------------------------------------------------------- /docs/build/html/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | demo 中文文档 2 | ===================== 3 | 4 | 5 | 用户手册 6 | ---------------- 7 | 8 | .. toctree:: 9 | :maxdepth: 1 10 | 11 | 语法样例 12 | 13 | API 文档 14 | ------------- 15 | 16 | 除了用户手册之外,你还可以通过查阅 API 文档来找到你所需要的工具。 17 | 18 | .. toctree:: 19 | :titlesonly: 20 | :maxdepth: 2 21 | 22 | fastHan 23 | 24 | 25 | 索引与搜索 26 | ================== 27 | 28 | * :ref:`genindex` 29 | * :ref:`modindex` 30 | * :ref:`search` 31 | -------------------------------------------------------------------------------- /docs/source/fastHan.model.rst: -------------------------------------------------------------------------------- 1 | fastHan.model package 2 | ===================== 3 | 4 | .. automodule:: fastHan.model 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Submodules 10 | ---------- 11 | 12 | .. toctree:: 13 | 14 | fastHan.model.BertCharParser 15 | fastHan.model.CharParser 16 | fastHan.model.UserDict 17 | fastHan.model.bert 18 | fastHan.model.bert_encoder_theseus 19 | fastHan.model.model 20 | fastHan.model.old_fastNLP_bert 21 | fastHan.model.utils 22 | 23 | -------------------------------------------------------------------------------- /docs/build/html/_sources/fastHan.model.rst.txt: -------------------------------------------------------------------------------- 1 | fastHan.model package 2 | ===================== 3 | 4 | .. automodule:: fastHan.model 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Submodules 10 | ---------- 11 | 12 | .. 
toctree:: 13 | 14 | fastHan.model.BertCharParser 15 | fastHan.model.CharParser 16 | fastHan.model.UserDict 17 | fastHan.model.bert 18 | fastHan.model.bert_encoder_theseus 19 | fastHan.model.model 20 | fastHan.model.old_fastNLP_bert 21 | fastHan.model.utils 22 | 23 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | fastHan 中文文档 2 | ===================== 3 | 4 | 5 | 用户手册 6 | ---------------- 7 | 8 | .. toctree:: 9 | :maxdepth: 1 10 | 11 | 安装指南 12 | 版本更新 13 | 快速入门 14 | 15 | API 文档 16 | ------------- 17 | 18 | 除了用户手册之外,你还可以通过查阅 API 文档来找到你所需要的工具。 19 | 20 | .. toctree:: 21 | :titlesonly: 22 | :maxdepth: 2 23 | 24 | fastHan 25 | 26 | 27 | 索引与搜索 28 | ================== 29 | 30 | * :ref:`genindex` 31 | * :ref:`modindex` 32 | * :ref:`search` 33 | 34 | 该项目在github的地址为:https://github.com/fastnlp/fastHan -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | from setuptools import setup, find_packages 5 | 6 | with open('README.md', encoding='utf-8') as f: 7 | readme = f.read() 8 | 9 | with open('requirements.txt', encoding='utf-8') as f: 10 | reqs = f.read() 11 | 12 | pkgs = [p for p in find_packages()] 13 | print(pkgs) 14 | 15 | setup( 16 | name='fastHan', 17 | version='2.0', 18 | url='https://github.com/fastnlp/fastHan', 19 | description=( 20 | '使用深度学习联合模型,解决中文分词、词性标注、命名实体识别、依存分析任务。' 21 | ), 22 | long_description=readme, 23 | long_description_content_type='text/markdown', 24 | author='王鹏宇', 25 | license='Apache License', 26 | python_requires='>=3.6', 27 | packages=pkgs, 28 | install_requires=reqs.strip().split('\n'), 29 | ) 30 | 31 | -------------------------------------------------------------------------------- /docs/build/html/_static/js/badge_only.js: 
-------------------------------------------------------------------------------- 1 | !function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}}); -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXAPIDOC = sphinx-apidoc 7 | SPHINXBUILD = sphinx-build 8 | SPHINXPROJ = fastHan 9 | SOURCEDIR = source 10 | BUILDDIR = build 11 | 12 | # Put it first so that "make" without argument is like "make help". 
import unittest

from fastHan import FastHan, FastCAMR


class TestFastHan(unittest.TestCase):
    """Smoke tests for FastHan / FastCAMR.

    Inherits from unittest.TestCase so the standard unittest machinery
    (discovery, assertions) is available.
    """

    def test_init(self):
        """Models should initialize both from the default source and a local path."""
        model = FastHan()
        camr_model = FastCAMR()

        # NOTE(review): these paths are machine-specific; the test will only
        # pass on hosts where the finetuned checkpoints exist at these paths.
        model = FastHan(url="/remote-home/pywang/finetuned_model")
        camr_model = FastCAMR(url="/remote-home/pywang/finetuned_camr_model")

    def test_call(self):
        """Running every supported target over a batch of sentences should not raise."""
        sentence = [
            '一行人下得山来,走不多时,忽听前面猛兽大吼之声一阵阵的传来。',
            '韩宝驹一提缰,胯下黄马向前窜出,奔了一阵,忽地立定,不论如何催迫,黄马只是不动。',
            # BUG FIX: a missing trailing comma here previously caused implicit
            # string concatenation, silently merging this sentence with the
            # next one and shrinking the batch from 4 sentences to 3.
            '韩宝驹心知有异,远远望去,只见前面围了一群人,有几头猎豹在地上乱抓乱扒。',
            '他知坐骑害怕豹子,跃下马来,抽出金龙鞭握在手中。',
        ]

        targets = ['CWS', 'POS', 'CWS-guwen', 'POS-guwen', 'NER', 'Parsing']
        model = FastHan()
        for target in targets:
            model(sentence, target)

        model = FastCAMR()
        # NOTE(review): hard-codes a GPU device; fails on CPU-only hosts.
        model.set_device('cuda:0')

        sentence = "这样 的 活动 还 有 什么 意义 呢 ?"
        answer = model(sentence)
        print(answer)
        for ans in answer:
            print(ans)
from fastNLP.core.metrics import Metric
from fastNLP import SpanFPreRecMetric
import torch

from .metrics import SegAppCharParseF1Metric, CWSMetric


class MultiTaskMetric(Metric):
    """Aggregate metric that dispatches each batch to a per-task sub-metric.

    Sequence-labelling tasks are scored with fastNLP's SpanFPreRecMetric.
    The single biaffine dependency-parsing task (``biaffine_task``) is
    scored with SegAppCharParseF1Metric, plus an auxiliary CWSMetric for
    the word segmentation implied by the parse.

    :param all_tasks: ordered list of task names; batch task ids index into it.
    :param task_vocab_map: mapping from task name to its label vocabulary.
    :param biaffine_task: name of the dependency-parsing task.
    """

    def __init__(self, all_tasks, task_vocab_map, biaffine_task='Parsing-ctb9'):
        super().__init__()
        self.all_tasks = all_tasks
        self.task_vocab_map = task_vocab_map
        self.biaffine_task = biaffine_task
        self.metrics = dict()

        for task in all_tasks:
            if task == biaffine_task:
                # 'punct' arcs are excluded from the parsing F1 computation.
                self.metrics[task] = SegAppCharParseF1Metric(
                    pun_index=task_vocab_map[self.biaffine_task].word2idx['punct'])
                continue
            self.metrics[task] = SpanFPreRecMetric(tag_vocab=task_vocab_map[task])

        self.parsing_cws_metric = CWSMetric()

        # Tasks that have received at least one batch since the last reset.
        self.tasks_flag = set()

    def update(self, task, seq_len, labels, pred=None, heads=None,
               head_preds=None, label_preds=None):
        """Route one batch to the sub-metric of its task.

        ``task`` arrives as a tensor whose entries are all the same task id;
        only the first element is used.
        """
        task = task.tolist()[0]
        task = self.all_tasks[task]
        self.tasks_flag.add(task)
        if task == self.biaffine_task:
            assert heads is not None
            assert head_preds is not None
            assert label_preds is not None
            self.metrics[task].update(labels, heads, head_preds, label_preds, seq_len)
            self.parsing_cws_metric.update(labels, label_preds, seq_len)
        else:
            assert pred is not None
            # labels[:, 0] is the task token prepended to every sequence;
            # drop it before span comparison.
            self.metrics[task].update(pred=pred, target=labels[:, 1:], seq_len=seq_len)

    def reset(self):
        """Clear all accumulated state so a fresh evaluation round can start."""
        for task in self.metrics:
            self.metrics[task].reset()
        # BUG FIX: the auxiliary CWS metric and the seen-task set were
        # previously left untouched here, leaking state into the next
        # evaluation round whenever get_metric(reset=True) was not called.
        # hasattr guard: CWSMetric's siblings expose reset(); confirm it does too.
        if hasattr(self.parsing_cws_metric, 'reset'):
            self.parsing_cws_metric.reset()
        self.tasks_flag = set()

    def get_metric(self, reset=True):
        """Return nested ``{macro_task: {corpus: scores}}`` plus averaged F1.

        Also adds ``avg_f`` (macro-average F over tasks), ``all_f`` (the
        per-task averages), and — when the parsing task ran — the implied
        segmentation score under ``scores['Parsing']['ctb9-cws']``.
        """
        scores = dict()
        for task in self.tasks_flag:
            # Task names follow the '<macro_task>-<corpus>' convention.
            macro_task, corpus = task.split('-')
            if macro_task not in scores:
                scores[macro_task] = dict()
            scores[macro_task][corpus] = self.metrics[task].get_metric()

        # Macro-average F over corpora within each task, then over tasks.
        all_f = []
        for macro_task in scores:
            ave_f = sum(scores[macro_task][corpus]['f']
                        for corpus in scores[macro_task]) / len(scores[macro_task])
            all_f.append(ave_f)

        scores['avg_f'] = sum(all_f) / len(all_f)
        if self.biaffine_task in self.tasks_flag:
            scores['Parsing']['ctb9-cws'] = self.parsing_cws_metric.get_metric(reset=reset)

        scores['all_f'] = all_f
        if reset:
            self.tasks_flag = set()
        return scores
Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof 
a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document); -------------------------------------------------------------------------------- /docs/build/html/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li 
.fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version 
.fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} -------------------------------------------------------------------------------- /docs/source/user/example.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | 大标题 3 | ====== 4 | 5 | .. note:: 6 | 中文标题需要符号的数量至少是中文字数的两倍 7 | 8 | .. warning:: 9 | 符号的数量只可以多,不可以少。 10 | 11 | 小标题1 12 | ########### 13 | 14 | 小标题2 15 | ********* 16 | 17 | 小标题3(正常使用) 18 | ======================== 19 | 20 | 小标题4 21 | ------------------- 22 | 23 | 推荐使用大标题、小标题3和小标题4 24 | 25 | 官方文档 http://docutils.sourceforge.net/docs/user/rst/quickref.html 26 | 27 | `熟悉markdown的同学推荐参考这篇文章 `_ 28 | 29 | \<\>内表示的是链接地址,\<\>外的是显示到外面的文字 30 | 31 | 常见语法 32 | ============ 33 | 34 | *emphasis* 35 | 36 | **strong** 37 | 38 | `text` 39 | 40 | ``inline literal`` 41 | 42 | http://docutils.sf.net/ 孤立的网址会自动生成链接 43 | 44 | 显示为特定的文字的链接 `sohu `_ 45 | 46 | 突出显示的 47 | 上面文字 48 | 49 | 正常缩进 50 | 51 | 形成锻炼 52 | 53 | 54 | 55 | 特殊模块 56 | ============ 57 | 58 | 选项会自动识别 59 | 60 | -v An option 61 | -o file Same with value 62 | --delta A long option 63 | --delta=len Same with value 64 | 65 | 66 | 图片 67 | 68 | .. image:: ../figures/procedures.PNG 69 | :height: 200 70 | :width: 560 71 | :scale: 50 72 | :alt: alternate text 73 | :align: center 74 | 75 | 显示一个冒号的代码块:: 76 | 77 | 中间要空一行 78 | 79 | :: 80 | 81 | 不显示冒号的代码块 82 | 83 | .. code-block:: python 84 | 85 | :linenos: 86 | :emphasize-lines: 1,3 87 | 88 | print("专业的代码块") 89 | print("") 90 | print("有行号和高亮") 91 | 92 | 数学块 93 | ========== 94 | 95 | .. 
math:: 96 | 97 | H_2O + Na = NaOH + H_2 \uparrow 98 | 99 | 复杂表格 100 | ========== 101 | 102 | +------------------------+------------+----------+----------+ 103 | | Header row, column 1 | Header 2 | Header 3 | Header 4 | 104 | | (header rows optional) | | | | 105 | +========================+============+==========+==========+ 106 | | body row 1, column 1 | column 2 | column 3 | column 4 | 107 | +------------------------+------------+----------+----------+ 108 | | body row 2 | Cells may span columns. | 109 | +------------------------+------------+---------------------+ 110 | | body row 3 | Cells may | - Table cells | 111 | +------------------------+ span rows. | - contain | 112 | | body row 4 | | - body elements. | 113 | +------------------------+------------+---------------------+ 114 | 115 | 简易表格 116 | ========== 117 | 118 | ===== ===== ====== 119 | Inputs Output 120 | ------------ ------ 121 | A B A or B 122 | ===== ===== ====== 123 | False False False 124 | True True True 125 | ===== ===== ====== 126 | 127 | csv 表格 128 | ============ 129 | 130 | .. csv-table:: 131 | :header: sentence, target 132 | 133 | This is the first instance ., 0 134 | Second instance ., 1 135 | Third instance ., 1 136 | ..., ... 137 | 138 | 139 | 140 | [重要]各种链接 141 | =================== 142 | 143 | 各种链接帮助我们连接到fastNLP文档的各个位置 144 | 145 | \<\>内表示的是链接地址,\<\>外的是显示到外面的文字 146 | 147 | :doc:`根据文件名链接 ` 148 | 149 | :mod:`~fastNLP.core.batch` 150 | 151 | :class:`~fastNLP.Batch` 152 | 153 | ~表示只显示最后一项 154 | 155 | :meth:`fastNLP.DataSet.apply` 156 | 157 | 下面这个代码是不可行的,必须要用 r""" 才行: 158 | 159 | .. code:: 160 | 161 | :param float beta: f_beta分数, :math:`f_{beta} = \frac{(1 + {beta}^{2})*(pre*rec)}{({beta}^{2}*pre + rec)}` . 
常用为 `beta=0.5, 1, 2` 若为0.5则精确率的权重高于召回率;若为1,则两者平等;若为2,则召回率权重高于精确率。 162 | 163 | -------------------------------------------------------------------------------- /docs/build/html/_sources/user/example.rst.txt: -------------------------------------------------------------------------------- 1 | ====== 2 | 大标题 3 | ====== 4 | 5 | .. note:: 6 | 中文标题需要符号的数量至少是中文字数的两倍 7 | 8 | .. warning:: 9 | 符号的数量只可以多,不可以少。 10 | 11 | 小标题1 12 | ########### 13 | 14 | 小标题2 15 | ********* 16 | 17 | 小标题3(正常使用) 18 | ======================== 19 | 20 | 小标题4 21 | ------------------- 22 | 23 | 推荐使用大标题、小标题3和小标题4 24 | 25 | 官方文档 http://docutils.sourceforge.net/docs/user/rst/quickref.html 26 | 27 | `熟悉markdown的同学推荐参考这篇文章 `_ 28 | 29 | \<\>内表示的是链接地址,\<\>外的是显示到外面的文字 30 | 31 | 常见语法 32 | ============ 33 | 34 | *emphasis* 35 | 36 | **strong** 37 | 38 | `text` 39 | 40 | ``inline literal`` 41 | 42 | http://docutils.sf.net/ 孤立的网址会自动生成链接 43 | 44 | 显示为特定的文字的链接 `sohu `_ 45 | 46 | 突出显示的 47 | 上面文字 48 | 49 | 正常缩进 50 | 51 | 形成锻炼 52 | 53 | 54 | 55 | 特殊模块 56 | ============ 57 | 58 | 选项会自动识别 59 | 60 | -v An option 61 | -o file Same with value 62 | --delta A long option 63 | --delta=len Same with value 64 | 65 | 66 | 图片 67 | 68 | .. image:: ../figures/procedures.PNG 69 | :height: 200 70 | :width: 560 71 | :scale: 50 72 | :alt: alternate text 73 | :align: center 74 | 75 | 显示一个冒号的代码块:: 76 | 77 | 中间要空一行 78 | 79 | :: 80 | 81 | 不显示冒号的代码块 82 | 83 | .. code-block:: python 84 | 85 | :linenos: 86 | :emphasize-lines: 1,3 87 | 88 | print("专业的代码块") 89 | print("") 90 | print("有行号和高亮") 91 | 92 | 数学块 93 | ========== 94 | 95 | .. 
math:: 96 | 97 | H_2O + Na = NaOH + H_2 \uparrow 98 | 99 | 复杂表格 100 | ========== 101 | 102 | +------------------------+------------+----------+----------+ 103 | | Header row, column 1 | Header 2 | Header 3 | Header 4 | 104 | | (header rows optional) | | | | 105 | +========================+============+==========+==========+ 106 | | body row 1, column 1 | column 2 | column 3 | column 4 | 107 | +------------------------+------------+----------+----------+ 108 | | body row 2 | Cells may span columns. | 109 | +------------------------+------------+---------------------+ 110 | | body row 3 | Cells may | - Table cells | 111 | +------------------------+ span rows. | - contain | 112 | | body row 4 | | - body elements. | 113 | +------------------------+------------+---------------------+ 114 | 115 | 简易表格 116 | ========== 117 | 118 | ===== ===== ====== 119 | Inputs Output 120 | ------------ ------ 121 | A B A or B 122 | ===== ===== ====== 123 | False False False 124 | True True True 125 | ===== ===== ====== 126 | 127 | csv 表格 128 | ============ 129 | 130 | .. csv-table:: 131 | :header: sentence, target 132 | 133 | This is the first instance ., 0 134 | Second instance ., 1 135 | Third instance ., 1 136 | ..., ... 137 | 138 | 139 | 140 | [重要]各种链接 141 | =================== 142 | 143 | 各种链接帮助我们连接到fastNLP文档的各个位置 144 | 145 | \<\>内表示的是链接地址,\<\>外的是显示到外面的文字 146 | 147 | :doc:`根据文件名链接 ` 148 | 149 | :mod:`~fastNLP.core.batch` 150 | 151 | :class:`~fastNLP.Batch` 152 | 153 | ~表示只显示最后一项 154 | 155 | :meth:`fastNLP.DataSet.apply` 156 | 157 | 下面这个代码是不可行的,必须要用 r""" 才行: 158 | 159 | .. code:: 160 | 161 | :param float beta: f_beta分数, :math:`f_{beta} = \frac{(1 + {beta}^{2})*(pre*rec)}{({beta}^{2}*pre + rec)}` . 
def convert_cws_macro(all_tasks):
    """Collapse every 'CWS*' task name onto the shared key 'CWS'.

    All other task names map to themselves, so the result can be used as a
    ``key_mapper`` that pools the losses of every CWS corpus into one entry.
    """
    return {
        task: 'CWS' if task.startswith('CWS') else task
        for task in all_tasks
    }


class WeightManagerBase(object):
    """Common interface for per-task loss-weight bookkeeping.

    ``key_mapper`` translates an incoming task name into the internal key the
    weights are stored under (e.g. pooling all CWS corpora onto 'CWS'); when
    omitted, the identity mapping over ``all_tasks`` is used.

    Subclasses are expected to define ``self.weight`` (a dict keyed by the
    mapped task names) before ``get`` is called.
    """

    def __init__(self, all_tasks, key_mapper=None):
        self.all_tasks = all_tasks
        # Fall back to the identity mapping when no mapper is supplied.
        if key_mapper is None:
            key_mapper = {task: task for task in all_tasks}
        self.key_mapper = key_mapper

    def get(self, task):
        """Return the stored weight for *task* (after key mapping)."""
        return self.weight.get(self.key_mapper.get(task))

    def update(self, task, loss):
        """Observe a loss value; the base manager ignores it."""
        pass


class FixedWeightManager(WeightManagerBase):
    """Manager whose weights are supplied up front and never change."""

    def __init__(self, all_tasks, weight, key_mapper=None):
        super().__init__(all_tasks=all_tasks, key_mapper=key_mapper)
        # ``weight`` must be keyed by the mapped task names.
        self.weight = weight


class QueueWeightManager(WeightManagerBase):
    """Collects the first ``max_steps`` losses per task, then freezes the
    weight at their mean.

    Until a task's queue fills up, its weight stays at the initial value 1;
    losses arriving after the queue is full are ignored.

    NOTE(review): ``norm_number`` is validated against ``max_steps`` but never
    used afterwards — presumably it was meant to size the normalisation
    window; confirm with the original author.
    """

    def __init__(self,
                 all_tasks,
                 max_steps=1000,
                 norm_number=1000,
                 key_mapper=None):
        assert norm_number <= max_steps

        super().__init__(all_tasks=all_tasks, key_mapper=key_mapper)
        self.max_steps = max_steps
        self.norm_number = norm_number

        internal_keys = list(self.key_mapper.values())
        self.weight = {key: 1 for key in internal_keys}
        self.task_loss = {key: [] for key in internal_keys}

    def update(self, task, loss):
        key = self.key_mapper.get(task)
        history = self.task_loss.get(key)
        if len(history) >= self.max_steps:
            # Queue already full: further losses are ignored.
            return
        history.append(loss)
        if len(history) == self.max_steps:
            # Queue just filled up: freeze the weight at the mean loss.
            self.weight[key] = sum(history) / self.max_steps


class MomentumWeightManager(WeightManagerBase):
    """Bias-corrected exponential moving average of the observed loss.

    ``get`` divides the raw EMA by ``1 - beta**steps`` (Adam-style bias
    correction), so early readings are not dragged toward the zero init.
    """

    def __init__(self, all_tasks, beta=0.9, key_mapper=None):
        super().__init__(all_tasks=all_tasks, key_mapper=key_mapper)

        self.beta = beta
        internal_keys = list(self.key_mapper.values())
        self.weight = {key: 0 for key in internal_keys}
        # Track beta**steps directly instead of keeping a step counter.
        self.step_norm = {key: 1 for key in internal_keys}

    def update(self, task, loss):
        key = self.key_mapper.get(task)
        self.step_norm[key] = self.step_norm[key] * self.beta
        self.weight[key] = self.beta * self.weight[key] + (1 - self.beta) * loss

    def get(self, task):
        key = self.key_mapper.get(task)
        # NOTE(review): raises ZeroDivisionError when queried before any
        # update (step_norm is still 1) — callers appear to update first.
        return self.weight.get(key) / (1 - self.step_norm[key])


class EnsembledWeightManagers(object):
    """Combines several managers multiplicatively.

    ``managers`` is a sequence of ``(manager, exponent)`` pairs; the ensemble
    weight for a task is the product of each ``manager.get(task)**exponent``.
    """

    def __init__(self, managers):
        for entry in managers:
            assert isinstance(entry[0], WeightManagerBase)
        self.managers = managers

    def get(self, task):
        combined = 1
        for manager, exponent in self.managers:
            combined = combined * (manager.get(task) ** exponent)
        return combined

    def update(self, task, loss):
        # Broadcast the observed loss to every member manager.
        for manager, _exponent in self.managers:
            manager.update(task, loss)
code-block:: python 29 | 30 | sentence="郭靖是金庸笔下的一名男主。" 31 | answer=model(sentence,target="Parsing") 32 | print(answer) 33 | answer=model(sentence,target="NER") 34 | print(answer) 35 | 36 | 模型将会输出如下信息: 37 | 38 | .. code-block:: text 39 | 40 | [[['郭靖', 2, 'top', 'NR'], ['是', 0, 'root', 'VC'], ['金庸', 4, 'nn', 'NR'], ['笔', 5, 'lobj', 'NN'], ['下', 10, 'assmod', 'LC'], ['的', 5, 'assm', 'DEG'], ['一', 8, 'nummod', 'CD'], ['名', 10, 'clf', 'M'], ['男', 10, 'amod', 'JJ'], ['主', 2, 'attr', 'NN'], ['。', 2, 'punct', 'PU']]] 41 | [[['郭靖', 'NR'], ['金庸', 'NR']]] 42 | 43 | 此外,模型拥有如下这些功能: 44 | 45 | 任务选择 46 | ~~~~~~~~~~~~ 47 | 48 | target参数可在'Parsing'、'CWS'、'POS'、'NER'四个选项中取值,模型将分别进行依存分析、分词、词性标注、命名实体识别任务,模型默认进行CWS任务。其中词性标注任务包含了分词的信息,而依存分析任务又包含了词性标注任务的信息。命名实体识别任务相较其他任务独立。 49 | 50 | 如果分别运行CWS、POS、Parsing任务,模型输出的分词结果等可能存在冲突。如果想获得不冲突的各类信息,请直接运行包含全部所需信息的那项任务。 51 | 52 | 模型的POS、Parsing任务均使用CTB标签集。NER使用msra标签集。 53 | 54 | 55 | 分词风格 56 | ~~~~~~~~~~~~ 57 | 分词风格,指的是训练模型中文分词模块的10个语料库,模型可以区分这10个语料库,设置分词style为S即令模型认为现在正在处理S语料库的分词。所以分词style实际上是与语料库的覆盖面、分词粒度相关的。如本模型默认的CTB语料库分词粒度较细。如果想切换不同的粒度,可以使用模型的set_cws_style函数,例子如下: 58 | 59 | .. code-block:: python 60 | 61 | sentence="一个苹果。" 62 | print(model(sentence,'CWS')) 63 | model.set_cws_style('cnc') 64 | print(model(sentence,'CWS')) 65 | 66 | 模型将输出如下内容: 67 | 68 | .. code-block:: text 69 | 70 | [['一', '个', '苹果', '。']] 71 | [['一个', '苹果', '。']] 72 | 73 | 对语料库的选取参考了下方CWS SOTA模型的论文,共包括:SIGHAN 2005的 MSR、PKU、AS、CITYU 语料库,由山西大学发布的 SXU 语料库,由斯坦福的CoreNLP 发布的 CTB6 语料库,由国家语委公布的 CNC 语料库,由王威廉先生公开的微博树库 WTB,由张梅山先生公开的诛仙语料库 ZX,Universal Dependencies 项目的 UD 语料库。 74 | 75 | 输入与输出 76 | ~~~~~~~~~~~~ 77 | 输入模型的可以是单独的字符串,也可是由字符串组成的列表。如果输入的是列表,模型将一次性处理所有输入的字符串,所以请自行控制 batch size。 78 | 79 | 模型的输出是在fastHan模块中定义的sentence与token类。模型将输出一个由sentence组成的列表,而每个sentence又由token组成。每个token本身代表一个被分好的词,有pos、head、head_label、ner四项属性,代表了该词的词性、依存关系、命名实体识别信息。 80 | 81 | 一则输入输出的例子如下所示: 82 | 83 | .. 
sentence=["我爱踢足球。","林丹是冠军!"]
code-block:: text 145 | 146 | [['奥利奥利奥']] 147 | [['奥利', '奥利奥']] 148 | -------------------------------------------------------------------------------- /docs/build/html/_static/js/html5shiv-printshiv.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var 
b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark 
meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document); -------------------------------------------------------------------------------- /docs/doc_utils.py: -------------------------------------------------------------------------------- 1 | r""" 2 | 用于检测 Python 包的文档是否符合规范的脚本。 3 | 4 | 用法 ``python doc_utils.py `` 5 | 6 | 样例 ``python doc_utils.py ../../fastDemo`` 7 | 8 | .. csv-table:: 9 | :header: "错误代号", "错误类型" 10 | 11 | 0, "项目结构错误" 12 | 1, "模块缺少 __doc__" 13 | 2, "模块缺少 __all__" 14 | 3, "__all__ 中导出的函数/类不应以下划线开头" 15 | 4, "__all__ 中没有导出全部定义的函数/类等" 16 | 5, "__all__ 中存在没有定义的函数/类" 17 | 6, "函数/类中缺少 __doc__" 18 | 7, "类的方法中缺少 __doc__" 19 | 20 | """ 21 | 22 | __all__ = [ 23 | "check", 24 | "check_module", 25 | "check_obj" 26 | ] 27 | 28 | from typing import List, Any 29 | import inspect 30 | import importlib 31 | import sys 32 | import os 33 | 34 | 35 | class ModuleType: 36 | __name__: str 37 | __all__: List[str] 38 | 39 | 40 | def _colored_string(string: str, color: str or int) -> str: 41 | r"""在终端中显示一串有颜色的文字 42 | 43 | :param string: 在终端中显示的文字 44 | :param color: 文字的颜色 45 | :return: 46 | """ 47 | if isinstance(color, str): 48 | color = { 49 | "black": 30, 50 | "red": 31, 51 | "green": 32, 52 | "yellow": 33, 53 | "blue": 34, 54 | "purple": 35, 55 | "cyan": 36, 56 | "white": 37 57 | }[color] 58 | return "\033[%dm%s\033[0m" % (color, string) 59 | 60 | 61 | def 
_alert(code: int, msg: str, color: str = 'red'): 62 | print(_colored_string("[ERROR-{}] {}".format(code, msg.strip()), color)) 63 | 64 | 65 | def check(path: str): 66 | r"""检查该项目目录下的所实现的包内的文档 67 | 68 | :param path: 项目目录 69 | :return: 70 | """ 71 | path = os.path.abspath(path) 72 | print("Package path:", path) 73 | package_name = str(path.split(os.sep)[-1]) 74 | print("Package name:", package_name) 75 | if not os.path.isdir(os.path.join(path, package_name)): 76 | _alert(0, "Package structure is wrong.") 77 | return 78 | sys.path.insert(0, path) 79 | importlib.import_module(package_name) 80 | module = sys.modules[package_name] 81 | check_module(module, package_name) 82 | 83 | 84 | def check_module(module: ModuleType, base_name: str): 85 | r"""递归检查每个模块中对象是否有文档 86 | 87 | :param module: 模块对象 88 | :param base_name: 根模块的名称 89 | :return: 90 | """ 91 | print("\n[M]", module.__name__) 92 | print([e for e in dir(module) if not e.startswith("_")]) 93 | if module.__doc__ is None: 94 | _alert(1, f"""Module '{module.__name__}' don't have __doc__""") 95 | if "__all__" not in dir(module): 96 | _alert(2, f"""'{module.__name__}' don't have __all__""") 97 | else: 98 | set_all = set(module.__all__) 99 | for name, obj in inspect.getmembers(module): 100 | if inspect.ismodule(obj) and obj.__name__.startswith(base_name): 101 | check_module(obj, base_name) 102 | if inspect.isclass(obj) or inspect.isfunction(obj): 103 | if name.startswith("_"): 104 | continue 105 | if name not in set_all: 106 | _alert(4, f"""'{obj.__name__}' not in __all__ of '{module.__name__}' """) 107 | else: 108 | check_obj(obj, module.__name__) 109 | set_all.remove(name) 110 | for obj_name in set_all: 111 | if obj_name.startswith("_"): 112 | _alert(3, f"""'{obj_name}' in '{module.__name__}' should not start with '_'""") 113 | else: 114 | _alert(5, f""" '{obj_name}' in __all__ of '{module.__name__}' does not exist""") 115 | 116 | print("\n") 117 | 118 | 119 | def check_obj(checked_obj: Any, module_name: str): 120 | 
r"""检查某个函数或者类的文档 121 | 122 | .. todo: 123 | 124 | 增加对函数的注释中是否介绍了参数的检查 125 | 126 | :param checked_obj: 检查是否有文档的函数或者类 127 | :param module_name: 函数或者类所在地模块 128 | :return: 129 | """ 130 | if inspect.isclass(checked_obj): 131 | for name, obj in inspect.getmembers(checked_obj): 132 | if inspect.isfunction(obj) and not obj.__name__.startswith("_"): 133 | if obj.__doc__ is None: 134 | _alert(7, f""" '{checked_obj.__name__}.{obj.__name__}' in '{module_name}' does not have __doc__""") 135 | elif checked_obj.__doc__ is None: 136 | _alert(6, f""" '{checked_obj.__name__}' in '{module_name}' does not have __doc__""") 137 | 138 | 139 | if __name__ == "__main__": 140 | check(sys.argv[1]) 141 | -------------------------------------------------------------------------------- /docs/build/html/_static/translations.js: -------------------------------------------------------------------------------- 1 | Documentation.addTranslations({"locale": "zh_Hans_CN", "messages": {"%(filename)s — %(docstitle)s": "%(filename)s — %(docstitle)s", "© Copyright %(copyright)s.": "© \u7248\u6743\u6240\u6709 %(copyright)s.", "© Copyright %(copyright)s.": "© \u7248\u6743\u6240\u6709 %(copyright)s.", ", in ": "\uff0c\u5728", "About these documents": "\u5173\u4e8e\u8fd9\u4e9b\u6587\u6863", "Automatically generated list of changes in version %(version)s": "\u81ea\u52a8\u751f\u6210\u7684 %(version)s \u7248\u672c\u4e2d\u7684\u66f4\u6539\u5217\u8868", "C API changes": "C API \u66f4\u6539", "Changes in Version %(version)s — %(docstitle)s": "\u66f4\u6539\u53d1\u751f\u5728\u7248\u672c %(version)s— %(docstitle)s", "Collapse sidebar": "\u6298\u53e0\u8fb9\u680f", "Complete Table of Contents": "\u5b8c\u6574\u7684\u5185\u5bb9\u8868", "Contents": "\u76ee\u5f55", "Copyright": "\u7248\u6743\u6240\u6709", "Created using Sphinx %(sphinx_version)s.": "\u7531 Sphinx %(sphinx_version)s \u521b\u5efa\u3002", "Expand sidebar": "\u5c55\u5f00\u8fb9\u680f", "From here you can search these documents. 
Enter your search\n words into the box below and click \"search\". Note that the search\n function will automatically search for all of the words. Pages\n containing fewer words won't appear in the result list.": "\u5728\u8fd9\u513f\uff0c\u4f60\u53ef\u4ee5\u5bf9\u8fd9\u4e9b\u6587\u6863\u8fdb\u884c\u641c\u7d22\u3002\u5411\u641c\u7d22\u6846\u4e2d\u8f93\u5165\u4f60\u6240\u8981\u641c\u7d22\u7684\u5173\u952e\u5b57\u5e76\u70b9\u51fb\u201c\u641c\u7d22\u201d\u3002\u6ce8\u610f\uff1a\u641c\u7d22\u5f15\u64ce\u4f1a\u81ea\u52a8\u641c\u7d22\u6240\u6709\u7684\u5173\u952e\u5b57\u3002\u5c06\u4e0d\u4f1a\u641c\u7d22\u5230\u90e8\u5206\u5173\u952e\u5b57\u7684\u9875\u9762.", "Full index on one page": "\u4e00\u9875\u7684\u5168\u90e8\u7d22\u5f15", "General Index": "\u603b\u76ee\u5f55", "Global Module Index": "\u5168\u5c40\u6a21\u5757\u7d22\u5f15", "Go": "\u8f6c\u5411", "Hide Search Matches": "\u9690\u85cf\u641c\u7d22\u7ed3\u679c", "Index": "\u7d22\u5f15", "Index – %(key)s": "\u7d22\u5f15 – %(key)s", "Index pages by letter": "\u6309\u7167\u5b57\u6bcd\u7684\u7d22\u5f15\u9875", "Indices and tables:": "\u7d22\u5f15\u548c\u8868\u683c\uff1a", "Last updated on %(last_updated)s.": "\u6700\u540e\u66f4\u65b0\u4e8e %(last_updated)s.", "Library changes": "\u5e93\u66f4\u6539", "Navigation": "\u5bfc\u822a", "Next topic": "\u4e0b\u4e00\u4e2a\u4e3b\u9898", "Other changes": "\u5176\u4ed6\u66f4\u6539", "Overview": "\u6982\u8ff0", "Permalink to this definition": "\u6c38\u4e45\u94fe\u63a5\u81f3\u76ee\u6807", "Permalink to this headline": "\u6c38\u4e45\u94fe\u63a5\u81f3\u6807\u9898", "Please activate JavaScript to enable the search\n functionality.": "\u8bf7\u6fc0\u6d3b JavaScript \u4ee5\u5f00\u542f\u641c\u7d22\u529f\u80fd", "Preparing search...": "\u51c6\u5907\u641c\u7d22\u2026\u2026", "Previous topic": "\u4e0a\u4e00\u4e2a\u4e3b\u9898", "Quick search": "\u5feb\u901f\u641c\u7d22", "Search": "\u641c\u7d22", "Search Page": "\u641c\u7d22\u9875\u9762", "Search Results": "\u641c\u7d22\u7ed3\u679c", "Search 
finished, found %s page(s) matching the search query.": "\u641c\u7d22\u5b8c\u6210\uff0c\u6709 %s \u4e2a\u9875\u9762\u5339\u914d\u3002", "Search within %(docstitle)s": "\u5728 %(docstitle)s \u4e2d\u641c\u7d22", "Searching": "\u641c\u7d22\u4e2d", "Show Source": "\u663e\u793a\u6e90\u4ee3\u7801", "Table of Contents": "", "This Page": "\u672c\u9875", "Welcome! This is": "\u6b22\u8fce\uff01\u8fd9\u662f", "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories.": "\u6ca1\u6709\u4efb\u4f55\u6587\u6863\u5339\u914d\u60a8\u7684\u641c\u7d22\u3002\u8bf7\u786e\u4fdd\u4f60\u8f93\u5165\u7684\u8bcd\u62fc\u5199\u6b63\u786e\u5e76\u9009\u62e9\u4e86\u5408\u9002\u7684\u5206\u7c7b\u3002", "all functions, classes, terms": "\u6240\u7684\u51fd\u6570\uff0c\u7c7b\uff0c\u672f\u8bed", "can be huge": "\u53ef\u80fd\u4f1a\u5f88\u591a", "last updated": "\u6700\u540e\u66f4\u65b0\u4e8e", "lists all sections and subsections": "\u5217\u51fa\u6240\u6709\u7684\u7ae0\u8282\u548c\u90e8\u5206", "next chapter": "\u4e0b\u4e00\u7ae0", "previous chapter": "\u4e0a\u4e00\u7ae0", "quick access to all modules": "\u5feb\u901f\u67e5\u770b\u6240\u6709\u7684\u6a21\u5757", "search": "\u641c\u7d22", "search this documentation": "\u641c\u7d22\u6587\u6863", "the documentation for": "\u8fd9\u4efd\u6587\u6863\u662f"}, "plural_expr": "0"}); -------------------------------------------------------------------------------- /docs/build/html/_static/pygments.css: -------------------------------------------------------------------------------- 1 | .highlight .hll { background-color: #ffffcc } 2 | .highlight { background: #eeffcc; } 3 | .highlight .c { color: #408090; font-style: italic } /* Comment */ 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 5 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */ 6 | .highlight .o { color: #666666 } /* Operator */ 7 | .highlight .ch { color: #408090; font-style: italic } /* 
Comment.Hashbang */ 8 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ 9 | .highlight .cp { color: #007020 } /* Comment.Preproc */ 10 | .highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */ 11 | .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */ 12 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ 13 | .highlight .gd { color: #A00000 } /* Generic.Deleted */ 14 | .highlight .ge { font-style: italic } /* Generic.Emph */ 15 | .highlight .gr { color: #FF0000 } /* Generic.Error */ 16 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 17 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 18 | .highlight .go { color: #333333 } /* Generic.Output */ 19 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ 20 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 21 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 22 | .highlight .gt { color: #0044DD } /* Generic.Traceback */ 23 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ 24 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ 25 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ 26 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */ 27 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ 28 | .highlight .kt { color: #902000 } /* Keyword.Type */ 29 | .highlight .m { color: #208050 } /* Literal.Number */ 30 | .highlight .s { color: #4070a0 } /* Literal.String */ 31 | .highlight .na { color: #4070a0 } /* Name.Attribute */ 32 | .highlight .nb { color: #007020 } /* Name.Builtin */ 33 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ 34 | .highlight .no { color: #60add5 } /* Name.Constant */ 35 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ 36 | .highlight 
.ni { color: #d55537; font-weight: bold } /* Name.Entity */ 37 | .highlight .ne { color: #007020 } /* Name.Exception */ 38 | .highlight .nf { color: #06287e } /* Name.Function */ 39 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ 40 | .highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ 41 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ 42 | .highlight .nv { color: #bb60d5 } /* Name.Variable */ 43 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ 44 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 45 | .highlight .mb { color: #208050 } /* Literal.Number.Bin */ 46 | .highlight .mf { color: #208050 } /* Literal.Number.Float */ 47 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */ 48 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */ 49 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */ 50 | .highlight .sa { color: #4070a0 } /* Literal.String.Affix */ 51 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ 52 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */ 53 | .highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */ 54 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ 55 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */ 56 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ 57 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ 58 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ 59 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */ 60 | .highlight .sr { color: #235388 } /* Literal.String.Regex */ 61 | .highlight .s1 { color: #4070a0 } /* Literal.String.Single */ 62 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */ 63 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ 64 | .highlight .fm { color: #06287e } /* Name.Function.Magic */ 65 | 
.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ 66 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ 67 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ 68 | .highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */ 69 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/build/html/_static/js/theme.js: -------------------------------------------------------------------------------- 1 | !function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 
0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("
"),n("table.docutils.footnote").wrap("
"),n("table.docutils.citation").wrap("
"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(''),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}t.length>0&&($(".wy-menu-vertical .current").removeClass("current"),t.addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l1").parent().addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l2").addClass("current"),t.closest("li.toctree-l3").addClass("current"),t.closest("li.toctree-l4").addClass("current"),t.closest("li.toctree-l5").addClass("current"),t[0].scrollIntoView())}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t') 88 | word_num = 0 89 | idx = 0 90 | tag_sequence = [] 91 | word = '' 92 | #last_idx,上一个匹配到的词的末字符的索引 93 | last_idx = -1 94 | while idx < len(sentence): 95 | word = word + sentence[idx] 96 
    def __call__(self, sentence):
        """Segment *sentence* against the user dictionary, trying both scan
        directions and keeping the better result.

        Runs :meth:`process_sentence` once forward and once reversed, then
        returns whichever direction matched more dictionary words (the
        forward pass wins ties).

        :param sentence: the raw character string to tag.
        :return: tuple ``(word_num, tag_sequence)`` -- the number of matched
            dictionary words and the corresponding b/m/e/s/o tag sequence
            produced by ``process_sentence``.
        """
        word_num, tag_sequence = self.process_sentence(sentence)
        word_num_reverse, tag_sequence_reverse = self.process_sentence(
            sentence, reverse=True)
        if word_num >= word_num_reverse:
            return word_num, tag_sequence
        return word_num_reverse, tag_sequence_reverse
49 | 50 | 101 | 102 |
103 | 104 | 105 | 111 | 112 | 113 |
114 | 115 |
116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 |
134 | 135 |
    136 | 137 |
  • »
  • 138 | 139 |
  • 搜索
  • 140 | 141 | 142 |
  • 143 | 144 | 145 | 146 |
  • 147 | 148 |
149 | 150 | 151 |
152 |
153 |
154 |
155 | 156 | 163 | 164 | 165 |
166 | 167 |
168 | 169 |
170 | 171 |
172 |
173 | 174 | 175 |
176 | 177 |
178 |

179 | 180 | © 版权所有 2020, fastHan 181 | 182 |

183 |
184 | 185 | 186 | 187 | Built with Sphinx using a 188 | 189 | theme 190 | 191 | provided by Read the Docs. 192 | 193 |
194 | 195 |
196 |
197 | 198 |
199 | 200 |
import re
import torch
import pandas as pd

from datasets import Dataset
from .camr_to_tuples import CAMR

# Hard limit (in subword tokens) for both encoder and decoder sequences.
MAX_LEN = 300


def FastCAMR_Parsing_Loader(data_path, tokenizer):
    """Build a seq2seq CAMR-parsing dataset from a raw CAMR annotation file.

    Sentences whose linearized AMR exceeds ``MAX_LEN`` subword tokens are
    discarded; the parallel bookkeeping lists are pruned too, so they stay
    aligned with the rows of the returned dataset.

    :param data_path: path to a CAMR annotation file.
    :param tokenizer: HuggingFace tokenizer (callable returning
        ``input_ids`` / ``attention_mask``).
    :return: a ``datasets.Dataset`` with columns ``input_ids``,
        ``attention_mask``, ``decoder_input_ids`` and ``labels``.
    """
    sid_list, sent_list, id_token_list, amr_list, convert_amr_list = var_free_camrs(
        data_path)
    input_ids, attention_mask, decoder_input_ids, labels = [], [], [], []
    discard_index = []
    for idx, sid in enumerate(sid_list):
        # Cheap unpadded length check on the target side only.
        tokenize_result = tokenizer(convert_amr_list[idx])
        if len(tokenize_result['input_ids']) > MAX_LEN:
            discard_index.append(idx)
            continue
        sent_tokenize_result = tokenizer(' '.join(sent_list[idx]),
                                         max_length=MAX_LEN,
                                         padding='max_length',
                                         truncation=True)
        amr_tokenize_result = tokenizer(convert_amr_list[idx],
                                        max_length=MAX_LEN,
                                        padding='max_length',
                                        truncation=True)
        input_ids.append(sent_tokenize_result['input_ids'])
        attention_mask.append(sent_tokenize_result['attention_mask'])
        decode_ids = amr_tokenize_result['input_ids']
        # Teacher forcing: decoder input is the target shifted right by one.
        decoder_input_ids.append(decode_ids[:-1])
        labels.append(decode_ids[1:])
    # Pop from the back so earlier indices stay valid while removing.
    for idx in reversed(discard_index):
        sid_list.pop(idx)
        sent_list.pop(idx)
        id_token_list.pop(idx)
        amr_list.pop(idx)
        convert_amr_list.pop(idx)
    amr_data = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "decoder_input_ids": decoder_input_ids,
        "labels": labels
    }
    amr_data = pd.DataFrame(amr_data)
    dataset = Dataset.from_pandas(amr_data, preserve_index=False)
    return dataset


def data_collator(amr_data):
    """Collate a list of per-example feature dicts into batched tensors.

    Tensor-valued features are stacked; list/scalar features are converted
    with ``torch.tensor``.
    """
    first = amr_data[0]
    batch = {}
    for k, v in first.items():
        if isinstance(v, torch.Tensor):
            batch[k] = torch.stack([f[k] for f in amr_data])
        else:
            batch[k] = torch.tensor([f[k] for f in amr_data])
    return batch


def var_free_camrs(input_file):
    """Read a CAMR file and derive a variable-free single-line AMR per sentence.

    :param input_file: path to the raw CAMR annotation file (UTF-8).
    :return: ``(sid_list, sent_list, id_token_list, amr_list,
        convert_amr_list)`` — parallel lists, one entry per sentence.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    sid_list, sent_list, id_token_list, amr_list = read_raw_camrs(lines)
    convert_amr_list = [
        delete_camr_variables(convert_camr_to_single_line(amr),
                              id_token_list[idx])
        for idx, amr in enumerate(amr_list)
    ]
    return sid_list, sent_list, id_token_list, amr_list, convert_amr_list


def read_raw_camrs(lines):
    """Split raw annotation lines into per-sentence ids, tokens and AMR blocks.

    Sentences are separated by blank lines. ``# ::id`` lines supply the
    sentence id, ``# ::wid`` lines supply ``xN_token`` word entries, and all
    non-comment lines are collected as the (multi-line) AMR body.
    """
    sid_list, sent_list, id_token_list, amr_list = [], [], [], []
    # Iterate over every sentence in the input file.
    cur_sent, cur_amr = [], []
    id_token_dict = {}
    has_content = False
    for line in lines:
        line = line.strip()
        # Strip BOM / zero-width characters that survive in the annotations.
        if '\ufeff' in line:
            line = line.replace('\ufeff', '')
        if '\u200b' in line:
            line = line.replace('\u200b', '')
        if line == "":
            if has_content:  # end of current CAMR
                sent_list.append(cur_sent)
                id_token_list.append(id_token_dict)
                amr_list.append(cur_amr)
                cur_sent, cur_amr = [], []
                id_token_dict = {}
                has_content = False
            continue
        if line.startswith("#"):
            if '::id' in line:
                sid = re.findall(r'# ::id export_amr\.(.*?)\s*::', line)[0]
                sid_list.append(sid)
            elif '::wid' in line:
                wid = line[len('# ::wid '):].strip().split(' ')
                for i in wid:
                    token_id, token = i.split('_')
                    if token != '':
                        # key: id number, value: token (e.g. "1":"我")
                        cur_sent.append(token)
                        id_token_dict[int(token_id[1:])] = token
            else:
                continue
        else:
            has_content = True
            cur_amr.append(line)
    # Flush the trailing sentence if the file lacks a final blank line.
    if has_content:
        sent_list.append(cur_sent)
        id_token_list.append(id_token_dict)
        amr_list.append(cur_amr)
    return sid_list, sent_list, id_token_list, amr_list


def convert_camr_to_single_line(amr):
    """Collapse a multi-line AMR block into one whitespace-free line."""
    return "".join([line.strip() for line in amr])


def delete_camr_variables(amr, id_token_dict):
    """Remove variable names (``xN /``) from a single-line CAMR string.

    Coreference pairs ``xi / xj`` are rewritten as ``tok_i^tok_j`` using the
    node table produced by :class:`CAMR`; remaining ``var /`` prefixes are
    simply dropped.
    """
    result_amr, coref_dict = CAMR.parse_AMR_line(amr, id_token_dict)
    node_dict = dict(zip(result_amr.nodes, result_amr.node_values))
    # A variable looks like x1, x1_2, or a chained form x1_x2_3 ...
    var = r'x\d+(?:_\d+)*(?:_x\d+(?:_\d+)*)*'
    coref_vars = '(' + var + r'\s*/\s*' + var + ')'
    coref_var_list = re.findall(coref_vars, amr)
    for coref_v in coref_var_list:
        var0 = coref_v.split('/')[0].strip()
        var1 = coref_v.split('/')[1].strip()
        # node_dict is keyed by the variable id without the leading 'x'.
        var0 = node_dict[var0[1:]]
        var1 = node_dict[var1[1:]]
        # Plain string replacement: the matched text must not be re-parsed as
        # a regex pattern, nor may the tokens act as a regex replacement
        # template (fixes corruption when tokens contain metacharacters).
        amr = amr.replace(coref_v, var0 + '^' + var1)
    normal_var = var + r'\s*/\s*'
    amr = re.sub(normal_var, '', amr)
    return amr
47 | 48 | 99 | 100 |
101 | 102 | 103 | 109 | 110 | 111 |
112 | 113 |
114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 |
132 | 133 |
    134 | 135 |
  • »
  • 136 | 137 |
  • 概览:模块代码
  • 138 | 139 | 140 |
  • 141 | 142 |
  • 143 | 144 |
145 | 146 | 147 |
148 |
149 | 167 |
168 | 169 | 170 |
171 | 172 |
173 |

174 | 175 | © 版权所有 2020, fastHan 176 | 177 |

178 |
179 | 180 | 181 | 182 | Built with Sphinx using a 183 | 184 | theme 185 | 186 | provided by Read the Docs. 187 | 188 |
189 | 190 |
191 |
192 | 193 |
194 | 195 |
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

# Make the fastHan package importable for autodoc (repo root is two levels up).
sys.path.insert(0, os.path.abspath('../../'))

# -- Project information -----------------------------------------------------

project = 'fastHan'
copyright = '2020, fastHan'
author = 'fastHan'

# The short X.Y version
version = '1.4'
# The full version, including alpha/beta/rc tags
release = '1.4'

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.viewcode',
    'sphinx.ext.autosummary',
    'sphinx.ext.mathjax',
    'sphinx.ext.todo'
]

# Keep members in source order and always document __init__.
autodoc_default_options = {
    'member-order': 'bysource',
    'special-members': '__init__',
    'undoc-members': False,
}

autoclass_content = "class"

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# template_bridge
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = "zh_CN"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path .
exclude_patterns = ['modules.rst']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
    'collapse_navigation': False,
    'titles_only': True
}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'fastHan'

# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
# Empty: no LaTeX/PDF output is built for this project.
latex_documents = []

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'fastHan', 'fastHan Documentation',
     [author], 1)
]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'fastHan', 'fastHan Documentation',
     author, 'fastHan', 'One line description of project.',
     'Miscellaneous'),
]


# -- Extension configuration -------------------------------------------------
def maybe_skip_member(app, what, name, obj, skip, options):
    """autodoc-skip-member hook: keep ``__init__``, skip other underscore names."""
    if name == "__init__":
        return False
    if name.startswith("_"):
        return True
    return False


def setup(app):
    """Sphinx extension entry point: register the autodoc skip hook."""
    app.connect('autodoc-skip-member', maybe_skip_member)
48 | 49 | 100 | 101 |
102 | 103 | 104 | 110 | 111 | 112 |
113 | 114 |
115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 |
133 | 134 |
    135 | 136 |
  • »
  • 137 | 138 |
  • demo 中文文档
  • 139 | 140 | 141 |
  • 142 | 143 | 144 | View page source 145 | 146 | 147 |
  • 148 | 149 |
150 | 151 | 152 |
153 |
154 |
155 |
156 | 157 |
158 |

demo 中文文档

159 |
160 |

用户手册

161 |
162 | 165 |
166 |
167 |
168 |

API 文档

169 |

除了用户手册之外,你还可以通过查阅 API 文档来找到你所需要的工具。

170 |
171 |
172 |
173 |
174 |
175 |

索引与搜索

176 | 181 |
182 | 183 | 184 |
185 | 186 |
187 |
188 | 189 | 195 | 196 | 197 |
198 | 199 |
200 |

201 | 202 | © 版权所有 2020, fastHan 203 | 204 |

205 |
206 | 207 | 208 | 209 | Built with Sphinx using a 210 | 211 | theme 212 | 213 | provided by Read the Docs. 214 | 215 |
216 | 217 |
218 |
219 | 220 |
221 | 222 |
223 | 224 | 225 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | -------------------------------------------------------------------------------- /docs/build/html/fastHan.model.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | fastHan.model package — fastHan 0.5.0 文档 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
47 | 48 | 99 | 100 |
101 | 102 | 103 | 109 | 110 | 111 |
112 | 113 |
114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 |
132 | 133 |
    134 | 135 |
  • »
  • 136 | 137 |
  • fastHan.model package
  • 138 | 139 | 140 |
  • 141 | 142 | 143 | View page source 144 | 145 | 146 |
  • 147 | 148 |
149 | 150 | 151 |
152 |
153 |
154 |
155 | 156 | 174 | 175 | 176 |
177 | 178 |
179 |
180 | 181 | 182 |
183 | 184 |
185 |

186 | 187 | © 版权所有 2020, fastHan 188 | 189 |

190 |
191 | 192 | 193 | 194 | Built with Sphinx using a 195 | 196 | theme 197 | 198 | provided by Read the Docs. 199 | 200 |
201 | 202 |
203 |
204 | 205 |
206 | 207 |
from fastNLP.core.metrics import Metric
import torch


# modified from https://github.com/yhcc/JointCwsParser
class SegAppCharParseF1Metric(Metric):
    """Word-level UAS/LAS-style F1 for joint segmentation + dependency parsing.

    Characters are grouped into words via the 'app' label (``app_index``);
    single-character punctuation words (``pun_index``) are excluded from
    scoring.
    """

    def __init__(self, pun_index, app_index=0):
        super().__init__()
        self.app_index = app_index
        self.pun_index = pun_index

        # Running counts, cleared by get_metric(reset=True).
        self.parse_head_tp = 0   # unlabeled (head-only) true positives
        self.parse_label_tp = 0  # labeled true positives
        self.rec_tol = 0         # total gold arcs
        self.pre_tol = 0         # total predicted arcs

    def get_word_pairs(self, head_preds, label_preds, seq_lens, pun_masks):
        """Convert char-level heads/labels into word-level arc tuples."""
        # Drop the artificial root position.
        head_preds = head_preds[:, 1:].tolist()
        label_preds = label_preds[:, 1:].tolist()
        seq_lens = (seq_lens - 1).tolist()

        head_dep_tuples = []
        head_label_dep_tuples = []

        for b in range(len(head_preds)):
            seq_len = seq_lens[b]
            head_pred = head_preds[b][:seq_len]
            label_pred = label_preds[b][:seq_len]

            words = []   # [word_start, word_end) char spans, root excluded
            heads = []
            labels = []
            ranges = []  # word index of each char
            word_idx = 0
            word_start_idx = 0
            for idx, (label, head) in enumerate(zip(label_pred, head_pred)):
                ranges.append(word_idx)
                if label == self.app_index:
                    # 'app' means this char continues the current word.
                    pass
                else:
                    labels.append(label)
                    heads.append(head)
                    words.append((word_start_idx, idx + 1))
                    word_start_idx = idx + 1
                    word_idx += 1

            head_dep_tuple = []  # (head, dependent) — head first
            head_label_dep_tuple = []
            for idx, head in enumerate(heads):
                span = words[idx]
                if span[0] == span[1] - 1 and pun_masks[b, span[0]]:
                    continue  # exclude punctuation
                if head == 0:
                    head_dep_tuple.append(('root', words[idx]))
                    head_label_dep_tuple.append(
                        ('root', labels[idx], words[idx]))
                else:
                    head_word_idx = ranges[head - 1]
                    head_word_span = words[head_word_idx]
                    head_dep_tuple.append((head_word_span, words[idx]))
                    head_label_dep_tuple.append(
                        (head_word_span, labels[idx], words[idx]))
            head_dep_tuples.append(head_dep_tuple)
            head_label_dep_tuples.append(head_label_dep_tuple)

        return head_dep_tuples, head_label_dep_tuples

    def update(self, labels, heads, head_preds, label_preds, seq_len):
        """Accumulate arc statistics for one batch.

        max_len counts characters without the root position.
        :param labels: gold char labels, batch_size x max_len (incl. root slot)
        :param heads: gold char heads, batch_size x max_len (incl. root slot)
        :param head_preds: predicted heads, batch_size x max_len
        :param label_preds: predicted labels, batch_size x max_len
        :param seq_len: sequence lengths, batch_size
        """
        pun_masks = (labels == self.pun_index).long()
        pun_masks = pun_masks[:, 1:]

        head_dep_tuples, head_label_dep_tuples = self.get_word_pairs(
            head_preds, label_preds, seq_len, pun_masks)
        gold_head_dep_tuples, gold_head_label_dep_tuples = self.get_word_pairs(
            heads, labels, seq_len, pun_masks)

        for b in range(seq_len.shape[0]):
            head_dep_tuple = head_dep_tuples[b]
            head_label_dep_tuple = head_label_dep_tuples[b]
            gold_head_dep_tuple = gold_head_dep_tuples[b]
            gold_head_label_dep_tuple = gold_head_label_dep_tuples[b]

            for head_dep, head_label_dep in zip(head_dep_tuple,
                                                head_label_dep_tuple):
                if head_dep in gold_head_dep_tuple:
                    self.parse_head_tp += 1
                if head_label_dep in gold_head_label_dep_tuple:
                    self.parse_label_tp += 1
            self.pre_tol += len(head_dep_tuple)
            self.rec_tol += len(gold_head_dep_tuple)

    def get_metric(self, reset=True):
        """Return rounded precision/recall/F1 dicts for UAS and LAS."""
        # +1e-6 guards against ZeroDivisionError on an empty evaluation,
        # consistent with CWSMetric.get_metric below.
        u_p = self.parse_head_tp / (self.pre_tol + 1e-6)
        u_r = self.parse_head_tp / (self.rec_tol + 1e-6)
        u_f = 2 * u_p * u_r / (1e-6 + u_p + u_r)
        l_p = self.parse_label_tp / (self.pre_tol + 1e-6)
        l_r = self.parse_label_tp / (self.rec_tol + 1e-6)
        l_f = 2 * l_p * l_r / (1e-6 + l_p + l_r)

        if reset:
            self.parse_head_tp = 0
            self.parse_label_tp = 0
            self.rec_tol = 0
            self.pre_tol = 0

        return {'u_f1': round(u_f, 4), 'u_p': round(u_p, 4),
                'u_r/uas': round(u_r, 4),
                'f': round(l_f, 4), 'l_p': round(l_p, 4),
                'l_r/las': round(l_r, 4)}


class CWSMetric(Metric):
    """Word-segmentation F1 computed from per-char 'app'/boundary labels."""

    def __init__(self, app_index=0):
        super().__init__()
        self.app_index = app_index
        self.pre = 0  # predicted word count
        self.rec = 0  # gold word count
        self.tp = 0   # correctly segmented words

    def label_to_seg(self, labels, seq_lens):
        """Encode each word as (end position -> preceding 'app' run length)."""
        segs = torch.zeros_like(labels)[:, 1:]
        masks = torch.zeros_like(labels)[:, 1:]

        seq_lens = (seq_lens - 1).tolist()
        # [:, 1:] drops the root position.
        for idx, label in enumerate(labels[:, 1:].tolist()):
            seq_len = seq_lens[idx]
            label = label[:seq_len]
            word_len = 0

            for i, l in enumerate(label):
                if l == self.app_index and i != len(label) - 1:
                    word_len += 1
                else:
                    segs[idx, i] = word_len
                    masks[idx, i] = 1
                    word_len = 0
        return segs, masks

    def update(self, labels, label_preds, seq_len):
        """Accumulate segmentation statistics for one batch.

        :param labels: gold labels, batch_size x max_len
        :param label_preds: predicted labels, batch_size x max_len
        :param seq_len: batch_size
        """
        seg_targets, seg_masks = self.label_to_seg(labels, seq_len)
        pred_segs, pred_masks = self.label_to_seg(label_preds, seq_len)

        right_mask = seg_targets.eq(pred_segs)  # same word length predicted
        self.rec += seg_masks.sum().item()
        self.pre += pred_masks.sum().item()
        # ... and pred/target both end a word at the same position.
        self.tp += (right_mask.__and__(
            pred_masks.bool().__and__(seg_masks.bool()))).sum().item()

    def get_metric(self, reset=True):
        """Return rounded segmentation precision/recall/F1."""
        res = {}
        res['rec'] = round(self.tp / (self.rec + 1e-6), 4)
        res['pre'] = round(self.tp / (self.pre + 1e-6), 4)
        res['f1'] = round(2 * res['rec'] * res['pre'] /
                          (res['pre'] + res['rec'] + 1e-6), 4)

        if reset:
            self.pre = 0
            self.rec = 0
            self.tp = 0

        return res
47 | 48 | 99 | 100 |
101 | 102 | 103 | 109 | 110 | 111 |
112 | 113 |
114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 |
132 | 133 |
    134 | 135 |
  • »
  • 136 | 137 |
  • fastHan.model.utils module
  • 138 | 139 | 140 |
  • 141 | 142 | 143 | View page source 144 | 145 | 146 |
  • 147 | 148 |
149 | 150 | 151 |
152 |
153 |
154 |
155 | 156 |
157 |

fastHan.model.utils module

158 |
159 |
160 | fastHan.model.utils.check_dataloader_paths(paths: Union[str, Dict[str, str]]) → Dict[str, str][源代码]
161 |

检查传入dataloader的文件的合法性。如果为合法路径,将返回至少包含’train’这个key的dict。类似于下面的结果 162 | {

163 |
164 |
‘train’: ‘/some/path/to/’, # 一定包含,建词表应该在这上面建立,剩下的其它文件应该只需要处理并index。 165 | ‘test’: ‘xxx’ # 可能有,也可能没有 166 | …
167 |

} 168 | 如果paths为不合法的,将直接进行raise相应的错误

169 | 170 | 171 | 172 | 173 | 175 | 176 | 177 | 178 | 179 |
参数:paths – 路径. 可以为一个文件路径(则认为该文件就是train的文件); 可以为一个文件目录,将在该目录下寻找train(文件名 174 | 中包含train这个字段), test.txt, dev.txt; 可以为一个dict, 则key是用户自定义的某个文件的名称,value是这个文件的路径。
返回:
180 |
181 | 182 |
183 |
184 | fastHan.model.utils.get_tokenizer()[源代码]
185 |
186 | 187 |
188 | 189 | 190 |
191 | 192 |
193 |
194 | 195 | 196 |
197 | 198 |
199 |

200 | 201 | © 版权所有 2020, fastHan 202 | 203 |

204 |
205 | 206 | 207 | 208 | Built with Sphinx using a 209 | 210 | theme 211 | 212 | provided by Read the Docs. 213 | 214 |
215 | 216 |
217 |
218 | 219 |
220 | 221 |
import torch
import torch.nn.functional as F
from fastNLP.models.torch.biaffine_parser import (ArcBiaffine, BiaffineParser,
                                                  LabelBilinear)
from fastNLP.modules.torch.dropout import TimestepDropout
from torch import nn
from transformers import PreTrainedModel


# modified from https://github.com/yhcc/JointCwsParser/blob/master/models/BertParser.py
class BertParser(BiaffineParser):
    """Biaffine dependency-parsing head operating on BERT character features."""

    def __init__(self,
                 num_label,
                 embed_size=768,
                 arc_mlp_size=500,
                 label_mlp_size=100,
                 dropout=0.1,
                 use_greedy_infer=False,
                 app_index=0):
        # Deliberately skips BiaffineParser.__init__ (initializes the
        # grandparent nn.Module directly) so none of the parent's own
        # submodules are built — only the layers defined below exist.
        super(BiaffineParser, self).__init__()

        self.embed_size = embed_size
        # One shared MLP producing arc-dep / arc-head / label-dep / label-head
        # feature slices, split apart in forward().
        self.mlp = nn.Sequential(
            nn.Linear(self.embed_size, arc_mlp_size * 2 + label_mlp_size * 2),
            nn.LeakyReLU(0.1),
            TimestepDropout(p=dropout),
        )
        self.arc_mlp_size = arc_mlp_size
        self.label_mlp_size = label_mlp_size
        self.arc_predictor = ArcBiaffine(arc_mlp_size, bias=True)
        self.label_predictor = LabelBilinear(label_mlp_size,
                                             label_mlp_size,
                                             num_label,
                                             bias=True)
        self.use_greedy_infer = use_greedy_infer
        self.reset_parameters()

        self.app_index = app_index
        self.num_label = num_label
        # The app-masking logic in forward() assumes label index 0 is 'app'.
        if self.app_index != 0:
            raise ValueError("现在app_index必须等于0")

        self.dropout = nn.Dropout(dropout)

    def reset_parameters(self):
        # Modules that define their own reset/init hooks are left alone;
        # everything else gets xavier-normal (matrices) / uniform (vectors).
        for name, m in self.named_modules():
            if hasattr(m, 'reset_parameters') or hasattr(m, 'init_param'):
                pass
            else:
                for p in m.parameters():
                    if len(p.size()) > 1:
                        nn.init.xavier_normal_(p, gain=0.1)
                    else:
                        nn.init.uniform_(p, -0.1, 0.1)

    def forward(self, feats, mask, gold_heads=None, char_labels=None):
        """Run the biaffine parser over character features.

        Training (``self.training``) requires ``gold_heads``/``char_labels``
        and returns a loss dict; eval returns head/label predictions.
        """
        # This mask differs from attention_mask: [SEP] and [CLS] are removed.

        batch_size = feats.shape[0]
        max_len = feats.shape[1]

        feats = self.dropout(feats)
        feats = self.mlp(feats)
        arc_sz, label_sz = self.arc_mlp_size, self.label_mlp_size
        arc_dep, arc_head = feats[:, :, :arc_sz], feats[:, :,
                                                        arc_sz:2 * arc_sz]
        label_dep, label_head = feats[:, :, 2 * arc_sz:2 * arc_sz +
                                      label_sz], feats[:, :,
                                                       2 * arc_sz + label_sz:]

        arc_pred = self.arc_predictor(arc_head, arc_dep)  # [N, L, L]

        if self.training:
            # Teacher forcing: score labels against the gold tree.
            assert gold_heads is not None
            head_pred = None
            heads = gold_heads
        else:
            heads = self.mst_decoder(arc_pred, mask)
            head_pred = heads

        # Replace the -100 padding with -1 so heads can safely be used as a
        # matrix index below.
        padded_heads = torch.clone(heads)
        padded_heads[padded_heads == -100] = -1

        batch_range = torch.arange(start=0,
                                   end=batch_size,
                                   dtype=torch.long,
                                   device=mask.device).unsqueeze(1)
        label_head = label_head[batch_range, padded_heads].contiguous()
        label_pred = self.label_predictor(label_head,
                                          label_dep)  # [N, max_len, num_label]
        # Restriction: the 'app' label may only be predicted when the head is
        # the immediately following character.
        arange_index = torch.arange(1, max_len+1, dtype=torch.long, device=mask.device).unsqueeze(0)\
            .repeat(batch_size, 1)  # batch_size x max_len

        app_masks = heads.ne(
            arange_index)  # batch_size x max_len; 1 = 'app' forbidden here
        app_masks = app_masks.unsqueeze(2).repeat(1, 1, self.num_label)
        app_masks[:, :, 1:] = 0  # mask applies to label 0 ('app') only
        label_pred = label_pred.masked_fill(app_masks, float('-inf'))

        if self.training:
            arc_loss, label_loss = self.loss(arc_pred, label_pred, gold_heads,
                                             char_labels, mask)
            res_dict = {
                'loss': arc_loss + label_loss,
                'arc_loss': arc_loss,
                'label_loss': label_loss
            }
        else:
            res_dict = {
                'label_preds': label_pred.max(2)[1],
                'head_preds': head_pred
            }

        return res_dict

    @staticmethod
    def loss(arc_pred, label_pred, arc_true, label_true, mask):
        """
        Compute loss.
        :param arc_pred: [batch_size, seq_len, seq_len]
        :param label_pred: [batch_size, seq_len, n_tags]
        :param arc_true: [batch_size, seq_len]
        :param label_true: [batch_size, seq_len]
        :param mask: [batch_size, seq_len]
        :return: (arc_nll, label_nll) cross-entropy loss pair
        """

        batch_size, seq_len, _ = arc_pred.shape
        flip_mask = (mask == 0)
        # _arc_pred = arc_pred.clone()
        # Padded head positions can never be selected as heads.
        _arc_pred = arc_pred.masked_fill(flip_mask.unsqueeze(1), -float('inf'))

        # NOTE(review): these in-place fills mutate the callers' gold tensors
        # (excluding the root slot via ignore_index=-100) — confirm callers do
        # not reuse arc_true/label_true afterwards.
        arc_true.data[:, 0].fill_(-100)
        label_true.data[:, 0].fill_(-100)

        arc_nll = F.cross_entropy(_arc_pred.view(-1, seq_len),
                                  arc_true.view(-1),
                                  ignore_index=-100)
        label_nll = F.cross_entropy(label_pred.view(-1, label_pred.size(-1)),
                                    label_true.view(-1),
                                    ignore_index=-100)

        return arc_nll, label_nll


class DependencyParsingModel(PreTrainedModel):
    """HuggingFace wrapper: BERT-style encoder + BertParser head."""

    def __init__(self, encoder, config, labels):
        super().__init__(config)

        label_num = len(labels)

        self.num_labels = label_num

        self.encoder = encoder
        self.parser = BertParser(num_label=self.num_labels,
                                 embed_size=config.hidden_size,
                                 app_index=labels.index('app'))

    def forward(self,
                input_ids=None,
                attention_mask=None,
                labels=None,
                heads=None):
        # Build a mask that excludes [CLS] and [SEP].
        seq_len = attention_mask.sum(dim=-1) - 2
        broad_cast_seq_len = torch.arange(attention_mask.shape[1] - 2).expand(
            attention_mask.shape[0], -1).to(seq_len.device)
        mask = broad_cast_seq_len < seq_len.unsqueeze(1)

        outputs = self.encoder(input_ids, attention_mask=attention_mask)

        feats = outputs[0]
        # Drop the [CLS]/[SEP] feature positions to match the mask above.
        feats = feats[:, 1:-1]

        return self.parser.forward(feats=feats,
                                   mask=mask,
                                   gold_heads=heads,
                                   char_labels=labels)
| -------------------------------------------------------------------------------- /docs/build/html/py-modindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Python 模块索引 — fastHan 0.5.0 文档 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 |
50 | 51 | 102 | 103 |
104 | 105 | 106 | 112 | 113 | 114 |
115 | 116 |
117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 |
135 | 136 |
    137 | 138 |
  • »
  • 139 | 140 |
  • Python 模块索引
  • 141 | 142 | 143 |
  • 144 | 145 |
  • 146 | 147 |
148 | 149 | 150 |
151 |
152 |
153 |
154 | 155 | 156 |

Python 模块索引

157 | 158 |
159 | f 160 |
161 | 162 | 163 | 164 | 166 | 167 | 169 | 172 | 173 | 174 | 177 | 178 | 179 | 182 | 183 | 184 | 187 | 188 | 189 | 192 | 193 | 194 | 197 | 198 | 199 | 202 | 203 | 204 | 207 | 208 | 209 | 212 | 213 | 214 | 217 | 218 | 219 | 222 |
 
165 | f
170 | fastHan 171 |
    175 | fastHan.FastModel 176 |
    180 | fastHan.model 181 |
    185 | fastHan.model.bert 186 |
    190 | fastHan.model.bert_encoder_theseus 191 |
    195 | fastHan.model.BertCharParser 196 |
    200 | fastHan.model.CharParser 201 |
    205 | fastHan.model.model 206 |
    210 | fastHan.model.old_fastNLP_bert 211 |
    215 | fastHan.model.UserDict 216 |
    220 | fastHan.model.utils 221 |
223 | 224 | 225 |
226 | 227 |
228 |
229 | 230 | 231 |
232 | 233 |
234 |

235 | 236 | © 版权所有 2020, fastHan 237 | 238 |

239 |
240 | 241 | 242 | 243 | Built with Sphinx using a 244 | 245 | theme 246 | 247 | provided by Read the Docs. 248 | 249 |
250 | 251 |
252 |
253 | 254 |
255 | 256 |
257 | 258 | 259 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | -------------------------------------------------------------------------------- /docs/build/html/fastHan.model.model.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | fastHan.model.model module — fastHan 0.5.0 文档 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
47 | 48 | 99 | 100 |
101 | 102 | 103 | 109 | 110 | 111 |
112 | 113 |
114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 |
132 | 133 |
    134 | 135 |
  • »
  • 136 | 137 |
  • fastHan.model.model module
  • 138 | 139 | 140 |
  • 141 | 142 | 143 | View page source 144 | 145 | 146 |
  • 147 | 148 |
149 | 150 | 151 |
152 |
153 |
154 |
155 | 156 |
157 |

fastHan.model.model module

158 |
159 |
160 | class fastHan.model.model.CharModel(embed, label_vocab, pos_idx=31, Parsing_rnn_layers=3, Parsing_arc_mlp_size=500, Parsing_label_mlp_size=100, Parsing_use_greedy_infer=False, encoding_type='bmeso', embedding_dim=768, dropout=0.1, use_pos_embedding=True, use_average=True)[源代码]
161 |

基类:torch.nn.modules.module.Module

162 |
163 |
164 | __init__(embed, label_vocab, pos_idx=31, Parsing_rnn_layers=3, Parsing_arc_mlp_size=500, Parsing_label_mlp_size=100, Parsing_use_greedy_infer=False, encoding_type='bmeso', embedding_dim=768, dropout=0.1, use_pos_embedding=True, use_average=True)[源代码]
165 |

Initializes internal Module state, shared by both nn.Module and ScriptModule.

166 |
167 | 168 |
169 |
170 | forward(chars, seq_len, task_class, target, seq_len_for_wordlist=None, dep_head=None, dep_label=None, pos=None, word_lens=None)[源代码]
171 |
172 | 173 |
174 |
175 | predict(chars, seq_len, task_class, tag_seqs=None)[源代码]
176 |
177 | 178 |
179 | 180 |
181 | 182 | 183 |
184 | 185 |
186 |
187 | 188 | 189 |
190 | 191 |
192 |

193 | 194 | © 版权所有 2020, fastHan 195 | 196 |

197 |
198 | 199 | 200 | 201 | Built with Sphinx using a 202 | 203 | theme 204 | 205 | provided by Read the Docs. 206 | 207 |
208 | 209 |
210 |
211 | 212 |
213 | 214 |
import itertools

import numpy as np

from fastNLP import DataSet

# Maximum number of characters kept per sentence; longer sentences are truncated.
MAX_LEN = 300


def _char_tags(word, label=None):
    """Return per-character BMES tags for *word*.

    Without *label* the plain CWS tags ``s``/``b``/``m``/``e`` are produced;
    with *label* each tag gets a ``-label`` suffix (POS/guwen style), e.g.
    ``b-nn`` / ``m-nn`` / ``e-nn`` or ``s-nn`` for a single-char word.
    """
    suffix = '' if label is None else '-' + label
    if len(word) == 1:
        return ['s' + suffix]
    return ['b' + suffix] + ['m' + suffix] * (len(word) - 2) + ['e' + suffix]


def _append_example(data, words, labels, label_to_index, tokenizer):
    """Truncate, index and tokenize one sentence; append it to *data*.

    ``labels`` get -100 sentinels for the [CLS]/[SEP] positions so the loss
    ignores them. Sentences whose tokenization does not align one-to-one
    with their units (one wordpiece per unit plus the two special tokens)
    are silently skipped, matching the original behaviour.
    """
    words = words[:MAX_LEN]
    labels = labels[:MAX_LEN]
    labels = [-100] + [label_to_index[x] for x in labels] + [-100]

    tokenize_result = tokenizer(words, is_split_into_words=True)
    if len(tokenize_result['input_ids']) != len(labels):
        return

    data['input_ids'].append(tokenize_result['input_ids'])
    data['attention_mask'].append(tokenize_result['attention_mask'])
    data['labels'].append(labels)
    data['seq_len'].append(len(labels) - 2)


def fastHan_CWS_Loader(lines, label_to_index, tokenizer):
    """Build a DataSet for Chinese word segmentation.

    Each element of *lines* is one sentence of space-separated words.
    """
    data = {
        'input_ids': [],
        'attention_mask': [],
        'labels': [],
        'seq_len': [],
    }

    for line in lines:
        line = line.strip()
        if len(line) <= 1:
            continue

        words = []
        labels = []
        for word in line.split(' '):
            if len(word) == 0:
                continue
            words.extend(word)
            labels.extend(_char_tags(word))

        _append_example(data, words, labels, label_to_index, tokenizer)

    return DataSet(data)


def fastHan_POS_loader(lines, label_to_index, tokenizer):
    """Build a DataSet for POS tagging from CoNLL-style *lines*.

    Column 1 is the word, column 3 the POS tag. Sentences are separated by
    blank lines; a trailing sentence without a final blank line is flushed
    too (the original silently dropped it).
    """
    data = {
        'input_ids': [],
        'attention_mask': [],
        'labels': [],
        'seq_len': [],
    }

    words = []
    labels = []
    for line in lines:
        if line == '\n':
            # Sentence boundary; tolerate consecutive blank lines.
            if words:
                _append_example(data, words, labels, label_to_index,
                                tokenizer)
                words = []
                labels = []
            continue

        cols = line.strip().split('\t')
        word = cols[1]
        label = cols[3].lower()
        words.extend(word)
        labels.extend(_char_tags(word, label))

    if words:
        _append_example(data, words, labels, label_to_index, tokenizer)

    return DataSet(data)


def fastHan_POS_guwen_loader(lines, label_to_index, tokenizer):
    """Build a DataSet for classical-Chinese (guwen) POS tagging.

    Each line holds space-separated ``word/tag`` items.
    """
    data = {'input_ids': [], 'attention_mask': [], 'labels': [], 'seq_len': []}

    for line in lines:
        line = line.strip()
        if len(line) <= 1:
            continue

        words = []
        labels = []
        for word_label in line.split(' '):
            if len(word_label) == 0:
                continue
            word, label = word_label.split('/')
            words.extend(word)
            labels.extend(_char_tags(word, label))

        _append_example(data, words, labels, label_to_index, tokenizer)

    return DataSet(data)


def fastHan_NER_Loader(lines, label_to_index, tokenizer):
    """Build a DataSet for NER from two-column ``token tag`` lines.

    Sentences are separated by blank lines; a trailing sentence is flushed
    even when the file has no final blank line.
    """
    data = {
        'input_ids': [],
        'attention_mask': [],
        'labels': [],
        'seq_len': [],
    }

    words = []
    labels = []
    for line in lines:
        if line == '\n':
            if words:
                _append_example(data, words, labels, label_to_index,
                                tokenizer)
                words = []
                labels = []
            continue

        word, label = line.strip().split(' ')
        words.append(word)
        labels.append(label.lower())

    if words:
        _append_example(data, words, labels, label_to_index, tokenizer)

    return DataSet(data)


def fastHan_Parsing_Loader(lines, label_to_index, tokenizer):
    """Build a DataSet for character-level dependency parsing (CoNLL lines).

    Word-level trees are converted to character level: inside a word every
    character points at the next one with the pseudo label ``app``; the
    last character carries the word's real head and label.
    """
    data = {
        'input_ids': [],
        'attention_mask': [],
        'heads': [],
        'labels': [],
        'seq_len': []
    }

    words = []
    heads = []
    labels = []

    skip_1 = 0  # sentences skipped: longer than MAX_LEN characters
    skip_2 = 0  # sentences skipped: tokenizer output misaligned with chars

    def _flush():
        nonlocal skip_1, skip_2
        char_words = list(itertools.chain(*words))
        # Was a hard-coded 300 — use MAX_LEN for consistency (same value).
        if len(char_words) > MAX_LEN:
            skip_1 += 1
            return
        tokenize_result = tokenizer(char_words, is_split_into_words=True)
        if len(tokenize_result['input_ids']) - 2 != len(char_words):
            skip_2 += 1
            return

        # Insert a pseudo root token right after [CLS].
        # NOTE(review): assumes token id 1 is a safe placeholder for the
        # root position in this vocabulary — confirm against the tokenizer.
        tokenize_result['input_ids'].insert(1, 1)
        tokenize_result['attention_mask'].insert(1, 1)

        # head_end_indexes[k] is the 1-based index of the last character of
        # word k; the trailing 0 makes head==0 (root) resolve to index 0.
        head_end_indexes = np.cumsum(list(map(len, words))).tolist() + [0]
        char_index = 1

        char_heads = []
        char_labels = []
        for word, head, label in zip(words, heads, labels):
            for _ in range(len(word) - 1):
                char_index += 1
                char_heads.append(char_index)
                char_labels.append('app')
            char_index += 1
            char_heads.append(head_end_indexes[head - 1])
            char_labels.append(label)

        # The root position is ignored by the loss (-100).
        sent_labels = [-100] + [label_to_index[x] for x in char_labels]
        char_heads = [-100] + char_heads

        data['input_ids'].append(tokenize_result['input_ids'])
        data['attention_mask'].append(tokenize_result['attention_mask'])
        data['labels'].append(sent_labels)
        data['heads'].append(char_heads)
        data['seq_len'].append(len(sent_labels))

    for line in lines:
        if line == '\n':
            if words:
                _flush()
                words = []
                heads = []
                labels = []
            continue

        cols = line.strip().split('\t')
        words.append(cols[1])
        heads.append(int(cols[6]))
        labels.append(cols[7].lower())

    if words:  # flush a trailing sentence with no final blank line
        _flush()

    return DataSet(data)
# from multitask_model_normloss2
# Per-task loss weights are managed through the weight manager ("wm").

import torch
import torch.nn as nn
import torch.nn.functional as F
from fastNLP.modules.torch import MLP, ConditionalRandomField, allowed_transitions
from torch.nn import CrossEntropyLoss
from transformers import PreTrainedModel

from .dependency_parsing_model import BertParser


# modified from https://github.com/THUDM/P-tuning-v2
class PrefixEncoder(torch.nn.Module):
    r'''
    The torch.nn model to encode the prefix
    Input shape: (batch-size, prefix-length)
    Output shape: (batch-size, prefix-length, 2*layers*hidden)
    '''
    def __init__(self,
                 config,
                 num_tokens,
                 prefix_projection,
                 pre_seq_len,
                 prefix_hidden_size=500):
        # NOTE(review): ``pre_seq_len`` is accepted but never used in this
        # constructor — confirm whether it can be dropped from the signature.
        super().__init__()
        self.prefix_projection = prefix_projection
        if self.prefix_projection:
            # Use a two-layer MLP to encode the prefix
            self.embedding = torch.nn.Embedding(num_tokens, config.hidden_size)
            self.trans = torch.nn.Sequential(
                torch.nn.Linear(config.hidden_size, prefix_hidden_size),
                torch.nn.Tanh(),
                torch.nn.Linear(
                    prefix_hidden_size,
                    config.num_hidden_layers * 2 * config.hidden_size))
        else:
            # Direct lookup: one embedding row already holds the key/value
            # vectors for every layer (2 * num_hidden_layers * hidden_size).
            self.embedding = torch.nn.Embedding(
                num_tokens, config.num_hidden_layers * 2 * config.hidden_size)

    def forward(self, prefix: torch.Tensor):
        """Map prefix token ids to the flat past-key-value vectors."""
        if self.prefix_projection:
            prefix_tokens = self.embedding(prefix)
            past_key_values = self.trans(prefix_tokens)
        else:
            past_key_values = self.embedding(prefix)
        return past_key_values


class MultiTaskModel(PreTrainedModel):
    """Multi-task model: one shared encoder plus per-task prefix tuning.

    Sequence-labeling tasks get an MLP classifier and a CRF used for
    Viterbi decoding at inference time; the single dependency-parsing task
    (``biaffine_task``) is handled by a biaffine ``BertParser`` head.
    Training losses are rescaled per task via ``ensembledWeightManager``.
    """
    def __init__(self,
                 encoder,
                 task_label_map,
                 all_tasks,
                 ensembledWeightManager,
                 prefix_projection=False,
                 pre_seq_len=6,
                 biaffine_task='Parsing-ctb9'):
        super().__init__(encoder.config)

        self.all_tasks = all_tasks
        self.task_label_map = task_label_map
        self.ensembledWeightManager = ensembledWeightManager
        # sequence_labeling
        self.biaffine_task = biaffine_task
        self.seq_label_classifier = nn.ModuleDict()
        self.crf = nn.ModuleDict()

        # One MLP classifier + CRF per sequence-labeling task; the parsing
        # task is excluded here and gets the biaffine parser below.
        for task in task_label_map:
            if task == self.biaffine_task:
                continue

            label_num = len(task_label_map[task])
            labels = {i: task_label_map[task][i] for i in range(label_num)}

            self.seq_label_classifier[task] = MLP(
                [encoder.config.hidden_size, 512, label_num])
            self.crf[task] = ConditionalRandomField(
                num_tags=label_num,
                allowed_transitions=allowed_transitions(labels))
            # Start from zero transition scores.
            self.crf[task].trans_m.data *= 0

        self.parser = BertParser(
            num_label=len(task_label_map[self.biaffine_task]),
            embed_size=encoder.config.hidden_size,
            app_index=task_label_map[self.biaffine_task].index('app'))

        self.dropout = nn.Dropout(encoder.config.hidden_dropout_prob)
        self.encoder = encoder

        # prefix tuning
        self.pre_seq_len = pre_seq_len
        self.build_prefix_map()
        self.prefix_encoder = PrefixEncoder(
            config=encoder.config,
            prefix_projection=prefix_projection,
            num_tokens=self.num_tokens,
            pre_seq_len=self.pre_seq_len)

    def build_prefix_map(self):
        """Assign prefix-token ids to every task.

        Each task name looks like ``"<macro>-<corpus>"`` (e.g. ``CWS-pku``).
        The first half of a task's prefix ids is shared by all tasks with
        the same macro task; the second half is unique to the task.
        Sets ``self.prefix_map`` (task -> LongTensor of ids) and
        ``self.num_tokens`` (total ids allocated).
        """
        length = self.pre_seq_len // 2
        idx = 0
        prefix_map = dict()
        macro_map = dict()
        for task in self.all_tasks:
            macro_task, _ = task.split('-')
            if macro_task not in macro_map:
                macro_map[macro_task] = [idx + i for i in range(length)]
                idx += length

        for task in self.all_tasks:
            macro_task, _ = task.split('-')
            prefix_map[task] = torch.LongTensor(
                macro_map[macro_task] + [idx + i for i in range(length)])
            idx += length
        self.prefix_map = prefix_map
        self.num_tokens = idx

    def get_prompt(self, task, batch_size):
        """Build ``past_key_values`` for *task*, expanded to *batch_size*.

        Reshapes the flat prefix vectors to
        (batch, pre_seq_len, 2*layers, heads, head_dim), permutes to put the
        layer axis first, and splits into per-layer (key, value) pairs as
        expected by the encoder's ``past_key_values`` argument.
        """
        prefix_tokens = self.prefix_map[task]
        prefix_tokens = prefix_tokens.unsqueeze(0).expand(batch_size, -1).to(
            self.encoder.device)
        past_key_values = self.prefix_encoder(prefix_tokens)

        past_key_values = past_key_values.view(
            batch_size, self.pre_seq_len,
            self.encoder.config.num_hidden_layers * 2,
            self.encoder.config.num_attention_heads,
            self.encoder.config.hidden_size //
            self.encoder.config.num_attention_heads)
        past_key_values = self.dropout(past_key_values)
        past_key_values = past_key_values.permute([2, 0, 3, 1, 4]).split(2)
        return past_key_values

    def __get_ud_diff(self, feats, tag_seqs, user_dict_weight):
        """Compute the user-dictionary bonus added to emission scores.

        The bonus per position is (max - mean) over the tag axis, applied
        to the positions marked in *tag_seqs* and scaled by
        *user_dict_weight*.
        """
        diff = torch.max(feats, dim=2)[0] - torch.mean(feats, dim=2)
        diff = diff.unsqueeze(dim=-1)
        diff = diff.expand(-1, -1, tag_seqs.size()[-1])
        diff = tag_seqs * diff * user_dict_weight
        return diff

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        task=None,
        labels=None,
        heads=None,
        tag_seqs=None,
        user_dict_weight=0.05,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Labels for computing the token classification loss. Indices should be in ``[0, ..., config.num_labels -
            1]``.
        """

        # ``task`` arrives as a tensor; every element of the batch belongs
        # to the same task, so the first index selects the task name.
        task = task.tolist()[0]
        task = self.all_tasks[task]

        batch_size = input_ids.shape[0]
        past_key_values = self.get_prompt(task=task, batch_size=batch_size)
        # The prefix positions must be attendable, so prepend ones.
        prefix_attention_mask = torch.ones(batch_size, self.pre_seq_len).to(
            self.encoder.device)
        prefix_attention_mask = torch.cat(
            (prefix_attention_mask, attention_mask), dim=1)

        outputs = self.encoder(input_ids,
                               attention_mask=prefix_attention_mask,
                               past_key_values=past_key_values)

        feats = outputs[0]
        feats = self.dropout(feats)

        # Build a mask that excludes the [CLS] and [SEP] positions.
        #if task==self.biaffine_task:
        seq_len_diff = 2
        #else:
        #    seq_len_diff=1
        seq_len = attention_mask.sum(dim=-1) - seq_len_diff
        broad_cast_seq_len = torch.arange(attention_mask.shape[1] -
                                          seq_len_diff).expand(
                                              attention_mask.shape[0],
                                              -1).to(seq_len.device)
        mask = broad_cast_seq_len < seq_len.unsqueeze(1)

        # Dependency parsing: the [CLS]/[SEP] positions must be stripped
        # before feeding the biaffine parser.
        if task == self.biaffine_task:
            feats = feats[:, 1:-1]
            output = self.parser.forward(feats=feats,
                                         mask=mask,
                                         gold_heads=heads,
                                         char_labels=labels)
        # Other (sequence-labeling) tasks.
        else:
            logits = self.seq_label_classifier[task](feats)
            if self.training:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(
                    logits.view(-1, len(self.task_label_map[task])),
                    labels.view(-1))
                output = {
                    'loss': loss,
                    'logits': logits,
                }
            else:
                # Inference: decode with the Viterbi algorithm built into
                # the CRF module (optionally biased by the user dictionary).
                probs = logits[:, 1:-1]
                if tag_seqs is not None:
                    diff = self.__get_ud_diff(probs, tag_seqs, user_dict_weight)
                    probs = probs + diff
                paths, scores = self.crf[task].viterbi_decode(logits=probs,
                                                              mask=mask)
                # Padded positions are marked -100 so callers can skip them.
                paths[mask == 0] = -100
                output = {
                    'pred': paths,
                    'logits': logits,
                }

        # Rescale the training loss by the ensembled per-task weight.
        # NOTE(review): assumes the parser's output dict contains both
        # 'label_loss' and 'loss' keys — confirm in BertParser.forward.
        if self.training:
            if task == self.biaffine_task:
                loss_weight = output['label_loss']
            else:
                loss_weight = output['loss']
            self.ensembledWeightManager.update(task=task,
                                               loss=float(loss_weight))
            weight = self.ensembledWeightManager.get(task)
            output['loss'] = output['loss'] * weight

        return output
""]},"fastHan.FastModel":{FastHan:[1,1,1,""],Sentence:[1,1,1,""],Token:[1,1,1,""]},"fastHan.FastModel.FastHan":{__init__:[1,2,1,""],add_user_dict:[1,2,1,""],remove_user_dict:[1,2,1,""],set_cws_style:[1,2,1,""],set_device:[1,2,1,""],set_user_dict_weight:[1,2,1,""]},"fastHan.FastModel.Sentence":{__init__:[1,2,1,""]},"fastHan.FastModel.Token":{__init__:[1,2,1,""]},"fastHan.Sentence":{__init__:[0,2,1,""]},"fastHan.Token":{__init__:[0,2,1,""]},"fastHan.model":{BertCharParser:[3,0,0,"-"],CharParser:[4,0,0,"-"],UserDict:[5,0,0,"-"],bert:[6,0,0,"-"],bert_encoder_theseus:[7,0,0,"-"],model:[8,0,0,"-"],old_fastNLP_bert:[9,0,0,"-"],utils:[10,0,0,"-"]},"fastHan.model.BertCharParser":{BertCharParser:[3,1,1,""],CharBiaffineParser:[3,1,1,""],drop_input_independent:[3,4,1,""]},"fastHan.model.BertCharParser.BertCharParser":{__init__:[3,2,1,""],forward:[3,2,1,""],predict:[3,2,1,""]},"fastHan.model.BertCharParser.CharBiaffineParser":{__init__:[3,2,1,""],forward:[3,2,1,""],loss:[3,3,1,""],reset_parameters:[3,2,1,""]},"fastHan.model.CharParser":{CharBiaffineParser:[4,1,1,""],CharParser:[4,1,1,""],drop_input_independent:[4,4,1,""]},"fastHan.model.CharParser.CharBiaffineParser":{__init__:[4,2,1,""],forward:[4,2,1,""],loss:[4,3,1,""],predict:[4,2,1,""],reset_parameters:[4,2,1,""]},"fastHan.model.CharParser.CharParser":{__init__:[4,2,1,""],forward:[4,2,1,""],predict:[4,2,1,""]},"fastHan.model.UserDict":{Trie:[5,1,1,""],UserDict:[5,1,1,""]},"fastHan.model.UserDict.Trie":{__init__:[5,2,1,""],insert:[5,2,1,""],search:[5,2,1,""],startsWith:[5,2,1,""]},"fastHan.model.UserDict.UserDict":{__init__:[5,2,1,""],load_file:[5,2,1,""],load_list:[5,2,1,""],process_sentence:[5,2,1,""]},"fastHan.model.bert":{BertEmbedding:[6,1,1,""],BertWordPieceEncoder:[6,1,1,""]},"fastHan.model.bert.BertEmbedding":{__init__:[6,2,1,""],drop_word:[6,2,1,""],forward:[6,2,1,""]},"fastHan.model.bert.BertWordPieceEncoder":{__init__:[6,2,1,""],drop_word:[6,2,1,""],embed_size:[6,5,1,""],embedding_dim:[6,5,1,""],forward:[6,2,1,""]
,index_datasets:[6,2,1,""],num_embedding:[6,5,1,""]},"fastHan.model.bert_encoder_theseus":{BertModel:[7,1,1,""]},"fastHan.model.bert_encoder_theseus.BertModel":{__init__:[7,2,1,""],forward:[7,2,1,""],from_pretrained:[7,6,1,""],init_bert_weights:[7,2,1,""]},"fastHan.model.model":{CharModel:[8,1,1,""]},"fastHan.model.model.CharModel":{__init__:[8,2,1,""],forward:[8,2,1,""],predict:[8,2,1,""]},"fastHan.model.old_fastNLP_bert":{BertModel:[9,1,1,""]},"fastHan.model.old_fastNLP_bert.BertModel":{__init__:[9,2,1,""],forward:[9,2,1,""],from_pretrained:[9,6,1,""],init_bert_weights:[9,2,1,""]},"fastHan.model.utils":{check_dataloader_paths:[10,4,1,""],get_tokenizer:[10,4,1,""]},fastHan:{FastHan:[0,1,1,""],FastModel:[1,0,0,"-"],Sentence:[0,1,1,""],Token:[0,1,1,""],model:[2,0,0,"-"]}},objnames:{"0":["py","module","Python \u6a21\u5757"],"1":["py","class","Python \u7c7b"],"2":["py","method","Python \u65b9\u6cd5"],"3":["py","staticmethod","Python \u9759\u6001\u65b9\u6cd5"],"4":["py","function","Python \u51fd\u6570"],"5":["py","attribute","Python \u5c5e\u6027"],"6":["py","classmethod","Python 
\u7c7b\u65b9\u6cd5"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:staticmethod","4":"py:function","5":"py:attribute","6":"py:classmethod"},terms:{"02":[7,9],"05":[0,1],"10":[0,1,6],"100":[3,4,8],"12":[6,7,9],"2304":6,"30522":[7,9],"3072":[7,9],"31":8,"400":4,"430":6,"500":[3,4,8],"510":6,"512":[6,7,9],"768":[6,7,8,9],"800":4,"char":[3,4,8],"class":[0,1,3,4,5,6,7,8,9],"default":[3,4],"float":[0,1,6,7,9,12],"for":[0,1,5,6],"if":[3,4,5],"import":6,"in":[5,6],"int":[0,1,7,9],"long":12,"return":[3,4,5],"static":[3,4],"true":[6,7,8,9,12],"var":[3,4],"void":5,"with":[5,12],__init__:[0,1,3,4,5,6,7,8,9],accur:[0,1,5],add_cls_sep:6,add_user_dict:[0,1],add_word_lst:6,an:12,and:[3,4,7,8,9],ani:5,answer_list:[0,1],app_index:4,appli:12,arc_mlp_siz:[3,4],arc_pr:[3,4],arc_tru:[3,4],as:[0,1],attent:[7,9],attention_mask:[7,9],attention_probs_dropout_prob:[7,9],auto_trunc:6,avg:6,base:[0,1,6,7,9],batch:12,batch_siz:[3,4,6],bert:[0,1,2,7,9],bert_encoder_theseu:[0,2],bertcharpars:[0,2],bertembed:6,bertmodel:[7,9],bertpool:6,bertwordpieceencod:6,beta:12,biaffine_pars:[3,4],biaffinepars:[3,4],bidirect:[7,9],bigram:4,bigram_vocab_s:4,bin:6,bmeso:8,bodi:12,bool:[5,6],both:[3,4,7,8,9],by:[3,4,7,8,9],cell:12,char_head:[3,4],char_label:[3,4],char_vocab_s:4,charbiaffinepars:[3,4],charmodel:[0,1,8],charpars:[0,2],check_dataloader_path:10,cityu:[0,1],classmethod:[7,9],cls:6,cnc:[0,1],column:12,com:[7,9],comput:[3,4],concat:6,config:[7,9],contain:12,contextual_embed:6,contextualembed:6,core:6,corpu:[0,1],cpu:[0,1],ctb:[0,1],cuda:[0,1],cws:[0,1],data:5,dataload:10,dataset:[6,12],delta:12,dep_head:8,dep_label:8,depend:[0,1],dev:[0,1,10],devic:[0,1],device_id:[0,1],dic:[0,1],dict:[3,4,10],doc:12,docutil:12,drop_input_independ:[3,4],drop_word:6,dropout:[3,4,6,7,8,9],dropout_emb:[3,4],element:12,emb:[3,4,6,8],emb_dim:4,embed:[3,4,6,7,9],embed_s:6,embedding_dim:[3,4,6,8],emphas:12,emphasi:12,en:6,encod:[3,4,5,6],encoding_typ:8,english:[7,9],exampl:6,f_:12,f_beta:12,fals:[3,4,5,6,8,1
2],fastmodel:0,fastnlp:[3,4,6,12],feat:3,ffn:[7,9],field_nam:6,file:12,first:[6,12],floattensor:6,forward:[3,4,6,7,8,9],frac:12,from:[6,7,9],from_pretrain:[7,9],gelu:[7,9],get_token:10,github:[7,9],given:5,gold_head:[3,4],good:6,gpu:[0,1],gradient:6,h_2:12,h_2o:12,head:[0,1,3,4],head_label:[0,1],head_pr:[3,4],header:12,help:[0,1,5],here:5,hidden_act:[7,9],hidden_dropout_prob:[7,9],hidden_s:[7,9],html:12,http:[7,9,12],huggingfac:[7,9],include_cls_sep:6,index:10,index_dataset:6,infer:[3,4],init_bert_weight:[7,9],initi:[0,1,3,4,5,7,8,9],initializer_rang:[7,9],inlin:12,input:[6,7,9,12],input_id:[7,9],insert:5,instanc:12,intermediate_s:[7,9],intern:[3,4,7,8,9],into:5,is:[3,4,5,6,12],json:6,kei:10,kwarg:[7,9],label_mlp_s:[3,4],label_pr:[3,4],label_tru:[3,4],label_vocab:8,larg:[0,1],last:6,layer:6,layer_num:[6,7],layers_cut:9,len:[6,12],line:12,lineno:12,list:[0,1,6],liter:12,load_fil:5,load_list:5,longtensor:6,loss:[3,4],lstm:[3,4],mai:12,markdown:12,mask:[3,4],math:12,max:6,max_len:[3,4,6],max_position_embed:[7,9],mlp:[3,4],model:0,model_dir_or_nam:[6,7,9],model_typ:[0,1],modul:[0,2],msr:[0,1],mst:[3,4],n_tag:[3,4],na:12,naoh:12,ner:[0,1],net:12,ngram:[3,4],ngram_per_cha:4,ngram_per_char:[3,4],nn:[3,4,6,7,8,9],none:[0,1,3,4,6,7,8,9],not:[3,4],num_attention_head:[7,9],num_embed:[3,4,6],num_hidden_lay:[7,9],num_label:[3,4],object:[0,1,5],of:[3,4],old_fastnlp_bert:[0,2],option:12,or:12,output:[6,12],output_all_encoded_lay:[7,9],pad:6,param:[0,1,3,4,12],pars:[0,1,3,4],parsing_arc_mlp_s:8,parsing_label_mlp_s:8,parsing_rnn_lay:8,parsing_use_greedy_inf:8,part:[3,4],path:[5,10],piec:6,pku:[0,1],pool_method:6,pooled_cl:6,pos:[0,1,8],pos_emb_dim:[3,4],pos_idx:8,pos_vocab_s:[3,4],pre:12,pre_bigram:4,pre_bigrams_emb:4,pre_char:[3,4],pre_chars_emb:4,pre_ngram:[3,4],pre_trigram:4,pre_trigrams_emb:4,predict:[3,4,8],prefix:5,pretrain:[7,9],print:12,process_sent:5,provid:[3,4],pytorch:[7,9],quickref:12,rais:10,rec:12,regular:6,remove_user_dict:[0,1],represent:[7,9],requires_grad:6,reset_
paramet:[3,4],result:[3,4],revers:5,rnn:[3,4],rnn_hidden_s:[3,4],rnn_layer:[3,4],root:[0,1,3,4],row:12,rst:12,rtype:5,same:12,scriptmodul:[3,4,7,8,9],search:5,second:12,see:[0,1,5],segment:[7,9],self:[0,1,5,6],sentenc:[0,1,5,12],sep:6,seq_len:[3,4,8],seq_len_for_wordlist:8,set:[0,1],set_cws_styl:[0,1],set_devic:[0,1],set_user_dict_weight:[0,1],sf:12,share:[3,4,7,8,9],signatur:[0,1,5],size:6,sohu:12,some:10,sourceforg:12,span:12,speech:[3,4],split:6,stackembed:6,start:5,startswith:5,state:[3,4,7,8,9],str:[0,1,5,6,7,9,10],strong:12,structur:5,style:[0,1],sxu:[0,1],tabl:12,tag:6,tag_seq:8,target:[0,1,8,12],task_class:8,test:[0,1,10],text:12,that:5,the:[3,4,5,6,7,9,12],there:5,thi:12,third:12,to:10,to_index:6,token:[0,1,6],token_type_id:[6,7,9],torch:[0,1,3,4,6,7,8,9],train:10,transform:[3,4,7,9],trie:5,trigram:4,trigram_vocab_s:4,tupl:[3,4],txt:[6,10],type:[0,1,5],type_vocab_s:[7,9],udc:[0,1],uncas:[6,7,9],undocu:[7,9],union:10,unk:6,unknown_index:6,uparrow:12,use_averag:8,use_greedy_inf:[3,4],use_pos_embed:8,user:12,userdict:[0,2],utf:5,util:[0,2],valu:[3,4,6,10,12],vector_s:3,vocab:6,vocab_s:[7,9],vocabulari:6,weight:[0,1,7,9],whether:6,word:[0,1,5,6],word_dropout:6,word_embed:[3,4],word_len:8,word_list:5,word_piec:6,wtb:[0,1],xxx:10,your:5,zx:[0,1]},titles:["fastHan package","fastHan.FastModel module","fastHan.model package","fastHan.model.BertCharParser module","fastHan.model.CharParser module","fastHan.model.UserDict module","fastHan.model.bert module","fastHan.model.bert_encoder_theseus module","fastHan.model.model module","fastHan.model.old_fastNLP_bert module","fastHan.model.utils module","demo \u4e2d\u6587\u6587\u6863","\u5927\u6807\u9898"],titleterms:{api:11,bert:6,bert_encoder_theseu:7,bertcharpars:3,charpars:4,csv:12,demo:11,fasthan:[0,1,2,3,4,5,6,7,8,9,10],fastmodel:1,model:[2,3,4,5,6,7,8,9,10],modul:[1,3,4,5,6,7,8,9,10],old_fastnlp_bert:9,packag:[0,2],submodul:[0,2],subpackag:0,userdict:5,util:10}}) 
import re


def restore_camr(line, id_token_list):
    """Restore a linearized CAMR prediction *line* into a token list.

    *id_token_list* maps 1-based positions to the sentence tokens; node
    variables are emitted as ``x<idx>/<concept>``. Returns the list of
    string fragments (brackets, node values, relations) that, joined,
    form the restored AMR.
    """
    # Normalize punctuation before parsing.
    # NOTE(review): the first three substitutions replace full-width
    # Chinese punctuation （ ） ： with their ASCII forms; the characters
    # were mangled in the dump this was recovered from — confirm against
    # the upstream source. (An ASCII '(' pattern here would raise
    # re.error, so full-width is the only consistent reading.)
    line = re.sub('（', '(', line)
    line = re.sub('）', ')', line)
    line = re.sub('：', ':', line)
    # Times like "12:30" keep a full-width colon so ':' stays an AMR marker.
    time_list = re.findall(r'\d+\s*\:\s*\d+', line)
    line = re.sub('"', '', line)
    for s in time_list:
        ss = re.sub(':', '：', s)
        line = re.sub(s, ss, line)

    idx = 0
    amr_list = []

    new_mark = len(id_token_list) + 2  # next free index for unseen concepts
    node_dict = {}
    node_name_list = []

    def convert_node_value(node_value, is_bracket1=False):
        """Turn a concept string into an ``x<idx>/<value>`` node label."""
        nonlocal id_token_list, node_name_list
        if not node_value:
            return node_value
        if '^' not in node_value:
            node_name = search_mark(node_value.split('-')[0])
            if not is_bracket1:
                node_name_list.append(node_name)
            return node_name + '/' + node_value
        else:
            # "a^b" encodes a re-entrant edge: variable of a, concept of b.
            node_value1, node_value2 = node_value.split('^')[0:2]
            node_name1 = search_mark(node_value1.split('-')[0])
            if int(node_name1[1:]) > len(id_token_list):
                node_name1 = 'x1'
            node_name2 = search_mark(node_value2.split('-')[0])
            if node_name2 not in node_name_list:
                node_name2 = node_name_list[0]
            return node_name1 + '/' + node_name2

    def search_mark(node_value):
        """Find (or allocate) the sentence index for *node_value*."""
        nonlocal id_token_list, node_dict, new_mark
        node_name = node_dict.get(node_value, 0)
        if node_name > len(id_token_list):
            # Already mapped to an out-of-sentence concept: allocate anew.
            node_name = new_mark
            new_mark += 1
            node_dict[node_value] = node_name
            return 'x' + str(node_name)

        # Search forward from the last known position.
        i = node_name + 1
        while i <= len(id_token_list):
            if id_token_list[i] == node_value:
                break
            i += 1
        if i > len(id_token_list):
            # Not found ahead: wrap around and search from the start.
            # NOTE(review): indentation of this fallback was ambiguous in
            # the recovered source — this reading keeps `j` defined
            # whenever it is evaluated; verify against upstream.
            j = 1
            while j <= node_name:
                if id_token_list[j] == node_value:
                    break
                j += 1
            if node_name >= 1 and j <= node_name:
                node_name = j
            else:
                node_name = i

        if node_name <= 0:
            node_name = new_mark
            new_mark += 1

        node_dict[node_value] = node_name
        return 'x' + str(node_name)

    # --- recursive linearization handlers ---
    def bracket1():
        """Consume an optional alignment group "(...)" before a node."""
        nonlocal idx
        nonlocal amr_list
        cur_charseq = []
        cur_idx = idx
        has_content = False
        while True:
            if cur_idx >= len(line):
                break
            if line[cur_idx] == '(' and not has_content:
                has_content = True
            if has_content:
                if line[cur_idx] == ':':
                    # A relation marker means there was no group: rewind.
                    cur_idx = idx
                    cur_charseq = []
                    break
                if line[cur_idx] == '(' and cur_charseq:
                    break
                cur_charseq.append(line[cur_idx])
                if cur_charseq[-1] == ')':
                    has_content = False
                    cur_idx += 1
                    break
            cur_idx += 1

        idx = cur_idx
        if not cur_charseq:
            amr_list.append('(')
            amr_list.append(')')
            cur_node_value = ''
        elif cur_charseq[-1] != ')':
            cur_node_value = get_seq_value(cur_charseq[1:])
            amr_list.append('(')
            amr_list.append(convert_node_value(cur_node_value, True))
            amr_list.append(')')
        else:
            cur_node_value = get_seq_value(cur_charseq[1:-1])
            amr_list.append('(')
            amr_list.append(convert_node_value(cur_node_value, True))
            amr_list.append(')')

    def bracket2():
        """Consume a node "(<concept> ...)" and its outgoing relations."""
        nonlocal idx
        nonlocal amr_list
        nonlocal id_token_list
        cur_charseq = []
        cur_idx = idx

        # Find the opening bracket.
        while cur_idx < len(line):
            if line[cur_idx] == '(':
                break
            cur_idx += 1
        if cur_idx < len(line):
            amr_list.append('(')
        else:
            # No node left: emit a placeholder and stop.
            amr_list.append('(')
            amr_list.append(convert_node_value('-'))
            amr_list.append(')')
            idx = cur_idx
            return

        # Read the concept/variable name.
        cur_idx += 1
        while cur_idx < len(line):
            if line[cur_idx] == ')' or line[cur_idx] == ':':
                break
            if line[cur_idx] == '(':
                break
            cur_charseq.append(line[cur_idx])
            cur_idx += 1
        cur_node_value = get_seq_value(cur_charseq)
        if not cur_node_value:
            cur_node_value = '-'
        amr_list.append(convert_node_value(cur_node_value))
        cur_charseq.clear()

        if cur_node_value == 'name':
            # Special-case named entities: collect the :opN children and
            # rewrite the node as "<x1>_<x2>.../name".
            name_idx = len(amr_list) - 1
            n_value_list = []
            while cur_idx < len(line) and line[cur_idx] != ')':
                cur_charseq.append(line[cur_idx])
                cur_idx += 1
            cur_charseq.append(':')
            l = r = 0
            r_value = ''
            for i, ch in enumerate(cur_charseq):
                if ch == ':':
                    l = i
                    n_value = get_seq_value(cur_charseq[r:l])
                    if not n_value:
                        n_value = '-'
                    if r_value:
                        amr_list.append(' ')
                        amr_list.append(r_value)
                    amr_list.append(' ')
                    n_value = convert_node_value(n_value, True)
                    if n_value.split('/')[-1].startswith('x'):
                        # Replace an unresolved concept by the sentence token.
                        n_value = n_value.split(
                            '/')[0] + '/' + id_token_list[int(
                                n_value.split('/')[0][1:])]
                    amr_list.append(n_value)
                    n_value_list.append(n_value.split('/')[0])
                    n_value = r_value = ""
                elif ch.isspace() and i > 0 and cur_charseq[i - 1].isdigit():
                    r = i
                    r_value = get_seq_value(cur_charseq[l:r])
                    if not r_value:
                        r_value = 'op1'
                    r_value = ':' + r_value
            cur_idx += 1
            amr_list.append(')')
            idx = cur_idx
            if n_value_list:
                amr_list[name_idx] = '_'.join(n_value_list) + '/' + 'name'
            return

        def relation():
            """Consume one ":rel (...)(...)" child; False when exhausted."""
            nonlocal cur_idx, idx
            cur_relationseq = []
            relation_content = False
            while cur_idx < len(line):
                if line[cur_idx] == ')':
                    return relation_content
                if line[cur_idx] == ':':
                    break
                cur_idx += 1

            if cur_idx >= len(line):
                return relation_content

            while cur_idx < len(line):
                if line[cur_idx] == ':':
                    relation_content = True
                    cur_relationseq = []
                if relation_content:
                    if line[cur_idx] == '(':
                        cur_relation_value = get_seq_value(cur_relationseq)
                        amr_list.append(' ')
                        amr_list.append(':' + cur_relation_value)
                        idx = cur_idx
                        bracket1()
                        bracket2()
                        cur_idx = idx
                        break
                    elif line[cur_idx] == ')':
                        # Relation with a missing child: emit a placeholder.
                        cur_relation_value = get_seq_value(cur_relationseq)
                        amr_list.append(' ')
                        amr_list.append(':' + cur_relation_value)
                        amr_list.append('()(')
                        amr_list.append(convert_node_value('-'))
                        amr_list.append(')')
                        cur_idx += 1
                        break
                    cur_relationseq.append(line[cur_idx])
                cur_idx += 1
            idx = cur_idx
            return relation_content

        while True:
            if not relation():
                break

        # Skip to (and past) this node's closing bracket.
        while cur_idx < len(line):
            if line[cur_idx] == ')':
                break
            cur_idx += 1
        amr_list.append(')')
        cur_idx = cur_idx + 1
        idx = cur_idx
        return

    bracket2()
    return amr_list


def get_seq_value(cur_charseq):
    """Join a character buffer into a clean token.

    Drops whitespace, ':', '(' and ')'. Mutates *cur_charseq* in place
    (whitespace entries are blanked), matching the original behaviour.
    """
    for idx, ch in enumerate(cur_charseq):
        if ch.isspace():
            cur_charseq[idx] = ''
    s = ''.join(cur_charseq)
    # Raw strings: '\(' / '\)' are invalid str escapes (SyntaxWarning 3.12+).
    s = re.sub(':', '', s)
    s = re.sub(r'\(', '', s)
    s = re.sub(r'\)', '', s)
    return s


def convert_camr_to_lines(amr):
    """Pretty-print a flat AMR string: one relation per line, tab-indented
    by bracket depth, ending with a newline."""
    amr_list = amr.split(':')
    for i, line in enumerate(amr_list):
        if i == 0:
            continue
        amr_list[i] = ':' + line

    # Indent each bracket-opening segment by the current nesting depth.
    num = 0
    for i, line in enumerate(amr_list):
        if '(' not in line:
            continue
        amr_list[i] = '\t' * num + line
        num = num + line.count('(') - line.count(')')

    for i, line in enumerate(amr_list):
        if i == 0:
            continue
        if '(' in line:
            amr_list[i] = '\n' + line

    return ''.join(amr_list) + '\n'
fastHan.model.UserDict module — fastHan 0.5.0 文档 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
47 | 48 | 99 | 100 |
101 | 102 | 103 | 109 | 110 | 111 |
112 | 113 |
114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 |
132 | 133 |
    134 | 135 |
  • »
  • 136 | 137 |
  • fastHan.model.UserDict module
  • 138 | 139 | 140 |
  • 141 | 142 | 143 | View page source 144 | 145 | 146 |
  • 147 | 148 |
149 | 150 | 151 |
152 |
153 |
154 |
155 | 156 |
157 |

fastHan.model.UserDict module

158 |
159 |
160 | class fastHan.model.UserDict.Trie[源代码]
161 |

基类:object

162 |
163 |
164 | __init__()[源代码]
165 |

Initialize your data structure here.

166 |
167 | 168 |
169 |
170 | insert(word)[源代码]
171 |

Inserts a word into the trie. 172 | :type word: str 173 | :rtype: void

174 |
175 | 176 |
177 |
178 | search(word)[源代码]
179 |

Returns if the word is in the trie. 180 | :type word: str 181 | :rtype: bool

182 |
183 | 184 |
185 |
186 | startsWith(prefix)[源代码]
187 |

Returns if there is any word in the trie that starts with the given prefix. 188 | :type prefix: str 189 | :rtype: bool

190 |
191 | 192 |
193 | 194 |
195 |
196 | class fastHan.model.UserDict.UserDict[源代码]
197 |

基类:object

198 |
199 |
200 | __init__()[源代码]
201 |

Initialize self. See help(type(self)) for accurate signature.

202 |
203 | 204 |
205 |
206 | load_file(path, encoding='UTF-8')[源代码]
207 |
208 | 209 |
210 |
211 | load_list(word_list)[源代码]
212 |
213 | 214 |
215 |
216 | process_sentence(sentence, reverse=False)[源代码]
217 |
218 | 219 |
220 | 221 |
222 | 223 | 224 |
225 | 226 |
227 |
228 | 229 | 230 |
231 | 232 |
233 |

234 | 235 | © 版权所有 2020, fastHan 236 | 237 |

238 |
239 | 240 | 241 | 242 | Built with Sphinx using a 243 | 244 | theme 245 | 246 | provided by Read the Docs. 247 | 248 |
249 | 250 |
251 |
252 | 253 |
254 | 255 |
256 | 257 | 258 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | --------------------------------------------------------------------------------