├── .github
    ├── ISSUE_TEMPLATE.md
    ├── PULL_REQUEST_TEMPLATE.md
    └── workflows
    │   └── ci.yml
├── .gitignore
├── .gitmodules
├── CHANGES.rst
├── CMakeLists.txt
├── MANIFEST.in
├── README.md
├── doc
    ├── Makefile
    ├── api.rst
    ├── changelog.rst
    ├── conf.py
    ├── index.rst
    ├── install.rst
    ├── intro.rst
    ├── make.bat
    └── start.rst
├── example
    ├── example.py
    └── lexicon.txt
├── pyproject.toml
├── setup.py
├── src
    └── pyltp.cpp
└── tests
    └── basic_test.py


/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | <!-- 中文模板起始：If you speak English, please remove the Chinese templates -->
 2 | 
 3 | 在提问之前，请确认以下几点:
 4 | - [ ] 如果您对算法或 C++ 实现有问题，请在 https://github.com/HIT-SCIR/ltp/issues 提问
 5 | - [ ] 您的问题可能与已有问题重复，在提交 issue 前，请确认您已经搜索过之前的问题
 6 | 
 7 | ## 问题*类型*
 8 | <!-- 例如：构建失败、内存错误、异常终止等 -->
 9 | 
10 | ## 出错*场景*
11 | <!-- 例如：分析句子“xxx”时出错，运行4小时后出错，能否复现 -->
12 | 
13 | ## 代码片段
14 | 
15 | ## 如何复现这一错误
16 | <!-- Please be as specific as possible. Use dashes (-) or numbers (1.) to create a list of steps. -->
17 | 
18 | ## 运行环境
19 | <!-- 操作系统, python版本等。 -->
20 | 
21 | ## 期望结果
22 | <!-- What should have happened? -->
23 | 
24 | ## 其他
25 | 
26 | <!-- 中文模板结束, end of Chinese template -->
27 | 
28 | <!-- start of English template: 如果您用中文提问，请删除英文模板 -->
29 | 
30 | Please ensure your issue adheres to the following guidelines:
31 | - [ ] If your problem concerns the algorithms or the native (C++) implementation, please report it at https://github.com/HIT-SCIR/ltp/issues instead.
32 | - [ ] Search previous issues before making a new one, as yours may be a duplicate.
33 | 
34 | ## *What* is affected by this bug?
35 | <!-- Eg. building failed, memory leak, program terminated. -->
36 | 
37 | ## *When* does this occur?
38 | <!-- E.g. when analyzing the sentence "xxx", or after the program has run for about 4 hours. (Does it occur every time or only occasionally?) -->
39 | 
40 | ## *Where* on the code does it happen?
41 | <!-- E.g. when I call the API xxx and then call xxx, the program crashes. (Show the relevant code if needed.) -->
42 | 
43 | ## *How* do we replicate the issue?
44 | <!-- Please be as specific as possible. Use dashes (-) or numbers (1.) to create a list of steps. -->
45 | 
46 | ## Your environment information
47 | <!-- OS, language, IDE and their versions, other related tools, environment variables, and how you integrated the code into your project. -->
48 | 
49 | ## Expected behavior (i.e. solution)
50 | <!-- What should have happened? -->
51 | 
52 | ## Other Comments
53 | 
54 | <!-- end of English template, 英文模板结束 -->
55 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | <!--- Provide a general summary of your changes in the Title above -->
 2 | 
 3 | ## Description Changes
 4 | <!-- Describe your changes in detail -->
 5 | 
 6 | ## Motivation and Context
 7 | <!--- Why is this change required? What problem does it solve? -->
 8 | <!--- If it fixes an open issue, please link to the issue here. -->
 9 | 
10 | ## How Has This Been Tested?
11 | <!--- Please describe in detail how you tested your changes. -->
12 | <!--- Include details of your testing environment, tests ran to see how -->
13 | <!--- your change affects other areas of the code, etc. -->
14 | 
15 | ## Screenshots (if appropriate):
16 | 
17 | ## Types of changes
18 | <!--- What types of changes does your code introduce? Put an `x` in all the boxes that apply: -->
19 | - [ ] Bug fix (non-breaking change which fixes an issue)
20 | - [ ] New feature (non-breaking change which adds functionality)
21 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
22 | 
23 | ## Checklist:
24 | <!--- Go over all the following points, and put an `x` in all the boxes that apply. -->
25 | <!--- If you're unsure about any of these, don't hesitate to ask. We're here to help! -->
26 | - [ ] My code follows the code style of this project.
27 | - [ ] My change requires a change to the documentation.
28 | - [ ] I have updated the documentation accordingly.
29 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: Wheels
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 |   pull_request:
  6 |   push:
  7 |     branches:
  8 |       - master
  9 |   release:
 10 |     types:
 11 |       - published
 12 | 
 13 | jobs:
 14 |   build_sdist:
 15 |     name: Build SDist
 16 |     runs-on: ubuntu-latest
 17 |     steps:
 18 |       - uses: actions/checkout@v3
 19 |         with:
 20 |           submodules: true
 21 | 
 22 |       - name: Build SDist
 23 |         run: pipx run build --sdist
 24 | 
 25 |       - name: Check metadata
 26 |         run: pipx run twine check dist/*
 27 | 
 28 |       - uses: actions/upload-artifact@v3
 29 |         with:
 30 |           path: dist/*.tar.gz
 31 | 
 32 | 
 33 |   build_wheels_x64:
 34 |     name: x64 Wheels on ${{ matrix.os }}
 35 |     runs-on: ${{ matrix.os }}
 36 |     strategy:
 37 |       fail-fast: false
 38 |       matrix:
 39 |         os: [
 40 |           ubuntu-latest,
 41 |           windows-latest,
 42 |           macos-latest
 43 |         ]
 44 | 
 45 |     steps:
 46 |       - uses: actions/checkout@v3
 47 |         with:
 48 |           submodules: true
 49 |       - name: Build Wheels
 50 |         uses: pypa/cibuildwheel@v2.8.0
 51 |         env:
 52 |           CIBW_SKIP: "*-musllinux_x86_64 *-musllinux_i686"
 53 |           CIBW_ARCHS_MACOS: auto
 54 |           CIBW_ARCHS_WINDOWS: auto64
 55 |           CIBW_ARCHS_LINUX: auto64
 56 | 
 57 |       - name: Upload wheels
 58 |         uses: actions/upload-artifact@v3
 59 |         with:
 60 |           path: wheelhouse/*.whl
 61 | 
 62 |   build_wheels_x86:
 63 |     name: x86 Wheels on ${{ matrix.os }}
 64 |     runs-on: ${{ matrix.os }}
 65 |     strategy:
 66 |       fail-fast: false
 67 |       matrix:
 68 |         os: [
 69 |           ubuntu-latest,
 70 |           # windows-latest,
 71 |           # macos-latest
 72 |         ]
 73 | 
 74 |     steps:
 75 |       - uses: actions/checkout@v3
 76 |         with:
 77 |           submodules: true
 78 |       - name: Build Wheels
 79 |         uses: pypa/cibuildwheel@v2.8.0
 80 |         env:
 81 |           CIBW_SKIP: "*-musllinux_x86_64 *-musllinux_i686"
 82 |           CIBW_ARCHS_MACOS: universal2
 83 |           CIBW_ARCHS_WINDOWS: auto32
 84 |           CIBW_ARCHS_LINUX: auto32
 85 | 
 86 |       - name: Upload wheels
 87 |         uses: actions/upload-artifact@v3
 88 |         with:
 89 |           path: wheelhouse/*.whl
 90 | 
 91 |   build_wheels_musl:  # builds the musllinux wheels (x86_64 and i686) selected by CIBW_BUILD below
 92 |     name: Musl Wheels on ${{ matrix.os }}
 93 |     runs-on: ${{ matrix.os }}
 94 |     strategy:
 95 |       fail-fast: false
 96 |       matrix:
 97 |         os: [ ubuntu-latest ]
 98 | 
 99 |     steps:
100 |       - uses: actions/checkout@v3
101 |         with:
102 |           submodules: true
103 |       - name: Build Wheels
104 |         uses: pypa/cibuildwheel@v2.8.0
105 |         env:
106 |           CIBW_BUILD: "*-musllinux_x86_64 *-musllinux_i686"
107 |           CIBW_ARCHS_MACOS: universal2
108 |           CIBW_ARCHS_WINDOWS: auto32
109 |           CIBW_ARCHS_LINUX: "x86_64 i686"  # must cover both arches named in CIBW_BUILD; auto32 would build i686 only
110 | 
111 |       - name: Upload wheels
112 |         uses: actions/upload-artifact@v3
113 |         with:
114 |           path: wheelhouse/*.whl
115 | 
116 | 
117 |   upload_all:
118 |     name: Upload if release
119 |     needs: [ build_sdist, build_wheels_x64, build_wheels_x86, build_wheels_musl ]
120 |     runs-on: ubuntu-latest
121 |     if: github.event_name == 'release' && github.event.action == 'published'
122 | 
123 |     steps:
124 |       - uses: actions/setup-python@v4
125 |         with:
126 |           python-version: "3.x"
127 | 
128 |       - uses: actions/download-artifact@v3
129 |         with:
130 |           name: artifact
131 |           path: dist
132 | 
133 |       - uses: pypa/gh-action-pypi-publish@v1.5.0
134 |         with:
135 |           user: ${{ secrets.PYPI_USERNAME }}
136 |           password: ${{ secrets.PYPI_PASSWORD }}


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | ###############
 2 | # object file #
 3 | ###############
 4 | .*
 5 | !.travis
 6 | !.github
 7 | 
 8 | ###############
 9 | # build       #
10 | ###############
11 | build
12 | 
13 | ###############
14 | # output      #
15 | ###############
16 | include/
17 | lib/
18 | bin/
19 | dist
20 | !patch/include
21 | pyltp.egg-info
22 | 
23 | *.swp
24 | doc/_build
25 | doc/_static
26 | doc/_templates
27 | !doc/Makefile
28 | 
29 | ###############
30 | # data        #
31 | ###############
32 | ltp_data
33 | 
34 | cmake-*


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "ltp"]
2 | 	path = ltp
3 | 	url = https://github.com/HIT-SCIR/ltp.git
4 | [submodule "pybind11"]
5 | 	path = pybind11
6 | 	url = https://github.com/pybind/pybind11.git
7 | 


--------------------------------------------------------------------------------
/CHANGES.rst:
--------------------------------------------------------------------------------
1 | * 2022年07月23日 修复编译失败的问题
2 | * 2020年07月30日 使用 Pybind11 生成 Python 绑定，减少维护困难
3 | * 2017年12月05日 升级更新兼容 LTP 3.4.0


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.0)
 2 | project(pyltp)
 3 | 
 4 | # Default to C++14 unless the caller already selected a standard (for example
 5 | # with -DCMAKE_CXX_STANDARD=17 on the command line). This is set before the
 6 | # add_subdirectory() calls so targets created afterwards are initialised with it.
 7 | if (NOT DEFINED CMAKE_CXX_STANDARD)
 8 |     set(CMAKE_CXX_STANDARD 14)
 9 | endif ()
10 | 
11 | # Bundled dependencies (git submodules): pybind11 and the LTP C++ library.
12 | add_subdirectory(pybind11)
13 | add_subdirectory(ltp)
14 | 
15 | # Python extension module built from the single binding source file,
16 | # linked against the static LTP component libraries.
17 | pybind11_add_module(pyltp src/pyltp.cpp)
18 | target_link_libraries(
19 |         pyltp PRIVATE
20 |         pybind11::module
21 |         ner_static_lib
22 |         parser_static_lib
23 |         postagger_static_lib
24 |         segmentor_static_lib
25 |         splitsnt_static_lib
26 |         srl_static_lib
27 | )
28 | target_include_directories(
29 |         pyltp PRIVATE
30 |         ltp/include
31 | )
32 | # EXAMPLE_VERSION_INFO is not defined in this file; presumably it is passed in
33 | # by the build driver (setup.py) -- TODO confirm.
34 | target_compile_definitions(pyltp
35 |         PRIVATE VERSION_INFO=${EXAMPLE_VERSION_INFO})
36 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include README.md
 2 | include CHANGES.rst
 3 | 
 4 | recursive-include src *.cpp
 5 | recursive-include ltp/src *.h
 6 | recursive-include ltp/src/framework *.h *.hpp
 7 | recursive-include ltp/src/utils *.h *.hpp *.tab
 8 | recursive-include ltp/src/segmentor *.cpp *.h *.hpp
 9 | recursive-include ltp/src/postagger *.cpp *.h *.hpp
10 | recursive-include ltp/src/ner *.cpp *.h *.hpp
11 | recursive-include ltp/src/parser.n *.cpp *.h *.hpp
12 | recursive-include ltp/src/srl *.cpp *.h
13 | recursive-include ltp/thirdparty/boost *.h *.hpp *.cpp *.ipp
14 | recursive-include ltp/thirdparty/dynet *
15 | recursive-include ltp/thirdparty/eigen *
16 | recursive-include ltp/thirdparty/gtest *
17 | recursive-include ltp/thirdparty/jsoncpp *
18 | recursive-include ltp/thirdparty/maxent *.h *.cpp
19 | recursive-include ltp/thirdparty/tinythreadpp *.h *.cpp
20 | recursive-include ltp/thirdparty/tinyxml *.h *.cpp
21 | graft pybind11/include
22 | graft pybind11/tools
23 | global-include CMakeLists.txt *.cmake
24 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ﻿# pyltp
 2 | 
 3 | [![PyPI Status](https://badge.fury.io/py/pyltp.svg)](https://badge.fury.io/py/pyltp)
 4 | [![Readthedocs](https://readthedocs.org/projects/pyltp/badge/?version=latest)](http://pyltp.readthedocs.io/)
 5 | [![Build Status](https://travis-ci.org/HIT-SCIR/pyltp.svg?branch=master)](https://travis-ci.org/HIT-SCIR/pyltp)
 6 | [![Build status](https://ci.appveyor.com/api/projects/status/kp2kjujo4amunyvr/branch/master?svg=true)](https://ci.appveyor.com/project/Oneplus/pyltp/branch/master)
 7 | [![PyPI Downloads](https://img.shields.io/pypi/dm/pyltp.svg)](https://pypi.python.org/pypi/pyltp)
 8 | 
 9 | pyltp 是 [语言技术平台（Language Technology Platform, LTP）](https://github.com/HIT-SCIR/ltp)的 Python 封装。
10 | 
11 | 在使用 pyltp 之前，您需要简要了解 [语言技术平台（LTP）](http://ltp.readthedocs.org/zh_CN/latest/) 能否帮助您解决问题。
12 | 
13 | **目前基于 PyTorch 的 LTP 4 已经发布，而 pyltp 将会只有非常有限的维护，请大家移步使用** [LTP 4](https://github.com/HIT-SCIR/ltp)
14 | 
15 | ## 依赖支持情况
16 | 
17 | Python 2.7, 3.x, and PyPy (PyPy2.7 >= 5.7)
18 | 
19 | ## 一个简单的例子
20 | 
21 | 下面是一个使用 pyltp 进行分词的例子
22 | 
23 | ```python
24 | # -*- coding: utf-8 -*-
25 | from pyltp import Segmentor
26 | segmentor = Segmentor("/path/to/your/cws/model")
27 | words = segmentor.segment("元芳你怎么看")
28 | print("|".join(words))
29 | segmentor.release()
30 | ```
31 | 除了分词之外，pyltp 还提供词性标注、命名实体识别、依存句法分析、语义角色标注等功能。
32 | 
33 | 详细使用方法可以参考 [example](example/example.py)
34 | 
35 | ## 安装
36 | 
37 | * 第一步，安装 pyltp
38 | 
39 | 	使用 pip 安装
40 | 
41 | 	```
42 | 	$ pip install pyltp
43 | 	```
44 | 	或从源代码安装
45 | 
46 | 	```
47 | 	$ git clone https://github.com/HIT-SCIR/pyltp
48 | 	$ cd pyltp
49 | 	$ git submodule init
50 | 	$ git submodule update
51 | 	$ python setup.py install
52 | 	```
53 | 
54 | 	+ Mac系统出现版本问题使用 MACOSX_DEPLOYMENT_TARGET=10.7 python setup.py install
55 | 	+ 编译时间较长（约5分钟左右），请耐心等待
56 | 
57 | * 第二步，下载模型文件
58 | 
59 | 	[七牛云](http://ltp.ai/download.html)，当前模型版本 3.4.0
60 | 
61 | ## 制作安装包
62 | 
63 | ```
64 | git submodule init
65 | git submodule update
66 | python setup.py bdist_wheel
67 | ```
68 | 
69 | ## 版本对应
70 | 
71 | * pyltp 版本：0.4.0
72 | * LTP 版本：3.4.0
73 | * 模型版本：3.4.0
74 | 
75 | ## 作者
76 | 
77 | * 冯云龙 << ylfeng@ir.hit.edu.cn >> 2020-7-30 重写代码，换用 Pybind11
78 | * 徐梓翔 << zxxu@ir.hit.edu.cn >> 2015-01-20 解决跨平台运行问题
79 | * 刘一佳 << yjliu@ir.hit.edu.cn >> 2014-06-12 重组项目
80 | * HuangFJ << biohfj@gmail.com >> 本项目最初作者
81 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | 	$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 21 | 
 22 | .PHONY: help
 23 | help:
 24 | 	@echo "Please use \`make <target>' where <target> is one of"
 25 | 	@echo "  html       to make standalone HTML files"
 26 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 27 | 	@echo "  singlehtml to make a single large HTML file"
 28 | 	@echo "  pickle     to make pickle files"
 29 | 	@echo "  json       to make JSON files"
 30 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 31 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 32 | 	@echo "  applehelp  to make an Apple Help Book"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  epub3      to make an epub3"
 36 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 37 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 38 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 39 | 	@echo "  text       to make text files"
 40 | 	@echo "  man        to make manual pages"
 41 | 	@echo "  texinfo    to make Texinfo files"
 42 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 43 | 	@echo "  gettext    to make PO message catalogs"
 44 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 45 | 	@echo "  xml        to make Docutils-native XML files"
 46 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 47 | 	@echo "  linkcheck  to check all external links for integrity"
 48 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 49 | 	@echo "  coverage   to run coverage check of the documentation (if enabled)"
 50 | 
 51 | .PHONY: clean
 52 | clean:
 53 | 	rm -rf $(BUILDDIR)/*
 54 | 
 55 | .PHONY: html
 56 | html:
 57 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 58 | 	@echo
 59 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 60 | 
 61 | .PHONY: dirhtml
 62 | dirhtml:
 63 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 64 | 	@echo
 65 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 66 | 
 67 | .PHONY: singlehtml
 68 | singlehtml:
 69 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 70 | 	@echo
 71 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 72 | 
 73 | .PHONY: pickle
 74 | pickle:
 75 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 76 | 	@echo
 77 | 	@echo "Build finished; now you can process the pickle files."
 78 | 
 79 | .PHONY: json
 80 | json:
 81 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 82 | 	@echo
 83 | 	@echo "Build finished; now you can process the JSON files."
 84 | 
 85 | .PHONY: htmlhelp
 86 | htmlhelp:
 87 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 88 | 	@echo
 89 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 90 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 91 | 
 92 | .PHONY: qthelp
 93 | qthelp:
 94 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 95 | 	@echo
 96 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 97 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 98 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pyltp.qhcp"
 99 | 	@echo "To view the help file:"
100 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pyltp.qhc"
101 | 
102 | .PHONY: applehelp
103 | applehelp:
104 | 	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
105 | 	@echo
106 | 	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
107 | 	@echo "N.B. You won't be able to view it unless you put it in" \
108 | 	      "~/Library/Documentation/Help or install it in your application" \
109 | 	      "bundle."
110 | 
111 | .PHONY: devhelp
112 | devhelp:
113 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
114 | 	@echo
115 | 	@echo "Build finished."
116 | 	@echo "To view the help file:"
117 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/pyltp"
118 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pyltp"
119 | 	@echo "# devhelp"
120 | 
121 | .PHONY: epub
122 | epub:
123 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
124 | 	@echo
125 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
126 | 
127 | .PHONY: epub3
128 | epub3:
129 | 	$(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
130 | 	@echo
131 | 	@echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
132 | 
133 | .PHONY: latex
134 | latex:
135 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
136 | 	@echo
137 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
138 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
139 | 	      "(use \`make latexpdf' here to do that automatically)."
140 | 
141 | .PHONY: latexpdf
142 | latexpdf:
143 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
144 | 	@echo "Running LaTeX files through pdflatex..."
145 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
146 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
147 | 
148 | .PHONY: latexpdfja
149 | latexpdfja:
150 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
151 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
152 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
153 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
154 | 
155 | .PHONY: text
156 | text:
157 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
158 | 	@echo
159 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
160 | 
161 | .PHONY: man
162 | man:
163 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
164 | 	@echo
165 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
166 | 
167 | .PHONY: texinfo
168 | texinfo:
169 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
170 | 	@echo
171 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
172 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
173 | 	      "(use \`make info' here to do that automatically)."
174 | 
175 | .PHONY: info
176 | info:  # build the Texinfo sources, then convert them with makeinfo through a sub-make
177 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
178 | 	@echo "Running Texinfo files through makeinfo..."
179 | 	$(MAKE) -C $(BUILDDIR)/texinfo info
180 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
181 | 
182 | .PHONY: gettext
183 | gettext:
184 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
185 | 	@echo
186 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
187 | 
188 | .PHONY: changes
189 | changes:
190 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
191 | 	@echo
192 | 	@echo "The overview file is in $(BUILDDIR)/changes."
193 | 
194 | .PHONY: linkcheck
195 | linkcheck:
196 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
197 | 	@echo
198 | 	@echo "Link check complete; look for any errors in the above output " \
199 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
200 | 
201 | .PHONY: doctest
202 | doctest:
203 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
204 | 	@echo "Testing of doctests in the sources finished, look at the " \
205 | 	      "results in $(BUILDDIR)/doctest/output.txt."
206 | 
207 | .PHONY: coverage
208 | coverage:
209 | 	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
210 | 	@echo "Testing of coverage in the sources finished, look at the " \
211 | 	      "results in $(BUILDDIR)/coverage/python.txt."
212 | 
213 | .PHONY: xml
214 | xml:
215 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
216 | 	@echo
217 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
218 | 
219 | .PHONY: pseudoxml
220 | pseudoxml:
221 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
222 | 	@echo
223 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
224 | 


--------------------------------------------------------------------------------
/doc/api.rst:
--------------------------------------------------------------------------------
  1 | 使用 pyltp
  2 | ===========
  3 | pyltp 是 `LTP <https://github.com/HIT-SCIR/ltp>`_ 的 Python 封装，提供了分词，词性标注，命名实体识别，依存句法分析，语义角色标注的功能。
  4 | 
  5 | 关于各个模块任务的介绍、标注体系、性能指标，可以查阅 `这里 <http://www.ltp-cloud.com/intro/#introduction>`_ 的介绍。
  6 | 
  7 | 
  8 | 使用前请先下载完整模型
  9 | ----------------------
 10 | 
 11 | 请先下载完整的 LTP 模型文件
 12 | 
 13 | * 下载地址 - `百度云 <http://pan.baidu.com/share/link?shareid=1988562907&uk=2738088569>`_
 14 | * 当前模型版本 - 3.4.0
 15 | 
 16 | 请确保下载的模型版本与当前版本的 pyltp 对应，否则会导致程序无法正确加载模型。
 17 | 
 18 | 请注意编码
 19 | ----------
 20 | 
 21 | pyltp 的所有输入的分析文本和输出的结果的编码均为 UTF-8。
 22 | 
 23 | 如果您以非 UTF-8 编码的文本输入进行分析，结果可能为空。请注意源代码文件的默认编码。
 24 | 
 25 | 由于 Windows 终端采用 GBK 编码显示，直接输出 pyltp 的分析结果会在终端显示为乱码。您可以将标准输出重定向到文件，以 UTF8 方式查看文件，就可以解决显示乱码的问题。
 26 | 
 27 | 
 28 | 分句
 29 | -----
 30 | 
 31 | 使用 pyltp 进行分句示例如下 ::
 32 | 
 33 |     # -*- coding: utf-8 -*-
 34 |     from pyltp import SentenceSplitter
 35 |     sents = SentenceSplitter.split('元芳你怎么看？我就趴窗口上看呗！')  # 分句
 36 |     print '\n'.join(sents)
 37 | 
 38 | 结果如下 ::
 39 | 
 40 |     元芳你怎么看？
 41 |     我就趴窗口上看呗！
 42 | 
 43 | 
 44 | 分词
 45 | -----
 46 | 
 47 | 使用 pyltp 进行分词示例如下 ::
 48 | 
 49 |     # -*- coding: utf-8 -*-
 50 |     import os
 51 |     LTP_DATA_DIR = '/path/to/your/ltp_data'  # ltp模型目录的路径
 52 |     cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')  # 分词模型路径，模型名称为`cws.model`
 53 | 
 54 |     from pyltp import Segmentor
 55 |     segmentor = Segmentor()  # 初始化实例
 56 |     segmentor.load(cws_model_path)  # 加载模型
 57 |     words = segmentor.segment('元芳你怎么看')  # 分词
 58 |     print '\t'.join(words)
 59 |     segmentor.release()  # 释放模型
 60 | 
 61 | 结果如下 ::
 62 | 
 63 |     元芳	你	怎么	看
 64 | 
 65 | :code:`words = segmentor.segment('元芳你怎么看')` 的返回值类型是native的VectorOfString类型，可以使用list转换成Python的列表类型，例如 ::
 66 | 
 67 |     ...
 68 |     >>> words = segmentor.segment('元芳你怎么看')
 69 |     >>> type(words)
 70 |     <class 'pyltp.VectorOfString'>
 71 |     >>> words_list = list(words)
 72 |     >>> type(words_list)
 73 |     <type 'list'>
 74 |     >>> print words_list
 75 |     ['\xe5\x85\x83\xe8\x8a\xb3', '\xe4\xbd\xa0', '\xe6\x80\x8e\xe4\xb9\x88', '\xe7\x9c\x8b']
 76 | 
 77 | 使用分词外部词典
 78 | ~~~~~~~~~~~~~~~~
 79 | 
 80 | pyltp 分词支持用户使用自定义词典。分词外部词典本身是一个文本文件（plain text），每行指定一个词，编码同样须为 UTF-8，样例如下所示 ::
 81 | 
 82 |     苯并芘
 83 |     亚硝酸盐
 84 | 
 85 | 示例如下 ::
 86 | 
 87 |     # -*- coding: utf-8 -*-
 88 |     import os
 89 |     LTP_DATA_DIR = '/path/to/your/ltp_data'  # ltp模型目录的路径
 90 |     cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')  # 分词模型路径，模型名称为`cws.model`
 91 | 
 92 |     from pyltp import Segmentor
 93 |     segmentor = Segmentor()  # 初始化实例
 94 |     segmentor.load_with_lexicon(cws_model_path, '/path/to/your/lexicon') # 加载模型，第二个参数是您的外部词典文件路径
 95 |     words = segmentor.segment('亚硝酸盐是一种化学物质')
 96 |     print '\t'.join(words)
 97 |     segmentor.release()
 98 | 
 99 | 
100 | 使用个性化分词模型
101 | ~~~~~~~~~~~~~~~~~~~
102 | 
103 | 个性化分词是 LTP 的特色功能。个性化分词用于解决测试数据切换到小说、财经等不同于新闻领域的领域时的分词问题。在切换到新领域时，用户只需要标注少量数据，个性化分词会在原有新闻数据的基础之上进行增量训练，从而达到既利用新闻领域的丰富数据、又兼顾目标领域特殊性的目的。
104 | 
105 | pyltp 支持使用用户训练好的个性化模型。关于个性化模型的训练需使用 LTP，详细介绍和训练方法请参考 `个性化分词 <http://ltp.readthedocs.org/zh_CN/latest/theory.html#customized-cws-reference-label>`_ 。
106 | 
107 | 在 pyltp 中使用个性化分词模型的示例如下 ::
108 | 
109 |     # -*- coding: utf-8 -*-
110 |     import os
111 |     LTP_DATA_DIR = '/path/to/your/ltp_data'  # ltp模型目录的路径
112 |     cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')  # 分词模型路径，模型名称为`cws.model`
113 | 
114 |     from pyltp import CustomizedSegmentor
115 |     customized_segmentor = CustomizedSegmentor()  # 初始化实例
116 |     customized_segmentor.load(cws_model_path, '/path/to/your/customized_model') # 加载模型，第二个参数是您的增量模型路径
117 |     words = customized_segmentor.segment('亚硝酸盐是一种化学物质')
118 |     print '\t'.join(words)
119 |     customized_segmentor.release()
120 | 
121 | 同样，使用个性化分词模型的同时也可以使用外部词典 ::
122 | 
123 |     # -*- coding: utf-8 -*-
124 |     import os
125 |     LTP_DATA_DIR = '/path/to/your/ltp_data'  # ltp模型目录的路径
126 |     cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')  # 分词模型路径，模型名称为`cws.model`
127 | 
128 |     from pyltp import CustomizedSegmentor
129 |     customized_segmentor = CustomizedSegmentor()  # 初始化实例
130 |     customized_segmentor.load_with_lexicon(cws_model_path, '/path/to/your/customized_model', '/path/to/your/lexicon') # 加载模型
131 |     words = customized_segmentor.segment('亚硝酸盐是一种化学物质')
132 |     print '\t'.join(words)
133 |     customized_segmentor.release()
134 | 
135 | 
136 | 词性标注
137 | --------
138 | 
139 | 使用 pyltp 进行词性标注示例如下 ::
140 | 
141 |     # -*- coding: utf-8 -*-
142 |     import os
143 |     LTP_DATA_DIR = '/path/to/your/ltp_data'  # ltp模型目录的路径
144 |     pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')  # 词性标注模型路径，模型名称为`pos.model`
145 | 
146 |     from pyltp import Postagger
147 |     postagger = Postagger() # 初始化实例
148 |     postagger.load(pos_model_path)  # 加载模型
149 | 
150 |     words = ['元芳', '你', '怎么', '看']  # 分词结果
151 |     postags = postagger.postag(words)  # 词性标注
152 | 
153 |     print '\t'.join(postags)
154 |     postagger.release()  # 释放模型
155 | 
156 | 结果如下 ::
157 | 
158 |     nh	r	r	v
159 | 
160 | 参数 :code:`words` 是分词模块的返回值，也支持Python原生的list类型，例如 ::
161 | 
162 |     words = ['元芳', '你', '怎么', '看']
163 |     postags = postagger.postag(words)
164 | 
165 | LTP 使用 863 词性标注集，详细请参考 `词性标注集 <http://ltp.readthedocs.org/zh_CN/latest/appendix.html#id3>`_ 。
166 | 
167 | 使用词性标注外部词典
168 | ~~~~~~~~~~~~~~~~~~~~
169 | 
170 | pyltp 词性标注同样支持用户的外部词典。词性标注外部词典同样为一个文本文件，每行指定一个词，第一列指定单词，第二列之后指定该词的候选词性（可以有多项，每一项占一列），列与列之间用空格区分。示例如下 ::
171 | 
172 |     雷人 v a
173 |     】 wp
174 | 
175 | 命名实体识别
176 | -------------
177 | 
178 | 使用 pyltp 进行命名实体识别示例如下 ::
179 | 
180 |     # -*- coding: utf-8 -*-
181 |     import os
182 |     LTP_DATA_DIR = '/path/to/your/ltp_data'  # ltp模型目录的路径
183 |     ner_model_path = os.path.join(LTP_DATA_DIR, 'ner.model')  # 命名实体识别模型路径，模型名称为`ner.model`
184 | 
185 |     from pyltp import NamedEntityRecognizer
186 |     recognizer = NamedEntityRecognizer() # 初始化实例
187 |     recognizer.load(ner_model_path)  # 加载模型
188 | 
189 |     words = ['元芳', '你', '怎么', '看']
190 |     postags = ['nh', 'r', 'r', 'v']
191 |     netags = recognizer.recognize(words, postags)  # 命名实体识别
192 | 
193 |     print '\t'.join(netags)
194 |     recognizer.release()  # 释放模型
195 | 
196 | 其中，:code:`words` 和 :code:`postags` 分别为分词和词性标注的结果。同样支持Python原生的list类型。
197 | 
198 | 结果如下 ::
199 | 
200 |     S-Nh	O	O	O
201 | 
202 | LTP 采用 BIESO 标注体系。B 表示实体开始词，I表示实体中间词，E表示实体结束词，S表示单独成实体，O表示不构成命名实体。
203 | 
204 | LTP 提供的命名实体类型为:人名（Nh）、地名（Ns）、机构名（Ni）。
205 | 
206 | B、I、E、S位置标签和实体类型标签之间用一个横线 :code:`-` 相连；O标签后没有类型标签。
207 | 
208 | 详细标注请参考 `命名实体识别标注集 <http://ltp.readthedocs.org/zh_CN/latest/appendix.html#id4>`_ 。
209 | 
210 | 依存句法分析
211 | ------------
212 | 
213 | 使用 pyltp 进行依存句法分析示例如下 ::
214 | 
215 |     # -*- coding: utf-8 -*-
216 |     import os
217 |     LTP_DATA_DIR = '/path/to/your/ltp_data'  # ltp模型目录的路径
218 |     par_model_path = os.path.join(LTP_DATA_DIR, 'parser.model')  # 依存句法分析模型路径，模型名称为`parser.model`
219 | 
220 |     from pyltp import Parser
221 |     parser = Parser() # 初始化实例
222 |     parser.load(par_model_path)  # 加载模型
223 | 
224 |     words = ['元芳', '你', '怎么', '看']
225 |     postags = ['nh', 'r', 'r', 'v']
226 |     arcs = parser.parse(words, postags)  # 句法分析
227 | 
228 |     print "\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs)
229 |     parser.release()  # 释放模型
230 | 
231 | 
232 | 其中，:code:`words` 和 :code:`postags` 分别为分词和词性标注的结果。同样支持Python原生的list类型。
233 | 
234 | 结果如下 ::
235 | 
236 |     4:SBV	4:SBV	4:ADV	0:HED
237 | 
238 | :code:`arc.head` 表示依存弧的父节点词的索引。ROOT节点的索引是0，第一个词开始的索引依次为1、2、3...
239 | 
240 | :code:`arc.relation` 表示依存弧的关系。
241 | 
242 | :code:`arc.head` 表示依存弧的父节点词的索引，:code:`arc.relation` 表示依存弧的关系。
243 | 
244 | 标注集请参考 `依存句法关系 <http://ltp.readthedocs.org/zh_CN/latest/appendix.html#id5>`_ 。
245 | 
246 | 语义角色标注
247 | -------------
248 | 
249 | 使用 pyltp 进行语义角色标注示例如下 ::
250 | 
251 |     # -*- coding: utf-8 -*-
252 |     import os
253 |     LTP_DATA_DIR = '/path/to/your/ltp_data'  # ltp模型目录的路径
254 |     srl_model_path = os.path.join(LTP_DATA_DIR, 'srl')  # 语义角色标注模型目录路径，模型目录为`srl`。注意该模型路径是一个目录，而不是一个文件。
255 | 
256 |     from pyltp import SementicRoleLabeller
257 |     labeller = SementicRoleLabeller() # 初始化实例
258 |     labeller.load(srl_model_path)  # 加载模型
259 | 
260 |     words = ['元芳', '你', '怎么', '看']
261 |     postags = ['nh', 'r', 'r', 'v']
262 |     # arcs 使用依存句法分析的结果
263 |     roles = labeller.label(words, postags, arcs)  # 语义角色标注
264 | 
265 |     # 打印结果
266 |     for role in roles:
267 |         print role.index, "".join(
268 |             ["%s:(%d,%d)" % (arg.name, arg.range.start, arg.range.end) for arg in role.arguments])
269 |     labeller.release()  # 释放模型
270 | 
271 | 结果如下 ::
272 | 
273 |     3 A0:(0,0)A0:(1,1)ADV:(2,2)
274 | 
275 | 
276 | 第一个词开始的索引依次为0、1、2...
277 | 
278 | 返回结果 :code:`roles` 是关于多个谓词的语义角色分析的结果。由于一句话中可能不含有语义角色，所以结果可能为空。
279 | 
280 | :code:`role.index` 代表谓词的索引， :code:`role.arguments` 代表关于该谓词的若干语义角色。
281 | 
282 | :code:`arg.name` 表示语义角色类型，:code:`arg.range.start` 表示该语义角色起始词位置的索引，:code:`arg.range.end` 表示该语义角色结束词位置的索引。
283 | 
284 | 例如上面的例子，由于结果输出一行，所以“元芳你怎么看”有一组语义角色。
285 | 其谓词索引为3，即“看”。这个谓词有三个语义角色，范围分别是(0,0)即“元芳”，(1,1)即“你”，(2,2)即“怎么”，类型分别是A0、A0、ADV。
286 | 
287 | :code:`arg.name` 表示语义角色关系，:code:`arg.range.start` 表示起始词位置，:code:`arg.range.end` 表示结束位置。
288 | 
289 | 标注集请参考 `语义角色关系 <http://ltp.readthedocs.org/zh_CN/latest/appendix.html#id6>`_ 。
290 | 
291 | 语义依存分析
292 | ------------
293 | 
294 | pyltp 暂不提供语义依存分析功能。若需使用该功能，请使用 `语言云 <http://www.ltp-cloud.com>`_ 。
295 | 
296 | 
297 | 完整示例代码
298 | -------------
299 | 
300 | 完整的示例代码可以参考 :file:`example/example.py` 。
301 | 


--------------------------------------------------------------------------------
/doc/changelog.rst:
--------------------------------------------------------------------------------
1 | * 2020年07月30日 使用 Pybind11 生成 Python 绑定，减少维护困难
2 | * 2017年12月05日 升级更新兼容 LTP 3.4.0


--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # pyltp documentation build configuration file, created by
  4 | # sphinx-quickstart on Tue Mar 29 11:19:39 2016.
  5 | #
  6 | # This file is execfile()d with the current directory set to its
  7 | # containing dir.
  8 | #
  9 | # Note that not all possible configuration values are present in this
 10 | # autogenerated file.
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | import sys
 16 | import os
 17 | 
 18 | # If extensions (or modules to document with autodoc) are in another directory,
 19 | # add these directories to sys.path here. If the directory is relative to the
 20 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 21 | #sys.path.insert(0, os.path.abspath('.'))
 22 | 
 23 | # -- General configuration ------------------------------------------------
 24 | 
 25 | # If your documentation needs a minimal Sphinx version, state it here.
 26 | #needs_sphinx = '1.0'
 27 | 
 28 | # Add any Sphinx extension module names here, as strings. They can be
 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 30 | # ones.
 31 | extensions = []
 32 | 
 33 | # Add any paths that contain templates here, relative to this directory.
 34 | templates_path = ['_templates']
 35 | 
 36 | # The suffix(es) of source filenames.
 37 | # You can specify multiple suffix as a list of string:
 38 | # source_suffix = ['.rst', '.md']
 39 | source_suffix = '.rst'
 40 | 
 41 | # The encoding of source files.
 42 | #source_encoding = 'utf-8-sig'
 43 | 
 44 | # The master toctree document.
 45 | master_doc = 'index'
 46 | 
 47 | # General information about the project.
 48 | project = u'pyltp'
 49 | copyright = u'2017, HIT-SCIR'
 50 | author = u'HIT-SCIR'
 51 | 
 52 | # The version info for the project you're documenting, acts as replacement for
 53 | # |version| and |release|, also used in various other places throughout the
 54 | # built documents.
 55 | #
 56 | # The short X.Y version.
 57 | version = u'0.2.0'
 58 | # The full version, including alpha/beta/rc tags.
 59 | release = u'0.2.0'
 60 | 
 61 | # The language for content autogenerated by Sphinx. Refer to documentation
 62 | # for a list of supported languages.
 63 | #
 64 | # This is also used if you do content translation via gettext catalogs.
 65 | # Usually you set "language" from the command line for these cases.
 66 | language = 'zh_CN'
 67 | 
 68 | # There are two options for replacing |today|: either, you set today to some
 69 | # non-false value, then it is used:
 70 | #today = ''
 71 | # Else, today_fmt is used as the format for a strftime call.
 72 | #today_fmt = '%B %d, %Y'
 73 | 
 74 | # List of patterns, relative to source directory, that match files and
 75 | # directories to ignore when looking for source files.
 76 | # These patterns also affect html_static_path and html_extra_path
 77 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 78 | 
 79 | # The reST default role (used for this markup: `text`) to use for all
 80 | # documents.
 81 | #default_role = None
 82 | 
 83 | # If true, '()' will be appended to :func: etc. cross-reference text.
 84 | #add_function_parentheses = True
 85 | 
 86 | # If true, the current module name will be prepended to all description
 87 | # unit titles (such as .. function::).
 88 | #add_module_names = True
 89 | 
 90 | # If true, sectionauthor and moduleauthor directives will be shown in the
 91 | # output. They are ignored by default.
 92 | #show_authors = False
 93 | 
 94 | # The name of the Pygments (syntax highlighting) style to use.
 95 | pygments_style = 'sphinx'
 96 | 
 97 | # A list of ignored prefixes for module index sorting.
 98 | #modindex_common_prefix = []
 99 | 
100 | # If true, keep warnings as "system message" paragraphs in the built documents.
101 | #keep_warnings = False
102 | 
103 | # If true, `todo` and `todoList` produce output, else they produce nothing.
104 | todo_include_todos = False
105 | 
106 | 
107 | # -- Options for HTML output ----------------------------------------------
108 | 
109 | # The theme to use for HTML and HTML Help pages.  See the documentation for
110 | # a list of builtin themes.
111 | html_theme = 'alabaster'
112 | 
113 | # Theme options are theme-specific and customize the look and feel of a theme
114 | # further.  For a list of options available for each theme, see the
115 | # documentation.
116 | #html_theme_options = {}
117 | 
118 | # Add any paths that contain custom themes here, relative to this directory.
119 | #html_theme_path = []
120 | 
121 | # The name for this set of Sphinx documents.
122 | # "<project> v<release> documentation" by default.
123 | #html_title = u'pyltp v0.1.9'
124 | 
125 | # A shorter title for the navigation bar.  Default is the same as html_title.
126 | #html_short_title = None
127 | 
128 | # The name of an image file (relative to this directory) to place at the top
129 | # of the sidebar.
130 | #html_logo = None
131 | 
132 | # The name of an image file (relative to this directory) to use as a favicon of
133 | # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
134 | # pixels large.
135 | #html_favicon = None
136 | 
137 | # Add any paths that contain custom static files (such as style sheets) here,
138 | # relative to this directory. They are copied after the builtin static files,
139 | # so a file named "default.css" will overwrite the builtin "default.css".
140 | html_static_path = ['_static']
141 | 
142 | # Add any extra paths that contain custom files (such as robots.txt or
143 | # .htaccess) here, relative to this directory. These files are copied
144 | # directly to the root of the documentation.
145 | #html_extra_path = []
146 | 
147 | # If not None, a 'Last updated on:' timestamp is inserted at every page
148 | # bottom, using the given strftime format.
149 | # The empty string is equivalent to '%b %d, %Y'.
150 | #html_last_updated_fmt = None
151 | 
152 | # If true, SmartyPants will be used to convert quotes and dashes to
153 | # typographically correct entities.
154 | #html_use_smartypants = True
155 | 
156 | # Custom sidebar templates, maps document names to template names.
157 | #html_sidebars = {}
158 | 
159 | # Additional templates that should be rendered to pages, maps page names to
160 | # template names.
161 | #html_additional_pages = {}
162 | 
163 | # If false, no module index is generated.
164 | #html_domain_indices = True
165 | 
166 | # If false, no index is generated.
167 | #html_use_index = True
168 | 
169 | # If true, the index is split into individual pages for each letter.
170 | #html_split_index = False
171 | 
172 | # If true, links to the reST sources are added to the pages.
173 | #html_show_sourcelink = True
174 | 
175 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
176 | #html_show_sphinx = True
177 | 
178 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
179 | #html_show_copyright = True
180 | 
181 | # If true, an OpenSearch description file will be output, and all pages will
182 | # contain a <link> tag referring to it.  The value of this option must be the
183 | # base URL from which the finished HTML is served.
184 | #html_use_opensearch = ''
185 | 
186 | # This is the file name suffix for HTML files (e.g. ".xhtml").
187 | #html_file_suffix = None
188 | 
189 | # Language to be used for generating the HTML full-text search index.
190 | # Sphinx supports the following languages:
191 | #   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
192 | #   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
193 | #html_search_language = 'en'
194 | 
195 | # A dictionary with options for the search language support, empty by default.
196 | # 'ja' uses this config value.
197 | # 'zh' user can custom change `jieba` dictionary path.
198 | #html_search_options = {'type': 'default'}
199 | 
200 | # The name of a javascript file (relative to the configuration directory) that
201 | # implements a search results scorer. If empty, the default will be used.
202 | #html_search_scorer = 'scorer.js'
203 | 
204 | # Output file base name for HTML help builder.
205 | htmlhelp_basename = 'pyltpdoc'
206 | 
207 | # -- Options for LaTeX output ---------------------------------------------
208 | 
209 | latex_elements = {
210 | # The paper size ('letterpaper' or 'a4paper').
211 | #'papersize': 'letterpaper',
212 | 
213 | # The font size ('10pt', '11pt' or '12pt').
214 | #'pointsize': '10pt',
215 | 
216 | # Additional stuff for the LaTeX preamble.
217 | #'preamble': '',
218 | 
219 | # Latex figure (float) alignment
220 | #'figure_align': 'htbp',
221 | }
222 | 
223 | # Grouping the document tree into LaTeX files. List of tuples
224 | # (source start file, target name, title,
225 | #  author, documentclass [howto, manual, or own class]).
226 | latex_documents = [
227 |     (master_doc, 'pyltp.tex', u'pyltp Documentation',
228 |      u'HIT-SCIR', 'manual'),
229 | ]
230 | 
231 | # The name of an image file (relative to this directory) to place at the top of
232 | # the title page.
233 | #latex_logo = None
234 | 
235 | # For "manual" documents, if this is true, then toplevel headings are parts,
236 | # not chapters.
237 | #latex_use_parts = False
238 | 
239 | # If true, show page references after internal links.
240 | #latex_show_pagerefs = False
241 | 
242 | # If true, show URL addresses after external links.
243 | #latex_show_urls = False
244 | 
245 | # Documents to append as an appendix to all manuals.
246 | #latex_appendices = []
247 | 
248 | # If false, no module index is generated.
249 | #latex_domain_indices = True
250 | 
251 | 
252 | # -- Options for manual page output ---------------------------------------
253 | 
254 | # One entry per manual page. List of tuples
255 | # (source start file, name, description, authors, manual section).
256 | man_pages = [
257 |     (master_doc, 'pyltp', u'pyltp Documentation',
258 |      [author], 1)
259 | ]
260 | 
261 | # If true, show URL addresses after external links.
262 | #man_show_urls = False
263 | 
264 | 
265 | # -- Options for Texinfo output -------------------------------------------
266 | 
267 | # Grouping the document tree into Texinfo files. List of tuples
268 | # (source start file, target name, title, author,
269 | #  dir menu entry, description, category)
270 | texinfo_documents = [
271 |     (master_doc, 'pyltp', u'pyltp Documentation',
272 |      author, 'pyltp', 'One line description of project.',
273 |      'Miscellaneous'),
274 | ]
275 | 
276 | # Documents to append as an appendix to all manuals.
277 | #texinfo_appendices = []
278 | 
279 | # If false, no module index is generated.
280 | #texinfo_domain_indices = True
281 | 
282 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
283 | #texinfo_show_urls = 'footnote'
284 | 
285 | # If true, do not generate a @detailmenu in the "Top" node's menu.
286 | #texinfo_no_detailmenu = False
287 | 


--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
 1 | .. pyltp documentation master file, created by
 2 |    sphinx-quickstart on Tue Mar 29 11:19:39 2016.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | .. include:: intro.rst
 7 | 
 8 | 目录
 9 | =================================
10 | 
11 | .. toctree::
12 |    :maxdepth: 2
13 | 
14 |    install
15 |    api
16 |    changelog
17 | 
18 | 


--------------------------------------------------------------------------------
/doc/install.rst:
--------------------------------------------------------------------------------
 1 | 安装 pyltp
 2 | ===========
 3 | 
 4 | * 注：由于新版本增加了新的第三方依赖如dynet等，不再支持 windows 下 python2 环境。
 5 | 
 6 | 使用 pip 安装
 7 | -------------
 8 | 
 9 | 使用 pip 安装前，请确保您已安装了 `pip <https://pip.pypa.io/>`_ ::
10 | 
11 |     $ pip install pyltp
12 | 
13 | 接下来，需要下载 LTP 模型文件。
14 | 
15 | * 下载地址 - `模型下载 <http://ltp.ai/download.html>`_
16 | * 当前模型版本 - 3.4.0
17 | * 注意在windows下 3.4.0 版本的 语义角色标注模块 模型需要单独下载，具体查看下载地址链接中的说明。
18 | 
19 | 请确保下载的模型版本与当前版本的 pyltp 对应，否则会导致程序无法正确加载模型。
20 | 
21 | 从源码安装
22 | ----------
23 | 
24 | 您也可以选择从源代码编译安装 ::
25 | 
26 |     $ git clone https://github.com/HIT-SCIR/pyltp && cd pyltp
27 |     $ git submodule init
28 |     $ git submodule update
29 |     $ python setup.py install
30 | 
31 | 安装完毕后，也需要下载相应版本的 LTP 模型文件。
32 | 


--------------------------------------------------------------------------------
/doc/intro.rst:
--------------------------------------------------------------------------------
1 | 欢迎使用 pyltp
2 | 
3 | pyltp 是 `语言技术平台（Language Technology Platform, LTP） <https://github.com/HIT-SCIR/ltp>`_ 的 Python 封装。
4 | 
5 | 如需了解 LTP 的详细信息，请参考 LTP 的 `文档 <https://github.com/HIT-SCIR/ltp>`_ 。
6 | 


--------------------------------------------------------------------------------
/doc/make.bat:
--------------------------------------------------------------------------------
  1 | @ECHO OFF
  2 | 
  3 | REM Command file for Sphinx documentation
  4 | 
  5 | if "%SPHINXBUILD%" == "" (
  6 | 	set SPHINXBUILD=sphinx-build
  7 | )
  8 | set BUILDDIR=_build
  9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
 10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
 11 | if NOT "%PAPER%" == "" (
 12 | 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 13 | 	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
 14 | )
 15 | 
 16 | if "%1" == "" goto help
 17 | 
 18 | if "%1" == "help" (
 19 | 	:help
 20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
 21 | 	echo.  html       to make standalone HTML files
 22 | 	echo.  dirhtml    to make HTML files named index.html in directories
 23 | 	echo.  singlehtml to make a single large HTML file
 24 | 	echo.  pickle     to make pickle files
 25 | 	echo.  json       to make JSON files
 26 | 	echo.  htmlhelp   to make HTML files and a HTML help project
 27 | 	echo.  qthelp     to make HTML files and a qthelp project
 28 | 	echo.  devhelp    to make HTML files and a Devhelp project
 29 | 	echo.  epub       to make an epub
 30 | 	echo.  epub3      to make an epub3
 31 | 	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
 32 | 	echo.  text       to make text files
 33 | 	echo.  man        to make manual pages
 34 | 	echo.  texinfo    to make Texinfo files
 35 | 	echo.  gettext    to make PO message catalogs
 36 | 	echo.  changes    to make an overview over all changed/added/deprecated items
 37 | 	echo.  xml        to make Docutils-native XML files
 38 | 	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
 39 | 	echo.  linkcheck  to check all external links for integrity
 40 | 	echo.  doctest    to run all doctests embedded in the documentation if enabled
 41 | 	echo.  coverage   to run coverage check of the documentation if enabled
 42 | 	goto end
 43 | )
 44 | 
 45 | if "%1" == "clean" (
 46 | 	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
 47 | 	del /q /s %BUILDDIR%\*
 48 | 	goto end
 49 | )
 50 | 
 51 | 
 52 | REM Check if sphinx-build is available and fallback to Python version if any
 53 | %SPHINXBUILD% 1>NUL 2>NUL
 54 | if errorlevel 9009 goto sphinx_python
 55 | goto sphinx_ok
 56 | 
 57 | :sphinx_python
 58 | 
 59 | set SPHINXBUILD=python -m sphinx.__init__
 60 | %SPHINXBUILD% 2> nul
 61 | if errorlevel 9009 (
 62 | 	echo.
 63 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
 64 | 	echo.installed, then set the SPHINXBUILD environment variable to point
 65 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
 66 | 	echo.may add the Sphinx directory to PATH.
 67 | 	echo.
 68 | 	echo.If you don't have Sphinx installed, grab it from
 69 | 	echo.http://sphinx-doc.org/
 70 | 	exit /b 1
 71 | )
 72 | 
 73 | :sphinx_ok
 74 | 
 75 | 
 76 | if "%1" == "html" (
 77 | 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
 78 | 	if errorlevel 1 exit /b 1
 79 | 	echo.
 80 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
 81 | 	goto end
 82 | )
 83 | 
 84 | if "%1" == "dirhtml" (
 85 | 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
 86 | 	if errorlevel 1 exit /b 1
 87 | 	echo.
 88 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
 89 | 	goto end
 90 | )
 91 | 
 92 | if "%1" == "singlehtml" (
 93 | 	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
 94 | 	if errorlevel 1 exit /b 1
 95 | 	echo.
 96 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
 97 | 	goto end
 98 | )
 99 | 
100 | if "%1" == "pickle" (
101 | 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
102 | 	if errorlevel 1 exit /b 1
103 | 	echo.
104 | 	echo.Build finished; now you can process the pickle files.
105 | 	goto end
106 | )
107 | 
108 | if "%1" == "json" (
109 | 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
110 | 	if errorlevel 1 exit /b 1
111 | 	echo.
112 | 	echo.Build finished; now you can process the JSON files.
113 | 	goto end
114 | )
115 | 
116 | if "%1" == "htmlhelp" (
117 | 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
118 | 	if errorlevel 1 exit /b 1
119 | 	echo.
120 | 	echo.Build finished; now you can run HTML Help Workshop with the ^
121 | .hhp project file in %BUILDDIR%/htmlhelp.
122 | 	goto end
123 | )
124 | 
125 | if "%1" == "qthelp" (
126 | 	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
127 | 	if errorlevel 1 exit /b 1
128 | 	echo.
129 | 	echo.Build finished; now you can run "qcollectiongenerator" with the ^
130 | .qhcp project file in %BUILDDIR%/qthelp, like this:
131 | 	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pyltp.qhcp
132 | 	echo.To view the help file:
133 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pyltp.ghc
134 | 	goto end
135 | )
136 | 
137 | if "%1" == "devhelp" (
138 | 	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
139 | 	if errorlevel 1 exit /b 1
140 | 	echo.
141 | 	echo.Build finished.
142 | 	goto end
143 | )
144 | 
145 | if "%1" == "epub" (
146 | 	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
147 | 	if errorlevel 1 exit /b 1
148 | 	echo.
149 | 	echo.Build finished. The epub file is in %BUILDDIR%/epub.
150 | 	goto end
151 | )
152 | 
153 | if "%1" == "epub3" (
154 | 	%SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3
155 | 	if errorlevel 1 exit /b 1
156 | 	echo.
157 | 	echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
158 | 	goto end
159 | )
160 | 
161 | if "%1" == "latex" (
162 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
163 | 	if errorlevel 1 exit /b 1
164 | 	echo.
165 | 	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
166 | 	goto end
167 | )
168 | 
169 | if "%1" == "latexpdf" (
170 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
171 | 	cd %BUILDDIR%/latex
172 | 	make all-pdf
173 | 	cd %~dp0
174 | 	echo.
175 | 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
176 | 	goto end
177 | )
178 | 
179 | if "%1" == "latexpdfja" (
180 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
181 | 	cd %BUILDDIR%/latex
182 | 	make all-pdf-ja
183 | 	cd %~dp0
184 | 	echo.
185 | 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
186 | 	goto end
187 | )
188 | 
189 | if "%1" == "text" (
190 | 	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
191 | 	if errorlevel 1 exit /b 1
192 | 	echo.
193 | 	echo.Build finished. The text files are in %BUILDDIR%/text.
194 | 	goto end
195 | )
196 | 
197 | if "%1" == "man" (
198 | 	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
199 | 	if errorlevel 1 exit /b 1
200 | 	echo.
201 | 	echo.Build finished. The manual pages are in %BUILDDIR%/man.
202 | 	goto end
203 | )
204 | 
205 | if "%1" == "texinfo" (
206 | 	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
207 | 	if errorlevel 1 exit /b 1
208 | 	echo.
209 | 	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
210 | 	goto end
211 | )
212 | 
213 | if "%1" == "gettext" (
214 | 	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
215 | 	if errorlevel 1 exit /b 1
216 | 	echo.
217 | 	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
218 | 	goto end
219 | )
220 | 
221 | if "%1" == "changes" (
222 | 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
223 | 	if errorlevel 1 exit /b 1
224 | 	echo.
225 | 	echo.The overview file is in %BUILDDIR%/changes.
226 | 	goto end
227 | )
228 | 
229 | if "%1" == "linkcheck" (
230 | 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
231 | 	if errorlevel 1 exit /b 1
232 | 	echo.
233 | 	echo.Link check complete; look for any errors in the above output ^
234 | or in %BUILDDIR%/linkcheck/output.txt.
235 | 	goto end
236 | )
237 | 
238 | if "%1" == "doctest" (
239 | 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
240 | 	if errorlevel 1 exit /b 1
241 | 	echo.
242 | 	echo.Testing of doctests in the sources finished, look at the ^
243 | results in %BUILDDIR%/doctest/output.txt.
244 | 	goto end
245 | )
246 | 
247 | if "%1" == "coverage" (
248 | 	%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
249 | 	if errorlevel 1 exit /b 1
250 | 	echo.
251 | 	echo.Testing of coverage in the sources finished, look at the ^
252 | results in %BUILDDIR%/coverage/python.txt.
253 | 	goto end
254 | )
255 | 
256 | if "%1" == "xml" (
257 | 	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
258 | 	if errorlevel 1 exit /b 1
259 | 	echo.
260 | 	echo.Build finished. The XML files are in %BUILDDIR%/xml.
261 | 	goto end
262 | )
263 | 
264 | if "%1" == "pseudoxml" (
265 | 	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
266 | 	if errorlevel 1 exit /b 1
267 | 	echo.
268 | 	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
269 | 	goto end
270 | )
271 | 
272 | :end
273 | 


--------------------------------------------------------------------------------
/doc/start.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HIT-SCIR/pyltp/722f609f3798071dc7ab173eea2f02d663e9a798/doc/start.rst


--------------------------------------------------------------------------------
/example/example.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | import sys, os
 4 | 
 5 | ROOTDIR = os.path.join(os.path.dirname(__file__), os.pardir)
 6 | sys.path = [os.path.join(ROOTDIR, "lib")] + sys.path
 7 | 
 8 | # Set your own model path
 9 | MODELDIR = os.path.join(ROOTDIR, "./ltp_data")
10 | 
11 | from pyltp import SentenceSplitter, Segmentor, Postagger, Parser, NamedEntityRecognizer, SementicRoleLabeller
12 | 
13 | if __name__ == '__main__':
14 |     paragraph = '他叫汤姆去拿外衣。'
15 | 
16 |     # --------------------- 断句 ------------------------
17 |     sentence = SentenceSplitter.split(paragraph)[0]
18 | 
19 |     # -------------------- Context Manager -------------
20 |     with Segmentor(os.path.join(MODELDIR, "cws.model")) as s:
21 |         words = s.segment(sentence)
22 |         print("\t".join(words))
23 | 
24 |     # --------------------- 分词 ------------------------
25 |     segmentor = Segmentor(os.path.join(MODELDIR, "cws.model"))
26 | 
27 |     segmentor_with_vocab = Segmentor(
28 |         os.path.join(MODELDIR, "cws.model"),
29 |         lexicon_path='lexicon.txt',  # 分开的会合并在一起
30 |     )
31 | 
32 |     segmentor_with_force_vocab = Segmentor(
33 |         os.path.join(MODELDIR, "cws.model"),
34 |         force_lexicon_path='lexicon.txt'  # 除上述功能外，原本合并在一起的亦会拆分
35 |     )
36 | 
37 |     words = segmentor.segment(sentence)
38 |     print("\t".join(words))
39 | 
40 |     words_with_vocab = segmentor_with_vocab.segment(sentence)
41 |     print("\t".join(words_with_vocab), "\t\t| With Vocab")
42 | 
43 |     words_with_force_vocab = segmentor_with_force_vocab.segment(sentence)
44 |     print("\t".join(words_with_force_vocab), "\t| Force Vocab")
45 | 
46 |     # --------------------- 词性标注 ------------------------
47 |     postagger = Postagger(os.path.join(MODELDIR, "pos.model"))
48 |     postags = postagger.postag(words)
49 |     # list-of-string parameter is supported since 0.1.5
50 |     # postags = postagger.postag(["中国","进出口","银行","与","中国银行","加强","合作"])
51 |     print("\t".join(postags))
52 | 
53 |     # --------------------- 依存句法分析 ------------------------
54 |     parser = Parser(os.path.join(MODELDIR, "parser.model"))
55 |     arcs = parser.parse(words, postags)
56 | 
57 |     print("\t".join("%d:%s" % (head, relation) for (head, relation) in arcs))
58 | 
59 |     # --------------------- 命名实体识别 ------------------------
60 |     recognizer = NamedEntityRecognizer(os.path.join(MODELDIR, "ner.model"))
61 |     netags = recognizer.recognize(words, postags)
62 |     print("\t".join(netags))
63 | 
64 |     # --------------------- 语义角色标注 ------------------------
65 |     labeller = SementicRoleLabeller(os.path.join(MODELDIR, "pisrl.model"))
66 |     roles = labeller.label(words, postags, arcs)
67 | 
68 |     for index, arguments in roles:
69 |         print(index, " ".join(["%s: (%d,%d)" % (name, start, end) for (name, (start, end)) in arguments]))
70 | 
71 |     segmentor.release()
72 |     segmentor_with_vocab.release()
73 |     segmentor_with_force_vocab.release()
74 |     segmentor.release()
75 |     postagger.release()
76 |     parser.release()
77 |     recognizer.release()
78 |     labeller.release()
79 | 


--------------------------------------------------------------------------------
/example/lexicon.txt:
--------------------------------------------------------------------------------
1 | 他叫
2 | 汤


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = [
 3 |     "setuptools>=42",
 4 |     "wheel",
 5 |     "cmake>=3.12",
 6 | ]
 7 | build-backend = "setuptools.build_meta"
 8 | 
 9 | [tool.isort]
10 | profile = "black"
11 | 
12 | [tool.cibuildwheel]
13 | test-command = "pytest {project}/tests"
14 | test-extras = ["test"]
15 | test-skip = ["*universal2:arm64"]
16 | # Setuptools bug causes collision between pypy and cpython artifacts
17 | before-build = "rm -rf {project}/build"


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
  1 | import codecs
  2 | import os
  3 | import re
  4 | import sys
  5 | import subprocess
  6 | 
  7 | from setuptools import setup, Extension
  8 | from setuptools.command.build_ext import build_ext
  9 | 
 10 | # Convert distutils Windows platform specifiers to CMake -A arguments
 11 | PLAT_TO_CMAKE = {
 12 |     "win32": "Win32",
 13 |     "win-amd64": "x64",
 14 |     "win-arm32": "ARM",
 15 |     "win-arm64": "ARM64",
 16 | }
 17 | 
 18 | 
 19 | class CMakeExtension(Extension):
 20 |     def __init__(self, name, sourcedir=''):
 21 |         Extension.__init__(self, name, sources=[])
 22 |         self.sourcedir = os.path.abspath(sourcedir)
 23 | 
 24 | 
 25 | class CMakeBuild(build_ext):
 26 |     def build_extension(self, ext):
 27 |         extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
 28 | 
 29 |         # required for auto-detection & inclusion of auxiliary "native" libs
 30 |         if not extdir.endswith(os.path.sep):
 31 |             extdir += os.path.sep
 32 | 
 33 |         debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug
 34 |         cfg = "Debug" if debug else "Release"
 35 | 
 36 |         # CMake lets you override the generator - we need to check this.
 37 |         # Can be set with Conda-Build, for example.
 38 |         cmake_generator = os.environ.get("CMAKE_GENERATOR", "")
 39 | 
 40 |         # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON
 41 |         # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code
 42 |         # from Python.
 43 |         cmake_args = [
 44 |             f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}",
 45 |             f"-DPYTHON_EXECUTABLE={sys.executable}",
 46 |             f"-DCMAKE_BUILD_TYPE={cfg}",  # not used on MSVC, but no harm
 47 |         ]
 48 |         build_args = []
 49 |         # Adding CMake arguments set as environment variable
 50 |         # (needed e.g. to build for ARM OSx on conda-forge)
 51 |         if "CMAKE_ARGS" in os.environ:
 52 |             cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]
 53 | 
 54 |         # In this example, we pass in the version to C++. You might not need to.
 55 |         cmake_args += [f"-DEXAMPLE_VERSION_INFO={self.distribution.get_version()}"]
 56 | 
 57 |         if self.compiler.compiler_type != "msvc":
 58 |             # Using Ninja-build since it a) is available as a wheel and b)
 59 |             # multithreads automatically. MSVC would require all variables be
 60 |             # exported for Ninja to pick it up, which is a little tricky to do.
 61 |             # Users can override the generator with CMAKE_GENERATOR in CMake
 62 |             # 3.15+.
 63 |             if not cmake_generator or cmake_generator == "Ninja":
 64 |                 try:
 65 |                     import ninja  # noqa: F401
 66 | 
 67 |                     ninja_executable_path = os.path.join(ninja.BIN_DIR, "ninja")
 68 |                     cmake_args += [
 69 |                         "-GNinja",
 70 |                         f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}",
 71 |                     ]
 72 |                 except ImportError:
 73 |                     pass
 74 | 
 75 |         else:
 76 | 
 77 |             # Single config generators are handled "normally"
 78 |             single_config = any(x in cmake_generator for x in {"NMake", "Ninja"})
 79 | 
 80 |             # CMake allows an arch-in-generator style for backward compatibility
 81 |             contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"})
 82 | 
 83 |             # Specify the arch if using MSVC generator, but only if it doesn't
 84 |             # contain a backward-compatibility arch spec already in the
 85 |             # generator name.
 86 |             if not single_config and not contains_arch:
 87 |                 cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]]
 88 | 
 89 |             # Multi-config generators have a different way to specify configs
 90 |             if not single_config:
 91 |                 cmake_args += [
 92 |                     f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"
 93 |                 ]
 94 |                 build_args += ["--config", cfg]
 95 | 
 96 |         if sys.platform.startswith("darwin"):
 97 |             # Cross-compile support for macOS - respect ARCHFLAGS if set
 98 |             archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", ""))
 99 |             if archs:
100 |                 cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))]
101 | 
102 |         # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level
103 |         # across all generators.
104 |         if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
105 |             # self.parallel is a Python 3 only way to set parallel jobs by hand
106 |             # using -j in the build_ext call, not supported by pip or PyPA-build.
107 |             if hasattr(self, "parallel") and self.parallel:
108 |                 # CMake 3.12+ only.
109 |                 build_args += [f"-j{self.parallel}"]
110 | 
111 |         build_temp = os.path.join(self.build_temp, ext.name)
112 |         if not os.path.exists(build_temp):
113 |             os.makedirs(build_temp)
114 | 
115 |         subprocess.check_call(["cmake", ext.sourcedir] + cmake_args, cwd=build_temp)
116 |         subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=build_temp)
117 | 
118 | 
119 | setup(
120 |     name='pyltp',
121 |     version='0.4.0',
122 |     description='pyltp: the python extension for LTP 3',
123 |     long_description=codecs.open('README.md', encoding='utf-8').read(),
124 |     long_description_content_type='text/markdown',
125 |     author='Yijia Liu, Zixiang Xu, Yang Liu, Yunlong Feng',
126 |     author_email='ylfeng@ir.hit.edu.cn',
127 |     url='https://github.com/HIT-SCIR/pyltp',
128 |     classifiers=[
129 |         'Development Status :: 4 - Beta',
130 |         'Intended Audience :: Developers',
131 |         'Intended Audience :: Science/Research',
132 |         'License :: OSI Approved :: MIT License',
133 |         'Programming Language :: Python :: 2',
134 |         'Programming Language :: Python :: 2.6',
135 |         'Programming Language :: Python :: 2.7',
136 |         'Programming Language :: Python :: 3',
137 |         'Programming Language :: Python :: 3.2',
138 |         'Programming Language :: Python :: 3.3',
139 |         'Programming Language :: Python :: 3.4',
140 |         'Programming Language :: Python :: 3.5',
141 |         'Programming Language :: Python :: 3.6',
142 |         'Programming Language :: Python :: 3.7',
143 |         'Programming Language :: Python :: 3.8',
144 |         "Topic :: Software Development",
145 |         "Topic :: Software Development :: Libraries :: Python Modules",
146 |         "Topic :: Scientific/Engineering",
147 |         "Topic :: Scientific/Engineering :: Information Analysis",
148 |         "Topic :: Text Processing :: Linguistic",
149 |     ],
150 |     zip_safe=False,
151 |     ext_modules=[CMakeExtension("pyltp")],
152 |     extras_require={"test": ["pytest>=6.0"]},
153 |     cmdclass={"build_ext": CMakeBuild},
154 |     python_requires=">=3.6",
155 | )
156 | 


--------------------------------------------------------------------------------
/src/pyltp.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * pyltp - A python extension for Language Technology Platform
  3 |  *
  4 |  *  @author: YunLong Feng <ylfeng@ir.hit.edu.cn>
  5 |  *
  6 |  * This project is forked from https://github.com/HuangFJ/pyltp. The basic
  7 |  * structure of the project is preserved, but the interface is adapted from
  8 |  * the XML level to the library level to allow more flexible usage.
  9 |  *
 10 |  *  @author: Yijia Liu <yjliu@ir.hit.edu.cn>
 11 |  *  @author: Zixiang Xu <zxxu@ir.hit.edu.cn>
 12 |  *  @author: Yang Liu <yliu@ir.hit.edu.cn>
 13 |  *  @author: YunLong Feng <ylfeng@ir.hit.edu.cn>
 14 |  */
 15 | #include "ltp/ner_dll.h"
 16 | #include "ltp/parser_dll.h"
 17 | #include "ltp/postag_dll.h"
 18 | #include "ltp/segment_dll.h"
 19 | #include "ltp/SplitSentence.h"
 20 | #include "ltp/srl_dll.h"
 21 | #include <iostream>
 22 | #include <pybind11/pybind11.h>
 23 | #include <pybind11/stl.h>
 24 | #include <vector>
 25 | 
 26 | #define STRINGIFY(x) #x
 27 | #define MACRO_STRINGIFY(x) STRINGIFY(x)
 28 | 
 29 | namespace py = pybind11;
 30 | 
 31 | /// Stateless wrapper: split() returns the vector that SplitSentence() fills.
 32 | struct SentenceSplitter {
 33 |   SentenceSplitter() {}
 34 |   static std::vector<std::string> split(const std::string &paragraph) {
 35 |     std::vector<std::string> sentences;
 36 |     SplitSentence(paragraph, sentences);
 37 |     return sentences;
 38 |   }
 39 | };
 40 | 
 41 | /// Word segmenter wrapping a handle from segmentor_create_segmentor().
 42 | struct Segmentor {
 43 |   /// Creates the handle at construction; both lexicon paths default to null.
 44 |   Segmentor(const char *model_path,
 45 |             const char *lexicon_path = nullptr,
 46 |             const char *force_lexicon_file = nullptr) : model(NULL) {
 47 |     load(model_path, lexicon_path, force_lexicon_file);
 48 |   }
 49 | 
 50 |   /// Segments `sentence`; with no handle it logs to stderr and returns empty.
 51 |   std::vector<std::string> segment(const std::string &sentence) {
 52 |     std::vector<std::string> words;
 53 |     if (model != NULL) segmentor_segment(model, sentence.c_str(), words);
 54 |     else std::cerr << "Segmentor: Model not loaded!" << std::endl;
 55 |     return words;
 56 |   }
 57 | 
 58 |   /// Frees the handle; calling it again is a no-op.
 59 |   void release() {
 60 |     if (model == NULL) return;
 61 |     segmentor_release_segmentor(model);
 62 |     model = NULL;
 63 |   }
 64 | 
 65 |   void *model;  // opaque LTP handle; NULL when none is held
 66 | 
 67 | private:
 68 |   /// Creates the handle unless one is already held.
 69 |   void load(const char *model_path,
 70 |             const char *lexicon_path = nullptr,
 71 |             const char *force_lexicon_file = nullptr) {
 72 |     if (model == NULL)
 73 |       model = segmentor_create_segmentor(model_path, lexicon_path, force_lexicon_file);
 74 |     else
 75 |       std::cerr << "Segmentor: Model reloaded!" << std::endl;
 76 |   }
 77 | };
 78 | 
 79 | /// Segmenter wrapping a handle from customized_segmentor_create_segmentor().
 80 | struct CustomizedSegmentor {
 81 |   /// Creates the handle at construction; the last two paths default to null.
 82 |   CustomizedSegmentor(const char *base_model_path,
 83 |                       const char *customized_model_path = nullptr,
 84 |                       const char *lexicon_path = nullptr) : model(NULL) {
 85 |     load(base_model_path, customized_model_path, lexicon_path);
 86 |   }
 87 | 
 88 |   /// Segments `sentence`; with no handle it logs to stderr and returns empty.
 89 |   std::vector<std::string> segment(const std::string &sentence) {
 90 |     std::vector<std::string> words;
 91 |     if (model != NULL) customized_segmentor_segment(model, sentence.c_str(), words);
 92 |     else std::cerr << "CustomizedSegmentor: Model not loaded!" << std::endl;
 93 |     return words;
 94 |   }
 95 | 
 96 |   /// Frees the handle; calling it again is a no-op.
 97 |   void release() {
 98 |     if (model == NULL) return;
 99 |     customized_segmentor_release_segmentor(model);
100 |     model = NULL;
101 |   }
102 | 
103 |   void *model;  // opaque LTP handle; NULL when none is held
104 | 
105 | private:
106 |   /// Creates the handle unless one is already held.
107 |   void load(const char *base_model_path,
108 |             const char *customized_model_path = nullptr,
109 |             const char *lexicon_path = nullptr) {
110 |     if (model == NULL)
111 |       model = customized_segmentor_create_segmentor(base_model_path, customized_model_path, lexicon_path);
112 |     else
113 |       std::cerr << "CustomizedSegmentor: Model reloaded!" << std::endl;
114 |   }
115 | };
116 | 
117 | /// Part-of-speech tagger wrapping a handle from postagger_create_postagger().
118 | struct Postagger {
119 |   /// Creates the handle at construction; the lexicon path defaults to null.
120 |   Postagger(const char *model_path, const char *lexicon_path = nullptr) : model(NULL) {
121 |     load(model_path, lexicon_path);
122 |   }
123 | 
124 |   /// Tags `words`; with no handle it logs to stderr and returns empty.
125 |   std::vector<std::string> postag(const std::vector<std::string> &words) {
126 |     std::vector<std::string> tags;
127 |     if (model != NULL) postagger_postag(model, words, tags);
128 |     else std::cerr << "Postagger: Model not loaded!" << std::endl;
129 |     return tags;
130 |   }
131 | 
132 |   /// Frees the handle; calling it again is a no-op.
133 |   void release() {
134 |     if (model == NULL) return;
135 |     postagger_release_postagger(model);
136 |     model = NULL;
137 |   }
138 | 
139 |   void *model;  // opaque LTP handle; NULL when none is held
140 | 
141 | private:
142 |   /// Creates the handle unless one is already held.
143 |   void load(const char *model_path, const char *lexicon_path = nullptr) {
144 |     if (model == NULL)
145 |       model = postagger_create_postagger(model_path, lexicon_path);
146 |     else
147 |       std::cerr << "Postagger: Model reloaded!" << std::endl;
148 |   }
149 | };
150 | 
151 | // One parsed word as (head, relation), in the order Parser::parse() pairs them.
152 | typedef std::pair<int, std::string> ParseResult;
153 | 
154 | /// Dependency parser wrapping a handle created in the constructor.
155 | struct Parser {
156 |   Parser(const char *model_path) : model(NULL) {
157 |     load(model_path);
158 |   }
159 | 
160 |   /// Pairs heads[i] with relations[i] from parser_parse(); with no handle it logs
161 |   /// and returns empty. Pairing stops at the shorter vector to stay in bounds.
162 |   std::vector<ParseResult> parse(const std::vector<std::string> &words,
163 |                                  const std::vector<std::string> &postags) {
164 |     std::vector<ParseResult> ret;
165 |     std::vector<int> heads;
166 |     std::vector<std::string> relations;
167 |     if (model != NULL) parser_parse(model, words, postags, heads, relations);
168 |     else std::cerr << "Parser: Model not loaded!" << std::endl;
169 |     const std::size_t count = heads.size() < relations.size() ? heads.size() : relations.size();
170 |     ret.reserve(count);
171 |     for (std::size_t i = 0; i < count; ++i)
172 |       ret.push_back(ParseResult(heads[i], relations[i]));
173 |     return ret;
174 |   }
175 | 
176 |   /// Frees the handle; calling it again is a no-op.
177 |   void release() {
178 |     if (model == NULL) return;
179 |     parser_release_parser(model);
180 |     model = NULL;
181 |   }
182 | 
183 |   void *model;  // opaque LTP handle; NULL when none is held
184 | private:
185 |   /// Creates the handle unless one is already held.
186 |   void load(const char *model_path) {
187 |     if (model == NULL)
188 |       model = parser_create_parser(model_path);
189 |     else
190 |       std::cerr << "Parser: Model reloaded!" << std::endl;
191 |   }
192 | };
193 | 
194 | /// Named entity recognizer wrapping a handle from ner_create_recognizer().
195 | struct NamedEntityRecognizer {
196 |   /// Creates the recognizer handle from `model_path` at construction.
197 |   NamedEntityRecognizer(const char *model_path) : model(NULL) {
198 |     load(model_path);
199 |   }
200 | 
201 |   /// Tags the sentence; with no handle it logs to stderr and returns empty.
202 |   std::vector<std::string> recognize(const std::vector<std::string> &words,
203 |                                      const std::vector<std::string> &postags) {
204 |     std::vector<std::string> tags;
205 |     if (model != NULL) ner_recognize(model, words, postags, tags);
206 |     else std::cerr << "NER: Model not loaded!" << std::endl;
207 |     return tags;
208 |   }
209 | 
210 |   /// Frees the handle; calling it again is a no-op.
211 |   void release() {
212 |     if (model == NULL) return;
213 |     ner_release_recognizer(model);
214 |     model = NULL;
215 |   }
216 | 
217 |   void *model;  // opaque LTP handle; NULL when none is held
218 | private:
219 |   /// Creates the handle unless one is already held.
220 |   void load(const char *model_path) {
221 |     if (model == NULL)
222 |       model = ner_create_recognizer(model_path);
223 |     else
224 |       std::cerr << "NER: Model reloaded!" << std::endl;
225 |   }
226 | };
227 | 
228 | // SRL result shapes; presumably (predicate index, [(role name, (begin, end))]).
229 | typedef std::pair<int, int> ArgRange;
230 | typedef std::pair<std::string, ArgRange> Arg;
231 | typedef std::pair<int, std::vector<Arg>> SementicRole;
232 | 
233 | /// Semantic role labeller; the srl_* calls take no handle, only a loaded flag.
234 | struct SementicRoleLabeller {
235 |   SementicRoleLabeller(const char *model_path) : loaded(false) {
236 |     load(model_path);
237 |   }
238 | 
239 |   /// Runs srl_dosrl() on the inputs; when nothing is loaded (including after
240 |   /// release()) it logs to stderr and returns an empty result.
241 |   std::vector<SementicRole> label(const std::vector<std::string> &words,
242 |                                   const std::vector<std::string> &postags,
243 |                                   const std::vector<ParseResult> &parse) {
244 |     std::vector<SementicRole> ret;
245 |     // srl_dosrl() gets a copy with every head lowered by one (presumably 0-based).
246 |     std::vector<ParseResult> shifted(parse);
247 |     for (std::size_t i = 0; i < shifted.size(); ++i)
248 |       --shifted[i].first;
249 |     if (loaded) srl_dosrl(words, postags, shifted, ret);
250 |     else std::cerr << "SRL: Model not loaded!" << std::endl;
251 |     return ret;
252 |   }
253 | 
254 |   /// Releases the SRL resource and clears the flag, so a repeat call is a no-op.
255 |   void release() {
256 |     if (!loaded) return;
257 |     srl_release_resource();
258 |     loaded = false;
259 |   }
260 | 
261 |   bool loaded;  // true once srl_load_resource() returned 0, until release() runs
262 | private:
263 |   void load(const char *model_path) {
264 |     loaded = (srl_load_resource(model_path) == 0);
265 |   }
266 | };
267 | 
268 | #ifdef SDPG
269 | #include "ltp/lstm_sdparser_dll.h"
270 | 
271 | typedef std::pair<std::string, int> SemanticArc;
272 | typedef std::vector<SemanticArc> SemanticNode;
273 | 
274 | /// Semantic dependency graph parser; the handle is created by load(), not here.
275 | struct SDGraphParser {
276 |   /// Creates the handle from `model_path` unless one is already held.
277 |   void load(const std::string &model_path) {
278 |     if (model == NULL)
279 |       model = lstmsdparser_create_parser(model_path.c_str());
280 |     else
281 |       std::cerr << "SDGraphParser: Model reloaded!" << std::endl;
282 |   }
283 | 
284 |   /// Turns each row from lstmsdparser_parse() into (label, target) arcs, skipping
285 |   /// "-NULL-" cells. With no handle it logs to stderr and returns empty.
286 |   std::vector<SemanticNode> parse(const std::vector<std::string> &words,
287 |                                   const std::vector<std::string> &postags) {
288 |     std::vector<std::vector<std::string>> rows;
289 |     std::vector<SemanticNode> ret;
290 |     if (model != NULL) lstmsdparser_parse(model, words, postags, rows);
291 |     else std::cerr << "SDGraphParser: Model not loaded!" << std::endl;
292 |     ret.reserve(rows.size());
293 |     for (const std::vector<std::string> &row : rows) {
294 |       SemanticNode node;
295 |       for (std::size_t j = 0; j < row.size(); ++j) {
296 |         if (row[j] == "-NULL-") continue;
297 |         // The last column maps to -1; any other column j maps to j + 1.
298 |         const int target = j + 1 < row.size() ? static_cast<int>(j + 1) : -1;
299 |         node.push_back(SemanticArc(row[j], target));
300 |       }
301 |       ret.push_back(node);
302 |     }
303 |     return ret;
304 |   }
305 | 
306 |   /// Frees the handle; calling it again is a no-op.
307 |   void release() {
308 |     if (model == NULL) return;
309 |     lstmsdparser_release_parser(model);
310 |     model = NULL;
311 |   }
312 | 
313 |   void *model = nullptr;  // initialized so load()/parse()/release() never read garbage
314 | };
315 | #endif
316 | 
317 | // Python bindings. Each model wrapper doubles as a context manager: __enter__
318 | // returns the very same Python object (returning the C++ object by value would
319 | // create a copy sharing the raw handle) and __exit__ calls release().
320 | PYBIND11_MODULE(pyltp, m) {
321 |   // __version__ is the VERSION_INFO compile definition when given, else "dev".
322 | #ifdef VERSION_INFO
323 |   m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO);
324 | #else
325 |   m.attr("__version__") = "dev";
326 | #endif
327 | 
328 |   py::class_<SentenceSplitter>(m, "SentenceSplitter")
329 |       .def(py::init<>())
330 |       .def_static("split", &SentenceSplitter::split);
331 | 
332 |   py::class_<Segmentor>(m, "Segmentor")
333 |       .def(py::init<const char *, const char *, const char *>(), "Init Segmentor",
334 |            py::arg("model_path"),
335 |            py::arg("lexicon_path") = nullptr,
336 |            py::arg("force_lexicon_path") = nullptr)
337 |       .def("segment", &Segmentor::segment)
338 |       .def("release", &Segmentor::release)
339 |       .def("__enter__", [](py::object self) { return self; },
340 |            "Enter the runtime context related to this object")
341 |       .def("__exit__",
342 |            [](Segmentor &s, py::object, py::object, py::object) { s.release(); },
343 |            "Exit the runtime context related to this object");
344 | 
345 |   py::class_<CustomizedSegmentor>(m, "CustomizedSegmentor")
346 |       .def(py::init<const char *, const char *, const char *>(), "Init CustomizedSegmentor",
347 |            py::arg("base_model_path"),
348 |            py::arg("customized_model_path") = nullptr,
349 |            py::arg("lexicon_path") = nullptr)
350 |       .def("segment", &CustomizedSegmentor::segment)
351 |       .def("release", &CustomizedSegmentor::release)
352 |       .def("__enter__", [](py::object self) { return self; },
353 |            "Enter the runtime context related to this object")
354 |       .def("__exit__",
355 |            [](CustomizedSegmentor &s, py::object, py::object, py::object) { s.release(); },
356 |            "Exit the runtime context related to this object");
357 | 
358 |   // NOTE(review): model_path defaults to nullptr here, unlike Segmentor, so a
359 |   // no-argument Postagger() forwards a null path to LTP -- confirm this is wanted.
360 |   py::class_<Postagger>(m, "Postagger")
361 |       .def(py::init<const char *, const char *>(),
362 |            "Init Postagger",
363 |            py::arg("model_path") = nullptr,
364 |            py::arg("lexicon_path") = nullptr)
365 |       .def("postag", &Postagger::postag)
366 |       .def("release", &Postagger::release)
367 |       .def("__enter__", [](py::object self) { return self; },
368 |            "Enter the runtime context related to this object")
369 |       .def("__exit__",
370 |            [](Postagger &s, py::object, py::object, py::object) { s.release(); },
371 |            "Exit the runtime context related to this object");
372 | 
373 |   py::class_<Parser>(m, "Parser")
374 |       .def(py::init<const char *>())
375 |       .def("parse", &Parser::parse)
376 |       .def("release", &Parser::release)
377 |       .def("__enter__", [](py::object self) { return self; },
378 |            "Enter the runtime context related to this object")
379 |       .def("__exit__",
380 |            [](Parser &s, py::object, py::object, py::object) { s.release(); },
381 |            "Exit the runtime context related to this object");
382 | 
383 |   py::class_<NamedEntityRecognizer>(m, "NamedEntityRecognizer")
384 |       .def(py::init<const char *>())
385 |       .def("recognize", &NamedEntityRecognizer::recognize)
386 |       .def("release", &NamedEntityRecognizer::release)
387 |       .def("__enter__", [](py::object self) { return self; },
388 |            "Enter the runtime context related to this object")
389 |       .def("__exit__",
390 |            [](NamedEntityRecognizer &s, py::object, py::object, py::object) { s.release(); },
391 |            "Exit the runtime context related to this object");
392 | 
393 |   py::class_<SementicRoleLabeller>(m, "SementicRoleLabeller")
394 |       .def(py::init<const char *>())
395 |       .def("label", &SementicRoleLabeller::label)
396 |       .def("release", &SementicRoleLabeller::release)
397 |       .def("__enter__", [](py::object self) { return self; },
398 |            "Enter the runtime context related to this object")
399 |       .def("__exit__",
400 |            [](SementicRoleLabeller &s, py::object, py::object, py::object) { s.release(); },
401 |            "Exit the runtime context related to this object");
402 | #ifdef SDPG
403 |   py::class_<SDGraphParser>(m, "SDGraphParser")
404 |       .def(py::init<>())
405 |       .def("load", &SDGraphParser::load)
406 |       .def("parse",&SDGraphParser::parse)
407 |       .def("release", &SDGraphParser::release)
408 |       .def("__enter__", [](py::object self) { return self; },
409 |            "Enter the runtime context related to this object")
410 |       .def("__exit__",
411 |            [](SDGraphParser &s, py::object, py::object, py::object) { s.release(); },
412 |            "Exit the runtime context related to this object");
413 | #endif
414 | }
415 | 


--------------------------------------------------------------------------------
/tests/basic_test.py:
--------------------------------------------------------------------------------
  1 | def test_main():  # placeholder test: exercises nothing and always passes
  2 |     assert True
3 | 


--------------------------------------------------------------------------------