├── .bandit
├── .github
├── FUNDING.yml
├── dependabot.yml
└── workflows
│ └── ci.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── CITATION
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── SECURITY.md
├── bin
├── nb_md.sh
├── preview.py
├── push_pypi.sh
└── vis_doc.py
├── code_of_conduct.md
├── dat
├── ars.txt
├── cfc.txt
├── gen.txt
├── lee.txt
├── mih.txt
└── suz.txt
├── deprecated
├── WARNING.md
├── example.ipynb
├── oldsrc.py
└── test.py
├── docs
├── ack.md
├── apidocs.yml
├── assets
│ ├── favicon.png
│ ├── logo.png
│ └── noam.jpg
├── biblio.jinja
├── biblio.md
├── build.md
├── codecov_io.svg
├── depend.md
├── faq.md
├── glossary.jinja
├── glossary.md
├── index.md
├── javascripts
│ └── config.js
├── mkrefs.ttl
├── mkrefs.yml
├── noam.jpg
├── overview.md
├── ref.jinja
├── ref.md
├── setup.md
├── start.md
├── stylesheets
│ └── extra.css
├── talks.md
├── todo.md
└── tutorial.md
├── environment.yml
├── examples
├── explain_algo.ipynb
├── explain_summ.ipynb
└── sample.ipynb
├── lgtm.yml
├── mkdocs.yml
├── pkg_doc.cfg
├── pkg_doc.py
├── pyfixdoc.py
├── pylintrc
├── pyproject.toml
├── pytextrank
├── __init__.py
├── base.py
├── biasedrank.py
├── positionrank.py
├── topicrank.py
├── util.py
└── version.py
├── requirements-dev.txt
├── requirements-viz.txt
├── requirements.txt
├── sample.py
├── setup.py
├── tests
├── conftest.py
├── test_base.py
├── test_biasedrank.py
├── test_positionrank.py
├── test_topicrank.py
└── trace.py
├── tmp_api.py
└── wip
└── error.ipynb
/.bandit:
--------------------------------------------------------------------------------
1 | [bandit]
2 | exclude_dirs:
3 | - tests
4 | - examples
5 | exclude: setup.py
6 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: ceteri
2 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
# Dependabot settings; every available option is described at:
# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates

version: 2
updates:
  # check the pip dependencies declared at the repository root, once a day
  - package-ecosystem: 'pip'
    directory: '/'
    schedule:
      interval: 'daily'
10 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
# Continuous integration: runs the test suite on pushes, pull requests,
# and manual dispatch.
name: CI

on:
  - push
  - pull_request
  - workflow_dispatch

jobs:
  # pre-commit:
  #   name: Run pre-commit
  #   runs-on: ubuntu-latest
  #   steps:
  #   - uses: actions/checkout@v3
  #   - uses: actions/setup-python@v3
  #   - uses: pre-commit/action@v3.0.0

  test:
    name: Tests for Python ${{ matrix.python-version }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # versions stay quoted so that "3.10" is not parsed as the float 3.1
        python-version:
          - "3.7"
          - "3.8"
          - "3.9"
          - "3.10"
      # keep the remaining matrix jobs running when one version fails
      fail-fast: false
    # needs: pre-commit

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          pip install -e .
          spacy download en_core_web_sm
          pip install pre-commit pytest

      - name: Run tests
        run: |
          pytest
40 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # misc.
2 | venv/
3 | README.rst
4 | *~
5 |
6 | # files generated by running PyTextRank
7 | *.dot
8 | graph.png
9 |
10 | # files generated by Jupyter notebooks
11 | .ipynb_checkpoints/
12 |
13 | # files generated by building the documentation
14 | docs/biblio.md
15 | docs/glossary.md
16 | docs/ex*.md
17 | docs/ex*_files
18 | docs/sample.md
19 | docs/sample_files
20 | site/
21 | ptr.tgz
22 |
23 | # codecov.io
24 | .cc_token
25 | .coverage
26 | coverage.xml
27 |
28 | # Compiled python modules.
29 | *.pyc
30 |
31 | # Setuptools distribution folder.
32 | /dist/
33 | /build/
34 |
35 | # Python egg metadata, regenerated from source files by setuptools.
36 | .eggs/
37 | /*.egg-info
38 | /*.egg
39 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
default_stages: [commit, push]
default_language_version:
  python: python3
# NB: every `exclude` value in this file is a regular expression that
# pre-commit matches against file paths, not a comma-separated list
exclude: "deprecated"
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v3.4.0
    hooks:
      - id: check-added-large-files
      - id: check-builtin-literals
      - id: check-executables-have-shebangs
      - id: check-merge-conflict
      - id: check-yaml
      - id: debug-statements
      - id: detect-private-key
  - repo: https://github.com/PyCQA/bandit
    rev: 1.7.0
    hooks:
      - id: bandit  # security vulnerabilities
        args: ["--exclude", "setup.py,bin,tests"]
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v0.812
    hooks:
      - id: mypy  # type annotations
        # a single regex using alternation: the earlier value
        # `^tests/,^examples/` contained a literal comma and a second `^`
        # anchor, so it could never match a path and excluded nothing
        exclude: '^(tests|examples)/'
  - repo: https://github.com/PyCQA/pylint
    rev: pylint-2.7.2
    hooks:
      - id: pylint
        exclude: pyfixdoc
  - repo: https://github.com/codespell-project/codespell
    rev: v2.0.0
    hooks:
      - id: codespell  # spell-check source code
        args: ["pytextrank/*.py", "*.md", "docs/*.md"]
        exclude: ^examples/
        language: python
        types: [text]
41 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # PyTextRank changelog
2 |
3 | ## 3.2.4
4 |
5 | 2022-07-27
6 |
7 | * better support for "ru" and other languages without `noun_chunks` support in spaCy
8 | * updated example notebook to illustrate `TopicRank` algorithm
9 | * made the node bias setting case-independent for `Biased Textrank` algorithm; kudos @Ankush-Chander
10 | * updated summarization tests; kudos @tomaarsen
11 | * reworked some unit tests to be less brittle, less dependent on specific spaCy point releases
12 |
13 |
14 | ## 3.2.3
15 |
16 | 2022-03-06
17 |
18 | * handles missing `noun_chunks` in some language models (e.g., "ru")
19 | * add *TopicRank* algorithm; kudos @tomaarsen
20 | * improved test suite; fixed tests for newer spaCy releases; kudos @tomaarsen
21 |
22 |
23 | ## 3.2.2
24 |
25 | 2021-10-09
26 |
27 | * adjust for changes in `NetworkX` where they are removing `SciPy` as a dependency; kudos @clabornd, @tomaarsen, @duarteocarmo
28 | * more scrubber examples; kudos @dayalstrub-cma
29 |
30 |
31 | ## 3.2.1
32 |
33 | 2021-07-24
34 |
35 | * add "paragraph" option into `summary()` function; kudos @CaptXiong
36 |
37 |
38 | ## 3.2.0
39 |
40 | 2021-07-17
41 |
42 | * **NB: THIS SCRUBBER UPDATE WILL BREAK PREVIOUS RELEASES**
43 | * allow `Span` as scrubber argument, to align with `spaCy` 3.1.x; kudos @Ankush-Chander
44 | * add `lgtm` code reviews (slow, not integrating into GitHub PRs directly)
45 | * evaluating `grayskull` to generate a conda-forge recipe
46 | * add use of `pipdeptree` to analyze dependencies
47 | * use KG from `biblio.ttl` to generate bibliography
48 | * fixed overlooked comment from earlier code; kudos @debraj135
49 | * add visualisation using `altair`; kudos @louisguitton
50 | * add scrubber usage in sample notebook; kudos @Ankush-Chander
51 | * integrating use of `MkRefs` to generate semantic reference pages in `docs`
52 |
53 |
54 | ## 3.1.1
55 |
56 | 2021-03-25
57 |
58 | * fix the span length calculation in explanation notebook; kudos @Ankush-Chander
59 | * add `BiasedTextRank` by @Ankush-Chander (many thanks!)
60 | * add conda `environment.yml` plus instructions
61 | * use `bandit` to check for security issues
62 | * use `codespell` to check for spelling errors
63 | * add `pre-commit` checks in general
64 | * update `doc._.phrases` in the call to `change_focus()` so the summarization will sync with the latest focus
65 |
66 |
67 | ## 3.1.0
68 |
69 | 2021-03-12
70 |
71 | * rename `master` branch to `main`
72 | * add a factory class that assigns each doc its own Textrank object; kudos @Ankush-Chander
73 | * refactor the stopwords feature as a constructor argument
74 | * add `get_unit_vector()` method to expose the characteristic *unit vector*
75 | * add `calc_sent_dist()` method to expose the sentence distance measures (for summarization)
76 | * include a unit test for summarization
77 | * updated contributor instructions
78 | * `pylint` coverage for code checking
79 | * linking definitions and citations in source code apidocs to our online docs
80 | * updated links on PyPi
81 |
82 |
83 | ## 3.0.1
84 |
85 | 2021-02-27
86 |
87 | * `mypy` coverage for type annotations
88 | * add DOI to README and CITATION
89 | * now deploying online docs at <https://derwen.ai/docs/ptr/>
90 |
91 |
92 | ## 3.0.0
93 |
94 | 2021-02-14
95 |
96 | * **THIS WILL BREAK THINGS!!!**
97 | * support for `spaCy` 3.0.x; kudos @Lord-V15
98 | * full integration of `PositionRank`
99 | * migrated all unit tests to `pytest`
100 | * removed use of `logger` for debugging, introducing `icecream` instead
101 |
102 |
103 | ## 2.1.0
104 |
105 | 2021-01-31
106 |
107 | * add `PositionRank` by @louisguitton (many thanks!)
108 | * fixes chunk in `explain_summ.ipynb` by @anna-droid-beep
109 | * add option `preserve_order` in TextRank.summary by @kavorite
110 | * tested with `spaCy` 2.3.5
111 |
112 |
113 | ## 2.0.3
114 |
115 | 2020-09-15
116 |
117 | * try-catch `ZeroDivisionError` in summary method -- kudos @shyamcody
118 | * tested with updated dependencies: `spaCy` 2.3.x and `NetworkX` 2.5
119 |
120 |
121 | ## 2.0.2
122 |
123 | 2020-05-20
124 |
125 | * fixed default value of `._.phrases` to allow for disabling PTR in a pipeline
126 |
127 |
128 | ## 2.0.1
129 |
130 | 2020-03-02
131 |
132 | * fix `KeyError` issue for pre Python 3.6
133 | * integrated `codecov.io`
134 | * added PyTextRank to the spaCy uniVerse
135 | * fixed README.md instructions to download `en_core_web_sm`
136 |
137 |
138 | ## 2.0.0
139 |
140 | 2019-11-05
141 |
142 | * refactored library to run as a `spaCy` extension
143 | * supports multiple languages
144 | * significantly faster, with less memory required
145 | * better extraction of top-ranked phrases
146 | * changed license to MIT
147 | * uses lemma-based stopwords for more precise control
148 | * WIP toward integration with knowledge graph use cases
149 |
150 |
151 | ## 1.2.1
152 |
153 | 2019-11-01
154 |
155 | * fixed error in installation instructions
156 |
157 |
158 | ## 1.2.0
159 |
160 | 2019-11-01
161 |
162 | * updated to fix for current versions of `spaCy` and `NetworkX` -- kudos @dimmu
163 | * removed deprecated argument -- kudos @laxatives
164 |
165 |
166 | ## 1.1.1
167 |
168 | 2017-09-15
169 |
170 | * patch disables use of NER in `spaCy` until an intermittent bug is resolved.
171 | * will probably replace named tuples with `spaCy` spans instead.
172 |
173 |
174 | ## 1.1.0
175 |
176 | 2017-06-07
177 |
178 | * replaced use of `TextBlob` with `spaCy`
179 | * updated other Py dependencies
180 | * better handling for UTF-8
181 |
182 |
183 | ## 1.0.1
184 |
185 | 2017-04-30
186 |
187 | * updated Jupyter notebook example -- kudos @kjam
188 | * better install/import for `aptagger`
189 | * comparing `spaCy` performance with `TextBlob`
190 |
--------------------------------------------------------------------------------
/CITATION:
--------------------------------------------------------------------------------
1 | @software{PyTextRank,
2 | author = {Paco Nathan},
3 | title = {{PyTextRank, a Python implementation of TextRank for phrase extraction and summarization of text documents}},
4 | year = 2016,
5 | publisher = {Derwen},
6 | doi = {10.5281/zenodo.4602393},
7 | url = {https://github.com/DerwenAI/pytextrank}
8 | }
9 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # Welcome!
5 |
6 | Thanks for your interest in contributing to **PyTextRank** 🎉
7 |
8 | This page gives a quick overview of how things are organized and most
9 | importantly, how to get involved.
10 |
11 |
12 | ## Issues and bug reports
13 |
14 | First, if you want to report a potential issue with this library, please
15 | [do a quick search](https://github.com/DerwenAI/pytextrank/issues)
16 | to see if the issue has already been reported.
17 | If so, it's best to simply leave a comment on an existing issue,
18 | rather than create a new one.
19 | Older issues may also include helpful info and show solutions to
20 | commonly encountered questions.
21 |
22 |
23 | ## Opening new issues
24 |
25 | When opening a
26 | [new issue](https://github.com/DerwenAI/pytextrank/issues/new/choose),
27 | please use a **descriptive title** and include information about your
28 | **environment** and library **installation**:
29 |
30 | * Which operating system and version number?
31 | * Which version of Python?
32 | * How did you install? `pip`, `conda`, clone repo then `setup.py`, etc.
33 |
34 | Try to provide as many details as possible.
35 | What exactly is going wrong?
36 | _How_ is it failing?
37 | Is there an error?
38 |
39 | Please understand that in general our developer community does not
40 | provide support via email, Twitter DMs, and other 1:1 messaging.
41 | We believe that help is much more valuable when it gets **shared
42 | publicly**, so that more people can benefit.
43 |
44 |
45 | ## Code of conduct
46 |
47 | In all communications and collaborations, we adhere to the
48 | [Contributor Covenant Code of Conduct](https://github.com/DerwenAI/pytextrank/blob/main/code_of_conduct.md).
49 | By participating, you are expected to follow this code.
50 |
51 |
52 | ## Developer community
53 |
54 | If you'd like to contribute to this open source project, the best way
55 | to get involved with our developer community is to participate in our
56 | [public office hours](https://www.notion.so/KG-Community-Events-Calendar-8aacbe22efa94d9b8b39b7288e22c2d3)
57 | events.
58 | First join the
59 | [*Graph-Based Data Science*](https://www.linkedin.com/groups/6725785/)
60 | group on LinkedIn where these meetings get announced.
61 | We'll also have other developer discussions on that forum, along with
62 | related updates, news, conference coupons, etc.
63 |
64 | The
65 | [Knowledge Graph Conference](https://derwen.ai/docs/kgl/glossary/#knowledge-graph-conference)
66 | hosts several community resources where you can post questions and get
67 | help about **PyTextRank** and related topics.
68 | Many of our developers are involved there too:
69 |
70 | * [community Slack](https://knowledgegraphconf.slack.com/ssb/redirect) – specifically on the `#ask` channel
71 |
72 | * [Knowledge Tech Q&A site](https://answers.knowledgegraph.tech/) for extended questions posed to experts
73 |
74 |
75 | ## Contributing to the code base
76 |
77 | You don't have to be an expert to contribute, and we're happy to help
78 | you get started.
79 | We'll try to use the
80 | [`good first issue`](https://github.com/DerwenAI/pytextrank/labels/good%20first%20issue)
81 | tags to mark bugs and feature requests that are easy and self-contained.
82 |
83 | If you've decided to take on one of these problems, it's best to
84 | [fork the repo](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-forks)
85 | and do development and testing in your own fork first.
86 |
87 | Please follow the conventions for code formatting, type annotations,
88 | unit tests, code linting, naming conventions, and so on.
89 | Understand that we will not be able to accept pull requests that make
90 | *major overhauls* of the code base or completely change our shared
91 | work on formatting, testing, etc.
92 |
93 | If you need to incorporate other libraries, please discuss this with
94 | the other developers.
95 | There may be issues regarding point releases and compatibility that
96 | would have impact on other parts of the code base.
97 |
98 | Once you're making good progress, don't forget to add a quick comment
99 | to the original issue.
100 | You can also use the issue to ask questions, or share your work in
101 | progress.
102 | Then when you're ready to submit code for review, please use a
103 | [pull request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request)
104 | on our `main` repo branch.
105 |
106 |
107 | ## Project roadmap
108 |
109 | The
110 | ["Graph-Based Data Science"](https://derwen.ai/s/kcgh)
111 | talk describes the **PyTextRank** open source project in more detail,
112 | and discusses parts of our roadmap.
113 | In other words, what new features and integrations are we working toward?
114 |
115 | See also our:
116 |
117 | * [Project Board](https://github.com/DerwenAI/pytextrank/projects/1)
118 | * [Milestones](https://github.com/DerwenAI/pytextrank/milestones)
119 |
120 | Suggestions and contributions for our documentation and tutorial are
121 | always welcomed.
122 | These tend to be good starting points for new contributors: you'll get
123 | familiar with our code samples and other resources through that.
124 |
125 | Many thanks!
126 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016-2022 Derwen, Inc.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include CITATION
2 | include LICENSE
3 | include README.md
4 | include pyproject.toml
5 | include requirements.txt
6 | include setup.py
7 | include tests/*.py
8 | prune .ipynb_checkpoints
9 | # added by check-manifest
10 | include *.md
11 | include *.py
12 | include *.txt
13 | include *.yaml
14 | include *.yml
15 | include pylintrc
16 | recursive-include bin *.py
17 | recursive-include bin *.sh
18 | recursive-include dat *.txt
19 | recursive-include deprecated *.ipynb
20 | recursive-include deprecated *.md
21 | recursive-include deprecated *.py
22 | recursive-include docs *.css
23 | recursive-include docs *.jinja
24 | recursive-include docs *.jpg
25 | recursive-include docs *.js
26 | recursive-include docs *.md
27 | recursive-include docs *.png
28 | recursive-include docs *.svg
29 | recursive-include docs *.ttl
30 | recursive-include docs *.yml
31 | recursive-include examples *.ipynb
32 | recursive-include tests *.py
33 | recursive-include wip *.ipynb
34 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyTextRank
2 |
3 | [DOI: 10.5281/zenodo.4637885](https://doi.org/10.5281/zenodo.4637885)
4 | 
5 | 
6 | 
7 | [Checked with mypy](http://mypy-lang.org/)
8 | [security: bandit](https://github.com/PyCQA/bandit)
9 | 
10 | 
11 | 
12 |
13 | **PyTextRank** is a Python implementation of *TextRank* as a
14 | [spaCy pipeline extension](https://spacy.io/universe/project/spacy-pytextrank),
15 | for graph-based natural language work -- and related knowledge graph practices.
16 | This includes the family of
17 | [*textgraph*](https://derwen.ai/docs/ptr/glossary/#textgraphs) algorithms:
18 |
19 | - *TextRank* by [[mihalcea04textrank]](https://derwen.ai/docs/ptr/biblio/#mihalcea04textrank)
20 | - *PositionRank* by [[florescuc17]](https://derwen.ai/docs/ptr/biblio/#florescuc17)
21 | - *Biased TextRank* by [[kazemi-etal-2020-biased]](https://derwen.ai/docs/ptr/biblio/#kazemi-etal-2020-biased)
22 | - *TopicRank* by [[bougouin-etal-2013-topicrank]](https://derwen.ai/docs/ptr/biblio/#bougouin-etal-2013-topicrank)
23 |
24 | Popular use cases for this library include:
25 |
26 | - *phrase extraction*: get the top-ranked phrases from a text document
27 | - low-cost *extractive summarization* of a text document
28 | - help infer concepts from unstructured text into more structured representation
29 |
30 | See our full documentation at:
31 |
32 |
33 | ## Getting Started
34 |
35 | See the ["Getting Started"](https://derwen.ai/docs/ptr/start/)
36 | section of the online documentation.
37 |
38 | To install from [PyPi](https://pypi.python.org/pypi/pytextrank):
39 | ```
40 | python3 -m pip install pytextrank
41 | python3 -m spacy download en_core_web_sm
42 | ```
43 |
44 | If you work directly from this Git repo, be sure to install the
45 | dependencies as well:
46 | ```
47 | python3 -m pip install -r requirements.txt
48 | ```
49 |
50 | Alternatively, to install dependencies using `conda`:
51 | ```
52 | conda env create -f environment.yml
53 | conda activate pytextrank
54 | ```
55 |
56 | Then to use the library with a simple use case:
57 | ```python
58 | import spacy
59 | import pytextrank
60 |
61 | # example text
62 | text = "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types systems and systems of mixed types."
63 |
64 | # load a spaCy model, depending on language, scale, etc.
65 | nlp = spacy.load("en_core_web_sm")
66 |
67 | # add PyTextRank to the spaCy pipeline
68 | nlp.add_pipe("textrank")
69 | doc = nlp(text)
70 |
71 | # examine the top-ranked phrases in the document
72 | for phrase in doc._.phrases:
73 | print(phrase.text)
74 | print(phrase.rank, phrase.count)
75 | print(phrase.chunks)
76 | ```
77 |
78 | See the **tutorial notebooks** in the `examples` subdirectory for
79 | sample code and patterns to use in integrating **PyTextRank** with
80 | related libraries in Python:
81 |
82 |
83 |
84 |
85 | Contributing Code
86 |
87 | We welcome people getting involved as contributors to this open source
88 | project!
89 |
90 | For detailed instructions please see:
91 | [CONTRIBUTING.md](https://github.com/DerwenAI/pytextrank/blob/main/CONTRIBUTING.md)
92 |
93 |
94 |
95 | Build Instructions
96 |
97 |
98 | Note: unless you are contributing code and updates,
99 | in most use cases you won't need to build this package locally.
100 |
101 |
102 | Instead, simply install from
103 | [PyPi](https://pypi.python.org/pypi/pytextrank)
104 | or use [Conda](https://docs.conda.io/).
105 |
106 | To set up the build environment locally, see the
107 | ["Build Instructions"](https://derwen.ai/docs/ptr/build/)
108 | section of the online documentation.
109 |
110 |
111 |
112 | Semantic Versioning
113 |
114 | Generally speaking the major release number of PyTextRank
115 | will track with the major release number of the associated spaCy
116 | version.
117 |
118 | See:
119 | [CHANGELOG.md](https://github.com/DerwenAI/pytextrank/blob/main/CHANGELOG.md)
120 |
121 |
122 |
127 |
128 |
129 | ## License and Copyright
130 |
131 | Source code for **PyTextRank** plus its logo, documentation, and examples
132 | have an [MIT license](https://spdx.org/licenses/MIT.html) which is
133 | succinct and simplifies use in commercial applications.
134 |
135 | All materials herein are Copyright © 2016-2024 Derwen, Inc.
136 |
137 |
138 | ## Attribution
139 |
140 | Please use the following BibTeX entry for citing **PyTextRank** if you
141 | use it in your research or software:
142 | ```bibtex
143 | @software{PyTextRank,
144 | author = {Paco Nathan},
145 | title = {{PyTextRank, a Python implementation of TextRank for phrase extraction and summarization of text documents}},
146 | year = 2016,
147 | publisher = {Derwen},
148 | doi = {10.5281/zenodo.4637885},
149 | url = {https://github.com/DerwenAI/pytextrank}
150 | }
151 | ```
152 |
153 | Citations are helpful for the continued development and maintenance of
154 | this library.
155 | For example, see our citations listed on
156 | [Google Scholar](https://scholar.google.com/scholar?q=related:5tl6J4xZlCIJ:scholar.google.com/&scioq=&hl=en&as_sdt=0,5).
157 |
158 |
159 | ## Kudos
160 |
161 | Many thanks to our open source [sponsors](https://github.com/sponsors/ceteri);
162 | and to our contributors:
163 | [@ceteri](https://github.com/ceteri),
164 | [@louisguitton](https://github.com/louisguitton),
165 | [@Ankush-Chander](https://github.com/Ankush-Chander),
166 | [@tomaarsen](https://github.com/tomaarsen),
167 | [@CaptXiong](https://github.com/CaptXiong),
168 | [@Lord-V15](https://github.com/Lord-V15),
169 | [@anna-droid-beep](https://github.com/anna-droid-beep),
170 | [@dvsrepo](https://github.com/dvsrepo),
171 | [@clabornd](https://github.com/clabornd),
172 | [@dayalstrub-cma](https://github.com/dayalstrub-cma),
173 | [@kavorite](https://github.com/kavorite),
174 | [@0dB](https://github.com/0dB),
175 | [@htmartin](https://github.com/htmartin),
176 | [@williamsmj](https://github.com/williamsmj/),
177 | [@mattkohl](https://github.com/mattkohl),
178 | [@vanita5](https://github.com/vanita5),
179 | [@HarshGrandeur](https://github.com/HarshGrandeur),
180 | [@mnowotka](https://github.com/mnowotka),
181 | [@kjam](https://github.com/kjam),
182 | [@SaiThejeshwar](https://github.com/SaiThejeshwar),
183 | [@laxatives](https://github.com/laxatives),
184 | [@dimmu](https://github.com/dimmu),
185 | [@JasonZhangzy1757](https://github.com/JasonZhangzy1757),
186 | [@jake-aft](https://github.com/jake-aft),
187 | [@junchen1992](https://github.com/junchen1992),
188 | [@shyamcody](https://github.com/shyamcody),
189 | [@chikubee](https://github.com/chikubee);
190 | also to [@mihalcea](https://github.com/mihalcea) who leads outstanding NLP research work,
191 | encouragement from the wonderful folks at Explosion who develop [spaCy](https://github.com/explosion/spaCy),
192 | plus general support from [Derwen, Inc.](https://derwen.ai/)
193 |
194 | ## Star History
195 |
196 | [Star History Chart](https://star-history.com/#derwenai/pytextrank&Date)
197 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | Versions which are currently being supported with security updates:
6 |
7 | | Version | Supported |
8 | | ------- | ------------------ |
9 | | > 0.2 | :white_check_mark: |
10 |
11 | ## Reporting a Vulnerability
12 |
13 | To report a vulnerability, please create a new [*issue*](https://github.com/DerwenAI/pytextrank/issues).
14 | We will be notified immediately, and will attempt to respond to the reported issue promptly.
15 |
--------------------------------------------------------------------------------
/bin/nb_md.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Convert each Jupyter notebook in `examples/` into a Markdown page
# under `docs/`, then post-process the generated page with
# `bin/vis_doc.py`.

# Options are set here rather than on the shebang line: Linux passes
# everything after the interpreter path as ONE argument, so
# `#!/bin/bash -e -x` hands bash the single string "-e -x", which it
# rejects as an invalid option.
set -e -x

for notebook_path in examples/*.ipynb; do
    # with no matching notebooks the glob stays literal; skip that entry
    [ -e "$notebook_path" ] || continue

    notebook=$(basename "$notebook_path")
    stem=$(basename "$notebook_path" .ipynb)

    # work on a temporary copy inside `docs/` so that the converted
    # Markdown lands there; quoting keeps paths with spaces intact
    cp "$notebook_path" "docs/$notebook"
    jupyter nbconvert "docs/$notebook" --to markdown
    python3 bin/vis_doc.py "docs/$stem.md"
    rm "docs/$notebook"
done
14 |
--------------------------------------------------------------------------------
/bin/preview.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | from flask import Flask, redirect, send_from_directory, url_for # pylint: disable=E0401
5 | from pathlib import PurePosixPath
6 | import os
7 |
# where the generated documentation lives, and where it gets served
DOCS_PORT = 8000
DOCS_FILES = "../site"
DOCS_ROUTE = "/docs/"

# the generated site directory doubles as static folder and template folder
APP = Flask(__name__, template_folder=DOCS_FILES, static_folder=DOCS_FILES)

APP.config.update(
    DEBUG=False,
    MAX_CONTENT_LENGTH=50 * 1024 * 1024,  # i.e., 52428800
    SECRET_KEY="Technically, I remain uncommitted.",
    SEND_FILE_MAX_AGE_DEFAULT=3000,
)
18 |
19 |
@APP.route(DOCS_ROUTE, methods=["GET"])
@APP.route(DOCS_ROUTE + "<path:path>", methods=["GET"], defaults={"path": None})
@APP.route(DOCS_ROUTE + "<path:path>", methods=["GET"])
def static_proxy (path=""):
    """Serve static files from the /site directory.

    The `<path:path>` converter in the routes captures the remainder of
    the URL after the docs route, slashes included, and passes it in as
    `path`.

    path:
    location of the requested file, relative to the docs route; an empty
    string or `None` requests the top-level page

    returns:
    the result of `send_from_directory()` for the resolved file
    """
    # one of the routes defaults `path` to `None`; normalize that to the
    # empty string so `os.path.join()` below never receives `None`
    path = path or ""
    suffix = PurePosixPath(path).suffix

    # any request without one of these asset suffixes is treated as a
    # directory-style page and resolved to its `index.html`
    if suffix not in [".css", ".js", ".map", ".png", ".svg", ".xml"]:
        path = os.path.join(path, "index.html")

    return send_from_directory(DOCS_FILES, path)
34 |
35 |
@APP.route("/index.html")
@APP.route("/home/")
@APP.route("/")
def home_redirects():
    """Redirect the root-level URLs to the generated documentation
    microsite, which is served by `static_proxy`.

    The build.md page has more details.
    """
    docs_url = url_for("static_proxy")
    return redirect(docs_url)
45 |
46 |
if __name__ == "__main__":
    # The server binds to every network interface (0.0.0.0), so debug mode
    # must stay off: the Werkzeug interactive debugger allows arbitrary code
    # execution by anyone able to reach the port. Turning it off here also
    # agrees with the `DEBUG = False` application setting.
    APP.run(host="0.0.0.0", port=DOCS_PORT, debug=False)
49 |
--------------------------------------------------------------------------------
/bin/push_pypi.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Build the distribution from a clean state, validate it, then upload
# it with `twine`.

# Options are set here rather than on the shebang line: Linux passes
# everything after the interpreter path as ONE argument, so
# `#!/bin/bash -e -x` hands bash the single string "-e -x", which it
# rejects as an invalid option. Without `-e` in effect, a failed build
# or check would not stop the upload.
set -e -x

# remove the artifacts of any previous build
rm -rf dist build pytextrank.egg-info
python3 -m build
twine check dist/*

# this assumes the use of `~/.pypirc`
# https://packaging.python.org/en/latest/specifications/pypirc/

twine upload ./dist/* --verbose
11 |
--------------------------------------------------------------------------------
/bin/vis_doc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | from selenium import webdriver # pylint: disable=E0401
5 | from selenium.webdriver.chrome.options import Options # pylint: disable=E0401
6 | import os
7 | import pathlib
8 | import re
9 | import sys
10 | import time
11 |
12 | PAT_HEADER = re.compile(r"^(```python\n\# for use.*production:\n.*\n```\n)", re.MULTILINE)
13 | PAT_IFRAME = re.compile(r"^(\