├── .bumpversion.cfg ├── .gitignore ├── .pyup.yml ├── .travis.yml ├── LICENSE ├── README.rst ├── html5lib_truncation ├── __init__.py ├── filters.py ├── shortcuts.py └── utils.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── conftest.py ├── test_filters.py └── test_utils.py └── tox.ini /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | files = setup.py html5lib_truncation/__init__.py 3 | commit = True 4 | tag = True 5 | current_version = 0.1.0 6 | 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.gitignore.io 2 | 3 | ### Python ### 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Django stuff: 52 | *.log 53 | 54 | # Sphinx documentation 55 | docs/_build/ 56 | 57 | # PyBuilder 58 | target/ 59 | 60 | -------------------------------------------------------------------------------- /.pyup.yml: -------------------------------------------------------------------------------- 1 | # autogenerated pyup.io config file 2 | # see https://pyup.io/docs/configuration/ for all available options 3 | 4 | update: insecure 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.3" 5 | - "3.4" 6 | - "pypy" 7 | install: 8 | - "pip install ." 
9 | - "pip install pytest pytest-cov pytest-pep8 coveralls" 10 | script: "py.test" 11 | after_success: "coveralls" 12 | branches: 13 | only: 14 | - master 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2015 Jiangge Zhang 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 18 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 20 | OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | |Build Status| |Coverage Status| |PyPI Version| |Wheel Status| 2 | 3 | html5lib-truncation 4 | =================== 5 | 6 | ``html5lib-truncation`` is a html5lib_ filter implementation, which can 7 | truncate HTML to specific length in display, but never breaks HTML tags. 
8 | 9 | The simplest way to use it is the ``truncate_html`` shortcut function: 10 | 11 | .. code-block:: python 12 | 13 | >>> from html5lib_truncation import truncate_html 14 | >>> 15 | >>> html = u'
' 16 | >>> truncate_html(html, 8) 17 | u'A very' 18 | >>> truncate_html(html, 8, break_words=True) 19 | u'
A very ve' 20 | >>> truncate_html(html, 20, end='...') 21 | u'
A very very...' 22 | >>> truncate_html(html, 20, end='...', break_words=True) 23 | u'
A very very lon...' 24 | 25 | 26 | .. _html5lib: https://github.com/html5lib/html5lib-python 27 | 28 | 29 | Installation 30 | ------------ 31 | 32 | :: 33 | 34 | pip install html5lib-truncation 35 | 36 | Don't forget to put it into your ``requirements.txt`` or ``setup.py``. 37 | 38 | 39 | API Overview 40 | ------------ 41 | 42 | The core API of html5lib-truncation is the filter: 43 | 44 | .. code-block:: python 45 | 46 | import html5lib 47 | from html5lib_truncation import TruncationFilter 48 | 49 | etree = html5lib.parse(u'
') 50 | walker = html5lib.getTreeWalker('etree') 51 | 52 | stream = walker(etree) 53 | stream = TruncationFilter(stream, 20, end='...', break_words=True) 54 | 55 | serializer = html5lib.serializer.HTMLSerializer() 56 | serialized = serializer.serialize(stream) 57 | 58 | print(u''.join(serialized).strip()) 59 | 60 | The output is ``A very very lon...``.
61 |
62 |
63 | Issues
64 | ------
65 |
66 | If you want to report bugs or other issues, please create issues on
67 | `GitHub Issues <https://github.com/tonyseek/html5lib-truncation/issues>`_.
Return a truncated copy of the string. The length is specified
8 | with the first parameter which defaults to
9 | 255. If the second parameter is
10 | true the filter
11 | will cut the text at length. Otherwise it will discard the last word. If
12 | the text was in fact truncated it will append an ellipsis sign
13 | ("..."). If you
14 | want a different ellipsis sign than
15 | "..." you can specify it using the third
16 | parameter. Return a truncated copy of the string. The length is specified with '
9 | 'the first parameter which \n'
10 | '\n'
11 | ' '
12 | ' '
13 | '\n'
14 | '')
15 | result_b = (  # expected truncate_html(etree, 98, break_words=True) value, see test_truncation; NOTE(review): markup inside these literals looks stripped in this copy - confirm against the repository
16 | ' Return a truncated copy of the string. The length is specified\n'
17 | 'with the first parameter which defa \n'
18 | '\n'
19 | ' '
20 | ' '
21 | '\n'
22 | '')
23 | result_c = (  # expected truncate_html(etree, 98, end='...') value, see test_truncation; NOTE(review): markup inside these literals looks stripped in this copy - confirm against the repository
24 | ' Return a truncated copy of the string. The length is specified with '
25 | 'the first parameter... \n'
26 | '\n'
27 | ' '
28 | ' '
29 | '\n'
30 | '')
31 | result_d = (  # expected truncate_html(etree, 98, end='...', break_words=True) value, see test_truncation; NOTE(review): markup inside these literals looks stripped in this copy - confirm against the repository
32 | ' Return a truncated copy of the string. The length is specified\n'
33 | 'with the first parameter whic... \n'
34 | '\n'
35 | ' '
36 | ' '
37 | '\n'
38 | '')
39 |
40 |
def test_truncation(etree):
    """Compare ``truncate_html`` output at length 98 with the expected values.

    Covers the four combinations of the ``end`` and ``break_words`` options,
    each checked against its own module-level ``result_*`` constant.
    """
    limit = 98

    # Defaults: no ``end`` marker, ``break_words`` not passed.
    truncated = truncate_html(etree, limit)
    assert truncated == result_a

    # ``break_words=True`` only.
    truncated = truncate_html(etree, limit, break_words=True)
    assert truncated == result_b

    # ``end='...'`` only.
    truncated = truncate_html(etree, limit, end='...')
    assert truncated == result_c

    # Both options together.
    truncated = truncate_html(etree, limit, end='...', break_words=True)
    assert truncated == result_d
46 |
47 |
def test_truncation_with_string():
    """Pass a string instead of a tree to ``truncate_html``.

    Feeding ``result_a`` back in with the same length of 98 must return a
    value equal to ``result_a`` itself.
    """
    source = result_a
    truncated = truncate_html(source, 98)
    assert truncated == source
50 |
51 |
def test_iterable(etree):
    """Check the ``tree`` attribute and iteration protocol of the filter.

    Wraps an ``etree`` tree-walker stream in ``TruncationFilter`` and
    verifies the object identities involved.
    """
    tree_walker = getTreeWalker('etree')
    filtered = TruncationFilter(tree_walker(etree), 98, end='...')

    # The filter exposes the very tree object the walker was given.
    assert filtered.tree is etree

    # ``iter()`` returns a distinct object, and that object is its own
    # iterator.
    tokens = iter(filtered)
    assert tokens is not filtered
    assert iter(tokens) is tokens
62 |
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import unicode_literals
2 |
3 | from html5lib_truncation.utils import truncate_sentence
4 |
5 |
def test_truncate_sentence():
    """Exercise ``truncate_sentence`` with ``break_words`` and ``padding``."""
    sentence = 'Three Rings for the Elven-kings under the sky'
    limit = 18

    # Omitting ``break_words`` gives the same result as passing False.
    assert truncate_sentence(sentence, limit) == 'Three Rings for'
    assert (truncate_sentence(sentence, limit, break_words=True)
            == 'Three Rings for th')
    assert (truncate_sentence(sentence, limit, break_words=False)
            == 'Three Rings for')

    # With ``padding=9`` the expected results are shorter.
    assert (truncate_sentence(sentence, limit, break_words=True, padding=9)
            == 'Three Rin')
    assert (truncate_sentence(sentence, limit, break_words=False, padding=9)
            == 'Three')
15 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py27,py33,py34,pypy
3 | [testenv]
4 | deps =
5 | pytest
6 | pytest-cov
7 | pytest-pep8
8 | commands =
9 | py.test
10 |
--------------------------------------------------------------------------------