├── .coveragerc
├── .gitignore
├── .gitlab-ci.yml
├── .travis.yml
├── CHANGES
├── LICENSE
├── MANIFEST.in
├── README.rst
├── docs
├── _static
│ └── custom.css
├── _templates
│ └── layout.html
├── changelog.rst
├── conf.py
├── css3.rst
├── extending.rst
├── hacking.rst
├── index.rst
└── parsing.rst
├── setup.cfg
├── setup.py
└── tinycss
├── __init__.py
├── color3.py
├── css21.py
├── decoding.py
├── fonts3.py
├── page3.py
├── parsing.py
├── speedups.pyx
├── tests
├── __init__.py
├── speed.py
├── test_api.py
├── test_color3.py
├── test_css21.py
├── test_decoding.py
├── test_fonts3.py
├── test_page3.py
└── test_tokenizer.py
├── token_data.py
├── tokenizer.py
└── version.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 |
4 | [report]
5 | exclude_lines =
6 | pragma: no cover
7 | def __repr__
8 | except ImportError
9 | omit =
10 | tinycss/tests/speed.py
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.c
3 | *.so
4 | *.egg-info
5 | /.coverage
6 | /htmlcov
7 | /build
8 | /dist
9 | /.tox
10 | /MANIFEST
11 | /docs/_build
12 | /env
13 |
--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
1 | before_script:
2 | - pip install -U setuptools
3 | - pip install Cython
4 | - pip install --upgrade -e .[test]
5 |
6 | .before_script_alpine: &before_alpine
7 | before_script:
8 | - apk add --no-cache openssl gcc musl-dev
9 | - pip install Cython setuptools
10 | - pip install --upgrade -e .[test]
11 |
12 | .test_template: &test
13 | script:
14 | - python setup.py test
15 |
16 | python 2.7alpine:
17 | image: python:2.7-alpine
18 | <<: *before_alpine
19 | <<: *test
20 |
21 | python 3.3alpine:
22 | image: python:3.3-alpine
23 | <<: *before_alpine
24 | <<: *test
25 |
26 | python 3.4alpine:
27 | image: python:3.4-alpine
28 | <<: *before_alpine
29 | <<: *test
30 |
31 | python 3.5alpine:
32 | image: python:3.5-alpine
33 | <<: *before_alpine
34 | <<: *test
35 |
36 | python 3.6alpine:
37 | image: python:3.6-alpine
38 | <<: *before_alpine
39 | <<: *test
40 |
41 | python pypy:
42 | image: pypy:2
43 | <<: *test
44 |
45 | python pypy3:
46 | image: pypy:3
47 | <<: *test
48 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | python:
4 | - "2.7"
5 | - "3.3"
6 | - "3.4"
7 | - "3.5"
8 | - "pypy"
9 |
10 | install:
11 | - pip install Cython
12 | - pip install --upgrade -e .[test]
13 |
14 | script:
15 | - python setup.py test
16 |
--------------------------------------------------------------------------------
/CHANGES:
--------------------------------------------------------------------------------
1 | tinycss changelog
2 | =================
3 |
4 |
5 | Version 0.4
6 | -----------
7 |
8 | Released on 2016-09-23.
9 |
10 | * Add an __eq__ operator to Token object.
11 | * Support Fonts 3.
12 |
13 |
14 |
15 | Version 0.3
16 | -----------
17 |
18 | Released on 2012-09-18.
19 |
20 | * Fix a bug when parsing \5c (an escaped backslash.)
21 |
22 |
23 |
24 | Version 0.2
25 | -----------
26 |
27 | Released on 2012-04-27.
28 |
29 | **Breaking changes:**
30 |
31 | * Remove the ``selectors3`` module. The functionality has moved to the
32 | `cssselect <http://packages.python.org/cssselect/>`_ project.
33 | * Simplify the API for :func:`~tinycss.make_parser`.
34 |
35 |
36 | Version 0.1.1
37 | -------------
38 |
39 | Released on 2012-04-06.
40 |
41 | Bug fixes:
42 |
43 | * Error handling on unexpected end of stylesheet in an at-rule head
44 | * Fix the installation on ASCII-only locales
45 |
46 |
47 | Version 0.1
48 | -----------
49 |
50 | Released on 2012-04-05.
51 |
52 | First release. Parser support for CSS 2.1, Selectors 3, Color 3 and
53 | Paged Media 3.
54 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2012 by Simon Sapin.
2 |
3 | Some rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are
7 | met:
8 |
9 | * Redistributions of source code must retain the above copyright
10 | notice, this list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above
13 | copyright notice, this list of conditions and the following
14 | disclaimer in the documentation and/or other materials provided
15 | with the distribution.
16 |
17 | * The names of the contributors may not be used to endorse or
18 | promote products derived from this software without specific
19 | prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.rst CHANGES LICENSE tox.ini .coveragerc tinycss/speedups.c
2 | recursive-include docs *
3 | prune docs/_build
4 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | tinycss: CSS parser for Python
2 | ==============================
3 |
4 | *tinycss* is a complete yet simple CSS parser for Python. It supports the full
5 | syntax and error handling for CSS 2.1 as well as some CSS 3 modules:
6 |
7 | * CSS Color 3
8 | * CSS Fonts 3
9 | * CSS Paged Media 3
10 |
11 | It is designed to be easy to extend for new CSS modules and syntax,
12 | and integrates well with cssselect_ for Selectors 3 support.
13 |
14 | Quick facts:
15 |
16 | * Free software: BSD licensed
17 | * Compatible with Python 2.7 and 3.x
18 | * Latest documentation `on python.org`_
19 | * Source, issues and pull requests `on Github`_
20 | * Releases `on PyPI`_
21 | * Install with ``pip install tinycss``
22 |
23 | .. _cssselect: http://packages.python.org/cssselect/
24 | .. _on python.org: http://packages.python.org/tinycss/
25 | .. _on Github: https://github.com/SimonSapin/tinycss/
26 | .. _on PyPI: http://pypi.python.org/pypi/tinycss
27 |
--------------------------------------------------------------------------------
/docs/_static/custom.css:
--------------------------------------------------------------------------------
1 | div.body {
2 | text-align: left;
3 | }
4 | div.document p, div.document ul {
5 | margin-top: 0;
6 | margin-bottom: 1em;
7 | }
8 | div.document ul ul {
9 | margin-top: 0;
10 | margin-bottom: .5em;
11 | }
12 | .field-name {
13 | padding-right: .5em;
14 | }
15 | table.field-list p, table.field-list ul {
16 | margin-bottom: .5em;
17 | }
18 | table {
19 | border-collapse: collapse;
20 | margin-bottom: 1em;
21 | }
22 | table.docutils td, table.docutils th {
23 | padding: .2em .5em;
24 | }
25 |
--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 | {% block extrahead %}
3 |
4 | {% endblock %}
5 |
--------------------------------------------------------------------------------
/docs/changelog.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../CHANGES
2 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # tinycss documentation build configuration file, created by
5 | # sphinx-quickstart on Tue Mar 27 14:20:34 2012.
6 | #
7 | # This file is execfile()d with the current directory set to its containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | import sys, os
16 |
17 | # If extensions (or modules to document with autodoc) are in another directory,
18 | # add these directories to sys.path here. If the directory is relative to the
19 | # documentation root, use os.path.abspath to make it absolute, like shown here.
20 | #sys.path.insert(0, os.path.abspath('.'))
21 |
22 | # -- General configuration -----------------------------------------------------
23 |
24 | # If your documentation needs a minimal Sphinx version, state it here.
25 | #needs_sphinx = '1.0'
26 |
27 | # Add any Sphinx extension module names here, as strings. They can be extensions
28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
29 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx',
30 | 'sphinx.ext.viewcode', 'sphinx.ext.doctest']
31 |
32 | # Add any paths that contain templates here, relative to this directory.
33 | templates_path = ['_templates']
34 |
35 | # The suffix of source filenames.
36 | source_suffix = '.rst'
37 |
38 | # The encoding of source files.
39 | #source_encoding = 'utf-8-sig'
40 |
41 | # The master toctree document.
42 | master_doc = 'index'
43 |
44 | # General information about the project.
45 | project = 'tinycss'
46 | copyright = '2012, Simon Sapin'
47 |
48 | # The version info for the project you're documenting, acts as replacement for
49 | # |version| and |release|, also used in various other places throughout the
50 | # built documents.
51 | #
52 | # The full version, including alpha/beta/rc tags.
53 | #release = '0.1dev'
54 | import re
55 | with open(os.path.join(os.path.dirname(__file__), '..',
56 | 'tinycss', 'version.py')) as init_py:
57 | release = re.search("VERSION = '([^']+)'", init_py.read()).group(1)
58 | # The short X.Y version.
59 | version = release.rstrip('dev')
60 |
61 | # The language for content autogenerated by Sphinx. Refer to documentation
62 | # for a list of supported languages.
63 | #language = None
64 |
65 | # There are two options for replacing |today|: either, you set today to some
66 | # non-false value, then it is used:
67 | #today = ''
68 | # Else, today_fmt is used as the format for a strftime call.
69 | #today_fmt = '%B %d, %Y'
70 |
71 | # List of patterns, relative to source directory, that match files and
72 | # directories to ignore when looking for source files.
73 | exclude_patterns = ['_build']
74 |
75 | # The reST default role (used for this markup: `text`) to use for all documents.
76 | #default_role = None
77 |
78 | # If true, '()' will be appended to :func: etc. cross-reference text.
79 | #add_function_parentheses = True
80 |
81 | # If true, the current module name will be prepended to all description
82 | # unit titles (such as .. function::).
83 | #add_module_names = True
84 |
85 | # If true, sectionauthor and moduleauthor directives will be shown in the
86 | # output. They are ignored by default.
87 | #show_authors = False
88 |
89 | # The name of the Pygments (syntax highlighting) style to use.
90 | pygments_style = 'sphinx'
91 |
92 | # A list of ignored prefixes for module index sorting.
93 | #modindex_common_prefix = []
94 |
95 |
96 | # -- Options for HTML output ---------------------------------------------------
97 |
98 | # The theme to use for HTML and HTML Help pages. See the documentation for
99 | # a list of builtin themes.
100 | #html_theme = 'agogo'
101 |
102 | # Theme options are theme-specific and customize the look and feel of a theme
103 | # further. For a list of options available for each theme, see the
104 | # documentation.
105 | #html_theme_options = {}
106 |
107 | # Add any paths that contain custom themes here, relative to this directory.
108 | #html_theme_path = []
109 |
110 | # The name for this set of Sphinx documents. If None, it defaults to
111 | # " v documentation".
112 | #html_title = None
113 |
114 | # A shorter title for the navigation bar. Default is the same as html_title.
115 | #html_short_title = None
116 |
117 | # The name of an image file (relative to this directory) to place at the top
118 | # of the sidebar.
119 | #html_logo = None
120 |
121 | # The name of an image file (within the static path) to use as favicon of the
122 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
123 | # pixels large.
124 | #html_favicon = None
125 |
126 | # Add any paths that contain custom static files (such as style sheets) here,
127 | # relative to this directory. They are copied after the builtin static files,
128 | # so a file named "default.css" will overwrite the builtin "default.css".
129 | html_static_path = ['_static']
130 |
131 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
132 | # using the given strftime format.
133 | #html_last_updated_fmt = '%b %d, %Y'
134 |
135 | # If true, SmartyPants will be used to convert quotes and dashes to
136 | # typographically correct entities.
137 | #html_use_smartypants = True
138 |
139 | # Custom sidebar templates, maps document names to template names.
140 | #html_sidebars = {}
141 |
142 | # Additional templates that should be rendered to pages, maps page names to
143 | # template names.
144 | #html_additional_pages = {}
145 |
146 | # If false, no module index is generated.
147 | #html_domain_indices = True
148 |
149 | # If false, no index is generated.
150 | #html_use_index = True
151 |
152 | # If true, the index is split into individual pages for each letter.
153 | #html_split_index = False
154 |
155 | # If true, links to the reST sources are added to the pages.
156 | #html_show_sourcelink = True
157 |
158 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
159 | #html_show_sphinx = True
160 |
161 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
162 | #html_show_copyright = True
163 |
164 | # If true, an OpenSearch description file will be output, and all pages will
165 | # contain a <link> tag referring to it. The value of this option must be the
166 | # base URL from which the finished HTML is served.
167 | #html_use_opensearch = ''
168 |
169 | # This is the file name suffix for HTML files (e.g. ".xhtml").
170 | #html_file_suffix = None
171 |
172 | # Output file base name for HTML help builder.
173 | htmlhelp_basename = 'tinycssdoc'
174 |
175 |
176 | # -- Options for LaTeX output --------------------------------------------------
177 |
178 | latex_elements = {
179 | # The paper size ('letterpaper' or 'a4paper').
180 | #'papersize': 'letterpaper',
181 |
182 | # The font size ('10pt', '11pt' or '12pt').
183 | #'pointsize': '10pt',
184 |
185 | # Additional stuff for the LaTeX preamble.
186 | #'preamble': '',
187 | }
188 |
189 | # Grouping the document tree into LaTeX files. List of tuples
190 | # (source start file, target name, title, author, documentclass [howto/manual]).
191 | latex_documents = [
192 | ('index', 'tinycss.tex', 'tinycss Documentation',
193 | 'Simon Sapin', 'manual'),
194 | ]
195 |
196 | # The name of an image file (relative to this directory) to place at the top of
197 | # the title page.
198 | #latex_logo = None
199 |
200 | # For "manual" documents, if this is true, then toplevel headings are parts,
201 | # not chapters.
202 | #latex_use_parts = False
203 |
204 | # If true, show page references after internal links.
205 | #latex_show_pagerefs = False
206 |
207 | # If true, show URL addresses after external links.
208 | #latex_show_urls = False
209 |
210 | # Documents to append as an appendix to all manuals.
211 | #latex_appendices = []
212 |
213 | # If false, no module index is generated.
214 | #latex_domain_indices = True
215 |
216 |
217 | # -- Options for manual page output --------------------------------------------
218 |
219 | # One entry per manual page. List of tuples
220 | # (source start file, name, description, authors, manual section).
221 | man_pages = [
222 | ('index', 'tinycss', 'tinycss Documentation',
223 | ['Simon Sapin'], 1)
224 | ]
225 |
226 | # If true, show URL addresses after external links.
227 | #man_show_urls = False
228 |
229 |
230 | # -- Options for Texinfo output ------------------------------------------------
231 |
232 | # Grouping the document tree into Texinfo files. List of tuples
233 | # (source start file, target name, title, author,
234 | # dir menu entry, description, category)
235 | texinfo_documents = [
236 | ('index', 'tinycss', 'tinycss Documentation',
237 | 'Simon Sapin', 'tinycss', 'One line description of project.',
238 | 'Miscellaneous'),
239 | ]
240 |
241 | # Documents to append as an appendix to all manuals.
242 | #texinfo_appendices = []
243 |
244 | # If false, no module index is generated.
245 | #texinfo_domain_indices = True
246 |
247 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
248 | #texinfo_show_urls = 'footnote'
249 |
250 |
251 | # Example configuration for intersphinx: refer to the Python standard library.
252 | intersphinx_mapping = {'http://docs.python.org/': None}
253 |
--------------------------------------------------------------------------------
/docs/css3.rst:
--------------------------------------------------------------------------------
1 | CSS 3 Modules
2 | =============
3 |
4 | .. _selectors3:
5 |
6 | Selectors 3
7 | -----------
8 |
9 | .. currentmodule:: tinycss.css21
10 |
11 | On :attr:`RuleSet.selector`, the :meth:`~.token_data.TokenList.as_css` method
12 | can be used to serialize a selector back to a Unicode string.
13 |
14 | >>> import tinycss
15 | >>> stylesheet = tinycss.make_parser().parse_stylesheet(
16 | ... 'div.error, #root > section:first-letter { color: red }')
17 | >>> selector_string = stylesheet.rules[0].selector.as_css()
18 | >>> selector_string
19 | 'div.error, #root > section:first-letter'
20 |
21 | This string can be parsed by cssselect_. The parsed objects have information
22 | about pseudo-elements and selector specificity.
23 |
24 | .. _cssselect: http://packages.python.org/cssselect/
25 |
26 | >>> import cssselect
27 | >>> selectors = cssselect.parse(selector_string)
28 | >>> [s.specificity() for s in selectors]
29 | [(0, 1, 1), (1, 0, 2)]
30 | >>> [s.pseudo_element for s in selectors]
31 | [None, 'first-letter']
32 |
33 | These objects can in turn be translated to XPath expressions. Note that
34 | the translation ignores pseudo-elements, you have to account for them
35 | somehow or reject selectors with pseudo-elements.
36 |
37 | >>> xpath = cssselect.HTMLTranslator().selector_to_xpath(selectors[1])
38 | >>> xpath
39 | "descendant-or-self::*[@id = 'root']/section"
40 |
41 | Finally, the XPath expressions can be used with lxml_ to find the matching
42 | elements.
43 |
44 | >>> from lxml import etree
45 | >>> compiled_selector = etree.XPath(xpath)
46 | >>> document = etree.fromstring('''
47 | ...
48 | ...
51 | ... ''')
52 | >>> [el.get('id') for el in compiled_selector(document)]
53 | ['head', 'content']
54 |
55 | .. _lxml: http://lxml.de/xpathxslt.html#xpath
56 |
57 | Find more details in the `cssselect documentation`_.
58 |
59 | .. _cssselect documentation: http://packages.python.org/cssselect/
60 |
61 |
62 | .. module:: tinycss.color3
63 |
64 | Color 3
65 | -------
66 |
67 | This module implements parsing for the ``<color>`` values, as defined in
68 | `CSS 3 Color <http://www.w3.org/TR/css3-color/>`_.
69 |
70 | The (deprecated) CSS2 system colors are not supported, but you can
71 | easily test for them if you want as they are simple ``IDENT`` tokens.
72 | For example::
73 |
74 | if token.type == 'IDENT' and token.value == 'ButtonText':
75 | return ...
76 |
77 | All other values types *are* supported:
78 |
79 | * Basic, extended (X11) and transparent color keywords;
80 | * 3-digit and 6-digit hexadecimal notations;
81 | * ``rgb()``, ``rgba()``, ``hsl()`` and ``hsla()`` functional notations.
82 | * ``currentColor``
83 |
84 | This module does not integrate with a parser class. Instead, it provides
85 | a function that can parse tokens as found in :attr:`.css21.Declaration.value`,
86 | for example.
87 |
88 | .. autofunction:: parse_color
89 | .. autofunction:: parse_color_string
90 | .. autoclass:: RGBA
91 |
92 |
93 | .. module:: tinycss.page3
94 |
95 | Paged Media 3
96 | -------------
97 |
98 | .. autoclass:: CSSPage3Parser
99 | .. autoclass:: MarginRule
100 |
101 |
102 | .. module:: tinycss.fonts3
103 |
104 | Fonts 3
105 | -------
106 |
107 | .. autoclass:: CSSFonts3Parser
108 | .. autoclass:: FontFaceRule
109 | .. autoclass:: FontFeatureValuesRule
110 | .. autoclass:: FontFeatureRule
111 |
112 |
113 | Other CSS modules
114 | -----------------
115 |
116 | To add support for new CSS syntax, see :ref:`extending`.
117 |
--------------------------------------------------------------------------------
/docs/extending.rst:
--------------------------------------------------------------------------------
1 | .. _extending:
2 |
3 | Extending the parser
4 | ====================
5 |
6 | Modules such as :mod:`.page3` extend the CSS 2.1 parser to add support for
7 | CSS 3 syntax.
8 | They do so by sub-classing :class:`.css21.CSS21Parser` and overriding/extending
9 | some of its methods. In fact, the parser is made of methods in a class
10 | (rather than a set of functions) solely to enable this kind of sub-classing.
11 |
12 | tinycss is designed to enable you to have parser subclasses outside of
13 | tinycss, without monkey-patching. If however the syntax you added is for a
14 | W3C specification, consider including your subclass in a new tinycss module
15 | and send a pull request: see :ref:`hacking`.
16 |
17 |
18 | .. currentmodule:: tinycss.css21
19 |
20 | Example: star hack
21 | ------------------
22 |
23 | .. _star hack: https://en.wikipedia.org/wiki/CSS_filter#Star_hack
24 |
25 | The `star hack`_ uses invalid declarations that are only parsed by some
26 | versions of Internet Explorer. By default, tinycss ignores invalid
27 | declarations and logs an error.
28 |
29 | >>> from tinycss.css21 import CSS21Parser
30 | >>> css = '#elem { width: [W3C Model Width]; *width: [BorderBox Model]; }'
31 | >>> stylesheet = CSS21Parser().parse_stylesheet(css)
32 | >>> stylesheet.errors
33 | [ParseError('Parse error at 1:35, expected a property name, got DELIM',)]
34 | >>> [decl.name for decl in stylesheet.rules[0].declarations]
35 | ['width']
36 |
37 | If for example a minifier based on tinycss wants to support the star hack,
38 | it can by extending the parser::
39 |
40 | >>> class CSSStarHackParser(CSS21Parser):
41 | ... def parse_declaration(self, tokens):
42 | ... has_star_hack = (tokens[0].type == 'DELIM' and tokens[0].value == '*')
43 | ... if has_star_hack:
44 | ... tokens = tokens[1:]
45 | ... declaration = super(CSSStarHackParser, self).parse_declaration(tokens)
46 | ... declaration.has_star_hack = has_star_hack
47 | ... return declaration
48 | ...
49 | >>> stylesheet = CSSStarHackParser().parse_stylesheet(css)
50 | >>> stylesheet.errors
51 | []
52 | >>> [(d.name, d.has_star_hack) for d in stylesheet.rules[0].declarations]
53 | [('width', False), ('width', True)]
54 |
55 | This class extends the :meth:`~CSS21Parser.parse_declaration` method.
56 | It removes any ``*`` delimiter :class:`~.token_data.Token` at the start of
57 | a declaration, and adds a ``has_star_hack`` boolean attribute on parsed
58 | :class:`Declaration` objects: ``True`` if a ``*`` was removed, ``False`` for
59 | “normal” declarations.
60 |
61 |
62 | Parser methods
63 | --------------
64 |
65 | In addition to methods of the user API (see :ref:`parsing`), here
66 | are the methods of the CSS 2.1 parser that can be overridden or extended:
67 |
68 | .. automethod:: CSS21Parser.parse_rules
69 | .. automethod:: CSS21Parser.read_at_rule
70 | .. automethod:: CSS21Parser.parse_at_rule
71 | .. automethod:: CSS21Parser.parse_media
72 | .. automethod:: CSS21Parser.parse_page_selector
73 | .. automethod:: CSS21Parser.parse_declarations_and_at_rules
74 | .. automethod:: CSS21Parser.parse_ruleset
75 | .. automethod:: CSS21Parser.parse_declaration_list
76 | .. automethod:: CSS21Parser.parse_declaration
77 | .. automethod:: CSS21Parser.parse_value_priority
78 |
79 | Unparsed at-rules
80 | -----------------
81 |
82 | .. autoclass:: AtRule
83 |
84 |
85 | .. module:: tinycss.parsing
86 |
87 | Parsing helper functions
88 | ------------------------
89 |
90 | The :mod:`tinycss.parsing` module contains helper functions for parsing
91 | tokens into a more structured form:
92 |
93 | .. autofunction:: strip_whitespace
94 | .. autofunction:: split_on_comma
95 | .. autofunction:: validate_value
96 | .. autofunction:: validate_block
97 | .. autofunction:: validate_any
98 |
--------------------------------------------------------------------------------
/docs/hacking.rst:
--------------------------------------------------------------------------------
1 | .. _hacking:
2 |
3 | Hacking tinycss
4 | ===============
5 |
6 | .. highlight:: sh
7 |
8 | Bugs and feature requests
9 | -------------------------
10 |
11 | Bug reports, feature requests and other issues should go to the
12 | `tinycss issue tracker`_ on Github. Any suggestion or feedback is welcome.
13 | Please include in full any error message, traceback or other detail that
14 | could be helpful.
15 |
16 | .. _tinycss issue tracker: https://github.com/SimonSapin/tinycss/issues
17 |
18 |
19 | Installing the development version
20 | ----------------------------------
21 |
22 | First, get the latest git version::
23 |
24 | git clone https://github.com/SimonSapin/tinycss.git
25 | cd tinycss
26 |
27 | You will need Cython_ and pytest_. Installing in a virtualenv_ is recommended::
28 |
29 | virtualenv env
30 | . env/bin/activate
31 | pip install Cython pytest
32 |
33 | .. _Cython: http://cython.org/
34 | .. _pytest: http://pytest.org/
35 | .. _virtualenv: http://www.virtualenv.org/
36 |
37 | Then, install tinycss in-place with pip’s *editable mode*. This will also
38 | build the accelerators::
39 |
40 | pip install -e .
41 |
42 |
43 | Running the test suite
44 | ----------------------
45 |
46 | Once you have everything installed (see above), just run pytest from the
47 | *tinycss* directory::
48 |
49 | py.test
50 |
51 | If the accelerators are not available for some reason, use the
52 | ``TINYCSS_SKIP_SPEEDUPS_TESTS`` environment variable::
53 |
54 | TINYCSS_SKIP_SPEEDUPS_TESTS=1 py.test
55 |
56 | If you get test failures on a fresh git clone, something may have gone wrong
57 | during the installation. Otherwise, you probably found a bug. Please
58 | `report it <#bugs-and-feature-requests>`_.
59 |
60 |
61 | Test in multiple Python versions with tox
62 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
63 |
64 | tox_ automatically creates virtualenvs for various Python versions and
65 | runs the test suite there::
66 |
67 | pip install tox
68 |
69 | Change to the project’s root directory and just run::
70 |
71 | tox
72 |
73 | .. _tox: http://tox.testrun.org/
74 |
75 | tinycss comes with a pre-configured ``tox.ini`` file to test in CPython
76 | 2.6, 2.7, 3.1 and 3.2 as well as PyPy. You can change that with the ``-e``
77 | parameter::
78 |
79 | tox -e py27,py32
80 |
81 | If you use ``--`` in the arguments passed to tox, further arguments
82 | are passed to the underlying ``py.test`` command::
83 |
84 | tox -- -x --pdb
85 |
86 |
87 | Building the documentation
88 | --------------------------
89 |
90 | This documentation is made with Sphinx_::
91 |
92 | pip install Sphinx
93 |
94 | .. _Sphinx: http://sphinx.pocoo.org/
95 |
96 | To build the HTML version of the documentation, change to the project’s root
97 | directory and run::
98 |
99 | python setup.py build_sphinx
100 |
101 | The built HTML files are in ``docs/_build/html``.
102 |
103 |
104 | Making a patch and a pull request
105 | ---------------------------------
106 |
107 | If you would like to see something included in tinycss, please fork
108 | `the repository `_ on Github
109 | and make a pull request. Make sure to include tests for your change.
110 |
111 |
112 | Mailing-list
113 | ------------
114 |
115 | tinycss does not have a mailing-list of its own for now, but the
116 | `WeasyPrint mailing-list `_
117 | is appropriate to discuss it.
118 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../README.rst
2 |
3 |
4 | Requirements
5 | ------------
6 |
7 | `tinycss is tested `_ on CPython 2.7, 3.3,
8 | 3.4 and 3.5 as well as PyPy 5.3 and PyPy3 2.4; it should work on any
9 | implementation of **Python 2.7 or later version (including 3.x)** of the
10 | language.
11 |
12 | Cython_ is used for optional accelerators but is only required for
13 | development versions of tinycss.
14 |
15 | .. _Cython: http://cython.org/
16 |
17 |
18 | Installation
19 | ------------
20 |
21 | Installing with `pip `_ should Just Work:
22 |
23 | .. code-block:: sh
24 |
25 | pip install tinycss
26 |
27 | The release tarballs contain pre-*cythoned* C files for the accelerators:
28 | you will not need Cython to install like this.
29 | If the accelerators fail to build for some reason, tinycss will
30 | print a warning and fall back to a pure-Python installation.
31 |
32 |
33 | Documentation
34 | -------------
35 |
36 | .. Have this page in the sidebar, but do not show a link to itself here:
37 |
38 | .. toctree::
39 | :hidden:
40 |
41 | self
42 |
43 | .. toctree::
44 | :maxdepth: 2
45 |
46 | parsing
47 | css3
48 | extending
49 | hacking
50 | changelog
51 |
--------------------------------------------------------------------------------
/docs/parsing.rst:
--------------------------------------------------------------------------------
1 | Parsing with tinycss
2 | ====================
3 |
4 | .. highlight:: python
5 |
6 | Quickstart
7 | ----------
8 |
9 | Import *tinycss*, make a parser object with the features you want,
10 | and parse a stylesheet:
11 |
12 | .. doctest::
13 |
14 | >>> import tinycss
15 | >>> parser = tinycss.make_parser('page3')
16 | >>> stylesheet = parser.parse_stylesheet_bytes(b'''@import "foo.css";
17 | ... p.error { color: red } @lorem-ipsum;
18 | ... @page tables { size: landscape }''')
19 | >>> stylesheet.rules
20 | [, , ]
21 | >>> stylesheet.errors
22 | [ParseError('Parse error at 2:29, unknown at-rule in stylesheet context: @lorem-ipsum',)]
23 |
24 | You’ll get a :class:`~tinycss.css21.Stylesheet` object which contains
25 | all the parsed content as well as a list of encountered errors.
26 |
27 |
28 | Parsers
29 | -------
30 |
31 | Parsers are subclasses of :class:`tinycss.css21.CSS21Parser`. Various
32 | subclasses add support for more syntax. You can choose which features to
33 | enable by making a new parser class with multiple inheritance, but there
34 | is also a convenience function to do that:
35 |
36 | .. module:: tinycss
37 |
38 | .. autofunction:: make_parser
39 |
40 |
41 | .. module:: tinycss.css21
42 | .. _parsing:
43 |
44 | Parsing a stylesheet
45 | ~~~~~~~~~~~~~~~~~~~~
46 |
47 | Parser classes have three different methods to parse a CSS stylesheet,
48 | depending on whether you have a file, a byte string, or a Unicode string.
49 |
50 | .. autoclass:: CSS21Parser
51 | :members: parse_stylesheet_file, parse_stylesheet_bytes, parse_stylesheet
52 |
53 |
54 | Parsing a ``style`` attribute
55 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
56 |
57 | .. automethod:: CSS21Parser.parse_style_attr
58 |
59 |
60 | Parsed objects
61 | --------------
62 |
63 | These data structures make up the results of the various parsing methods.
64 |
65 | .. autoclass:: tinycss.parsing.ParseError()
66 | .. autoclass:: Stylesheet()
67 |
68 | .. note::
69 | All subsequent objects have :obj:`line` and :obj:`column` attributes (not
70 | repeated every time for brevity) that indicate where in the CSS source
71 | this object was read.
72 |
73 | .. autoclass:: RuleSet()
74 | .. autoclass:: ImportRule()
75 | .. autoclass:: MediaRule()
76 | .. autoclass:: PageRule()
77 | .. autoclass:: Declaration()
78 |
79 |
80 | Tokens
81 | ------
82 |
83 | Some parts of a stylesheet (such as selectors in CSS 2.1 or property values)
84 | are not parsed by tinycss. They appear as tokens instead.
85 |
86 | .. module:: tinycss.token_data
87 |
88 | .. autoclass:: TokenList()
89 | :member-order: bysource
90 | :members:
91 | .. autoclass:: Token()
92 | :members:
93 | .. autoclass:: tinycss.speedups.CToken()
94 | .. autoclass:: ContainerToken()
95 | :members:
96 |
97 | .. autoclass:: FunctionToken()
98 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [build_sphinx]
2 | source-dir = docs
3 | build-dir = docs/_build
4 | #all_files = 1
5 |
6 | [upload_sphinx] # Sphinx-PyPI-upload
7 | upload-dir = docs/_build/html
8 |
9 | [aliases]
10 | test = pytest
11 |
12 | [tool:pytest]
13 | addopts = --flake8 --isort --cov --ignore=test/cairosvg_reference
14 | norecursedirs = dist .cache .git build *.egg-info .eggs venv cairosvg_reference
15 | flake8-ignore = docs/conf.py ALL
16 | isort_ignore =
17 | docs/conf.py
18 | setup.py
19 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import re
3 | import sys
4 | from distutils.errors import (
5 | CCompilerError, DistutilsExecError, DistutilsPlatformError)
6 | from setuptools import Extension, setup
7 |
try:
    # Prefer Cython's build_ext so the .pyx source can be compiled directly.
    from Cython.Distutils import build_ext
    import Cython.Compiler.Version
    CYTHON_INSTALLED = True
except ImportError:
    # No Cython: fall back to distutils; only a pre-generated .c can build.
    from distutils.command.build_ext import build_ext
    CYTHON_INSTALLED = False


# Exception types that indicate the optional C extension failed to build.
ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError)
if sys.platform == 'win32' and sys.version_info > (2, 6):
    # 2.6's distutils.msvc9compiler can raise an IOError when failing to
    # find the compiler
    ext_errors += (IOError,)
22 |
23 |
class BuildFailed(Exception):
    """Raised when building the optional C extension fails."""
    pass
26 |
27 |
class ve_build_ext(build_ext):
    # This class allows C extension building to fail.

    def run(self):
        """Run the build, mapping platform errors to :class:`BuildFailed`."""
        try:
            build_ext.run(self)
        except DistutilsPlatformError:
            raise BuildFailed

    def build_extension(self, ext):
        """Build one extension, mapping compiler errors to :class:`BuildFailed`."""
        try:
            build_ext.build_extension(self, ext)
        except ext_errors:
            raise BuildFailed
42 |
43 |
ROOT = os.path.dirname(__file__)
# Read the version with a regex instead of importing the package: importing
# could fail before the package is built/installed.
with open(os.path.join(ROOT, 'tinycss', 'version.py')) as fd:
    VERSION = re.search("VERSION = '([^']+)'", fd.read()).group(1)

# Read as bytes and decode explicitly so the long description is correct
# regardless of the locale's default encoding.
with open(os.path.join(ROOT, 'README.rst'), 'rb') as fd:
    README = fd.read().decode('utf8')


# Only require pytest-runner when a test command is actually being run.
needs_pytest = {'pytest', 'test', 'ptr'}.intersection(sys.argv)
pytest_runner = ['pytest-runner'] if needs_pytest else []
54 |
55 |
def run_setup(with_extension):
    """Call setuptools' ``setup()``.

    :param with_extension:
        If true, declare the optional ``tinycss.speedups`` C extension,
        built from the .pyx source when Cython is installed or from a
        pre-generated .c file otherwise.
    """
    if with_extension:
        extension_path = os.path.join('tinycss', 'speedups')
        if CYTHON_INSTALLED:
            extension_path += '.pyx'
            print('Building with Cython %s.' % Cython.Compiler.Version.version)
        else:
            extension_path += '.c'
            if not os.path.exists(extension_path):
                print("WARNING: Trying to build without Cython, but "
                      "pre-generated '%s' does not seem to be available."
                      % extension_path)
            else:
                print('Building without Cython.')
        kwargs = dict(
            cmdclass=dict(build_ext=ve_build_ext),
            ext_modules=[Extension('tinycss.speedups',
                                   sources=[extension_path])],
        )
    else:
        kwargs = dict()

    setup(
        name='tinycss',
        version=VERSION,
        url='http://tinycss.readthedocs.io/',
        license='BSD',
        author='Simon Sapin',
        author_email='simon.sapin@exyr.org',
        description='tinycss is a complete yet simple CSS parser for Python.',
        long_description=README,
        classifiers=[
            'Development Status :: 4 - Beta',
            'Intended Audience :: Developers',
            'License :: OSI Approved :: BSD License',
            'Programming Language :: Python :: 2',
            'Programming Language :: Python :: 2.7',
            'Programming Language :: Python :: 3',
            'Programming Language :: Python :: 3.3',
            'Programming Language :: Python :: 3.4',
            'Programming Language :: Python :: 3.5',
            'Programming Language :: Python :: Implementation :: CPython',
            'Programming Language :: Python :: Implementation :: PyPy',
        ],
        setup_requires=pytest_runner,
        tests_require=[
            'pytest-cov', 'pytest-flake8', 'pytest-isort', 'pytest-runner'],
        extras_require={'test': (
            'pytest-runner', 'pytest-cov', 'pytest-flake8', 'pytest-isort')},
        packages=['tinycss', 'tinycss.tests'],
        **kwargs
    )
108 |
109 |
IS_PYPY = hasattr(sys, 'pypy_translation_info')
try:
    # On PyPy the C speedups bring no benefit; skip the extension entirely.
    run_setup(not IS_PYPY)
except BuildFailed:
    # The extension is optional: warn, then retry as pure Python.
    BUILD_EXT_WARNING = ('WARNING: The extension could not be compiled, '
                         'speedups are not enabled.')
    print('*' * 75)
    print(BUILD_EXT_WARNING)
    print('Failure information, if any, is above.')
    print('Retrying the build without the Cython extension now.')
    print('*' * 75)

    run_setup(False)

    print('*' * 75)
    print(BUILD_EXT_WARNING)
    print('Plain-Python installation succeeded.')
    print('*' * 75)
128 |
--------------------------------------------------------------------------------
/tinycss/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | tinycss
4 | -------
5 |
6 | A CSS parser, and nothing else.
7 |
8 | :copyright: (c) 2012 by Simon Sapin.
9 | :license: BSD, see LICENSE for more details.
10 | """
11 |
12 | from .version import VERSION
13 |
14 | from .css21 import CSS21Parser
15 | from .page3 import CSSPage3Parser
16 | from .fonts3 import CSSFonts3Parser
17 |
18 |
__version__ = VERSION

# Shorthand names accepted by make_parser() for the optional feature mixins.
PARSER_MODULES = {
    'page3': CSSPage3Parser,
    'fonts3': CSSFonts3Parser,
}
25 |
26 |
def make_parser(*features, **kwargs):
    """Make a parser object with the chosen features.

    :param features:
        Positional arguments are base classes the new parser class will
        extend.  The strings ``'page3'`` and ``'fonts3'`` are accepted as
        shorthand for :class:`~page3.CSSPage3Parser` and
        :class:`~fonts3.CSSFonts3Parser` respectively.
    :param kwargs:
        Keyword arguments are passed to the parser’s constructor.
    :returns:
        An instance of a new subclass of :class:`CSS21Parser`

    """
    if not features:
        return CSS21Parser(**kwargs)
    # Resolve shorthand strings; anything else is used as a base class as-is.
    bases = [PARSER_MODULES.get(feature, feature) for feature in features]
    parser_class = type('CustomCSSParser', tuple(bases) + (CSS21Parser,), {})
    return parser_class(**kwargs)
48 |
--------------------------------------------------------------------------------
/tinycss/color3.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | tinycss.color3
4 | ---------------
5 |
6 | Parser for CSS 3 color values
7 | http://www.w3.org/TR/css3-color/
8 |
9 | This module does not provide anything that integrates in a parser class,
10 | only functions that parse single tokens from (eg.) a property value.
11 |
12 | :copyright: (c) 2012 by Simon Sapin.
13 | :license: BSD, see LICENSE for more details.
14 | """
15 |
16 | from __future__ import division, unicode_literals
17 |
18 | import collections
19 | import itertools
20 | import re
21 |
22 | from .tokenizer import tokenize_grouped
23 |
24 |
class RGBA(collections.namedtuple('RGBA', ['red', 'green', 'blue', 'alpha'])):
    """An RGBA color.

    A tuple of four floats in the 0..1 range: ``(r, g, b, a)``.
    Also has ``red``, ``green``, ``blue`` and ``alpha`` attributes to access
    the same values.

    """
    # The subclass exists only to attach this docstring; no behavior is added.
33 |
34 |
def parse_color_string(css_string):
    """Parse a CSS string as a color value.

    This is a convenience wrapper around :func:`parse_color` in case you
    have a string that is not from a CSS stylesheet.

    :param css_string:
        A Unicode string in CSS syntax.
    :returns:
        Same as :func:`parse_color`.

    """
    grouped = list(tokenize_grouped(css_string.strip()))
    if len(grouped) != 1:
        # Zero or several tokens: not a single color value.
        return None
    return parse_color(grouped[0])
50 |
51 |
def parse_color(token):
    """Parse single token as a color value.

    :param token:
        A single :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`, as found eg. in a
        property value.
    :returns:
        * ``None``, if the token is not a valid CSS 3 color value.
          (No exception is raised.)
        * For the *currentColor* keyword: the string ``'currentColor'``
        * Every other values (including keywords, HSL and HSLA) is converted
          to RGBA and returned as an :class:`RGBA` object (a 4-tuple with
          attribute access).
          The alpha channel is clipped to [0, 1], but R, G, or B can be
          out of range (eg. ``rgb(-51, 306, 0)`` is represented as
          ``(-.2, 1.2, 0, 1)``.)

    """
    if token.type == 'IDENT':
        return COLOR_KEYWORDS.get(token.value.lower())
    elif token.type == 'HASH':
        # Try the short (#rgb) then the long (#rrggbb) hexadecimal notation.
        for multiplier, regexp in HASH_REGEXPS:
            match = regexp(token.value)
            if match:
                # Short-form digits are repeated: '#f0f' reads as '#ff00ff'.
                r, g, b = [int(group * multiplier, 16) / 255
                           for group in match.groups()]
                return RGBA(r, g, b, 1.)
    elif token.type == 'FUNCTION':
        args = parse_comma_separated(token.content)
        if args:
            name = token.function_name.lower()
            if name == 'rgb':
                return parse_rgb(args, alpha=1.)
            elif name == 'rgba':
                # parse_alpha() returns None unless args[3:] is exactly one
                # NUMBER/INTEGER token, so invalid arity falls through.
                alpha = parse_alpha(args[3:])
                if alpha is not None:
                    return parse_rgb(args[:3], alpha)
            elif name == 'hsl':
                return parse_hsl(args, alpha=1.)
            elif name == 'hsla':
                alpha = parse_alpha(args[3:])
                if alpha is not None:
                    return parse_hsl(args[:3], alpha)
96 |
97 |
def parse_alpha(args):
    """Parse an alpha channel argument list.

    :param args:
        A list of tokens.
    :returns:
        If ``args`` is a single INTEGER or NUMBER token, its value clipped
        to the 0..1 range.  ``None`` for any other input.
    """
    if len(args) != 1:
        return None
    token = args[0]
    if token.type in ('NUMBER', 'INTEGER'):
        return min(1, max(0, token.value))
    return None
106 |
107 |
def parse_rgb(args, alpha):
    """Parse the arguments of an ``rgb()`` or ``rgba()`` function.

    :param args:
        A list of argument tokens.
    :param alpha:
        The alpha channel value, a float in 0..1.
    :returns:
        An :class:`RGBA` value if ``args`` is three INTEGER tokens or three
        PERCENTAGE tokens; ``None`` otherwise.
    """
    kinds = [token.type for token in args]
    if kinds == ['INTEGER'] * 3:
        scale = 255  # integer channels are in 0..255
    elif kinds == ['PERCENTAGE'] * 3:
        scale = 100  # percentage channels are in 0..100
    else:
        return None
    red, green, blue = (token.value / scale for token in args[:3])
    return RGBA(red, green, blue, alpha)
121 |
122 |
def parse_hsl(args, alpha):
    """Parse the arguments of an ``hsl()`` or ``hsla()`` function.

    :param args:
        A list of argument tokens.
    :param alpha:
        The alpha channel value, a float in 0..1.
    :returns:
        An :class:`RGBA` value if ``args`` is one INTEGER token followed by
        two PERCENTAGE tokens; ``None`` otherwise.
    """
    kinds = [token.type for token in args]
    if kinds != ['INTEGER', 'PERCENTAGE', 'PERCENTAGE']:
        return None
    hue, saturation, lightness = (token.value for token in args[:3])
    red, green, blue = hsl_to_rgb(hue, saturation, lightness)
    return RGBA(red, green, blue, alpha)
134 |
135 |
def hsl_to_rgb(hue, saturation, lightness):
    """Convert an HSL color to RGB.

    :param hue: degrees
    :param saturation: percentage
    :param lightness: percentage
    :returns: (r, g, b) as floats in the 0..1 range
    """
    # Normalize: hue to a 0..1 turn fraction, the others to 0..1 clipped.
    turn = (hue / 360) % 1
    sat = min(1, max(0, saturation / 100))
    light = min(1, max(0, lightness / 100))

    # Reference algorithm: http://www.w3.org/TR/css3-color/#hsl-color
    if light <= 0.5:
        m2 = light * (sat + 1)
    else:
        m2 = light + sat - light * sat
    m1 = light * 2 - m2

    def channel(h):
        # Map one hue-offset value to an intensity between m1 and m2.
        if h < 0:
            h += 1
        if h > 1:
            h -= 1
        if h * 6 < 1:
            return m1 + (m2 - m1) * h * 6
        if h * 2 < 1:
            return m2
        if h * 3 < 2:
            return m1 + (m2 - m1) * (2 / 3 - h) * 6
        return m1

    return (channel(turn + 1 / 3), channel(turn), channel(turn - 1 / 3))
171 |
172 |
def parse_comma_separated(tokens):
    """Parse function arguments: single tokens separated by mandatory commas,
    with optional white space around each argument.

    :param tokens:
        A list of tokens, typically the content of a function token.
    :returns:
        The list of argument tokens with commas and white space removed,
        or ``None`` if the input does not match this description.

    """
    meaningful = [token for token in tokens if token.type != 'S']
    if not meaningful:
        return []
    # Valid input alternates argument, comma, argument, ... (odd length,
    # with every odd position holding a ',' DELIM token).
    separators_ok = all(
        token.type == 'DELIM' and token.value == ','
        for token in meaningful[1::2])
    if len(meaningful) % 2 == 1 and separators_ok:
        return meaningful[::2]
    return None
189 |
190 |
# Matchers for the ``#rgb`` and ``#rrggbb`` hexadecimal notations.  Each
# entry is ``(multiplier, match_function)``: short-form digits are repeated
# ``multiplier`` times, so that eg. ``#f0f`` reads as ``#ff00ff``.
# Raw strings are required: '\d' in a plain string literal is an invalid
# escape sequence (DeprecationWarning, SyntaxWarning on Python 3.12+).
HASH_REGEXPS = (
    (2, re.compile(r'^#([\da-f])([\da-f])([\da-f])$', re.I).match),
    (1, re.compile(r'^#([\da-f]{2})([\da-f]{2})([\da-f]{2})$', re.I).match),
)
195 |
196 |
# (r, g, b) in 0..255
# The 16 basic color keywords defined since HTML 4 / CSS 1.
BASIC_COLOR_KEYWORDS = [
    ('black', (0, 0, 0)),
    ('silver', (192, 192, 192)),
    ('gray', (128, 128, 128)),
    ('white', (255, 255, 255)),
    ('maroon', (128, 0, 0)),
    ('red', (255, 0, 0)),
    ('purple', (128, 0, 128)),
    ('fuchsia', (255, 0, 255)),
    ('green', (0, 128, 0)),
    ('lime', (0, 255, 0)),
    ('olive', (128, 128, 0)),
    ('yellow', (255, 255, 0)),
    ('navy', (0, 0, 128)),
    ('blue', (0, 0, 255)),
    ('teal', (0, 128, 128)),
    ('aqua', (0, 255, 255)),
]


# (r, g, b) in 0..255
# The extended ("X11") color keywords; overlaps with the basic set above,
# which is harmless since both feed the same COLOR_KEYWORDS dict below.
EXTENDED_COLOR_KEYWORDS = [
    ('aliceblue', (240, 248, 255)),
    ('antiquewhite', (250, 235, 215)),
    ('aqua', (0, 255, 255)),
    ('aquamarine', (127, 255, 212)),
    ('azure', (240, 255, 255)),
    ('beige', (245, 245, 220)),
    ('bisque', (255, 228, 196)),
    ('black', (0, 0, 0)),
    ('blanchedalmond', (255, 235, 205)),
    ('blue', (0, 0, 255)),
    ('blueviolet', (138, 43, 226)),
    ('brown', (165, 42, 42)),
    ('burlywood', (222, 184, 135)),
    ('cadetblue', (95, 158, 160)),
    ('chartreuse', (127, 255, 0)),
    ('chocolate', (210, 105, 30)),
    ('coral', (255, 127, 80)),
    ('cornflowerblue', (100, 149, 237)),
    ('cornsilk', (255, 248, 220)),
    ('crimson', (220, 20, 60)),
    ('cyan', (0, 255, 255)),
    ('darkblue', (0, 0, 139)),
    ('darkcyan', (0, 139, 139)),
    ('darkgoldenrod', (184, 134, 11)),
    ('darkgray', (169, 169, 169)),
    ('darkgreen', (0, 100, 0)),
    ('darkgrey', (169, 169, 169)),
    ('darkkhaki', (189, 183, 107)),
    ('darkmagenta', (139, 0, 139)),
    ('darkolivegreen', (85, 107, 47)),
    ('darkorange', (255, 140, 0)),
    ('darkorchid', (153, 50, 204)),
    ('darkred', (139, 0, 0)),
    ('darksalmon', (233, 150, 122)),
    ('darkseagreen', (143, 188, 143)),
    ('darkslateblue', (72, 61, 139)),
    ('darkslategray', (47, 79, 79)),
    ('darkslategrey', (47, 79, 79)),
    ('darkturquoise', (0, 206, 209)),
    ('darkviolet', (148, 0, 211)),
    ('deeppink', (255, 20, 147)),
    ('deepskyblue', (0, 191, 255)),
    ('dimgray', (105, 105, 105)),
    ('dimgrey', (105, 105, 105)),
    ('dodgerblue', (30, 144, 255)),
    ('firebrick', (178, 34, 34)),
    ('floralwhite', (255, 250, 240)),
    ('forestgreen', (34, 139, 34)),
    ('fuchsia', (255, 0, 255)),
    ('gainsboro', (220, 220, 220)),
    ('ghostwhite', (248, 248, 255)),
    ('gold', (255, 215, 0)),
    ('goldenrod', (218, 165, 32)),
    ('gray', (128, 128, 128)),
    ('green', (0, 128, 0)),
    ('greenyellow', (173, 255, 47)),
    ('grey', (128, 128, 128)),
    ('honeydew', (240, 255, 240)),
    ('hotpink', (255, 105, 180)),
    ('indianred', (205, 92, 92)),
    ('indigo', (75, 0, 130)),
    ('ivory', (255, 255, 240)),
    ('khaki', (240, 230, 140)),
    ('lavender', (230, 230, 250)),
    ('lavenderblush', (255, 240, 245)),
    ('lawngreen', (124, 252, 0)),
    ('lemonchiffon', (255, 250, 205)),
    ('lightblue', (173, 216, 230)),
    ('lightcoral', (240, 128, 128)),
    ('lightcyan', (224, 255, 255)),
    ('lightgoldenrodyellow', (250, 250, 210)),
    ('lightgray', (211, 211, 211)),
    ('lightgreen', (144, 238, 144)),
    ('lightgrey', (211, 211, 211)),
    ('lightpink', (255, 182, 193)),
    ('lightsalmon', (255, 160, 122)),
    ('lightseagreen', (32, 178, 170)),
    ('lightskyblue', (135, 206, 250)),
    ('lightslategray', (119, 136, 153)),
    ('lightslategrey', (119, 136, 153)),
    ('lightsteelblue', (176, 196, 222)),
    ('lightyellow', (255, 255, 224)),
    ('lime', (0, 255, 0)),
    ('limegreen', (50, 205, 50)),
    ('linen', (250, 240, 230)),
    ('magenta', (255, 0, 255)),
    ('maroon', (128, 0, 0)),
    ('mediumaquamarine', (102, 205, 170)),
    ('mediumblue', (0, 0, 205)),
    ('mediumorchid', (186, 85, 211)),
    ('mediumpurple', (147, 112, 219)),
    ('mediumseagreen', (60, 179, 113)),
    ('mediumslateblue', (123, 104, 238)),
    ('mediumspringgreen', (0, 250, 154)),
    ('mediumturquoise', (72, 209, 204)),
    ('mediumvioletred', (199, 21, 133)),
    ('midnightblue', (25, 25, 112)),
    ('mintcream', (245, 255, 250)),
    ('mistyrose', (255, 228, 225)),
    ('moccasin', (255, 228, 181)),
    ('navajowhite', (255, 222, 173)),
    ('navy', (0, 0, 128)),
    ('oldlace', (253, 245, 230)),
    ('olive', (128, 128, 0)),
    ('olivedrab', (107, 142, 35)),
    ('orange', (255, 165, 0)),
    ('orangered', (255, 69, 0)),
    ('orchid', (218, 112, 214)),
    ('palegoldenrod', (238, 232, 170)),
    ('palegreen', (152, 251, 152)),
    ('paleturquoise', (175, 238, 238)),
    ('palevioletred', (219, 112, 147)),
    ('papayawhip', (255, 239, 213)),
    ('peachpuff', (255, 218, 185)),
    ('peru', (205, 133, 63)),
    ('pink', (255, 192, 203)),
    ('plum', (221, 160, 221)),
    ('powderblue', (176, 224, 230)),
    ('purple', (128, 0, 128)),
    ('red', (255, 0, 0)),
    ('rosybrown', (188, 143, 143)),
    ('royalblue', (65, 105, 225)),
    ('saddlebrown', (139, 69, 19)),
    ('salmon', (250, 128, 114)),
    ('sandybrown', (244, 164, 96)),
    ('seagreen', (46, 139, 87)),
    ('seashell', (255, 245, 238)),
    ('sienna', (160, 82, 45)),
    ('silver', (192, 192, 192)),
    ('skyblue', (135, 206, 235)),
    ('slateblue', (106, 90, 205)),
    ('slategray', (112, 128, 144)),
    ('slategrey', (112, 128, 144)),
    ('snow', (255, 250, 250)),
    ('springgreen', (0, 255, 127)),
    ('steelblue', (70, 130, 180)),
    ('tan', (210, 180, 140)),
    ('teal', (0, 128, 128)),
    ('thistle', (216, 191, 216)),
    ('tomato', (255, 99, 71)),
    ('turquoise', (64, 224, 208)),
    ('violet', (238, 130, 238)),
    ('wheat', (245, 222, 179)),
    ('white', (255, 255, 255)),
    ('whitesmoke', (245, 245, 245)),
    ('yellow', (255, 255, 0)),
    ('yellowgreen', (154, 205, 50)),
]


# (r, g, b, a) in 0..1 or a string marker
SPECIAL_COLOR_KEYWORDS = {
    'currentcolor': 'currentColor',
    'transparent': RGBA(0., 0., 0., 0.),
}


# RGBA namedtuples of (r, g, b, a) in 0..1 or a string marker
# Keys are lower-case; parse_color() lower-cases IDENT values before lookup.
COLOR_KEYWORDS = SPECIAL_COLOR_KEYWORDS.copy()
COLOR_KEYWORDS.update(
    # 255 maps to 1, 0 to 0, the rest is linear.
    (keyword, RGBA(r / 255., g / 255., b / 255., 1.))
    for keyword, (r, g, b) in itertools.chain(
        BASIC_COLOR_KEYWORDS, EXTENDED_COLOR_KEYWORDS))
384 |
--------------------------------------------------------------------------------
/tinycss/css21.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | tinycss.css21
4 | -------------
5 |
6 | Parser for CSS 2.1
7 | http://www.w3.org/TR/CSS21/syndata.html
8 |
9 | :copyright: (c) 2012 by Simon Sapin.
10 | :license: BSD, see LICENSE for more details.
11 | """
12 |
13 | from __future__ import unicode_literals
14 |
15 | from itertools import chain, islice
16 |
17 | from .decoding import decode
18 | from .parsing import (
19 | ParseError, remove_whitespace, split_on_comma, strip_whitespace,
20 | validate_any, validate_value)
21 | from .token_data import TokenList
22 | from .tokenizer import tokenize_grouped
23 |
24 |
25 | # stylesheet : [ CDO | CDC | S | statement ]*;
26 | # statement : ruleset | at-rule;
27 | # at-rule : ATKEYWORD S* any* [ block | ';' S* ];
28 | # block : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*;
29 | # ruleset : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*;
30 | # selector : any+;
31 | # declaration : property S* ':' S* value;
32 | # property : IDENT;
33 | # value : [ any | block | ATKEYWORD S* ]+;
34 | # any : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
35 | # | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
36 | # | DASHMATCH | ':' | FUNCTION S* [any|unused]* ')'
37 | # | '(' S* [any|unused]* ')' | '[' S* [any|unused]* ']'
38 | # ] S*;
39 | # unused : block | ATKEYWORD S* | ';' S* | CDO S* | CDC S*;
40 |
41 |
class Stylesheet(object):
    """
    A parsed CSS stylesheet.

    .. attribute:: rules

        A mixed list, in source order, of :class:`RuleSet` and various
        at-rules such as :class:`ImportRule`, :class:`MediaRule`
        and :class:`PageRule`.
        Use their :obj:`at_keyword` attribute to distinguish them.

    .. attribute:: errors

        A list of :class:`~.parsing.ParseError`. Invalid rules and declarations
        are ignored, with the details logged in this list.

    .. attribute:: encoding

        The character encoding that was used to decode the stylesheet
        from bytes, or ``None`` for Unicode stylesheets.

    """
    def __init__(self, rules, errors, encoding):
        self.rules = rules
        self.errors = errors
        self.encoding = encoding

    def __repr__(self):
        # Summarize by counts only; stylesheets can be arbitrarily large.
        return '<{0.__class__.__name__} {1} rules {2} errors>'.format(
            self, len(self.rules), len(self.errors))
72 |
73 |
class AtRule(object):
    """
    An unparsed at-rule.

    .. attribute:: at_keyword

        The normalized (lower-case) at-keyword as a string. Eg: ``'@page'``

    .. attribute:: head

        The part of the at-rule between the at-keyword and the ``{``
        marking the body, or the ``;`` marking the end of an at-rule without
        a body. A :class:`~.token_data.TokenList`.

    .. attribute:: body

        The content of the body between ``{`` and ``}`` as a
        :class:`~.token_data.TokenList`, or ``None`` if there is no body
        (ie. if the rule ends with ``;``).

    The head was validated against the core grammar but **not** the body,
    as the body might contain declarations. In case of an error in a
    declaration, parsing should continue from the next declaration.
    The whole rule should not be ignored as it would be for an error
    in the head.

    These at-rules are expected to be parsed further before reaching
    the user API.

    """
    def __init__(self, at_keyword, head, body, line, column):
        self.at_keyword = at_keyword
        self.head = TokenList(head)
        # body is None for at-rules terminated by ';' (no block): keep it
        # None rather than wrapping it in an empty TokenList.
        self.body = TokenList(body) if body is not None else body
        self.line = line
        self.column = column

    def __repr__(self):
        return ('<{0.__class__.__name__} {0.line}:{0.column} {0.at_keyword}>'
                .format(self))
114 |
115 |
class RuleSet(object):
    """A ruleset.

    .. attribute:: at_keyword

        Always ``None``. Helps to tell rulesets apart from at-rules.

    .. attribute:: selector

        The selector as a :class:`~.token_data.TokenList`.
        In CSS 3, this is actually called a selector group.

        ``rule.selector.as_css()`` gives the selector as a string.
        This string can be used with *cssselect*, see :ref:`selectors3`.

    .. attribute:: declarations

        The list of :class:`Declaration`, in source order.

    """

    # ``None`` distinguishes rulesets from at-rules when iterating
    # Stylesheet.rules.
    at_keyword = None

    def __init__(self, selector, declarations, line, column):
        self.selector = TokenList(selector)
        self.declarations = declarations
        self.line = line
        self.column = column

    def __repr__(self):
        return ('<{0.__class__.__name__} at {0.line}:{0.column} {1}>'
                .format(self, self.selector.as_css()))
148 |
149 |
class Declaration(object):
    """A property declaration.

    .. attribute:: name

        The property name as a normalized (lower-case) string.

    .. attribute:: value

        The property value as a :class:`~.token_data.TokenList`.

        The value is not parsed. UAs using tinycss may only support
        some properties or some values and tinycss does not know which.
        They need to parse values themselves and ignore declarations with
        unknown or unsupported properties or values, and fall back
        on any previous declaration.

        :mod:`tinycss.color3` parses color values, but other values
        will need specific parsing/validation code.

    .. attribute:: priority

        Either the string ``'important'`` or ``None``.

    """
    def __init__(self, name, value, priority, line, column):
        self.name = name
        self.value = TokenList(value)
        self.priority = priority
        self.line = line
        self.column = column

    def __repr__(self):
        # Render the priority back in its source form, eg. ' !important'.
        priority = ' !' + self.priority if self.priority else ''
        return ('<{0.__class__.__name__} {0.line}:{0.column}'
                ' {0.name}: {1}{2}>'.format(
                    self, self.value.as_css(), priority))
187 |
188 |
class PageRule(object):
    """A parsed CSS 2.1 @page rule.

    .. attribute:: at_keyword

        Always ``'@page'``

    .. attribute:: selector

        The page selector.
        In CSS 2.1 this is either ``None`` (no selector), or the string
        ``'first'``, ``'left'`` or ``'right'`` for the pseudo class
        of the same name.

    .. attribute:: specificity

        Specificity of the page selector. This is a tuple of four integers,
        but these tuples are mostly meant to be compared to each other.

    .. attribute:: declarations

        A list of :class:`Declaration`, in source order.

    .. attribute:: at_rules

        The list of parsed at-rules inside the @page block, in source order.
        Always empty for CSS 2.1.

    """
    at_keyword = '@page'

    def __init__(self, selector, specificity, declarations, at_rules,
                 line, column):
        self.selector = selector
        self.specificity = specificity
        self.declarations = declarations
        self.at_rules = at_rules
        self.line = line
        self.column = column

    def __repr__(self):
        return ('<{0.__class__.__name__} {0.line}:{0.column}'
                ' {0.selector}>'.format(self))
232 |
233 |
class MediaRule(object):
    """A parsed @media rule.

    .. attribute:: at_keyword

        Always ``'@media'``

    .. attribute:: media

        For CSS 2.1 without media queries: the media types
        as a list of strings.

    .. attribute:: rules

        The list :class:`RuleSet` and various at-rules inside the @media
        block, in source order.

    """
    at_keyword = '@media'

    def __init__(self, media, rules, line, column):
        self.media = media
        self.rules = rules
        self.line = line
        self.column = column

    def __repr__(self):
        return ('<{0.__class__.__name__} {0.line}:{0.column}'
                ' {0.media}>'.format(self))
263 |
264 |
class ImportRule(object):
    """A parsed @import rule.

    .. attribute:: at_keyword

        Always ``'@import'``

    .. attribute:: uri

        The URI to be imported, as read from the stylesheet.
        (URIs are not made absolute.)

    .. attribute:: media

        For CSS 2.1 without media queries: the media types
        as a list of strings.
        This attribute is explicitly ``['all']`` if the media was omitted
        in the source.

    """
    at_keyword = '@import'

    def __init__(self, uri, media, line, column):
        self.uri = uri
        self.media = media
        self.line = line
        self.column = column

    def __repr__(self):
        return ('<{0.__class__.__name__} {0.line}:{0.column}'
                ' {0.uri}>'.format(self))
296 |
297 |
298 | def _remove_at_charset(tokens):
299 | """Remove any valid @charset at the beggining of a token stream.
300 |
301 | :param tokens:
302 | An iterable of tokens
303 | :returns:
304 | A possibly truncated iterable of tokens
305 |
306 | """
307 | tokens = iter(tokens)
308 | header = list(islice(tokens, 4))
309 | if [t.type for t in header] == ['ATKEYWORD', 'S', 'STRING', ';']:
310 | atkw, space, string, semicolon = header
311 | if ((atkw.value, space.value) == ('@charset', ' ') and
312 | string.as_css()[0] == '"'):
313 | # Found a valid @charset rule, only keep what’s after it.
314 | return tokens
315 | return chain(header, tokens)
316 |
317 |
318 | class CSS21Parser(object):
319 | """Parser for CSS 2.1
320 |
321 | This parser supports the core CSS syntax as well as @import, @media,
322 | @page and !important.
323 |
324 | Note that property values are still not parsed, as UAs using this
325 | parser may only support some properties or some values.
326 |
327 | Currently the parser holds no state. It being a class only allows
328 | subclassing and overriding its methods.
329 |
330 | """
331 |
332 | # User API:
333 |
    def parse_stylesheet_file(self, css_file, protocol_encoding=None,
                              linking_encoding=None, document_encoding=None):
        """Parse a stylesheet from a file or filename.

        Character encoding-related parameters and behavior are the same
        as in :meth:`parse_stylesheet_bytes`.

        :param css_file:
            Either a file (any object with a :meth:`~file.read` method)
            or a filename.
        :return:
            A :class:`Stylesheet`.

        """
        if hasattr(css_file, 'read'):
            css_bytes = css_file.read()
        else:
            # css_file is a filename: read the raw bytes ourselves; decoding
            # is handled by parse_stylesheet_bytes().
            with open(css_file, 'rb') as fd:
                css_bytes = fd.read()
        return self.parse_stylesheet_bytes(css_bytes, protocol_encoding,
                                           linking_encoding, document_encoding)
355 |
    def parse_stylesheet_bytes(self, css_bytes, protocol_encoding=None,
                               linking_encoding=None, document_encoding=None):
        """Parse a stylesheet from a byte string.

        The character encoding is determined from the passed metadata and the
        ``@charset`` rule in the stylesheet (if any).
        If no encoding information is available or decoding fails,
        decoding defaults to UTF-8 and then fall back on ISO-8859-1.

        :param css_bytes:
            A CSS stylesheet as a byte string.
        :param protocol_encoding:
            The "charset" parameter of a "Content-Type" HTTP header (if any),
            or similar metadata for other protocols.
        :param linking_encoding:
            ``<link charset="">`` or other metadata from the linking mechanism
            (if any)
        :param document_encoding:
            Encoding of the referring style sheet or document (if any)
        :return:
            A :class:`Stylesheet`.

        """
        css_unicode, encoding = decode(css_bytes, protocol_encoding,
                                       linking_encoding, document_encoding)
        return self.parse_stylesheet(css_unicode, encoding=encoding)
382 |
383 | def parse_stylesheet(self, css_unicode, encoding=None):
384 | """Parse a stylesheet from an Unicode string.
385 |
386 | :param css_unicode:
387 | A CSS stylesheet as an unicode string.
388 | :param encoding:
389 | The character encoding used to decode the stylesheet from bytes,
390 | if any.
391 | :return:
392 | A :class:`Stylesheet`.
393 |
394 | """
395 | tokens = tokenize_grouped(css_unicode)
396 | if encoding:
397 | tokens = _remove_at_charset(tokens)
398 | rules, errors = self.parse_rules(tokens, context='stylesheet')
399 | return Stylesheet(rules, errors, encoding)
400 |
401 | def parse_style_attr(self, css_source):
402 | """Parse a "style" attribute (eg. of an HTML element).
403 |
404 | This method only accepts Unicode as the source (HTML) document
405 | is supposed to handle the character encoding.
406 |
407 | :param css_source:
408 | The attribute value, as an unicode string.
409 | :return:
410 | A tuple of the list of valid :class:`Declaration` and
411 | a list of :class:`~.parsing.ParseError`.
412 | """
413 | return self.parse_declaration_list(tokenize_grouped(css_source))
414 |
415 | # API for subclasses:
416 |
417 | def parse_rules(self, tokens, context):
418 | """Parse a sequence of rules (rulesets and at-rules).
419 |
420 | :param tokens:
421 | An iterable of tokens.
422 | :param context:
423 | Either ``'stylesheet'`` or an at-keyword such as ``'@media'``.
424 | (Most at-rules are only allowed in some contexts.)
425 | :return:
426 | A tuple of a list of parsed rules and a list of
427 | :class:`~.parsing.ParseError`.
428 |
429 | """
430 | rules = []
431 | errors = []
432 | tokens = iter(tokens)
433 | for token in tokens:
434 | if token.type not in ('S', 'CDO', 'CDC'):
435 | try:
436 | if token.type == 'ATKEYWORD':
437 | rule = self.read_at_rule(token, tokens)
438 | result = self.parse_at_rule(
439 | rule, rules, errors, context)
440 | rules.append(result)
441 | else:
442 | rule, rule_errors = self.parse_ruleset(token, tokens)
443 | rules.append(rule)
444 | errors.extend(rule_errors)
445 | except ParseError as exc:
446 | errors.append(exc)
447 | # Skip the entire rule
448 | return rules, errors
449 |
450 | def read_at_rule(self, at_keyword_token, tokens):
451 | """Read an at-rule from a token stream.
452 |
453 | :param at_keyword_token:
454 | The ATKEYWORD token that starts this at-rule
455 | You may have read it already to distinguish the rule
456 | from a ruleset.
457 | :param tokens:
458 | An iterator of subsequent tokens. Will be consumed just enough
459 | for one at-rule.
460 | :return:
461 | An unparsed :class:`AtRule`.
462 | :raises:
463 | :class:`~.parsing.ParseError` if the head is invalid for the core
464 | grammar. The body is **not** validated. See :class:`AtRule`.
465 |
466 | """
467 | # CSS syntax is case-insensitive
468 | at_keyword = at_keyword_token.value.lower()
469 | head = []
470 | # For the ParseError in case `tokens` is empty:
471 | token = at_keyword_token
472 | for token in tokens:
473 | if token.type in '{;':
474 | break
475 | # Ignore white space just after the at-keyword.
476 | else:
477 | head.append(token)
478 | # On unexpected end of stylesheet, pretend that a ';' was there
479 | head = strip_whitespace(head)
480 | for head_token in head:
481 | validate_any(head_token, 'at-rule head')
482 | body = token.content if token.type == '{' else None
483 | return AtRule(at_keyword, head, body,
484 | at_keyword_token.line, at_keyword_token.column)
485 |
486 | def parse_at_rule(self, rule, previous_rules, errors, context):
487 | """Parse an at-rule.
488 |
489 | Subclasses that override this method must use ``super()`` and
490 | pass its return value for at-rules they do not know.
491 |
492 | In CSS 2.1, this method handles @charset, @import, @media and @page
493 | rules.
494 |
495 | :param rule:
496 | An unparsed :class:`AtRule`.
497 | :param previous_rules:
498 | The list of at-rules and rulesets that have been parsed so far
499 | in this context. This list can be used to decide if the current
500 | rule is valid. (For example, @import rules are only allowed
501 | before anything but a @charset rule.)
502 | :param context:
503 | Either ``'stylesheet'`` or an at-keyword such as ``'@media'``.
504 | (Most at-rules are only allowed in some contexts.)
505 | :raises:
506 | :class:`~.parsing.ParseError` if the rule is invalid.
507 | :return:
508 | A parsed at-rule
509 |
510 | """
511 | if rule.at_keyword == '@page':
512 | if context != 'stylesheet':
513 | raise ParseError(rule, '@page rule not allowed in ' + context)
514 | selector, specificity = self.parse_page_selector(rule.head)
515 | if rule.body is None:
516 | raise ParseError(
517 | rule, 'invalid {0} rule: missing block'.format(
518 | rule.at_keyword))
519 | declarations, at_rules, rule_errors = \
520 | self.parse_declarations_and_at_rules(rule.body, '@page')
521 | errors.extend(rule_errors)
522 | return PageRule(selector, specificity, declarations, at_rules,
523 | rule.line, rule.column)
524 |
525 | elif rule.at_keyword == '@media':
526 | if context != 'stylesheet':
527 | raise ParseError(rule, '@media rule not allowed in ' + context)
528 | if not rule.head:
529 | raise ParseError(rule, 'expected media types for @media')
530 | media = self.parse_media(rule.head)
531 | if rule.body is None:
532 | raise ParseError(
533 | rule, 'invalid {0} rule: missing block'.format(
534 | rule.at_keyword))
535 | rules, rule_errors = self.parse_rules(rule.body, '@media')
536 | errors.extend(rule_errors)
537 | return MediaRule(media, rules, rule.line, rule.column)
538 |
539 | elif rule.at_keyword == '@import':
540 | if context != 'stylesheet':
541 | raise ParseError(
542 | rule, '@import rule not allowed in ' + context)
543 | for previous_rule in previous_rules:
544 | if previous_rule.at_keyword not in ('@charset', '@import'):
545 | if previous_rule.at_keyword:
546 | type_ = 'an {0} rule'.format(previous_rule.at_keyword)
547 | else:
548 | type_ = 'a ruleset'
549 | raise ParseError(
550 | previous_rule,
551 | '@import rule not allowed after ' + type_)
552 | head = rule.head
553 | if not head:
554 | raise ParseError(
555 | rule, 'expected URI or STRING for @import rule')
556 | if head[0].type not in ('URI', 'STRING'):
557 | raise ParseError(
558 | rule, 'expected URI or STRING for @import rule, got ' +
559 | head[0].type)
560 | uri = head[0].value
561 | media = self.parse_media(strip_whitespace(head[1:]))
562 | if rule.body is not None:
563 | # The position of the ';' token would be best, but we don’t
564 | # have it anymore here.
565 | raise ParseError(head[-1], "expected ';', got a block")
566 | return ImportRule(uri, media, rule.line, rule.column)
567 |
568 | elif rule.at_keyword == '@charset':
569 | raise ParseError(rule, 'mis-placed or malformed @charset rule')
570 |
571 | else:
572 | raise ParseError(
573 | rule, 'unknown at-rule in {0} context: {1}'.format(
574 | context, rule.at_keyword))
575 |
576 | def parse_media(self, tokens):
577 | """For CSS 2.1, parse a list of media types.
578 |
579 | Media Queries are expected to override this.
580 |
581 | :param tokens:
582 | A list of tokens
583 | :raises:
584 | :class:`~.parsing.ParseError` on invalid media types/queries
585 | :returns:
586 | For CSS 2.1, a list of media types as strings
587 | """
588 | if not tokens:
589 | return ['all']
590 | media_types = []
591 | for part in split_on_comma(remove_whitespace(tokens)):
592 | types = [token.type for token in part]
593 | if types == ['IDENT']:
594 | media_types.append(part[0].value)
595 | else:
596 | raise ParseError(
597 | tokens[0], 'expected a media type' +
598 | ((', got ' + ', '.join(types)) if types else ''))
599 | return media_types
600 |
601 | def parse_page_selector(self, tokens):
602 | """Parse an @page selector.
603 |
604 | :param tokens:
605 | An iterable of token, typically from the ``head`` attribute of
606 | an unparsed :class:`AtRule`.
607 | :returns:
608 | A page selector. For CSS 2.1, this is ``'first'``, ``'left'``,
609 | ``'right'`` or ``None``.
610 | :raises:
611 | :class:`~.parsing.ParseError` on invalid selectors
612 |
613 | """
614 | if not tokens:
615 | return None, (0, 0)
616 | if (len(tokens) == 2 and tokens[0].type == ':' and
617 | tokens[1].type == 'IDENT'):
618 | pseudo_class = tokens[1].value
619 | specificity = {
620 | 'first': (1, 0), 'left': (0, 1), 'right': (0, 1),
621 | }.get(pseudo_class)
622 | if specificity:
623 | return pseudo_class, specificity
624 | raise ParseError(tokens[0], 'invalid @page selector')
625 |
626 | def parse_declarations_and_at_rules(self, tokens, context):
627 | """Parse a mixed list of declarations and at rules, as found eg.
628 | in the body of an @page rule.
629 |
630 | Note that to add supported at-rules inside @page,
631 | :class:`~.page3.CSSPage3Parser` extends :meth:`parse_at_rule`,
632 | not this method.
633 |
634 | :param tokens:
635 | An iterable of token, typically from the ``body`` attribute of
636 | an unparsed :class:`AtRule`.
637 | :param context:
638 | An at-keyword such as ``'@page'``.
639 | (Most at-rules are only allowed in some contexts.)
640 | :returns:
641 | A tuple of:
642 |
643 | * A list of :class:`Declaration`
644 | * A list of parsed at-rules (empty for CSS 2.1)
645 | * A list of :class:`~.parsing.ParseError`
646 |
647 | """
648 | at_rules = []
649 | declarations = []
650 | errors = []
651 | tokens = iter(tokens)
652 | for token in tokens:
653 | if token.type == 'ATKEYWORD':
654 | try:
655 | rule = self.read_at_rule(token, tokens)
656 | result = self.parse_at_rule(
657 | rule, at_rules, errors, context)
658 | at_rules.append(result)
659 | except ParseError as err:
660 | errors.append(err)
661 | elif token.type != 'S':
662 | declaration_tokens = []
663 | while token and token.type != ';':
664 | declaration_tokens.append(token)
665 | token = next(tokens, None)
666 | if declaration_tokens:
667 | try:
668 | declarations.append(
669 | self.parse_declaration(declaration_tokens))
670 | except ParseError as err:
671 | errors.append(err)
672 | return declarations, at_rules, errors
673 |
674 | def parse_ruleset(self, first_token, tokens):
675 | """Parse a ruleset: a selector followed by declaration block.
676 |
677 | :param first_token:
678 | The first token of the ruleset (probably of the selector).
679 | You may have read it already to distinguish the rule
680 | from an at-rule.
681 | :param tokens:
682 | an iterator of subsequent tokens. Will be consumed just enough
683 | for one ruleset.
684 | :return:
685 | a tuple of a :class:`RuleSet` and an error list.
686 | The errors are recovered :class:`~.parsing.ParseError` in
687 | declarations. (Parsing continues from the next declaration on such
688 | errors.)
689 | :raises:
690 | :class:`~.parsing.ParseError` if the selector is invalid for the
691 | core grammar.
692 | Note a that a selector can be valid for the core grammar but
693 | not for CSS 2.1 or another level.
694 |
695 | """
696 | selector = []
697 | for token in chain([first_token], tokens):
698 | if token.type == '{':
699 | # Parse/validate once we’ve read the whole rule
700 | selector = strip_whitespace(selector)
701 | if not selector:
702 | raise ParseError(first_token, 'empty selector')
703 | for selector_token in selector:
704 | validate_any(selector_token, 'selector')
705 | declarations, errors = self.parse_declaration_list(
706 | token.content)
707 | ruleset = RuleSet(selector, declarations,
708 | first_token.line, first_token.column)
709 | return ruleset, errors
710 | else:
711 | selector.append(token)
712 | raise ParseError(token, 'no declaration block found for ruleset')
713 |
714 | def parse_declaration_list(self, tokens):
715 | """Parse a ``;`` separated declaration list.
716 |
717 | You may want to use :meth:`parse_declarations_and_at_rules` (or
718 | some other method that uses :func:`parse_declaration` directly)
719 | instead if you have not just declarations in the same context.
720 |
721 | :param tokens:
722 | an iterable of tokens. Should stop at (before) the end
723 | of the block, as marked by ``}``.
724 | :return:
725 | a tuple of the list of valid :class:`Declaration` and a list
726 | of :class:`~.parsing.ParseError`
727 |
728 | """
729 | # split at ';'
730 | parts = []
731 | this_part = []
732 | for token in tokens:
733 | if token.type == ';':
734 | parts.append(this_part)
735 | this_part = []
736 | else:
737 | this_part.append(token)
738 | parts.append(this_part)
739 |
740 | declarations = []
741 | errors = []
742 | for tokens in parts:
743 | tokens = strip_whitespace(tokens)
744 | if tokens:
745 | try:
746 | declarations.append(self.parse_declaration(tokens))
747 | except ParseError as exc:
748 | errors.append(exc)
749 | # Skip the entire declaration
750 | return declarations, errors
751 |
752 | def parse_declaration(self, tokens):
753 | """Parse a single declaration.
754 |
755 | :param tokens:
756 | an iterable of at least one token. Should stop at (before)
757 | the end of the declaration, as marked by a ``;`` or ``}``.
758 | Empty declarations (ie. consecutive ``;`` with only white space
759 | in-between) should be skipped earlier and not passed to
760 | this method.
761 | :returns:
762 | a :class:`Declaration`
763 | :raises:
764 | :class:`~.parsing.ParseError` if the tokens do not match the
765 | 'declaration' production of the core grammar.
766 |
767 | """
768 | tokens = iter(tokens)
769 |
770 | name_token = next(tokens) # assume there is at least one
771 | if name_token.type == 'IDENT':
772 | # CSS syntax is case-insensitive
773 | property_name = name_token.value.lower()
774 | else:
775 | raise ParseError(
776 | name_token, 'expected a property name, got {0}'.format(
777 | name_token.type))
778 |
779 | token = name_token # In case ``tokens`` is now empty
780 | for token in tokens:
781 | if token.type == ':':
782 | break
783 | elif token.type != 'S':
784 | raise ParseError(
785 | token, "expected ':', got {0}".format(token.type))
786 | else:
787 | raise ParseError(token, "expected ':'")
788 |
789 | value = strip_whitespace(list(tokens))
790 | if not value:
791 | raise ParseError(token, 'expected a property value')
792 | validate_value(value)
793 | value, priority = self.parse_value_priority(value)
794 | return Declaration(
795 | property_name, value, priority, name_token.line, name_token.column)
796 |
797 | def parse_value_priority(self, tokens):
798 | """Separate any ``!important`` marker at the end of a property value.
799 |
800 | :param tokens:
801 | A list of tokens for the property value.
802 | :returns:
803 | A tuple of the actual property value (a list of tokens)
804 | and the :attr:`~Declaration.priority`.
805 | """
806 | value = list(tokens)
807 | # Walk the token list from the end
808 | token = value.pop()
809 | if token.type == 'IDENT' and token.value.lower() == 'important':
810 | while value:
811 | token = value.pop()
812 | if token.type == 'DELIM' and token.value == '!':
813 | # Skip any white space before the '!'
814 | while value and value[-1].type == 'S':
815 | value.pop()
816 | if not value:
817 | raise ParseError(
818 | token, 'expected a value before !important')
819 | return value, 'important'
820 | # Skip white space between '!' and 'important'
821 | elif token.type != 'S':
822 | break
823 | return tokens, None
824 |
--------------------------------------------------------------------------------
/tinycss/decoding.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | tinycss.decoding
4 | ----------------
5 |
6 | Decoding stylesheets from bytes to Unicode.
7 | http://www.w3.org/TR/CSS21/syndata.html#charset
8 |
9 | :copyright: (c) 2012 by Simon Sapin.
10 | :license: BSD, see LICENSE for more details.
11 | """
12 |
13 | from __future__ import unicode_literals
14 |
15 | import operator
16 | import re
17 | from binascii import unhexlify
18 |
19 | __all__ = ['decode'] # Everything else is implementation detail
20 |
21 |
def decode(css_bytes, protocol_encoding=None,
           linking_encoding=None, document_encoding=None):
    """Decode a CSS stylesheet from bytes to Unicode.

    The character encoding is determined from the passed metadata and
    the ``@charset`` rule in the stylesheet (if any); the bytes are then
    decoded accordingly. If no encoding information is available or
    decoding fails, decoding defaults to UTF-8 and then falls back on
    ISO-8859-1.

    :param css_bytes:
        a CSS stylesheet as a byte string
    :param protocol_encoding:
        The "charset" parameter of a "Content-Type" HTTP header (if any),
        or similar metadata for other protocols.
    :param linking_encoding:
        ``<link charset="">`` or other metadata from the linking
        mechanism (if any)
    :param document_encoding:
        Encoding of the referring style sheet or document (if any)
    :return:
        A tuple of a Unicode string, with any BOM removed, and the
        encoding that was used.

    """
    # 1. Protocol-level metadata has the highest priority.
    if protocol_encoding:
        decoded = try_encoding(css_bytes, protocol_encoding)
        if decoded is not None:
            return decoded, protocol_encoding
    # 2. Sniff a BOM and/or an embedded '@charset "…";' rule.
    #    Only the first matching pattern is ever tried.
    for candidate, pattern in ENCODING_MAGIC_NUMBERS:
        match = pattern(css_bytes)
        if not match:
            continue
        from_at_charset = isinstance(candidate, tuple)
        if from_at_charset:
            extract, endianness = candidate
            candidate = extract(match.group(1))
            # Get an ASCII-only unicode value.
            # This is the only thing that works on both Python 2 and 3
            # for bytes.decode()
            # Non-ASCII encoding names are invalid anyway,
            # but make sure they stay invalid.
            candidate = candidate.decode('ascii', 'replace')
            candidate = candidate.replace('\ufffd', '?')
            # Plain 'utf16'/'utf32' need the endianness implied by the
            # byte pattern that matched.
            if candidate.replace('-', '').replace('_', '').lower() in [
                    'utf16', 'utf32']:
                candidate += endianness
            candidate = candidate.encode('ascii', 'replace').decode('ascii')
        decoded = try_encoding(css_bytes, candidate)
        # An @charset-derived encoding must round-trip: the decoded text
        # has to actually start with the rule we extracted it from.
        if decoded and (not from_at_charset or
                        decoded.startswith('@charset "')):
            return decoded, candidate
        break
    # 3. Metadata from the linking mechanism, then the referring document.
    for candidate in (linking_encoding, document_encoding):
        if candidate:
            decoded = try_encoding(css_bytes, candidate)
            if decoded is not None:
                return decoded, candidate
    # 4. Last resorts: UTF-8, then ISO-8859-1 (which never fails).
    decoded = try_encoding(css_bytes, 'UTF-8')
    if decoded is not None:
        return decoded, 'UTF-8'
    return try_encoding(css_bytes, 'ISO-8859-1', fallback=False), 'ISO-8859-1'
81 |
82 |
def try_encoding(css_bytes, encoding, fallback=True):
    """Decode *css_bytes* with *encoding*, stripping any leading BOM.

    :param css_bytes: the raw stylesheet bytes
    :param encoding: an encoding name to try
    :param fallback:
        When true, return ``None`` on failure (unknown encoding or
        undecodable bytes) instead of raising.
    :return: the decoded Unicode string, or ``None`` on failure.
    """
    if not fallback:
        # Caller guarantees the encoding works (eg. ISO-8859-1).
        decoded = css_bytes.decode(encoding)
    else:
        try:
            decoded = css_bytes.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # LookupError means the encoding name is unknown.
            return None
    # Remove any Byte Order Mark
    if decoded.startswith('\ufeff'):
        decoded = decoded[1:]
    return decoded
96 |
97 |
def hex2re(hex_data):
    """Turn a space-separated hex string into a regex-escaped bytes literal.

    Eg. ``hex2re('FE FF')`` gives the escaped form of ``b'\\xfe\\xff'``,
    ready to be concatenated into a byte-pattern for :func:`re.compile`.
    """
    raw_bytes = unhexlify(hex_data.replace(' ', '').encode('ascii'))
    return re.escape(raw_bytes)
100 |
101 |
class Slicer(object):
    """Subscript sugar for building byte extractors.

    ``Slicer()[start:stop:step]`` returns
    ``operator.itemgetter(slice(start, stop, step))``, ie. a callable
    that applies that slice to its argument.
    """

    def __getitem__(self, key):
        # ``key`` is the slice object built by the subscript syntax.
        return operator.itemgetter(key)
106 |
107 |
108 | Slice = Slicer()
109 |
110 |
111 | # List of (bom_size, encoding, pattern)
112 | # bom_size is in bytes and can be zero
113 | # encoding is a string or (slice_, endianness) for "as specified"
# slice_ is a slice object describing how to extract the bytes of the specified encoding name from the match.
115 |
# Tried in order by decode(); only the first matching pattern is used.
# The hex patterns below spell '@charset "' (and its UTF-16/32 variants),
# optionally preceded by a BOM; group 1 captures the encoding name.
ENCODING_MAGIC_NUMBERS = [
    # UTF-8 BOM followed by an @charset rule: use the name as-is.
    ((Slice[:], ''), re.compile(
        hex2re('EF BB BF 40 63 68 61 72 73 65 74 20 22') +
        b'([^\x22]*?)' +
        hex2re('22 3B')).match),

    # UTF-8 BOM alone.
    ('UTF-8', re.compile(
        hex2re('EF BB BF')).match),

    # @charset rule with no BOM.
    ((Slice[:], ''), re.compile(
        hex2re('40 63 68 61 72 73 65 74 20 22') +
        b'([^\x22]*?)' +
        hex2re('22 3B')).match),

    # UTF-16-BE @charset, with and without BOM; the slice drops the
    # high zero bytes and the '-BE' suffix disambiguates 'utf16'.
    ((Slice[1::2], '-BE'), re.compile(
        hex2re('FE FF 00 40 00 63 00 68 00 61 00 72 00 73 00 65 00'
               '74 00 20 00 22') +
        b'((\x00[^\x22])*?)' +
        hex2re('00 22 00 3B')).match),

    ((Slice[1::2], '-BE'), re.compile(
        hex2re('00 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00'
               '20 00 22') +
        b'((\x00[^\x22])*?)' +
        hex2re('00 22 00 3B')).match),

    # UTF-16-LE @charset, with and without BOM.
    ((Slice[::2], '-LE'), re.compile(
        hex2re('FF FE 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74'
               '00 20 00 22 00') +
        b'(([^\x22]\x00)*?)' +
        hex2re('22 00 3B 00')).match),

    ((Slice[::2], '-LE'), re.compile(
        hex2re('40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00 20'
               '00 22 00') +
        b'(([^\x22]\x00)*?)' +
        hex2re('22 00 3B 00')).match),

    # UTF-32-BE @charset, with and without BOM.
    ((Slice[3::4], '-BE'), re.compile(
        hex2re('00 00 FE FF 00 00 00 40 00 00 00 63 00 00 00 68 00'
               '00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00'
               '00 74 00 00 00 20 00 00 00 22') +
        b'((\x00\x00\x00[^\x22])*?)' +
        hex2re('00 00 00 22 00 00 00 3B')).match),

    ((Slice[3::4], '-BE'), re.compile(
        hex2re('00 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00'
               '00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00'
               '00 20 00 00 00 22') +
        b'((\x00\x00\x00[^\x22])*?)' +
        hex2re('00 00 00 22 00 00 00 3B')).match),


    # Python does not support 2143 or 3412 endianness, AFAIK.
    # I guess we could fix it up ourselves but meh. Patches welcome.

    # ((Slice[2::4], '-2143'), re.compile(
    #     hex2re('00 00 FF FE 00 00 40 00 00 00 63 00 00 00 68 00 00'
    #            '00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00'
    #            '74 00 00 00 20 00 00 00 22 00') +
    #     b'((\x00\x00[^\x22]\x00)*?)' +
    #     hex2re('00 00 22 00 00 00 3B 00')).match),

    # ((Slice[2::4], '-2143'), re.compile(
    #     hex2re('00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00'
    #            '00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00'
    #            '20 00 00 00 22 00') +
    #     b'((\x00\x00[^\x22]\x00)*?)' +
    #     hex2re('00 00 22 00 00 00 3B 00')).match),

    # ((Slice[1::4], '-3412'), re.compile(
    #     hex2re('FE FF 00 00 00 40 00 00 00 63 00 00 00 68 00 00 00'
    #            '61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74'
    #            '00 00 00 20 00 00 00 22 00 00') +
    #     b'((\x00[^\x22]\x00\x00)*?)' +
    #     hex2re('00 22 00 00 00 3B 00 00')).match),

    # ((Slice[1::4], '-3412'), re.compile(
    #     hex2re('00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00'
    #            '72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20'
    #            '00 00 00 22 00 00') +
    #     b'((\x00[^\x22]\x00\x00)*?)' +
    #     hex2re('00 22 00 00 00 3B 00 00')).match),

    # UTF-32-LE @charset, with and without BOM.
    ((Slice[::4], '-LE'), re.compile(
        hex2re('FF FE 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61'
               '00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00'
               '00 00 20 00 00 00 22 00 00 00') +
        b'(([^\x22]\x00\x00\x00)*?)' +
        hex2re('22 00 00 00 3B 00 00 00')).match),

    ((Slice[::4], '-LE'), re.compile(
        hex2re('40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72'
               '00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00'
               '00 00 22 00 00 00') +
        b'(([^\x22]\x00\x00\x00)*?)' +
        hex2re('22 00 00 00 3B 00 00 00')).match),

    # Bare BOMs, longest first so UTF-32 wins over UTF-16.
    ('UTF-32-BE', re.compile(
        hex2re('00 00 FE FF')).match),

    ('UTF-32-LE', re.compile(
        hex2re('FF FE 00 00')).match),

    # ('UTF-32-2143', re.compile(
    #     hex2re('00 00 FF FE')).match),

    # ('UTF-32-3412', re.compile(
    #     hex2re('FE FF 00 00')).match),

    ('UTF-16-BE', re.compile(
        hex2re('FE FF')).match),

    ('UTF-16-LE', re.compile(
        hex2re('FF FE')).match),


    # Some of there are supported by Python, but I didn’t bother.
    # You know the story with patches ...

    # # as specified, transcoded from EBCDIC to ASCII
    # ('as_specified-EBCDIC', re.compile(
    #     hex2re('7C 83 88 81 99 A2 85 A3 40 7F')
    #     + b'([^\x7F]*?)'
    #     + hex2re('7F 5E')).match),

    # # as specified, transcoded from IBM1026 to ASCII
    # ('as_specified-IBM1026', re.compile(
    #     hex2re('AE 83 88 81 99 A2 85 A3 40 FC')
    #     + b'([^\xFC]*?)'
    #     + hex2re('FC 5E')).match),

    # # as specified, transcoded from GSM 03.38 to ASCII
    # ('as_specified-GSM_03.38', re.compile(
    #     hex2re('00 63 68 61 72 73 65 74 20 22')
    #     + b'([^\x22]*?)'
    #     + hex2re('22 3B')).match),
]
254 |
--------------------------------------------------------------------------------
/tinycss/fonts3.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
tinycss.fonts3
--------------
5 |
6 | Parser for CSS 3 Fonts syntax:
7 | https://www.w3.org/TR/css-fonts-3/
8 |
9 | Adds support for font-face and font-feature-values rules.
10 |
11 | :copyright: (c) 2016 by Kozea.
12 | :license: BSD, see LICENSE for more details.
13 | """
14 |
15 | from __future__ import division, unicode_literals
16 |
17 | from .css21 import CSS21Parser, ParseError
18 |
19 |
class FontFaceRule(object):
    """A parsed ``@font-face`` at-rule.

    .. attribute:: at_keyword

        Always ``'@font-face'``.

    .. attribute:: declarations

        A list of :class:`~.css21.Declaration` objects.

    .. attribute:: line

        Source line where this was read.

    .. attribute:: column

        Source column where this was read.

    """

    def __init__(self, at_keyword, declarations, line, column):
        # Only the parser builds these, and only for '@font-face'.
        assert at_keyword == '@font-face'
        self.line = line
        self.column = column
        self.at_keyword = at_keyword
        self.declarations = declarations
47 |
48 |
class FontFeatureValuesRule(object):
    """A parsed ``@font-feature-values`` at-rule.

    .. attribute:: at_keyword

        Always ``'@font-feature-values'``.

    .. attribute:: line

        Source line where this was read.

    .. attribute:: column

        Source column where this was read.

    .. attribute:: at_rules

        The list of parsed at-rules inside the @font-feature-values block, in
        source order.

    .. attribute:: family_names

        A list of strings representing font families.

    """

    def __init__(self, at_keyword, at_rules, family_names, line, column):
        # Only the parser builds these, and only for this one keyword.
        assert at_keyword == '@font-feature-values'
        self.line = line
        self.column = column
        self.at_keyword = at_keyword
        self.at_rules = at_rules
        self.family_names = family_names
82 |
83 |
class FontFeatureRule(object):
    """A parsed at-rule for font features.

    .. attribute:: at_keyword

        One of the 6 following strings:

        * ``@stylistic``
        * ``@styleset``
        * ``@character-variant``
        * ``@swash``
        * ``@ornaments``
        * ``@annotation``

    .. attribute:: declarations

        A list of :class:`~.css21.Declaration` objects.

    .. attribute:: line

        Source line where this was read.

    .. attribute:: column

        Source column where this was read.

    """

    def __init__(self, at_keyword, declarations, line, column):
        # No assert here: at_keyword varies (see the list above); it is
        # validated by CSSFonts3Parser.parse_at_rule before construction.
        self.at_keyword = at_keyword
        self.declarations = declarations
        self.line = line
        self.column = column
117 |
118 |
class CSSFonts3Parser(CSS21Parser):
    """Extend :class:`~.css21.CSS21Parser` for `CSS 3 Fonts`_ syntax.

    .. _CSS 3 Fonts: https://www.w3.org/TR/css-fonts-3/

    """

    # At-keywords only allowed inside an @font-feature-values block.
    FONT_FEATURE_VALUES_AT_KEYWORDS = [
        '@stylistic',
        '@styleset',
        '@character-variant',
        '@swash',
        '@ornaments',
        '@annotation',
    ]

    def parse_at_rule(self, rule, previous_rules, errors, context):
        """Parse the font-related at-rules; delegate anything else.

        Handles ``@font-face``, ``@font-feature-values`` and the nested
        feature at-rules listed in :attr:`FONT_FEATURE_VALUES_AT_KEYWORDS`;
        other at-rules fall through to :class:`~.css21.CSS21Parser`.
        """
        if rule.at_keyword == '@font-face':
            if rule.head:
                raise ParseError(
                    rule.head[0],
                    'unexpected {0} token in {1} rule header'.format(
                        rule.head[0].type, rule.at_keyword))
            if rule.body is None:
                # '@font-face;' without a block used to crash with a
                # TypeError (iterating None). Report a proper ParseError
                # instead, matching @page/@media in CSS21Parser.
                raise ParseError(
                    rule, 'invalid {0} rule: missing block'.format(
                        rule.at_keyword))
            declarations, body_errors = self.parse_declaration_list(rule.body)
            errors.extend(body_errors)
            return FontFaceRule(
                rule.at_keyword, declarations, rule.line, rule.column)
        elif rule.at_keyword == '@font-feature-values':
            if not rule.head:
                # An empty selector used to hit an unbound ``token``
                # (NameError) inside the family-name generator; fail
                # with a parse error instead.
                raise ParseError(
                    rule, 'expected font families for @font-feature-values')
            family_names = tuple(
                self.parse_font_feature_values_family_names(rule.head))
            at_rules, body_errors = (
                self.parse_rules(rule.body or [], '@font-feature-values'))
            errors.extend(body_errors)
            return FontFeatureValuesRule(
                rule.at_keyword, at_rules, family_names,
                rule.line, rule.column)
        elif rule.at_keyword in self.FONT_FEATURE_VALUES_AT_KEYWORDS:
            if context != '@font-feature-values':
                raise ParseError(
                    rule, '{0} rule not allowed in {1}'.format(
                        rule.at_keyword, context))
            if rule.body is None:
                # Same missing-block protection as @font-face above.
                raise ParseError(
                    rule, 'invalid {0} rule: missing block'.format(
                        rule.at_keyword))
            declarations, body_errors = self.parse_declaration_list(rule.body)
            errors.extend(body_errors)
            return FontFeatureRule(
                rule.at_keyword, declarations, rule.line, rule.column)
        return super(CSSFonts3Parser, self).parse_at_rule(
            rule, previous_rules, errors, context)

    def parse_font_feature_values_family_names(self, tokens):
        """Parse an @font-feature-values selector.

        :param tokens:
            A non-empty iterable of tokens, typically from the ``head``
            attribute of an unparsed :class:`AtRule`.
        :returns:
            A generator of strings representing font families.
        :raises:
            :class:`~.parsing.ParseError` on invalid selectors

        """
        family = ''
        current_string = False
        for token in tokens:
            if token.type == 'DELIM' and token.value == ',' and family:
                # End of one family name in a comma-separated list.
                yield family
                family = ''
                current_string = False
            elif token.type == 'STRING' and not family and (
                    current_string is False):
                # A quoted family name must stand alone.
                family = token.value
                current_string = True
            elif token.type == 'IDENT' and not current_string:
                # Unquoted names may be several space-separated idents.
                if family:
                    family += ' '
                family += token.value
            elif token.type != 'S':
                family = ''
                break
        if family:
            yield family
        else:
            raise ParseError(token, 'invalid @font-feature-values selector')
201 |
--------------------------------------------------------------------------------
/tinycss/page3.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | tinycss.page3
4 | ------------------
5 |
6 | Support for CSS 3 Paged Media syntax:
7 | http://dev.w3.org/csswg/css3-page/
8 |
9 | Adds support for named page selectors and margin rules.
10 |
11 | :copyright: (c) 2012 by Simon Sapin.
12 | :license: BSD, see LICENSE for more details.
13 | """
14 |
15 | from __future__ import division, unicode_literals
16 |
17 | from .css21 import CSS21Parser, ParseError
18 |
19 |
class MarginRule(object):
    """A parsed at-rule for margin box.

    .. attribute:: at_keyword

        One of the 16 following strings:

        * ``@top-left-corner``
        * ``@top-left``
        * ``@top-center``
        * ``@top-right``
        * ``@top-right-corner``
        * ``@bottom-left-corner``
        * ``@bottom-left``
        * ``@bottom-center``
        * ``@bottom-right``
        * ``@bottom-right-corner``
        * ``@left-top``
        * ``@left-middle``
        * ``@left-bottom``
        * ``@right-top``
        * ``@right-middle``
        * ``@right-bottom``

    .. attribute:: declarations

        A list of :class:`~.css21.Declaration` objects.

    .. attribute:: line

        Source line where this was read.

    .. attribute:: column

        Source column where this was read.

    """

    def __init__(self, at_keyword, declarations, line, column):
        # Keyword validity is checked by CSSPage3Parser.parse_at_rule
        # against PAGE_MARGIN_AT_KEYWORDS before construction.
        self.line = line
        self.column = column
        self.at_keyword = at_keyword
        self.declarations = declarations
63 |
64 |
class CSSPage3Parser(CSS21Parser):
    """Extend :class:`~.css21.CSS21Parser` for `CSS 3 Paged Media`_ syntax.

    .. _CSS 3 Paged Media: http://dev.w3.org/csswg/css3-page/

    Compared to CSS 2.1, the ``at_rules`` and ``selector`` attributes of
    :class:`~.css21.PageRule` objects are modified:

    * ``at_rules`` is not always empty, it is a list of :class:`MarginRule`
      objects.

    * ``selector``, instead of a single string, is a tuple of the page name
      and the pseudo class. Each of these may be a ``None`` or a string.

    +--------------------------+------------------------+
    | CSS                      | Parsed selectors       |
    +==========================+========================+
    | .. code-block:: css      | .. code-block:: python |
    |                          |                        |
    |     @page {}             |     (None, None)       |
    |     @page :first {}      |     (None, 'first')    |
    |     @page chapter {}     |     ('chapter', None)  |
    |     @page table:right {} |     ('table', 'right') |
    +--------------------------+------------------------+

    """

    # The 16 margin box at-keywords defined by CSS 3 Paged Media.
    PAGE_MARGIN_AT_KEYWORDS = [
        '@top-left-corner',
        '@top-left',
        '@top-center',
        '@top-right',
        '@top-right-corner',
        '@bottom-left-corner',
        '@bottom-left',
        '@bottom-center',
        '@bottom-right',
        '@bottom-right-corner',
        '@left-top',
        '@left-middle',
        '@left-bottom',
        '@right-top',
        '@right-middle',
        '@right-bottom',
    ]

    def parse_at_rule(self, rule, previous_rules, errors, context):
        """Parse an at-rule, handling margin at-rules inside ``@page``.

        Margin at-rules are only valid directly inside an ``@page`` rule
        and must not have a rule head. Any other at-rule is delegated to
        the CSS 2.1 parser.

        :returns:
            A :class:`MarginRule` for margin at-rules, otherwise whatever
            :meth:`.css21.CSS21Parser.parse_at_rule` returns.
        :raises:
            :class:`~.parsing.ParseError` on misplaced or malformed
            margin at-rules.

        """
        if rule.at_keyword in self.PAGE_MARGIN_AT_KEYWORDS:
            if context != '@page':
                raise ParseError(
                    rule, '{0} rule not allowed in {1}'.format(
                        rule.at_keyword, context))
            if rule.head:
                raise ParseError(
                    rule.head[0],
                    'unexpected {0} token in {1} rule header'.format(
                        rule.head[0].type, rule.at_keyword))
            declarations, body_errors = self.parse_declaration_list(rule.body)
            errors.extend(body_errors)
            return MarginRule(
                rule.at_keyword, declarations, rule.line, rule.column)
        return super(CSSPage3Parser, self).parse_at_rule(
            rule, previous_rules, errors, context)

    def parse_page_selector(self, head):
        """Parse an @page selector.

        :param head:
            The ``head`` attribute of an unparsed :class:`AtRule`.
        :returns:
            A ``((name, pseudo_class), specificity)`` tuple:

            * ``name`` is the page name string, or ``None`` when the
              selector has no name.
            * ``pseudo_class`` is ``'first'``, ``'blank'`` (added by GCPM),
              ``'left'``, ``'right'``, or ``None``.
            * ``specificity`` is a 3-tuple of ints used to rank selectors.
        :raises:
            :class:`~.parsing.ParseError` on invalid selectors

        """
        if not head:
            return (None, None), (0, 0, 0)
        if head[0].type == 'IDENT':
            name = head.pop(0).value
            while head and head[0].type == 'S':
                head.pop(0)
            if not head:
                return (name, None), (1, 0, 0)
            name_specificity = (1,)
        else:
            name = None
            name_specificity = (0,)
        if (len(head) == 2 and head[0].type == ':' and
                head[1].type == 'IDENT'):
            pseudo_class = head[1].value
            specificity = {
                'first': (1, 0), 'blank': (1, 0),
                'left': (0, 1), 'right': (0, 1),
            }.get(pseudo_class)
            if specificity:
                return (name, pseudo_class), (name_specificity + specificity)
        raise ParseError(head[0], 'invalid @page selector')
163 |
--------------------------------------------------------------------------------
/tinycss/parsing.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | tinycss.parsing
4 | ---------------
5 |
6 | Utilities for parsing lists of tokens.
7 |
8 | :copyright: (c) 2012 by Simon Sapin.
9 | :license: BSD, see LICENSE for more details.
10 | """
11 |
12 | from __future__ import unicode_literals
13 |
14 |
15 | # TODO: unit tests
16 |
def split_on_comma(tokens):
    """Split a token list on top-level ``,`` DELIM tokens.

    Only "top-level" comma tokens are splitting points, not commas inside a
    function or other :class:`ContainerToken`.

    :param tokens:
        An iterable of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :returns:
        A list of lists of tokens

    """
    # Always at least one group, so an empty input yields [[]].
    groups = [[]]
    for token in tokens:
        if token.type == 'DELIM' and token.value == ',':
            groups.append([])
        else:
            groups[-1].append(token)
    return groups
40 |
41 |
def strip_whitespace(tokens):
    """Remove whitespace at the beginning and end of a token list.

    Whitespace tokens in-between other tokens in the list are preserved.

    :param tokens:
        A list of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :return:
        A new sub-sequence of the list.

    """
    start = 0
    end = len(tokens)
    # Advance past leading whitespace, then back off trailing whitespace.
    while start < end and tokens[start].type == 'S':
        start += 1
    while end > start and tokens[end - 1].type == 'S':
        end -= 1
    return tokens[start:end]
63 |
64 |
def remove_whitespace(tokens):
    """Remove any top-level whitespace in a token list.

    Whitespace tokens inside recursive :class:`~.token_data.ContainerToken`
    are preserved.

    :param tokens:
        A list of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :return:
        A new sub-sequence of the list.

    """
    return list(filter(lambda token: token.type != 'S', tokens))
79 |
80 |
def validate_value(tokens):
    """Validate a property value.

    :param tokens:
        an iterable of tokens
    :raises:
        :class:`ParseError` if there is any invalid token for the 'value'
        production of the core grammar.

    """
    context = 'property value'
    for token in tokens:
        # A '{' block token is validated recursively as a block;
        # everything else must satisfy the 'any' production.
        if token.type == '{':
            validate_block(token.content, context)
        else:
            validate_any(token, context)
97 |
98 |
def validate_block(tokens, context):
    """
    :raises:
        :class:`ParseError` if there is any invalid token for the 'block'
        production of the core grammar.
    :param tokens: an iterable of tokens
    :param context: a string for the 'unexpected in ...' message

    """
    for token in tokens:
        kind = token.type
        if kind == '{':
            # Nested blocks are validated recursively.
            validate_block(token.content, context)
        elif kind != ';' and kind != 'ATKEYWORD':
            # ';' and at-keywords are allowed by the 'block' production.
            validate_any(token, context)
114 |
115 |
def validate_any(token, context):
    """
    :raises:
        :class:`ParseError` if this is an invalid token for the
        'any' production of the core grammar.
    :param token: a single token
    :param context: a string for the 'unexpected in ...' message

    """
    kind = token.type
    if kind in ('FUNCTION', '(', '['):
        # Containers are valid if all of their children are; the
        # container type becomes the context for error messages.
        for child in token.content:
            validate_any(child, kind)
        return
    if kind in ('S', 'IDENT', 'DIMENSION', 'PERCENTAGE', 'NUMBER',
                'INTEGER', 'URI', 'DELIM', 'STRING', 'HASH', ':',
                'UNICODE-RANGE'):
        return
    adjective = 'unmatched' if kind in ('}', ')', ']') else 'unexpected'
    raise ParseError(
        token, '{0} {1} token in {2}'.format(adjective, kind, context))
138 |
139 |
class ParseError(ValueError):
    """Details about a CSS syntax error. Usually indicates that something
    (a rule or a declaration) was ignored and will not appear as a parsed
    object.

    This exception is typically logged in a list rather than being propagated
    to the user API.

    .. attribute:: line

        Source line where the error occured.

    .. attribute:: column

        Column in the source line where the error occured.

    .. attribute:: reason

        What happend (a string).

    """
    def __init__(self, subject, reason):
        # *subject* is any object carrying line/column attributes
        # (a token or an unparsed rule).
        self.line = subject.line
        self.column = subject.column
        self.reason = reason
        message = 'Parse error at {0.line}:{0.column}, {0.reason}'.format(self)
        super(ParseError, self).__init__(message)
167 |
--------------------------------------------------------------------------------
/tinycss/speedups.pyx:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | tinycss.speedups
4 | ----------------
5 |
6 | Cython module for speeding up inner loops.
7 |
8 | Right now only :func:`tokenize_flat` has a second implementation.
9 |
10 | :copyright: (c) 2010 by Simon Sapin.
11 | :license: BSD, see LICENSE for more details.
12 | """
13 |
14 | from __future__ import unicode_literals
15 |
16 | from .token_data import (
17 | COMPILED_TOKEN_REGEXPS, UNICODE_UNESCAPE, NEWLINE_UNESCAPE,
18 | SIMPLE_UNESCAPE, FIND_NEWLINES, TOKEN_DISPATCH)
19 |
20 |
21 | COMPILED_TOKEN_INDEXES = dict(
22 | (name, i) for i, (name, regexp) in enumerate(COMPILED_TOKEN_REGEXPS))
23 |
24 |
cdef class CToken:
    """A token built by the Cython speedups. Identical to
    :class:`~.token_data.Token`.

    """
    is_container = False

    cdef public object type, _as_css, value, unit
    cdef public Py_ssize_t line, column

    def __init__(self, type_, css_value, value, unit, line, column):
        self.type = type_
        self._as_css = css_value
        self.value = value
        self.unit = unit
        self.line = line
        self.column = column

    def as_css(self):
        """
        Return as an Unicode string the CSS representation of the token,
        as parsed in the source.
        """
        return self._as_css

    def __repr__(self):
        # The format string was previously empty, so every CToken repr'd
        # as ''. Use the same template as token_data.Token.__repr__ so
        # both tokenizer implementations print identically when debugging.
        return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
                .format(self, self.unit or ''))
53 |
54 |
def tokenize_flat(css_source, int ignore_comments=1):
    """
    :param css_source:
        CSS as an unicode string
    :param ignore_comments:
        if true (the default) comments will not be included in the
        return value
    :return:
        An iterator of :class:`Token`

    """
    # Make these local variable to avoid global lookups in the loop
    tokens_dispatch = TOKEN_DISPATCH
    compiled_token_indexes = COMPILED_TOKEN_INDEXES
    compiled_tokens = COMPILED_TOKEN_REGEXPS
    unicode_unescape = UNICODE_UNESCAPE
    newline_unescape = NEWLINE_UNESCAPE
    simple_unescape = SIMPLE_UNESCAPE
    find_newlines = FIND_NEWLINES

    # Use the integer indexes instead of string markers
    # (C-level integer comparisons instead of string comparisons below).
    cdef Py_ssize_t BAD_COMMENT = compiled_token_indexes['BAD_COMMENT']
    cdef Py_ssize_t BAD_STRING = compiled_token_indexes['BAD_STRING']
    cdef Py_ssize_t PERCENTAGE = compiled_token_indexes['PERCENTAGE']
    cdef Py_ssize_t DIMENSION = compiled_token_indexes['DIMENSION']
    cdef Py_ssize_t ATKEYWORD = compiled_token_indexes['ATKEYWORD']
    cdef Py_ssize_t FUNCTION = compiled_token_indexes['FUNCTION']
    cdef Py_ssize_t COMMENT = compiled_token_indexes['COMMENT']
    cdef Py_ssize_t NUMBER = compiled_token_indexes['NUMBER']
    cdef Py_ssize_t STRING = compiled_token_indexes['STRING']
    cdef Py_ssize_t IDENT = compiled_token_indexes['IDENT']
    cdef Py_ssize_t HASH = compiled_token_indexes['HASH']
    cdef Py_ssize_t URI = compiled_token_indexes['URI']
    # DELIM has no regexp of its own; -1 never collides with a real index.
    cdef Py_ssize_t DELIM = -1

    cdef Py_ssize_t pos = 0
    cdef Py_ssize_t line = 1
    cdef Py_ssize_t column = 1
    cdef Py_ssize_t source_len = len(css_source)
    cdef Py_ssize_t n_tokens = len(compiled_tokens)
    cdef Py_ssize_t length, next_pos, type_
    cdef CToken token

    tokens = []
    # Each iteration consumes exactly one token starting at `pos`.
    while pos < source_len:
        char = css_source[pos]
        # Single-character punctuation bypasses the regexp dispatch.
        if char in ':;{}()[]':
            type_ = -1  # not parsed further anyway
            type_name = char
            css_value = char
        else:
            # The dispatch table is indexed by codepoint (capped at 160),
            # so only regexps that can match at this character are tried.
            codepoint = min(ord(char), 160)
            for type_, type_name, regexp in tokens_dispatch[codepoint]:
                match = regexp(css_source, pos)
                if match:
                    # First match is the longest. See comments on TOKENS above.
                    css_value = match.group()
                    break
            else:
                # No match.
                # "Any other character not matched by the above rules,
                # and neither a single nor a double quote."
                # ... but quotes at the start of a token are always matched
                # by STRING or BAD_STRING. So DELIM is any single character.
                type_ = DELIM
                type_name = 'DELIM'
                css_value = char
        length = len(css_value)
        next_pos = pos + length

        # A BAD_COMMENT is a comment at EOF. Ignore it too.
        if not (ignore_comments and type_ in (COMMENT, BAD_COMMENT)):
            # Parse numbers, extract strings and URIs, unescape
            unit = None
            if type_ == DIMENSION:
                # `match` is still bound to the regexp match found above:
                # group 1 is the numeric part, group 2 the unit.
                value = match.group(1)
                value = float(value) if '.' in value else int(value)
                unit = match.group(2)
                unit = simple_unescape(unit)
                unit = unicode_unescape(unit)
                unit = unit.lower()  # normalize
            elif type_ == PERCENTAGE:
                value = css_value[:-1]
                value = float(value) if '.' in value else int(value)
                unit = '%'
            elif type_ == NUMBER:
                value = css_value
                if '.' in value:
                    value = float(value)
                else:
                    value = int(value)
                    type_name = 'INTEGER'
            elif type_ in (IDENT, ATKEYWORD, HASH, FUNCTION):
                value = simple_unescape(css_value)
                value = unicode_unescape(value)
            elif type_ == URI:
                value = match.group(1)
                if value and value[0] in '"\'':
                    value = value[1:-1]  # Remove quotes
                    value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            elif type_ == STRING:
                value = css_value[1:-1]  # Remove quotes
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            # BAD_STRING can only be one of:
            # * Unclosed string at the end of the stylesheet:
            #   Close the string, but this is not an error.
            #   Make it a "good" STRING token.
            # * Unclosed string at the (unescaped) end of the line:
            #   Close the string, but this is an error.
            #   Leave it as a BAD_STRING, don’t bother parsing it.
            # See http://www.w3.org/TR/CSS21/syndata.html#parsing-errors
            elif type_ == BAD_STRING and next_pos == source_len:
                type_name = 'STRING'
                value = css_value[1:]  # Remove quote
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            else:
                value = css_value
            token = CToken(type_name, css_value, value, unit, line, column)
            tokens.append(token)

        pos = next_pos
        # Update the source position of the *next* token from any
        # newlines embedded in the token just consumed.
        newlines = list(find_newlines(css_value))
        if newlines:
            line += len(newlines)
            # Add 1 to have lines start at column 1, not 0
            column = length - newlines[-1].end() + 1
        else:
            column += length
    return tokens
190 |
--------------------------------------------------------------------------------
/tinycss/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Test suite for tinycss
4 | ----------------------
5 |
6 | :copyright: (c) 2012 by Simon Sapin.
7 | :license: BSD, see LICENSE for more details.
8 | """
9 |
10 |
11 | from __future__ import unicode_literals
12 |
13 | import sys
14 |
15 |
# Awful workaround to fix isort's "sys.setdefaultencoding('utf-8')".
# `reload` and `sys.setdefaultencoding` only exist on Python 2 (the
# latter only reappears after reload(sys)), so this is a no-op on Python 3.
if sys.version_info[0] == 2:
    reload(sys) # noqa
    sys.setdefaultencoding('ascii')
20 |
21 |
def assert_errors(errors, expected_errors):
    """Check each error message against an expected substring.

    Only substrings are matched, not complete error messages, so tests
    stay robust against wording changes.
    """
    assert len(errors) == len(expected_errors)
    for got, fragment in zip(errors, expected_errors):
        assert fragment in str(got)
27 |
--------------------------------------------------------------------------------
/tinycss/tests/speed.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Speed tests
4 | -----------
5 |
6 | Note: this file is not named test_*.py as it is not part of the
7 | test suite ran by pytest.
8 |
9 | :copyright: (c) 2012 by Simon Sapin.
10 | :license: BSD, see LICENSE for more details.
11 | """
12 |
13 |
14 | from __future__ import division, unicode_literals
15 |
16 | import contextlib
17 | import functools
18 | import os.path
19 | import sys
20 | import timeit
21 |
22 | from cssutils import parseString
23 |
24 | from .. import tokenizer
25 | from ..css21 import CSS21Parser
26 | from ..parsing import remove_whitespace
27 |
CSS_REPEAT = 4  # how many times the sample stylesheet is duplicated
TIMEIT_REPEAT = 3  # timeit repetitions; the best (minimum) one is kept
TIMEIT_NUMBER = 20  # calls per timeit repetition
31 |
32 |
def load_css():
    """Return the sample stylesheet, duplicated CSS_REPEAT times, as bytes."""
    filename = os.path.join(
        os.path.dirname(__file__),
        '..', '..', 'docs', '_static', 'custom.css')
    with open(filename, 'rb') as fd:
        css = fd.read()
    return b'\n'.join([css] * CSS_REPEAT)
38 |
39 |
# Pre-load the stylesheet once at import time so that file I/O is not
# measured by the benchmarks below.
CSS = load_css()
42 |
43 |
@contextlib.contextmanager
def install_tokenizer(name):
    """Temporarily make ``tokenizer.tokenize_flat`` the implementation
    named *name*, restoring the previous one on exit.
    """
    saved = tokenizer.tokenize_flat
    try:
        tokenizer.tokenize_flat = getattr(tokenizer, name)
        yield
    finally:
        tokenizer.tokenize_flat = saved
52 |
53 |
def parse(tokenizer_name):
    """Parse the sample CSS with the named tokenizer installed.

    Returns a comparable summary: a list of
    ``(selector, [(property_name, value_token_count), ...])`` pairs.
    """
    with install_tokenizer(tokenizer_name):
        stylesheet = CSS21Parser().parse_stylesheet_bytes(CSS)
    summary = []
    for rule in stylesheet.rules:
        declarations = []
        for declaration in rule.declarations:
            value_length = len(list(remove_whitespace(declaration.value)))
            declarations.append((declaration.name, value_length))
        summary.append((rule.selector.as_css(), declarations))
    return summary
65 |
66 |
# Benchmark entry points: the same parse() body driven by each tokenizer.
parse_cython = functools.partial(parse, 'cython_tokenize_flat')
parse_python = functools.partial(parse, 'python_tokenize_flat')
69 |
70 |
def parse_cssutils():
    """Parse the sample CSS with cssutils, producing the same summary
    shape as :func:`parse` for comparison.
    """
    stylesheet = parseString(CSS)
    summary = []
    for rule in stylesheet.cssRules:
        declarations = []
        for declaration in rule.style.getProperties(all=True):
            value_length = len(list(declaration.propertyValue))
            declarations.append((declaration.name, value_length))
        summary.append((rule.selectorText, declarations))
    return summary
81 |
82 |
def check_consistency():
    """All parsers must produce the same summary before we time them."""
    reference = parse_python()
    assert len(reference) > 0
    if tokenizer.cython_tokenize_flat:
        assert parse_cython() == reference
    assert parse_cssutils() == reference
    version = '.'.join(map(str, sys.version_info[:3]))
    print('Python {}, consistency OK.'.format(version))
91 |
92 |
def warm_up():
    """Exercise the parsers repeatedly on PyPy so the JIT compiles the
    hot paths before timing starts. Does nothing on other interpreters.
    """
    if not hasattr(sys, 'pypy_translation_info'):
        return
    print('Warming up for PyPy...')
    for _ in range(80):
        for _ in range(10):
            parse_python()
            parse_cssutils()
        # One progress dot per outer iteration.
        sys.stdout.write('.')
        sys.stdout.flush()
    sys.stdout.write('\n')
104 |
105 |
def time(function):
    """Return the best wall-clock time for *function*, in milliseconds."""
    runs = timeit.Timer(function).repeat(TIMEIT_REPEAT, TIMEIT_NUMBER)
    return int(min(runs) * 1000)
110 |
111 |
def run():
    """Time every available parser and print the results.

    The first entry is the reference; the others are also reported as a
    ratio relative to it.
    """
    if tokenizer.cython_tokenize_flat:
        contenders = [('tinycss + speedups ', parse_cython)]
    else:
        print('Speedups are NOT available.')
        contenders = []
    contenders.append(('tinycss WITHOUT speedups', parse_python))
    contenders.append(('cssutils ', parse_cssutils))
    label, function = contenders.pop(0)
    reference = time(function)
    print('{} {} ms'.format(label, reference))
    for label, function in contenders:
        milliseconds = time(function)
        print('{} {} ms {:.2f}x'.format(
            label, milliseconds, milliseconds / reference))
130 |
131 |
if __name__ == '__main__':
    # Check correctness first, then warm up the JIT (PyPy only),
    # then run the timed benchmarks.
    check_consistency()
    warm_up()
    run()
136 |
--------------------------------------------------------------------------------
/tinycss/tests/test_api.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Tests for the public API
4 | ------------------------
5 |
6 | :copyright: (c) 2012 by Simon Sapin.
7 | :license: BSD, see LICENSE for more details.
8 | """
9 |
10 |
11 | from __future__ import unicode_literals
12 |
13 | from pytest import raises
14 | from tinycss import make_parser
15 | from tinycss.page3 import CSSPage3Parser
16 |
17 |
def test_make_parser():
    """Exercise every calling convention of ``make_parser``."""
    class MyParser(object):
        def __init__(self, some_config):
            self.some_config = some_config

    parsers = [
        make_parser(),
        make_parser('page3'),
        make_parser(CSSPage3Parser),
        make_parser(MyParser, some_config=42),
        make_parser(CSSPage3Parser, MyParser, some_config=42),
        make_parser(MyParser, 'page3', some_config=42),
    ]

    is_page3 = [False, True, True, False, True, True]
    is_custom = [False, False, False, True, True, True]
    for parser, expected in zip(parsers, is_page3):
        assert isinstance(parser, CSSPage3Parser) == expected
    for parser, expected in zip(parsers, is_custom):
        assert isinstance(parser, MyParser) == expected
    for parser in parsers[3:]:
        assert parser.some_config == 42

    # Extra or missing named parameters
    raises(TypeError, make_parser, some_config=4)
    raises(TypeError, make_parser, 'page3', some_config=4)
    raises(TypeError, make_parser, MyParser)
    raises(TypeError, make_parser, MyParser, some_config=4, other_config=7)
46 |
--------------------------------------------------------------------------------
/tinycss/tests/test_color3.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Tests for the CSS 3 color parser
4 | --------------------------------
5 |
6 | :copyright: (c) 2012 by Simon Sapin.
7 | :license: BSD, see LICENSE for more details.
8 | """
9 |
10 |
11 | from __future__ import unicode_literals
12 |
13 | import pytest
14 | from tinycss.color3 import hsl_to_rgb, parse_color_string
15 |
16 |
17 | @pytest.mark.parametrize(('css_source', 'expected_result'), [
18 | ('', None),
19 | (' /* hey */\n', None),
20 | ('4', None),
21 | ('top', None),
22 | ('/**/transparent', (0, 0, 0, 0)),
23 | ('transparent', (0, 0, 0, 0)),
24 | (' transparent\n', (0, 0, 0, 0)),
25 | ('TransParent', (0, 0, 0, 0)),
26 | ('currentColor', 'currentColor'),
27 | ('CURRENTcolor', 'currentColor'),
28 | ('current_Color', None),
29 |
30 | ('black', (0, 0, 0, 1)),
31 | ('white', (1, 1, 1, 1)),
32 | ('fuchsia', (1, 0, 1, 1)),
33 | ('cyan', (0, 1, 1, 1)),
34 | ('CyAn', (0, 1, 1, 1)),
35 | ('darkkhaki', (189 / 255., 183 / 255., 107 / 255., 1)),
36 |
37 | ('#', None),
38 | ('#f', None),
39 | ('#ff', None),
40 | ('#fff', (1, 1, 1, 1)),
41 | ('#ffg', None),
42 | ('#ffff', None),
43 | ('#fffff', None),
44 | ('#ffffff', (1, 1, 1, 1)),
45 | ('#fffffg', None),
46 | ('#fffffff', None),
47 | ('#ffffffff', None),
48 | ('#fffffffff', None),
49 |
50 | ('#cba987', (203 / 255., 169 / 255., 135 / 255., 1)),
51 | ('#CbA987', (203 / 255., 169 / 255., 135 / 255., 1)),
52 | ('#1122aA', (17 / 255., 34 / 255., 170 / 255., 1)),
53 | ('#12a', (17 / 255., 34 / 255., 170 / 255., 1)),
54 |
55 | ('rgb(203, 169, 135)', (203 / 255., 169 / 255., 135 / 255., 1)),
56 | ('RGB(255, 255, 255)', (1, 1, 1, 1)),
57 | ('rgB(0, 0, 0)', (0, 0, 0, 1)),
58 | ('rgB(0, 51, 255)', (0, .2, 1, 1)),
59 | ('rgb(0,51,255)', (0, .2, 1, 1)),
60 | ('rgb(0\t, 51 ,255)', (0, .2, 1, 1)),
61 | ('rgb(/* R */0, /* G */51, /* B */255)', (0, .2, 1, 1)),
62 | ('rgb(-51, 306, 0)', (-.2, 1.2, 0, 1)), # out of 0..1 is allowed
63 |
64 | ('rgb(42%, 3%, 50%)', (.42, .03, .5, 1)),
65 | ('RGB(100%, 100%, 100%)', (1, 1, 1, 1)),
66 | ('rgB(0%, 0%, 0%)', (0, 0, 0, 1)),
67 | ('rgB(10%, 20%, 30%)', (.1, .2, .3, 1)),
68 | ('rgb(10%,20%,30%)', (.1, .2, .3, 1)),
69 | ('rgb(10%\t, 20% ,30%)', (.1, .2, .3, 1)),
70 | ('rgb(/* R */10%, /* G */20%, /* B */30%)', (.1, .2, .3, 1)),
71 | ('rgb(-12%, 110%, 1400%)', (-.12, 1.1, 14, 1)), # out of 0..1 is allowed
72 |
73 | ('rgb(10%, 50%, 0)', None),
74 | ('rgb(255, 50%, 0%)', None),
75 | ('rgb(0, 0 0)', None),
76 | ('rgb(0, 0, 0deg)', None),
77 | ('rgb(0, 0, light)', None),
78 | ('rgb()', None),
79 | ('rgb(0)', None),
80 | ('rgb(0, 0)', None),
81 | ('rgb(0, 0, 0, 0)', None),
82 | ('rgb(0%)', None),
83 | ('rgb(0%, 0%)', None),
84 | ('rgb(0%, 0%, 0%, 0%)', None),
85 | ('rgb(0%, 0%, 0%, 0)', None),
86 |
87 | ('rgba(0, 0, 0, 0)', (0, 0, 0, 0)),
88 | ('rgba(203, 169, 135, 0.3)', (203 / 255., 169 / 255., 135 / 255., 0.3)),
89 | ('RGBA(255, 255, 255, 0)', (1, 1, 1, 0)),
90 | ('rgBA(0, 51, 255, 1)', (0, 0.2, 1, 1)),
91 | ('rgba(0, 51, 255, 1.1)', (0, 0.2, 1, 1)),
92 | ('rgba(0, 51, 255, 37)', (0, 0.2, 1, 1)),
93 | ('rgba(0, 51, 255, 0.42)', (0, 0.2, 1, 0.42)),
94 | ('rgba(0, 51, 255, 0)', (0, 0.2, 1, 0)),
95 | ('rgba(0, 51, 255, -0.1)', (0, 0.2, 1, 0)),
96 | ('rgba(0, 51, 255, -139)', (0, 0.2, 1, 0)),
97 |
98 | ('rgba(42%, 3%, 50%, 0.3)', (.42, .03, .5, 0.3)),
99 | ('RGBA(100%, 100%, 100%, 0)', (1, 1, 1, 0)),
100 | ('rgBA(0%, 20%, 100%, 1)', (0, 0.2, 1, 1)),
101 | ('rgba(0%, 20%, 100%, 1.1)', (0, 0.2, 1, 1)),
102 | ('rgba(0%, 20%, 100%, 37)', (0, 0.2, 1, 1)),
103 | ('rgba(0%, 20%, 100%, 0.42)', (0, 0.2, 1, 0.42)),
104 | ('rgba(0%, 20%, 100%, 0)', (0, 0.2, 1, 0)),
105 | ('rgba(0%, 20%, 100%, -0.1)', (0, 0.2, 1, 0)),
106 | ('rgba(0%, 20%, 100%, -139)', (0, 0.2, 1, 0)),
107 |
108 | ('rgba(255, 255, 255, 0%)', None),
109 | ('rgba(10%, 50%, 0, 1)', None),
110 | ('rgba(255, 50%, 0%, 1)', None),
111 | ('rgba(0, 0, 0 0)', None),
112 | ('rgba(0, 0, 0, 0deg)', None),
113 | ('rgba(0, 0, 0, light)', None),
114 | ('rgba()', None),
115 | ('rgba(0)', None),
116 | ('rgba(0, 0, 0)', None),
117 | ('rgba(0, 0, 0, 0, 0)', None),
118 | ('rgba(0%)', None),
119 | ('rgba(0%, 0%)', None),
120 | ('rgba(0%, 0%, 0%)', None),
121 | ('rgba(0%, 0%, 0%, 0%)', None),
122 | ('rgba(0%, 0%, 0%, 0%, 0%)', None),
123 |
124 | ('HSL(0, 0%, 0%)', (0, 0, 0, 1)),
125 | ('hsL(0, 100%, 50%)', (1, 0, 0, 1)),
126 | ('hsl(60, 100%, 37.5%)', (0.75, 0.75, 0, 1)),
127 | ('hsl(780, 100%, 37.5%)', (0.75, 0.75, 0, 1)),
128 | ('hsl(-300, 100%, 37.5%)', (0.75, 0.75, 0, 1)),
129 | ('hsl(300, 50%, 50%)', (0.75, 0.25, 0.75, 1)),
130 |
131 | ('hsl(10, 50%, 0)', None),
132 | ('hsl(50%, 50%, 0%)', None),
133 | ('hsl(0, 0% 0%)', None),
134 | ('hsl(30deg, 100%, 100%)', None),
135 | ('hsl(0, 0%, light)', None),
136 | ('hsl()', None),
137 | ('hsl(0)', None),
138 | ('hsl(0, 0%)', None),
139 | ('hsl(0, 0%, 0%, 0%)', None),
140 |
141 | ('HSLA(-300, 100%, 37.5%, 1)', (0.75, 0.75, 0, 1)),
142 | ('hsLA(-300, 100%, 37.5%, 12)', (0.75, 0.75, 0, 1)),
143 | ('hsla(-300, 100%, 37.5%, 0.2)', (0.75, 0.75, 0, .2)),
144 | ('hsla(-300, 100%, 37.5%, 0)', (0.75, 0.75, 0, 0)),
145 | ('hsla(-300, 100%, 37.5%, -3)', (0.75, 0.75, 0, 0)),
146 |
147 | ('hsla(10, 50%, 0, 1)', None),
148 | ('hsla(50%, 50%, 0%, 1)', None),
149 | ('hsla(0, 0% 0%, 1)', None),
150 | ('hsla(30deg, 100%, 100%, 1)', None),
151 | ('hsla(0, 0%, light, 1)', None),
152 | ('hsla()', None),
153 | ('hsla(0)', None),
154 | ('hsla(0, 0%)', None),
155 | ('hsla(0, 0%, 0%, 50%)', None),
156 | ('hsla(0, 0%, 0%, 1, 0%)', None),
157 |
158 | ('cmyk(0, 0, 0, 0)', None),
159 | ])
def test_color(css_source, expected_result):
    """Channel values are compared with a tolerance for float rounding."""
    result = parse_color_string(css_source)
    if not isinstance(result, tuple):
        assert result == expected_result
        return
    for got, expected in zip(result, expected_result):
        # Compensate for floating point errors:
        assert abs(got - expected) < 1e-10
    for index, channel in enumerate(('red', 'green', 'blue', 'alpha')):
        assert getattr(result, channel) == result[index]
170 |
171 |
172 | @pytest.mark.parametrize(('hsl', 'expected_rgb'), [
173 | # http://en.wikipedia.org/wiki/HSL_and_HSV#Examples
174 | ((0, 0, 100 ), (1, 1, 1 )), # noqa
175 | ((127, 0, 100 ), (1, 1, 1 )), # noqa
176 | ((0, 0, 50 ), (0.5, 0.5, 0.5 )), # noqa
177 | ((127, 0, 50 ), (0.5, 0.5, 0.5 )), # noqa
178 | ((0, 0, 0 ), (0, 0, 0 )), # noqa
179 | ((127, 0, 0 ), (0, 0, 0 )), # noqa
180 | ((0, 100, 50 ), (1, 0, 0 )), # noqa
181 | ((60, 100, 37.5), (0.75, 0.75, 0 )), # noqa
182 | ((780, 100, 37.5), (0.75, 0.75, 0 )), # noqa
183 | ((-300, 100, 37.5), (0.75, 0.75, 0 )), # noqa
184 | ((120, 100, 25 ), (0, 0.5, 0 )), # noqa
185 | ((180, 100, 75 ), (0.5, 1, 1 )), # noqa
186 | ((240, 100, 75 ), (0.5, 0.5, 1 )), # noqa
187 | ((300, 50, 50 ), (0.75, 0.25, 0.75 )), # noqa
188 | ((61.8, 63.8, 39.3), (0.628, 0.643, 0.142)), # noqa
189 | ((251.1, 83.2, 51.1), (0.255, 0.104, 0.918)), # noqa
190 | ((134.9, 70.7, 39.6), (0.116, 0.675, 0.255)), # noqa
191 | ((49.5, 89.3, 49.7), (0.941, 0.785, 0.053)), # noqa
192 | ((283.7, 77.5, 54.2), (0.704, 0.187, 0.897)), # noqa
193 | ((14.3, 81.7, 62.4), (0.931, 0.463, 0.316)), # noqa
194 | ((56.9, 99.1, 76.5), (0.998, 0.974, 0.532)), # noqa
195 | ((162.4, 77.9, 44.7), (0.099, 0.795, 0.591)), # noqa
196 | ((248.3, 60.1, 37.3), (0.211, 0.149, 0.597)), # noqa
197 | ((240.5, 29, 60.7), (0.495, 0.493, 0.721)), # noqa
198 | ])
def test_hsl(hsl, expected_rgb):
    """hsl_to_rgb matches Wikipedia's examples channel by channel."""
    rgb = hsl_to_rgb(*hsl)
    # Tolerance absorbs floating point errors and Wikipedia’s rounding.
    assert all(abs(got - expected) < 0.001
               for got, expected in zip(rgb, expected_rgb))
203 |
--------------------------------------------------------------------------------
/tinycss/tests/test_css21.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Tests for the CSS 2.1 parser
4 | ----------------------------
5 |
6 | :copyright: (c) 2012 by Simon Sapin.
7 | :license: BSD, see LICENSE for more details.
8 | """
9 |
10 |
11 | from __future__ import unicode_literals
12 |
13 | import io
14 | import os
15 | import tempfile
16 |
17 | import pytest
18 | from tinycss.css21 import CSS21Parser
19 |
20 | from . import assert_errors
21 | from .test_tokenizer import jsonify
22 |
23 |
def parse_bytes(css_bytes, kwargs):
    """Parse a byte string directly."""
    parser = CSS21Parser()
    return parser.parse_stylesheet_bytes(css_bytes, **kwargs)
26 |
27 |
def parse_bytesio_file(css_bytes, kwargs):
    """Parse from an in-memory binary file object."""
    return CSS21Parser().parse_stylesheet_file(
        io.BytesIO(css_bytes), **kwargs)
31 |
32 |
def parse_filename(css_bytes, kwargs):
    """Parse via a real file on disk, passed by name."""
    handle = tempfile.NamedTemporaryFile(delete=False)
    path = handle.name
    try:
        handle.write(css_bytes)
        # Windows can not open the filename a second time while
        # it is still open for writing, so close before parsing.
        handle.close()
        return CSS21Parser().parse_stylesheet_file(path, **kwargs)
    finally:
        os.remove(path)
43 |
44 |
45 | @pytest.mark.parametrize(('css_bytes', 'kwargs', 'expected_result', 'parse'), [
46 | params + (parse,)
47 | for parse in [parse_bytes, parse_bytesio_file, parse_filename]
48 | for params in [
49 | ('@import "é";'.encode('utf8'), {}, 'é'),
50 | ('@import "é";'.encode('utf16'), {}, 'é'), # with a BOM
51 | ('@import "é";'.encode('latin1'), {}, 'é'),
52 | ('@import "£";'.encode('Shift-JIS'), {}, '\x81\x92'), # lat1 mojibake
53 | ('@charset "Shift-JIS";@import "£";'.encode('Shift-JIS'), {}, '£'),
54 | (' @charset "Shift-JIS";@import "£";'.encode('Shift-JIS'), {},
55 | '\x81\x92'),
56 | ('@import "£";'.encode('Shift-JIS'),
57 | {'document_encoding': 'Shift-JIS'}, '£'),
58 | ('@import "£";'.encode('Shift-JIS'),
59 | {'document_encoding': 'utf8'}, '\x81\x92'),
60 | ('@charset "utf8"; @import "£";'.encode('utf8'),
61 | {'document_encoding': 'latin1'}, '£'),
62 | # Mojibake yay!
63 | (' @charset "utf8"; @import "é";'.encode('utf8'),
64 | {'document_encoding': 'latin1'}, 'é'),
65 | ('@import "é";'.encode('utf8'), {'document_encoding': 'latin1'}, 'é'),
66 | ]
67 | ])
def test_bytes(css_bytes, kwargs, expected_result, parse):
    """The decoded @import URI must match, whatever the input channel."""
    stylesheet = parse(css_bytes, kwargs)
    rule = stylesheet.rules[0]
    assert rule.at_keyword == '@import'
    assert rule.uri == expected_result
72 |
73 |
@pytest.mark.parametrize(('css_source', 'expected_rules', 'expected_errors'), [
    (' /* hey */\n', 0, []),
    ('foo {}', 1, []),
    ('foo{} @lipsum{} bar{}', 2,
     ['unknown at-rule in stylesheet context: @lipsum']),
    ('@charset "ascii"; foo {}', 1, []),
    (' @charset "ascii"; foo {}', 1, [
        'mis-placed or malformed @charset rule']),
    ('@charset ascii; foo {}', 1, ['mis-placed or malformed @charset rule']),
    ('foo {} @charset "ascii";', 1, ['mis-placed or malformed @charset rule']),
])
def test_at_rules(css_source, expected_rules, expected_errors):
    """Count the rules that survive parsing and check the emitted errors."""
    # 'encoding' must be given explicitly, otherwise @charset is rejected.
    parser = CSS21Parser()
    stylesheet = parser.parse_stylesheet(css_source, encoding='utf8')
    assert_errors(stylesheet.errors, expected_errors)
    assert len(stylesheet.rules) == expected_rules
91 |
92 |
@pytest.mark.parametrize(('css_source', 'expected_rules', 'expected_errors'), [
    (' /* hey */\n', [], []),

    ('foo{} /* hey */\n@bar;@baz{}',
     [('foo', []), ('@bar', [], None), ('@baz', [], [])], []),

    ('@import "foo.css"/**/;', [
        ('@import', [('STRING', 'foo.css')], None)], []),

    ('@import "foo.css"/**/', [
        ('@import', [('STRING', 'foo.css')], None)], []),

    ('@import "foo.css', [
        ('@import', [('STRING', 'foo.css')], None)], []),

    ('{}', [], ['empty selector']),

    ('a{b:4}', [('a', [('b', [('INTEGER', 4)])])], []),

    ('@page {\t b: 4; @margin}', [('@page', [], [
        ('S', '\t '), ('IDENT', 'b'), (':', ':'), ('S', ' '), ('INTEGER', 4),
        (';', ';'), ('S', ' '), ('ATKEYWORD', '@margin'),
    ])], []),

    ('foo', [], ['no declaration block found']),

    ('foo @page {} bar {}', [('bar', [])],
     ['unexpected ATKEYWORD token in selector']),

    ('foo { content: "unclosed string;\n color:red; ; margin/**/\n: 2cm; }',
     [('foo', [('margin', [('DIMENSION', 2)])])],
     ['unexpected BAD_STRING token in property value']),

    ('foo { 4px; bar: 12% }',
     [('foo', [('bar', [('PERCENTAGE', 12)])])],
     ['expected a property name, got DIMENSION']),

    ('foo { bar! 3cm auto ; baz: 7px }',
     [('foo', [('baz', [('DIMENSION', 7)])])],
     ["expected ':', got DELIM"]),

    ('foo { bar ; baz: {("}"/* comment */) {0@fizz}} }',
     [('foo', [('baz', [('{', [
        ('(', [('STRING', '}')]), ('S', ' '),
        ('{', [('INTEGER', 0), ('ATKEYWORD', '@fizz')])
     ])])])],
     ["expected ':'"]),

    ('foo { bar: ; baz: not(z) }',
     [('foo', [('baz', [('FUNCTION', 'not', [('IDENT', 'z')])])])],
     ['expected a property value']),

    ('foo { bar: (]) ; baz: U+20 }',
     [('foo', [('baz', [('UNICODE-RANGE', 'U+20')])])],
     ['unmatched ] token in (']),
])
def test_core_parser(css_source, expected_rules, expected_errors):
    """Exercise the core parser's error recovery on malformed stylesheets.

    Expected rules are triples ``(at_keyword, head, body-or-None)`` for
    at-rules and pairs ``(selector_css, declarations)`` for rulesets.
    """
    class CoreParser(CSS21Parser):
        """A parser that always accepts unparsed at-rules."""
        def parse_at_rule(self, rule, stylesheet_rules, errors, context):
            return rule

    stylesheet = CoreParser().parse_stylesheet(css_source)
    assert_errors(stylesheet.errors, expected_errors)
    # Mirror the expected-data shape described in the docstring.
    result = [
        (rule.at_keyword, list(jsonify(rule.head)),
         list(jsonify(rule.body))
         if rule.body is not None else None)
        if rule.at_keyword else
        (rule.selector.as_css(), [
            (decl.name, list(jsonify(decl.value)))
            for decl in rule.declarations])
        for rule in stylesheet.rules
    ]
    assert result == expected_rules
168 |
169 |
@pytest.mark.parametrize(('css_source', 'expected_declarations',
                          'expected_errors'), [
    (' /* hey */\n', [], []),

    ('b:4', [('b', [('INTEGER', 4)])], []),

    ('{b:4}', [], ['expected a property name, got {']),

    ('b:4} c:3', [], ['unmatched } token in property value']),

    (' 4px; bar: 12% ',
     [('bar', [('PERCENTAGE', 12)])],
     ['expected a property name, got DIMENSION']),

    ('bar! 3cm auto ; baz: 7px',
     [('baz', [('DIMENSION', 7)])],
     ["expected ':', got DELIM"]),

    ('foo; bar ; baz: {("}"/* comment */) {0@fizz}}',
     [('baz', [('{', [
        ('(', [('STRING', '}')]), ('S', ' '),
        ('{', [('INTEGER', 0), ('ATKEYWORD', '@fizz')])
     ])])],
     ["expected ':'", "expected ':'"]),

    ('bar: ; baz: not(z)',
     [('baz', [('FUNCTION', 'not', [('IDENT', 'z')])])],
     ['expected a property value']),

    ('bar: (]) ; baz: U+20',
     [('baz', [('UNICODE-RANGE', 'U+20')])],
     ['unmatched ] token in (']),
])
def test_parse_style_attr(css_source, expected_declarations, expected_errors):
    """Parse an HTML ``style=""`` attribute: declarations only, no selector,
    with the same per-declaration error recovery as a full stylesheet."""
    declarations, errors = CSS21Parser().parse_style_attr(css_source)
    assert_errors(errors, expected_errors)
    result = [(decl.name, list(jsonify(decl.value)))
              for decl in declarations]
    assert result == expected_declarations
209 |
210 |
@pytest.mark.parametrize(('css_source', 'expected_declarations',
                          'expected_errors'), [
    (' /* hey */\n', [], []),

    ('a:1; b:2',
     [('a', [('INTEGER', 1)], None), ('b', [('INTEGER', 2)], None)], []),

    ('a:1 important; b: important',
     [('a', [('INTEGER', 1), ('S', ' '), ('IDENT', 'important')], None),
      ('b', [('IDENT', 'important')], None)],
     []),

    ('a:1 !important; b:2',
     [('a', [('INTEGER', 1)], 'important'), ('b', [('INTEGER', 2)], None)],
     []),

    ('a:1!\t Im\\50 O\\RTant; b:2',
     [('a', [('INTEGER', 1)], 'important'), ('b', [('INTEGER', 2)], None)],
     []),

    ('a: !important; b:2',
     [('b', [('INTEGER', 2)], None)],
     ['expected a value before !important']),

])
def test_important(css_source, expected_declarations, expected_errors):
    """The !important priority is detected (case-insensitively and through
    escapes); a bare 'important' ident without '!' stays a normal value.

    Expected declarations are ``(name, value_tokens, priority)`` triples.
    """
    declarations, errors = CSS21Parser().parse_style_attr(css_source)
    assert_errors(errors, expected_errors)
    result = [(decl.name, list(jsonify(decl.value)), decl.priority)
              for decl in declarations]
    assert result == expected_declarations
242 |
243 |
@pytest.mark.parametrize(('css_source', 'expected_rules', 'expected_errors'), [
    (' /* hey */\n', [], []),
    ('@import "foo.css";', [('foo.css', ['all'])], []),
    ('@import url(foo.css);', [('foo.css', ['all'])], []),
    ('@import "foo.css" screen, print;',
     [('foo.css', ['screen', 'print'])], []),
    ('@charset "ascii"; @import "foo.css"; @import "bar.css";',
     [('foo.css', ['all']), ('bar.css', ['all'])], []),
    ('foo {} @import "foo.css";',
     [], ['@import rule not allowed after a ruleset']),
    ('@page {} @import "foo.css";',
     [], ['@import rule not allowed after an @page rule']),
    ('@import ;',
     [], ['expected URI or STRING for @import rule']),
    ('@import foo.css;',
     [], ['expected URI or STRING for @import rule, got IDENT']),
    ('@import "foo.css" {}',
     [], ["expected ';', got a block"]),
])
def test_at_import(css_source, expected_rules, expected_errors):
    """@import rules: URI extraction, media lists, and placement errors."""
    # 'encoding' is passed so @charset rules are allowed in the source.
    stylesheet = CSS21Parser().parse_stylesheet(css_source, encoding='utf8')
    assert_errors(stylesheet.errors, expected_errors)

    imports = []
    for rule in stylesheet.rules:
        if rule.at_keyword == '@import':
            imports.append((rule.uri, rule.media))
    assert imports == expected_rules
274 |
275 |
@pytest.mark.parametrize(('css', 'expected_result', 'expected_errors'), [
    ('@page {}', (None, (0, 0), []), []),
    ('@page:first {}', ('first', (1, 0), []), []),
    ('@page :left{}', ('left', (0, 1), []), []),
    ('@page\t\n:right {}', ('right', (0, 1), []), []),
    ('@page :last {}', None, ['invalid @page selector']),
    ('@page : right {}', None, ['invalid @page selector']),
    ('@page table:left {}', None, ['invalid @page selector']),

    ('@page;', None, ['invalid @page rule: missing block']),
    ('@page { a:1; ; b: 2 }',
     (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
     []),
    ('@page { a:1; c: ; b: 2 }',
     (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
     ['expected a property value']),
    ('@page { a:1; @top-left {} b: 2 }',
     (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
     ['unknown at-rule in @page context: @top-left']),
    ('@page { a:1; @top-left {}; b: 2 }',
     (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
     ['unknown at-rule in @page context: @top-left']),
])
def test_at_page(css, expected_result, expected_errors):
    """CSS 2.1 @page rules: expected_result is ``(selector, specificity,
    declarations)`` or None when the whole rule must be rejected."""
    stylesheet = CSS21Parser().parse_stylesheet(css)
    assert_errors(stylesheet.errors, expected_errors)

    if expected_result is None:
        assert not stylesheet.rules
    else:
        assert len(stylesheet.rules) == 1
        rule = stylesheet.rules[0]
        assert rule.at_keyword == '@page'
        assert rule.at_rules == []  # no margin at-rules in CSS 2.1
        result = (
            rule.selector,
            rule.specificity,
            [(decl.name, list(jsonify(decl.value)))
             for decl in rule.declarations],
        )
        assert result == expected_result
317 |
318 |
@pytest.mark.parametrize(('css_source', 'expected_rules', 'expected_errors'), [
    (' /* hey */\n', [], []),
    ('@media all {}', [(['all'], [])], []),
    ('@media screen, print {}', [(['screen', 'print'], [])], []),
    ('@media all;', [], ['invalid @media rule: missing block']),
    ('@media {}', [], ['expected media types for @media']),
    ('@media 4 {}', [], ['expected a media type, got INTEGER']),
    ('@media , screen {}', [], ['expected a media type']),
    ('@media screen, {}', [], ['expected a media type']),
    ('@media screen print {}', [],
     ['expected a media type, got IDENT, IDENT']),

    ('@media all { @page { a: 1 } @media; @import; foo { a: 1 } }',
     [(['all'], [('foo', [('a', [('INTEGER', 1)])])])],
     ['@page rule not allowed in @media',
      '@media rule not allowed in @media',
      '@import rule not allowed in @media']),

])
def test_at_media(css_source, expected_rules, expected_errors):
    """@media rules: media-type lists, nested rulesets, and the rejection
    of at-rules that may not appear inside @media in CSS 2.1."""
    stylesheet = CSS21Parser().parse_stylesheet(css_source)
    assert_errors(stylesheet.errors, expected_errors)

    for rule in stylesheet.rules:
        assert rule.at_keyword == '@media'
    # (media_list, [(selector_css, declarations), ...]) per @media rule.
    result = [
        (rule.media, [
            (sub_rule.selector.as_css(), [
                (decl.name, list(jsonify(decl.value)))
                for decl in sub_rule.declarations])
            for sub_rule in rule.rules
        ])
        for rule in stylesheet.rules
    ]
    assert result == expected_rules
354 |
--------------------------------------------------------------------------------
/tinycss/tests/test_decoding.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Tests for decoding bytes to Unicode
4 | -----------------------------------
5 |
6 | :copyright: (c) 2012 by Simon Sapin.
7 | :license: BSD, see LICENSE for more details.
8 | """
9 |
10 |
11 | from __future__ import unicode_literals
12 |
13 | import pytest
14 | from tinycss.decoding import decode
15 |
16 |
def params(css, encoding, use_bom=False, expect_error=False, **kwargs):
    """Build a test-parameter tuple with readable keyword defaults.

    Extra keyword arguments are collected into a dict and passed through
    as the last element of the tuple.
    """
    tup = (css, encoding, use_bom, expect_error, kwargs)
    return tup
20 |
21 |
@pytest.mark.parametrize(('css', 'encoding', 'use_bom', 'expect_error',
                          'kwargs'), [
    params('', 'utf8'),  # default to utf8
    params('𐂃', 'utf8'),
    params('é', 'latin1'),  # utf8 fails, fall back on ShiftJIS
    params('£', 'ShiftJIS', expect_error=True),
    params('£', 'ShiftJIS', protocol_encoding='Shift-JIS'),
    params('£', 'ShiftJIS', linking_encoding='Shift-JIS'),
    params('£', 'ShiftJIS', document_encoding='Shift-JIS'),
    params('£', 'ShiftJIS', protocol_encoding='utf8',
           document_encoding='ShiftJIS'),
    params('@charset "utf8"; £', 'ShiftJIS', expect_error=True),
    params('@charset "utf£8"; £', 'ShiftJIS', expect_error=True),
    params('@charset "unknown-encoding"; £', 'ShiftJIS', expect_error=True),
    params('@charset "utf8"; £', 'ShiftJIS', document_encoding='ShiftJIS'),
    params('£', 'ShiftJIS', linking_encoding='utf8',
           document_encoding='ShiftJIS'),
    params('@charset "utf-32"; 𐂃', 'utf-32-be'),
    params('@charset "Shift-JIS"; £', 'ShiftJIS'),
    params('@charset "ISO-8859-8"; £', 'ShiftJIS', expect_error=True),
    params('𐂃', 'utf-16-le', expect_error=True),  # no BOM
    params('𐂃', 'utf-16-le', use_bom=True),
    params('𐂃', 'utf-32-be', expect_error=True),
    params('𐂃', 'utf-32-be', use_bom=True),
    params('𐂃', 'utf-32-be', document_encoding='utf-32-be'),
    params('𐂃', 'utf-32-be', linking_encoding='utf-32-be'),
    params('@charset "utf-32-le"; 𐂃', 'utf-32-be',
           use_bom=True, expect_error=True),
    # protocol_encoding takes precedence over @charset
    params('@charset "ISO-8859-8"; £', 'ShiftJIS',
           protocol_encoding='Shift-JIS'),
    params('@charset "unknown-encoding"; £', 'ShiftJIS',
           protocol_encoding='Shift-JIS'),
    params('@charset "Shift-JIS"; £', 'ShiftJIS',
           protocol_encoding='utf8'),
    # @charset takes precedence over document_encoding
    params('@charset "Shift-JIS"; £', 'ShiftJIS',
           document_encoding='ISO-8859-8'),
    # @charset takes precedence over linking_encoding
    params('@charset "Shift-JIS"; £', 'ShiftJIS',
           linking_encoding='ISO-8859-8'),
    # linking_encoding takes precedence over document_encoding
    params('£', 'ShiftJIS',
           linking_encoding='Shift-JIS', document_encoding='ISO-8859-8'),
])
def test_decode(css, encoding, use_bom, expect_error, kwargs):
    """Encode the source with *encoding*, then check whether decode()'s
    charset detection (BOM, @charset, and the *_encoding hints) recovers
    the original text; expect_error means it must NOT round-trip."""
    # Workaround PyPy and CPython 3.0 bug: https://bugs.pypy.org/issue1094
    css = css.encode('utf16').decode('utf16')
    if use_bom:
        source = '\ufeff' + css
    else:
        source = css
    css_bytes = source.encode(encoding)
    result, result_encoding = decode(css_bytes, **kwargs)
    if expect_error:
        assert result != css, 'Unexpected unicode success'
    else:
        assert result == css, 'Unexpected unicode error'
80 |
--------------------------------------------------------------------------------
/tinycss/tests/test_fonts3.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Tests for the Fonts 3 parser
4 | ----------------------------
5 |
6 | :copyright: (c) 2016 by Kozea.
7 | :license: BSD, see LICENSE for more details.
8 | """
9 |
10 |
11 | from __future__ import unicode_literals
12 |
13 | import pytest
14 | from tinycss.fonts3 import CSSFonts3Parser
15 |
16 | from . import assert_errors
17 | from .test_tokenizer import jsonify
18 |
19 |
@pytest.mark.parametrize(('css', 'expected_family_names', 'expected_errors'), [
    ('@font-feature-values foo {}', ('foo',), []),
    ('@font-feature-values Foo Test {}', ('Foo Test',), []),
    ('@font-feature-values \'Foo Test\' {}', ('Foo Test',), []),
    ('@font-feature-values Foo Test, Foo Lol, "Foo tooo"', (
        'Foo Test', 'Foo Lol', 'Foo tooo'), []),
    ('@font-feature-values Foo , Foo lol {}', ('Foo', 'Foo lol'), []),
    ('@font-feature-values Foo , "Foobar" , Lol {}', (
        'Foo', 'Foobar', 'Lol'), []),
    ('@font-feature-values Foo, {}', None, [
        'invalid @font-feature-values selector']),
    ('@font-feature-values ,Foo {}', None, [
        'invalid @font-feature-values selector']),
    ('@font-feature-values Test,"Foo", {}', None, [
        'invalid @font-feature-values selector']),
    ('@font-feature-values Test "Foo" {}', None, [
        'invalid @font-feature-values selector']),
    ('@font-feature-values Test Foo, Test "bar", "foo" {}', None, [
        'invalid @font-feature-values selector']),
    ('@font-feature-values Test/Foo {}', None, [
        'invalid @font-feature-values selector']),
    ('@font-feature-values /Foo {}', None, [
        'invalid @font-feature-values selector']),
    ('@font-feature-values #Foo {}', None, [
        'invalid @font-feature-values selector']),
    # TODO: this currently works but should not work
    # ('@font-feature-values test@foo {}', None, [
    #     'invalid @font-feature-values selector']),
    ('@font-feature-values Hawaii 5-0 {}', None, [
        'invalid @font-feature-values selector']),
])
def test_font_feature_values_selectors(css, expected_family_names,
                                       expected_errors):
    """Family-name lists in @font-feature-values headers: quoted and
    unquoted (possibly multi-word) names, and the invalid forms."""
    stylesheet = CSSFonts3Parser().parse_stylesheet(css)
    assert_errors(stylesheet.errors, expected_errors)

    if stylesheet.rules:
        assert len(stylesheet.rules) == 1
        rule = stylesheet.rules[0]
        assert rule.at_keyword == '@font-feature-values'
        assert rule.family_names == expected_family_names
61 |
62 |
@pytest.mark.parametrize(('css', 'expected_declarations', 'expected_errors'), [
    ('@font-face {}', [], []),
    ('@font-face test { src: "lol"; font-family: "bar" }', None, [
        'unexpected IDENT token in @font-face rule header']),
    ('@font-face { src: "lol"; font-family: "bar" }', [
        ('src', [('STRING', 'lol')]),
        ('font-family', [('STRING', 'bar')])], []),
    ('@font-face { src: "lol"; font-family: "bar"; src: "baz" }', [
        ('src', [('STRING', 'lol')]),
        ('font-family', [('STRING', 'bar')]),
        ('src', [('STRING', 'baz')])], []),
])
def test_font_face_content(css, expected_declarations, expected_errors):
    """@font-face rules: the header must be empty, and declarations
    (including duplicates) are kept in source order."""
    stylesheet = CSSFonts3Parser().parse_stylesheet(css)
    assert_errors(stylesheet.errors, expected_errors)

    def declarations(rule):
        # Flatten a rule's declarations into (name, jsonified value) pairs.
        return [(decl.name, list(jsonify(decl.value)))
                for decl in rule.declarations]

    if expected_declarations is None:
        assert stylesheet.rules == []
        assert expected_errors
    else:
        assert len(stylesheet.rules) == 1
        rule = stylesheet.rules[0]
        assert rule.at_keyword == '@font-face'
        assert declarations(rule) == expected_declarations
91 |
92 |
@pytest.mark.parametrize(
    ('css', 'expected_rules', 'expected_errors'), [
        ('''@annotation{}''', None, [
            '@annotation rule not allowed in stylesheet']),
        ('''@font-feature-values foo {}''', None, []),
        ('''@font-feature-values foo {
            @swash { ornate: 1; }
            @styleset { double-W: 14; sharp-terminals: 16 1; }
        }''', [
            ('@swash', [('ornate', [('INTEGER', 1)])]),
            ('@styleset', [
                ('double-w', [('INTEGER', 14)]),
                ('sharp-terminals', [
                    ('INTEGER', 16), ('S', ' '), ('INTEGER', 1)])])], []),
        ('''@font-feature-values foo {
            @swash { ornate: 14; }
            @unknown { test: 1; }
        }''', [('@swash', [('ornate', [('INTEGER', 14)])])], [
            'unknown at-rule in @font-feature-values context: @unknown']),
        ('''@font-feature-values foo {
            @annotation{boxed:1}
            bad: 2;
            @brokenstylesetbecauseofbadabove { sharp: 1}
            @styleset { sharp-terminals: 16 1; @bad {}}
            @styleset { @bad {} top-ignored: 3; top: 9000}
            really-bad
        }''', [
            ('@annotation', [('boxed', [('INTEGER', 1)])]),
            ('@styleset', [
                ('sharp-terminals', [
                    ('INTEGER', 16), ('S', ' '), ('INTEGER', 1)])]),
            ('@styleset', [('top', [('INTEGER', 9000)])])], [
            'unexpected ; token in selector',
            'expected a property name, got ATKEYWORD',
            'expected a property name, got ATKEYWORD',
            'no declaration block found for ruleset']),
    ])
def test_font_feature_values_content(css, expected_rules, expected_errors):
    """Nested feature-value at-rules (@swash, @styleset, @annotation...)
    inside @font-feature-values, including error recovery for bad content.

    Expected rules are ``(at_keyword, declarations)`` pairs, or None when
    the whole top-level rule is rejected or has no nested at-rules.
    """
    stylesheet = CSSFonts3Parser().parse_stylesheet(css)
    assert_errors(stylesheet.errors, expected_errors)

    if expected_rules is not None:
        assert len(stylesheet.rules) == 1
        rule = stylesheet.rules[0]
        assert rule.at_keyword == '@font-feature-values'

        rules = [
            (at_rule.at_keyword, [
                (decl.name, list(jsonify(decl.value)))
                for decl in at_rule.declarations])
            for at_rule in rule.at_rules] if rule.at_rules else None
        assert rules == expected_rules
145 |
--------------------------------------------------------------------------------
/tinycss/tests/test_page3.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Tests for the Paged Media 3 parser
4 | ----------------------------------
5 |
6 | :copyright: (c) 2012 by Simon Sapin.
7 | :license: BSD, see LICENSE for more details.
8 | """
9 |
10 |
11 | from __future__ import unicode_literals
12 |
13 | import pytest
14 | from tinycss.page3 import CSSPage3Parser
15 |
16 | from . import assert_errors
17 | from .test_tokenizer import jsonify
18 |
19 |
@pytest.mark.parametrize(('css', 'expected_selector',
                          'expected_specificity', 'expected_errors'), [
    ('@page {}', (None, None), (0, 0, 0), []),

    ('@page :first {}', (None, 'first'), (0, 1, 0), []),
    ('@page:left{}', (None, 'left'), (0, 0, 1), []),
    ('@page :right {}', (None, 'right'), (0, 0, 1), []),
    ('@page :blank{}', (None, 'blank'), (0, 1, 0), []),
    ('@page :last {}', None, None, ['invalid @page selector']),
    ('@page : first {}', None, None, ['invalid @page selector']),

    ('@page foo:first {}', ('foo', 'first'), (1, 1, 0), []),
    ('@page bar :left {}', ('bar', 'left'), (1, 0, 1), []),
    (r'@page \26:right {}', ('&', 'right'), (1, 0, 1), []),

    ('@page foo {}', ('foo', None), (1, 0, 0), []),
    (r'@page \26 {}', ('&', None), (1, 0, 0), []),

    ('@page foo fist {}', None, None, ['invalid @page selector']),
    ('@page foo, bar {}', None, None, ['invalid @page selector']),
    ('@page foo&first {}', None, None, ['invalid @page selector']),
])
def test_selectors(css, expected_selector, expected_specificity,
                   expected_errors):
    """Paged Media 3 @page selectors: (page_name, pseudo_class) pairs with
    their specificity, plus the rejected selector forms."""
    stylesheet = CSSPage3Parser().parse_stylesheet(css)
    assert_errors(stylesheet.errors, expected_errors)

    if stylesheet.rules:
        assert len(stylesheet.rules) == 1
        rule = stylesheet.rules[0]
        assert rule.at_keyword == '@page'
        selector = rule.selector
        assert rule.specificity == expected_specificity
    else:
        selector = None
    assert selector == expected_selector
56 |
57 |
@pytest.mark.parametrize(('css', 'expected_declarations',
                          'expected_rules', 'expected_errors'), [
    ('@page {}', [], [], []),
    ('@page { foo: 4; bar: z }',
     [('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])], [], []),
    ('''@page { foo: 4;
        @top-center { content: "Awesome Title" }
        @bottom-left { content: counter(page) }
        bar: z
    }''',
     [('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])],
     [('@top-center', [('content', [('STRING', 'Awesome Title')])]),
      ('@bottom-left', [('content', [
          ('FUNCTION', 'counter', [('IDENT', 'page')])])])],
     []),
    ('''@page { foo: 4;
        @bottom-top { content: counter(page) }
        bar: z
    }''',
     [('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])],
     [],
     ['unknown at-rule in @page context: @bottom-top']),

    ('@page{} @top-right{}', [], [], [
        '@top-right rule not allowed in stylesheet']),
    ('@page{ @top-right 4 {} }', [], [], [
        'unexpected INTEGER token in @top-right rule header']),
    # Not much error recovery tests here. This should be covered in test_css21
])
def test_content(css, expected_declarations, expected_rules, expected_errors):
    """@page bodies in Paged Media 3: plain declarations mixed with
    margin-box at-rules (@top-center, @bottom-left, ...)."""
    stylesheet = CSSPage3Parser().parse_stylesheet(css)
    assert_errors(stylesheet.errors, expected_errors)

    def declarations(rule):
        # (name, jsonified value) pairs for a rule's declarations.
        return [(decl.name, list(jsonify(decl.value)))
                for decl in rule.declarations]

    assert len(stylesheet.rules) == 1
    rule = stylesheet.rules[0]
    assert rule.at_keyword == '@page'
    assert declarations(rule) == expected_declarations
    rules = [(margin_rule.at_keyword, declarations(margin_rule))
             for margin_rule in rule.at_rules]
    assert rules == expected_rules
102 |
--------------------------------------------------------------------------------
/tinycss/tests/test_tokenizer.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Tests for the tokenizer
4 | -----------------------
5 |
6 | :copyright: (c) 2012 by Simon Sapin.
7 | :license: BSD, see LICENSE for more details.
8 | """
9 |
10 |
11 | from __future__ import unicode_literals
12 |
13 | import os
14 | import sys
15 |
16 | import pytest
17 | from tinycss.tokenizer import (
18 | cython_tokenize_flat, python_tokenize_flat, regroup)
19 |
20 |
def test_speedups():
    """Fail loudly when the Cython speedups are missing, instead of
    letting the related tests skip silently — unless we are on PyPy
    or the skip was explicitly requested via the environment."""
    on_pypy = hasattr(sys, 'pypy_translation_info')
    skip_requested = os.environ.get('TINYCSS_SKIP_SPEEDUPS_TESTS')
    # pragma moved onto the branch so coverage actually excludes it
    if on_pypy or skip_requested:  # pragma: no cover
        return
    assert cython_tokenize_flat is not None, (
        'Cython speedups are not installed, related tests will '
        'be skipped. Set the TINYCSS_SKIP_SPEEDUPS_TESTS environment '
        'variable if this is expected.')
31 |
32 |
@pytest.mark.parametrize(('tokenize', 'css_source', 'expected_tokens'), [
    (tokenize,) + test_data
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        ('', []),
        ('red -->', [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]),
        # Longest match rule: no CDC
        ('red-->', [('IDENT', 'red--'), ('DELIM', '>')]),
        (r'p[example="foo(int x) { this.x = x;}"]', [
            ('IDENT', 'p'),
            ('[', '['),
            ('IDENT', 'example'),
            ('DELIM', '='),
            ('STRING', 'foo(int x) { this.x = x;}'),
            (']', ']')]),

        # Numbers are parsed
        ('42 .5 -4pX 1.25em 30%', [
            ('INTEGER', 42), ('S', ' '),
            ('NUMBER', .5), ('S', ' '),
            # units are normalized to lower-case:
            ('DIMENSION', -4, 'px'), ('S', ' '),
            ('DIMENSION', 1.25, 'em'), ('S', ' '),
            ('PERCENTAGE', 30, '%')]),

        # URLs are extracted
        ('url(foo.png)', [('URI', 'foo.png')]),
        ('url("foo.png")', [('URI', 'foo.png')]),

        # Escaping

        (r'/* Comment with a \ backslash */', [
            ('COMMENT', '/* Comment with a \ backslash */')]),  # Unchanged

        # backslash followed by a newline in a string: ignored
        ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]),

        # backslash followed by a newline outside a string: stands for itself
        ('Lorem\\\nIpsum', [
            ('IDENT', 'Lorem'), ('DELIM', '\\'),
            ('S', '\n'), ('IDENT', 'Ipsum')]),

        # Cancel the meaning of special characters
        (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]),  # or not special
        (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]),
        (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]),
        (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]),
        (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]),
        (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]),
        (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]),
        (r'Lorem+Ipsum', [
            ('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]),
        (r'url(foo\).png)', [('URI', 'foo).png')]),

        # Unicode and backslash escaping
        ('\\26 B', [('IDENT', '&B')]),
        ('\\&B', [('IDENT', '&B')]),
        ('@\\26\tB', [('ATKEYWORD', '@&B')]),
        ('@\\&B', [('ATKEYWORD', '@&B')]),
        ('#\\26\nB', [('HASH', '#&B')]),
        ('#\\&B', [('HASH', '#&B')]),
        ('\\26\r\nB(', [('FUNCTION', '&B(')]),
        ('\\&B(', [('FUNCTION', '&B(')]),
        (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]),
        (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]),  # max 6 digits
        (r'12.5\&B', [('DIMENSION', 12.5, '&b')]),
        (r'"\26 B"', [('STRING', '&B')]),
        (r"'\000026B'", [('STRING', '&B')]),
        (r'"\&B"', [('STRING', '&B')]),
        (r'url("\26 B")', [('URI', '&B')]),
        (r'url(\26 B)', [('URI', '&B')]),
        (r'url("\&B")', [('URI', '&B')]),
        (r'url(\&B)', [('URI', '&B')]),
        (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]),

        # Bad strings

        # String ends at EOF without closing: no error, parsed
        ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]),
        # Unescaped newline: ends the string, error, unparsed
        ('"Lorem\\26Ipsum\n', [
            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]),
        # Tokenization restarts after the newline, so the second " starts
        # a new string (which ends at EOF without errors, as above.)
        ('"Lorem\\26Ipsum\ndolor" sit', [
            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
            ('IDENT', 'dolor'), ('STRING', ' sit')]),

    ]])
def test_tokens(tokenize, css_source, expected_tokens):
    """Flat tokenization: token types, parsed values, and normalized units,
    run against both the pure-Python and the Cython tokenizers."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    sources = [css_source]
    if sys.version_info[0] < 3:
        # On Python 2.x, ASCII-only bytestrings can be used
        # where Unicode is expected.
        sources.append(css_source.encode('ascii'))
    for css_source in sources:
        tokens = tokenize(css_source, ignore_comments=False)
        result = [
            (token.type, token.value) + (
                () if token.unit is None else (token.unit,))
            for token in tokens
        ]
        assert result == expected_tokens
138 |
139 |
@pytest.mark.parametrize('tokenize', [
    python_tokenize_flat, cython_tokenize_flat])
def test_positions(tokenize):
    """Test the reported line/column position of each token."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    # The source mixes \n, \f and an escaped form-feed inside a string;
    # the expected positions show each of them starting a new line.
    css = '/* Lorem\nipsum */\fa {\n color: red;\tcontent: "dolor\\\fsit" }'
    tokens = tokenize(css, ignore_comments=False)
    result = [(token.type, token.line, token.column) for token in tokens]
    assert result == [
        ('COMMENT', 1, 1), ('S', 2, 9),
        ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
        ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
        ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
        ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
        ('S', 5, 5), ('}', 5, 6)]
156 |
157 |
@pytest.mark.parametrize(('tokenize', 'css_source', 'expected_tokens'), [
    (tokenize,) + test_data
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        ('', []),
        (r'Lorem\26 "i\psum"4px', [
            ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),

        ('not([[lorem]]{ipsum (42)})', [
            ('FUNCTION', 'not', [
                ('[', [
                    ('[', [
                        ('IDENT', 'lorem'),
                    ]),
                ]),
                ('{', [
                    ('IDENT', 'ipsum'),
                    ('S', ' '),
                    ('(', [
                        ('INTEGER', 42),
                    ])
                ])
            ])]),

        # Close everything at EOF, no error
        ('a[b{"d', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('STRING', 'd'),
                ]),
            ]),
        ]),

        # Any remaining ), ] or } token is a nesting error
        ('a[b{d]e}', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                    (']', ']'),  # The error is visible here
                    ('IDENT', 'e'),
                ]),
            ]),
        ]),
        # ref:
        ('a[b{d}e]', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                ]),
                ('IDENT', 'e'),
            ]),
        ]),
    ]])
def test_token_grouping(tokenize, css_source, expected_tokens):
    """regroup() nests the flat token list into container tokens matching
    (), [] and {} pairs, closing any still-open containers at EOF."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    tokens = regroup(tokenize(css_source, ignore_comments=False))
    result = list(jsonify(tokens))
    assert result == expected_tokens
223 |
224 |
def jsonify(tokens):
    """Turn tokens into "JSON-compatible" data structures.

    FUNCTION tokens become ``(type, name, content)`` triples, other
    container tokens become ``(type, content)`` pairs, and plain tokens
    become ``(type, value)`` pairs; content is jsonified recursively.
    """
    for tok in tokens:
        kind = tok.type
        if kind == 'FUNCTION':
            yield kind, tok.function_name, list(jsonify(tok.content))
        elif tok.is_container:
            yield kind, list(jsonify(tok.content))
        else:
            yield kind, tok.value
235 |
236 |
@pytest.mark.parametrize(('tokenize', 'ignore_comments', 'expected_tokens'), [
    (tokenize,) + test_data
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        (False, [
            ('COMMENT', '/* lorem */'),
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
                ('COMMENT', '/* sit */'),
            ]),
            ('BAD_COMMENT', '/* amet')
        ]),
        (True, [
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
            ]),
        ]),
    ]])
def test_comments(tokenize, ignore_comments, expected_tokens):
    """COMMENT and BAD_COMMENT tokens (including those nested in
    containers) are kept or dropped depending on ignore_comments."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
    tokens = regroup(tokenize(css_source, ignore_comments))
    result = list(jsonify(tokens))
    assert result == expected_tokens
266 |
267 |
# Round-trip property: serializing the tokens must reproduce the source
# byte-for-byte, whether the token stream is flat or regrouped.
@pytest.mark.parametrize(('tokenize', 'css_source'), [
    (tokenize, test_data)
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        r'p[example="foo(int x) { this.x = x;}"]',
        '"Lorem\\26Ipsum\ndolor" sit',
        '/* Lorem\nipsum */\fa {\n color: red;\tcontent: "dolor\\\fsit" }',
        'not([[lorem]]{ipsum (42)})',
        'a[b{d]e}',
        'a[b{"d',
    ]])
def test_token_serialize_css(tokenize, css_source):
    """``as_css()`` must reproduce the source exactly, flat or grouped."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    # lambda x: x tests the flat stream; regroup tests the token tree.
    for _regroup in [regroup, lambda x: x]:
        tokens = _regroup(tokenize(css_source, ignore_comments=False))
        result = ''.join(token.as_css() for token in tokens)
        assert result == css_source
286 |
287 |
# Each source below regroups into exactly one container token whose
# content holds the same seven child tokens.
@pytest.mark.parametrize(('tokenize', 'css_source'), [
    (tokenize, test_data)
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        '(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
    ]
])
def test_token_api(tokenize, css_source):
    """Container tokens expose their children through ``.content``."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    tokens = list(regroup(tokenize(css_source)))
    assert len(tokens) == 1
    token = tokens[0]
    expected_len = 7  # 2 spaces, 2 commas, 3 others.
    assert len(token.content) == expected_len
303 |
--------------------------------------------------------------------------------
/tinycss/token_data.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | tinycss.token_data
4 | ------------------
5 |
6 | Shared data for both implementations (Cython and Python) of the tokenizer.
7 |
8 | :copyright: (c) 2012 by Simon Sapin.
9 | :license: BSD, see LICENSE for more details.
10 | """
11 |
12 | from __future__ import unicode_literals
13 |
14 | import functools
15 | import operator
16 | import re
17 | import string
18 | import sys
19 |
20 | # * Raw strings with the r'' notation are used so that \ do not need
21 | # to be escaped.
22 | # * Names and regexps are separated by a tabulation.
23 | # * Macros are re-ordered so that only previous definitions are needed.
24 | # * {} are used for macro substitution with ``string.Formatter``,
25 | # so other uses of { or } have been doubled.
26 | # * The syntax is otherwise compatible with re.compile.
27 | # * Some parentheses were added to add capturing groups.
28 | # (in unicode, DIMENSION and URI)
29 |
30 | # *** Willful violation: ***
31 | # Numbers can take a + or - sign, but the sign is a separate DELIM token.
32 | # Since comments are allowed anywhere between tokens, this makes
 33 | # the following valid. It means 10 negative pixels:
34 | # margin-top: -/**/10px
35 |
 36 | # This makes parsing numbers a pain, so instead we’ll do the same as Firefox
 37 | # and make the sign part of the 'num' macro. The above CSS will be invalid.
38 | # See discussion:
39 | # http://lists.w3.org/Archives/Public/www-style/2011Oct/0028.html
# Each non-blank line is ``name<TAB>regexp``; _init() expands the {macro}
# references (earlier definitions only) and compiles the results.
MACROS = r'''
  nl	\n|\r\n|\r|\f
  w	[ \t\r\n\f]*
  nonascii	[^\0-\237]
  unicode	\\([0-9a-f]{{1,6}})(\r\n|[ \n\r\t\f])?
  simple_escape	[^\n\r\f0-9a-f]
  escape	{unicode}|\\{simple_escape}
  nmstart	[_a-z]|{nonascii}|{escape}
  nmchar	[_a-z0-9-]|{nonascii}|{escape}
  name	{nmchar}+
  ident	[-]?{nmstart}{nmchar}*
  num	[-+]?(?:[0-9]*\.[0-9]+|[0-9]+)
  string1	\"([^\n\r\f\\"]|\\{nl}|{escape})*\"
  string2	\'([^\n\r\f\\']|\\{nl}|{escape})*\'
  string	{string1}|{string2}
  badstring1	\"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
  badstring2	\'([^\n\r\f\\']|\\{nl}|{escape})*\\?
  badstring	{badstring1}|{badstring2}
  badcomment1	\/\*[^*]*\*+([^/*][^*]*\*+)*
  badcomment2	\/\*[^*]*(\*+[^/*][^*]*)*
  badcomment	{badcomment1}|{badcomment2}
  baduri1	url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}
  baduri2	url\({w}{string}{w}
  baduri3	url\({w}{badstring}
  baduri	{baduri1}|{baduri2}|{baduri3}
'''.replace(r'\0', '\0').replace(r'\237', '\237')
66 |
67 | # Removed these tokens. Instead, they’re tokenized as two DELIM each.
68 | # INCLUDES ~=
69 | # DASHMATCH |=
70 | # They are only used in selectors but selectors3 also have ^=, *= and $=.
71 | # We don’t actually parse selectors anyway
72 |
73 | # Re-ordered so that the longest match is always the first.
74 | # For example, "url('foo')" matches URI, BAD_URI, FUNCTION and IDENT,
75 | # but URI would always be a longer match than the others.
# Each non-blank line is ``name<TAB>regexp``, same format as MACROS.
# The CDO (``<!--``) and CDC (``-->``) rules had been truncated out of
# this table; they are restored here — _init()'s dispatch table refers
# to both names ('<' dispatches to CDO, '-' to CDC).
TOKENS = r'''
  S	[ \t\r\n\f]+

  URI	url\({w}({string}|([!#$%&*-\[\]-~]|{nonascii}|{escape})*){w}\)
  BAD_URI	{baduri}
  FUNCTION	{ident}\(
  UNICODE-RANGE	u\+[0-9a-f?]{{1,6}}(-[0-9a-f]{{1,6}})?
  IDENT	{ident}

  ATKEYWORD	@{ident}
  HASH	#{name}

  DIMENSION	({num})({ident})
  PERCENTAGE	{num}%
  NUMBER	{num}

  STRING	{string}
  BAD_STRING	{badstring}

  COMMENT	\/\*[^*]*\*+([^/*][^*]*\*+)*\/
  BAD_COMMENT	{badcomment}

  :	:
  ;	;
  {	\{{
  }	\}}
  (	\(
  )	\)
  [	\[
  ]	\]
  CDO	<!--
  CDC	-->
'''
109 |
110 |
# Strings with {macro} expanded
COMPILED_MACROS = {}


COMPILED_TOKEN_REGEXPS = []  # [(name, regexp.match)] ordered
COMPILED_TOKEN_INDEXES = {}  # {name: i} helper for the C speedups


# Indexed by codepoint value of the first character of a token.
# Codepoints >= 160 (aka nonascii) all use the index 160.
# values are (i, name, regexp.match)
TOKEN_DISPATCH = []


try:
    unichr
except NameError:
    # Python 3: alias the Python 2 names so the rest of the module can
    # use unichr()/unicode unconditionally.
    unichr = chr
    unicode = str
132 |
def _init():
    """Import-time initialization.

    Expands the ``{macro}`` placeholders in :data:`MACROS` and
    :data:`TOKENS`, compiles the token regexps, and rebuilds the
    per-codepoint dispatch table.  All module-level tables are updated
    in place, so calling this again resets them.
    """
    COMPILED_MACROS.clear()
    for line in MACROS.splitlines():
        if line.strip():
            name, value = line.split('\t')
            # Macros may only reference previously-defined macros.
            COMPILED_MACROS[name.strip()] = '(?:%s)' \
                % value.format(**COMPILED_MACROS)

    COMPILED_TOKEN_REGEXPS[:] = (
        (
            name.strip(),
            re.compile(
                value.format(**COMPILED_MACROS),
                # Case-insensitive when matching eg. uRL(foo)
                # but preserve the case in extracted groups
                re.I
            ).match
        )
        for line in TOKENS.splitlines()
        if line.strip()
        for name, value in [line.split('\t')]
    )

    COMPILED_TOKEN_INDEXES.clear()
    for i, (name, regexp) in enumerate(COMPILED_TOKEN_REGEXPS):
        COMPILED_TOKEN_INDEXES[name] = i

    # 161 buckets: one per codepoint 0..159, plus one shared bucket at
    # index 160 for all non-ASCII first characters.
    dispatch = [[] for i in range(161)]
    for chars, names in [
        (' \t\r\n\f', ['S']),
        ('uU', ['URI', 'BAD_URI', 'UNICODE-RANGE']),
        # \ is an escape outside of another token
        (string.ascii_letters + '\\_-' + unichr(160), ['FUNCTION', 'IDENT']),
        (string.digits + '.+-', ['DIMENSION', 'PERCENTAGE', 'NUMBER']),
        ('@', ['ATKEYWORD']),
        ('#', ['HASH']),
        ('\'"', ['STRING', 'BAD_STRING']),
        ('/', ['COMMENT', 'BAD_COMMENT']),
        ('<', ['CDO']),
        ('-', ['CDC']),
    ]:
        for char in chars:
            dispatch[ord(char)].extend(names)
    for char in ':;{}()[]':
        # Single-character punctuation tokens dispatch only to themselves.
        dispatch[ord(char)] = [char]

    TOKEN_DISPATCH[:] = (
        [
            (index,) + COMPILED_TOKEN_REGEXPS[index]
            for name in names
            for index in [COMPILED_TOKEN_INDEXES[name]]
        ]
        for names in dispatch
    )
188 |
189 |
190 | _init()
191 |
192 |
def _unicode_replace(match, int=int, unichr=unichr, maxunicode=sys.maxunicode):
    """Turn a matched ``\\XXXXXX`` unicode escape into its character.

    Codepoints beyond what this Python build supports are replaced by
    U+FFFD REPLACEMENT CHARACTER.  (The keyword defaults bind globals
    as locals for speed in the regexp-substitution hot path.)
    """
    codepoint = int(match.group(1), 16)
    if codepoint > maxunicode:
        return '\N{REPLACEMENT CHARACTER}'  # U+FFFD
    return unichr(codepoint)
199 |
200 |
# Pre-bound unescaping helpers, shared by both tokenizer implementations.

UNICODE_UNESCAPE = functools.partial(
    re.compile(COMPILED_MACROS['unicode'], re.I).sub,
    _unicode_replace)

NEWLINE_UNESCAPE = functools.partial(
    re.compile(r'()\\' + COMPILED_MACROS['nl']).sub,
    '')

SIMPLE_UNESCAPE = functools.partial(
    re.compile(r'\\(%s)' % COMPILED_MACROS['simple_escape'], re.I).sub,
    # Same as r'\1', but faster on CPython
    operator.methodcaller('group', 1))

FIND_NEWLINES = re.compile(COMPILED_MACROS['nl']).finditer
215 |
216 |
class Token(object):
    """A single atomic token.

    .. attribute:: is_container

        Always ``False``.
        Helps to tell :class:`Token` apart from :class:`ContainerToken`.

    .. attribute:: type

        The type of token as a string:

        ``S``
            A sequence of white space

        ``IDENT``
            An identifier: a name that does not start with a digit.
            A name is a sequence of letters, digits, ``_``, ``-``, escaped
            characters and non-ASCII characters. Eg: ``margin-left``

        ``HASH``
            ``#`` followed immediately by a name. Eg: ``#ff8800``

        ``ATKEYWORD``
            ``@`` followed immediately by an identifier. Eg: ``@page``

        ``URI``
            Eg: ``url(foo)`` The content may or may not be quoted.

        ``UNICODE-RANGE``
            ``U+`` followed by one or two hexadecimal
            Unicode codepoints. Eg: ``U+20-00FF``

        ``INTEGER``
            An integer with an optional ``+`` or ``-`` sign

        ``NUMBER``
            A non-integer number with an optional ``+`` or ``-`` sign

        ``DIMENSION``
            An integer or number followed immediately by an
            identifier (the unit). Eg: ``12px``

        ``PERCENTAGE``
            An integer or number followed immediately by ``%``

        ``STRING``
            A string, quoted with ``"`` or ``'``

        ``:`` or ``;``
            That character.

        ``DELIM``
            A single character not matched in another token. Eg: ``,``

        See the source of the :mod:`.token_data` module for the precise
        regular expressions that match various tokens.

        Note that other token types exist in the early tokenization steps,
        but these are ignored, are syntax errors, or are later transformed
        into :class:`ContainerToken` or :class:`FunctionToken`.

    .. attribute:: value

        The parsed value:

        * INTEGER, NUMBER, PERCENTAGE or DIMENSION tokens: the numeric value
          as an int or float.
        * STRING tokens: the unescaped string without quotes
        * URI tokens: the unescaped URI without quotes or
          ``url(`` and ``)`` markers.
        * IDENT, ATKEYWORD or HASH tokens: the unescaped token,
          with ``@`` or ``#`` markers left as-is
        * Other tokens: same as :attr:`as_css`

        *Unescaped* refers to the various escaping methods based on the
        backslash ``\\`` character in CSS syntax.

    .. attribute:: unit

        * DIMENSION tokens: the normalized (unescaped, lower-case)
          unit name as a string. eg. ``'px'``
        * PERCENTAGE tokens: the string ``'%'``
        * Other tokens: ``None``

    .. attribute:: line

        The line number in the CSS source of the start of this token.

    .. attribute:: column

        The column number (inside a source line) of the start of this token.

    """
    is_container = False
    __slots__ = 'type', '_as_css', 'value', 'unit', 'line', 'column'

    def __init__(self, type_, css_value, value, unit, line, column):
        self.type = type_
        self._as_css = css_value
        self.value = value
        self.unit = unit
        self.line = line
        self.column = column

    def as_css(self):
        """
        Return as an Unicode string the CSS representation of the token,
        as parsed in the source.
        """
        return self._as_css

    def __repr__(self):
        # Debug representation: type, source position, parsed value and
        # (for DIMENSION/PERCENTAGE) the unit.  The format string had
        # been emptied out; restored.
        return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
                .format(self, self.unit or ''))

    def __eq__(self, other):
        """Compare type, source text, value and unit (NOT line/column).

        Raises TypeError when comparing against a different type, to
        catch accidental comparisons with non-token objects.
        """
        if type(self) != type(other):
            raise TypeError(
                'Cannot compare {0} and {1}'.format(type(self), type(other)))
        # The previous version passed four positional arguments to
        # all() (a TypeError at runtime) and read a nonexistent
        # ``type_`` attribute; compare the real attributes directly.
        return (
            self.type == other.type and
            self._as_css == other._as_css and
            self.value == other.value and
            self.unit == other.unit
        )
344 |
345 |
class ContainerToken(object):
    """A token that contains other (nested) tokens.

    .. attribute:: is_container

        Always ``True``.
        Helps to tell :class:`ContainerToken` apart from :class:`Token`.

    .. attribute:: type

        The type of token as a string. One of ``{``, ``(``, ``[`` or
        ``FUNCTION``. For ``FUNCTION``, the object is actually a
        :class:`FunctionToken`.

    .. attribute:: unit

        Always ``None``. Included to make :class:`ContainerToken` behave
        more like :class:`Token`.

    .. attribute:: content

        A list of :class:`Token` or nested :class:`ContainerToken`,
        not including the opening or closing token.

    .. attribute:: line

        The line number in the CSS source of the start of this token.

    .. attribute:: column

        The column number (inside a source line) of the start of this token.

    """
    is_container = True
    unit = None
    __slots__ = 'type', '_css_start', '_css_end', 'content', 'line', 'column'

    def __init__(self, type_, css_start, css_end, content, line, column):
        self.type = type_
        self._css_start = css_start
        self._css_end = css_end
        self.content = content
        self.line = line
        self.column = column

    def as_css(self):
        """
        Return as an Unicode string the CSS representation of the token,
        as parsed in the source.
        """
        parts = [self._css_start]
        parts.extend(token.as_css() for token in self.content)
        parts.append(self._css_end)
        return ''.join(parts)

    # Used by __repr__; FunctionToken overrides it with a more specific
    # format.  The string literal had been emptied out; restored.
    format_string = '<ContainerToken {0.type} at {0.line}:{0.column}>'

    def __repr__(self):
        return (self.format_string + ' {0.content}').format(self)
405 |
406 |
class FunctionToken(ContainerToken):
    """A specialized :class:`ContainerToken` for a ``FUNCTION`` group.
    Has an additional attribute:

    .. attribute:: function_name

        The unescaped name of the function, with the ``(`` marker removed.

    """
    __slots__ = 'function_name',

    def __init__(self, type_, css_start, css_end, function_name, content,
                 line, column):
        super(FunctionToken, self).__init__(
            type_, css_start, css_end, content, line, column)
        # Remove the ( marker:
        self.function_name = function_name[:-1]

    # Overrides ContainerToken.format_string for __repr__.  The string
    # literal had been emptied out (a continuation line was lost); restored.
    format_string = ('<FunctionToken {0.function_name}()'
                     ' at {0.line}:{0.column}>')
427 |
428 |
class TokenList(list):
    """
    A mixed list of :class:`~.token_data.Token` and
    :class:`~.token_data.ContainerToken` objects.

    This is a subclass of the builtin :class:`~builtins.list` type.
    It can be iterated, indexed and sliced as usual, but also has some
    additional API:

    """
    @property
    def line(self):
        """The line number in the CSS source of the first token."""
        first_token = self[0]
        return first_token.line

    @property
    def column(self):
        """The column number (inside a source line) of the first token."""
        first_token = self[0]
        return first_token.column

    def as_css(self):
        """
        Return as an Unicode string the CSS representation of the tokens,
        as parsed in the source.
        """
        parts = [token.as_css() for token in self]
        return ''.join(parts)
455 |
--------------------------------------------------------------------------------
/tinycss/tokenizer.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | tinycss.tokenizer
4 | -----------------
5 |
6 | Tokenizer for the CSS core syntax:
7 | http://www.w3.org/TR/CSS21/syndata.html#tokenization
8 |
9 | This is the pure-python implementation. See also speedups.pyx
10 |
11 | :copyright: (c) 2012 by Simon Sapin.
12 | :license: BSD, see LICENSE for more details.
13 | """
14 |
15 | from __future__ import unicode_literals
16 |
17 | from . import token_data
18 |
19 |
def tokenize_flat(
        css_source, ignore_comments=True,
        # Make these local variable to avoid global lookups in the loop
        tokens_dispatch=token_data.TOKEN_DISPATCH,
        unicode_unescape=token_data.UNICODE_UNESCAPE,
        newline_unescape=token_data.NEWLINE_UNESCAPE,
        simple_unescape=token_data.SIMPLE_UNESCAPE,
        find_newlines=token_data.FIND_NEWLINES,
        Token=token_data.Token,
        len=len,
        int=int,
        float=float,
        list=list,
        _None=None):
    """
    :param css_source:
        CSS as an unicode string
    :param ignore_comments:
        if true (the default) comments will not be included in the
        return value
    :return:
        An iterator of :class:`Token`

    """

    pos = 0
    line = 1
    column = 1
    source_len = len(css_source)
    tokens = []
    while pos < source_len:
        char = css_source[pos]
        # Single-character punctuation tokens need no regexp matching.
        if char in ':;{}()[]':
            type_ = char
            css_value = char
        else:
            # Dispatch on the first character's codepoint; >= 160 all
            # share the non-ASCII bucket at index 160.
            codepoint = min(ord(char), 160)
            for _index, type_, regexp in tokens_dispatch[codepoint]:
                match = regexp(css_source, pos)
                if match:
                    # First match is the longest. See comments on TOKENS above.
                    css_value = match.group()
                    break
            else:
                # No match.
                # "Any other character not matched by the above rules,
                # and neither a single nor a double quote."
                # ... but quotes at the start of a token are always matched
                # by STRING or BAD_STRING. So DELIM is any single character.
                type_ = 'DELIM'
                css_value = char
        length = len(css_value)
        next_pos = pos + length

        # A BAD_COMMENT is a comment at EOF. Ignore it too.
        if not (ignore_comments and type_ in ('COMMENT', 'BAD_COMMENT')):
            # Parse numbers, extract strings and URIs, unescape
            unit = _None
            if type_ == 'DIMENSION':
                value = match.group(1)
                value = float(value) if '.' in value else int(value)
                unit = match.group(2)
                unit = simple_unescape(unit)
                unit = unicode_unescape(unit)
                unit = unit.lower()  # normalize
            elif type_ == 'PERCENTAGE':
                value = css_value[:-1]
                value = float(value) if '.' in value else int(value)
                unit = '%'
            elif type_ == 'NUMBER':
                value = css_value
                if '.' in value:
                    value = float(value)
                else:
                    value = int(value)
                    # Whole numbers are reported as INTEGER, not NUMBER.
                    type_ = 'INTEGER'
            elif type_ in ('IDENT', 'ATKEYWORD', 'HASH', 'FUNCTION'):
                value = simple_unescape(css_value)
                value = unicode_unescape(value)
            elif type_ == 'URI':
                value = match.group(1)
                if value and value[0] in '"\'':
                    value = value[1:-1]  # Remove quotes
                    value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            elif type_ == 'STRING':
                value = css_value[1:-1]  # Remove quotes
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            # BAD_STRING can only be one of:
            # * Unclosed string at the end of the stylesheet:
            #   Close the string, but this is not an error.
            #   Make it a "good" STRING token.
            # * Unclosed string at the (unescaped) end of the line:
            #   Close the string, but this is an error.
            #   Leave it as a BAD_STRING, don’t bother parsing it.
            # See http://www.w3.org/TR/CSS21/syndata.html#parsing-errors
            elif type_ == 'BAD_STRING' and next_pos == source_len:
                type_ = 'STRING'
                value = css_value[1:]  # Remove quote
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            else:
                value = css_value
            tokens.append(Token(type_, css_value, value, unit, line, column))

        pos = next_pos
        # Track line/column across the consumed text for error reporting.
        newlines = list(find_newlines(css_value))
        if newlines:
            line += len(newlines)
            # Add 1 to have lines start at column 1, not 0
            column = length - newlines[-1].end() + 1
        else:
            column += length
    return tokens
138 |
139 |
def regroup(tokens):
    """
    Match pairs of tokens: () [] {} function()
    (Strings in "" or '' are taken care of by the tokenizer.)

    Opening tokens are replaced by a :class:`ContainerToken`.
    Closing tokens are removed. Unmatched closing tokens are invalid
    but left as-is. All nested structures that are still open at
    the end of the stylesheet are implicitly closed.

    :param tokens:
        a *flat* iterable of tokens, as returned by :func:`tokenize_flat`.
    :return:
        A tree of tokens.

    """
    # "global" objects for the inner recursion
    pairs = {'FUNCTION': ')', '(': ')', '[': ']', '{': '}'}
    tokens = iter(tokens)
    # Mutable flag shared between recursion levels: set to True when the
    # underlying iterator is exhausted (end of stylesheet).
    eof = [False]

    def _regroup_inner(stop_at=None, tokens=tokens, pairs=pairs, eof=eof,
                       ContainerToken=token_data.ContainerToken,
                       FunctionToken=token_data.FunctionToken):
        for token in tokens:
            type_ = token.type
            if type_ == stop_at:
                return

            end = pairs.get(type_)
            if end is None:
                yield token  # Not a grouping token
            else:
                assert not isinstance(token, ContainerToken), (
                    'Token looks already grouped: {0}'.format(token))
                # Recurse to collect everything up to the matching
                # closing token (which the recursive call consumes).
                content = list(_regroup_inner(end))
                if eof[0]:
                    end = ''  # Implicit end of structure at EOF.
                if type_ == 'FUNCTION':
                    yield FunctionToken(token.type, token.as_css(), end,
                                        token.value, content,
                                        token.line, token.column)
                else:
                    yield ContainerToken(token.type, token.as_css(), end,
                                         content,
                                         token.line, token.column)
        else:
            eof[0] = True  # end of file/stylesheet
    return _regroup_inner()
189 |
190 |
def tokenize_grouped(css_source, ignore_comments=True):
    """Tokenize ``css_source`` and group matching pairs of tokens.

    :param css_source:
        CSS as an unicode string
    :param ignore_comments:
        if true (the default) comments will not be included in the
        return value
    :return:
        An iterator of :class:`Token`

    """
    flat_tokens = tokenize_flat(css_source, ignore_comments)
    return regroup(flat_tokens)
203 |
204 |
# Optional Cython version of tokenize_flat
# Make both versions available with explicit names for tests.
python_tokenize_flat = tokenize_flat
try:
    from . import speedups
except ImportError:
    # The compiled extension is absent (not built or not installed).
    cython_tokenize_flat = None
else:
    cython_tokenize_flat = speedups.tokenize_flat
    # Default to the Cython version if available
    tokenize_flat = cython_tokenize_flat
216 |
--------------------------------------------------------------------------------
/tinycss/version.py:
--------------------------------------------------------------------------------
# tinycss package version string.
VERSION = '0.4'
2 |
--------------------------------------------------------------------------------