├── .coveragerc ├── .gitignore ├── .gitlab-ci.yml ├── .travis.yml ├── CHANGES ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── _static │ └── custom.css ├── _templates │ └── layout.html ├── changelog.rst ├── conf.py ├── css3.rst ├── extending.rst ├── hacking.rst ├── index.rst └── parsing.rst ├── setup.cfg ├── setup.py └── tinycss ├── __init__.py ├── color3.py ├── css21.py ├── decoding.py ├── fonts3.py ├── page3.py ├── parsing.py ├── speedups.pyx ├── tests ├── __init__.py ├── speed.py ├── test_api.py ├── test_color3.py ├── test_css21.py ├── test_decoding.py ├── test_fonts3.py ├── test_page3.py └── test_tokenizer.py ├── token_data.py ├── tokenizer.py └── version.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | 4 | [report] 5 | exclude_lines = 6 | pragma: no cover 7 | def __repr__ 8 | except ImportError 9 | omit = 10 | tinycss/tests/speed.py 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.c 3 | *.so 4 | *.egg-info 5 | /.coverage 6 | /htmlcov 7 | /build 8 | /dist 9 | /.tox 10 | /MANIFEST 11 | /docs/_build 12 | /env 13 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | before_script: 2 | - pip install -U setuptools 3 | - pip install Cython 4 | - pip install --upgrade -e .[test] 5 | 6 | .before_script_alpine: &before_alpine 7 | before_script: 8 | - apk add --no-cache openssl gcc musl-dev 9 | - pip install Cython setuptools 10 | - pip install --upgrade -e .[test] 11 | 12 | .test_template: &test 13 | script: 14 | - python setup.py test 15 | 16 | python 2.7alpine: 17 | image: python:2.7-alpine 18 | <<: *before_alpine 19 | <<: *test 20 | 21 | python 3.3alpine: 22 | image: python:3.3-alpine 23 | <<: *before_alpine 
24 | <<: *test 25 | 26 | python 3.4alpine: 27 | image: python:3.4-alpine 28 | <<: *before_alpine 29 | <<: *test 30 | 31 | python 3.5alpine: 32 | image: python:3.5-alpine 33 | <<: *before_alpine 34 | <<: *test 35 | 36 | python 3.6alpine: 37 | image: python:3.6-alpine 38 | <<: *before_alpine 39 | <<: *test 40 | 41 | python pypy: 42 | image: pypy:2 43 | <<: *test 44 | 45 | python pypy3: 46 | image: pypy:3 47 | <<: *test 48 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | - "3.3" 6 | - "3.4" 7 | - "3.5" 8 | - "pypy" 9 | 10 | install: 11 | - pip install Cython 12 | - pip install --upgrade -e .[test] 13 | 14 | script: 15 | - python setup.py test 16 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | tinycss changelog 2 | ================= 3 | 4 | 5 | Version 0.4 6 | ----------- 7 | 8 | Released on 2016-09-23. 9 | 10 | * Add an __eq__ operator to Token object. 11 | * Support Fonts 3. 12 | 13 | 14 | 15 | Version 0.3 16 | ----------- 17 | 18 | Released on 2012-09-18. 19 | 20 | * Fix a bug when parsing \5c (an escaped backslash.) 21 | 22 | 23 | 24 | Version 0.2 25 | ----------- 26 | 27 | Released on 2012-04-27. 28 | 29 | **Breaking changes:** 30 | 31 | * Remove the ``selectors3`` module. The functionality has moved to the 32 | `cssselect `_ project. 33 | * Simplify the API for :func:`~tinycss.make_parser`. 34 | 35 | 36 | Version 0.1.1 37 | ------------- 38 | 39 | Released on 2012-04-06. 40 | 41 | Bug fixes: 42 | 43 | * Error handling on unexpected end of stylesheet in an at-rule head 44 | * Fix the installation on ASCII-only locales 45 | 46 | 47 | Version 0.1 48 | ----------- 49 | 50 | Released on 2012-04-05. 51 | 52 | First release. 
Parser support for CSS 2.1, Selectors 3, Color 3 and 53 | Paged Media 3. 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 by Simon Sapin. 2 | 3 | Some rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the following 14 | disclaimer in the documentation and/or other materials provided 15 | with the distribution. 16 | 17 | * The names of the contributors may not be used to endorse or 18 | promote products derived from this software without specific 19 | prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst CHANGES LICENSE tox.ini .coveragerc tinycss/speedups.c 2 | recursive-include docs * 3 | prune docs/_build 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | tinycss: CSS parser for Python 2 | ============================== 3 | 4 | *tinycss* is a complete yet simple CSS parser for Python. It supports the full 5 | syntax and error handling for CSS 2.1 as well as some CSS 3 modules: 6 | 7 | * CSS Color 3 8 | * CSS Fonts 3 9 | * CSS Paged Media 3 10 | 11 | It is designed to be easy to extend for new CSS modules and syntax, 12 | and integrates well with cssselect_ for Selectors 3 support. 13 | 14 | Quick facts: 15 | 16 | * Free software: BSD licensed 17 | * Compatible with Python 2.7 and 3.x 18 | * Latest documentation `on python.org`_ 19 | * Source, issues and pull requests `on Github`_ 20 | * Releases `on PyPI`_ 21 | * Install with ``pip install tinycss`` 22 | 23 | .. _cssselect: http://packages.python.org/cssselect/ 24 | .. _on python.org: http://packages.python.org/tinycss/ 25 | .. _on Github: https://github.com/SimonSapin/tinycss/ 26 | .. 
_on PyPI: http://pypi.python.org/pypi/tinycss 27 | -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | div.body { 2 | text-align: left; 3 | } 4 | div.document p, div.document ul { 5 | margin-top: 0; 6 | margin-bottom: 1em; 7 | } 8 | div.document ul ul { 9 | margin-top: 0; 10 | margin-bottom: .5em; 11 | } 12 | .field-name { 13 | padding-right: .5em; 14 | } 15 | table.field-list p, table.field-list ul { 16 | margin-bottom: .5em; 17 | } 18 | table { 19 | border-collapse: collapse; 20 | margin-bottom: 1em; 21 | } 22 | table.docutils td, table.docutils th { 23 | padding: .2em .5em; 24 | } 25 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% block extrahead %} 3 | 4 | {% endblock %} 5 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CHANGES 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # tinycss documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Mar 27 14:20:34 2012. 6 | # 7 | # This file is execfile()d with the current directory set to its containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 
14 | 15 | import sys, os 16 | 17 | # If extensions (or modules to document with autodoc) are in another directory, 18 | # add these directories to sys.path here. If the directory is relative to the 19 | # documentation root, use os.path.abspath to make it absolute, like shown here. 20 | #sys.path.insert(0, os.path.abspath('.')) 21 | 22 | # -- General configuration ----------------------------------------------------- 23 | 24 | # If your documentation needs a minimal Sphinx version, state it here. 25 | #needs_sphinx = '1.0' 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be extensions 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 29 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 30 | 'sphinx.ext.viewcode', 'sphinx.ext.doctest'] 31 | 32 | # Add any paths that contain templates here, relative to this directory. 33 | templates_path = ['_templates'] 34 | 35 | # The suffix of source filenames. 36 | source_suffix = '.rst' 37 | 38 | # The encoding of source files. 39 | #source_encoding = 'utf-8-sig' 40 | 41 | # The master toctree document. 42 | master_doc = 'index' 43 | 44 | # General information about the project. 45 | project = 'tinycss' 46 | copyright = '2012, Simon Sapin' 47 | 48 | # The version info for the project you're documenting, acts as replacement for 49 | # |version| and |release|, also used in various other places throughout the 50 | # built documents. 51 | # 52 | # The full version, including alpha/beta/rc tags. 53 | #release = '0.1dev' 54 | import re 55 | with open(os.path.join(os.path.dirname(__file__), '..', 56 | 'tinycss', 'version.py')) as init_py: 57 | release = re.search("VERSION = '([^']+)'", init_py.read()).group(1) 58 | # The short X.Y version. 59 | version = release.rstrip('dev') 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 
63 | #language = None 64 | 65 | # There are two options for replacing |today|: either, you set today to some 66 | # non-false value, then it is used: 67 | #today = '' 68 | # Else, today_fmt is used as the format for a strftime call. 69 | #today_fmt = '%B %d, %Y' 70 | 71 | # List of patterns, relative to source directory, that match files and 72 | # directories to ignore when looking for source files. 73 | exclude_patterns = ['_build'] 74 | 75 | # The reST default role (used for this markup: `text`) to use for all documents. 76 | #default_role = None 77 | 78 | # If true, '()' will be appended to :func: etc. cross-reference text. 79 | #add_function_parentheses = True 80 | 81 | # If true, the current module name will be prepended to all description 82 | # unit titles (such as .. function::). 83 | #add_module_names = True 84 | 85 | # If true, sectionauthor and moduleauthor directives will be shown in the 86 | # output. They are ignored by default. 87 | #show_authors = False 88 | 89 | # The name of the Pygments (syntax highlighting) style to use. 90 | pygments_style = 'sphinx' 91 | 92 | # A list of ignored prefixes for module index sorting. 93 | #modindex_common_prefix = [] 94 | 95 | 96 | # -- Options for HTML output --------------------------------------------------- 97 | 98 | # The theme to use for HTML and HTML Help pages. See the documentation for 99 | # a list of builtin themes. 100 | #html_theme = 'agogo' 101 | 102 | # Theme options are theme-specific and customize the look and feel of a theme 103 | # further. For a list of options available for each theme, see the 104 | # documentation. 105 | #html_theme_options = {} 106 | 107 | # Add any paths that contain custom themes here, relative to this directory. 108 | #html_theme_path = [] 109 | 110 | # The name for this set of Sphinx documents. If None, it defaults to 111 | # " v documentation". 112 | #html_title = None 113 | 114 | # A shorter title for the navigation bar. Default is the same as html_title. 
115 | #html_short_title = None 116 | 117 | # The name of an image file (relative to this directory) to place at the top 118 | # of the sidebar. 119 | #html_logo = None 120 | 121 | # The name of an image file (within the static path) to use as favicon of the 122 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 123 | # pixels large. 124 | #html_favicon = None 125 | 126 | # Add any paths that contain custom static files (such as style sheets) here, 127 | # relative to this directory. They are copied after the builtin static files, 128 | # so a file named "default.css" will overwrite the builtin "default.css". 129 | html_static_path = ['_static'] 130 | 131 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 132 | # using the given strftime format. 133 | #html_last_updated_fmt = '%b %d, %Y' 134 | 135 | # If true, SmartyPants will be used to convert quotes and dashes to 136 | # typographically correct entities. 137 | #html_use_smartypants = True 138 | 139 | # Custom sidebar templates, maps document names to template names. 140 | #html_sidebars = {} 141 | 142 | # Additional templates that should be rendered to pages, maps page names to 143 | # template names. 144 | #html_additional_pages = {} 145 | 146 | # If false, no module index is generated. 147 | #html_domain_indices = True 148 | 149 | # If false, no index is generated. 150 | #html_use_index = True 151 | 152 | # If true, the index is split into individual pages for each letter. 153 | #html_split_index = False 154 | 155 | # If true, links to the reST sources are added to the pages. 156 | #html_show_sourcelink = True 157 | 158 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 159 | #html_show_sphinx = True 160 | 161 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
162 | #html_show_copyright = True 163 | 164 | # If true, an OpenSearch description file will be output, and all pages will 165 | # contain a tag referring to it. The value of this option must be the 166 | # base URL from which the finished HTML is served. 167 | #html_use_opensearch = '' 168 | 169 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 170 | #html_file_suffix = None 171 | 172 | # Output file base name for HTML help builder. 173 | htmlhelp_basename = 'tinycssdoc' 174 | 175 | 176 | # -- Options for LaTeX output -------------------------------------------------- 177 | 178 | latex_elements = { 179 | # The paper size ('letterpaper' or 'a4paper'). 180 | #'papersize': 'letterpaper', 181 | 182 | # The font size ('10pt', '11pt' or '12pt'). 183 | #'pointsize': '10pt', 184 | 185 | # Additional stuff for the LaTeX preamble. 186 | #'preamble': '', 187 | } 188 | 189 | # Grouping the document tree into LaTeX files. List of tuples 190 | # (source start file, target name, title, author, documentclass [howto/manual]). 191 | latex_documents = [ 192 | ('index', 'tinycss.tex', 'tinycss Documentation', 193 | 'Simon Sapin', 'manual'), 194 | ] 195 | 196 | # The name of an image file (relative to this directory) to place at the top of 197 | # the title page. 198 | #latex_logo = None 199 | 200 | # For "manual" documents, if this is true, then toplevel headings are parts, 201 | # not chapters. 202 | #latex_use_parts = False 203 | 204 | # If true, show page references after internal links. 205 | #latex_show_pagerefs = False 206 | 207 | # If true, show URL addresses after external links. 208 | #latex_show_urls = False 209 | 210 | # Documents to append as an appendix to all manuals. 211 | #latex_appendices = [] 212 | 213 | # If false, no module index is generated. 214 | #latex_domain_indices = True 215 | 216 | 217 | # -- Options for manual page output -------------------------------------------- 218 | 219 | # One entry per manual page. 
List of tuples 220 | # (source start file, name, description, authors, manual section). 221 | man_pages = [ 222 | ('index', 'tinycss', 'tinycss Documentation', 223 | ['Simon Sapin'], 1) 224 | ] 225 | 226 | # If true, show URL addresses after external links. 227 | #man_show_urls = False 228 | 229 | 230 | # -- Options for Texinfo output ------------------------------------------------ 231 | 232 | # Grouping the document tree into Texinfo files. List of tuples 233 | # (source start file, target name, title, author, 234 | # dir menu entry, description, category) 235 | texinfo_documents = [ 236 | ('index', 'tinycss', 'tinycss Documentation', 237 | 'Simon Sapin', 'tinycss', 'One line description of project.', 238 | 'Miscellaneous'), 239 | ] 240 | 241 | # Documents to append as an appendix to all manuals. 242 | #texinfo_appendices = [] 243 | 244 | # If false, no module index is generated. 245 | #texinfo_domain_indices = True 246 | 247 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 248 | #texinfo_show_urls = 'footnote' 249 | 250 | 251 | # Example configuration for intersphinx: refer to the Python standard library. 252 | intersphinx_mapping = {'http://docs.python.org/': None} 253 | -------------------------------------------------------------------------------- /docs/css3.rst: -------------------------------------------------------------------------------- 1 | CSS 3 Modules 2 | ============= 3 | 4 | .. _selectors3: 5 | 6 | Selectors 3 7 | ----------- 8 | 9 | .. currentmodule:: tinycss.css21 10 | 11 | On :attr:`RuleSet.selector`, the :meth:`~.token_data.TokenList.as_css` method 12 | can be used to serialize a selector back to an Unicode string. 13 | 14 | >>> import tinycss 15 | >>> stylesheet = tinycss.make_parser().parse_stylesheet( 16 | ... 
'div.error, #root > section:first-letter { color: red }') 17 | >>> selector_string = stylesheet.rules[0].selector.as_css() 18 | >>> selector_string 19 | 'div.error, #root > section:first-letter' 20 | 21 | This string can be parsed by cssselect_. The parsed objects have information 22 | about pseudo-elements and selector specificity. 23 | 24 | .. _cssselect: http://packages.python.org/cssselect/ 25 | 26 | >>> import cssselect 27 | >>> selectors = cssselect.parse(selector_string) 28 | >>> [s.specificity() for s in selectors] 29 | [(0, 1, 1), (1, 0, 2)] 30 | >>> [s.pseudo_element for s in selectors] 31 | [None, 'first-letter'] 32 | 33 | These objects can in turn be translated to XPath expressions. Note that 34 | the translation ignores pseudo-elements, you have to account for them 35 | somehow or reject selectors with pseudo-elements. 36 | 37 | >>> xpath = cssselect.HTMLTranslator().selector_to_xpath(selectors[1]) 38 | >>> xpath 39 | "descendant-or-self::*[@id = 'root']/section" 40 | 41 | Finally, the XPath expressions can be used with lxml_ to find the matching 42 | elements. 43 | 44 | >>> from lxml import etree 45 | >>> compiled_selector = etree.XPath(xpath) 46 | >>> document = etree.fromstring('''
47 | ... 48 | ...
49 | ... Lorem
ipsum
50 | ...
51 | ...
''') 52 | >>> [el.get('id') for el in compiled_selector(document)] 53 | ['head', 'content'] 54 | 55 | .. _lxml: http://lxml.de/xpathxslt.html#xpath 56 | 57 | Find more details in the `cssselect documentation`_. 58 | 59 | .. _cssselect documentation: http://packages.python.org/cssselect/ 60 | 61 | 62 | .. module:: tinycss.color3 63 | 64 | Color 3 65 | ------- 66 | 67 | This module implements parsing for the ** values, as defined in 68 | `CSS 3 Color `_. 69 | 70 | The (deprecated) CSS2 system colors are not supported, but you can 71 | easily test for them if you want as they are simple ``IDENT`` tokens. 72 | For example:: 73 | 74 | if token.type == 'IDENT' and token.value == 'ButtonText': 75 | return ... 76 | 77 | All other values types *are* supported: 78 | 79 | * Basic, extended (X11) and transparent color keywords; 80 | * 3-digit and 6-digit hexadecimal notations; 81 | * ``rgb()``, ``rgba()``, ``hsl()`` and ``hsla()`` functional notations. 82 | * ``currentColor`` 83 | 84 | This module does not integrate with a parser class. Instead, it provides 85 | a function that can parse tokens as found in :attr:`.css21.Declaration.value`, 86 | for example. 87 | 88 | .. autofunction:: parse_color 89 | .. autofunction:: parse_color_string 90 | .. autoclass:: RGBA 91 | 92 | 93 | .. module:: tinycss.page3 94 | 95 | Paged Media 3 96 | ------------- 97 | 98 | .. autoclass:: CSSPage3Parser 99 | .. autoclass:: MarginRule 100 | 101 | 102 | .. module:: tinycss.fonts3 103 | 104 | Fonts 3 105 | ------- 106 | 107 | .. autoclass:: CSSFonts3Parser 108 | .. autoclass:: FontFaceRule 109 | .. autoclass:: FontFeatureValuesRule 110 | .. autoclass:: FontFeatureRule 111 | 112 | 113 | Other CSS modules 114 | ----------------- 115 | 116 | To add support for new CSS syntax, see :ref:`extending`. 117 | -------------------------------------------------------------------------------- /docs/extending.rst: -------------------------------------------------------------------------------- 1 | .. 
_extending: 2 | 3 | Extending the parser 4 | ==================== 5 | 6 | Modules such as :mod:`.page3` extend the CSS 2.1 parser to add support for 7 | CSS 3 syntax. 8 | They do so by sub-classing :class:`.css21.CSS21Parser` and overriding/extending 9 | some of its methods. In fact, the parser is made of methods in a class 10 | (rather than a set of functions) solely to enable this kind of sub-classing. 11 | 12 | tinycss is designed to enable you to have parser subclasses outside of 13 | tinycss, without monkey-patching. If however the syntax you added is for a 14 | W3C specification, consider including your subclass in a new tinycss module 15 | and send a pull request: see :ref:`hacking`. 16 | 17 | 18 | .. currentmodule:: tinycss.css21 19 | 20 | Example: star hack 21 | ------------------ 22 | 23 | .. _star hack: https://en.wikipedia.org/wiki/CSS_filter#Star_hack 24 | 25 | The `star hack`_ uses invalid declarations that are only parsed by some 26 | versions of Internet Explorer. By default, tinycss ignores invalid 27 | declarations and logs an error. 28 | 29 | >>> from tinycss.css21 import CSS21Parser 30 | >>> css = '#elem { width: [W3C Model Width]; *width: [BorderBox Model]; }' 31 | >>> stylesheet = CSS21Parser().parse_stylesheet(css) 32 | >>> stylesheet.errors 33 | [ParseError('Parse error at 1:35, expected a property name, got DELIM',)] 34 | >>> [decl.name for decl in stylesheet.rules[0].declarations] 35 | ['width'] 36 | 37 | If for example a minifier based on tinycss wants to support the star hack, 38 | it can by extending the parser:: 39 | 40 | >>> class CSSStarHackParser(CSS21Parser): 41 | ... def parse_declaration(self, tokens): 42 | ... has_star_hack = (tokens[0].type == 'DELIM' and tokens[0].value == '*') 43 | ... if has_star_hack: 44 | ... tokens = tokens[1:] 45 | ... declaration = super(CSSStarHackParser, self).parse_declaration(tokens) 46 | ... declaration.has_star_hack = has_star_hack 47 | ... return declaration 48 | ... 
49 | >>> stylesheet = CSSStarHackParser().parse_stylesheet(css) 50 | >>> stylesheet.errors 51 | [] 52 | >>> [(d.name, d.has_star_hack) for d in stylesheet.rules[0].declarations] 53 | [('width', False), ('width', True)] 54 | 55 | This class extends the :meth:`~CSS21Parser.parse_declaration` method. 56 | It removes any ``*`` delimiter :class:`~.token_data.Token` at the start of 57 | a declaration, and adds a ``has_star_hack`` boolean attribute on parsed 58 | :class:`Declaration` objects: ``True`` if a ``*`` was removed, ``False`` for 59 | “normal” declarations. 60 | 61 | 62 | Parser methods 63 | -------------- 64 | 65 | In addition to methods of the user API (see :ref:`parsing`), here 66 | are the methods of the CSS 2.1 parser that can be overridden or extended: 67 | 68 | .. automethod:: CSS21Parser.parse_rules 69 | .. automethod:: CSS21Parser.read_at_rule 70 | .. automethod:: CSS21Parser.parse_at_rule 71 | .. automethod:: CSS21Parser.parse_media 72 | .. automethod:: CSS21Parser.parse_page_selector 73 | .. automethod:: CSS21Parser.parse_declarations_and_at_rules 74 | .. automethod:: CSS21Parser.parse_ruleset 75 | .. automethod:: CSS21Parser.parse_declaration_list 76 | .. automethod:: CSS21Parser.parse_declaration 77 | .. automethod:: CSS21Parser.parse_value_priority 78 | 79 | Unparsed at-rules 80 | ----------------- 81 | 82 | .. autoclass:: AtRule 83 | 84 | 85 | .. module:: tinycss.parsing 86 | 87 | Parsing helper functions 88 | ------------------------ 89 | 90 | The :mod:`tinycss.parsing` module contains helper functions for parsing 91 | tokens into a more structured form: 92 | 93 | .. autofunction:: strip_whitespace 94 | .. autofunction:: split_on_comma 95 | .. autofunction:: validate_value 96 | .. autofunction:: validate_block 97 | .. autofunction:: validate_any 98 | -------------------------------------------------------------------------------- /docs/hacking.rst: -------------------------------------------------------------------------------- 1 | .. 
_hacking: 2 | 3 | Hacking tinycss 4 | =============== 5 | 6 | .. highlight:: sh 7 | 8 | Bugs and feature requests 9 | ------------------------- 10 | 11 | Bug reports, feature requests and other issues should go to the 12 | `tinycss issue tracker`_ on Github. Any suggestion or feedback is welcome. 13 | Please include in full any error message, traceback or other detail that 14 | could be helpful. 15 | 16 | .. _tinycss issue tracker: https://github.com/SimonSapin/tinycss/issues 17 | 18 | 19 | Installing the development version 20 | ---------------------------------- 21 | 22 | First, get the latest git version:: 23 | 24 | git clone https://github.com/SimonSapin/tinycss.git 25 | cd tinycss 26 | 27 | You will need Cython_ and pytest_. Installing in a virtualenv_ is recommended:: 28 | 29 | virtualenv env 30 | . env/bin/activate 31 | pip install Cython pytest 32 | 33 | .. _Cython: http://cython.org/ 34 | .. _pytest: http://pytest.org/ 35 | .. _virtualenv: http://www.virtualenv.org/ 36 | 37 | Then, install tinycss in-place with pip’s *editable mode*. This will also 38 | build the accelerators:: 39 | 40 | pip install -e . 41 | 42 | 43 | Running the test suite 44 | ---------------------- 45 | 46 | Once you have everything installed (see above), just run pytest from the 47 | *tinycss* directory:: 48 | 49 | py.test 50 | 51 | If the accelerators are not available for some reason, use the 52 | ``TINYCSS_SKIP_SPEEDUPS_TESTS`` environment variable:: 53 | 54 | TINYCSS_SKIP_SPEEDUPS_TESTS=1 py.test 55 | 56 | If you get test failures on a fresh git clone, something may have gone wrong 57 | during the installation. Otherwise, you probably found a bug. Please 58 | `report it <#bugs-and-feature-requests>`_. 
59 | 60 | 61 | Test in multiple Python versions with tox 62 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 63 | 64 | tox_ automatically creates virtualenvs for various Python versions and 65 | runs the test suite there:: 66 | 67 | pip install tox 68 | 69 | Change to the project’s root directory and just run:: 70 | 71 | tox 72 | 73 | .. _tox: http://tox.testrun.org/ 74 | 75 | tinycss comes with a pre-configured ``tox.ini`` file to test in CPython 76 | 2.6, 2.7, 3.1 and 3.2 as well as PyPy. You can change that with the ``-e`` 77 | parameter:: 78 | 79 | tox -e py27,py32 80 | 81 | If you use ``--`` in the arguments passed to tox, further arguments 82 | are passed to the underlying ``py.test`` command:: 83 | 84 | tox -- -x --pdb 85 | 86 | 87 | Building the documentation 88 | -------------------------- 89 | 90 | This documentation is made with Sphinx_:: 91 | 92 | pip install Sphinx 93 | 94 | .. _Sphinx: http://sphinx.pocoo.org/ 95 | 96 | To build the HTML version of the documentation, change to the project’s root 97 | directory and run:: 98 | 99 | python setup.py build_sphinx 100 | 101 | The built HTML files are in ``docs/_build/html``. 102 | 103 | 104 | Making a patch and a pull request 105 | --------------------------------- 106 | 107 | If you would like to see something included in tinycss, please fork 108 | `the repository `_ on Github 109 | and make a pull request. Make sure to include tests for your change. 110 | 111 | 112 | Mailing-list 113 | ------------ 114 | 115 | tinycss does not have a mailing-list of its own for now, but the 116 | `WeasyPrint mailing-list `_ 117 | is appropriate to discuss it. 118 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../README.rst 2 | 3 | 4 | Requirements 5 | ------------ 6 | 7 | `tinycss is tested `_ on CPython 2.7, 3.3, 8 | 3.4 and 3.5 as well as PyPy 5.3 and PyPy3 2.4; it should work on any 9 | implementation of **Python 2.7 or later version (including 3.x)** of the 10 | language. 11 | 12 | Cython_ is used for optional accelerators but is only required for 13 | development versions on tinycss. 14 | 15 | .. _Cython: http://cython.org/ 16 | 17 | 18 | Installation 19 | ------------ 20 | 21 | Installing with `pip `_ should Just Work: 22 | 23 | .. code-block:: sh 24 | 25 | pip install tinycss 26 | 27 | The release tarballs contain pre-*cythoned* C files for the accelerators: 28 | you will not need Cython to install like this. 29 | If the accelerators fail to build for some reason, tinycss will 30 | print a warning and fall back to a pure-Python installation. 31 | 32 | 33 | Documentation 34 | ------------- 35 | 36 | .. Have this page in the sidebar, but do not show a link to itself here: 37 | 38 | .. toctree:: 39 | :hidden: 40 | 41 | self 42 | 43 | .. toctree:: 44 | :maxdepth: 2 45 | 46 | parsing 47 | css3 48 | extending 49 | hacking 50 | changelog 51 | -------------------------------------------------------------------------------- /docs/parsing.rst: -------------------------------------------------------------------------------- 1 | Parsing with tinycss 2 | ==================== 3 | 4 | .. highlight:: python 5 | 6 | Quickstart 7 | ---------- 8 | 9 | Import *tinycss*, make a parser object with the features you want, 10 | and parse a stylesheet: 11 | 12 | .. doctest:: 13 | 14 | >>> import tinycss 15 | >>> parser = tinycss.make_parser('page3') 16 | >>> stylesheet = parser.parse_stylesheet_bytes(b'''@import "foo.css"; 17 | ... p.error { color: red } @lorem-ipsum; 18 | ... 
repeated every time for brevity)
autoclass:: Declaration() 78 | 79 | 80 | Tokens 81 | ------ 82 | 83 | Some parts of a stylesheet (such as selectors in CSS 2.1 or property values) 84 | are not parsed by tinycss. They appear as tokens instead. 85 | 86 | .. module:: tinycss.token_data 87 | 88 | .. autoclass:: TokenList() 89 | :member-order: bysource 90 | :members: 91 | .. autoclass:: Token() 92 | :members: 93 | .. autoclass:: tinycss.speedups.CToken() 94 | .. autoclass:: ContainerToken() 95 | :members: 96 | 97 | .. autoclass:: FunctionToken() 98 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [build_sphinx] 2 | source-dir = docs 3 | build-dir = docs/_build 4 | #all_files = 1 5 | 6 | [upload_sphinx] # Sphinx-PyPI-upload 7 | upload-dir = docs/_build/html 8 | 9 | [aliases] 10 | test = pytest 11 | 12 | [tool:pytest] 13 | addopts = --flake8 --isort --cov --ignore=test/cairosvg_reference 14 | norecursedirs = dist .cache .git build *.egg-info .eggs venv cairosvg_reference 15 | flake8-ignore = docs/conf.py ALL 16 | isort_ignore = 17 | docs/conf.py 18 | setup.py 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import re 3 | import sys 4 | from distutils.errors import ( 5 | CCompilerError, DistutilsExecError, DistutilsPlatformError) 6 | from setuptools import Extension, setup 7 | 8 | try: 9 | from Cython.Distutils import build_ext 10 | import Cython.Compiler.Version 11 | CYTHON_INSTALLED = True 12 | except ImportError: 13 | from distutils.command.build_ext import build_ext 14 | CYTHON_INSTALLED = False 15 | 16 | 17 | ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError) 18 | if sys.platform == 'win32' and sys.version_info > (2, 6): 19 | # 2.6's distutils.msvc9compiler can raise an IOError when failing to 20 | # 
find the compiler 21 | ext_errors += (IOError,) 22 | 23 | 24 | class BuildFailed(Exception): 25 | pass 26 | 27 | 28 | class ve_build_ext(build_ext): 29 | # This class allows C extension building to fail. 30 | 31 | def run(self): 32 | try: 33 | build_ext.run(self) 34 | except DistutilsPlatformError: 35 | raise BuildFailed 36 | 37 | def build_extension(self, ext): 38 | try: 39 | build_ext.build_extension(self, ext) 40 | except ext_errors: 41 | raise BuildFailed 42 | 43 | 44 | ROOT = os.path.dirname(__file__) 45 | with open(os.path.join(ROOT, 'tinycss', 'version.py')) as fd: 46 | VERSION = re.search("VERSION = '([^']+)'", fd.read()).group(1) 47 | 48 | with open(os.path.join(ROOT, 'README.rst'), 'rb') as fd: 49 | README = fd.read().decode('utf8') 50 | 51 | 52 | needs_pytest = {'pytest', 'test', 'ptr'}.intersection(sys.argv) 53 | pytest_runner = ['pytest-runner'] if needs_pytest else [] 54 | 55 | 56 | def run_setup(with_extension): 57 | if with_extension: 58 | extension_path = os.path.join('tinycss', 'speedups') 59 | if CYTHON_INSTALLED: 60 | extension_path += '.pyx' 61 | print('Building with Cython %s.' % Cython.Compiler.Version.version) 62 | else: 63 | extension_path += '.c' 64 | if not os.path.exists(extension_path): 65 | print("WARNING: Trying to build without Cython, but " 66 | "pre-generated '%s' does not seem to be available." 
67 | % extension_path) 68 | else: 69 | print('Building without Cython.') 70 | kwargs = dict( 71 | cmdclass=dict(build_ext=ve_build_ext), 72 | ext_modules=[Extension('tinycss.speedups', 73 | sources=[extension_path])], 74 | ) 75 | else: 76 | kwargs = dict() 77 | 78 | setup( 79 | name='tinycss', 80 | version=VERSION, 81 | url='http://tinycss.readthedocs.io/', 82 | license='BSD', 83 | author='Simon Sapin', 84 | author_email='simon.sapin@exyr.org', 85 | description='tinycss is a complete yet simple CSS parser for Python.', 86 | long_description=README, 87 | classifiers=[ 88 | 'Development Status :: 4 - Beta', 89 | 'Intended Audience :: Developers', 90 | 'License :: OSI Approved :: BSD License', 91 | 'Programming Language :: Python :: 2', 92 | 'Programming Language :: Python :: 2.7', 93 | 'Programming Language :: Python :: 3', 94 | 'Programming Language :: Python :: 3.3', 95 | 'Programming Language :: Python :: 3.4', 96 | 'Programming Language :: Python :: 3.5', 97 | 'Programming Language :: Python :: Implementation :: CPython', 98 | 'Programming Language :: Python :: Implementation :: PyPy', 99 | ], 100 | setup_requires=pytest_runner, 101 | tests_require=[ 102 | 'pytest-cov', 'pytest-flake8', 'pytest-isort', 'pytest-runner'], 103 | extras_require={'test': ( 104 | 'pytest-runner', 'pytest-cov', 'pytest-flake8', 'pytest-isort')}, 105 | packages=['tinycss', 'tinycss.tests'], 106 | **kwargs 107 | ) 108 | 109 | 110 | IS_PYPY = hasattr(sys, 'pypy_translation_info') 111 | try: 112 | run_setup(not IS_PYPY) 113 | except BuildFailed: 114 | BUILD_EXT_WARNING = ('WARNING: The extension could not be compiled, ' 115 | 'speedups are not enabled.') 116 | print('*' * 75) 117 | print(BUILD_EXT_WARNING) 118 | print('Failure information, if any, is above.') 119 | print('Retrying the build without the Cython extension now.') 120 | print('*' * 75) 121 | 122 | run_setup(False) 123 | 124 | print('*' * 75) 125 | print(BUILD_EXT_WARNING) 126 | print('Plain-Python installation succeeded.') 127 | 
print('*' * 75) 128 | -------------------------------------------------------------------------------- /tinycss/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss 4 | ------- 5 | 6 | A CSS parser, and nothing else. 7 | 8 | :copyright: (c) 2012 by Simon Sapin. 9 | :license: BSD, see LICENSE for more details. 10 | """ 11 | 12 | from .version import VERSION 13 | 14 | from .css21 import CSS21Parser 15 | from .page3 import CSSPage3Parser 16 | from .fonts3 import CSSFonts3Parser 17 | 18 | 19 | __version__ = VERSION 20 | 21 | PARSER_MODULES = { 22 | 'page3': CSSPage3Parser, 23 | 'fonts3': CSSFonts3Parser, 24 | } 25 | 26 | 27 | def make_parser(*features, **kwargs): 28 | """Make a parser object with the chosen features. 29 | 30 | :param features: 31 | Positional arguments are base classes the new parser class will extend. 32 | The string ``'page3'`` is accepted as short for 33 | :class:`~page3.CSSPage3Parser`. 34 | The string ``'fonts3'`` is accepted as short for 35 | :class:`~fonts3.CSSFonts3Parser`. 36 | :param kwargs: 37 | Keyword arguments are passed to the parser’s constructor. 38 | :returns: 39 | An instance of a new subclass of :class:`CSS21Parser` 40 | 41 | """ 42 | if features: 43 | bases = tuple(PARSER_MODULES.get(f, f) for f in features) 44 | parser_class = type('CustomCSSParser', bases + (CSS21Parser,), {}) 45 | else: 46 | parser_class = CSS21Parser 47 | return parser_class(**kwargs) 48 | -------------------------------------------------------------------------------- /tinycss/color3.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.colors3 4 | --------------- 5 | 6 | Parser for CSS 3 color values 7 | http://www.w3.org/TR/css3-color/ 8 | 9 | This module does not provide anything that integrates in a parser class, 10 | only functions that parse single tokens from (eg.) a property value. 
* Every other value (including keywords, HSL and HSLA) is converted
return its value clipped to the 0..1 range.
or None if the function token content does not match the description above.
(2, re.compile(r'^#([\da-f])([\da-f])([\da-f])$', re.I).match), 193 |     (1, re.compile(r'^#([\da-f]{2})([\da-f]{2})([\da-f]{2})$', re.I).match),
169)), 245 | ('darkgreen', (0, 100, 0)), 246 | ('darkgrey', (169, 169, 169)), 247 | ('darkkhaki', (189, 183, 107)), 248 | ('darkmagenta', (139, 0, 139)), 249 | ('darkolivegreen', (85, 107, 47)), 250 | ('darkorange', (255, 140, 0)), 251 | ('darkorchid', (153, 50, 204)), 252 | ('darkred', (139, 0, 0)), 253 | ('darksalmon', (233, 150, 122)), 254 | ('darkseagreen', (143, 188, 143)), 255 | ('darkslateblue', (72, 61, 139)), 256 | ('darkslategray', (47, 79, 79)), 257 | ('darkslategrey', (47, 79, 79)), 258 | ('darkturquoise', (0, 206, 209)), 259 | ('darkviolet', (148, 0, 211)), 260 | ('deeppink', (255, 20, 147)), 261 | ('deepskyblue', (0, 191, 255)), 262 | ('dimgray', (105, 105, 105)), 263 | ('dimgrey', (105, 105, 105)), 264 | ('dodgerblue', (30, 144, 255)), 265 | ('firebrick', (178, 34, 34)), 266 | ('floralwhite', (255, 250, 240)), 267 | ('forestgreen', (34, 139, 34)), 268 | ('fuchsia', (255, 0, 255)), 269 | ('gainsboro', (220, 220, 220)), 270 | ('ghostwhite', (248, 248, 255)), 271 | ('gold', (255, 215, 0)), 272 | ('goldenrod', (218, 165, 32)), 273 | ('gray', (128, 128, 128)), 274 | ('green', (0, 128, 0)), 275 | ('greenyellow', (173, 255, 47)), 276 | ('grey', (128, 128, 128)), 277 | ('honeydew', (240, 255, 240)), 278 | ('hotpink', (255, 105, 180)), 279 | ('indianred', (205, 92, 92)), 280 | ('indigo', (75, 0, 130)), 281 | ('ivory', (255, 255, 240)), 282 | ('khaki', (240, 230, 140)), 283 | ('lavender', (230, 230, 250)), 284 | ('lavenderblush', (255, 240, 245)), 285 | ('lawngreen', (124, 252, 0)), 286 | ('lemonchiffon', (255, 250, 205)), 287 | ('lightblue', (173, 216, 230)), 288 | ('lightcoral', (240, 128, 128)), 289 | ('lightcyan', (224, 255, 255)), 290 | ('lightgoldenrodyellow', (250, 250, 210)), 291 | ('lightgray', (211, 211, 211)), 292 | ('lightgreen', (144, 238, 144)), 293 | ('lightgrey', (211, 211, 211)), 294 | ('lightpink', (255, 182, 193)), 295 | ('lightsalmon', (255, 160, 122)), 296 | ('lightseagreen', (32, 178, 170)), 297 | ('lightskyblue', (135, 206, 250)), 298 | 
('lightslategray', (119, 136, 153)), 299 | ('lightslategrey', (119, 136, 153)), 300 | ('lightsteelblue', (176, 196, 222)), 301 | ('lightyellow', (255, 255, 224)), 302 | ('lime', (0, 255, 0)), 303 | ('limegreen', (50, 205, 50)), 304 | ('linen', (250, 240, 230)), 305 | ('magenta', (255, 0, 255)), 306 | ('maroon', (128, 0, 0)), 307 | ('mediumaquamarine', (102, 205, 170)), 308 | ('mediumblue', (0, 0, 205)), 309 | ('mediumorchid', (186, 85, 211)), 310 | ('mediumpurple', (147, 112, 219)), 311 | ('mediumseagreen', (60, 179, 113)), 312 | ('mediumslateblue', (123, 104, 238)), 313 | ('mediumspringgreen', (0, 250, 154)), 314 | ('mediumturquoise', (72, 209, 204)), 315 | ('mediumvioletred', (199, 21, 133)), 316 | ('midnightblue', (25, 25, 112)), 317 | ('mintcream', (245, 255, 250)), 318 | ('mistyrose', (255, 228, 225)), 319 | ('moccasin', (255, 228, 181)), 320 | ('navajowhite', (255, 222, 173)), 321 | ('navy', (0, 0, 128)), 322 | ('oldlace', (253, 245, 230)), 323 | ('olive', (128, 128, 0)), 324 | ('olivedrab', (107, 142, 35)), 325 | ('orange', (255, 165, 0)), 326 | ('orangered', (255, 69, 0)), 327 | ('orchid', (218, 112, 214)), 328 | ('palegoldenrod', (238, 232, 170)), 329 | ('palegreen', (152, 251, 152)), 330 | ('paleturquoise', (175, 238, 238)), 331 | ('palevioletred', (219, 112, 147)), 332 | ('papayawhip', (255, 239, 213)), 333 | ('peachpuff', (255, 218, 185)), 334 | ('peru', (205, 133, 63)), 335 | ('pink', (255, 192, 203)), 336 | ('plum', (221, 160, 221)), 337 | ('powderblue', (176, 224, 230)), 338 | ('purple', (128, 0, 128)), 339 | ('red', (255, 0, 0)), 340 | ('rosybrown', (188, 143, 143)), 341 | ('royalblue', (65, 105, 225)), 342 | ('saddlebrown', (139, 69, 19)), 343 | ('salmon', (250, 128, 114)), 344 | ('sandybrown', (244, 164, 96)), 345 | ('seagreen', (46, 139, 87)), 346 | ('seashell', (255, 245, 238)), 347 | ('sienna', (160, 82, 45)), 348 | ('silver', (192, 192, 192)), 349 | ('skyblue', (135, 206, 235)), 350 | ('slateblue', (106, 90, 205)), 351 | ('slategray', (112, 
128, 144)), 352 | ('slategrey', (112, 128, 144)), 353 | ('snow', (255, 250, 250)), 354 | ('springgreen', (0, 255, 127)), 355 | ('steelblue', (70, 130, 180)), 356 | ('tan', (210, 180, 140)), 357 | ('teal', (0, 128, 128)), 358 | ('thistle', (216, 191, 216)), 359 | ('tomato', (255, 99, 71)), 360 | ('turquoise', (64, 224, 208)), 361 | ('violet', (238, 130, 238)), 362 | ('wheat', (245, 222, 179)), 363 | ('white', (255, 255, 255)), 364 | ('whitesmoke', (245, 245, 245)), 365 | ('yellow', (255, 255, 0)), 366 | ('yellowgreen', (154, 205, 50)), 367 | ] 368 | 369 | 370 | # (r, g, b, a) in 0..1 or a string marker 371 | SPECIAL_COLOR_KEYWORDS = { 372 | 'currentcolor': 'currentColor', 373 | 'transparent': RGBA(0., 0., 0., 0.), 374 | } 375 | 376 | 377 | # RGBA namedtuples of (r, g, b, a) in 0..1 or a string marker 378 | COLOR_KEYWORDS = SPECIAL_COLOR_KEYWORDS.copy() 379 | COLOR_KEYWORDS.update( 380 | # 255 maps to 1, 0 to 0, the rest is linear. 381 | (keyword, RGBA(r / 255., g / 255., b / 255., 1.)) 382 | for keyword, (r, g, b) in itertools.chain( 383 | BASIC_COLOR_KEYWORDS, EXTENDED_COLOR_KEYWORDS)) 384 | -------------------------------------------------------------------------------- /tinycss/css21.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.css21 4 | ------------- 5 | 6 | Parser for CSS 2.1 7 | http://www.w3.org/TR/CSS21/syndata.html 8 | 9 | :copyright: (c) 2012 by Simon Sapin. 10 | :license: BSD, see LICENSE for more details. 
11 | """ 12 | 13 | from __future__ import unicode_literals 14 | 15 | from itertools import chain, islice 16 | 17 | from .decoding import decode 18 | from .parsing import ( 19 | ParseError, remove_whitespace, split_on_comma, strip_whitespace, 20 | validate_any, validate_value) 21 | from .token_data import TokenList 22 | from .tokenizer import tokenize_grouped 23 | 24 | 25 | # stylesheet : [ CDO | CDC | S | statement ]*; 26 | # statement : ruleset | at-rule; 27 | # at-rule : ATKEYWORD S* any* [ block | ';' S* ]; 28 | # block : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*; 29 | # ruleset : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*; 30 | # selector : any+; 31 | # declaration : property S* ':' S* value; 32 | # property : IDENT; 33 | # value : [ any | block | ATKEYWORD S* ]+; 34 | # any : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING 35 | # | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES 36 | # | DASHMATCH | ':' | FUNCTION S* [any|unused]* ')' 37 | # | '(' S* [any|unused]* ')' | '[' S* [any|unused]* ']' 38 | # ] S*; 39 | # unused : block | ATKEYWORD S* | ';' S* | CDO S* | CDC S*; 40 | 41 | 42 | class Stylesheet(object): 43 | """ 44 | A parsed CSS stylesheet. 45 | 46 | .. attribute:: rules 47 | 48 | A mixed list, in source order, of :class:`RuleSet` and various 49 | at-rules such as :class:`ImportRule`, :class:`MediaRule` 50 | and :class:`PageRule`. 51 | Use their :obj:`at_keyword` attribute to distinguish them. 52 | 53 | .. attribute:: errors 54 | 55 | A list of :class:`~.parsing.ParseError`. Invalid rules and declarations 56 | are ignored, with the details logged in this list. 57 | 58 | .. attribute:: encoding 59 | 60 | The character encoding that was used to decode the stylesheet 61 | from bytes, or ``None`` for Unicode stylesheets. 
62 | 63 | """ 64 | def __init__(self, rules, errors, encoding): 65 | self.rules = rules 66 | self.errors = errors 67 | self.encoding = encoding 68 | 69 | def __repr__(self): 70 | return '<{0.__class__.__name__} {1} rules {2} errors>'.format( 71 | self, len(self.rules), len(self.errors)) 72 | 73 | 74 | class AtRule(object): 75 | """ 76 | An unparsed at-rule. 77 | 78 | .. attribute:: at_keyword 79 | 80 | The normalized (lower-case) at-keyword as a string. Eg: ``'@page'`` 81 | 82 | .. attribute:: head 83 | 84 | The part of the at-rule between the at-keyword and the ``{`` 85 | marking the body, or the ``;`` marking the end of an at-rule without 86 | a body. A :class:`~.token_data.TokenList`. 87 | 88 | .. attribute:: body 89 | 90 | The content of the body between ``{`` and ``}`` as a 91 | :class:`~.token_data.TokenList`, or ``None`` if there is no body 92 | (ie. if the rule ends with ``;``). 93 | 94 | The head was validated against the core grammar but **not** the body, 95 | as the body might contain declarations. In case of an error in a 96 | declaration, parsing should continue from the next declaration. 97 | The whole rule should not be ignored as it would be for an error 98 | in the head. 99 | 100 | These at-rules are expected to be parsed further before reaching 101 | the user API. 102 | 103 | """ 104 | def __init__(self, at_keyword, head, body, line, column): 105 | self.at_keyword = at_keyword 106 | self.head = TokenList(head) 107 | self.body = TokenList(body) if body is not None else body 108 | self.line = line 109 | self.column = column 110 | 111 | def __repr__(self): 112 | return ('<{0.__class__.__name__} {0.line}:{0.column} {0.at_keyword}>' 113 | .format(self)) 114 | 115 | 116 | class RuleSet(object): 117 | """A ruleset. 118 | 119 | .. attribute:: at_keyword 120 | 121 | Always ``None``. Helps to tell rulesets apart from at-rules. 122 | 123 | .. attribute:: selector 124 | 125 | The selector as a :class:`~.token_data.TokenList`. 
126 | In CSS 3, this is actually called a selector group. 127 | 128 | ``rule.selector.as_css()`` gives the selector as a string. 129 | This string can be used with *cssselect*, see :ref:`selectors3`. 130 | 131 | .. attribute:: declarations 132 | 133 | The list of :class:`Declaration`, in source order. 134 | 135 | """ 136 | 137 | at_keyword = None 138 | 139 | def __init__(self, selector, declarations, line, column): 140 | self.selector = TokenList(selector) 141 | self.declarations = declarations 142 | self.line = line 143 | self.column = column 144 | 145 | def __repr__(self): 146 | return ('<{0.__class__.__name__} at {0.line}:{0.column} {1}>' 147 | .format(self, self.selector.as_css())) 148 | 149 | 150 | class Declaration(object): 151 | """A property declaration. 152 | 153 | .. attribute:: name 154 | 155 | The property name as a normalized (lower-case) string. 156 | 157 | .. attribute:: value 158 | 159 | The property value as a :class:`~.token_data.TokenList`. 160 | 161 | The value is not parsed. UAs using tinycss may only support 162 | some properties or some values and tinycss does not know which. 163 | They need to parse values themselves and ignore declarations with 164 | unknown or unsupported properties or values, and fall back 165 | on any previous declaration. 166 | 167 | :mod:`tinycss.color3` parses color values, but other values 168 | will need specific parsing/validation code. 169 | 170 | .. attribute:: priority 171 | 172 | Either the string ``'important'`` or ``None``. 173 | 174 | """ 175 | def __init__(self, name, value, priority, line, column): 176 | self.name = name 177 | self.value = TokenList(value) 178 | self.priority = priority 179 | self.line = line 180 | self.column = column 181 | 182 | def __repr__(self): 183 | priority = ' !' 
The list of :class:`RuleSet` and various at-rules inside the @media
Remove any valid @charset at the beginning of a token stream.
323 | 324 | Note that property values are still not parsed, as UAs using this 325 | parser may only support some properties or some values. 326 | 327 | Currently the parser holds no state. It being a class only allows 328 | subclassing and overriding its methods. 329 | 330 | """ 331 | 332 | # User API: 333 | 334 | def parse_stylesheet_file(self, css_file, protocol_encoding=None, 335 | linking_encoding=None, document_encoding=None): 336 | """Parse a stylesheet from a file or filename. 337 | 338 | Character encoding-related parameters and behavior are the same 339 | as in :meth:`parse_stylesheet_bytes`. 340 | 341 | :param css_file: 342 | Either a file (any object with a :meth:`~file.read` method) 343 | or a filename. 344 | :return: 345 | A :class:`Stylesheet`. 346 | 347 | """ 348 | if hasattr(css_file, 'read'): 349 | css_bytes = css_file.read() 350 | else: 351 | with open(css_file, 'rb') as fd: 352 | css_bytes = fd.read() 353 | return self.parse_stylesheet_bytes(css_bytes, protocol_encoding, 354 | linking_encoding, document_encoding) 355 | 356 | def parse_stylesheet_bytes(self, css_bytes, protocol_encoding=None, 357 | linking_encoding=None, document_encoding=None): 358 | """Parse a stylesheet from a byte string. 359 | 360 | The character encoding is determined from the passed metadata and the 361 | ``@charset`` rule in the stylesheet (if any). 362 | If no encoding information is available or decoding fails, 363 | decoding defaults to UTF-8 and then fall back on ISO-8859-1. 364 | 365 | :param css_bytes: 366 | A CSS stylesheet as a byte string. 367 | :param protocol_encoding: 368 | The "charset" parameter of a "Content-Type" HTTP header (if any), 369 | or similar metadata for other protocols. 370 | :param linking_encoding: 371 | ```` or other metadata from the linking mechanism 372 | (if any) 373 | :param document_encoding: 374 | Encoding of the referring style sheet or document (if any) 375 | :return: 376 | A :class:`Stylesheet`. 
377 | 378 | """ 379 | css_unicode, encoding = decode(css_bytes, protocol_encoding, 380 | linking_encoding, document_encoding) 381 | return self.parse_stylesheet(css_unicode, encoding=encoding) 382 | 383 | def parse_stylesheet(self, css_unicode, encoding=None): 384 | """Parse a stylesheet from an Unicode string. 385 | 386 | :param css_unicode: 387 | A CSS stylesheet as an unicode string. 388 | :param encoding: 389 | The character encoding used to decode the stylesheet from bytes, 390 | if any. 391 | :return: 392 | A :class:`Stylesheet`. 393 | 394 | """ 395 | tokens = tokenize_grouped(css_unicode) 396 | if encoding: 397 | tokens = _remove_at_charset(tokens) 398 | rules, errors = self.parse_rules(tokens, context='stylesheet') 399 | return Stylesheet(rules, errors, encoding) 400 | 401 | def parse_style_attr(self, css_source): 402 | """Parse a "style" attribute (eg. of an HTML element). 403 | 404 | This method only accepts Unicode as the source (HTML) document 405 | is supposed to handle the character encoding. 406 | 407 | :param css_source: 408 | The attribute value, as an unicode string. 409 | :return: 410 | A tuple of the list of valid :class:`Declaration` and 411 | a list of :class:`~.parsing.ParseError`. 412 | """ 413 | return self.parse_declaration_list(tokenize_grouped(css_source)) 414 | 415 | # API for subclasses: 416 | 417 | def parse_rules(self, tokens, context): 418 | """Parse a sequence of rules (rulesets and at-rules). 419 | 420 | :param tokens: 421 | An iterable of tokens. 422 | :param context: 423 | Either ``'stylesheet'`` or an at-keyword such as ``'@media'``. 424 | (Most at-rules are only allowed in some contexts.) 425 | :return: 426 | A tuple of a list of parsed rules and a list of 427 | :class:`~.parsing.ParseError`. 
428 | 429 | """ 430 | rules = [] 431 | errors = [] 432 | tokens = iter(tokens) 433 | for token in tokens: 434 | if token.type not in ('S', 'CDO', 'CDC'): 435 | try: 436 | if token.type == 'ATKEYWORD': 437 | rule = self.read_at_rule(token, tokens) 438 | result = self.parse_at_rule( 439 | rule, rules, errors, context) 440 | rules.append(result) 441 | else: 442 | rule, rule_errors = self.parse_ruleset(token, tokens) 443 | rules.append(rule) 444 | errors.extend(rule_errors) 445 | except ParseError as exc: 446 | errors.append(exc) 447 | # Skip the entire rule 448 | return rules, errors 449 | 450 | def read_at_rule(self, at_keyword_token, tokens): 451 | """Read an at-rule from a token stream. 452 | 453 | :param at_keyword_token: 454 | The ATKEYWORD token that starts this at-rule 455 | You may have read it already to distinguish the rule 456 | from a ruleset. 457 | :param tokens: 458 | An iterator of subsequent tokens. Will be consumed just enough 459 | for one at-rule. 460 | :return: 461 | An unparsed :class:`AtRule`. 462 | :raises: 463 | :class:`~.parsing.ParseError` if the head is invalid for the core 464 | grammar. The body is **not** validated. See :class:`AtRule`. 465 | 466 | """ 467 | # CSS syntax is case-insensitive 468 | at_keyword = at_keyword_token.value.lower() 469 | head = [] 470 | # For the ParseError in case `tokens` is empty: 471 | token = at_keyword_token 472 | for token in tokens: 473 | if token.type in '{;': 474 | break 475 | # Ignore white space just after the at-keyword. 476 | else: 477 | head.append(token) 478 | # On unexpected end of stylesheet, pretend that a ';' was there 479 | head = strip_whitespace(head) 480 | for head_token in head: 481 | validate_any(head_token, 'at-rule head') 482 | body = token.content if token.type == '{' else None 483 | return AtRule(at_keyword, head, body, 484 | at_keyword_token.line, at_keyword_token.column) 485 | 486 | def parse_at_rule(self, rule, previous_rules, errors, context): 487 | """Parse an at-rule. 
488 | 489 | Subclasses that override this method must use ``super()`` and 490 | pass its return value for at-rules they do not know. 491 | 492 | In CSS 2.1, this method handles @charset, @import, @media and @page 493 | rules. 494 | 495 | :param rule: 496 | An unparsed :class:`AtRule`. 497 | :param previous_rules: 498 | The list of at-rules and rulesets that have been parsed so far 499 | in this context. This list can be used to decide if the current 500 | rule is valid. (For example, @import rules are only allowed 501 | before anything but a @charset rule.) 502 | :param context: 503 | Either ``'stylesheet'`` or an at-keyword such as ``'@media'``. 504 | (Most at-rules are only allowed in some contexts.) 505 | :raises: 506 | :class:`~.parsing.ParseError` if the rule is invalid. 507 | :return: 508 | A parsed at-rule 509 | 510 | """ 511 | if rule.at_keyword == '@page': 512 | if context != 'stylesheet': 513 | raise ParseError(rule, '@page rule not allowed in ' + context) 514 | selector, specificity = self.parse_page_selector(rule.head) 515 | if rule.body is None: 516 | raise ParseError( 517 | rule, 'invalid {0} rule: missing block'.format( 518 | rule.at_keyword)) 519 | declarations, at_rules, rule_errors = \ 520 | self.parse_declarations_and_at_rules(rule.body, '@page') 521 | errors.extend(rule_errors) 522 | return PageRule(selector, specificity, declarations, at_rules, 523 | rule.line, rule.column) 524 | 525 | elif rule.at_keyword == '@media': 526 | if context != 'stylesheet': 527 | raise ParseError(rule, '@media rule not allowed in ' + context) 528 | if not rule.head: 529 | raise ParseError(rule, 'expected media types for @media') 530 | media = self.parse_media(rule.head) 531 | if rule.body is None: 532 | raise ParseError( 533 | rule, 'invalid {0} rule: missing block'.format( 534 | rule.at_keyword)) 535 | rules, rule_errors = self.parse_rules(rule.body, '@media') 536 | errors.extend(rule_errors) 537 | return MediaRule(media, rules, rule.line, rule.column) 538 | 539 | 
elif rule.at_keyword == '@import': 540 | if context != 'stylesheet': 541 | raise ParseError( 542 | rule, '@import rule not allowed in ' + context) 543 | for previous_rule in previous_rules: 544 | if previous_rule.at_keyword not in ('@charset', '@import'): 545 | if previous_rule.at_keyword: 546 | type_ = 'an {0} rule'.format(previous_rule.at_keyword) 547 | else: 548 | type_ = 'a ruleset' 549 | raise ParseError( 550 | previous_rule, 551 | '@import rule not allowed after ' + type_) 552 | head = rule.head 553 | if not head: 554 | raise ParseError( 555 | rule, 'expected URI or STRING for @import rule') 556 | if head[0].type not in ('URI', 'STRING'): 557 | raise ParseError( 558 | rule, 'expected URI or STRING for @import rule, got ' + 559 | head[0].type) 560 | uri = head[0].value 561 | media = self.parse_media(strip_whitespace(head[1:])) 562 | if rule.body is not None: 563 | # The position of the ';' token would be best, but we don’t 564 | # have it anymore here. 565 | raise ParseError(head[-1], "expected ';', got a block") 566 | return ImportRule(uri, media, rule.line, rule.column) 567 | 568 | elif rule.at_keyword == '@charset': 569 | raise ParseError(rule, 'mis-placed or malformed @charset rule') 570 | 571 | else: 572 | raise ParseError( 573 | rule, 'unknown at-rule in {0} context: {1}'.format( 574 | context, rule.at_keyword)) 575 | 576 | def parse_media(self, tokens): 577 | """For CSS 2.1, parse a list of media types. 578 | 579 | Media Queries are expected to override this. 
580 | 581 | :param tokens: 582 | A list of tokens 583 | :raises: 584 | :class:`~.parsing.ParseError` on invalid media types/queries 585 | :returns: 586 | For CSS 2.1, a list of media types as strings 587 | """ 588 | if not tokens: 589 | return ['all'] 590 | media_types = [] 591 | for part in split_on_comma(remove_whitespace(tokens)): 592 | types = [token.type for token in part] 593 | if types == ['IDENT']: 594 | media_types.append(part[0].value) 595 | else: 596 | raise ParseError( 597 | tokens[0], 'expected a media type' + 598 | ((', got ' + ', '.join(types)) if types else '')) 599 | return media_types 600 | 601 | def parse_page_selector(self, tokens): 602 | """Parse an @page selector. 603 | 604 | :param tokens: 605 | An iterable of token, typically from the ``head`` attribute of 606 | an unparsed :class:`AtRule`. 607 | :returns: 608 | A page selector. For CSS 2.1, this is ``'first'``, ``'left'``, 609 | ``'right'`` or ``None``. 610 | :raises: 611 | :class:`~.parsing.ParseError` on invalid selectors 612 | 613 | """ 614 | if not tokens: 615 | return None, (0, 0) 616 | if (len(tokens) == 2 and tokens[0].type == ':' and 617 | tokens[1].type == 'IDENT'): 618 | pseudo_class = tokens[1].value 619 | specificity = { 620 | 'first': (1, 0), 'left': (0, 1), 'right': (0, 1), 621 | }.get(pseudo_class) 622 | if specificity: 623 | return pseudo_class, specificity 624 | raise ParseError(tokens[0], 'invalid @page selector') 625 | 626 | def parse_declarations_and_at_rules(self, tokens, context): 627 | """Parse a mixed list of declarations and at rules, as found eg. 628 | in the body of an @page rule. 629 | 630 | Note that to add supported at-rules inside @page, 631 | :class:`~.page3.CSSPage3Parser` extends :meth:`parse_at_rule`, 632 | not this method. 633 | 634 | :param tokens: 635 | An iterable of token, typically from the ``body`` attribute of 636 | an unparsed :class:`AtRule`. 637 | :param context: 638 | An at-keyword such as ``'@page'``. 
639 | (Most at-rules are only allowed in some contexts.) 640 | :returns: 641 | A tuple of: 642 | 643 | * A list of :class:`Declaration` 644 | * A list of parsed at-rules (empty for CSS 2.1) 645 | * A list of :class:`~.parsing.ParseError` 646 | 647 | """ 648 | at_rules = [] 649 | declarations = [] 650 | errors = [] 651 | tokens = iter(tokens) 652 | for token in tokens: 653 | if token.type == 'ATKEYWORD': 654 | try: 655 | rule = self.read_at_rule(token, tokens) 656 | result = self.parse_at_rule( 657 | rule, at_rules, errors, context) 658 | at_rules.append(result) 659 | except ParseError as err: 660 | errors.append(err) 661 | elif token.type != 'S': 662 | declaration_tokens = [] 663 | while token and token.type != ';': 664 | declaration_tokens.append(token) 665 | token = next(tokens, None) 666 | if declaration_tokens: 667 | try: 668 | declarations.append( 669 | self.parse_declaration(declaration_tokens)) 670 | except ParseError as err: 671 | errors.append(err) 672 | return declarations, at_rules, errors 673 | 674 | def parse_ruleset(self, first_token, tokens): 675 | """Parse a ruleset: a selector followed by declaration block. 676 | 677 | :param first_token: 678 | The first token of the ruleset (probably of the selector). 679 | You may have read it already to distinguish the rule 680 | from an at-rule. 681 | :param tokens: 682 | an iterator of subsequent tokens. Will be consumed just enough 683 | for one ruleset. 684 | :return: 685 | a tuple of a :class:`RuleSet` and an error list. 686 | The errors are recovered :class:`~.parsing.ParseError` in 687 | declarations. (Parsing continues from the next declaration on such 688 | errors.) 689 | :raises: 690 | :class:`~.parsing.ParseError` if the selector is invalid for the 691 | core grammar. 692 | Note a that a selector can be valid for the core grammar but 693 | not for CSS 2.1 or another level. 
694 | 695 | """ 696 | selector = [] 697 | for token in chain([first_token], tokens): 698 | if token.type == '{': 699 | # Parse/validate once we’ve read the whole rule 700 | selector = strip_whitespace(selector) 701 | if not selector: 702 | raise ParseError(first_token, 'empty selector') 703 | for selector_token in selector: 704 | validate_any(selector_token, 'selector') 705 | declarations, errors = self.parse_declaration_list( 706 | token.content) 707 | ruleset = RuleSet(selector, declarations, 708 | first_token.line, first_token.column) 709 | return ruleset, errors 710 | else: 711 | selector.append(token) 712 | raise ParseError(token, 'no declaration block found for ruleset') 713 | 714 | def parse_declaration_list(self, tokens): 715 | """Parse a ``;`` separated declaration list. 716 | 717 | You may want to use :meth:`parse_declarations_and_at_rules` (or 718 | some other method that uses :func:`parse_declaration` directly) 719 | instead if you have not just declarations in the same context. 720 | 721 | :param tokens: 722 | an iterable of tokens. Should stop at (before) the end 723 | of the block, as marked by ``}``. 724 | :return: 725 | a tuple of the list of valid :class:`Declaration` and a list 726 | of :class:`~.parsing.ParseError` 727 | 728 | """ 729 | # split at ';' 730 | parts = [] 731 | this_part = [] 732 | for token in tokens: 733 | if token.type == ';': 734 | parts.append(this_part) 735 | this_part = [] 736 | else: 737 | this_part.append(token) 738 | parts.append(this_part) 739 | 740 | declarations = [] 741 | errors = [] 742 | for tokens in parts: 743 | tokens = strip_whitespace(tokens) 744 | if tokens: 745 | try: 746 | declarations.append(self.parse_declaration(tokens)) 747 | except ParseError as exc: 748 | errors.append(exc) 749 | # Skip the entire declaration 750 | return declarations, errors 751 | 752 | def parse_declaration(self, tokens): 753 | """Parse a single declaration. 754 | 755 | :param tokens: 756 | an iterable of at least one token. 
Should stop at (before) 757 | the end of the declaration, as marked by a ``;`` or ``}``. 758 | Empty declarations (ie. consecutive ``;`` with only white space 759 | in-between) should be skipped earlier and not passed to 760 | this method. 761 | :returns: 762 | a :class:`Declaration` 763 | :raises: 764 | :class:`~.parsing.ParseError` if the tokens do not match the 765 | 'declaration' production of the core grammar. 766 | 767 | """ 768 | tokens = iter(tokens) 769 | 770 | name_token = next(tokens) # assume there is at least one 771 | if name_token.type == 'IDENT': 772 | # CSS syntax is case-insensitive 773 | property_name = name_token.value.lower() 774 | else: 775 | raise ParseError( 776 | name_token, 'expected a property name, got {0}'.format( 777 | name_token.type)) 778 | 779 | token = name_token # In case ``tokens`` is now empty 780 | for token in tokens: 781 | if token.type == ':': 782 | break 783 | elif token.type != 'S': 784 | raise ParseError( 785 | token, "expected ':', got {0}".format(token.type)) 786 | else: 787 | raise ParseError(token, "expected ':'") 788 | 789 | value = strip_whitespace(list(tokens)) 790 | if not value: 791 | raise ParseError(token, 'expected a property value') 792 | validate_value(value) 793 | value, priority = self.parse_value_priority(value) 794 | return Declaration( 795 | property_name, value, priority, name_token.line, name_token.column) 796 | 797 | def parse_value_priority(self, tokens): 798 | """Separate any ``!important`` marker at the end of a property value. 799 | 800 | :param tokens: 801 | A list of tokens for the property value. 802 | :returns: 803 | A tuple of the actual property value (a list of tokens) 804 | and the :attr:`~Declaration.priority`. 
805 | """ 806 | value = list(tokens) 807 | # Walk the token list from the end 808 | token = value.pop() 809 | if token.type == 'IDENT' and token.value.lower() == 'important': 810 | while value: 811 | token = value.pop() 812 | if token.type == 'DELIM' and token.value == '!': 813 | # Skip any white space before the '!' 814 | while value and value[-1].type == 'S': 815 | value.pop() 816 | if not value: 817 | raise ParseError( 818 | token, 'expected a value before !important') 819 | return value, 'important' 820 | # Skip white space between '!' and 'important' 821 | elif token.type != 'S': 822 | break 823 | return tokens, None 824 | -------------------------------------------------------------------------------- /tinycss/decoding.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.decoding 4 | ---------------- 5 | 6 | Decoding stylesheets from bytes to Unicode. 7 | http://www.w3.org/TR/CSS21/syndata.html#charset 8 | 9 | :copyright: (c) 2012 by Simon Sapin. 10 | :license: BSD, see LICENSE for more details. 11 | """ 12 | 13 | from __future__ import unicode_literals 14 | 15 | import operator 16 | import re 17 | from binascii import unhexlify 18 | 19 | __all__ = ['decode'] # Everything else is implementation detail 20 | 21 | 22 | def decode(css_bytes, protocol_encoding=None, 23 | linking_encoding=None, document_encoding=None): 24 | """ 25 | Determine the character encoding from the passed metadata and the 26 | ``@charset`` rule in the stylesheet (if any); and decode accordingly. 27 | If no encoding information is available or decoding fails, 28 | decoding defaults to UTF-8 and then fall back on ISO-8859-1. 29 | 30 | :param css_bytes: 31 | a CSS stylesheet as a byte string 32 | :param protocol_encoding: 33 | The "charset" parameter of a "Content-Type" HTTP header (if any), 34 | or similar metadata for other protocols. 
35 | :param linking_encoding: 36 | ```` or other metadata from the linking mechanism 37 | (if any) 38 | :param document_encoding: 39 | Encoding of the referring style sheet or document (if any) 40 | :return: 41 | A tuple of an Unicode string, with any BOM removed, and the 42 | encoding that was used. 43 | 44 | """ 45 | if protocol_encoding: 46 | css_unicode = try_encoding(css_bytes, protocol_encoding) 47 | if css_unicode is not None: 48 | return css_unicode, protocol_encoding 49 | for encoding, pattern in ENCODING_MAGIC_NUMBERS: 50 | match = pattern(css_bytes) 51 | if match: 52 | has_at_charset = isinstance(encoding, tuple) 53 | if has_at_charset: 54 | extract, endianness = encoding 55 | encoding = extract(match.group(1)) 56 | # Get an ASCII-only unicode value. 57 | # This is the only thing that works on both Python 2 and 3 58 | # for bytes.decode() 59 | # Non-ASCII encoding names are invalid anyway, 60 | # but make sure they stay invalid. 61 | encoding = encoding.decode('ascii', 'replace') 62 | encoding = encoding.replace('\ufffd', '?') 63 | if encoding.replace('-', '').replace('_', '').lower() in [ 64 | 'utf16', 'utf32']: 65 | encoding += endianness 66 | encoding = encoding.encode('ascii', 'replace').decode('ascii') 67 | css_unicode = try_encoding(css_bytes, encoding) 68 | if css_unicode and not (has_at_charset and not 69 | css_unicode.startswith('@charset "')): 70 | return css_unicode, encoding 71 | break 72 | for encoding in [linking_encoding, document_encoding]: 73 | if encoding: 74 | css_unicode = try_encoding(css_bytes, encoding) 75 | if css_unicode is not None: 76 | return css_unicode, encoding 77 | css_unicode = try_encoding(css_bytes, 'UTF-8') 78 | if css_unicode is not None: 79 | return css_unicode, 'UTF-8' 80 | return try_encoding(css_bytes, 'ISO-8859-1', fallback=False), 'ISO-8859-1' 81 | 82 | 83 | def try_encoding(css_bytes, encoding, fallback=True): 84 | if fallback: 85 | try: 86 | css_unicode = css_bytes.decode(encoding) 87 | # LookupError means 
unknown encoding 88 | except (UnicodeDecodeError, LookupError): 89 | return None 90 | else: 91 | css_unicode = css_bytes.decode(encoding) 92 | if css_unicode and css_unicode[0] == '\ufeff': 93 | # Remove any Byte Order Mark 94 | css_unicode = css_unicode[1:] 95 | return css_unicode 96 | 97 | 98 | def hex2re(hex_data): 99 | return re.escape(unhexlify(hex_data.replace(' ', '').encode('ascii'))) 100 | 101 | 102 | class Slicer(object): 103 | """Slice()[start:stop:end] == slice(start, stop, end)""" 104 | def __getitem__(self, slice_): 105 | return operator.itemgetter(slice_) 106 | 107 | 108 | Slice = Slicer() 109 | 110 | 111 | # List of (bom_size, encoding, pattern) 112 | # bom_size is in bytes and can be zero 113 | # encoding is a string or (slice_, endianness) for "as specified" 114 | # slice_ is a slice object.How to extract the specified 115 | 116 | ENCODING_MAGIC_NUMBERS = [ 117 | ((Slice[:], ''), re.compile( 118 | hex2re('EF BB BF 40 63 68 61 72 73 65 74 20 22') + 119 | b'([^\x22]*?)' + 120 | hex2re('22 3B')).match), 121 | 122 | ('UTF-8', re.compile( 123 | hex2re('EF BB BF')).match), 124 | 125 | ((Slice[:], ''), re.compile( 126 | hex2re('40 63 68 61 72 73 65 74 20 22') + 127 | b'([^\x22]*?)' + 128 | hex2re('22 3B')).match), 129 | 130 | ((Slice[1::2], '-BE'), re.compile( 131 | hex2re('FE FF 00 40 00 63 00 68 00 61 00 72 00 73 00 65 00' 132 | '74 00 20 00 22') + 133 | b'((\x00[^\x22])*?)' + 134 | hex2re('00 22 00 3B')).match), 135 | 136 | ((Slice[1::2], '-BE'), re.compile( 137 | hex2re('00 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00' 138 | '20 00 22') + 139 | b'((\x00[^\x22])*?)' + 140 | hex2re('00 22 00 3B')).match), 141 | 142 | ((Slice[::2], '-LE'), re.compile( 143 | hex2re('FF FE 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74' 144 | '00 20 00 22 00') + 145 | b'(([^\x22]\x00)*?)' + 146 | hex2re('22 00 3B 00')).match), 147 | 148 | ((Slice[::2], '-LE'), re.compile( 149 | hex2re('40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00 20' 150 | '00 22 00') + 151 | 
b'(([^\x22]\x00)*?)' + 152 | hex2re('22 00 3B 00')).match), 153 | 154 | ((Slice[3::4], '-BE'), re.compile( 155 | hex2re('00 00 FE FF 00 00 00 40 00 00 00 63 00 00 00 68 00' 156 | '00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00' 157 | '00 74 00 00 00 20 00 00 00 22') + 158 | b'((\x00\x00\x00[^\x22])*?)' + 159 | hex2re('00 00 00 22 00 00 00 3B')).match), 160 | 161 | ((Slice[3::4], '-BE'), re.compile( 162 | hex2re('00 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00' 163 | '00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00' 164 | '00 20 00 00 00 22') + 165 | b'((\x00\x00\x00[^\x22])*?)' + 166 | hex2re('00 00 00 22 00 00 00 3B')).match), 167 | 168 | 169 | # Python does not support 2143 or 3412 endianness, AFAIK. 170 | # I guess we could fix it up ourselves but meh. Patches welcome. 171 | 172 | # ((Slice[2::4], '-2143'), re.compile( 173 | # hex2re('00 00 FF FE 00 00 40 00 00 00 63 00 00 00 68 00 00' 174 | # '00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00' 175 | # '74 00 00 00 20 00 00 00 22 00') + 176 | # b'((\x00\x00[^\x22]\x00)*?)' + 177 | # hex2re('00 00 22 00 00 00 3B 00')).match), 178 | 179 | # ((Slice[2::4], '-2143'), re.compile( 180 | # hex2re('00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00' 181 | # '00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00' 182 | # '20 00 00 00 22 00') + 183 | # b'((\x00\x00[^\x22]\x00)*?)' + 184 | # hex2re('00 00 22 00 00 00 3B 00')).match), 185 | 186 | # ((Slice[1::4], '-3412'), re.compile( 187 | # hex2re('FE FF 00 00 00 40 00 00 00 63 00 00 00 68 00 00 00' 188 | # '61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74' 189 | # '00 00 00 20 00 00 00 22 00 00') + 190 | # b'((\x00[^\x22]\x00\x00)*?)' + 191 | # hex2re('00 22 00 00 00 3B 00 00')).match), 192 | 193 | # ((Slice[1::4], '-3412'), re.compile( 194 | # hex2re('00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00' 195 | # '72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20' 196 | # '00 00 00 22 00 00') + 197 | # b'((\x00[^\x22]\x00\x00)*?)' + 198 | # hex2re('00 22 00 00 
00 3B 00 00')).match), 199 | 200 | ((Slice[::4], '-LE'), re.compile( 201 | hex2re('FF FE 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61' 202 | '00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00' 203 | '00 00 20 00 00 00 22 00 00 00') + 204 | b'(([^\x22]\x00\x00\x00)*?)' + 205 | hex2re('22 00 00 00 3B 00 00 00')).match), 206 | 207 | ((Slice[::4], '-LE'), re.compile( 208 | hex2re('40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72' 209 | '00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00' 210 | '00 00 22 00 00 00') + 211 | b'(([^\x22]\x00\x00\x00)*?)' + 212 | hex2re('22 00 00 00 3B 00 00 00')).match), 213 | 214 | ('UTF-32-BE', re.compile( 215 | hex2re('00 00 FE FF')).match), 216 | 217 | ('UTF-32-LE', re.compile( 218 | hex2re('FF FE 00 00')).match), 219 | 220 | # ('UTF-32-2143', re.compile( 221 | # hex2re('00 00 FF FE')).match), 222 | 223 | # ('UTF-32-3412', re.compile( 224 | # hex2re('FE FF 00 00')).match), 225 | 226 | ('UTF-16-BE', re.compile( 227 | hex2re('FE FF')).match), 228 | 229 | ('UTF-16-LE', re.compile( 230 | hex2re('FF FE')).match), 231 | 232 | 233 | # Some of there are supported by Python, but I didn’t bother. 234 | # You know the story with patches ... 
235 | 236 | # # as specified, transcoded from EBCDIC to ASCII 237 | # ('as_specified-EBCDIC', re.compile( 238 | # hex2re('7C 83 88 81 99 A2 85 A3 40 7F') 239 | # + b'([^\x7F]*?)' 240 | # + hex2re('7F 5E')).match), 241 | 242 | # # as specified, transcoded from IBM1026 to ASCII 243 | # ('as_specified-IBM1026', re.compile( 244 | # hex2re('AE 83 88 81 99 A2 85 A3 40 FC') 245 | # + b'([^\xFC]*?)' 246 | # + hex2re('FC 5E')).match), 247 | 248 | # # as specified, transcoded from GSM 03.38 to ASCII 249 | # ('as_specified-GSM_03.38', re.compile( 250 | # hex2re('00 63 68 61 72 73 65 74 20 22') 251 | # + b'([^\x22]*?)' 252 | # + hex2re('22 3B')).match), 253 | ] 254 | -------------------------------------------------------------------------------- /tinycss/fonts3.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.colors3 4 | --------------- 5 | 6 | Parser for CSS 3 Fonts syntax: 7 | https://www.w3.org/TR/css-fonts-3/ 8 | 9 | Adds support for font-face and font-feature-values rules. 10 | 11 | :copyright: (c) 2016 by Kozea. 12 | :license: BSD, see LICENSE for more details. 13 | """ 14 | 15 | from __future__ import division, unicode_literals 16 | 17 | from .css21 import CSS21Parser, ParseError 18 | 19 | 20 | class FontFaceRule(object): 21 | """A parsed at-rule for font faces. 22 | 23 | .. attribute:: at_keyword 24 | 25 | Always ``'@font-face'``. 26 | 27 | .. attribute:: declarations 28 | 29 | A list of :class:`~.css21.Declaration` objects. 30 | 31 | .. attribute:: line 32 | 33 | Source line where this was read. 34 | 35 | .. attribute:: column 36 | 37 | Source column where this was read. 
38 | 39 | """ 40 | 41 | def __init__(self, at_keyword, declarations, line, column): 42 | assert at_keyword == '@font-face' 43 | self.at_keyword = at_keyword 44 | self.declarations = declarations 45 | self.line = line 46 | self.column = column 47 | 48 | 49 | class FontFeatureValuesRule(object): 50 | """A parsed at-rule for font feature values. 51 | 52 | .. attribute:: at_keyword 53 | 54 | Always ``'@font-feature-values'``. 55 | 56 | .. attribute:: line 57 | 58 | Source line where this was read. 59 | 60 | .. attribute:: column 61 | 62 | Source column where this was read. 63 | 64 | .. attribute:: at_rules 65 | 66 | The list of parsed at-rules inside the @font-feature-values block, in 67 | source order. 68 | 69 | .. attribute:: family_names 70 | 71 | A list of strings representing font families. 72 | 73 | """ 74 | 75 | def __init__(self, at_keyword, at_rules, family_names, line, column): 76 | assert at_keyword == '@font-feature-values' 77 | self.at_keyword = at_keyword 78 | self.family_names = family_names 79 | self.at_rules = at_rules 80 | self.line = line 81 | self.column = column 82 | 83 | 84 | class FontFeatureRule(object): 85 | """A parsed at-rule for font features. 86 | 87 | .. attribute:: at_keyword 88 | 89 | One of the 16 following strings: 90 | 91 | * ``@stylistic`` 92 | * ``@styleset`` 93 | * ``@character-variant`` 94 | * ``@swash`` 95 | * ``@ornaments`` 96 | * ``@annotation`` 97 | 98 | .. attribute:: declarations 99 | 100 | A list of :class:`~.css21.Declaration` objects. 101 | 102 | .. attribute:: line 103 | 104 | Source line where this was read. 105 | 106 | .. attribute:: column 107 | 108 | Source column where this was read. 109 | 110 | """ 111 | 112 | def __init__(self, at_keyword, declarations, line, column): 113 | self.at_keyword = at_keyword 114 | self.declarations = declarations 115 | self.line = line 116 | self.column = column 117 | 118 | 119 | class CSSFonts3Parser(CSS21Parser): 120 | """Extend :class:`~.css21.CSS21Parser` for `CSS 3 Fonts`_ syntax. 
121 | 122 | .. _CSS 3 Fonts: https://www.w3.org/TR/css-fonts-3/ 123 | 124 | """ 125 | 126 | FONT_FEATURE_VALUES_AT_KEYWORDS = [ 127 | '@stylistic', 128 | '@styleset', 129 | '@character-variant', 130 | '@swash', 131 | '@ornaments', 132 | '@annotation', 133 | ] 134 | 135 | def parse_at_rule(self, rule, previous_rules, errors, context): 136 | if rule.at_keyword == '@font-face': 137 | if rule.head: 138 | raise ParseError( 139 | rule.head[0], 140 | 'unexpected {0} token in {1} rule header'.format( 141 | rule.head[0].type, rule.at_keyword)) 142 | declarations, body_errors = self.parse_declaration_list(rule.body) 143 | errors.extend(body_errors) 144 | return FontFaceRule( 145 | rule.at_keyword, declarations, rule.line, rule.column) 146 | elif rule.at_keyword == '@font-feature-values': 147 | family_names = tuple( 148 | self.parse_font_feature_values_family_names(rule.head)) 149 | at_rules, body_errors = ( 150 | self.parse_rules(rule.body or [], '@font-feature-values')) 151 | errors.extend(body_errors) 152 | return FontFeatureValuesRule( 153 | rule.at_keyword, at_rules, family_names, 154 | rule.line, rule.column) 155 | elif rule.at_keyword in self.FONT_FEATURE_VALUES_AT_KEYWORDS: 156 | if context != '@font-feature-values': 157 | raise ParseError( 158 | rule, '{0} rule not allowed in {1}'.format( 159 | rule.at_keyword, context)) 160 | declarations, body_errors = self.parse_declaration_list(rule.body) 161 | errors.extend(body_errors) 162 | return FontFeatureRule( 163 | rule.at_keyword, declarations, rule.line, rule.column) 164 | return super(CSSFonts3Parser, self).parse_at_rule( 165 | rule, previous_rules, errors, context) 166 | 167 | def parse_font_feature_values_family_names(self, tokens): 168 | """Parse an @font-feature-values selector. 169 | 170 | :param tokens: 171 | An iterable of token, typically from the ``head`` attribute of 172 | an unparsed :class:`AtRule`. 173 | :returns: 174 | A generator of strings representing font families. 
175 | :raises: 176 | :class:`~.parsing.ParseError` on invalid selectors 177 | 178 | """ 179 | family = '' 180 | current_string = False 181 | for token in tokens: 182 | if token.type == 'DELIM' and token.value == ',' and family: 183 | yield family 184 | family = '' 185 | current_string = False 186 | elif token.type == 'STRING' and not family and ( 187 | current_string is False): 188 | family = token.value 189 | current_string = True 190 | elif token.type == 'IDENT' and not current_string: 191 | if family: 192 | family += ' ' 193 | family += token.value 194 | elif token.type != 'S': 195 | family = '' 196 | break 197 | if family: 198 | yield family 199 | else: 200 | raise ParseError(token, 'invalid @font-feature-values selector') 201 | -------------------------------------------------------------------------------- /tinycss/page3.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.page3 4 | ------------------ 5 | 6 | Support for CSS 3 Paged Media syntax: 7 | http://dev.w3.org/csswg/css3-page/ 8 | 9 | Adds support for named page selectors and margin rules. 10 | 11 | :copyright: (c) 2012 by Simon Sapin. 12 | :license: BSD, see LICENSE for more details. 13 | """ 14 | 15 | from __future__ import division, unicode_literals 16 | 17 | from .css21 import CSS21Parser, ParseError 18 | 19 | 20 | class MarginRule(object): 21 | """A parsed at-rule for margin box. 22 | 23 | .. attribute:: at_keyword 24 | 25 | One of the 16 following strings: 26 | 27 | * ``@top-left-corner`` 28 | * ``@top-left`` 29 | * ``@top-center`` 30 | * ``@top-right`` 31 | * ``@top-right-corner`` 32 | * ``@bottom-left-corner`` 33 | * ``@bottom-left`` 34 | * ``@bottom-center`` 35 | * ``@bottom-right`` 36 | * ``@bottom-right-corner`` 37 | * ``@left-top`` 38 | * ``@left-middle`` 39 | * ``@left-bottom`` 40 | * ``@right-top`` 41 | * ``@right-middle`` 42 | * ``@right-bottom`` 43 | 44 | .. 
attribute:: declarations 45 | 46 | A list of :class:`~.css21.Declaration` objects. 47 | 48 | .. attribute:: line 49 | 50 | Source line where this was read. 51 | 52 | .. attribute:: column 53 | 54 | Source column where this was read. 55 | 56 | """ 57 | 58 | def __init__(self, at_keyword, declarations, line, column): 59 | self.at_keyword = at_keyword 60 | self.declarations = declarations 61 | self.line = line 62 | self.column = column 63 | 64 | 65 | class CSSPage3Parser(CSS21Parser): 66 | """Extend :class:`~.css21.CSS21Parser` for `CSS 3 Paged Media`_ syntax. 67 | 68 | .. _CSS 3 Paged Media: http://dev.w3.org/csswg/css3-page/ 69 | 70 | Compared to CSS 2.1, the ``at_rules`` and ``selector`` attributes of 71 | :class:`~.css21.PageRule` objects are modified: 72 | 73 | * ``at_rules`` is not always empty, it is a list of :class:`MarginRule` 74 | objects. 75 | 76 | * ``selector``, instead of a single string, is a tuple of the page name 77 | and the pseudo class. Each of these may be a ``None`` or a string. 78 | 79 | +--------------------------+------------------------+ 80 | | CSS | Parsed selectors | 81 | +==========================+========================+ 82 | | .. code-block:: css | .. 
code-block:: python | 83 | | | | 84 | | @page {} | (None, None) | 85 | | @page :first {} | (None, 'first') | 86 | | @page chapter {} | ('chapter', None) | 87 | | @page table:right {} | ('table', 'right') | 88 | +--------------------------+------------------------+ 89 | 90 | """ 91 | 92 | PAGE_MARGIN_AT_KEYWORDS = [ 93 | '@top-left-corner', 94 | '@top-left', 95 | '@top-center', 96 | '@top-right', 97 | '@top-right-corner', 98 | '@bottom-left-corner', 99 | '@bottom-left', 100 | '@bottom-center', 101 | '@bottom-right', 102 | '@bottom-right-corner', 103 | '@left-top', 104 | '@left-middle', 105 | '@left-bottom', 106 | '@right-top', 107 | '@right-middle', 108 | '@right-bottom', 109 | ] 110 | 111 | def parse_at_rule(self, rule, previous_rules, errors, context): 112 | if rule.at_keyword in self.PAGE_MARGIN_AT_KEYWORDS: 113 | if context != '@page': 114 | raise ParseError( 115 | rule, '{0} rule not allowed in {1}'.format( 116 | rule.at_keyword, context)) 117 | if rule.head: 118 | raise ParseError( 119 | rule.head[0], 120 | 'unexpected {0} token in {1} rule header'.format( 121 | rule.head[0].type, rule.at_keyword)) 122 | declarations, body_errors = self.parse_declaration_list(rule.body) 123 | errors.extend(body_errors) 124 | return MarginRule( 125 | rule.at_keyword, declarations, rule.line, rule.column) 126 | return super(CSSPage3Parser, self).parse_at_rule( 127 | rule, previous_rules, errors, context) 128 | 129 | def parse_page_selector(self, head): 130 | """Parse an @page selector. 131 | 132 | :param head: 133 | The ``head`` attribute of an unparsed :class:`AtRule`. 134 | :returns: 135 | A page selector. For CSS 2.1, this is 'first', 'left', 'right' 136 | or None. 'blank' is added by GCPM. 
137 | :raises: 138 | :class`~parsing.ParseError` on invalid selectors 139 | 140 | """ 141 | if not head: 142 | return (None, None), (0, 0, 0) 143 | if head[0].type == 'IDENT': 144 | name = head.pop(0).value 145 | while head and head[0].type == 'S': 146 | head.pop(0) 147 | if not head: 148 | return (name, None), (1, 0, 0) 149 | name_specificity = (1,) 150 | else: 151 | name = None 152 | name_specificity = (0,) 153 | if (len(head) == 2 and head[0].type == ':' and 154 | head[1].type == 'IDENT'): 155 | pseudo_class = head[1].value 156 | specificity = { 157 | 'first': (1, 0), 'blank': (1, 0), 158 | 'left': (0, 1), 'right': (0, 1), 159 | }.get(pseudo_class) 160 | if specificity: 161 | return (name, pseudo_class), (name_specificity + specificity) 162 | raise ParseError(head[0], 'invalid @page selector') 163 | -------------------------------------------------------------------------------- /tinycss/parsing.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.parsing 4 | --------------- 5 | 6 | Utilities for parsing lists of tokens. 7 | 8 | :copyright: (c) 2012 by Simon Sapin. 9 | :license: BSD, see LICENSE for more details. 10 | """ 11 | 12 | from __future__ import unicode_literals 13 | 14 | 15 | # TODO: unit tests 16 | 17 | def split_on_comma(tokens): 18 | """Split a list of tokens on commas, ie ``,`` DELIM tokens. 19 | 20 | Only "top-level" comma tokens are splitting points, not commas inside a 21 | function or other :class:`ContainerToken`. 22 | 23 | :param tokens: 24 | An iterable of :class:`~.token_data.Token` or 25 | :class:`~.token_data.ContainerToken`. 
def strip_whitespace(tokens):
    """Remove whitespace at the beginning and end of a token list.

    Whitespace tokens in-between other tokens in the list are preserved.

    :param tokens:
        A list of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :return:
        A new sub-sequence of the list.

    """
    start = 0
    end = len(tokens)
    # Advance past leading whitespace tokens.
    while start < end and tokens[start].type == 'S':
        start += 1
    # Back up past trailing whitespace tokens.
    while end > start and tokens[end - 1].type == 'S':
        end -= 1
    return tokens[start:end]
def validate_any(token, context):
    """Check one token against the 'any' production of the core grammar.

    :param token: a single token
    :param context: a string for the 'unexpected in ...' message
    :raises:
        :class:`ParseError` if this is an invalid token for the
        'any' production of the core grammar.

    """
    kind = token.type
    if kind in ('FUNCTION', '(', '['):
        # A container token is valid when all of its content is.
        for child in token.content:
            validate_any(child, kind)
        return
    if kind in ('S', 'IDENT', 'DIMENSION', 'PERCENTAGE', 'NUMBER',
                'INTEGER', 'URI', 'DELIM', 'STRING', 'HASH', ':',
                'UNICODE-RANGE'):
        return
    adjective = 'unmatched' if kind in ('}', ')', ']') else 'unexpected'
    raise ParseError(
        token, '{0} {1} token in {2}'.format(adjective, kind, context))
159 | 160 | """ 161 | def __init__(self, subject, reason): 162 | self.line = subject.line 163 | self.column = subject.column 164 | self.reason = reason 165 | super(ParseError, self).__init__( 166 | 'Parse error at {0.line}:{0.column}, {0.reason}'.format(self)) 167 | -------------------------------------------------------------------------------- /tinycss/speedups.pyx: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.speedups 4 | ---------------- 5 | 6 | Cython module for speeding up inner loops. 7 | 8 | Right now only :func:`tokenize_flat` has a second implementation. 9 | 10 | :copyright: (c) 2010 by Simon Sapin. 11 | :license: BSD, see LICENSE for more details. 12 | """ 13 | 14 | from __future__ import unicode_literals 15 | 16 | from .token_data import ( 17 | COMPILED_TOKEN_REGEXPS, UNICODE_UNESCAPE, NEWLINE_UNESCAPE, 18 | SIMPLE_UNESCAPE, FIND_NEWLINES, TOKEN_DISPATCH) 19 | 20 | 21 | COMPILED_TOKEN_INDEXES = dict( 22 | (name, i) for i, (name, regexp) in enumerate(COMPILED_TOKEN_REGEXPS)) 23 | 24 | 25 | cdef class CToken: 26 | """A token built by the Cython speedups. Identical to 27 | :class:`~.token_data.Token`. 28 | 29 | """ 30 | is_container = False 31 | 32 | cdef public object type, _as_css, value, unit 33 | cdef public Py_ssize_t line, column 34 | 35 | def __init__(self, type_, css_value, value, unit, line, column): 36 | self.type = type_ 37 | self._as_css = css_value 38 | self.value = value 39 | self.unit = unit 40 | self.line = line 41 | self.column = column 42 | 43 | def as_css(self): 44 | """ 45 | Return as an Unicode string the CSS representation of the token, 46 | as parsed in the source. 
47 | """ 48 | return self._as_css 49 | 50 | def __repr__(self): 51 | return ('' 52 | .format(self, self.unit or '')) 53 | 54 | 55 | def tokenize_flat(css_source, int ignore_comments=1): 56 | """ 57 | :param css_source: 58 | CSS as an unicode string 59 | :param ignore_comments: 60 | if true (the default) comments will not be included in the 61 | return value 62 | :return: 63 | An iterator of :class:`Token` 64 | 65 | """ 66 | # Make these local variable to avoid global lookups in the loop 67 | tokens_dispatch = TOKEN_DISPATCH 68 | compiled_token_indexes = COMPILED_TOKEN_INDEXES 69 | compiled_tokens = COMPILED_TOKEN_REGEXPS 70 | unicode_unescape = UNICODE_UNESCAPE 71 | newline_unescape = NEWLINE_UNESCAPE 72 | simple_unescape = SIMPLE_UNESCAPE 73 | find_newlines = FIND_NEWLINES 74 | 75 | # Use the integer indexes instead of string markers 76 | cdef Py_ssize_t BAD_COMMENT = compiled_token_indexes['BAD_COMMENT'] 77 | cdef Py_ssize_t BAD_STRING = compiled_token_indexes['BAD_STRING'] 78 | cdef Py_ssize_t PERCENTAGE = compiled_token_indexes['PERCENTAGE'] 79 | cdef Py_ssize_t DIMENSION = compiled_token_indexes['DIMENSION'] 80 | cdef Py_ssize_t ATKEYWORD = compiled_token_indexes['ATKEYWORD'] 81 | cdef Py_ssize_t FUNCTION = compiled_token_indexes['FUNCTION'] 82 | cdef Py_ssize_t COMMENT = compiled_token_indexes['COMMENT'] 83 | cdef Py_ssize_t NUMBER = compiled_token_indexes['NUMBER'] 84 | cdef Py_ssize_t STRING = compiled_token_indexes['STRING'] 85 | cdef Py_ssize_t IDENT = compiled_token_indexes['IDENT'] 86 | cdef Py_ssize_t HASH = compiled_token_indexes['HASH'] 87 | cdef Py_ssize_t URI = compiled_token_indexes['URI'] 88 | cdef Py_ssize_t DELIM = -1 89 | 90 | cdef Py_ssize_t pos = 0 91 | cdef Py_ssize_t line = 1 92 | cdef Py_ssize_t column = 1 93 | cdef Py_ssize_t source_len = len(css_source) 94 | cdef Py_ssize_t n_tokens = len(compiled_tokens) 95 | cdef Py_ssize_t length, next_pos, type_ 96 | cdef CToken token 97 | 98 | tokens = [] 99 | while pos < source_len: 100 | char = 
css_source[pos] 101 | if char in ':;{}()[]': 102 | type_ = -1 # not parsed further anyway 103 | type_name = char 104 | css_value = char 105 | else: 106 | codepoint = min(ord(char), 160) 107 | for type_, type_name, regexp in tokens_dispatch[codepoint]: 108 | match = regexp(css_source, pos) 109 | if match: 110 | # First match is the longest. See comments on TOKENS above. 111 | css_value = match.group() 112 | break 113 | else: 114 | # No match. 115 | # "Any other character not matched by the above rules, 116 | # and neither a single nor a double quote." 117 | # ... but quotes at the start of a token are always matched 118 | # by STRING or BAD_STRING. So DELIM is any single character. 119 | type_ = DELIM 120 | type_name = 'DELIM' 121 | css_value = char 122 | length = len(css_value) 123 | next_pos = pos + length 124 | 125 | # A BAD_COMMENT is a comment at EOF. Ignore it too. 126 | if not (ignore_comments and type_ in (COMMENT, BAD_COMMENT)): 127 | # Parse numbers, extract strings and URIs, unescape 128 | unit = None 129 | if type_ == DIMENSION: 130 | value = match.group(1) 131 | value = float(value) if '.' in value else int(value) 132 | unit = match.group(2) 133 | unit = simple_unescape(unit) 134 | unit = unicode_unescape(unit) 135 | unit = unit.lower() # normalize 136 | elif type_ == PERCENTAGE: 137 | value = css_value[:-1] 138 | value = float(value) if '.' in value else int(value) 139 | unit = '%' 140 | elif type_ == NUMBER: 141 | value = css_value 142 | if '.' 
in value: 143 | value = float(value) 144 | else: 145 | value = int(value) 146 | type_name = 'INTEGER' 147 | elif type_ in (IDENT, ATKEYWORD, HASH, FUNCTION): 148 | value = simple_unescape(css_value) 149 | value = unicode_unescape(value) 150 | elif type_ == URI: 151 | value = match.group(1) 152 | if value and value[0] in '"\'': 153 | value = value[1:-1] # Remove quotes 154 | value = newline_unescape(value) 155 | value = simple_unescape(value) 156 | value = unicode_unescape(value) 157 | elif type_ == STRING: 158 | value = css_value[1:-1] # Remove quotes 159 | value = newline_unescape(value) 160 | value = simple_unescape(value) 161 | value = unicode_unescape(value) 162 | # BAD_STRING can only be one of: 163 | # * Unclosed string at the end of the stylesheet: 164 | # Close the string, but this is not an error. 165 | # Make it a "good" STRING token. 166 | # * Unclosed string at the (unescaped) end of the line: 167 | # Close the string, but this is an error. 168 | # Leave it as a BAD_STRING, don’t bother parsing it. 
def assert_errors(errors, expected_errors):
    """Check each error message for an expected substring.

    Only substrings are compared, not complete error messages.
    """
    assert len(errors) == len(expected_errors)
    for got, fragment in zip(errors, expected_errors):
        assert fragment in str(got)
@contextlib.contextmanager
def install_tokenizer(name):
    """Temporarily monkey-patch the module-level tokenizer.

    Within the ``with`` block, ``tokenizer.tokenize_flat`` is replaced by
    the implementation named *name* (e.g. 'cython_tokenize_flat' or
    'python_tokenize_flat') so that parsing can be timed against it.
    """
    original = tokenizer.tokenize_flat
    try:
        tokenizer.tokenize_flat = getattr(tokenizer, name)
        yield
    finally:
        # Always restore the original, even if the timed code raises.
        tokenizer.tokenize_flat = original
def time(function):
    """Return the best wall-clock time for *function*, in milliseconds.

    Runs ``timeit`` with the module-level ``TIMEIT_REPEAT`` /
    ``TIMEIT_NUMBER`` settings and keeps the fastest repetition.
    """
    timings = timeit.Timer(function).repeat(TIMEIT_REPEAT, TIMEIT_NUMBER)
    return int(min(timings) * 1000)
def test_make_parser():
    """Check that make_parser() composes parser classes as documented."""
    class MyParser(object):
        def __init__(self, some_config):
            self.some_config = some_config

    # Every documented calling convention: no args, a module name,
    # a class, extra base classes, and keyword configuration.
    parsers = [
        make_parser(),
        make_parser('page3'),
        make_parser(CSSPage3Parser),
        make_parser(MyParser, some_config=42),
        make_parser(CSSPage3Parser, MyParser, some_config=42),
        make_parser(MyParser, 'page3', some_config=42),
    ]

    # Which of the parsers above should inherit CSSPage3Parser...
    for parser, exp in zip(parsers, [False, True, True, False, True, True]):
        assert isinstance(parser, CSSPage3Parser) == exp

    # ...and which should inherit the user-supplied MyParser.
    for parser, exp in zip(parsers, [False, False, False, True, True, True]):
        assert isinstance(parser, MyParser) == exp

    for parser in parsers[3:]:
        assert parser.some_config == 42

    # Extra or missing named parameters
    raises(TypeError, make_parser, some_config=4)
    raises(TypeError, make_parser, 'page3', some_config=4)
    raises(TypeError, make_parser, MyParser)
    raises(TypeError, make_parser, MyParser, some_config=4, other_config=7)
8 | """ 9 | 10 | 11 | from __future__ import unicode_literals 12 | 13 | import pytest 14 | from tinycss.color3 import hsl_to_rgb, parse_color_string 15 | 16 | 17 | @pytest.mark.parametrize(('css_source', 'expected_result'), [ 18 | ('', None), 19 | (' /* hey */\n', None), 20 | ('4', None), 21 | ('top', None), 22 | ('/**/transparent', (0, 0, 0, 0)), 23 | ('transparent', (0, 0, 0, 0)), 24 | (' transparent\n', (0, 0, 0, 0)), 25 | ('TransParent', (0, 0, 0, 0)), 26 | ('currentColor', 'currentColor'), 27 | ('CURRENTcolor', 'currentColor'), 28 | ('current_Color', None), 29 | 30 | ('black', (0, 0, 0, 1)), 31 | ('white', (1, 1, 1, 1)), 32 | ('fuchsia', (1, 0, 1, 1)), 33 | ('cyan', (0, 1, 1, 1)), 34 | ('CyAn', (0, 1, 1, 1)), 35 | ('darkkhaki', (189 / 255., 183 / 255., 107 / 255., 1)), 36 | 37 | ('#', None), 38 | ('#f', None), 39 | ('#ff', None), 40 | ('#fff', (1, 1, 1, 1)), 41 | ('#ffg', None), 42 | ('#ffff', None), 43 | ('#fffff', None), 44 | ('#ffffff', (1, 1, 1, 1)), 45 | ('#fffffg', None), 46 | ('#fffffff', None), 47 | ('#ffffffff', None), 48 | ('#fffffffff', None), 49 | 50 | ('#cba987', (203 / 255., 169 / 255., 135 / 255., 1)), 51 | ('#CbA987', (203 / 255., 169 / 255., 135 / 255., 1)), 52 | ('#1122aA', (17 / 255., 34 / 255., 170 / 255., 1)), 53 | ('#12a', (17 / 255., 34 / 255., 170 / 255., 1)), 54 | 55 | ('rgb(203, 169, 135)', (203 / 255., 169 / 255., 135 / 255., 1)), 56 | ('RGB(255, 255, 255)', (1, 1, 1, 1)), 57 | ('rgB(0, 0, 0)', (0, 0, 0, 1)), 58 | ('rgB(0, 51, 255)', (0, .2, 1, 1)), 59 | ('rgb(0,51,255)', (0, .2, 1, 1)), 60 | ('rgb(0\t, 51 ,255)', (0, .2, 1, 1)), 61 | ('rgb(/* R */0, /* G */51, /* B */255)', (0, .2, 1, 1)), 62 | ('rgb(-51, 306, 0)', (-.2, 1.2, 0, 1)), # out of 0..1 is allowed 63 | 64 | ('rgb(42%, 3%, 50%)', (.42, .03, .5, 1)), 65 | ('RGB(100%, 100%, 100%)', (1, 1, 1, 1)), 66 | ('rgB(0%, 0%, 0%)', (0, 0, 0, 1)), 67 | ('rgB(10%, 20%, 30%)', (.1, .2, .3, 1)), 68 | ('rgb(10%,20%,30%)', (.1, .2, .3, 1)), 69 | ('rgb(10%\t, 20% ,30%)', (.1, .2, .3, 1)), 70 
| ('rgb(/* R */10%, /* G */20%, /* B */30%)', (.1, .2, .3, 1)), 71 | ('rgb(-12%, 110%, 1400%)', (-.12, 1.1, 14, 1)), # out of 0..1 is allowed 72 | 73 | ('rgb(10%, 50%, 0)', None), 74 | ('rgb(255, 50%, 0%)', None), 75 | ('rgb(0, 0 0)', None), 76 | ('rgb(0, 0, 0deg)', None), 77 | ('rgb(0, 0, light)', None), 78 | ('rgb()', None), 79 | ('rgb(0)', None), 80 | ('rgb(0, 0)', None), 81 | ('rgb(0, 0, 0, 0)', None), 82 | ('rgb(0%)', None), 83 | ('rgb(0%, 0%)', None), 84 | ('rgb(0%, 0%, 0%, 0%)', None), 85 | ('rgb(0%, 0%, 0%, 0)', None), 86 | 87 | ('rgba(0, 0, 0, 0)', (0, 0, 0, 0)), 88 | ('rgba(203, 169, 135, 0.3)', (203 / 255., 169 / 255., 135 / 255., 0.3)), 89 | ('RGBA(255, 255, 255, 0)', (1, 1, 1, 0)), 90 | ('rgBA(0, 51, 255, 1)', (0, 0.2, 1, 1)), 91 | ('rgba(0, 51, 255, 1.1)', (0, 0.2, 1, 1)), 92 | ('rgba(0, 51, 255, 37)', (0, 0.2, 1, 1)), 93 | ('rgba(0, 51, 255, 0.42)', (0, 0.2, 1, 0.42)), 94 | ('rgba(0, 51, 255, 0)', (0, 0.2, 1, 0)), 95 | ('rgba(0, 51, 255, -0.1)', (0, 0.2, 1, 0)), 96 | ('rgba(0, 51, 255, -139)', (0, 0.2, 1, 0)), 97 | 98 | ('rgba(42%, 3%, 50%, 0.3)', (.42, .03, .5, 0.3)), 99 | ('RGBA(100%, 100%, 100%, 0)', (1, 1, 1, 0)), 100 | ('rgBA(0%, 20%, 100%, 1)', (0, 0.2, 1, 1)), 101 | ('rgba(0%, 20%, 100%, 1.1)', (0, 0.2, 1, 1)), 102 | ('rgba(0%, 20%, 100%, 37)', (0, 0.2, 1, 1)), 103 | ('rgba(0%, 20%, 100%, 0.42)', (0, 0.2, 1, 0.42)), 104 | ('rgba(0%, 20%, 100%, 0)', (0, 0.2, 1, 0)), 105 | ('rgba(0%, 20%, 100%, -0.1)', (0, 0.2, 1, 0)), 106 | ('rgba(0%, 20%, 100%, -139)', (0, 0.2, 1, 0)), 107 | 108 | ('rgba(255, 255, 255, 0%)', None), 109 | ('rgba(10%, 50%, 0, 1)', None), 110 | ('rgba(255, 50%, 0%, 1)', None), 111 | ('rgba(0, 0, 0 0)', None), 112 | ('rgba(0, 0, 0, 0deg)', None), 113 | ('rgba(0, 0, 0, light)', None), 114 | ('rgba()', None), 115 | ('rgba(0)', None), 116 | ('rgba(0, 0, 0)', None), 117 | ('rgba(0, 0, 0, 0, 0)', None), 118 | ('rgba(0%)', None), 119 | ('rgba(0%, 0%)', None), 120 | ('rgba(0%, 0%, 0%)', None), 121 | ('rgba(0%, 0%, 0%, 0%)', None), 122 | 
('rgba(0%, 0%, 0%, 0%, 0%)', None), 123 | 124 | ('HSL(0, 0%, 0%)', (0, 0, 0, 1)), 125 | ('hsL(0, 100%, 50%)', (1, 0, 0, 1)), 126 | ('hsl(60, 100%, 37.5%)', (0.75, 0.75, 0, 1)), 127 | ('hsl(780, 100%, 37.5%)', (0.75, 0.75, 0, 1)), 128 | ('hsl(-300, 100%, 37.5%)', (0.75, 0.75, 0, 1)), 129 | ('hsl(300, 50%, 50%)', (0.75, 0.25, 0.75, 1)), 130 | 131 | ('hsl(10, 50%, 0)', None), 132 | ('hsl(50%, 50%, 0%)', None), 133 | ('hsl(0, 0% 0%)', None), 134 | ('hsl(30deg, 100%, 100%)', None), 135 | ('hsl(0, 0%, light)', None), 136 | ('hsl()', None), 137 | ('hsl(0)', None), 138 | ('hsl(0, 0%)', None), 139 | ('hsl(0, 0%, 0%, 0%)', None), 140 | 141 | ('HSLA(-300, 100%, 37.5%, 1)', (0.75, 0.75, 0, 1)), 142 | ('hsLA(-300, 100%, 37.5%, 12)', (0.75, 0.75, 0, 1)), 143 | ('hsla(-300, 100%, 37.5%, 0.2)', (0.75, 0.75, 0, .2)), 144 | ('hsla(-300, 100%, 37.5%, 0)', (0.75, 0.75, 0, 0)), 145 | ('hsla(-300, 100%, 37.5%, -3)', (0.75, 0.75, 0, 0)), 146 | 147 | ('hsla(10, 50%, 0, 1)', None), 148 | ('hsla(50%, 50%, 0%, 1)', None), 149 | ('hsla(0, 0% 0%, 1)', None), 150 | ('hsla(30deg, 100%, 100%, 1)', None), 151 | ('hsla(0, 0%, light, 1)', None), 152 | ('hsla()', None), 153 | ('hsla(0)', None), 154 | ('hsla(0, 0%)', None), 155 | ('hsla(0, 0%, 0%, 50%)', None), 156 | ('hsla(0, 0%, 0%, 1, 0%)', None), 157 | 158 | ('cmyk(0, 0, 0, 0)', None), 159 | ]) 160 | def test_color(css_source, expected_result): 161 | result = parse_color_string(css_source) 162 | if isinstance(result, tuple): 163 | for got, expected in zip(result, expected_result): 164 | # Compensate for floating point errors: 165 | assert abs(got - expected) < 1e-10 166 | for i, attr in enumerate(['red', 'green', 'blue', 'alpha']): 167 | assert getattr(result, attr) == result[i] 168 | else: 169 | assert result == expected_result 170 | 171 | 172 | @pytest.mark.parametrize(('hsl', 'expected_rgb'), [ 173 | # http://en.wikipedia.org/wiki/HSL_and_HSV#Examples 174 | ((0, 0, 100 ), (1, 1, 1 )), # noqa 175 | ((127, 0, 100 ), (1, 1, 1 )), # noqa 176 | ((0, 
0, 50 ), (0.5, 0.5, 0.5 )), # noqa 177 | ((127, 0, 50 ), (0.5, 0.5, 0.5 )), # noqa 178 | ((0, 0, 0 ), (0, 0, 0 )), # noqa 179 | ((127, 0, 0 ), (0, 0, 0 )), # noqa 180 | ((0, 100, 50 ), (1, 0, 0 )), # noqa 181 | ((60, 100, 37.5), (0.75, 0.75, 0 )), # noqa 182 | ((780, 100, 37.5), (0.75, 0.75, 0 )), # noqa 183 | ((-300, 100, 37.5), (0.75, 0.75, 0 )), # noqa 184 | ((120, 100, 25 ), (0, 0.5, 0 )), # noqa 185 | ((180, 100, 75 ), (0.5, 1, 1 )), # noqa 186 | ((240, 100, 75 ), (0.5, 0.5, 1 )), # noqa 187 | ((300, 50, 50 ), (0.75, 0.25, 0.75 )), # noqa 188 | ((61.8, 63.8, 39.3), (0.628, 0.643, 0.142)), # noqa 189 | ((251.1, 83.2, 51.1), (0.255, 0.104, 0.918)), # noqa 190 | ((134.9, 70.7, 39.6), (0.116, 0.675, 0.255)), # noqa 191 | ((49.5, 89.3, 49.7), (0.941, 0.785, 0.053)), # noqa 192 | ((283.7, 77.5, 54.2), (0.704, 0.187, 0.897)), # noqa 193 | ((14.3, 81.7, 62.4), (0.931, 0.463, 0.316)), # noqa 194 | ((56.9, 99.1, 76.5), (0.998, 0.974, 0.532)), # noqa 195 | ((162.4, 77.9, 44.7), (0.099, 0.795, 0.591)), # noqa 196 | ((248.3, 60.1, 37.3), (0.211, 0.149, 0.597)), # noqa 197 | ((240.5, 29, 60.7), (0.495, 0.493, 0.721)), # noqa 198 | ]) 199 | def test_hsl(hsl, expected_rgb): 200 | for got, expected in zip(hsl_to_rgb(*hsl), expected_rgb): 201 | # Compensate for floating point errors and Wikipedia’s rounding: 202 | assert abs(got - expected) < 0.001 203 | -------------------------------------------------------------------------------- /tinycss/tests/test_css21.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Tests for the CSS 2.1 parser 4 | ---------------------------- 5 | 6 | :copyright: (c) 2012 by Simon Sapin. 7 | :license: BSD, see LICENSE for more details. 8 | """ 9 | 10 | 11 | from __future__ import unicode_literals 12 | 13 | import io 14 | import os 15 | import tempfile 16 | 17 | import pytest 18 | from tinycss.css21 import CSS21Parser 19 | 20 | from . 
from .test_tokenizer import jsonify


def parse_bytes(css_bytes, kwargs):
    """Parse a stylesheet directly from a byte string."""
    return CSS21Parser().parse_stylesheet_bytes(css_bytes, **kwargs)


def parse_bytesio_file(css_bytes, kwargs):
    """Parse a stylesheet from an in-memory binary file object."""
    css_file = io.BytesIO(css_bytes)
    return CSS21Parser().parse_stylesheet_file(css_file, **kwargs)


def parse_filename(css_bytes, kwargs):
    """Parse a stylesheet from an actual file on disk, given by name."""
    css_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        css_file.write(css_bytes)
        # Windows can not open the filename a second time while
        # it is still open for writing.
        css_file.close()
        return CSS21Parser().parse_stylesheet_file(css_file.name, **kwargs)
    finally:
        os.remove(css_file.name)


@pytest.mark.parametrize(('css_bytes', 'kwargs', 'expected_result', 'parse'), [
    params + (parse,)
    for parse in [parse_bytes, parse_bytesio_file, parse_filename]
    for params in [
        ('@import "é";'.encode('utf8'), {}, 'é'),
        ('@import "é";'.encode('utf16'), {}, 'é'),  # with a BOM
        ('@import "é";'.encode('latin1'), {}, 'é'),
        ('@import "£";'.encode('Shift-JIS'), {}, '\x81\x92'),  # lat1 mojibake
        ('@charset "Shift-JIS";@import "£";'.encode('Shift-JIS'), {}, '£'),
        (' @charset "Shift-JIS";@import "£";'.encode('Shift-JIS'), {},
         '\x81\x92'),
        ('@import "£";'.encode('Shift-JIS'),
         {'document_encoding': 'Shift-JIS'}, '£'),
        ('@import "£";'.encode('Shift-JIS'),
         {'document_encoding': 'utf8'}, '\x81\x92'),
        ('@charset "utf8"; @import "£";'.encode('utf8'),
         {'document_encoding': 'latin1'}, '£'),
        # Mojibake yay!
        (' @charset "utf8"; @import "é";'.encode('utf8'),
         {'document_encoding': 'latin1'}, 'Ã©'),
        ('@import "é";'.encode('utf8'), {'document_encoding': 'latin1'},
         'Ã©'),
    ]
])
def test_bytes(css_bytes, kwargs, expected_result, parse):
    """Decoding is consistent regardless of how the bytes are fed in."""
    stylesheet = parse(css_bytes, kwargs)
    assert stylesheet.rules[0].at_keyword == '@import'
    assert stylesheet.rules[0].uri == expected_result


@pytest.mark.parametrize(('css_source', 'expected_rules', 'expected_errors'), [
    (' /* hey */\n', 0, []),
    ('foo {}', 1, []),
    ('foo{} @lipsum{} bar{}', 2,
     ['unknown at-rule in stylesheet context: @lipsum']),
    ('@charset "ascii"; foo {}', 1, []),
    (' @charset "ascii"; foo {}', 1, [
        'mis-placed or malformed @charset rule']),
    ('@charset ascii; foo {}', 1, ['mis-placed or malformed @charset rule']),
    ('foo {} @charset "ascii";', 1, ['mis-placed or malformed @charset rule']),
])
def test_at_rules(css_source, expected_rules, expected_errors):
    """Unknown or mis-placed at-rules are reported and dropped."""
    # Pass 'encoding' to allow @charset
    stylesheet = CSS21Parser().parse_stylesheet(css_source, encoding='utf8')
    assert_errors(stylesheet.errors, expected_errors)
    result = len(stylesheet.rules)
    assert result == expected_rules


@pytest.mark.parametrize(('css_source', 'expected_rules', 'expected_errors'), [
    (' /* hey */\n', [], []),

    ('foo{} /* hey */\n@bar;@baz{}',
     [('foo', []), ('@bar', [], None), ('@baz', [], [])], []),

    ('@import "foo.css"/**/;', [
        ('@import', [('STRING', 'foo.css')], None)], []),

    ('@import "foo.css"/**/', [
        ('@import', [('STRING', 'foo.css')], None)], []),

    ('@import "foo.css', [
        ('@import', [('STRING', 'foo.css')], None)], []),

    ('{}', [], ['empty selector']),

    ('a{b:4}', [('a', [('b', [('INTEGER', 4)])])], []),

    ('@page {\t b: 4; @margin}', [('@page', [], [
        ('S', '\t '), ('IDENT', 'b'), (':', ':'), ('S', ' '), ('INTEGER', 4),
        (';', ';'), ('S', ' '), ('ATKEYWORD', '@margin'),
    ])], []),

    ('foo', [], ['no declaration block found']),

    ('foo @page {} bar {}', [('bar', [])],
     ['unexpected ATKEYWORD token in selector']),

    ('foo { content: "unclosed string;\n color:red; ; margin/**/\n: 2cm; }',
     [('foo', [('margin', [('DIMENSION', 2)])])],
     ['unexpected BAD_STRING token in property value']),

    ('foo { 4px; bar: 12% }',
     [('foo', [('bar', [('PERCENTAGE', 12)])])],
     ['expected a property name, got DIMENSION']),

    ('foo { bar! 3cm auto ; baz: 7px }',
     [('foo', [('baz', [('DIMENSION', 7)])])],
     ["expected ':', got DELIM"]),

    ('foo { bar ; baz: {("}"/* comment */) {0@fizz}} }',
     [('foo', [('baz', [('{', [
         ('(', [('STRING', '}')]), ('S', ' '),
         ('{', [('INTEGER', 0), ('ATKEYWORD', '@fizz')])
     ])])])],
     ["expected ':'"]),

    ('foo { bar: ; baz: not(z) }',
     [('foo', [('baz', [('FUNCTION', 'not', [('IDENT', 'z')])])])],
     ['expected a property value']),

    ('foo { bar: (]) ; baz: U+20 }',
     [('foo', [('baz', [('UNICODE-RANGE', 'U+20')])])],
     ['unmatched ] token in (']),
])
def test_core_parser(css_source, expected_rules, expected_errors):
    """Core parsing and error recovery, with at-rules kept unparsed."""
    class CoreParser(CSS21Parser):
        """A parser that always accepts unparsed at-rules."""
        def parse_at_rule(self, rule, stylesheet_rules, errors, context):
            return rule

    stylesheet = CoreParser().parse_stylesheet(css_source)
    assert_errors(stylesheet.errors, expected_errors)
    result = [
        (rule.at_keyword, list(jsonify(rule.head)),
         list(jsonify(rule.body))
         if rule.body is not None else None)
        if rule.at_keyword else
        (rule.selector.as_css(), [
            (decl.name, list(jsonify(decl.value)))
            for decl in rule.declarations])
        for rule in stylesheet.rules
    ]
    assert result == expected_rules


@pytest.mark.parametrize(('css_source', 'expected_declarations',
                          'expected_errors'), [
    (' /* hey */\n', [], []),

    ('b:4', [('b', [('INTEGER', 4)])], []),

    ('{b:4}', [], ['expected a property name, got {']),

    ('b:4} c:3', [], ['unmatched } token in property value']),

    (' 4px; bar: 12% ',
     [('bar', [('PERCENTAGE', 12)])],
     ['expected a property name, got DIMENSION']),

    ('bar! 3cm auto ; baz: 7px',
     [('baz', [('DIMENSION', 7)])],
     ["expected ':', got DELIM"]),

    ('foo; bar ; baz: {("}"/* comment */) {0@fizz}}',
     [('baz', [('{', [
         ('(', [('STRING', '}')]), ('S', ' '),
         ('{', [('INTEGER', 0), ('ATKEYWORD', '@fizz')])
     ])])],
     ["expected ':'", "expected ':'"]),

    ('bar: ; baz: not(z)',
     [('baz', [('FUNCTION', 'not', [('IDENT', 'z')])])],
     ['expected a property value']),

    ('bar: (]) ; baz: U+20',
     [('baz', [('UNICODE-RANGE', 'U+20')])],
     ['unmatched ] token in (']),
])
def test_parse_style_attr(css_source, expected_declarations, expected_errors):
    """Declarations in an HTML ``style`` attribute, without braces."""
    declarations, errors = CSS21Parser().parse_style_attr(css_source)
    assert_errors(errors, expected_errors)
    result = [(decl.name, list(jsonify(decl.value)))
              for decl in declarations]
    assert result == expected_declarations


@pytest.mark.parametrize(('css_source', 'expected_declarations',
                          'expected_errors'), [
    (' /* hey */\n', [], []),

    ('a:1; b:2',
     [('a', [('INTEGER', 1)], None), ('b', [('INTEGER', 2)], None)], []),

    ('a:1 important; b: important',
     [('a', [('INTEGER', 1), ('S', ' '), ('IDENT', 'important')], None),
      ('b', [('IDENT', 'important')], None)],
     []),

    ('a:1 !important; b:2',
     [('a', [('INTEGER', 1)], 'important'), ('b', [('INTEGER', 2)], None)],
     []),

    # '!important' is matched case-insensitively, with escapes.
    ('a:1!\t Im\\50 O\\RTant; b:2',
     [('a', [('INTEGER', 1)], 'important'), ('b', [('INTEGER', 2)], None)],
     []),

    ('a: !important; b:2',
     [('b', [('INTEGER', 2)], None)],
     ['expected a value before !important']),

])
def test_important(css_source, expected_declarations, expected_errors):
    """``!important`` is parsed into the declaration priority."""
    declarations, errors = CSS21Parser().parse_style_attr(css_source)
    assert_errors(errors, expected_errors)
    result = [(decl.name, list(jsonify(decl.value)), decl.priority)
              for decl in declarations]
    assert result == expected_declarations


@pytest.mark.parametrize(('css_source', 'expected_rules', 'expected_errors'), [
    (' /* hey */\n', [], []),
    ('@import "foo.css";', [('foo.css', ['all'])], []),
    ('@import url(foo.css);', [('foo.css', ['all'])], []),
    ('@import "foo.css" screen, print;',
     [('foo.css', ['screen', 'print'])], []),
    ('@charset "ascii"; @import "foo.css"; @import "bar.css";',
     [('foo.css', ['all']), ('bar.css', ['all'])], []),
    ('foo {} @import "foo.css";',
     [], ['@import rule not allowed after a ruleset']),
    ('@page {} @import "foo.css";',
     [], ['@import rule not allowed after an @page rule']),
    ('@import ;',
     [], ['expected URI or STRING for @import rule']),
    ('@import foo.css;',
     [], ['expected URI or STRING for @import rule, got IDENT']),
    ('@import "foo.css" {}',
     [], ["expected ';', got a block"]),
])
def test_at_import(css_source, expected_rules, expected_errors):
    """@import rules: URI, media list, and ordering constraints."""
    # Pass 'encoding' to allow @charset
    stylesheet = CSS21Parser().parse_stylesheet(css_source, encoding='utf8')
    assert_errors(stylesheet.errors, expected_errors)

    result = [
        (rule.uri, rule.media)
        for rule in stylesheet.rules
        if rule.at_keyword == '@import'
    ]
    assert result == expected_rules


@pytest.mark.parametrize(('css', 'expected_result', 'expected_errors'), [
    ('@page {}', (None, (0, 0), []), []),
    ('@page:first {}', ('first', (1, 0), []), []),
    ('@page :left{}', ('left', (0, 1), []), []),
    ('@page\t\n:right {}', ('right', (0, 1), []), []),
    ('@page :last {}', None, ['invalid @page selector']),
    ('@page : right {}', None, ['invalid @page selector']),
    ('@page table:left {}', None, ['invalid @page selector']),

    ('@page;', None, ['invalid @page rule: missing block']),
    ('@page { a:1; ; b: 2 }',
     (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
     []),
    ('@page { a:1; c: ; b: 2 }',
     (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
     ['expected a property value']),
    ('@page { a:1; @top-left {} b: 2 }',
     (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
     ['unknown at-rule in @page context: @top-left']),
    ('@page { a:1; @top-left {}; b: 2 }',
     (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
     ['unknown at-rule in @page context: @top-left']),
])
def test_at_page(css, expected_result, expected_errors):
    """@page rules: selector, specificity and declarations (CSS 2.1)."""
    stylesheet = CSS21Parser().parse_stylesheet(css)
    assert_errors(stylesheet.errors, expected_errors)

    if expected_result is None:
        assert not stylesheet.rules
    else:
        assert len(stylesheet.rules) == 1
        rule = stylesheet.rules[0]
        assert rule.at_keyword == '@page'
        assert rule.at_rules == []  # in CSS 2.1
        result = (
            rule.selector,
            rule.specificity,
            [(decl.name, list(jsonify(decl.value)))
             for decl in rule.declarations],
        )
        assert result == expected_result


@pytest.mark.parametrize(('css_source', 'expected_rules', 'expected_errors'), [
    (' /* hey */\n', [], []),
    ('@media all {}', [(['all'], [])], []),
    ('@media screen, print {}', [(['screen', 'print'], [])], []),
    ('@media all;', [], ['invalid @media rule: missing block']),
    ('@media {}', [], ['expected media types for @media']),
    ('@media 4 {}', [], ['expected a media type, got INTEGER']),
    ('@media , screen {}', [], ['expected a media type']),
    ('@media screen, {}', [], ['expected a media type']),
    ('@media screen print {}', [],
     ['expected a media type, got IDENT, IDENT']),

    ('@media all { @page { a: 1 } @media; @import; foo { a: 1 } }',
     [(['all'], [('foo', [('a', [('INTEGER', 1)])])])],
     ['@page rule not allowed in @media',
      '@media rule not allowed in @media',
      '@import rule not allowed in @media']),

])
def test_at_media(css_source, expected_rules, expected_errors):
    """@media rules: media list and nested rulesets."""
    stylesheet = CSS21Parser().parse_stylesheet(css_source)
    assert_errors(stylesheet.errors, expected_errors)

    for rule in stylesheet.rules:
        assert rule.at_keyword == '@media'
    result = [
        (rule.media, [
            (sub_rule.selector.as_css(), [
                (decl.name, list(jsonify(decl.value)))
                for decl in sub_rule.declarations])
            for sub_rule in rule.rules
        ])
        for rule in stylesheet.rules
    ]
    assert result == expected_rules


# ---------------------------------------------------------------------------
# tinycss/tests/test_decoding.py
# ---------------------------------------------------------------------------
# coding: utf-8
"""
    Tests for decoding bytes to Unicode
    -----------------------------------

    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
"""
8 | """ 9 | 10 | 11 | from __future__ import unicode_literals 12 | 13 | import pytest 14 | from tinycss.decoding import decode 15 | 16 | 17 | def params(css, encoding, use_bom=False, expect_error=False, **kwargs): 18 | """Nicer syntax to make a tuple.""" 19 | return css, encoding, use_bom, expect_error, kwargs 20 | 21 | 22 | @pytest.mark.parametrize(('css', 'encoding', 'use_bom', 'expect_error', 23 | 'kwargs'), [ 24 | params('', 'utf8'), # default to utf8 25 | params('𐂃', 'utf8'), 26 | params('é', 'latin1'), # utf8 fails, fall back on ShiftJIS 27 | params('£', 'ShiftJIS', expect_error=True), 28 | params('£', 'ShiftJIS', protocol_encoding='Shift-JIS'), 29 | params('£', 'ShiftJIS', linking_encoding='Shift-JIS'), 30 | params('£', 'ShiftJIS', document_encoding='Shift-JIS'), 31 | params('£', 'ShiftJIS', protocol_encoding='utf8', 32 | document_encoding='ShiftJIS'), 33 | params('@charset "utf8"; £', 'ShiftJIS', expect_error=True), 34 | params('@charset "utf£8"; £', 'ShiftJIS', expect_error=True), 35 | params('@charset "unknown-encoding"; £', 'ShiftJIS', expect_error=True), 36 | params('@charset "utf8"; £', 'ShiftJIS', document_encoding='ShiftJIS'), 37 | params('£', 'ShiftJIS', linking_encoding='utf8', 38 | document_encoding='ShiftJIS'), 39 | params('@charset "utf-32"; 𐂃', 'utf-32-be'), 40 | params('@charset "Shift-JIS"; £', 'ShiftJIS'), 41 | params('@charset "ISO-8859-8"; £', 'ShiftJIS', expect_error=True), 42 | params('𐂃', 'utf-16-le', expect_error=True), # no BOM 43 | params('𐂃', 'utf-16-le', use_bom=True), 44 | params('𐂃', 'utf-32-be', expect_error=True), 45 | params('𐂃', 'utf-32-be', use_bom=True), 46 | params('𐂃', 'utf-32-be', document_encoding='utf-32-be'), 47 | params('𐂃', 'utf-32-be', linking_encoding='utf-32-be'), 48 | params('@charset "utf-32-le"; 𐂃', 'utf-32-be', 49 | use_bom=True, expect_error=True), 50 | # protocol_encoding takes precedence over @charset 51 | params('@charset "ISO-8859-8"; £', 'ShiftJIS', 52 | protocol_encoding='Shift-JIS'), 53 | 
params('@charset "unknown-encoding"; £', 'ShiftJIS', 54 | protocol_encoding='Shift-JIS'), 55 | params('@charset "Shift-JIS"; £', 'ShiftJIS', 56 | protocol_encoding='utf8'), 57 | # @charset takes precedence over document_encoding 58 | params('@charset "Shift-JIS"; £', 'ShiftJIS', 59 | document_encoding='ISO-8859-8'), 60 | # @charset takes precedence over linking_encoding 61 | params('@charset "Shift-JIS"; £', 'ShiftJIS', 62 | linking_encoding='ISO-8859-8'), 63 | # linking_encoding takes precedence over document_encoding 64 | params('£', 'ShiftJIS', 65 | linking_encoding='Shift-JIS', document_encoding='ISO-8859-8'), 66 | ]) 67 | def test_decode(css, encoding, use_bom, expect_error, kwargs): 68 | # Workaround PyPy and CPython 3.0 bug: https://bugs.pypy.org/issue1094 69 | css = css.encode('utf16').decode('utf16') 70 | if use_bom: 71 | source = '\ufeff' + css 72 | else: 73 | source = css 74 | css_bytes = source.encode(encoding) 75 | result, result_encoding = decode(css_bytes, **kwargs) 76 | if expect_error: 77 | assert result != css, 'Unexpected unicode success' 78 | else: 79 | assert result == css, 'Unexpected unicode error' 80 | -------------------------------------------------------------------------------- /tinycss/tests/test_fonts3.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Tests for the Fonts 3 parser 4 | ---------------------------- 5 | 6 | :copyright: (c) 2016 by Kozea. 7 | :license: BSD, see LICENSE for more details. 8 | """ 9 | 10 | 11 | from __future__ import unicode_literals 12 | 13 | import pytest 14 | from tinycss.fonts3 import CSSFonts3Parser 15 | 16 | from . 
import assert_errors 17 | from .test_tokenizer import jsonify 18 | 19 | 20 | @pytest.mark.parametrize(('css', 'expected_family_names', 'expected_errors'), [ 21 | ('@font-feature-values foo {}', ('foo',), []), 22 | ('@font-feature-values Foo Test {}', ('Foo Test',), []), 23 | ('@font-feature-values \'Foo Test\' {}', ('Foo Test',), []), 24 | ('@font-feature-values Foo Test, Foo Lol, "Foo tooo"', ( 25 | 'Foo Test', 'Foo Lol', 'Foo tooo'), []), 26 | ('@font-feature-values Foo , Foo lol {}', ('Foo', 'Foo lol'), []), 27 | ('@font-feature-values Foo , "Foobar" , Lol {}', ( 28 | 'Foo', 'Foobar', 'Lol'), []), 29 | ('@font-feature-values Foo, {}', None, [ 30 | 'invalid @font-feature-values selector']), 31 | ('@font-feature-values ,Foo {}', None, [ 32 | 'invalid @font-feature-values selector']), 33 | ('@font-feature-values Test,"Foo", {}', None, [ 34 | 'invalid @font-feature-values selector']), 35 | ('@font-feature-values Test "Foo" {}', None, [ 36 | 'invalid @font-feature-values selector']), 37 | ('@font-feature-values Test Foo, Test "bar", "foo" {}', None, [ 38 | 'invalid @font-feature-values selector']), 39 | ('@font-feature-values Test/Foo {}', None, [ 40 | 'invalid @font-feature-values selector']), 41 | ('@font-feature-values /Foo {}', None, [ 42 | 'invalid @font-feature-values selector']), 43 | ('@font-feature-values #Foo {}', None, [ 44 | 'invalid @font-feature-values selector']), 45 | # TODO: this currently works but should not work 46 | # ('@font-feature-values test@foo {}', None, [ 47 | # 'invalid @font-feature-values selector']), 48 | ('@font-feature-values Hawaii 5-0 {}', None, [ 49 | 'invalid @font-feature-values selector']), 50 | ]) 51 | def test_font_feature_values_selectors(css, expected_family_names, 52 | expected_errors): 53 | stylesheet = CSSFonts3Parser().parse_stylesheet(css) 54 | assert_errors(stylesheet.errors, expected_errors) 55 | 56 | if stylesheet.rules: 57 | assert len(stylesheet.rules) == 1 58 | rule = stylesheet.rules[0] 59 | assert 
rule.at_keyword == '@font-feature-values' 60 | assert rule.family_names == expected_family_names 61 | 62 | 63 | @pytest.mark.parametrize(('css', 'expected_declarations', 'expected_errors'), [ 64 | ('@font-face {}', [], []), 65 | ('@font-face test { src: "lol"; font-family: "bar" }', None, [ 66 | 'unexpected IDENT token in @font-face rule header']), 67 | ('@font-face { src: "lol"; font-family: "bar" }', [ 68 | ('src', [('STRING', 'lol')]), 69 | ('font-family', [('STRING', 'bar')])], []), 70 | ('@font-face { src: "lol"; font-family: "bar"; src: "baz" }', [ 71 | ('src', [('STRING', 'lol')]), 72 | ('font-family', [('STRING', 'bar')]), 73 | ('src', [('STRING', 'baz')])], []), 74 | ]) 75 | def test_font_face_content(css, expected_declarations, expected_errors): 76 | stylesheet = CSSFonts3Parser().parse_stylesheet(css) 77 | assert_errors(stylesheet.errors, expected_errors) 78 | 79 | def declarations(rule): 80 | return [(decl.name, list(jsonify(decl.value))) 81 | for decl in rule.declarations] 82 | 83 | if expected_declarations is None: 84 | assert stylesheet.rules == [] 85 | assert expected_errors 86 | else: 87 | assert len(stylesheet.rules) == 1 88 | rule = stylesheet.rules[0] 89 | assert rule.at_keyword == '@font-face' 90 | assert declarations(rule) == expected_declarations 91 | 92 | 93 | @pytest.mark.parametrize( 94 | ('css', 'expected_rules', 'expected_errors'), [ 95 | ('''@annotation{}''', None, [ 96 | '@annotation rule not allowed in stylesheet']), 97 | ('''@font-feature-values foo {}''', None, []), 98 | ('''@font-feature-values foo { 99 | @swash { ornate: 1; } 100 | @styleset { double-W: 14; sharp-terminals: 16 1; } 101 | }''', [ 102 | ('@swash', [('ornate', [('INTEGER', 1)])]), 103 | ('@styleset', [ 104 | ('double-w', [('INTEGER', 14)]), 105 | ('sharp-terminals', [ 106 | ('INTEGER', 16), ('S', ' '), ('INTEGER', 1)])])], []), 107 | ('''@font-feature-values foo { 108 | @swash { ornate: 14; } 109 | @unknown { test: 1; } 110 | }''', [('@swash', [('ornate', 
[('INTEGER', 14)])])], [ 111 | 'unknown at-rule in @font-feature-values context: @unknown']), 112 | ('''@font-feature-values foo { 113 | @annotation{boxed:1} 114 | bad: 2; 115 | @brokenstylesetbecauseofbadabove { sharp: 1} 116 | @styleset { sharp-terminals: 16 1; @bad {}} 117 | @styleset { @bad {} top-ignored: 3; top: 9000} 118 | really-bad 119 | }''', [ 120 | ('@annotation', [('boxed', [('INTEGER', 1)])]), 121 | ('@styleset', [ 122 | ('sharp-terminals', [ 123 | ('INTEGER', 16), ('S', ' '), ('INTEGER', 1)])]), 124 | ('@styleset', [('top', [('INTEGER', 9000)])])], [ 125 | 'unexpected ; token in selector', 126 | 'expected a property name, got ATKEYWORD', 127 | 'expected a property name, got ATKEYWORD', 128 | 'no declaration block found for ruleset']), 129 | ]) 130 | def test_font_feature_values_content(css, expected_rules, expected_errors): 131 | stylesheet = CSSFonts3Parser().parse_stylesheet(css) 132 | assert_errors(stylesheet.errors, expected_errors) 133 | 134 | if expected_rules is not None: 135 | assert len(stylesheet.rules) == 1 136 | rule = stylesheet.rules[0] 137 | assert rule.at_keyword == '@font-feature-values' 138 | 139 | rules = [ 140 | (at_rule.at_keyword, [ 141 | (decl.name, list(jsonify(decl.value))) 142 | for decl in at_rule.declarations]) 143 | for at_rule in rule.at_rules] if rule.at_rules else None 144 | assert rules == expected_rules 145 | -------------------------------------------------------------------------------- /tinycss/tests/test_page3.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Tests for the Paged Media 3 parser 4 | ---------------------------------- 5 | 6 | :copyright: (c) 2012 by Simon Sapin. 7 | :license: BSD, see LICENSE for more details. 8 | """ 9 | 10 | 11 | from __future__ import unicode_literals 12 | 13 | import pytest 14 | from tinycss.page3 import CSSPage3Parser 15 | 16 | from . 
from .test_tokenizer import jsonify


@pytest.mark.parametrize(('css', 'expected_selector',
                          'expected_specificity', 'expected_errors'), [
    ('@page {}', (None, None), (0, 0, 0), []),

    ('@page :first {}', (None, 'first'), (0, 1, 0), []),
    ('@page:left{}', (None, 'left'), (0, 0, 1), []),
    ('@page :right {}', (None, 'right'), (0, 0, 1), []),
    ('@page :blank{}', (None, 'blank'), (0, 1, 0), []),
    ('@page :last {}', None, None, ['invalid @page selector']),
    ('@page : first {}', None, None, ['invalid @page selector']),

    ('@page foo:first {}', ('foo', 'first'), (1, 1, 0), []),
    ('@page bar :left {}', ('bar', 'left'), (1, 0, 1), []),
    (r'@page \26:right {}', ('&', 'right'), (1, 0, 1), []),

    ('@page foo {}', ('foo', None), (1, 0, 0), []),
    (r'@page \26 {}', ('&', None), (1, 0, 0), []),

    ('@page foo fist {}', None, None, ['invalid @page selector']),
    ('@page foo, bar {}', None, None, ['invalid @page selector']),
    ('@page foo&first {}', None, None, ['invalid @page selector']),
])
def test_selectors(css, expected_selector, expected_specificity,
                   expected_errors):
    """Page selectors: ``(page name, pseudo-class)`` plus specificity."""
    stylesheet = CSSPage3Parser().parse_stylesheet(css)
    assert_errors(stylesheet.errors, expected_errors)

    if stylesheet.rules:
        assert len(stylesheet.rules) == 1
        rule = stylesheet.rules[0]
        assert rule.at_keyword == '@page'
        selector = rule.selector
        assert rule.specificity == expected_specificity
    else:
        selector = None
    assert selector == expected_selector


@pytest.mark.parametrize(('css', 'expected_declarations',
                          'expected_rules', 'expected_errors'), [
    ('@page {}', [], [], []),
    ('@page { foo: 4; bar: z }',
     [('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])], [], []),
    ('''@page { foo: 4;
        @top-center { content: "Awesome Title" }
        @bottom-left { content: counter(page) }
        bar: z
    }''',
     [('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])],
     [('@top-center', [('content', [('STRING', 'Awesome Title')])]),
      ('@bottom-left', [('content', [
          ('FUNCTION', 'counter', [('IDENT', 'page')])])])],
     []),
    ('''@page { foo: 4;
        @bottom-top { content: counter(page) }
        bar: z
    }''',
     [('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])],
     [],
     ['unknown at-rule in @page context: @bottom-top']),

    ('@page{} @top-right{}', [], [], [
        '@top-right rule not allowed in stylesheet']),
    ('@page{ @top-right 4 {} }', [], [], [
        'unexpected INTEGER token in @top-right rule header']),
    # Not much error recovery tests here. This should be covered in test_css21
])
def test_content(css, expected_declarations, expected_rules, expected_errors):
    """@page content: declarations plus margin at-rules."""
    stylesheet = CSSPage3Parser().parse_stylesheet(css)
    assert_errors(stylesheet.errors, expected_errors)

    def declarations(rule):
        return [(decl.name, list(jsonify(decl.value)))
                for decl in rule.declarations]

    assert len(stylesheet.rules) == 1
    rule = stylesheet.rules[0]
    assert rule.at_keyword == '@page'
    assert declarations(rule) == expected_declarations
    rules = [(margin_rule.at_keyword, declarations(margin_rule))
             for margin_rule in rule.at_rules]
    assert rules == expected_rules


# ---------------------------------------------------------------------------
# tinycss/tests/test_tokenizer.py
# ---------------------------------------------------------------------------
# coding: utf-8
"""
    Tests for the tokenizer
    -----------------------

    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
"""
8 | """ 9 | 10 | 11 | from __future__ import unicode_literals 12 | 13 | import os 14 | import sys 15 | 16 | import pytest 17 | from tinycss.tokenizer import ( 18 | cython_tokenize_flat, python_tokenize_flat, regroup) 19 | 20 | 21 | def test_speedups(): 22 | is_pypy = hasattr(sys, 'pypy_translation_info') 23 | env_skip_tests = os.environ.get('TINYCSS_SKIP_SPEEDUPS_TESTS') 24 | # pragma: no cover 25 | if is_pypy or env_skip_tests: 26 | return 27 | assert cython_tokenize_flat is not None, ( 28 | 'Cython speedups are not installed, related tests will ' 29 | 'be skipped. Set the TINYCSS_SKIP_SPEEDUPS_TESTS environment ' 30 | 'variable if this is expected.') 31 | 32 | 33 | @pytest.mark.parametrize(('tokenize', 'css_source', 'expected_tokens'), [ 34 | (tokenize,) + test_data 35 | for tokenize in (python_tokenize_flat, cython_tokenize_flat) 36 | for test_data in [ 37 | ('', []), 38 | ('red -->', [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]), 39 | # Longest match rule: no CDC 40 | ('red-->', [('IDENT', 'red--'), ('DELIM', '>')]), 41 | (r'p[example="foo(int x) { this.x = x;}"]', [ 42 | ('IDENT', 'p'), 43 | ('[', '['), 44 | ('IDENT', 'example'), 45 | ('DELIM', '='), 46 | ('STRING', 'foo(int x) { this.x = x;}'), 47 | (']', ']')]), 48 | 49 | # Numbers are parsed 50 | ('42 .5 -4pX 1.25em 30%', [ 51 | ('INTEGER', 42), ('S', ' '), 52 | ('NUMBER', .5), ('S', ' '), 53 | # units are normalized to lower-case: 54 | ('DIMENSION', -4, 'px'), ('S', ' '), 55 | ('DIMENSION', 1.25, 'em'), ('S', ' '), 56 | ('PERCENTAGE', 30, '%')]), 57 | 58 | # URLs are extracted 59 | ('url(foo.png)', [('URI', 'foo.png')]), 60 | ('url("foo.png")', [('URI', 'foo.png')]), 61 | 62 | # Escaping 63 | 64 | (r'/* Comment with a \ backslash */', [ 65 | ('COMMENT', '/* Comment with a \ backslash */')]), # Unchanged 66 | 67 | # backslash followed by a newline in a string: ignored 68 | ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]), 69 | 70 | # backslash followed by a newline outside a string: stands for itself 71 | 
('Lorem\\\nIpsum', [ 72 | ('IDENT', 'Lorem'), ('DELIM', '\\'), 73 | ('S', '\n'), ('IDENT', 'Ipsum')]), 74 | 75 | # Cancel the meaning of special characters 76 | (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]), # or not specal 77 | (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]), 78 | (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]), 79 | (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]), 80 | (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]), 81 | (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]), 82 | (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]), 83 | (r'Lorem+Ipsum', [ 84 | ('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]), 85 | (r'url(foo\).png)', [('URI', 'foo).png')]), 86 | 87 | # Unicode and backslash escaping 88 | ('\\26 B', [('IDENT', '&B')]), 89 | ('\\&B', [('IDENT', '&B')]), 90 | ('@\\26\tB', [('ATKEYWORD', '@&B')]), 91 | ('@\\&B', [('ATKEYWORD', '@&B')]), 92 | ('#\\26\nB', [('HASH', '#&B')]), 93 | ('#\\&B', [('HASH', '#&B')]), 94 | ('\\26\r\nB(', [('FUNCTION', '&B(')]), 95 | ('\\&B(', [('FUNCTION', '&B(')]), 96 | (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]), 97 | (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]), # max 6 digits 98 | (r'12.5\&B', [('DIMENSION', 12.5, '&b')]), 99 | (r'"\26 B"', [('STRING', '&B')]), 100 | (r"'\000026B'", [('STRING', '&B')]), 101 | (r'"\&B"', [('STRING', '&B')]), 102 | (r'url("\26 B")', [('URI', '&B')]), 103 | (r'url(\26 B)', [('URI', '&B')]), 104 | (r'url("\&B")', [('URI', '&B')]), 105 | (r'url(\&B)', [('URI', '&B')]), 106 | (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]), 107 | 108 | # Bad strings 109 | 110 | # String ends at EOF without closing: no error, parsed 111 | ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]), 112 | # Unescaped newline: ends the string, error, unparsed 113 | ('"Lorem\\26Ipsum\n', [ 114 | ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]), 115 | # Tokenization restarts after the newline, so the second " starts 116 | # a new string (which ends at EOF without errors, as above.) 
# NOTE(review): this chunk starts mid-way through the parametrize data for
# test_tokens; only the entries visible in this chunk are reproduced here —
# the full data list continues above, outside this chunk.
@pytest.mark.parametrize(('tokenize', 'css_source', 'expected_tokens'), [
    (tokenize,) + test_data
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        ('"Lorem\\26Ipsum\ndolor" sit', [
            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
            ('IDENT', 'dolor'), ('STRING', ' sit')]),
    ]])
def test_tokens(tokenize, css_source, expected_tokens):
    """Check the (type, value[, unit]) tuples produced for each source."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    sources = [css_source]
    if sys.version_info[0] < 3:
        # On Python 2.x, ASCII-only bytestrings can be used
        # where Unicode is expected.
        sources.append(css_source.encode('ascii'))
    for css_source in sources:
        tokens = tokenize(css_source, ignore_comments=False)
        result = [
            (token.type, token.value) + (
                () if token.unit is None else (token.unit,))
            for token in tokens
        ]
        assert result == expected_tokens


@pytest.mark.parametrize('tokenize', [
    python_tokenize_flat, cython_tokenize_flat])
def test_positions(tokenize):
    """Test the reported line/column position of each token."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    css = '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }'
    tokens = tokenize(css, ignore_comments=False)
    result = [(token.type, token.line, token.column) for token in tokens]
    assert result == [
        ('COMMENT', 1, 1), ('S', 2, 9),
        ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
        ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
        ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
        ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
        ('S', 5, 5), ('}', 5, 6)]


@pytest.mark.parametrize(('tokenize', 'css_source', 'expected_tokens'), [
    (tokenize,) + test_data
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        ('', []),
        (r'Lorem\26 "i\psum"4px', [
            ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),

        ('not([[lorem]]{ipsum (42)})', [
            ('FUNCTION', 'not', [
                ('[', [
                    ('[', [
                        ('IDENT', 'lorem'),
                    ]),
                ]),
                ('{', [
                    ('IDENT', 'ipsum'),
                    ('S', ' '),
                    ('(', [
                        ('INTEGER', 42),
                    ])
                ])
            ])]),

        # Close everything at EOF, no error
        ('a[b{"d', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('STRING', 'd'),
                ]),
            ]),
        ]),

        # Any remaining ), ] or } token is a nesting error
        ('a[b{d]e}', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                    (']', ']'),  # The error is visible here
                    ('IDENT', 'e'),
                ]),
            ]),
        ]),
        # ref:
        ('a[b{d}e]', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                ]),
                ('IDENT', 'e'),
            ]),
        ]),
    ]])
def test_token_grouping(tokenize, css_source, expected_tokens):
    """Check the tree structure produced by regroup()."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    tokens = regroup(tokenize(css_source, ignore_comments=False))
    result = list(jsonify(tokens))
    assert result == expected_tokens


def jsonify(tokens):
    """Turn tokens into "JSON-compatible" data structures."""
    for token in tokens:
        if token.type == 'FUNCTION':
            yield (token.type, token.function_name,
                   list(jsonify(token.content)))
        elif token.is_container:
            yield token.type, list(jsonify(token.content))
        else:
            yield token.type, token.value


@pytest.mark.parametrize(('tokenize', 'ignore_comments', 'expected_tokens'), [
    (tokenize,) + test_data
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        (False, [
            ('COMMENT', '/* lorem */'),
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
                ('COMMENT', '/* sit */'),
            ]),
            ('BAD_COMMENT', '/* amet')
        ]),
        (True, [
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
            ]),
        ]),
    ]])
def test_comments(tokenize, ignore_comments, expected_tokens):
    """Comments (and a BAD_COMMENT at EOF) are kept or dropped on request."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
    tokens = regroup(tokenize(css_source, ignore_comments))
    result = list(jsonify(tokens))
    assert result == expected_tokens


@pytest.mark.parametrize(('tokenize', 'css_source'), [
    (tokenize, test_data)
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        r'p[example="foo(int x) { this.x = x;}"]',
        '"Lorem\\26Ipsum\ndolor" sit',
        '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }',
        'not([[lorem]]{ipsum (42)})',
        'a[b{d]e}',
        'a[b{"d',
    ]])
def test_token_serialize_css(tokenize, css_source):
    """as_css() must round-trip the source, flat or grouped."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    for _regroup in [regroup, lambda x: x]:
        tokens = _regroup(tokenize(css_source, ignore_comments=False))
        result = ''.join(token.as_css() for token in tokens)
        assert result == css_source


@pytest.mark.parametrize(('tokenize', 'css_source'), [
    (tokenize, test_data)
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        '(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
    ]
])
def test_token_api(tokenize, css_source):
    """A grouped source is a single container with the expected content."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    tokens = list(regroup(tokenize(css_source)))
    assert len(tokens) == 1
    token = tokens[0]
    expected_len = 7  # 2 spaces, 2 commas, 3 others.
    assert len(token.content) == expected_len
302 | assert len(token.content) == expected_len 303 | -------------------------------------------------------------------------------- /tinycss/token_data.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.token_data 4 | ------------------ 5 | 6 | Shared data for both implementations (Cython and Python) of the tokenizer. 7 | 8 | :copyright: (c) 2012 by Simon Sapin. 9 | :license: BSD, see LICENSE for more details. 10 | """ 11 | 12 | from __future__ import unicode_literals 13 | 14 | import functools 15 | import operator 16 | import re 17 | import string 18 | import sys 19 | 20 | # * Raw strings with the r'' notation are used so that \ do not need 21 | # to be escaped. 22 | # * Names and regexps are separated by a tabulation. 23 | # * Macros are re-ordered so that only previous definitions are needed. 24 | # * {} are used for macro substitution with ``string.Formatter``, 25 | # so other uses of { or } have been doubled. 26 | # * The syntax is otherwise compatible with re.compile. 27 | # * Some parentheses were added to add capturing groups. 28 | # (in unicode, DIMENSION and URI) 29 | 30 | # *** Willful violation: *** 31 | # Numbers can take a + or - sign, but the sign is a separate DELIM token. 32 | # Since comments are allowed anywhere between tokens, this makes 33 | # the following this is valid. It means 10 negative pixels: 34 | # margin-top: -/**/10px 35 | 36 | # This makes parsing numbers a pain, so instead we’ll do the same is Firefox 37 | # and make the sign part as of the 'num' macro. The above CSS will be invalid. 38 | # See discussion: 39 | # http://lists.w3.org/Archives/Public/www-style/2011Oct/0028.html 40 | MACROS = r''' 41 | nl \n|\r\n|\r|\f 42 | w [ \t\r\n\f]* 43 | nonascii [^\0-\237] 44 | unicode \\([0-9a-f]{{1,6}})(\r\n|[ \n\r\t\f])? 
45 | simple_escape [^\n\r\f0-9a-f] 46 | escape {unicode}|\\{simple_escape} 47 | nmstart [_a-z]|{nonascii}|{escape} 48 | nmchar [_a-z0-9-]|{nonascii}|{escape} 49 | name {nmchar}+ 50 | ident [-]?{nmstart}{nmchar}* 51 | num [-+]?(?:[0-9]*\.[0-9]+|[0-9]+) 52 | string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\" 53 | string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\' 54 | string {string1}|{string2} 55 | badstring1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\\? 56 | badstring2 \'([^\n\r\f\\']|\\{nl}|{escape})*\\? 57 | badstring {badstring1}|{badstring2} 58 | badcomment1 \/\*[^*]*\*+([^/*][^*]*\*+)* 59 | badcomment2 \/\*[^*]*(\*+[^/*][^*]*)* 60 | badcomment {badcomment1}|{badcomment2} 61 | baduri1 url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w} 62 | baduri2 url\({w}{string}{w} 63 | baduri3 url\({w}{badstring} 64 | baduri {baduri1}|{baduri2}|{baduri3} 65 | '''.replace(r'\0', '\0').replace(r'\237', '\237') 66 | 67 | # Removed these tokens. Instead, they’re tokenized as two DELIM each. 68 | # INCLUDES ~= 69 | # DASHMATCH |= 70 | # They are only used in selectors but selectors3 also have ^=, *= and $=. 71 | # We don’t actually parse selectors anyway 72 | 73 | # Re-ordered so that the longest match is always the first. 74 | # For example, "url('foo')" matches URI, BAD_URI, FUNCTION and IDENT, 75 | # but URI would always be a longer match than the others. 76 | TOKENS = r''' 77 | S [ \t\r\n\f]+ 78 | 79 | URI url\({w}({string}|([!#$%&*-\[\]-~]|{nonascii}|{escape})*){w}\) 80 | BAD_URI {baduri} 81 | FUNCTION {ident}\( 82 | UNICODE-RANGE u\+[0-9a-f?]{{1,6}}(-[0-9a-f]{{1,6}})? 
83 | IDENT {ident} 84 | 85 | ATKEYWORD @{ident} 86 | HASH #{name} 87 | 88 | DIMENSION ({num})({ident}) 89 | PERCENTAGE {num}% 90 | NUMBER {num} 91 | 92 | STRING {string} 93 | BAD_STRING {badstring} 94 | 95 | COMMENT \/\*[^*]*\*+([^/*][^*]*\*+)*\/ 96 | BAD_COMMENT {badcomment} 97 | 98 | : : 99 | ; ; 100 | { \{{ 101 | } \}} 102 | ( \( 103 | ) \) 104 | [ \[ 105 | ] \] 106 | CDO 108 | ''' 109 | 110 | 111 | # Strings with {macro} expanded 112 | COMPILED_MACROS = {} 113 | 114 | 115 | COMPILED_TOKEN_REGEXPS = [] # [(name, regexp.match)] ordered 116 | COMPILED_TOKEN_INDEXES = {} # {name: i} helper for the C speedups 117 | 118 | 119 | # Indexed by codepoint value of the first character of a token. 120 | # Codepoints >= 160 (aka nonascii) all use the index 160. 121 | # values are (i, name, regexp.match) 122 | TOKEN_DISPATCH = [] 123 | 124 | 125 | try: 126 | unichr 127 | except NameError: 128 | # Python 3 129 | unichr = chr 130 | unicode = str 131 | 132 | 133 | def _init(): 134 | """Import-time initialization.""" 135 | COMPILED_MACROS.clear() 136 | for line in MACROS.splitlines(): 137 | if line.strip(): 138 | name, value = line.split('\t') 139 | COMPILED_MACROS[name.strip()] = '(?:%s)' \ 140 | % value.format(**COMPILED_MACROS) 141 | 142 | COMPILED_TOKEN_REGEXPS[:] = ( 143 | ( 144 | name.strip(), 145 | re.compile( 146 | value.format(**COMPILED_MACROS), 147 | # Case-insensitive when matching eg. 
uRL(foo) 148 | # but preserve the case in extracted groups 149 | re.I 150 | ).match 151 | ) 152 | for line in TOKENS.splitlines() 153 | if line.strip() 154 | for name, value in [line.split('\t')] 155 | ) 156 | 157 | COMPILED_TOKEN_INDEXES.clear() 158 | for i, (name, regexp) in enumerate(COMPILED_TOKEN_REGEXPS): 159 | COMPILED_TOKEN_INDEXES[name] = i 160 | 161 | dispatch = [[] for i in range(161)] 162 | for chars, names in [ 163 | (' \t\r\n\f', ['S']), 164 | ('uU', ['URI', 'BAD_URI', 'UNICODE-RANGE']), 165 | # \ is an escape outside of another token 166 | (string.ascii_letters + '\\_-' + unichr(160), ['FUNCTION', 'IDENT']), 167 | (string.digits + '.+-', ['DIMENSION', 'PERCENTAGE', 'NUMBER']), 168 | ('@', ['ATKEYWORD']), 169 | ('#', ['HASH']), 170 | ('\'"', ['STRING', 'BAD_STRING']), 171 | ('/', ['COMMENT', 'BAD_COMMENT']), 172 | ('<', ['CDO']), 173 | ('-', ['CDC']), 174 | ]: 175 | for char in chars: 176 | dispatch[ord(char)].extend(names) 177 | for char in ':;{}()[]': 178 | dispatch[ord(char)] = [char] 179 | 180 | TOKEN_DISPATCH[:] = ( 181 | [ 182 | (index,) + COMPILED_TOKEN_REGEXPS[index] 183 | for name in names 184 | for index in [COMPILED_TOKEN_INDEXES[name]] 185 | ] 186 | for names in dispatch 187 | ) 188 | 189 | 190 | _init() 191 | 192 | 193 | def _unicode_replace(match, int=int, unichr=unichr, maxunicode=sys.maxunicode): 194 | codepoint = int(match.group(1), 16) 195 | if codepoint <= maxunicode: 196 | return unichr(codepoint) 197 | else: 198 | return '\N{REPLACEMENT CHARACTER}' # U+FFFD 199 | 200 | 201 | UNICODE_UNESCAPE = functools.partial( 202 | re.compile(COMPILED_MACROS['unicode'], re.I).sub, 203 | _unicode_replace) 204 | 205 | NEWLINE_UNESCAPE = functools.partial( 206 | re.compile(r'()\\' + COMPILED_MACROS['nl']).sub, 207 | '') 208 | 209 | SIMPLE_UNESCAPE = functools.partial( 210 | re.compile(r'\\(%s)' % COMPILED_MACROS['simple_escape'], re.I).sub, 211 | # Same as r'\1', but faster on CPython 212 | operator.methodcaller('group', 1)) 213 | 214 | 
class Token(object):
    """A single atomic token.

    .. attribute:: is_container

        Always ``False``.
        Helps to tell :class:`Token` apart from :class:`ContainerToken`.

    .. attribute:: type

        The type of token as a string:

        ``S``
            A sequence of white space

        ``IDENT``
            An identifier: a name that does not start with a digit.
            A name is a sequence of letters, digits, ``_``, ``-``, escaped
            characters and non-ASCII characters. Eg: ``margin-left``

        ``HASH``
            ``#`` followed immediately by a name. Eg: ``#ff8800``

        ``ATKEYWORD``
            ``@`` followed immediately by an identifier. Eg: ``@page``

        ``URI``
            Eg: ``url(foo)`` The content may or may not be quoted.

        ``UNICODE-RANGE``
            ``U+`` followed by one or two hexadecimal
            Unicode codepoints. Eg: ``U+20-00FF``

        ``INTEGER``
            An integer with an optional ``+`` or ``-`` sign

        ``NUMBER``
            A non-integer number with an optional ``+`` or ``-`` sign

        ``DIMENSION``
            An integer or number followed immediately by an
            identifier (the unit). Eg: ``12px``

        ``PERCENTAGE``
            An integer or number followed immediately by ``%``

        ``STRING``
            A string, quoted with ``"`` or ``'``

        ``:`` or ``;``
            That character.

        ``DELIM``
            A single character not matched in another token. Eg: ``,``

        See the source of the :mod:`.token_data` module for the precise
        regular expressions that match various tokens.

        Note that other token types exist in the early tokenization steps,
        but these are ignored, are syntax errors, or are later transformed
        into :class:`ContainerToken` or :class:`FunctionToken`.

    .. attribute:: value

        The parsed value:

        * INTEGER, NUMBER, PERCENTAGE or DIMENSION tokens: the numeric value
          as an int or float.
        * STRING tokens: the unescaped string without quotes
        * URI tokens: the unescaped URI without quotes or
          ``url(`` and ``)`` markers.
        * IDENT, ATKEYWORD or HASH tokens: the unescaped token,
          with ``@`` or ``#`` markers left as-is
        * Other tokens: same as :attr:`as_css`

        *Unescaped* refers to the various escaping methods based on the
        backslash ``\\`` character in CSS syntax.

    .. attribute:: unit

        * DIMENSION tokens: the normalized (unescaped, lower-case)
          unit name as a string. eg. ``'px'``
        * PERCENTAGE tokens: the string ``'%'``
        * Other tokens: ``None``

    .. attribute:: line

        The line number in the CSS source of the start of this token.

    .. attribute:: column

        The column number (inside a source line) of the start of this token.

    """
    is_container = False
    __slots__ = 'type', '_as_css', 'value', 'unit', 'line', 'column'

    def __init__(self, type_, css_value, value, unit, line, column):
        self.type = type_
        self._as_css = css_value
        self.value = value
        self.unit = unit
        self.line = line
        self.column = column

    def as_css(self):
        """
        Return as an Unicode string the CSS representation of the token,
        as parsed in the source.
        """
        return self._as_css

    def __repr__(self):
        # The angle-bracketed format string was lost to markup stripping
        # in this listing; restored from the visible .format(self, ...) call.
        return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
                .format(self, self.unit or ''))

    def __eq__(self, other):
        if type(self) != type(other):
            raise TypeError(
                'Cannot compare {0} and {1}'.format(type(self), type(other)))
        # BUG FIX: the previous version called all() with four positional
        # arguments (a TypeError at runtime: all() takes one iterable) and
        # read a non-existent ``type_`` attribute (the slot is ``type``).
        # Tokens compare equal on everything except their source position.
        # NOTE(review): defining __eq__ leaves __hash__ as None on Python 3,
        # making Token unhashable — confirm this is intended.
        return (
            self.type == other.type and
            self._as_css == other._as_css and
            self.value == other.value and
            self.unit == other.unit)
395 | """ 396 | parts = [self._css_start] 397 | parts.extend(token.as_css() for token in self.content) 398 | parts.append(self._css_end) 399 | return ''.join(parts) 400 | 401 | format_string = '' 402 | 403 | def __repr__(self): 404 | return (self.format_string + ' {0.content}').format(self) 405 | 406 | 407 | class FunctionToken(ContainerToken): 408 | """A specialized :class:`ContainerToken` for a ``FUNCTION`` group. 409 | Has an additional attribute: 410 | 411 | .. attribute:: function_name 412 | 413 | The unescaped name of the function, with the ``(`` marker removed. 414 | 415 | """ 416 | __slots__ = 'function_name', 417 | 418 | def __init__(self, type_, css_start, css_end, function_name, content, 419 | line, column): 420 | super(FunctionToken, self).__init__( 421 | type_, css_start, css_end, content, line, column) 422 | # Remove the ( marker: 423 | self.function_name = function_name[:-1] 424 | 425 | format_string = ('') 427 | 428 | 429 | class TokenList(list): 430 | """ 431 | A mixed list of :class:`~.token_data.Token` and 432 | :class:`~.token_data.ContainerToken` objects. 433 | 434 | This is a subclass of the builtin :class:`~builtins.list` type. 435 | It can be iterated, indexed and sliced as usual, but also has some 436 | additional API: 437 | 438 | """ 439 | @property 440 | def line(self): 441 | """The line number in the CSS source of the first token.""" 442 | return self[0].line 443 | 444 | @property 445 | def column(self): 446 | """The column number (inside a source line) of the first token.""" 447 | return self[0].column 448 | 449 | def as_css(self): 450 | """ 451 | Return as an Unicode string the CSS representation of the tokens, 452 | as parsed in the source. 
# ---- tinycss/tokenizer.py ----
# coding: utf-8
"""
    tinycss.tokenizer
    -----------------

    Tokenizer for the CSS core syntax:
    http://www.w3.org/TR/CSS21/syndata.html#tokenization

    This is the pure-python implementation. See also speedups.pyx

    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
"""

from __future__ import unicode_literals

from . import token_data


def tokenize_flat(
        css_source, ignore_comments=True,
        # Make these local variable to avoid global lookups in the loop
        tokens_dispatch=token_data.TOKEN_DISPATCH,
        unicode_unescape=token_data.UNICODE_UNESCAPE,
        newline_unescape=token_data.NEWLINE_UNESCAPE,
        simple_unescape=token_data.SIMPLE_UNESCAPE,
        find_newlines=token_data.FIND_NEWLINES,
        Token=token_data.Token,
        len=len,
        int=int,
        float=float,
        list=list,
        _None=None):
    """
    :param css_source:
        CSS as an unicode string
    :param ignore_comments:
        if true (the default) comments will not be included in the
        return value
    :return:
        An iterator of :class:`Token`

    """
    pos = 0
    line = 1
    column = 1
    source_len = len(css_source)
    tokens = []
    while pos < source_len:
        char = css_source[pos]
        if char in ':;{}()[]':
            # Single-character punctuation: the token type is the character.
            type_ = char
            css_value = char
        else:
            codepoint = min(ord(char), 160)
            for _index, type_, regexp in tokens_dispatch[codepoint]:
                match = regexp(css_source, pos)
                if match:
                    # First match is the longest. See comments on TOKENS above.
                    css_value = match.group()
                    break
            else:
                # No match.
                # "Any other character not matched by the above rules,
                #  and neither a single nor a double quote."
                # ... but quotes at the start of a token are always matched
                # by STRING or BAD_STRING. So DELIM is any single character.
                type_ = 'DELIM'
                css_value = char
        length = len(css_value)
        next_pos = pos + length

        # A BAD_COMMENT is a comment at EOF. Ignore it too.
        if not (ignore_comments and type_ in ('COMMENT', 'BAD_COMMENT')):
            # Parse numbers, extract strings and URIs, unescape
            unit = _None
            if type_ == 'DIMENSION':
                value = match.group(1)
                value = float(value) if '.' in value else int(value)
                unit = match.group(2)
                unit = simple_unescape(unit)
                unit = unicode_unescape(unit)
                unit = unit.lower()  # normalize
            elif type_ == 'PERCENTAGE':
                value = css_value[:-1]
                value = float(value) if '.' in value else int(value)
                unit = '%'
            elif type_ == 'NUMBER':
                value = css_value
                if '.' in value:
                    value = float(value)
                else:
                    value = int(value)
                    type_ = 'INTEGER'
            elif type_ in ('IDENT', 'ATKEYWORD', 'HASH', 'FUNCTION'):
                value = simple_unescape(css_value)
                value = unicode_unescape(value)
            elif type_ == 'URI':
                value = match.group(1)
                if value and value[0] in '"\'':
                    value = value[1:-1]  # Remove quotes
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            elif type_ == 'STRING':
                value = css_value[1:-1]  # Remove quotes
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            # BAD_STRING can only be one of:
            # * Unclosed string at the end of the stylesheet:
            #   Close the string, but this is not an error.
            #   Make it a "good" STRING token.
            # * Unclosed string at the (unescaped) end of the line:
            #   Close the string, but this is an error.
            #   Leave it as a BAD_STRING, don't bother parsing it.
            # See http://www.w3.org/TR/CSS21/syndata.html#parsing-errors
            elif type_ == 'BAD_STRING' and next_pos == source_len:
                type_ = 'STRING'
                value = css_value[1:]  # Remove quote
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            else:
                value = css_value
            tokens.append(Token(type_, css_value, value, unit, line, column))

        pos = next_pos
        newlines = list(find_newlines(css_value))
        if newlines:
            line += len(newlines)
            # Add 1 to have lines start at column 1, not 0
            column = length - newlines[-1].end() + 1
        else:
            column += length
    return tokens


def regroup(tokens):
    """
    Match pairs of tokens: () [] {} function()
    (Strings in "" or '' are taken care of by the tokenizer.)

    Opening tokens are replaced by a :class:`ContainerToken`.
    Closing tokens are removed. Unmatched closing tokens are invalid
    but left as-is. All nested structures that are still open at
    the end of the stylesheet are implicitly closed.

    :param tokens:
        a *flat* iterable of tokens, as returned by :func:`tokenize_flat`.
    :return:
        A tree of tokens.

    """
    # "global" objects for the inner recursion
    pairs = {'FUNCTION': ')', '(': ')', '[': ']', '{': '}'}
    tokens = iter(tokens)
    eof = [False]

    def _regroup_inner(stop_at=None, tokens=tokens, pairs=pairs, eof=eof,
                       ContainerToken=token_data.ContainerToken,
                       FunctionToken=token_data.FunctionToken):
        for token in tokens:
            type_ = token.type
            if type_ == stop_at:
                return

            end = pairs.get(type_)
            if end is None:
                yield token  # Not a grouping token
            else:
                assert not isinstance(token, ContainerToken), (
                    'Token looks already grouped: {0}'.format(token))
                content = list(_regroup_inner(end))
                if eof[0]:
                    end = ''  # Implicit end of structure at EOF.
                if type_ == 'FUNCTION':
                    yield FunctionToken(token.type, token.as_css(), end,
                                        token.value, content,
                                        token.line, token.column)
                else:
                    yield ContainerToken(token.type, token.as_css(), end,
                                         content,
                                         token.line, token.column)
        else:
            eof[0] = True  # end of file/stylesheet
    return _regroup_inner()
154 | 155 | """ 156 | # "global" objects for the inner recursion 157 | pairs = {'FUNCTION': ')', '(': ')', '[': ']', '{': '}'} 158 | tokens = iter(tokens) 159 | eof = [False] 160 | 161 | def _regroup_inner(stop_at=None, tokens=tokens, pairs=pairs, eof=eof, 162 | ContainerToken=token_data.ContainerToken, 163 | FunctionToken=token_data.FunctionToken): 164 | for token in tokens: 165 | type_ = token.type 166 | if type_ == stop_at: 167 | return 168 | 169 | end = pairs.get(type_) 170 | if end is None: 171 | yield token # Not a grouping token 172 | else: 173 | assert not isinstance(token, ContainerToken), ( 174 | 'Token looks already grouped: {0}'.format(token)) 175 | content = list(_regroup_inner(end)) 176 | if eof[0]: 177 | end = '' # Implicit end of structure at EOF. 178 | if type_ == 'FUNCTION': 179 | yield FunctionToken(token.type, token.as_css(), end, 180 | token.value, content, 181 | token.line, token.column) 182 | else: 183 | yield ContainerToken(token.type, token.as_css(), end, 184 | content, 185 | token.line, token.column) 186 | else: 187 | eof[0] = True # end of file/stylesheet 188 | return _regroup_inner() 189 | 190 | 191 | def tokenize_grouped(css_source, ignore_comments=True): 192 | """ 193 | :param css_source: 194 | CSS as an unicode string 195 | :param ignore_comments: 196 | if true (the default) comments will not be included in the 197 | return value 198 | :return: 199 | An iterator of :class:`Token` 200 | 201 | """ 202 | return regroup(tokenize_flat(css_source, ignore_comments)) 203 | 204 | 205 | # Optional Cython version of tokenize_flat 206 | # Make both versions available with explicit names for tests. 207 | python_tokenize_flat = tokenize_flat 208 | try: 209 | from . 
import speedups 210 | except ImportError: 211 | cython_tokenize_flat = None 212 | else: 213 | cython_tokenize_flat = speedups.tokenize_flat 214 | # Default to the Cython version if available 215 | tokenize_flat = cython_tokenize_flat 216 | -------------------------------------------------------------------------------- /tinycss/version.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.4' 2 | --------------------------------------------------------------------------------