├── LICENSE
├── MANIFEST
├── MANIFEST.in
├── README
├── README.md
├── emacs
│   └── pyxl-mode.el
├── finish_install.py
├── pyxl.pth
├── pyxl
│   ├── __init__.py
│   ├── base.py
│   ├── browser_hacks.py
│   ├── codec
│   │   ├── __init__.py
│   │   ├── html_tokenizer.py
│   │   ├── parser.py
│   │   ├── pytokenize.py
│   │   ├── register.py
│   │   └── tokenizer.py
│   ├── element.py
│   ├── examples
│   │   ├── __init__.py
│   │   └── hello_world.py
│   ├── html.py
│   ├── rss.py
│   ├── scripts
│   │   ├── __init__.py
│   │   └── parse_file.py
│   └── utils.py
├── setup.py
├── tests
│   ├── __init__.py
│   ├── error_cases
│   │   ├── if_1.py.txt
│   │   ├── if_2.py.txt
│   │   └── if_3.py.txt
│   ├── test_attr_name_case.py
│   ├── test_basic.py
│   ├── test_curlies_in_attrs_1.py
│   ├── test_curlies_in_attrs_2.py
│   ├── test_curlies_in_strings_1.py
│   ├── test_curlies_in_strings_2.py
│   ├── test_curlies_in_strings_3.py
│   ├── test_curlies_in_strings_4.py
│   ├── test_eof_1.py
│   ├── test_errors.py
│   ├── test_html_comments_1.py
│   ├── test_html_comments_2.py
│   ├── test_if_1.py
│   ├── test_if_2.py
│   ├── test_if_3.py
│   ├── test_if_4.py
│   ├── test_nested_curlies.py
│   ├── test_python_comments_1.py
│   ├── test_python_comments_2.py
│   ├── test_python_comments_3.py
│   ├── test_rss.py
│   ├── test_tags_in_curlies_1.py
│   ├── test_tags_in_curlies_10.py
│   ├── test_tags_in_curlies_2.py
│   ├── test_tags_in_curlies_3.py
│   ├── test_tags_in_curlies_4.py
│   ├── test_tags_in_curlies_5.py
│   ├── test_tags_in_curlies_6.py
│   ├── test_tags_in_curlies_7.py
│   ├── test_tags_in_curlies_8.py
│   ├── test_tags_in_curlies_9.py
│   ├── test_whitespace_1.py
│   ├── test_whitespace_10.py
│   ├── test_whitespace_11.py
│   ├── test_whitespace_12.py
│   ├── test_whitespace_2.py
│   ├── test_whitespace_3.py
│   ├── test_whitespace_4.py
│   ├── test_whitespace_5.py
│   ├── test_whitespace_6.py
│   ├── test_whitespace_7.py
│   ├── test_whitespace_8.py
│   └── test_whitespace_9.py
└── vim
    ├── ftdetect
    │   └── pyxl.vim
    ├── indent
    │   └── pyxl.vim
    └── syntax
        └── pyxl.vim
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/MANIFEST:
--------------------------------------------------------------------------------
1 | # file GENERATED by distutils, do NOT edit
2 | README
3 | finish_install.py
4 | pyxl.pth
5 | setup.py
6 | emacs/pyxl-mode.el
7 | pyxl/__init__.py
8 | pyxl/base.py
9 | pyxl/element.py
10 | pyxl/html.py
11 | pyxl/utils.py
12 | pyxl/codec/__init__.py
13 | pyxl/codec/parser.py
14 | pyxl/codec/register.py
15 | pyxl/codec/tokenizer.py
16 | pyxl/examples/__init__.py
17 | pyxl/examples/hello_world.py
18 | pyxl/scripts/__init__.py
19 | pyxl/scripts/parse_file.py
20 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README pyxl.pth finish_install.py
2 | recursive-include emacs *.el
3 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | README.md
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Pyxl is an open source package that extends Python to support inline HTML. It converts HTML fragments into valid Python expressions, and is meant as a replacement for traditional python templating systems like [Mako](http://www.makotemplates.org/) or [Cheetah](http://www.cheetahtemplate.org/). It automatically escapes data, enforces correct markup and makes it easier to write reusable and well structured UI code. Pyxl was inspired by the [XHP](https://github.com/facebook/xhp/wiki) project at Facebook.
2 |
3 | This project only supports Python 2. However, a [Python 3 fork](https://github.com/gvanrossum/pyxl3) is available.
4 |
5 | ## Motivation
6 |
7 | At Cove, where Pyxl was developed, we found that using templates was getting in the way of quickly building new features. There were the usual issues of remembering to escape data to prevent XSS holes, avoiding invalid markup and deciphering cryptic stack traces. More importantly, our templates were getting hard to manage and understand which made iterating on our product more work than should be necessary.
8 |
9 | Existing templating systems do support things like logic and reusable modules - but they are essentially like having a different programming language for writing UI which falls well short of python itself. The primary reason templating systems exist is because creating HTML in languages like python means writing crazy string manipulation code, or losing the niceness of writing actual HTML by doing something like this:
10 |
11 | ```py
12 | import html
13 | print (
14 | html.head().appendChild(
15 | html.body().appendChild(
16 | html.text("Hello World!"))))
17 | ```
18 |
19 | To get around these limitations, we developed Pyxl which allowed us to treat HTML as a part of the python language itself. So, writing the above example with Pyxl would look like:
20 |
21 | ```py
22 | # coding: pyxl
23 | print <html><body>Hello World!</body></html>
24 | ```
25 |
26 | This meant no longer dealing with a separate "templating" language, and a lot more control over how we wrote our front-end code. Also, since Pyxl maps HTML to structured python objects and expressions instead of arbitrary blobs of strings, adding support for things like automatically escaping data was trivial. Switching to Pyxl led to much cleaner and modularized UI code, and allowed us to write new features and pages a lot quicker.
27 |
28 | ## Installation
29 |
30 | Clone the repo and run the following commands from the directory you cloned to.
31 |
32 | ```sh
33 | python setup.py build
34 | sudo python setup.py install
35 | sudo python finish_install.py
36 | ```
37 |
38 | To confirm that Pyxl was correctly installed, run the following command from the same directory:
39 |
40 | ```sh
41 | python pyxl/examples/hello_world.py
42 | ```
43 |
44 | You should see the string `Hello World!` printed out. That's it! You're ready to use Pyxl.
45 |
46 | ## Running the tests
47 |
48 | After installing pyxl:
49 |
50 | ```sh
51 | easy_install unittest2
52 | python pyxl_tests.py
53 | ```
54 |
55 | ## How it works
56 |
57 | Pyxl converts HTML tags into python objects before the file is run through the interpreter, so the code that actually runs is regular python. For example, the `Hello World` example above is converted into:
58 |
59 | ```py
60 | print x_head().append_children(x_body().append_children("Hello World!"))
61 | ```
62 |
63 | Pyxl's usefulness comes from being able to write HTML rather than unwieldy object instantiations and function calls. Note that Pyxl automatically adds objects for all HTML tags to Python builtins, so there is no need to import `x_head` or `x_body` in the example above.
64 |
65 | The conversion to Python is relatively straightforward: Opening tags are converted into object instantiations for the respective tag, nested tags are passed in as arguments to the `append_children` method, and closing tags close the bracket to the `append_children` call. A big advantage of this approach is that stack traces on errors map directly to what you've written. To learn more about how Pyxl does this, see the **Implementation Details** section below.
66 |
67 | ## Documentation
68 |
69 | All python files with inline HTML must have the following first line:
70 |
71 | ```py
72 | # coding: pyxl
73 | ```
74 |
75 | With that, you can start using HTML in your python file.
76 |
77 | ### Inline Python Expressions
78 |
79 | Anything wrapped with {}'s is evaluated as a python expression. Please note that attribute values must be wrapped inside quotes, regardless of whether it contains a python expression or not. When used in attribute values, the python expression must evaluate to something that can be cast to unicode. When used inside a tag, the expression can evaluate to anything that can be cast to unicode, an HTML tag, or a list containing those two types. This is demonstrated in the example below:
80 |
81 | ```py
82 | image_name = "bolton.png"
83 | image = <img src="/static/images/{image_name}" />
84 |
85 | text = "Michael Bolton"
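86 | block = <div>{image}{text}</div>
87 |
88 | element_list = [image, text]
89 | block2 = <div>{element_list}</div>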
90 | ```
91 |
92 | ### Dynamic Elements
93 |
94 | Pyxl converts tags into python objects in the background, which inherit from a class called [`x_base`](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/base.py). This means that tags have certain methods you can call on them. Here is an example snippet that uses the `append` function to dynamically create an unordered list.
95 |
96 | ```py
97 | items = ['Puppies', 'Dragons']
98 | nav = <ul />
99 | for text in items:
100 |     nav.append(<li>{text}</li>)
101 | ```
102 |
103 | Another useful function is `children()`, which returns a list of all the child nodes for an element. `children()` accepts an optional selector string as an argument to filter the children. Currently, there is only support for filtering the children by a class (format: ".class_name"), id (format: "#id_string") or tag name. Here is a snippet which adds all `input` elements from an existing form to a new form:
104 |
105 | ```py
106 | new_form = <form>{old_form.children("input")}</form>
107 | ```
108 |
109 | ### Attributes
110 |
111 | You can access any attribute of a tag as a member variable on the tag, or via the `attr(attr_name)` function. Setting attributes must happen via the `set_attr(attr_name, attr_value)` function, i.e. do not set attrs by directly setting member variables. To access attributes that contain '-' (hyphen) as a member variable, replace the hyphen with '_' (underscore). For this reason, pyxl does not allow attributes with an underscore in their name. Here is an example that demonstrates all these principles:
112 |
113 | ```py
114 | fruit = <div data-text="tangerine" />
115 | print fruit.data_text
116 | fruit.set_attr('data-text', 'clementine')
117 | print fruit.attr('data-text') # alternate method for accessing attributes
118 | ```
119 |
120 | ### Escaping
121 |
122 | Pyxl automatically escapes all data and attribute values, therefore all your markup is XSS safe by default. One can explicitly avoid escaping by wrapping data in a call to `rawhtml`, but that only applies to data inside a tag. Everything in attribute values is always escaped. Note that static text inside tags (i.e. anything not inside {}'s) is considered regular HTML and is not escaped.
123 |
124 | ```py
125 | safe_value = "Puppies!"
126 | unsafe_value = ""
127 | unsafe_attr = '">'
128 | print (
141 | ```
142 |
143 | ### UI Modules
144 |
145 | UI Modules are especially useful for creating re-usable building blocks in your application, making it quicker to implement new features, and keeping the UI consistent. Pyxl thinks of UI modules as user defined HTML tags, and so they are used just like you would use a `<div>` or any other tag.
146 |
147 | Creating UI modules in Pyxl simply means creating a class that inherits from [`x_element`](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/element.py) and implements the `render()` method. Modules must be prefixed with `x_`. This is an arbitrary requirement, but is useful in separating out pyxl modules from other things.
148 |
149 | Arguments to a UI module are passed as attributes to the UI module tag. Attribute values for these tags need not evaluate to something that can be cast to unicode, but ONLY if the attribute value is a single python expression, i.e. the only thing inside the quotes is a {} wrapped python expression. This allows one to pass in any type to a UI module. To demonstrate, a useful UI module is a user badge, which displays a user profile picture with the user's name and some arbitrary content to the right of it:
150 |
151 | ```py
152 | # coding: pyxl
153 | from pyxl.element import x_element
154 |
155 | class x_user_badge(x_element):
156 | __attrs__ = {
157 | 'user': object,
158 | }
159 | def render(self):
160 | return (
161 |
162 |
163 |
164 |
{self.user.name}
165 | {self.children()}
166 |
167 |
)
168 | ```
169 |
170 | This makes the tag `<user_badge>` available to us, which accepts `user` as an attribute: an object that contains the user's name and profile picture. Here is an example of this new UI module being used.
171 |
172 | ```py
173 | # coding: pyxl
174 | from some_module import x_user_badge
175 |
176 | user = User.get(some_user_id)
177 | content = <div>Any arbitrary content...</div>
178 | print <user_badge user="{user}">{content}</user_badge>
179 | ```
180 |
181 | Some things to note about UI modules.
182 |
183 | * Module names must begin with `x_`, and the module class must inherit from `x_element`
184 | * Modules must specify the attributes they accept via the `__attrs__` class variable. This is a dictionary where the key is the attribute name, and the value is the attribute type. Passing an attribute that is not listed in `__attrs__` will result in an error. The only exceptions are attributes accepted by all pyxl elements i.e. id, class, style, onclick, title and anything prefixed with "data-" or "aria-"
185 | * Providing a `class` attribute for a UI module element will automatically append the class string to the underlying HTML element the UI module renders. This is useful when you want to style UI modules differently based on where it is being rendered.
186 |
187 | ### Fragments
188 |
189 | The [`pyxl.html`](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/html.py) module provides the `<frag>` tag, which allows one to group a set of HTML tags without a parent. Rendering the `<frag>` tag simply renders all the children, and doesn't add to the markup.
190 |
191 | ### Conditional HTML
192 |
193 | Pyxl avoids support for logic within the HTML flow, except for one case where we found it especially useful: conditionally rendering HTML. That is why Pyxl provides the `<if>` tag, which takes an attr called `cond`. Children of an `<if>` are only rendered if `cond` evaluates to True.
194 |
195 | ## Implementation Details
196 |
197 | ### Parsing
198 |
199 | Pyxl uses support for specifying source code encodings as described in [PEP 263](http://www.python.org/dev/peps/pep-0263/) to do what it does. The functionality was originally provided so that python developers could write code in non-ascii languages (eg. chinese variable names). Pyxl creates a custom encoding called pyxl which allows it to convert XML into regular python before the file is compiled. Once the pyxl codec is registered, any file starting with `# coding: pyxl` is run through the pyxl parser before compilation.
200 |
201 | To register the pyxl codec, one must import the [`pyxl.codec.register`](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/codec/register.py) module. The **Installation Process** makes it so that this always happens at python startup via the final `sudo python finish_install.py` step. What this step is doing is adding a file called `pyxl.pth` in your python site-packages directory, which imports the `pyxl.codec.register` module. Anything with a `.pth` extension in the site-packages directory is run automatically at python startup. Read more about that [here](http://docs.python.org/library/site.html).
202 |
203 | Some people may prefer avoiding adding pyxl.pth to their site-packages directory, in which case they should skip the final step of the installation process and explicitly import `pyxl.codec.register` in the entry point of their application.
204 |
205 | The pyxl encoding is a wrapper around utf-8, but every time it encounters a blob of HTML in the file, it runs it through python's [`HTMLParser`](http://docs.python.org/library/htmlparser.html) and replaces the HTML with python objects. As explained above, opening tags are converted into object instantiations for the respective tag, nested tags are passed in as arguments to the `append_children` method, and closing tags close the bracket to the `append_children` call. The code for these conversions can be seen [here](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/codec/parser.py).
206 |
207 | ### HTML Objects
208 |
209 | Though the syntactic sugar of being able to write HTML in python is pyxl's biggest usefulness, pyxl does also provide a basic framework for dealing with HTML tags as objects. This is not a full DOM implementation, but provides most of the necessary functionality. All the basic HTML tags are represented by objects defined in the [`pyxl.html`](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/html.py) module, all of which inherit from the [`x_base`](https://github.com/dropbox/pyxl/blob/master/pyxl/pyxl/base.py) class.
210 |
211 | An HTML tag is rendered by calling the `to_string()` method (called automatically when tags are cast to strings), which recursively calls `to_string()` on all its children. Therefore, it should be noted that almost all the work happens only once `to_string()` is called. It is also at this stage that attribute values and data are escaped. Most of the work consists of string concatenations, and performance based on applications we've written is equivalent to templating engines like Cheetah. Note that there is probably some low hanging fruit in performance improvements that we haven't looked into (mostly because it hasn't been a problem).
212 |
213 | ## Editor Support
214 |
215 | ### Emacs
216 |
217 | Grab pyxl-mode.el from the downloaded package under `pyxl/emacs/pyxl-mode.el` or copy it from [here](https://github.com/dropbox/pyxl/blob/master/emacs/pyxl-mode.el). To install, drop the file anywhere on your load path, and add the following to your ~/.emacs file (GNU Emacs) or ~/.xemacs/init.el file (XEmacs):
218 |
219 | ```lisp
220 | (autoload 'pyxl-mode "pyxl-mode" "Major mode for editing pyxl" t)
221 | (setq auto-mode-alist
222 | (cons '("\\.py\\'" . pyxl-mode) auto-mode-alist))
223 | ```
224 |
225 | ### Vim
226 |
227 | Pyxl detection, syntax, and indent files are in the `vim` directory. The easiest way to install the vim support is via [pathogen](https://github.com/tpope/vim-pathogen); with pathogen, you can simply link or copy the directory into your bundle directory. Without pathogen, place the various files in the corresponding subdirectories of your .vim directory.
228 |
229 | ### Pycharm
230 |
231 | See [pycharm-pyxl](https://github.com/christoffer/pycharm-pyxl).
232 |
233 | ### Sublime Text
234 |
235 | See [sublime-pyxl](https://github.com/yyjhao/sublime-pyxl).
236 |
--------------------------------------------------------------------------------
/emacs/pyxl-mode.el:
--------------------------------------------------------------------------------
1 | ;;; pyxl-mode.el --- major mode for editing pyxl enabled Python
2 | ;;;
3 | ;;; @author Akhil Wable
4 | ;;;
5 | ;;; To install, drop this anywhere on your load path, and add the following to
6 | ;;; your ~/.emacs file (GNU Emacs) or ~/.xemacs/init.el file (XEmacs):
7 | ;;;
8 | ;;; (autoload 'pyxl-mode "pyxl-mode" "Major mode for editing pyxl" t)
9 | ;;; (setq auto-mode-alist
10 | ;;; (cons '("\\.py\\'" . pyxl-mode) auto-mode-alist))
11 | ;;;
12 |
13 | (require 'cl)
14 | (require 'python)
15 |
16 | (defcustom pyxl-mode-hook nil
17 | "List of functions to be executed on entry to `pyxl-mode'."
18 | :type 'hook
19 | :group 'python)
20 |
21 | (defun pyxl-context-p ()
22 | "Return the start of the parenthesized HTML block containing point, or nil if there is none."
23 | (let ((start-rexp "([ \n\t]*<") ; open paren, optional whitespace, then a tag start
24 | (end-rexp ">[ \n\t]*)")) ; tag end, optional whitespace, then a close paren
25 | (let ((backward-start (save-excursion (re-search-backward start-rexp nil t)))
26 | (backward-end (save-excursion (re-search-backward end-rexp nil t))))
27 | (if (and backward-start
28 | (or (not backward-end) (< backward-end backward-start))) ; no closer between the nearest opener and point
29 | backward-start
30 | nil))))
31 |
32 | (defun pyxl-back-to-indentation () ; If point is within the line's leading whitespace, move it to the first non-blank character.
33 | (let ((first-non-indent
34 | (save-excursion
35 | (back-to-indentation)
36 | (point)))) ; position of the first non-blank character on this line
37 | (if (< (point) first-non-indent) ; only ever moves point forward; point past the indentation is left alone
38 | (back-to-indentation))))
39 |
40 | (defun pyx-indent-line-helper ()
41 | "Indent the current line by its tag nesting depth inside the enclosing pyxl HTML block."
42 | ;; Group 1 matches an opening tag ("<name"); group 2 matches a closer ("</" or "/>").
43 | (let ((nesting-regex "\\(<[:a-zA-Z][:a-zA-Z0-9_]*\\)\\|\\(</\\|/>\\)")
44 | (indent-from (line-beginning-position))
45 | (depth 1))
46 | (save-excursion
47 | (re-search-backward "([ \n\t]*<" nil t) ; jump back to the "(<" that opens the HTML block
48 | (let ((starting-indent (current-indentation)))
49 | (while (and (< (point) indent-from)
50 | (re-search-forward nesting-regex indent-from t))
51 | (if (match-string 1) (incf depth))
52 | (if (match-string 2) (decf depth)))
53 | (goto-char indent-from)
54 | (indent-line-to
55 | (+ starting-indent
56 | (* 4 depth)
57 | (if (looking-at "[ \t]*\\(?:</\\|/>\\)") -4 0))))) ; a line that begins with a closer is dedented one level
58 | (pyxl-back-to-indentation)))
59 |
60 | (defun pyxl-indent-line ()
61 | "Indent the current line: HTML-aware inside pyxl markup, regular Python indentation elsewhere."
62 | (interactive)
63 | (save-excursion
64 | (if (pyxl-context-p)
65 | ;; If a line is inside html, use the custom indent function
66 | (pyx-indent-line-helper)
67 | ;; Fall back to regular python indentation for no html
68 | (python-indent-line)))
69 |
70 | (pyxl-back-to-indentation)) ; runs outside the save-excursion so that point can actually move
71 |
(defun pyxl-indent-region (start end)
  "Indent each non-empty line between START and END with `pyxl-indent-line'.
If START is not at the beginning of a line, indentation begins with the
following line.  END is tracked with a marker so that it stays correct
while re-indenting changes the buffer text."
  (save-excursion
    (goto-char end)
    (let ((limit (point-marker)))
      (goto-char start)
      (unless (bolp)
        (forward-line 1))
      (while (< (point) limit)
        ;; Completely empty lines are left alone.
        (unless (and (bolp) (eolp))
          (pyxl-indent-line))
        (forward-line 1))
      ;; Detach the marker so it no longer has to be maintained.
      (move-marker limit nil))))
83 |
;; Face referenced by the tag rule of `pyxl-font-lock-keywords' for its
;; first match group.
;; NOTE(review): the `pyxl-mode' customization group is not declared in the
;; visible part of this file, while `pyxl-mode-hook' uses group `python' --
;; confirm which group is intended.
(defcustom pyxl-default-face 'default
  "Default face in pyxl-mode buffers."
  :type 'face
  :group 'pyxl-mode)
88 |
;; Highlighting rules for pyxl buffers: three HTML-specific rules placed in
;; front of the regular `python-font-lock-keywords'.
(defconst pyxl-font-lock-keywords
  (append
   (list
    ;; tags: group 1 is the "<" or "</" opener, group 2 is the tag name
    '("\\(</?\\)\\([:a-zA-Z0-9_]+\\)"
      (1 pyxl-default-face)
      (2 font-lock-function-name-face))

    ;; comments: "<!--" up to the nearest "-->", possibly across lines
    '("<!--\\(?:.\\|\n\\)*?-->" (0 font-lock-comment-face))

    ;; XML entities
    '("&\\w+;" . font-lock-constant-face)
    )
   python-font-lock-keywords)
  "Font Lock for pyxl mode.")
103 |
104 | ;;;###autoload
(define-derived-mode pyxl-mode python-mode "pyxl"
  "Major mode for editing Python code with pyxl.
Derived from `python-mode'.  Installs `pyxl-font-lock-keywords' for
highlighting and uses `pyxl-indent-line' / `pyxl-indent-region' for
indentation.  `pyxl-mode-hook' is run on entry."

  ;; Adapted from python-mode.el
  ;; NOTE(review): the inner quote below makes the sixth element a
  ;; `(quote ...)' form rather than a (VARIABLE . VALUE) pair -- kept as is,
  ;; confirm whether that is intended.
  (set (make-local-variable 'font-lock-defaults)
       '(pyxl-font-lock-keywords
         nil
         nil
         nil
         nil
         '(font-lock-syntactic-keywords . python-font-lock-syntactic-keywords)
         ;; This probably isn't worth it.
         ;; (font-lock-syntactic-face-function
         ;;  . python-font-lock-syntactic-face-function)
         ))

  (setq indent-line-function 'pyxl-indent-line)
  (setq indent-region-function 'pyxl-indent-region)
  ;; No explicit (run-hooks 'pyxl-mode-hook) here: `define-derived-mode'
  ;; already runs the mode's own hook after this body, so calling it again
  ;; would execute every hook function twice.
  )
124 |
(provide 'pyxl-mode)

;; In python-mode.el RET is bound to newline-and-indent, which indents the next line if necessary.
;; In python.el which we're extending, this is bound to C-j instead.
;; This binds RET to newline-and-indent
;; The lambda is deliberately not quoted, so that it is treated as a
;; function (and can be byte-compiled) rather than as a literal list.
(add-hook
 'python-mode-hook
 (lambda () (define-key python-mode-map "\C-m" 'newline-and-indent)))
133 |
--------------------------------------------------------------------------------
/finish_install.py:
--------------------------------------------------------------------------------
"""Post-install step: copy ``pyxl.pth`` into the interpreter's library directory.

The ``.pth`` file is looked up next to this script rather than in the current
working directory, so the script can be run from anywhere.
"""
import os
import shutil
from distutils.sysconfig import get_python_lib

# Absolute path of the pyxl.pth that ships alongside this script.
pth_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'pyxl.pth')

# Destination: the directory reported by distutils for installed libraries.
python_lib = get_python_lib()
shutil.copy(pth_file, python_lib)
6 |
--------------------------------------------------------------------------------
/pyxl.pth:
--------------------------------------------------------------------------------
# Lines of a .pth file that start with "import" are executed by the site
# module at interpreter startup. Presumably this import registers the pyxl
# source codec -- confirm against pyxl/codec/register.py.
import pyxl.codec.register
2 |
--------------------------------------------------------------------------------
/pyxl/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
--------------------------------------------------------------------------------
/pyxl/base.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # We want a way to generate non-colliding 'pyxl' ids for elements, so we're
4 | # using a non-cryptographically secure random number generator. We want it to be
5 | # insecure because these aren't being used for anything cryptographic and it's
6 | # much faster (2x). We're also not using NumPy (which is even faster) because
7 | # it's a difficult dependency to fulfill purely to generate random numbers.
8 | import collections
9 | import random
10 | import sys
11 |
12 | from pyxl.utils import escape
13 |
class PyxlException(Exception):
    """Error raised by pyxl elements, e.g. for unknown or invalid attributes."""
16 |
class x_base_metaclass(type):
    """Metaclass that prepares ``__attrs__`` and ``__tag__`` on element classes.

    For every class it creates:

    * ``__attrs__`` becomes the attribute table of the first base class that
      has one, overlaid with the ``__attrs__`` declared in the class body.
    * ``__tag__`` becomes the class name with its first two characters
      removed (e.g. ``x_div`` -> ``div``).

    Attribute names declared in the class body must not contain ``_``; this
    is checked with an ``assert``.
    """

    def __init__(self, name, parents, attrs):
        super(x_base_metaclass, self).__init__(name, parents, attrs)

        # Only the first parent that carries an attribute table is consulted.
        inherited = {}
        for parent in parents:
            if hasattr(parent, '__attrs__'):
                inherited = parent.__attrs__
                break

        # Attributes declared directly on this class (not inherited ones).
        declared = self.__dict__.get('__attrs__', {})

        # Dont allow '_' in attr names
        for attr_name in declared:
            assert '_' not in attr_name, (
                "%s: '_' not allowed in attr names, use '-' instead" % attr_name)

        # Copy before merging so the parent's table is never mutated.
        merged = dict(inherited)
        merged.update(declared)
        self.__attrs__ = merged
        self.__tag__ = name[2:]
33 |
34 | class x_base(object):
35 |
36 | __metaclass__ = x_base_metaclass
37 | __attrs__ = {
38 | # HTML attributes
39 | 'accesskey': unicode,
40 | 'class': unicode,
41 | 'dir': unicode,
42 | 'id': unicode,
43 | 'lang': unicode,
44 | 'maxlength': unicode,
45 | 'role': unicode,
46 | 'style': unicode,
47 | 'tabindex': int,
48 | 'title': unicode,
49 | 'xml:lang': unicode,
50 |
51 | # Microdata HTML attributes
52 | 'itemtype': unicode,
53 | 'itemscope': unicode,
54 | 'itemprop': unicode,
55 | 'itemid': unicode,
56 | 'itemref': unicode,
57 |
58 | # JS attributes
59 | 'onabort': unicode,
60 | 'onblur': unicode,
61 | 'onchange': unicode,
62 | 'onclick': unicode,
63 | 'ondblclick': unicode,
64 | 'onerror': unicode,
65 | 'onfocus': unicode,
66 | 'onkeydown': unicode,
67 | 'onkeypress': unicode,
68 | 'onkeyup': unicode,
69 | 'onload': unicode,
70 | 'onmousedown': unicode,
71 | 'onmouseenter': unicode,
72 | 'onmouseleave': unicode,
73 | 'onmousemove': unicode,
74 | 'onmouseout': unicode,
75 | 'onmouseover': unicode,
76 | 'onmouseup': unicode,
77 | 'onreset': unicode,
78 | 'onresize': unicode,
79 | 'onselect': unicode,
80 | 'onsubmit': unicode,
81 | 'onunload': unicode,
82 | }
83 |
84 | def __init__(self, **kwargs):
85 | self.__attributes__ = {}
86 | self.__children__ = []
87 |
88 | for name, value in kwargs.iteritems():
89 | self.set_attr(x_base._fix_attribute_name(name), value)
90 |
91 | def __call__(self, *children):
92 | self.append_children(children)
93 | return self
94 |
95 | def get_id(self):
96 | eid = self.attr('id')
97 | if not eid:
98 | eid = 'pyxl%d' % random.randint(0, sys.maxint)
99 | self.set_attr('id', eid)
100 | return eid
101 |
102 | def children(self, selector=None, exclude=False):
103 | if not selector:
104 | return self.__children__
105 |
106 | # filter by class
107 | if selector[0] == '.':
108 | select = lambda x: selector[1:] in x.get_class()
109 |
110 | # filter by id
111 | elif selector[0] == '#':
112 | select = lambda x: selector[1:] == x.get_id()
113 |
114 | # filter by tag name
115 | else:
116 | select = lambda x: x.__class__.__name__ == ('x_%s' % selector)
117 |
118 | if exclude:
119 | func = lambda x: not select(x)
120 | else:
121 | func = select
122 |
123 | return filter(func, self.__children__)
124 |
125 | def append(self, child):
126 | if type(child) in (list, tuple) or hasattr(child, '__iter__'):
127 | self.__children__.extend(c for c in child if c is not None and c is not False)
128 | elif child is not None and child is not False:
129 | self.__children__.append(child)
130 |
131 | def prepend(self, child):
132 | if child is not None and child is not False:
133 | self.__children__.insert(0, child)
134 |
135 | def __getattr__(self, name):
136 | return self.attr(name.replace('_', '-'))
137 |
138 | def attr(self, name, default=None):
139 | # this check is fairly expensive (~8% of cost)
140 | if not self.allows_attribute(name):
141 | raise PyxlException('<%s> has no attr named "%s"' % (self.__tag__, name))
142 |
143 | value = self.__attributes__.get(name)
144 |
145 | if value is not None:
146 | return value
147 |
148 | attr_type = self.__attrs__.get(name, unicode)
149 | if type(attr_type) == list:
150 | if not attr_type:
151 | raise PyxlException('Invalid attribute definition')
152 |
153 | if None in attr_type[1:]:
154 | raise PyxlException('None must be the first, default value')
155 |
156 | return attr_type[0]
157 |
158 | return default
159 |
160 | def transfer_attributes(self, element):
161 | for name, value in self.__attributes__.iteritems():
162 | if element.allows_attribute(name) and element.attr(name) is None:
163 | element.set_attr(name, value)
164 |
165 | def set_attr(self, name, value):
166 | # this check is fairly expensive (~8% of cost)
167 | if not self.allows_attribute(name):
168 | raise PyxlException('<%s> has no attr named "%s"' % (self.__tag__, name))
169 |
170 | if value is not None:
171 | attr_type = self.__attrs__.get(name, unicode)
172 |
173 | if type(attr_type) == list:
174 | # support for enum values in pyxl attributes
175 | values_enum = attr_type
176 | assert values_enum, 'Invalid attribute definition'
177 |
178 | if value not in values_enum:
179 | msg = '%s: %s: incorrect value "%s" for "%s". Expecting enum value %s' % (
180 | self.__tag__, self.__class__.__name__, value, name, values_enum)
181 | raise PyxlException(msg)
182 |
183 | else:
184 | try:
185 | # Validate type of attr and cast to correct type if possible
186 | value = value if isinstance(value, attr_type) else attr_type(value)
187 | except Exception:
188 | exc_type, exc_obj, exc_tb = sys.exc_info()
189 | msg = '%s: %s: incorrect type for "%s". expected %s, got %s' % (
190 | self.__tag__, self.__class__.__name__, name, attr_type, type(value))
191 | exception = PyxlException(msg)
192 | raise exception, None, exc_tb
193 |
194 | self.__attributes__[name] = value
195 |
196 | elif name in self.__attributes__:
197 | del self.__attributes__[name]
198 |
199 | def get_class(self):
200 | return self.attr('class', '')
201 |
202 | def add_class(self, xclass):
203 | if not xclass: return
204 | current_class = self.attr('class')
205 | if current_class: current_class += ' ' + xclass
206 | else: current_class = xclass
207 | self.set_attr('class', current_class)
208 |
209 | def append_children(self, children):
210 | for child in children:
211 | self.append(child)
212 |
213 | def attributes(self):
214 | return self.__attributes__
215 |
216 | def set_attributes(self, attrs_dict):
217 | for name, value in attrs_dict.iteritems():
218 | self.set_attr(name, value)
219 |
220 | def allows_attribute(self, name):
221 | return (name in self.__attrs__ or name.startswith('data-') or name.startswith('aria-'))
222 |
223 | def to_string(self):
224 | l = collections.deque()
225 | self._to_list(l)
226 | return u''.join(l)
227 |
228 | def _to_list(self, l):
229 | raise NotImplementedError()
230 |
231 | def __str__(self):
232 | return self.to_string()
233 |
234 | def __unicode__(self):
235 | return self.to_string()
236 |
237 | @staticmethod
238 | def _render_child_to_list(child, l):
239 | if isinstance(child, x_base): child._to_list(l)
240 | elif child is not None: l.append(escape(child))
241 |
242 | @staticmethod
243 | def _fix_attribute_name(name):
244 | if name == 'xclass': return 'class'
245 | if name == 'xfor': return 'for'
246 | return name.replace('_', '-').replace('COLON', ':')
247 |
--------------------------------------------------------------------------------
/pyxl/browser_hacks.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may
4 | # not use this file except in compliance with the License. You may obtain
5 | # a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 | # License for the specific language governing permissions and limitations
13 | # under the License.
14 |
15 | from pyxl.base import x_base
16 | from pyxl.utils import escape
17 |
class x_cond_comment(x_base):
    ''' Conditional comment: the children are wrapped in a "<!--[if cond]>" ...
    "<![endif]-->" comment, so only browsers that evaluate conditional comments
    (and for which cond holds) see them. '''
    __attrs__ = {
        'cond': unicode,
        }

    def _to_list(self, l):
        # allow '&', escape everything else from cond
        cond = self.__attributes__.get('cond', '')
        cond = '&'.join(map(escape, cond.split('&')))

        l.extend((u'<!--[if ', cond, u']>'))

        for child in self.__children__:
            x_base._render_child_to_list(child, l)

        l.append(u'<![endif]-->')
34 |
class x_cond_noncomment(x_base):
    ''' This is a conditional comment where browsers which don't support conditional comments
        will parse the children by default. '''
    __attrs__ = {
        'cond': unicode,
        }

    def _to_list(self, l):
        # allow '&', escape everything else from cond
        cond = self.__attributes__.get('cond', '')
        cond = '&'.join(map(escape, cond.split('&')))

        # The opening comment is closed again right away ("<!-->"), so a
        # browser without conditional-comment support parses the children as
        # ordinary markup.
        l.extend((u'<!--[if ', cond, u']><!-->'))

        for child in self.__children__:
            x_base._render_child_to_list(child, l)

        l.append(u'<!--<![endif]-->')
53 |
54 |
--------------------------------------------------------------------------------
/pyxl/codec/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
--------------------------------------------------------------------------------
/pyxl/codec/html_tokenizer.py:
--------------------------------------------------------------------------------
1 | """
2 | A naive but strict HTML tokenizer. Based directly on
3 | http://www.w3.org/TR/2011/WD-html5-20110525/tokenization.html
4 |
5 | In the ATTRIBUTE_VALUE and BEFORE_ATTRIBUTE_VALUE states, python tokens are accepted.
6 | """
7 |
8 | import sys
9 | from collections import OrderedDict
10 |
# Tokenizer states. The numeric values are only used as distinct identifiers;
# the "unused" comments mark states that this tokenizer leaves out.
class State(object):
    DATA = 1
    # unused states: charrefs, RCDATA, script, RAWTEXT, PLAINTEXT
    TAG_OPEN = 7
    END_TAG_OPEN = 8
    TAG_NAME = 9
    # unused states: RCDATA, RAWTEXT, script
    BEFORE_ATTRIBUTE_NAME = 34
    ATTRIBUTE_NAME = 35
    AFTER_ATTRIBUTE_NAME = 36
    BEFORE_ATTRIBUTE_VALUE = 37
    ATTRIBUTE_VALUE_DOUBLE_QUOTED = 38
    ATTRIBUTE_VALUE_SINGLE_QUOTED = 39
    ATTRIBUTE_VALUE_UNQUOTED = 40
    # unused state: CHARREF_IN_ATTRIBUTE_VALUE = 41
    AFTER_ATTRIBUTE_VALUE = 42
    SELF_CLOSING_START_TAG = 43
    # unused state: BOGUS_COMMENT_STATE = 44
    MARKUP_DECLARATION_OPEN = 45
    COMMENT_START = 46
    COMMENT_START_DASH = 47
    COMMENT = 48
    COMMENT_END_DASH = 49
    COMMENT_END = 50
    # unused state: COMMENT_END_BANG = 51
    DOCTYPE = 52
    DOCTYPE_CONTENTS = 53 # Gross oversimplification. Not to spec.
    # unused states: doctypes
    CDATA_SECTION = 68

    @classmethod
    def state_name(cls, state_val):
        # Reverse lookup: name of the class attribute whose value equals
        # state_val. Used to build readable error messages.
        found = next(
            (name for name, value in vars(cls).items() if value == state_val),
            None)
        assert found is not None, "impossible state value %r!" % state_val
        return found
47 |
class Tag(object):
    """Mutable record describing the tag that is currently being tokenized."""

    def __init__(self):
        # Filled in by the tokenizer as the tag is read.
        self.tag_name = None
        # Attribute name -> value, kept in the order the attributes were seen.
        self.attrs = OrderedDict()
        # endtag is set for closing tags, startendtag for self-closing tags.
        self.endtag = self.startendtag = False
54 |
class ParseError(Exception):
    """Raised by the tokenizer for structurally invalid input."""
57 |
class BadCharError(Exception):
    """Raised when a character is not accepted in the current tokenizer state."""

    def __init__(self, state, char):
        # The numeric state is translated to its symbolic name for the message.
        message = "unexpected character %r in state %r" % (
            char, State.state_name(state))
        super(BadCharError, self).__init__(message)
62 |
class Unimplemented(Exception):
    """Marker exception for tokenizer features that are not implemented."""
65 |
class HTMLTokenizer(object):
    """Character-at-a-time HTML tokenizer.

    ``feed`` consumes one character and advances the state machine;
    ``feed_python`` injects a list of python tokens where an attribute value
    is expected or being read. Completed tokens are reported through the
    ``handle_*`` methods, which subclasses must override.

    Attributes written without a value (``<input disabled>``) are recorded
    with an empty value (an empty parts list).
    """

    def __init__(self):
        self.state = State.DATA

        # attribute_value is a list, where each element is either a string or a list of python
        # tokens.

        self.data = ""
        self.tag = None
        self.tag_name = None
        self.attribute_name = None
        self.attribute_value = None
        self.markup_declaration_buffer = None
        # Look-behind buffer used to detect the "]]>" end of a CDATA section.
        self.cdata_buffer = ""

    def handle_data(self, data):
        assert False, "subclass should override"

    def handle_starttag(self, tag_name, attrs):
        assert False, "subclass should override"

    def handle_startendtag(self, tag_name, attrs):
        assert False, "subclass should override"

    def handle_endtag(self, tag_name):
        assert False, "subclass should override"

    def handle_comment(self, tag_name):
        assert False, "subclass should override"

    def handle_doctype(self, data):
        assert False, "subclass should override"

    def handle_cdata(self, tag_name):
        assert False, "subclass should override"

    def emit_data(self):
        """Report the buffered character data and reset the buffer."""
        self.handle_data(self.data)
        self.data = ""

    def emit_tag(self):
        """Report the current tag as a start, end or self-closing tag."""
        if self.tag.startendtag and self.tag.endtag:
            raise ParseError("both startendtag and endtag!?")
        if self.tag.startendtag:
            self.handle_startendtag(self.tag.tag_name, self.tag.attrs)
        elif self.tag.endtag:
            self.handle_endtag(self.tag.tag_name)
        else:
            self.handle_starttag(self.tag.tag_name, self.tag.attrs)

    def emit_comment(self):
        """Report the buffered comment text and reset the buffer."""
        self.handle_comment(self.data)
        self.data = ""

    def emit_doctype(self):
        """Report the buffered doctype contents and reset the buffer."""
        self.handle_doctype(self.data)
        self.data = ""

    def emit_cdata(self):
        """Report the buffered CDATA contents and reset the buffer."""
        self.handle_cdata(self.data)
        self.data = ""

    def got_attribute(self):
        """Store the attribute just read on the current tag.

        Raises ParseError if the tag already has an attribute of that name.
        """
        if self.attribute_name in self.tag.attrs:
            raise ParseError("repeat attribute name %r" % self.attribute_name)
        # An attribute without "=value" has an empty value; store an empty
        # parts list instead of None so consumers can always iterate it.
        value = self.attribute_value if self.attribute_value is not None else []
        self.tag.attrs[self.attribute_name] = value
        self.attribute_name = None
        self.attribute_value = None

    def add_data_char(self, build, c):
        """ For adding a new character to e.g. an attribute value """
        # Extend the trailing string part if there is one; otherwise (empty
        # list, or last part is a python token list) start a new string part.
        if build and isinstance(build[-1], str):
            build[-1] += c
        else:
            build.append(c)

    def feed(self, c):
        """Advance the state machine by the single character ``c``.

        Raises BadCharError when ``c`` is not allowed in the current state and
        ParseError for a repeated attribute name.
        """
        if self.state == State.DATA:
            if c == '<':
                self.emit_data()
                self.state = State.TAG_OPEN
            # Pass through; it's the browser's problem to understand these.
            #elif c == '&':
            #    raise Unimplemented
            else:
                self.data += c

        elif self.state == State.TAG_OPEN:
            self.tag = Tag()
            if c == '!':
                self.markup_declaration_buffer = ""
                self.state = State.MARKUP_DECLARATION_OPEN
            elif c == '/':
                self.state = State.END_TAG_OPEN
            elif c.isalpha():
                self.tag.tag_name = c
                self.state = State.TAG_NAME
            else:
                raise BadCharError(self.state, c)

        elif self.state == State.END_TAG_OPEN:
            self.tag.endtag = True
            if c.isalpha():
                self.tag.tag_name = c
                self.state = State.TAG_NAME
            else:
                raise BadCharError(self.state, c)

        elif self.state == State.TAG_NAME:
            if c in '\t\n\f ':
                self.state = State.BEFORE_ATTRIBUTE_NAME
            elif c == '/':
                self.state = State.SELF_CLOSING_START_TAG
            elif c == '>':
                self.emit_tag()
                self.state = State.DATA
            else:
                self.tag.tag_name += c

        elif self.state == State.BEFORE_ATTRIBUTE_NAME:
            if c in '\t\n\f ':
                pass
            elif c == '/':
                self.state = State.SELF_CLOSING_START_TAG
            elif c == '>':
                self.emit_tag()
                self.state = State.DATA
            elif c in "\"'<=":
                raise BadCharError(self.state, c)
            else:
                self.attribute_name = c.lower()
                self.state = State.ATTRIBUTE_NAME

        elif self.state == State.ATTRIBUTE_NAME:
            if c in '\t\n\f ':
                self.state = State.AFTER_ATTRIBUTE_NAME
            elif c == '/':
                self.got_attribute()
                self.state = State.SELF_CLOSING_START_TAG
            elif c == '=':
                self.state = State.BEFORE_ATTRIBUTE_VALUE
            elif c == '>':
                # Record the valueless attribute before emitting the tag,
                # like the '/' branch above does.
                self.got_attribute()
                self.emit_tag()
                self.state = State.DATA
            elif c in "\"'<":
                raise BadCharError(self.state, c)
            else:
                self.attribute_name += c.lower()

        elif self.state == State.AFTER_ATTRIBUTE_NAME:
            if c in '\t\n\f ':
                pass
            elif c == '/':
                self.got_attribute()
                self.state = State.SELF_CLOSING_START_TAG
            elif c == '=':
                self.state = State.BEFORE_ATTRIBUTE_VALUE
            elif c == '>':
                self.got_attribute()
                self.emit_tag()
                self.state = State.DATA
            elif c in "\"'<":
                raise BadCharError(self.state, c)
            else:
                # Any other character starts the next attribute: the previous
                # one had no value, so record it and begin a new name instead
                # of silently dropping the character.
                self.got_attribute()
                self.attribute_name = c.lower()
                self.state = State.ATTRIBUTE_NAME

        elif self.state == State.BEFORE_ATTRIBUTE_VALUE:
            if c in '\t\n\f ':
                pass
            elif c == '"':
                self.attribute_value = []
                self.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED
            elif c == '&':
                self.attribute_value = []
                self.state = State.ATTRIBUTE_VALUE_UNQUOTED
                # rehandle c. Call this class's feed explicitly: a subclass
                # may override feed() with a different argument type.
                HTMLTokenizer.feed(self, c)
            elif c == "'":
                self.attribute_value = []
                self.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED
            elif c in '><=`':
                raise BadCharError(self.state, c)
            else:
                self.attribute_value = [c]
                self.state = State.ATTRIBUTE_VALUE_UNQUOTED

        elif self.state == State.ATTRIBUTE_VALUE_DOUBLE_QUOTED:
            if c == '"':
                self.state = State.AFTER_ATTRIBUTE_VALUE
            # Pass through; it's the browser's problem to understand these.
            #elif c == '&':
            #    raise Unimplemented
            else:
                self.add_data_char(self.attribute_value, c)

        elif self.state == State.ATTRIBUTE_VALUE_SINGLE_QUOTED:
            if c == "'":
                self.state = State.AFTER_ATTRIBUTE_VALUE
            # Pass through; it's the browser's problem to understand these.
            #elif c == '&':
            #    raise Unimplemented
            else:
                self.add_data_char(self.attribute_value, c)

        elif self.state == State.ATTRIBUTE_VALUE_UNQUOTED:
            if c in '\t\n\f ':
                self.got_attribute()
                self.state = State.BEFORE_ATTRIBUTE_NAME
            elif c == '>':
                self.got_attribute()
                self.emit_tag()
                self.state = State.DATA
            elif c in "\"'<=`":
                raise BadCharError(self.state, c)
            # Pass through; it's the browser's problem to understand these.
            #elif c == '&':
            #    raise Unimplemented
            else:
                self.add_data_char(self.attribute_value, c)

        elif self.state == State.AFTER_ATTRIBUTE_VALUE:
            self.got_attribute()
            if c in '\t\n\f ':
                self.state = State.BEFORE_ATTRIBUTE_NAME
            elif c == '/':
                self.state = State.SELF_CLOSING_START_TAG
            elif c == '>':
                self.emit_tag()
                self.state = State.DATA
            else:
                raise BadCharError(self.state, c)

        elif self.state == State.SELF_CLOSING_START_TAG:
            self.tag.startendtag = True
            if c == '>':
                self.emit_tag()
                self.state = State.DATA
            else:
                raise BadCharError(self.state, c)

        elif self.state == State.MARKUP_DECLARATION_OPEN:
            # Accumulate characters after "<!" until they spell one of the
            # three supported openers; fail as soon as none can match.
            self.markup_declaration_buffer += c
            if self.markup_declaration_buffer == "--":
                self.data = ""
                self.state = State.COMMENT_START
            elif self.markup_declaration_buffer.lower() == "DOCTYPE".lower():
                self.state = State.DOCTYPE
            elif self.markup_declaration_buffer == "[CDATA[":
                self.data = ""
                self.cdata_buffer = ""
                self.state = State.CDATA_SECTION
            elif not ("--".startswith(self.markup_declaration_buffer) or
                      "DOCTYPE".lower().startswith(self.markup_declaration_buffer.lower()) or
                      "[CDATA[".startswith(self.markup_declaration_buffer)):
                raise BadCharError(self.state, c)

        elif self.state == State.COMMENT_START:
            if c == "-":
                self.state = State.COMMENT_START_DASH
            elif c == ">":
                raise BadCharError(self.state, c)
            else:
                self.data += c
                self.state = State.COMMENT

        elif self.state == State.COMMENT_START_DASH:
            if c == "-":
                self.state = State.COMMENT_END
            elif c == ">":
                raise BadCharError(self.state, c)
            else:
                self.data += "-" + c
                self.state = State.COMMENT

        elif self.state == State.COMMENT:
            if c == "-":
                self.state = State.COMMENT_END_DASH
            else:
                self.data += c

        elif self.state == State.COMMENT_END_DASH:
            if c == "-":
                self.state = State.COMMENT_END
            else:
                self.data += "-" + c
                self.state = State.COMMENT

        elif self.state == State.COMMENT_END:
            if c == ">":
                self.emit_comment()
                self.state = State.DATA
            else:
                raise BadCharError(self.state, c)

        elif self.state == State.DOCTYPE:
            if c in "\t\n\f ":
                self.data = ""
                self.state = State.DOCTYPE_CONTENTS
            else:
                raise BadCharError(self.state, c)

        elif self.state == State.DOCTYPE_CONTENTS:
            if c == ">":
                self.emit_doctype()
                self.state = State.DATA
            else:
                self.data += c

        elif self.state == State.CDATA_SECTION:
            self.cdata_buffer += c
            if self.cdata_buffer == "]]>":
                self.emit_cdata()
                self.state = State.DATA
            else:
                # Move characters that can no longer be part of the "]]>"
                # terminator from the look-behind buffer into the data.
                while self.cdata_buffer and not "]]>".startswith(self.cdata_buffer):
                    self.data += self.cdata_buffer[0]
                    self.cdata_buffer = self.cdata_buffer[1:]

        else:
            assert False, "bad state! %r" % self.state

    def feed_python(self, tokens):
        """Add a list of python tokens as (part of) the current attribute value.

        Raises ParseError in any state other than the attribute value states.
        """
        if self.state == State.BEFORE_ATTRIBUTE_VALUE:
            self.attribute_value = [tokens]
            self.state = State.ATTRIBUTE_VALUE_UNQUOTED
        elif self.state in [State.ATTRIBUTE_VALUE_DOUBLE_QUOTED,
                            State.ATTRIBUTE_VALUE_SINGLE_QUOTED,
                            State.ATTRIBUTE_VALUE_UNQUOTED]:
            self.attribute_value.append(tokens)
        else:
            raise ParseError("python not allow in state %r" % State.state_name(self.state))
394 |
class HTMLTokenDumper(HTMLTokenizer):
    """Debugging tokenizer that prints every token it recognizes.

    All handlers of HTMLTokenizer are overridden, so comments, doctypes and
    CDATA sections are printed too instead of hitting the base class's
    "subclass should override" assertion.
    """

    # print(...) with a single pre-formatted string argument behaves the same
    # as the print statement.
    def handle_data(self, data):
        print("DATA %r" % data)

    def handle_starttag(self, tag_name, attrs):
        print("STARTTAG %r %r" % (tag_name, attrs))

    def handle_startendtag(self, tag_name, attrs):
        print("STARTENDTAG %r %r" % (tag_name, attrs))

    def handle_endtag(self, tag_name):
        print("ENDTAG %r" % tag_name)

    def handle_comment(self, data):
        print("COMMENT %r" % data)

    def handle_doctype(self, data):
        print("DOCTYPE %r" % data)

    def handle_cdata(self, data):
        print("CDATA %r" % data)
407 |
def main(filename):
    """Tokenize the file ``filename`` and print each token as it is found."""
    tokenizer = HTMLTokenDumper()
    with open(filename) as source:
        for text_line in source:
            # The tokenizer consumes exactly one character per call.
            for char in text_line:
                tokenizer.feed(char)

if __name__ == "__main__":
    main(*sys.argv[1:])
417 |
--------------------------------------------------------------------------------
/pyxl/codec/parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import tokenize
4 | from pyxl import html
5 | from html_tokenizer import (
6 | HTMLTokenizer,
7 | ParseError as TokenizerParseError,
8 | State,
9 | )
10 | from pytokenize import Untokenizer
11 |
class ParseError(Exception):
    """Parse failure, optionally annotated with a source position.

    ``pos`` is a ``(line, char)`` tuple; when given, it is appended to the
    message as "... at line L char C".
    """

    def __init__(self, message, pos=None):
        if pos is None:
            text = message
        else:
            text = "%s at line %d char %d" % ((message,) + pos)
        super(ParseError, self).__init__(text)
18 |
19 | class PyxlParser(HTMLTokenizer):
def __init__(self, row, col):
    """Start parsing an HTML fragment that begins at ``(row, col)``."""
    super(PyxlParser, self).__init__()
    origin = (row, col)
    # start stays fixed; end follows the position of the last thing consumed.
    self.start = origin
    self.end = origin
    # Pieces of generated python source, joined by get_token().
    self.output = []
    # Stack of {'tag', 'row'} dicts for tags that are still open.
    self.open_tags = []
    # Unconsumed rest of the token during which parsing finished.
    self.remainder = None
    # Flags consulted when normalizing whitespace and validating tag order.
    self.next_thing_is_python = False
    self.last_thing_was_python = False
    self.last_thing_was_close_if_tag = False
29 |
def feed(self, token):
    """Feed one python tokenizer token, character by character, to the HTML tokenizer.

    ``token`` is a ``(type, value, start, end, line)`` tuple. Once ``done()``
    becomes true, the unconsumed rest of the token is stored in
    ``self.remainder``. Raises ParseError (with the current position) when the
    HTML tokenizer reports a ParseError.
    """
    ttype, tvalue, tstart, tend, tline = token

    assert tstart[0] >= self.end[0], "row went backwards"
    if tstart[0] > self.end[0]:
        # Emit one newline per skipped row so the output keeps the same line count.
        self.output.append("\n" * (tstart[0] - self.end[0]))

    # interpret jumps on the same line as a single space
    elif tstart[1] > self.end[1]:
        super(PyxlParser, self).feed(" ")

    self.end = tstart

    # The text of INDENT tokens is not fed to the HTML tokenizer.
    if ttype != tokenize.INDENT:
        while tvalue and not self.done():
            c, tvalue = tvalue[0], tvalue[1:]
            # Track the position of the character about to be fed.
            if c == "\n":
                self.end = (self.end[0]+1, 0)
            else:
                self.end = (self.end[0], self.end[1]+1)
            try:
                super(PyxlParser, self).feed(c)
            except TokenizerParseError:
                raise ParseError("HTML Parsing error", self.end)
    if self.done():
        # Whatever is left of this token belongs to the code after the fragment.
        self.remainder = (ttype, tvalue, self.end, tend, tline)
    else:
        self.end = tend
58 |
def feed_python(self, tokens):
    """Consume a list of python tokens found inside the HTML.

    In data or CDATA the pending text is emitted and the untokenized python
    source is appended to the output followed by ", ". In the attribute value
    states the tokens are passed on to the HTML tokenizer. In any other state
    only the position is updated.
    """
    _, _, first_start, _, _ = tokens[0]
    assert first_start[0] >= self.end[0], "row went backwards"
    if first_start[0] > self.end[0]:
        # Keep the output's line count in step with the source.
        self.output.append("\n" * (first_start[0] - self.end[0]))
    _, _, _, last_end, _ = tokens[-1]
    self.end = last_end

    if self.state in (State.DATA, State.CDATA_SECTION):
        # Tell handle_data() that python follows the text being flushed.
        self.next_thing_is_python = True
        self.emit_data()
        python_source = Untokenizer().untokenize(tokens)
        self.output.append("%s, " % python_source)
        self.next_thing_is_python = False
        self.last_thing_was_python = True
    elif self.state in (State.BEFORE_ATTRIBUTE_VALUE,
                        State.ATTRIBUTE_VALUE_DOUBLE_QUOTED,
                        State.ATTRIBUTE_VALUE_SINGLE_QUOTED,
                        State.ATTRIBUTE_VALUE_UNQUOTED):
        super(PyxlParser, self).feed_python(tokens)
78 |
79 | def feed_position_only(self, token):
80 | """update with any whitespace we might have missed, and advance position to after the
81 | token"""
82 | ttype, tvalue, tstart, tend, tline = token
83 | self.feed((ttype, '', tstart, tstart, tline))
84 | self.end = tend
85 |
def python_comment_allowed(self):
    """Return True if a ``#`` in the current state starts a python comment.

    That is the case in data, in and around tag and attribute names, before
    and after attribute values, and inside HTML comments, doctypes and CDATA.
    The three attribute value states are not included, so a ``#`` inside an
    attribute value is ordinary text.
    """
    comment_states = (
        State.DATA, State.TAG_NAME,
        State.BEFORE_ATTRIBUTE_NAME, State.AFTER_ATTRIBUTE_NAME,
        State.BEFORE_ATTRIBUTE_VALUE, State.AFTER_ATTRIBUTE_VALUE,
        State.COMMENT, State.DOCTYPE_CONTENTS, State.CDATA_SECTION,
    )
    return self.state in comment_states
100 |
def python_mode_allowed(self):
    """Return True if a ``{`` in the current state starts python mode.

    Python mode is allowed everywhere except inside an HTML comment, where
    curly braces are just part of the comment text.
    """
    return self.state != State.COMMENT
107 |
108 | def feed_comment(self, token):
109 | ttype, tvalue, tstart, tend, tline = token
110 | self.feed((ttype, '', tstart, tstart, tline))
111 | self.output.append(tvalue)
112 | self.end = tend
113 |
114 | def get_remainder(self):
115 | return self.remainder
116 |
def done(self):
    """Return a true value once the fragment has been parsed completely.

    That requires no open tags, the tokenizer being back in the data state,
    and some output having been produced. The result is only meant to be used
    as a truth value (it can be the output list itself).
    """
    no_open_tags = len(self.open_tags) == 0
    return no_open_tags and self.state == State.DATA and self.output
119 |
120 | def get_token(self):
121 | return (tokenize.STRING, ''.join(self.output), self.start, self.end, '')
122 |
123 | @staticmethod
124 | def safe_attr_name(name):
125 | if name == "class":
126 | return "xclass"
127 | if name == "for":
128 | return "xfor"
129 | return name.replace('-', '_').replace(':', 'COLON')
130 |
131 | def _handle_attr_value(self, attr_value):
132 | def format_parts():
133 | prev_was_python = False
134 | for i, part in enumerate(attr_value):
135 | if type(part) == list:
136 | yield part
137 | prev_was_python = True
138 | else:
139 | next_is_python = bool(i+1 < len(attr_value) and type(attr_value[i+1]) == list)
140 | part = self._normalize_data_whitespace(part, prev_was_python, next_is_python)
141 | if part:
142 | yield part
143 | prev_was_python = False
144 |
145 | attr_value = list(format_parts())
146 | if len(attr_value) == 1:
147 | part = attr_value[0]
148 | if type(part) == list:
149 | self.output.append(Untokenizer().untokenize(part))
150 | else:
151 | self.output.append(repr(part))
152 | else:
153 | self.output.append('u"".join((')
154 | for part in attr_value:
155 | if type(part) == list:
156 | self.output.append('unicode(')
157 | self.output.append(Untokenizer().untokenize(part))
158 | self.output.append(')')
159 | else:
160 | self.output.append(repr(part))
161 | self.output.append(', ')
162 | self.output.append('))')
163 |
164 | @staticmethod
165 | def _normalize_data_whitespace(data, prev_was_py, next_is_py):
166 | if not data:
167 | return ''
168 | if '\n' in data and not data.strip():
169 | if prev_was_py and next_is_py:
170 | return ' '
171 | else:
172 | return ''
173 | if prev_was_py and data.startswith('\n'):
174 | data = " " + data.lstrip('\n')
175 | if next_is_py and data.endswith('\n'):
176 | data = data.rstrip('\n') + " "
177 | data = data.strip('\n')
178 | data = data.replace('\r', ' ')
179 | data = data.replace('\n', ' ')
180 | return data
181 |
def handle_starttag(self, tag, attrs, call=True):
    """Record ``tag`` as open and append the code that starts it to self.output.

    tag: the tag name.  ``if`` and ``else`` are translated specially; any
        other name becomes a call to ``x_<name>`` (prefixed with ``html.``
        when the html module has an attribute of that name).
    attrs: mapping of attribute name to attribute value; each value is
        rendered through self._handle_attr_value().
    call: when true, also append the "(" that opens the element's
        ``__call__`` (its children).

    Raises ParseError when an ``if`` tag does not carry exactly the ``cond``
    attribute, or when an ``else`` tag has attributes or does not directly
    follow a closing ``if`` tag.
    """
    # The tag is pushed before validation so that error reporting elsewhere
    # still sees it on the stack.
    self.open_tags.append({'tag':tag, 'row': self.end[0]})
    if tag == 'if':
        if len(attrs) != 1:
            raise ParseError("if tag only takes one attr called 'cond'", self.end)
        if 'cond' not in attrs:
            raise ParseError("if tag must contain the 'cond' attr", self.end)

        # The condition is recorded at runtime so that a following <else>
        # can test html._last_if_condition.
        self.output.append('html._push_condition(bool(')
        self._handle_attr_value(attrs['cond'])
        self.output.append(')) and html.x_frag()(')
        self.last_thing_was_python = False
        self.last_thing_was_close_if_tag = False
        return
    elif tag == 'else':
        if len(attrs) != 0:
            raise ParseError("else tag takes no attrs", self.end)
        if not self.last_thing_was_close_if_tag:
            # The tag names had been lost from this message, leaving it
            # unreadable; spell them out.
            raise ParseError("<else> tag must come right after </if>", self.end)

        self.output.append('(not html._last_if_condition) and html.x_frag()(')
        self.last_thing_was_python = False
        self.last_thing_was_close_if_tag = False
        return

    # "a.b.c" -> "a.b.x_c": only the last dotted component gets the prefix.
    module, dot, identifier = tag.rpartition('.')
    identifier = 'x_%s' % identifier
    x_tag = module + dot + identifier

    if hasattr(html, x_tag):
        self.output.append('html.')
    self.output.append('%s(' % x_tag)

    first_attr = True
    for attr_name, attr_value in attrs.iteritems():
        if first_attr: first_attr = False
        else: self.output.append(', ')

        self.output.append(self.safe_attr_name(attr_name))
        self.output.append('=')
        self._handle_attr_value(attr_value)

    self.output.append(')')
    if call:
        # start call to __call__
        self.output.append('(')
    self.last_thing_was_python = False
    self.last_thing_was_close_if_tag = False
230 |
def handle_endtag(self, tag_name, call=True):
    """Close the innermost open tag and append the matching code to self.output.

    tag_name: name of the closing tag; it must match the tag on top of
        self.open_tags.
    call: when true, first append the ")" that finishes the element's
        ``__call__`` (its children).

    Raises ParseError when the closing tag does not match the innermost open
    tag, and AssertionError when no tag is open at all.
    """
    if call:
        # finish call to __call__
        self.output.append(")")

    # The "</" of the closing tag had been lost from both messages below.
    assert self.open_tags, "got </%s> but tag stack empty; parsing should be over!" % tag_name

    open_tag = self.open_tags.pop()
    if open_tag['tag'] != tag_name:
        raise ParseError("<%s> on line %d closed by </%s> on line %d" %
                         (open_tag['tag'], open_tag['row'], tag_name, self.end[0]))

    if open_tag['tag'] == 'if':
        self.output.append(',html._leave_if()')
        self.last_thing_was_close_if_tag = True
    else:
        self.last_thing_was_close_if_tag = False

    # Still nested inside another tag: separate this child from the next one.
    if len(self.open_tags):
        self.output.append(",")
    self.last_thing_was_python = False
252 |
def handle_startendtag(self, tag_name, attrs):
    """Emit a self-closing tag: its start and end halves back to back.

    Neither half opens or closes the children call, since such a tag has
    no children.
    """
    without_call = dict(call=False)
    self.handle_starttag(tag_name, attrs, **without_call)
    self.handle_endtag(tag_name, **without_call)
256 |
def handle_data(self, data):
    """Append a ``html.rawhtml(...)`` child for a run of literal text.

    The text is first whitespace-normalized relative to the neighbouring
    python expressions; text that normalizes to nothing is ignored and
    leaves the parser state untouched.
    """
    normalized = self._normalize_data_whitespace(
        data, self.last_thing_was_python, self.next_thing_is_python)
    if not normalized:
        return

    # XXX XXX mimics old pyxl, but this is gross and likely wrong. %r would
    # probably be the better choice than escaping only the double quotes and
    # wrapping the text in u"%s".
    escaped = normalized.replace('"', '\\"')
    self.output.append('html.rawhtml(u"%s"), ' % escaped)

    self.last_thing_was_python = self.last_thing_was_close_if_tag = False
270 |
def handle_comment(self, data):
    """Emit an HTML comment as a self-closing ``html_comment`` tag.

    The comment text is stripped of surrounding whitespace and passed as
    the single-part ``comment`` attribute.
    """
    comment_attrs = {"comment": [data.strip()]}
    self.handle_startendtag("html_comment", comment_attrs)
    self.last_thing_was_python = self.last_thing_was_close_if_tag = False
275 |
def handle_doctype(self, data):
    """Emit a doctype declaration as a self-closing ``html_decl`` tag.

    The declaration text is ``data`` prefixed with ``DOCTYPE ``.
    """
    decl_attrs = {"decl": ['DOCTYPE ' + data]}
    self.handle_startendtag("html_decl", decl_attrs)
    self.last_thing_was_python = self.last_thing_was_close_if_tag = False
280 |
def handle_cdata(self, data):
    """Emit a CDATA section as a self-closing ``html_marked_decl`` tag.

    The declaration text is ``data`` prefixed with ``CDATA[``.
    """
    decl_attrs = {"decl": ['CDATA[' + data]}
    self.handle_startendtag("html_marked_decl", decl_attrs)
    self.last_thing_was_python = self.last_thing_was_close_if_tag = False
285 |
--------------------------------------------------------------------------------
/pyxl/codec/pytokenize.py:
--------------------------------------------------------------------------------
1 | """Tokenization help for Python programs.
2 |
3 | generate_tokens(readline) is a generator that breaks a stream of
4 | text into Python tokens. It accepts a readline-like method which is called
5 | repeatedly to get the next line of input (or "" for EOF). It generates
6 | 5-tuples with these members:
7 |
8 | the token type (see token.py)
9 | the token (a string)
10 | the starting (row, column) indices of the token (a 2-tuple of ints)
11 | the ending (row, column) indices of the token (a 2-tuple of ints)
12 | the original line (string)
13 |
14 | It is designed to match the working of the Python tokenizer exactly, except
15 | that it produces COMMENT tokens for comments and gives type OP for all
16 | operators
17 |
18 | Older entry points
19 | tokenize_loop(readline, tokeneater)
20 | tokenize(readline, tokeneater=printtoken)
21 | are the same, except instead of generating tokens, tokeneater is a callback
22 | function to which the 5 fields described above are passed as 5 arguments,
23 | each time a new token is found.
24 |
25 |
26 | This file was taken from the python 2.7.4 library and modified for use by
27 | the Pyxl decoder. Changes made:
28 | - When it encounters an unexpected EOF, the tokenizer does not raise an
29 | exception, and instead yields an errortoken if appropriate.
30 | - When it encounters an unexpected dedent, the tokenizer does not
31 | raise an exception.
32 | - The Untokenizer class was heavily modified.
33 |
34 |
35 | PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
36 | --------------------------------------------
37 |
38 | 1. This LICENSE AGREEMENT is between the Python Software Foundation
39 | ("PSF"), and the Individual or Organization ("Licensee") accessing and
40 | otherwise using this software ("Python") in source or binary form and
41 | its associated documentation.
42 |
43 | 2. Subject to the terms and conditions of this License Agreement, PSF hereby
44 | grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
45 | analyze, test, perform and/or display publicly, prepare derivative works,
46 | distribute, and otherwise use Python alone or in any derivative version,
47 | provided, however, that PSF's License Agreement and PSF's notice of copyright,
48 | i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
49 | 2011, 2012, 2013 Python Software Foundation; All Rights Reserved" are retained
50 | in Python alone or in any derivative version prepared by Licensee.
51 |
52 | 3. In the event Licensee prepares a derivative work that is based on
53 | or incorporates Python or any part thereof, and wants to make
54 | the derivative work available to others as provided herein, then
55 | Licensee hereby agrees to include in any such work a brief summary of
56 | the changes made to Python.
57 |
58 | 4. PSF is making Python available to Licensee on an "AS IS"
59 | basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
60 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
61 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
62 | FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
63 | INFRINGE ANY THIRD PARTY RIGHTS.
64 |
65 | 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
66 | FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
67 | A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
68 | OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
69 |
70 | 6. This License Agreement will automatically terminate upon a material
71 | breach of its terms and conditions.
72 |
73 | 7. Nothing in this License Agreement shall be deemed to create any
74 | relationship of agency, partnership, or joint venture between PSF and
75 | Licensee. This License Agreement does not grant permission to use PSF
76 | trademarks or trade name in a trademark sense to endorse or promote
77 | products or services of Licensee, or any third party.
78 |
79 | 8. By copying, installing or otherwise using Python, Licensee
80 | agrees to be bound by the terms and conditions of this License
81 | Agreement.
82 | """
83 |
# Authorship metadata carried over from the standard-library tokenize module.
# The author's e-mail address had been dropped from the string, leaving a
# dangling trailing space; it is restored here.
__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
               'Skip Montanaro, Raymond Hettinger')
87 |
88 | import string, re
89 | from token import *
90 |
# Public API: every public name of the ``token`` module plus the names this
# module adds itself.
import token
__all__ = [x for x in dir(token) if not x.startswith("_")]
__all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]
# ``x`` is the list-comprehension variable; deleting it here requires that
# it is still bound at module level (Python 2 comprehension scoping).
del x
del token

# Two extra token types appended after those of the ``token`` module:
# COMMENT for comments and NL for newlines that do not end a logical line.
COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'
NL = N_TOKENS + 1
tok_name[NL] = 'NL'
N_TOKENS += 2
102 |
def group(*choices):
    """Return a regex group that matches any one of *choices*."""
    return '(%s)' % '|'.join(choices)


def any(*choices):
    """Return a regex matching zero or more repetitions of group(*choices).

    Note: this deliberately keeps the name it has in the standard-library
    tokenize module, shadowing the builtin ``any`` inside this module.
    """
    return '%s*' % group(*choices)


def maybe(*choices):
    """Return a regex matching group(*choices) optionally (zero or one time)."""
    return '%s?' % group(*choices)
106 |
# Regular-expression source strings for the lexical elements.  They are
# plain strings combined with group()/any()/maybe() and compiled further
# down in this module.

# Horizontal whitespace only: space, form feed and tab (no line breaks).
Whitespace = r'[ \f\t]*'
# A comment runs from '#' up to, but not including, the end of the line.
Comment = r'#[^\r\n]*'
# Everything that may precede a token: whitespace, any number of
# backslash-newline continuations, and an optional comment.
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'[a-zA-Z_]\w*'

# Integer literals in each base; [lL] is the optional long-integer suffix.
Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
Octnumber = r'(0[oO][0-7]+)|(0[0-7]*)[lL]?'
Binnumber = r'0[bB][01]+[lL]?'
Decnumber = r'[1-9]\d*[lL]?'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
# Floating point literals, with or without a decimal point / exponent.
Exponent = r'[eE][-+]?\d+'
Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
Expfloat = r'\d+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
# Alternatives are tried left to right, so the more specific forms
# (imaginary, then float) come before plain integers.
Number = group(Imagnumber, Floatnumber, Intnumber)

# The four "tail end" patterns match the remainder of a string once its
# opening quote(s) have been consumed, up to and including the closing
# quote(s).
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
# Opening of a triple-quoted string, including any u/b and r prefixes.
Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
# Single-line ' or " string.
String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
                 r"//=?",
                 r"[+\-*/%&|^=<>]=?",
                 r"~")

Bracket = '[][(){}]'
# Line ends and the remaining single-character punctuation.
Special = group(r'\r?\n', r'[:;.,`@]')
Funny = group(Operator, Bracket, Special)

# A complete token, preceded by anything ignorable.
PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
# Things that are matched while scanning a line but are not tokens by
# themselves: a line continuation or the end of the text, a comment, or
# the opening of a triple-quoted string.
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
# The pattern generate_tokens() applies repeatedly to each line; group 1
# spans the matched item without its leading whitespace.
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
159 |
# Compiled forms of the patterns above.
tokenprog = re.compile(Token)
pseudoprog = re.compile(PseudoToken)
single3prog = re.compile(Single3)
double3prog = re.compile(Double3)

# Maps the opening of a string (prefix letters plus quotes) to the compiled
# pattern that finds the end of that string.
endprogs = {"'": re.compile(Single), '"': re.compile(Double)}
# Every prefix spelling of a triple-quoted string shares one of the two
# triple-quote end patterns.
endprogs.update(
    (prefix + quotes, prog)
    for prefix in ('', 'r', 'u', 'ur', 'R', 'U', 'uR', 'Ur', 'UR',
                   'b', 'br', 'B', 'bR', 'Br', 'BR')
    for quotes, prog in (("'''", single3prog), ('"""', double3prog)))
# A bare prefix letter has no end pattern of its own; the None entries let
# generate_tokens() chain lookups with ``or`` until it reaches the quote.
endprogs.update(dict.fromkeys('rRuUbB'))
180 |
# Lookup tables of every accepted string opening (prefix letters plus
# quotes); each key maps to itself and only membership is tested.
triple_quoted = {}
for t in (prefix + quotes
          for prefix in ('', 'r', 'R', 'u', 'U', 'ur', 'Ur', 'uR', 'UR',
                         'b', 'B', 'br', 'Br', 'bR', 'BR')
          for quotes in ("'''", '"""')):
    triple_quoted[t] = t
single_quoted = {}
for t in (prefix + quote
          for prefix in ('', 'r', 'R', 'u', 'U', 'ur', 'Ur', 'uR', 'UR',
                         'b', 'B', 'br', 'Br', 'bR', 'BR')
          for quote in ("'", '"')):
    single_quoted[t] = t

# Width of a tab stop when measuring indentation.
tabsize = 8
203 |
class TokenError(Exception):
    """Tokenization error, kept from the standard-library tokenize module."""


class StopTokenizing(Exception):
    """Swallowed by tokenize(); lets a tokeneater callback stop it early."""
207 |
def printtoken(type, token, srow_scol, erow_ecol, line):
    """Print one token as "srow,scol-erow,ecol:<TAB>TYPE<TAB>repr(token)".

    Default tokeneater of tokenize(), meant for testing.  *line* is accepted
    for interface compatibility and not used.
    """
    (srow, scol), (erow, ecol) = srow_scol, erow_ecol
    fields = (srow, scol, erow, ecol, tok_name[type], repr(token))
    # A single parenthesized string prints identically whether ``print`` is
    # a statement or a function.
    print("%d,%d-%d,%d:\t%s\t%s" % fields)
213 |
def tokenize(readline, tokeneater=printtoken):
    """
    Tokenize the input and hand every token to a callback.

    readline is a callable with the same interface as the readline() method
    of built-in file objects: each call returns one line of input as a
    string.

    tokeneater is a callable invoked once per token with five arguments,
    the members of the tuples produced by generate_tokens().  It may raise
    StopTokenizing to end tokenization early; that exception is absorbed
    here.
    """
    try:
        tokenize_loop(readline, tokeneater)
    except StopTokenizing:
        return
231 |
# backwards compatible interface
def tokenize_loop(readline, tokeneater):
    """Call tokeneater(type, string, start, end, line) for every token.

    Unlike tokenize(), exceptions raised by the callback (StopTokenizing
    included) propagate to the caller.
    """
    for tok_type, tok_string, start, end, line in generate_tokens(readline):
        tokeneater(tok_type, tok_string, start, end, line)
236 |
class Untokenizer:
    """Rebuild source text from 5-tuple tokens, using their positions.

    Tokens are fed in order; the gap between the end of the previous token
    and the start of the next one is filled with newlines and spaces.
    """

    # PYXL MODIFICATION: This entire class.

    def __init__(self, row=None, col=None):
        """Start at position (row, col).

        When row is None, the position of the first token fed is taken as
        the starting position, so no whitespace precedes that token.
        """
        self.tokens = []
        self.prev_row = row
        self.prev_col = col

    def add_whitespace(self, start):
        """Append the whitespace between the current position and *start*.

        start: (row, col) at which the next token begins; its row must not
        be before the current row.
        """
        row, col = start
        assert row >= self.prev_row, "row (%r) should be >= prev_row (%r)" % (row, self.prev_row)
        row_offset = row - self.prev_row
        prev_col = self.prev_col
        if row_offset:
            self.tokens.append("\n" * row_offset)
            # A new row starts at column 0.  Measuring from the previous
            # row's column would drop or shorten the indentation of a token
            # that follows a row jump.
            prev_col = 0
        col_offset = col - prev_col
        if col_offset > 0:
            self.tokens.append(" " * col_offset)

    def feed(self, t):
        """Append token *t*, a (type, string, start, end, line) 5-tuple."""
        assert len(t) == 5
        tok_type, token, start, end, line = t
        if (self.prev_row is None):
            self.prev_row, self.prev_col = start
        self.add_whitespace(start)
        self.tokens.append(token)
        self.prev_row, self.prev_col = end
        # A newline token ends on the row it terminates; what follows it
        # begins at the start of the next row.
        if tok_type in (NEWLINE, NL):
            self.prev_row += 1
            self.prev_col = 0

    def finish(self):
        """Return the text accumulated so far."""
        return "".join(self.tokens)

    def untokenize(self, iterable):
        """Feed every token of *iterable*, then return the resulting text."""
        for t in iterable:
            self.feed(t)
        return self.finish()
275 |
def untokenize(iterable):
    """Transform tokens back into Python source code.

    Each element of *iterable* is fed to a fresh Untokenizer, which expects
    full 5-tuples (type, string, start, end, line) and uses the start and
    end positions to restore the whitespace between tokens.

    Returns the reconstructed source text.
    """
    return Untokenizer().untokenize(iterable)
296 |
def generate_tokens(readline):
    """
    Tokenize the Python source supplied line by line by *readline*.

    readline must be a callable object which provides the same interface as
    the readline() method of built-in file objects: each call returns one
    line of input as a string, and "" at the end of the input.  Alternately,
    readline can be a callable that ends the input by raising StopIteration:
        readline = open(myfile).next    # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found.  The line passed is the
    logical line; continuation lines are included.

    PYXL MODIFICATIONS (each marked in the body): a multi-line string still
    open at the end of the input is yielded as an ERRORTOKEN instead of
    raising; a dedent to a column that matches no enclosing indentation
    level is accepted; and input that ends inside brackets or after a
    backslash continuation simply ends the generator.  In the first and last
    of these cases no trailing DEDENT/ENDMARKER tokens are produced.
    """
    # lnum: 1-based number of the current line; parenlev: bracket nesting
    # depth; continued: set after a backslash line continuation.
    lnum = parenlev = continued = 0
    namechars, numchars = string.ascii_letters + '_', '0123456789'
    # contstr/contline accumulate the text of a string that spans several
    # lines; needcont marks a single-quoted string continued with a backslash.
    contstr, needcont = '', 0
    contline = None
    # Stack of the indentation columns currently in effect.
    indents = [0]

    while 1:                                   # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            line = ''
        lnum += 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                # PYXL MODIFICATION: instead of raising an error here, we
                # return the remainder of the file as an errortoken.
                yield (ERRORTOKEN, contstr,
                       strstart, (lnum, 0), contline + line)
                contstr, needcont = '', 0
                contline = None
                return
            endmatch = endprog.match(line)
            if endmatch:
                # The string ends on this line; the rest of the line is
                # scanned for further tokens by the loop at the bottom.
                pos = end = endmatch.end(0)
                yield (STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                # A backslash-continued single-quoted string whose next line
                # neither closes it nor continues it again.
                yield (ERRORTOKEN, contstr + line,
                       strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    # Advance to the next tab stop.
                    column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f':
                    column = 0
                else:
                    break
                pos += 1
            # Only whitespace and no line terminator: treated as the end of
            # the input.
            if pos == max:
                break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    # Comment-only line: the comment and its line ending are
                    # emitted as two separate tokens.
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield (COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield (NL, line[nl_pos:],
                           (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    # Blank line.  line[pos] is never '#' in this branch, so
                    # the tuple lookup always selects NL.
                    yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    # PYXL MODIFICATION: instead of raising an error here, we
                    # emit an empty dedent token, which has no effect on
                    # the decoded file.
                    pass
                indents = indents[:-1]
                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                # PYXL MODIFICATION: instead of raising an error here, we
                # return as if successful.
                return
            continued = 0

        # Scan the rest of the line, one pseudo-token at a time.
        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                # An empty match carries no token; pos has already been
                # moved past any whitespace.
                if start == end:
                    continue
                token, initial = line[start:end], line[start]

                if initial in numchars or \
                   (initial == '.' and token != '.'):      # ordinary number
                    yield (NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    # Inside brackets a line end does not end the statement.
                    yield (NL if parenlev > 0 else NEWLINE,
                           token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield (COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield (STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        # Prefix letters map to None in endprogs, so the
                        # ``or`` chain skips them until it reaches the quote.
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield (STRING, token, spos, epos, line)
                elif initial in namechars:                 # ordinary name
                    yield (NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    continued = 1
                else:
                    if initial in '([{':
                        parenlev += 1
                    elif initial in ')]}':
                        parenlev -= 1
                    yield (OP, token, spos, epos, line)
            else:
                # Nothing matches here: emit the single character as an
                # error token and carry on after it.
                yield (ERRORTOKEN, line[pos],
                       (lnum, pos), (lnum, pos+1), line)
                pos += 1

    for indent in indents[1:]:                 # pop remaining indent levels
        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
462 |
if __name__ == '__main__':                     # testing
    # Print the tokens of the file named on the command line, or of
    # standard input when no file is given.
    import sys
    if len(sys.argv) > 1:
        # Use a context manager so the file is closed once tokenizing is
        # done instead of being left open until garbage collection.
        with open(sys.argv[1]) as source_file:
            tokenize(source_file.readline)
    else:
        tokenize(sys.stdin.readline)
469 |
--------------------------------------------------------------------------------
/pyxl/codec/register.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import with_statement
3 |
4 | import codecs, cStringIO, encodings
5 | import sys
6 | import traceback
7 | from encodings import utf_8
8 | from pyxl.codec.tokenizer import pyxl_tokenize, pyxl_untokenize
9 |
def pyxl_transform(stream):
    """Return the pyxl source read from *stream* translated to plain Python.

    stream: an object with a readline() method yielding the source lines.

    Any exception raised while translating is printed, together with its
    traceback, and then re-raised.  Trailing whitespace is stripped from the
    result.
    """
    try:
        transformed_tokens = pyxl_tokenize(stream.readline)
        output = pyxl_untokenize(transformed_tokens)
    except Exception as ex:
        print(ex)
        traceback.print_exc()
        raise

    return output.rstrip()
19 |
def pyxl_transform_string(text):
    """Translate pyxl source given as a string; see pyxl_transform()."""
    return pyxl_transform(cStringIO.StringIO(text))
23 |
def pyxl_decode(input, errors='strict'):
    """Stateless decoder of the pyxl codec.

    input: the encoded pyxl source.
    errors: error handling scheme, passed on to the utf-8 decoder.

    Returns the (decoded text, length consumed) pair expected of a codec's
    decode function.
    """
    decoded, _ = utf_8.decode(pyxl_transform_string(input), errors)
    # The whole input is always consumed.  Report its own length rather than
    # that of the transformed text, which generally differs.
    return decoded, len(input)
26 |
class PyxlIncrementalDecoder(utf_8.IncrementalDecoder):
    """Incremental decoder that transforms the source only once it is complete.

    Input chunks are accumulated in self.buffer; nothing is translated
    until the final chunk arrives, at which point the whole buffer is run
    through pyxl_transform_string().
    """

    def decode(self, input, final=False):
        """Buffer *input*; return the decoded source once *final* is true.

        For non-final chunks an empty string is returned, so that callers
        which concatenate the results of successive calls keep working
        (returning None here would break them).
        """
        self.buffer += input
        if not final:
            return u''
        buff = self.buffer
        self.buffer = ''
        return super(PyxlIncrementalDecoder, self).decode(
            pyxl_transform_string(buff), final=True)
35 |
class PyxlStreamReader(utf_8.StreamReader):
    """Stream reader whose underlying stream is replaced by its pyxl translation."""

    def __init__(self, *args, **kwargs):
        """Set up the reader, then swap in the transformed source.

        The wrapped stream is run through pyxl_transform() right away and
        replaced by an in-memory stream holding the result.
        """
        codecs.StreamReader.__init__(self, *args, **kwargs)
        transformed_source = pyxl_transform(self.stream)
        self.stream = cStringIO.StringIO(transformed_source)
40 |
def search_function(encoding):
    """Codec search hook: return the CodecInfo for 'pyxl', None otherwise.

    Encoding is delegated to the utf8 codec; decoding goes through the
    pyxl-aware decoder, incremental decoder and stream reader.
    """
    if encoding != 'pyxl':
        return None

    # Assume utf8 encoding
    utf8 = encodings.search_function('utf8')
    codec_parts = dict(
        name='pyxl',
        encode=utf8.encode,
        decode=pyxl_decode,
        incrementalencoder=utf8.incrementalencoder,
        incrementaldecoder=PyxlIncrementalDecoder,
        streamreader=PyxlStreamReader,
        streamwriter=utf8.streamwriter)
    return codecs.CodecInfo(**codec_parts)

codecs.register(search_function)
55 |
_USAGE = """\
Wraps a python command to allow it to recognize pyxl-coded files with
no source modifications.

Usage:
python -m pyxl.codec.register -m module.to.run [args...]
python -m pyxl.codec.register path/to/script.py [args...]
"""

if __name__ == '__main__':
    # Work out what to run and drop this wrapper's own arguments from
    # sys.argv, so the target sees the command line it would have seen when
    # run directly.
    if len(sys.argv) >= 3 and sys.argv[1] == '-m':
        mode, module = 'module', sys.argv[2]
        del sys.argv[1:3]
    elif len(sys.argv) >= 2:
        mode, script = 'script', sys.argv[1]
        sys.argv = sys.argv[1:]
    else:
        sys.stderr.write(_USAGE + '\n')
        sys.exit(1)

    if mode == 'script':
        with open(script) as f:
            global __file__
            __file__ = script
            # Use globals as our "locals" dictionary so that something
            # that tries to import __main__ (e.g. the unittest module)
            # will see the right things.
            exec(f.read(), globals(), globals())
    elif mode == 'module':
        import runpy
        runpy.run_module(module, run_name='__main__', alter_sys=True)
89 |
--------------------------------------------------------------------------------
/pyxl/codec/tokenizer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import pytokenize as tokenize
4 | import re
5 | from StringIO import StringIO
6 | from pyxl.codec.parser import PyxlParser
7 | from pytokenize import Untokenizer
8 |
class PyxlParseError(Exception):
    """Raised when pyxl markup cannot be parsed, e.g. for unclosed tags."""
10 |
def get_end_pos(start_pos, tvalue):
    """Return the (row, col) position just past *tvalue*.

    start_pos: (row, col) at which the text begins.
    tvalue: the text; every newline in it moves to column 0 of the next
        row, every other character advances the column by one.
    """
    row, col = start_pos
    newline_count = tvalue.count('\n')
    if not newline_count:
        return (row, col + len(tvalue))
    # Only the characters after the last newline contribute to the column.
    tail_length = len(tvalue) - tvalue.rfind('\n') - 1
    return (row + newline_count, tail_length)
20 |
class RewindableTokenStream(object):
    """
    A token stream, with the ability to rewind and restart tokenization while maintaining correct
    token position information.

    Invariants:
     - zero_row and zero_col are the correct values to adjust the line and possibly column of the
       tokens being produced by _tokens.
     - Tokens in unshift_buffer have locations with absolute position (relative to the beginning
       of the file, not relative to where we last restarted tokenization).
    """

    def __init__(self, readline):
        """Tokenize the lines returned by successive calls to *readline*."""
        self.orig_readline = readline
        self.stop_readline = False
        self.rewound_buffer = None
        self.unshift_buffer = []
        self.zero_row, self.zero_col = (0, 0)
        self._tokens = tokenize.generate_tokens(self._readline)

    def _dumpstate(self):
        """Print the stream's internal state (debugging aid)."""
        print("tokenizer state:")
        print(" zero: %s" % ((self.zero_row, self.zero_col),))
        print(" rewound_buffer: %s" % (self.rewound_buffer,))
        print(" unshift_buffer: %s" % (self.unshift_buffer,))

    def _readline(self):
        """Line source for the tokenizer.

        Rewound text is served first, then the original readline; while
        stop_readline is set, end of input is reported instead.
        """
        if self.stop_readline:
            return ""
        if self.rewound_buffer:
            rewound_line = self.rewound_buffer.readline()
            if rewound_line:
                return rewound_line
            # The rewound text is used up: go back to orig_readline.
            self.rewound_buffer = None
        return self.orig_readline()

    def _flush(self):
        """Return every token still obtainable without reading a new line."""
        self.stop_readline = True
        pending = list(self)
        self.stop_readline = False
        return pending

    def _adjust_position(self, pos):
        """Convert a tokenizer-relative (row, col) to an absolute position.

        Only the first row of a restarted tokenization has its column
        shifted; rows are 1-indexed.
        """
        row, col = pos
        if row == 1:
            col += self.zero_col
        return (row + self.zero_row, col)

    def rewind_and_retokenize(self, rewind_token):
        """Rewind the given token (which is expected to be the last token read from this stream, or
        the end of such token); then restart tokenization."""
        _ttype, _tvalue, (row, col), _tend, _tline = rewind_token
        to_replay = [rewind_token] + self._flush()
        # rows are 1-indexed, cols are 0-indexed
        self.zero_row, self.zero_col = (row - 1, col)
        self.rewound_buffer = StringIO(Untokenizer().untokenize(to_replay))
        self.unshift_buffer = []
        self._tokens = tokenize.generate_tokens(self._readline)

    def next(self):
        """Return the next token: an unshifted one if any, else a fresh one
        with its positions made absolute."""
        if self.unshift_buffer:
            return self.unshift_buffer.pop(0)
        ttype, tvalue, tstart, tend, tline = self._tokens.next()
        return (ttype, tvalue,
                self._adjust_position(tstart), self._adjust_position(tend),
                tline)

    def __iter__(self):
        return self

    def unshift(self, token):
        """Rewind the given token, without retokenizing. It will be the next token read from the
        stream."""
        self.unshift_buffer.insert(0, token)
98 |
def pyxl_untokenize(tokens):
    """Join 5-tuple tokens back into source text.

    Columns between tokens are padded with spaces.  The tokens must be
    contiguous: each one has to start on the row where the previous one
    ended (the following row after a NL/NEWLINE token) and not before the
    column reached so far; otherwise an AssertionError is raised.
    """
    pieces = []
    expected_row, reached_col = 1, 0
    newline_types = (tokenize.NL, tokenize.NEWLINE)

    for ttype, tvalue, tstart, tend, tline in tokens:
        row, col = tstart

        assert row == expected_row, 'Unexpected jump in rows on line:%d: %s' % (row, tline)

        # Add whitespace
        gap = col - reached_col
        assert gap >= 0
        if gap > 0:
            pieces.append(" " * gap)

        pieces.append(tvalue)
        expected_row, reached_col = tend

        if ttype in newline_types:
            expected_row += 1
            reached_col = 0

    return ''.join(pieces)
124 |
def pyxl_tokenize(readline):
    """Return a generator of tokens for the pyxl source read via *readline*.

    The source is tokenized through a RewindableTokenStream and passed
    through transform_tokens().
    """
    stream = RewindableTokenStream(readline)
    return transform_tokens(stream)
127 |
def transform_tokens(tokens):
    """Yield the tokens of *tokens* with pyxl markup collapsed.

    tokens: a RewindableTokenStream-like object providing next() and
        unshift().

    A '<' operator found where an expression may start hands control to
    get_pyxl_token(), and the single token it returns is yielded instead.
    A '}' that closes more curlies than this call has seen opened is pushed
    back onto the stream and ends the generator, which is how a nested
    python section inside pyxl terminates.  Missing newline tokens between
    rows are synthesized so that the output stays contiguous.
    """
    layout_types = (tokenize.INDENT,
                    tokenize.DEDENT,
                    tokenize.NL,
                    tokenize.NEWLINE,
                    tokenize.COMMENT)
    newline_types = (tokenize.NEWLINE, tokenize.NL)
    # Tokens after which '<' starts a tag rather than a comparison.
    ops_before_tag = ('=', '(', '[', '{', ',', ':')
    names_before_tag = ('print', 'else', 'yield', 'return')

    last_significant = None
    previous = None
    open_curlies = 0

    while True:
        try:
            current = tokens.next()
        except (StopIteration, tokenize.TokenError):
            break

        # These always describe the token as read from the stream, even
        # when ``current`` is replaced by a pyxl token below.
        ttype, tvalue, tstart, tend, tline = current
        is_op = ttype == tokenize.OP

        if is_op and tvalue == '{':
            open_curlies += 1
        elif is_op and tvalue == '}':
            open_curlies -= 1
            if open_curlies < 0:
                tokens.unshift(current)
                return

        if is_op and tvalue == '<':
            if last_significant is None:
                # Nothing significant seen yet, i.e. python mode was *just*
                # entered.
                starts_pyxl = True
            else:
                kind, text = last_significant[0], last_significant[1]
                starts_pyxl = (
                    (kind == tokenize.OP and text in ops_before_tag) or
                    (kind == tokenize.NAME and text in names_before_tag))
            if starts_pyxl:
                current = get_pyxl_token(current, tokens)

        if ttype not in layout_types:
            last_significant = current

        # strip trailing newline from non newline tokens
        if tvalue and tvalue[-1] == '\n' and ttype not in newline_types:
            tvalue = tvalue[:-1]
            fields = list(current)
            fields[1] = tvalue
            current = tuple(fields)

        # tokenize has this bug where you can get line jumps without a newline token
        # we check and fix for that here by seeing if there was a line jump
        if previous:
            prev_ttype, _prev_value, _prev_start, prev_tend, prev_tline = previous
            prev_row, prev_col = prev_tend
            cur_row = tstart[0]

            # check for a line jump without a newline token
            if prev_row < cur_row and prev_ttype not in newline_types:

                # tokenize also forgets \ continuations :(
                stripped_line = prev_tline.strip()
                if (prev_ttype != tokenize.COMMENT and stripped_line
                        and stripped_line[-1] == '\\'):
                    yield (tokenize.STRING, ' \\',
                           (prev_row, prev_col), (prev_row, prev_col + 1), prev_tline)
                    prev_col += 1

                yield (tokenize.NL, '\n',
                       (prev_row, prev_col), (prev_row, prev_col + 1), prev_tline)

        previous = current
        yield current
202 |
def get_pyxl_token(start_token, tokens):
    """Consume one complete pyxl element and return it as a single token.

    start_token: the '<' token that opens the element.
    tokens: the RewindableTokenStream the element is read from.

    Tokens are fed to a PyxlParser until it reports being done.  Curly
    braces that switch to python mode, and '#' comments, are split off and
    handled separately because the python tokenizer does not know about
    pyxl.  Whatever the parser read beyond the end of the element is pushed
    back onto the stream.

    Raises PyxlParseError when the stream ends while tags are still open.
    """
    _, _, opening_start, _, _ = start_token
    parser = PyxlParser(opening_start[0], opening_start[1])
    parser.feed(start_token)

    for token in tokens:
        ttype, tvalue, tstart, tend, tline = token

        if tvalue and tvalue[0] == '{':
            if parser.python_mode_allowed():
                # Split off the curly, retokenize what follows as python
                # and collect it up to the matching close curly.
                curly, remainder_text = tvalue[0], tvalue[1:]
                split_pos = get_end_pos(tstart, curly)
                parser.feed_position_only((ttype, curly, tstart, split_pos, tline))
                tokens.rewind_and_retokenize((ttype, remainder_text, split_pos, tend, tline))
                python_tokens = list(transform_tokens(tokens))

                close_curly = tokens.next()
                ttype, tvalue, tstart, tend, tline = close_curly
                # Zero-width stand-in that only carries the end position of
                # the close curly.
                close_curly_sub = (ttype, '', tend, tend, tline)

                parser.feed_python(python_tokens + [close_curly_sub])
                continue
            # else fallthrough to parser.feed(token)
        elif tvalue and ttype == tokenize.COMMENT:
            if parser.python_comment_allowed():
                parser.feed_comment(token)
                continue
            # No comment allowed here: feed just the '#' as text and push
            # the rest back to be looked at again.
            tvalue, rest = tvalue[0], tvalue[1:]
            split_pos = get_end_pos(tstart, tvalue)
            tokens.unshift((tokenize.ERRORTOKEN, rest, split_pos, tend, tline))
            token = ttype, tvalue, tstart, split_pos, tline
            # fallthrough to parser.feed(token)
        elif tvalue and tvalue[0] == '#':
            # let the python tokenizer grab the whole comment token
            tokens.rewind_and_retokenize(token)
            continue
        else:
            # Cut the token at its first '#' or '{' so that the special
            # character starts a token of its own on the next iteration.
            pieces = re.split('([#{])', tvalue, maxsplit=1)
            if len(pieces) > 1:
                tvalue, special, right = pieces
                split_pos = get_end_pos(tstart, tvalue)
                tokens.unshift((ttype, special+right, split_pos, tend, tline))
                token = ttype, tvalue, tstart, split_pos, tline
                # fallthrough to parser.feed(token)

        parser.feed(token)

        if parser.done():
            break

    if not parser.done():
        unclosed = ['<%s> at (line:%d)' % (tag_info['tag'], tag_info['row'])
                    for tag_info in parser.open_tags]
        raise PyxlParseError('Unclosed Tags: %s' % ', '.join(unclosed))

    remainder = parser.get_remainder()
    if remainder:
        tokens.rewind_and_retokenize(remainder)

    return parser.get_token()
263 |
--------------------------------------------------------------------------------
/pyxl/element.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from pyxl.base import x_base
4 |
class x_element(x_base):
    """Base class for elements whose output is produced by render().

    Subclasses implement render(). The result is cached on the instance and,
    when serialized, the classes of this element and of every intermediate
    x_element are merged onto the first rendered object that is not itself
    an x_element.
    """

    _element = None  # render() output cached by _rendered_element()

    def _get_base_element(self):
        """Render down to the first non-x_element result and return it.

        When any classes were collected and the result is an x_base, its
        'class' attribute is overwritten with the union of its own classes
        and those collected on the way down (empty names are dropped).
        """
        # Adding classes costs ~10%
        out = self._rendered_element()
        # Note: get_class() may return multiple space-separated classes.
        classes = set()
        cls = self.get_class()
        if cls:
            classes.update(cls.split(' '))

        while isinstance(out, x_element):
            new_out = out._rendered_element()
            cls = out.get_class()
            if cls:
                classes.update(cls.split(' '))
            out = new_out

        if classes and isinstance(out, x_base):
            # Guard like the two call sites above: a falsy get_class() result
            # (e.g. None) must not be split.
            cls = out.get_class()
            if cls:
                classes.update(cls.split(' '))
            out.set_attr('class', ' '.join(filter(None, classes)))

        return out

    def _to_list(self, l):
        """Serialize the base element into the list `l`."""
        self._render_child_to_list(self._get_base_element(), l)

    def _rendered_element(self):
        """Return the cached render() result, rendering on first use.

        prerender() runs before render() and postrender() after it.
        NOTE: a render() that returns None is never cached, so the hooks and
        render() run again on the next call.
        """
        if self._element is None:
            self.prerender()
            self._element = self.render()
            self.postrender(self._element)
        return self._element

    def render(self):
        """Produce this element's content; must be overridden."""
        raise NotImplementedError()

    def prerender(self):
        """
        Hook to do things before the element is rendered. Default behavior is
        to do nothing.
        """
        pass

    def postrender(self, element):
        """
        Hook to do things after the element is rendered. Default behavior
        is to do nothing
        """
        pass
55 |
--------------------------------------------------------------------------------
/pyxl/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dropbox/pyxl/1ffc8a3ac88df85da3f6bf7efab39854ea1fdebe/pyxl/examples/__init__.py
--------------------------------------------------------------------------------
/pyxl/examples/hello_world.py:
--------------------------------------------------------------------------------
1 | # coding: pyxl
2 |
3 | from pyxl import html
4 |
5 | print Hello World!
6 |
--------------------------------------------------------------------------------
/pyxl/html.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from pyxl.utils import escape
4 | from pyxl.base import x_base
5 |
6 | # for backwards compatibility.
7 | from pyxl.browser_hacks import x_cond_comment
8 |
# Conditions pushed by _push_condition() and not yet popped by _leave_if().
_if_condition_stack = []
# The condition most recently popped by _leave_if(); None until the first pop.
_last_if_condition = None


def _push_condition(cond):
    """Push `cond` onto the condition stack and return it unchanged."""
    _if_condition_stack.append(cond)
    return cond


def _leave_if():
    """Pop the innermost condition into _last_if_condition; return []."""
    global _last_if_condition
    finished = _if_condition_stack.pop()
    _last_if_condition = finished
    return []
20 |
class x_html_element(x_base):
    """Element serialized as an opening tag, its children and a closing tag."""

    def _to_list(self, l):
        """Append the pieces of this element's markup to the list `l`.

        Attribute values are passed through escape(); children are
        serialized via x_base._render_child_to_list.
        """
        l.extend((u'<', self.__tag__))
        for name, value in self.__attributes__.iteritems():
            l.extend((u' ', name, u'="', escape(value), u'"'))
        l.append(u'>')

        for child in self.__children__:
            x_base._render_child_to_list(child, l)

        # Closing tag: without the "</" prefix the output would end in a
        # bare "tag>".
        l.extend((u'</', self.__tag__, u'>'))
32 |
class x_html_element_nochild(x_base):
    """Element serialized as a single self-closed tag; children are refused."""

    def append(self, child):
        """Always raise: this element does not accept children."""
        # Format the tag into the message; passing it as a second argument
        # to Exception left the '%s' placeholder uninterpolated.
        raise Exception('<%s> does not allow children.' % self.__tag__)

    def _to_list(self, l):
        """Append '<tag attr="value" ... />' pieces to the list `l`."""
        l.extend((u'<', self.__tag__))
        for name, value in self.__attributes__.iteritems():
            l.extend((u' ', name, u'="', escape(value), u'"'))
        l.append(u' />')
42 |
class x_html_comment(x_base):
    """Holds a 'comment' attribute; contributes nothing to the output."""

    __attrs__ = {
        'comment': unicode,
    }

    def _to_list(self, l):
        """Leave `l` untouched: comments are not emitted."""
50 |
class x_html_decl(x_base):
    """Declaration emitted as '<!' + decl + '>'."""

    __attrs__ = {
        'decl': unicode,
    }

    def _to_list(self, l):
        """Append the declaration markup to the list `l`."""
        # The declaration text is emitted as-is (not escaped).
        l.extend((u'<!', self.decl, u'>'))
58 |
class x_html_marked_decl(x_base):
    """Marked section emitted as '<![' + decl + ']]>'."""

    __attrs__ = {
        'decl': unicode,
    }

    def _to_list(self, l):
        """Append the marked-section markup to the list `l`."""
        # The section text is emitted as-is (not escaped).
        l.extend((u'<![', self.decl, u']]>'))
66 |
class x_html_ms_decl(x_base):
    """Microsoft-style declaration emitted as '<![' + decl + ']>'."""

    __attrs__ = {
        'decl': unicode,
    }

    def _to_list(self, l):
        """Append the declaration markup to the list `l`."""
        # The declaration text is emitted as-is (not escaped).
        l.extend((u'<![', self.decl, u']>'))
74 |
class x_rawhtml(x_html_element_nochild):
    """Emits its 'text' attribute verbatim, with no escaping applied."""

    __attrs__ = {
        'text': unicode,
    }

    def _to_list(self, l):
        """Append the text to `l`, decoding non-unicode input as UTF-8."""
        text = self.text
        if isinstance(text, unicode):
            l.append(text)
        else:
            l.append(unicode(text, 'utf8'))
85 |
def rawhtml(text):
    """Return an x_rawhtml node carrying `text`."""
    node = x_rawhtml(text=text)
    return node
88 |
class x_frag(x_base):
    """Wrapper with no markup of its own: only its children are emitted."""

    def _to_list(self, l):
        """Serialize each child, in order, into the list `l`."""
        render_child = self._render_child_to_list
        for node in self.__children__:
            render_child(node, l)
93 |
# Tag classes (first part). Each class picks one of two bases defined in this
# module: x_html_element (open tag, children, close tag) or
# x_html_element_nochild (single self-closed tag whose append() raises).
# __attrs__ maps an attribute name to a Python type.
# NOTE(review): how x_base consumes __attrs__ and derives __tag__ is not
# visible in this module -- confirm against pyxl.base.
class x_a(x_html_element):
    __attrs__ = {
        'href': unicode,
        'rel': unicode,
        'type': unicode,
        'name': unicode,
        'target': unicode,
        'download': unicode,
    }

class x_abbr(x_html_element):
    pass

class x_acronym(x_html_element):
    pass

class x_address(x_html_element):
    pass

class x_area(x_html_element_nochild):
    __attrs__ = {
        'alt': unicode,
        'coords': unicode,
        'href': unicode,
        'nohref': unicode,
        'target': unicode,
    }

class x_article(x_html_element):
    pass

class x_aside(x_html_element):
    pass

class x_audio(x_html_element):
    __attrs__ = {
        'src': unicode
    }

class x_b(x_html_element):
    pass

class x_big(x_html_element):
    pass

class x_blockquote(x_html_element):
    __attrs__ = {
        'cite': unicode,
    }

class x_body(x_html_element):
    __attrs__ = {
        'contenteditable': unicode,
    }

class x_br(x_html_element_nochild):
    pass

class x_button(x_html_element):
    __attrs__ = {
        'disabled': unicode,
        'name': unicode,
        'type': unicode,
        'value': unicode,
    }

class x_canvas(x_html_element):
    __attrs__ = {
        'height': unicode,
        'width': unicode,
    }

class x_caption(x_html_element):
    pass

class x_cite(x_html_element):
    pass

class x_code(x_html_element):
    pass

# x_col and x_colgroup declare the same attributes; 'charoff' and 'span' are
# typed int here, unlike the unicode 'charoff' on the table row/cell classes.
class x_col(x_html_element_nochild):
    __attrs__ = {
        'align': unicode,
        'char': unicode,
        'charoff': int,
        'span': int,
        'valign': unicode,
        'width': unicode,
    }

class x_colgroup(x_html_element):
    __attrs__ = {
        'align': unicode,
        'char': unicode,
        'charoff': int,
        'span': int,
        'valign': unicode,
        'width': unicode,
    }

class x_datalist(x_html_element):
    pass

class x_dd(x_html_element):
    pass

class x_del(x_html_element):
    __attrs__ = {
        'cite': unicode,
        'datetime': unicode,
    }

class x_div(x_html_element):
    __attrs__ = {
        'contenteditable': unicode,
    }

class x_dfn(x_html_element):
    pass

class x_dl(x_html_element):
    pass

class x_dt(x_html_element):
    pass

class x_em(x_html_element):
    pass

# NOTE(review): x_embed uses the children-accepting base, so it is written
# with a separate closing tag -- confirm this is intended.
class x_embed(x_html_element):
    __attrs__ = {
        'src': unicode,
        'width': unicode,
        'height': unicode,
        'allowscriptaccess': unicode,
        'allowfullscreen': unicode,
        'name': unicode,
        'type': unicode,
    }

class x_figure(x_html_element):
    pass

class x_figcaption(x_html_element):
    pass

class x_fieldset(x_html_element):
    pass

class x_footer(x_html_element):
    pass

class x_form(x_html_element):
    __attrs__ = {
        'action': unicode,
        'accept': unicode,
        'accept-charset': unicode,
        'autocomplete': unicode,
        'enctype': unicode,
        'method': unicode,
        'name': unicode,
        'novalidate': unicode,
        'target': unicode,
    }
259 |
class x_form_error(x_base):
    """Placeholder emitted as a self-closed <form:error name="..." /> tag."""

    __attrs__ = {
        'name': unicode
    }

    def _to_list(self, l):
        """Append the placeholder markup for `name` to the list `l`."""
        # NOTE(review): the name is inserted without escape(); confirm
        # callers only pass trusted field names.
        l.extend((u'<form:error name="', self.name, u'" />'))
267 |
# Tag classes (second part). Each class picks one of two bases defined in
# this module: x_html_element (open tag, children, close tag) or
# x_html_element_nochild (single self-closed tag whose append() raises).
# __attrs__ maps an attribute name to a Python type.
# NOTE(review): how x_base consumes __attrs__ and derives __tag__ is not
# visible in this module -- confirm against pyxl.base.
class x_frame(x_html_element_nochild):
    __attrs__ = {
        'frameborder': unicode,
        'longdesc': unicode,
        'marginheight': unicode,
        'marginwidth': unicode,
        'name': unicode,
        'noresize': unicode,
        'scrolling': unicode,
        'src': unicode,
    }

class x_frameset(x_html_element):
    __attrs__ = {
        'rows': unicode,
        'cols': unicode,
    }

class x_h1(x_html_element):
    pass

class x_h2(x_html_element):
    pass

class x_h3(x_html_element):
    pass

class x_h4(x_html_element):
    pass

class x_h5(x_html_element):
    pass

class x_h6(x_html_element):
    pass

class x_head(x_html_element):
    __attrs__ = {
        'profile': unicode,
    }

class x_header(x_html_element):
    pass

class x_hr(x_html_element_nochild):
    pass

class x_html(x_html_element):
    __attrs__ = {
        'content': unicode,
        'scheme': unicode,
        'http-equiv': unicode,
        'xmlns': unicode,
        'xmlns:og': unicode,
        'xmlns:fb': unicode,
    }

class x_i(x_html_element):
    pass

class x_iframe(x_html_element):
    __attrs__ = {
        'frameborder': unicode,
        'height': unicode,
        'longdesc': unicode,
        'marginheight': unicode,
        'marginwidth': unicode,
        'name': unicode,
        'sandbox': unicode,
        'scrolling': unicode,
        'src': unicode,
        'width': unicode,
        # rk: 'allowTransparency' is not in W3C's HTML spec, but it's supported in most modern browsers.
        'allowtransparency': unicode,
        'allowfullscreen': unicode,
    }

# Note: x_video sits here, out of the otherwise alphabetical order.
class x_video(x_html_element):
    __attrs__ = {
        'autoplay': unicode,
        'controls': unicode,
        'height': unicode,
        'loop': unicode,
        'muted': unicode,
        'poster': unicode,
        'preload': unicode,
        'src': unicode,
        'width': unicode,
    }

class x_img(x_html_element_nochild):
    __attrs__ = {
        'alt': unicode,
        'src': unicode,
        'height': unicode,
        'ismap': unicode,
        'longdesc': unicode,
        'usemap': unicode,
        'vspace': unicode,
        'width': unicode,
    }

class x_input(x_html_element_nochild):
    __attrs__ = {
        'accept': unicode,
        'align': unicode,
        'alt': unicode,
        'autofocus': unicode,
        'checked': unicode,
        'disabled': unicode,
        'list': unicode,
        'max': unicode,
        'maxlength': unicode,
        'min': unicode,
        'name': unicode,
        'pattern': unicode,
        'placeholder': unicode,
        'readonly': unicode,
        'size': unicode,
        'src': unicode,
        'step': unicode,
        'type': unicode,
        'value': unicode,
        'autocomplete': unicode,
        'autocorrect': unicode,
        'required': unicode,
        'spellcheck': unicode,
        'multiple': unicode,
    }

class x_ins(x_html_element):
    __attrs__ = {
        'cite': unicode,
        'datetime': unicode,
    }

class x_kbd(x_html_element):
    pass

class x_label(x_html_element):
    __attrs__ = {
        'for': unicode,
    }

class x_legend(x_html_element):
    pass

class x_li(x_html_element):
    pass

class x_link(x_html_element_nochild):
    __attrs__ = {
        'charset': unicode,
        'href': unicode,
        'hreflang': unicode,
        'media': unicode,
        'rel': unicode,
        'rev': unicode,
        'sizes': unicode,
        'target': unicode,
        'type': unicode,
    }

class x_main(x_html_element):
    # we are not enforcing the w3 spec of one and only one main element on the
    # page
    __attrs__ = {
        'role': unicode,
    }

class x_map(x_html_element):
    __attrs__ = {
        'name': unicode,
    }

class x_meta(x_html_element_nochild):
    __attrs__ = {
        'content': unicode,
        'http-equiv': unicode,
        'name': unicode,
        'property': unicode,
        'scheme': unicode,
        'charset': unicode,
    }

class x_nav(x_html_element):
    pass

class x_noframes(x_html_element):
    pass

class x_noscript(x_html_element):
    pass

class x_object(x_html_element):
    __attrs__ = {
        'align': unicode,
        'archive': unicode,
        'border': unicode,
        'classid': unicode,
        'codebase': unicode,
        'codetype': unicode,
        'data': unicode,
        'declare': unicode,
        'height': unicode,
        'hspace': unicode,
        'name': unicode,
        'standby': unicode,
        'type': unicode,
        'usemap': unicode,
        'vspace': unicode,
        'width': unicode,
    }

class x_ol(x_html_element):
    pass

class x_optgroup(x_html_element):
    __attrs__ = {
        'disabled': unicode,
        'label': unicode,
    }

class x_option(x_html_element):
    __attrs__ = {
        'disabled': unicode,
        'label': unicode,
        'selected': unicode,
        'value': unicode,
    }

class x_p(x_html_element):
    pass

# NOTE(review): x_param uses the children-accepting base, so it is written
# with a separate closing tag -- confirm this is intended.
class x_param(x_html_element):
    __attrs__ = {
        'name': unicode,
        'type': unicode,
        'value': unicode,
        'valuetype': unicode,
    }

class x_pre(x_html_element):
    pass

class x_progress(x_html_element):
    __attrs__ = {
        'max': int,
        'value': int,
    }

class x_q(x_html_element):
    __attrs__ = {
        'cite': unicode,
    }

class x_samp(x_html_element):
    pass

class x_script(x_html_element):
    __attrs__ = {
        'async': unicode,
        'charset': unicode,
        'defer': unicode,
        'src': unicode,
        'type': unicode,
    }

class x_section(x_html_element):
    pass

class x_select(x_html_element):
    __attrs__ = {
        'disabled': unicode,
        'multiple': unicode,
        'name': unicode,
        'size': unicode,
        'required': unicode,
    }

class x_small(x_html_element):
    pass

class x_span(x_html_element):
    pass

class x_strong(x_html_element):
    pass

class x_style(x_html_element):
    __attrs__ = {
        'media': unicode,
        'type': unicode,
    }

class x_sub(x_html_element):
    pass

class x_sup(x_html_element):
    pass

class x_table(x_html_element):
    __attrs__ = {
        'border': unicode,
        'cellpadding': unicode,
        'cellspacing': unicode,
        'frame': unicode,
        'rules': unicode,
        'summary': unicode,
        'width': unicode,
    }

# x_tbody, x_tfoot, x_thead and x_tr share the same four attributes, all
# typed unicode (including 'charoff').
class x_tbody(x_html_element):
    __attrs__ = {
        'align': unicode,
        'char': unicode,
        'charoff': unicode,
        'valign': unicode,
    }

class x_td(x_html_element):
    __attrs__ = {
        'abbr': unicode,
        'align': unicode,
        'axis': unicode,
        'char': unicode,
        'charoff': unicode,
        'colspan': unicode,
        'headers': unicode,
        'rowspan': unicode,
        'scope': unicode,
        'valign': unicode,
    }

class x_textarea(x_html_element):
    __attrs__ = {
        'cols': unicode,
        'rows': unicode,
        'disabled': unicode,
        'placeholder': unicode,
        'name': unicode,
        'readonly': unicode,
        'autocorrect': unicode,
        'autocomplete': unicode,
        'autocapitalize': unicode,
        'spellcheck': unicode,
        'autofocus': unicode,
        'required': unicode,
    }

class x_tfoot(x_html_element):
    __attrs__ = {
        'align': unicode,
        'char': unicode,
        'charoff': unicode,
        'valign': unicode,
    }

# Same attributes as x_td except that 'headers' is not declared.
class x_th(x_html_element):
    __attrs__ = {
        'abbr': unicode,
        'align': unicode,
        'axis': unicode,
        'char': unicode,
        'charoff': unicode,
        'colspan': unicode,
        'rowspan': unicode,
        'scope': unicode,
        'valign': unicode,
    }

class x_thead(x_html_element):
    __attrs__ = {
        'align': unicode,
        'char': unicode,
        'charoff': unicode,
        'valign': unicode,
    }

class x_time(x_html_element):
    __attrs__ = {
        'datetime': unicode,
    }

class x_title(x_html_element):
    pass

class x_tr(x_html_element):
    __attrs__ = {
        'align': unicode,
        'char': unicode,
        'charoff': unicode,
        'valign': unicode,
    }

class x_tt(x_html_element):
    pass

class x_u(x_html_element):
    pass

class x_ul(x_html_element):
    pass

class x_var(x_html_element):
    pass
674 |
--------------------------------------------------------------------------------
/pyxl/rss.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | from pyxl.utils import escape
4 | from pyxl.base import x_base
5 |
class x_rss_element(x_base):
    """RSS element serialized as an open tag, its children and a close tag.

    Subclasses may override _handle_attribute() to rename or rewrite an
    attribute just before it is written.
    """

    def _to_list(self, l):
        """Append the pieces of this element's markup to the list `l`."""
        l.extend((u'<', self.__tag__))
        for name, value in self.__attributes__.iteritems():
            name, value = self._handle_attribute(name, value)
            l.extend((u' ', name, u'="', escape(value), u'"'))
        l.append(u'>')

        for child in self.__children__:
            x_base._render_child_to_list(child, l)

        # Closing tag: without the "</" prefix the output would end in a
        # bare "tag>".
        l.extend((u'</', self.__tag__, u'>'))

    def _handle_attribute(self, name, value):
        """Return the (name, value) pair to emit; unchanged by default."""
        return (name, value)
21 |
class x_rss_decl_standalone(x_base):
    """Emits the XML declaration that opens a standalone document."""

    def _to_list(self, l):
        """Append the XML declaration to the list `l`."""
        l.append('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>')
25 |
class x_rss(x_rss_element):
    """The rss element; a truthy 'uses-dublin-core' becomes an xmlns:dc attribute."""

    __attrs__ = {
        'version':unicode,
        'uses-dublin-core':bool
    }

    def _handle_attribute(self, name, value):
        """Swap a truthy 'uses-dublin-core' flag for the Dublin Core namespace.

        Every other attribute, including a falsy 'uses-dublin-core', is
        returned unchanged.
        """
        if name == 'uses-dublin-core' and value:
            return ('xmlns:dc', 'http://purl.org/dc/elements/1.1/')
        return (name, value)
37 |
# Plain RSS elements: they declare no attributes and add no behavior of
# their own on top of x_rss_element.
class x_channel(x_rss_element):
    pass

class x_title(x_rss_element):
    pass

class x_link(x_rss_element):
    pass

class x_description(x_rss_element):
    pass

class x_language(x_rss_element):
    pass
52 |
class x_rss_date_element(x_base):
    """Element whose content is its 'date' attribute formatted as text."""

    __attrs__ = {
        'date':datetime.datetime
    }

    def _to_list(self, l):
        """Append '<tag>formatted date</tag>' pieces to the list `l`."""
        l.extend((u'<', self.__tag__, '>'))
        # NOTE(review): the literal 'GMT' suffix is written without any
        # timezone conversion -- presumably `date` is expected to be UTC.
        l.append(unicode(self.date.strftime('%a, %d %b %Y %H:%M:%S GMT')))
        # Closing tag: without the "</" prefix the output would end in a
        # bare "tag>".
        l.extend((u'</', self.__tag__, u'>'))
62 |
# Date-valued elements: all behavior comes from x_rss_date_element.
class x_lastBuildDate(x_rss_date_element):
    pass

class x_pubDate(x_rss_date_element):
    pass

# Plain elements: all behavior comes from x_rss_element.
class x_ttl(x_rss_element):
    pass

class x_item(x_rss_element):
    pass
74 |
class x_guid(x_rss_element):
    """The guid element; 'is-perma-link' is written out as isPermaLink."""

    __attrs__ = {
        'is-perma-link':bool
    }

    def _handle_attribute(self, name, value):
        """Rename 'is-perma-link' and spell its value as 'true'/'false'."""
        # This is needed because pyxl doesn't support mixed case attribute names.
        if name != 'is-perma-link':
            return (name, value)
        if value:
            return ('isPermaLink', 'true')
        return ('isPermaLink', 'false')
86 |
class x_creator(x_rss_element):
    """Creator element written with the dc: namespace prefix.

    Attributes are not emitted; only the children are wrapped.
    """

    def _to_list(self, l):
        """Append '<dc:creator>', the children and '</dc:creator>' to `l`."""
        l.append(u'<dc:creator>')
        for child in self.__children__:
            x_base._render_child_to_list(child, l)
        l.append(u'</dc:creator>')
93 |
--------------------------------------------------------------------------------
/pyxl/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
--------------------------------------------------------------------------------
/pyxl/scripts/parse_file.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys
4 | from pyxl.codec.tokenizer import pyxl_tokenize, pyxl_untokenize
5 |
6 | f = open(sys.argv[1], 'r')
7 | print pyxl_untokenize(pyxl_tokenize(f.readline)),
8 |
--------------------------------------------------------------------------------
/pyxl/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import xml.sax.saxutils
4 |
xml_escape = xml.sax.saxutils.escape
xml_unescape = xml.sax.saxutils.unescape

# Extra replacements applied on top of the &, < and > handling built into
# xml.sax.saxutils. The double quote must map to its entity so that escaped
# values are safe inside double-quoted attributes.
escape_other = {
    '"': '&quot;',
}
unescape_other = {
    '&quot;': '"',
}

def escape(obj):
    """Return unicode(obj) with &, <, > and " replaced by XML entities."""
    return xml_escape(unicode(obj), escape_other)

def unescape(obj):
    """Return unicode(obj) with the entities written by escape() decoded."""
    return xml_unescape(unicode(obj), unescape_other)
19 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import distutils.core
4 | import sys
5 |
version = "1.0"

# Full project blurb. It goes into long_description because distutils writes
# `description` as the one-line Summary field of the package metadata, and a
# multi-line value there produces malformed metadata.
long_description = """
Pyxl is an open source package that extends Python to support inline HTML. It converts
HTML fragments into valid Python expressions, and is meant as a replacement for traditional
python templating systems like Mako or Cheetah. It automatically escapes data, enforces
correct markup and makes it easier to write reusable and well structured UI code.
Pyxl was inspired by the XHP project at Facebook.
"""

distutils.core.setup(
    name="pyxl",
    version=version,
    packages = ["pyxl", "pyxl.codec", "pyxl.scripts", "pyxl.examples"],
    author="Akhil Wable",
    author_email="akhil.wable@gmail.com",
    url="http://github.com/awable/pyxl",
    download_url="http://github.com/downloads/awable/pyxl/pyxl-%s.tar.gz" % version,
    license="http://www.apache.org/licenses/LICENSE-2.0",
    description="An open source package that extends Python to support inline HTML.",
    long_description=long_description,
)
25 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | import pyxl.codec.register
2 |
--------------------------------------------------------------------------------
/tests/error_cases/if_1.py.txt:
--------------------------------------------------------------------------------
1 | # coding: pyxl
2 |
3 | a = (
4 | foo
5 | this is incorrect!
6 | bar
7 | )
8 |
--------------------------------------------------------------------------------
/tests/error_cases/if_2.py.txt:
--------------------------------------------------------------------------------
1 | # coding: pyxl
2 |
3 | a = (
4 | foo
5 | bar
6 | baz
7 | )
8 |
--------------------------------------------------------------------------------
/tests/error_cases/if_3.py.txt:
--------------------------------------------------------------------------------
1 | # coding: pyxl
2 |
3 | a = (
4 | foo
5 | bar
6 | )
7 |
--------------------------------------------------------------------------------
/tests/test_attr_name_case.py:
--------------------------------------------------------------------------------
1 | # coding: pyxl
2 | from pyxl import html
3 | def test():
4 | assert str() == ''
5 |
--------------------------------------------------------------------------------
/tests/test_basic.py:
--------------------------------------------------------------------------------
1 | # coding: pyxl
2 | import unittest2
3 | from pyxl import html
4 | from pyxl.base import PyxlException, x_base
5 |
6 | class PyxlTests(unittest2.TestCase):
7 |
8 | def test_basics(self):
9 | self.assertEqual(.to_string(), '')
10 | self.assertEqual(.to_string(), '')
11 | self.assertEqual(.to_string(), '')
12 | self.assertEqual(
) == ''
7 |
--------------------------------------------------------------------------------
/vim/ftdetect/pyxl.vim:
--------------------------------------------------------------------------------
function! Detect_pyxl()
    " Switch the buffer's filetype to pyxl when line 1 or line 2 contains a
    " 'coding: pyxl' / 'coding=pyxl' declaration ('pyxl' must end the word).
    let re = 'coding[:=]\s*pyxl\>'
    if getline(1) =~ re || getline(2) =~ re
        set ft=pyxl
    endif
endfunction
7 |
8 | augroup filetypedetect
9 | au BufRead,BufNewFile * call Detect_pyxl()
10 | augroup END
11 |
--------------------------------------------------------------------------------
/vim/indent/pyxl.vim:
--------------------------------------------------------------------------------
1 | " Pyxl indent file
2 | "
3 | " This file is the unholy spawn of a python indent file from the vim script
4 | " database and the standard, html, and xml indent files.
5 | "
6 | " BUG: atrociously slow; takes about four seconds to reindent 200 lines.
7 | "
8 | " Language: Pyxl
9 | " Maintainer: Josiah Boning
10 | " Last Change: 2012 Sep 16
11 | " Credits:
12 | " Python credits:
13 | " Eric Mc Sween
14 | " David Bustos
15 | " HTML and XML credits:
16 | " Johannes Zellner
17 |
18 | " Only load this indent file when no other was loaded.
19 | if exists("b:did_indent")
20 | finish
21 | endif
22 | let b:did_indent = 1
23 |
" Buffer-local indent settings: spaces only, no lisp indenting, and
" GetPyxlIndent() as the indent expression.
setlocal expandtab
setlocal nolisp
setlocal autoindent
setlocal indentexpr=GetPyxlIndent(v:lnum)
" Re-indent on the usual python triggers plus the markup ones ('>', '<', '/',
" braces). The final entry is '*<Return>' (re-indent before inserting a line
" break); a bare '*' names no key.
setlocal indentkeys=!^F,o,O,<:>,0),0],0},=elif,=except,<>>,<<>,/,{,},*<Return>
29 |
30 | let s:maxoff = 50
31 |
32 | " Find backwards the closest open parenthesis/bracket/brace.
function! s:SearchParensPair()
    " Starting from the cursor, search backwards for the nearest unmatched
    " '(', '[' or '{'. Returns the line number reported by searchpair() for
    " the closest of the three (callers treat a value below 1 as 'none
    " found') and, when it is positive, leaves the cursor on that opener.
    let line = line('.')
    let col = col('.')

    " Skip strings and comments and don't look too far
    " NOTE(review): 'dummy' is not defined in this file; presumably
    " evaluating it is meant to abort the search once it has moved more than
    " s:maxoff lines up -- confirm.
    let skip = "line('.') < " . (line - s:maxoff) . " ? dummy :" .
                \ 'synIDattr(synID(line("."), col("."), 0), "name") =~? ' .
                \ '"string\\|comment"'

    " Search for parentheses
    call cursor(line, col)
    let parlnum = searchpair('(', '', ')', 'bW', skip)
    let parcol = col('.')

    " Search for brackets
    call cursor(line, col)
    let par2lnum = searchpair('\[', '', '\]', 'bW', skip)
    let par2col = col('.')

    " Search for braces
    call cursor(line, col)
    let par3lnum = searchpair('{', '', '}', 'bW', skip)
    let par3col = col('.')

    " Get the closest match
    " (the later line wins; on the same line the greater column wins)
    if par2lnum > parlnum || (par2lnum == parlnum && par2col > parcol)
        let parlnum = par2lnum
        let parcol = par2col
    endif
    if par3lnum > parlnum || (par3lnum == parlnum && par3col > parcol)
        let parlnum = par3lnum
        let parcol = par3col
    endif

    " Put the cursor on the match
    if parlnum > 0
        call cursor(parlnum, parcol)
    endif
    return parlnum
endfunction
73 |
74 | " Find the start of a multi-line statement
function! s:StatementStart(lnum)
    " Return the line number on which the statement containing a:lnum
    " starts. Moves up one line while the previous line ends in a backslash;
    " otherwise jumps to the line of the enclosing open bracket reported by
    " s:SearchParensPair() and repeats, stopping when none is found.
    " Side effect: the cursor is moved.
    let lnum = a:lnum
    while 1
        if getline(lnum - 1) =~ '\\$'
            let lnum = lnum - 1
        else
            call cursor(lnum, 1)
            let maybe_lnum = s:SearchParensPair()
            if maybe_lnum < 1
                return lnum
            else
                let lnum = maybe_lnum
            endif
        endif
    endwhile
endfunction
91 |
92 | " Find the block starter that matches the current line
function! s:BlockStarter(lnum, block_start_re)
    " Scan upward from a:lnum over non-blank lines, considering only lines
    " indented less than every line inspected so far, and return the number
    " of the first such line matching a:block_start_re. Returns -1 when a
    " non-matching line at indent 0 is reached or the top of the buffer is
    " hit.
    let lnum = a:lnum
    let maxindent = 10000 " whatever
    while lnum > 1
        let lnum = prevnonblank(lnum - 1)
        if indent(lnum) < maxindent
            if getline(lnum) =~ a:block_start_re
                return lnum
            else
                let maxindent = indent(lnum)
                " It's not worth going further if we reached the top level
                if maxindent == 0
                    return -1
                endif
            endif
        endif
    endwhile
    return -1
endfunction
112 |
113 | let s:cpo_save = &cpo
114 | set cpo-=C
115 |
" Default patterns used to count tags on a line. Each starts with a
" non-greedy '.\{-}' so that one match covers the text up to and including
" one tag start.
if !exists('b:xml_indent_open')
    let b:xml_indent_open = '.\{-}<\a'
    " pre tag, e.g. <address>
    " let b:xml_indent_open = '.\{-}<[/]\@!\(address\)\@!'
endif

if !exists('b:xml_indent_close')
    " A closing tag starts with '</'; a pattern of only '.\{-}' can match
    " the empty string and would not identify closing tags at all.
    let b:xml_indent_close = '.\{-}</'
    " end pre tag, e.g. </address>
    " let b:xml_indent_close = '.\{-}</\(address\)\@!'
endif
127 |
fun! XmlIndentWithPattern(line, pat)
    " Count matches of a:pat in a:line: every match in 'x'.a:line is replaced
    " by a single \1 byte, everything from the first non-\1 character onward
    " is removed, and the length of what remains is returned.
    let s = substitute('x'.a:line, a:pat, "\1", 'g')
    return strlen(substitute(s, "[^\1].*$", '', ''))
endfun
132 |
133 | " [-- return the sum of indents of a:lnum --]
fun! XmlIndentSum(lnum, style)
    " Return the net tag balance of line a:lnum: opening tags minus closing
    " tags minus self-closed ('/>') tags.
    "
    " a:style selects which lines are counted: match() yields 0 when the line
    " begins (after whitespace) with a closing tag and -1 when it does not,
    " so style 0 counts only lines that start with '</' and style -1 counts
    " only the others. Lines that are not selected contribute 0.
    let line = getline(a:lnum)
    if a:style == match(line, '^\s*</')
        return (
                    \ (XmlIndentWithPattern(line, b:xml_indent_open)
                    \ - XmlIndentWithPattern(line, b:xml_indent_close)
                    \ - XmlIndentWithPattern(line, '.\{-}/>')))
    else
        return 0
    endif
endfun
145 |
fun! XmlIndentSumLines(startnum, endnum, style)
    " Add up XmlIndentSum(lnum, a:style) for every line from a:startnum to
    " a:endnum, both inclusive.
    let sum = 0
    for lnum in range(a:startnum, a:endnum)
        let sum = sum + XmlIndentSum(lnum, a:style)
    endfor
    return sum
endfun
153 |
fun! GetMarkupIndent(lnum)
    " Compute the indent for line a:lnum from the markup on it and on the
    " previous non-blank line. Returns 0 at the start of the file, -1 (keep
    " the current indent) inside <pre> ... </pre>, and otherwise the previous
    " line's indent moved by one 'shiftwidth' according to the tag balance.
    " 'ignorecase' is switched on for the duration and switched back off
    " before returning if it was off on entry.

    " Find a non-empty line above the current line.
    let lnum = prevnonblank(a:lnum - 1)

    " Hit the start of the file, use zero indent.
    if lnum == 0
        return 0
    endif

    let restore_ic = &ic
    setlocal ic " ignore case

    " [-- special handling for <pre>: no indenting --]
    if getline(a:lnum) =~ '\c</pre>'
                \ || 0 < searchpair('\c<pre>', '', '\c</pre>', 'nWb')
                \ || 0 < searchpair('\c<pre>', '', '\c</pre>', 'nW')
        " we're in a line with </pre> or inside <pre> ... </pre>
        if restore_ic == 0
            setlocal noic
        endif
        return -1
    endif

    "" The javascript indentation doesn't really work, since curly braces are
    "" handled by the python indentation.

    "" [-- special handling for <javascript>: use cindent --]
    "let js = '<script.*type\s*=\s*.*java'

    "" Note dated 05 Jun 2006:
    "" ZDR: This needs to be an AND (we are 'after the start of the pair' AND
    ""      we are 'before the end of the pair').  Otherwise, indentation
    ""      before the start of the script block will be affected; the end of
    ""      the pair will still match if we are before the beginning of the
    ""      pair.
    ""
    "if   0 < searchpair(js, '', '</script>', 'nWb')
    "\ && 0 < searchpair(js, '', '</script>', 'nW')
    "" we're inside javascript
    "if getline(lnum) !~ js && getline(a:lnum) != '</script>'
    "    if restore_ic == 0
    "        setlocal noic
    "    endif
    "    return cindent(a:lnum)
    "endif
    "endif

    if getline(lnum) =~ '\c</pre>'
        " line before the current line a:lnum contains
        " a closing </pre>. --> search for line before
        " starting <pre> to restore the indent.
        let preline = prevnonblank(search('\c<pre>', 'bW') - 1)
        if preline > 0
            if restore_ic == 0
                setlocal noic
            endif
            return indent(preline)
        endif
    endif

    if restore_ic == 0
        setlocal noic
    endif

    " Tag balance: tags on the previous line (when it does not start with a
    " closing tag) plus tags on the current line (when it does).
    let ind = XmlIndentSum(lnum, -1)
    let ind = ind + XmlIndentSum(a:lnum, 0)

    if ind > 0
        return indent(lnum) + &sw
    elseif ind < 0
        return indent(lnum) - &sw
    else
        return indent(lnum)
    endif
endfun
230 |
231 | function! GetPyxlIndent(lnum)
232 | " Return the indent for line a:lnum (Python rules plus markup nesting inside brackets); -1 keeps the current indent.
233 | " First line has indent 0
234 | if a:lnum == 1
235 | return 0
236 | endif
237 |
238 | " Examine previous line and find the line where its statement starts
239 | let plnum = a:lnum - 1
240 | let pline = getline(plnum)
241 | let sslnum = s:StatementStart(plnum)
242 |
243 | " If we can find an open parenthesis/bracket/brace, line up with it, then
244 | " apply any XML indentation (the search below leaves the cursor on the bracket).
245 | call cursor(a:lnum, 1)
246 | let parlnum = s:SearchParensPair()
247 | if parlnum > 0
248 | let parcol = col('.')
249 | let closing_paren = match(getline(a:lnum), '^\s*[])}]') != -1
250 | if match(getline(parlnum), '[([{]\s*$', parcol - 1) != -1
251 | if closing_paren
252 | return indent(parlnum)
253 | else
254 | if plnum == parlnum
255 | return indent(parlnum) + &shiftwidth
256 | else
257 | let ind = XmlIndentSumLines(parlnum, a:lnum, -1)
258 | let ind = ind + XmlIndentSumLines(parlnum, a:lnum, 0)
259 | if ind > 0
260 | return GetMarkupIndent(a:lnum)
261 | else
262 | return indent(parlnum) + &sw
263 | endif
264 | endif
265 | endif
266 | else
267 | if closing_paren
268 | return parcol - 1
269 | else
270 | if plnum == parlnum
271 | let ind = XmlIndentSum(plnum, -1)
272 | let ind = ind + XmlIndentSum(a:lnum, 0)
273 | return parcol + (&sw * ind)
274 | else
275 | let ind = XmlIndentSumLines(parlnum, a:lnum, -1)
276 | let ind = ind + XmlIndentSumLines(parlnum, a:lnum, 0)
277 | if ind > 0
278 | return GetMarkupIndent(a:lnum)
279 | else
280 | return parcol
281 | endif
282 | endif
283 | endif
284 | endif
285 | endif
286 |
287 | " Examine this line
288 | let thisline = getline(a:lnum)
289 | let thisindent = indent(a:lnum)
290 |
291 | " If the line starts with 'elif' or 'else', line up with 'if' or 'elif'
292 | if thisline =~ '^\s*\(elif\|else\)\>'
293 | let bslnum = s:BlockStarter(a:lnum, '^\s*\(if\|elif\)\>')
294 | if bslnum > 0
295 | return indent(bslnum)
296 | else
297 | return -1
298 | endif
299 | endif
300 |
301 | " If the line starts with 'except' or 'finally', line up with 'try'
302 | " or 'except'
303 | if thisline =~ '^\s*\(except\|finally\)\>'
304 | let bslnum = s:BlockStarter(a:lnum, '^\s*\(try\|except\)\>')
305 | if bslnum > 0
306 | return indent(bslnum)
307 | else
308 | return -1
309 | endif
310 | endif
311 |
312 | " If the previous line is blank, keep the same indentation
313 | if pline =~ '^\s*$'
314 | return -1
315 | endif
316 |
317 | " If this line is explicitly joined, try to find an indentation that looks
318 | " good.
319 | if pline =~ '\\$'
320 | let compound_statement = '^\s*\(if\|while\|for\s.*\sin\|except\)\s*'
321 | let maybe_indent = matchend(getline(sslnum), compound_statement)
322 | if maybe_indent != -1
323 | return maybe_indent
324 | else
325 | return indent(sslnum) + &sw * 2
326 | endif
327 | endif
328 |
329 | " If the previous line ended with a colon, indent relative to
330 | " statement start.
331 | if pline =~ ':\s*$'
332 | return indent(sslnum) + &sw
333 | endif
334 |
335 | " If the previous line was a stop-execution statement or a pass
336 | if getline(sslnum) =~ '^\s*\(break\|continue\|raise\|return\|pass\)\>'
337 | " See if the user has already dedented
338 | if indent(a:lnum) > indent(sslnum) - &sw
339 | " If not, recommend one dedent
340 | return indent(sslnum) - &sw
341 | endif
342 | " Otherwise, trust the user
343 | return -1
344 | endif
345 |
346 | " If the previous line closed a bracket, match the beginning of that
347 | " statement (same result as the fallback below; kept explicit for clarity).
348 | if pline =~ '[])}]$'
349 | return indent(sslnum)
350 | endif
351 |
352 | "" In all other cases, line up with the start of the previous statement.
353 | return indent(sslnum)
354 |
355 | endfun
356 |
357 | let &cpo = s:cpo_save
358 | unlet s:cpo_save
359 |
360 | " for debugging
361 | "map yy :echo GetPyxlIndent(line('.'))
362 |
--------------------------------------------------------------------------------
/vim/syntax/pyxl.vim:
--------------------------------------------------------------------------------
1 | " Pyxl syntax file
2 | "
3 | " This file is the unholy spawn of the standard python and html syntax files.
4 | "
5 | " Language: Python with Pyxl support
6 | " Maintainer: Josiah Boning
7 | " Last Change: 2012 Sep 04
8 | " Credits:
9 | "
10 | " Python credits:
11 | " Neil Schemenauer
12 | " Zvezdan Petkovic
13 | " Neil Schemenauer
14 | " Dmitry Vasiliev
15 | "
16 | " This version is a major rewrite by Zvezdan Petkovic.
17 | "
18 | " - introduced highlighting of doctests
19 | " - updated keywords, built-ins, and exceptions
20 | " - corrected regular expressions for
21 | "
22 | " * functions
23 | " * decorators
24 | " * strings
25 | " * escapes
26 | " * numbers
27 | " * space error
28 | "
29 | " - corrected synchronization
30 | " - more highlighting is ON by default, except
31 | " - space error highlighting is OFF by default
32 | "
33 | " HTML credits:
34 | " Claudio Fleiner
35 | "
36 | "
37 | "
38 | "
39 | " Optional highlighting can be controlled using these variables.
40 | "
41 | " let python_no_builtin_highlight = 1
42 | " let python_no_doctest_code_highlight = 1
43 | " let python_no_doctest_highlight = 1
44 | " let python_no_exception_highlight = 1
45 | " let python_no_number_highlight = 1
46 | " let python_space_error_highlight = 1
47 | "
48 | " All the options above can be switched on together.
49 | "
50 | " let python_highlight_all = 1
51 | "
52 |
53 | " For version 5.x: Clear all syntax items.
54 | " For version 6.x: Quit when a syntax file was already loaded.
55 | if version < 600
56 | syntax clear
57 | elseif exists("b:current_syntax")
58 | finish
59 | endif
60 |
61 | if !exists("main_syntax")
62 | let main_syntax = 'pyxl'
63 | endif
64 |
65 |
66 | """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
67 | " Python
68 | """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
69 |
70 | " We need nocompatible mode in order to continue lines with backslashes.
71 | " Original setting will be restored.
72 | let s:cpo_save = &cpo
73 | set cpo&vim
74 |
75 | " Keep Python keywords in alphabetical order inside groups for easy
76 | " comparison with the table in the 'Python Language Reference'
77 | " http://docs.python.org/reference/lexical_analysis.html#keywords.
78 | " Groups are in the order presented in NAMING CONVENTIONS in syntax.txt.
79 | " Exceptions come last at the end of each group (class and def below).
80 | "
81 | " Keywords 'with' and 'as' are new in Python 2.6
82 | " (use 'from __future__ import with_statement' in Python 2.5).
83 | "
84 | " Some compromises had to be made to support both Python 3.0 and 2.6.
85 | " We include Python 3.0 features, but when a definition is duplicated,
86 | " the last definition takes precedence.
87 | "
88 | " - 'False', 'None', and 'True' are keywords in Python 3.0 but they are
89 | " built-ins in 2.6 and will be highlighted as built-ins below.
90 | " - 'exec' is a built-in in Python 3.0 and will be highlighted as
91 | " built-in below.
92 | " - 'nonlocal' is a keyword in Python 3.0 and will be highlighted.
93 | " - 'print' is a built-in in Python 3.0 and will be highlighted as
94 | " built-in below (use 'from __future__ import print_function' in 2.6)
95 | " NOTE: keyword names are separated by whitespace only; a comma would become part of the name.
96 | syn keyword pythonStatement False None True
97 | syn keyword pythonStatement as assert break continue del exec global
98 | syn keyword pythonStatement lambda nonlocal pass print return with yield
99 | syn keyword pythonStatement class def nextgroup=pythonFunction skipwhite
100 | syn keyword pythonConditional elif else if
101 | syn keyword pythonRepeat for while
102 | syn keyword pythonOperator and in is not or
103 | syn keyword pythonException except finally raise try
104 | syn keyword pythonInclude from import
105 |
106 | " Decorators (new in Python 2.4)
107 | syn match pythonDecorator "@" display nextgroup=pythonFunction skipwhite
108 | " The zero-length non-grouping match before the function name is
109 | " extremely important in pythonFunction. Without it, everything is
110 | " interpreted as a function inside the contained environment of
111 | " doctests.
112 | " A dot must be allowed because of @MyClass.myfunc decorators.
113 | syn match pythonFunction
114 | \ "\%(\%(def\s\|class\s\|@\)\s*\)\@<=\h\%(\w\|\.\)*" contained
115 |
116 | syn match pythonComment "#.*$" contains=pythonTodo,@Spell
117 | syn keyword pythonTodo FIXME NOTE NOTES TODO XXX contained
118 |
119 | " Triple-quoted strings can contain doctests.
120 | syn region pythonString
121 | \ start=+[uU]\=\z(['"]\)+ end="\z1" skip="\\\\\|\\\z1"
122 | \ contains=pythonEscape,@Spell
123 | syn region pythonString
124 | \ start=+[uU]\=\z('''\|"""\)+ end="\z1" keepend
125 | \ contains=pythonEscape,pythonSpaceError,pythonDoctest,@Spell
126 | syn region pythonRawString
127 | \ start=+[uU]\=[rR]\z(['"]\)+ end="\z1" skip="\\\\\|\\\z1"
128 | \ contains=@Spell
129 | syn region pythonRawString
130 | \ start=+[uU]\=[rR]\z('''\|"""\)+ end="\z1" keepend
131 | \ contains=pythonSpaceError,pythonDoctest,@Spell
132 |
133 | syn match pythonEscape +\\[abfnrtv'"\\]+ contained
134 | syn match pythonEscape "\\\o\{1,3}" contained
135 | syn match pythonEscape "\\x\x\{2}" contained
136 | syn match pythonEscape "\%(\\u\x\{4}\|\\U\x\{8}\)" contained
137 | " Python allows case-insensitive Unicode IDs: http://www.unicode.org/charts/
138 | syn match pythonEscape "\\N{\a\+\%(\s\a\+\)*}" contained
139 | syn match pythonEscape "\\$"
140 |
141 | if exists("python_highlight_all")
142 | if exists("python_no_builtin_highlight")
143 | unlet python_no_builtin_highlight
144 | endif
145 | if exists("python_no_doctest_code_highlight")
146 | unlet python_no_doctest_code_highlight
147 | endif
148 | if exists("python_no_doctest_highlight")
149 | unlet python_no_doctest_highlight
150 | endif
151 | if exists("python_no_exception_highlight")
152 | unlet python_no_exception_highlight
153 | endif
154 | if exists("python_no_number_highlight")
155 | unlet python_no_number_highlight
156 | endif
157 | let python_space_error_highlight = 1
158 | endif
159 |
160 | " It is very important to understand all details before changing the
161 | " regular expressions below or their order.
162 | " The word boundaries are *not* the floating-point number boundaries
163 | " because of a possible leading or trailing decimal point.
164 | " The expressions below ensure that all valid number literals are
165 | " highlighted, and invalid number literals are not. For example,
166 | "
167 | " - a decimal point in '4.' at the end of a line is highlighted,
168 | " - a second dot in 1.0.0 is not highlighted,
169 | " - 08 is not highlighted,
170 | " - 08e0 or 08j are highlighted,
171 | "
172 | " and so on, as specified in the 'Python Language Reference'.
173 | " http://docs.python.org/reference/lexical_analysis.html#numeric-literals
174 | if !exists("python_no_number_highlight")
175 | " numbers (including longs and complex)
176 | syn match pythonNumber "\<0[oO]\=\o\+[Ll]\=\>"
177 | syn match pythonNumber "\<0[xX]\x\+[Ll]\=\>"
178 | syn match pythonNumber "\<0[bB][01]\+[Ll]\=\>"
179 | syn match pythonNumber "\<\%([1-9]\d*\|0\)[Ll]\=\>"
180 | syn match pythonNumber "\<\d\+[jJ]\>"
181 | syn match pythonNumber "\<\d\+[eE][+-]\=\d\+[jJ]\=\>"
182 | syn match pythonNumber
183 | \ "\<\d\+\.\%([eE][+-]\=\d\+\)\=[jJ]\=\%(\W\|$\)\@="
184 | syn match pythonNumber
185 | \ "\%(^\|\W\)\@<=\d*\.\d\+\%([eE][+-]\=\d\+\)\=[jJ]\=\>"
186 | endif
187 |
188 | " Group the built-ins in the order in the 'Python Library Reference' for
189 | " easier comparison.
190 | " http://docs.python.org/library/constants.html
191 | " http://docs.python.org/library/functions.html
192 | " http://docs.python.org/library/functions.html#non-essential-built-in-functions
193 | " Python built-in functions are in alphabetical order.
194 | if !exists("python_no_builtin_highlight")
195 | " built-in constants
196 | " 'False', 'True', and 'None' are also reserved words in Python 3.0
197 | syn keyword pythonBuiltin False True None
198 | syn keyword pythonBuiltin NotImplemented Ellipsis __debug__
199 | " built-in functions
200 | syn keyword pythonBuiltin abs all any bin bool chr classmethod
201 | syn keyword pythonBuiltin compile complex delattr dict dir divmod
202 | syn keyword pythonBuiltin enumerate eval filter float format
203 | syn keyword pythonBuiltin frozenset getattr globals hasattr hash
204 | syn keyword pythonBuiltin help hex id input int isinstance
205 | syn keyword pythonBuiltin issubclass iter len list locals map max
206 | syn keyword pythonBuiltin min next object oct open ord pow print
207 | syn keyword pythonBuiltin property range repr reversed round set
208 | syn keyword pythonBuiltin setattr slice sorted staticmethod str
209 | syn keyword pythonBuiltin sum super tuple type vars zip __import__
210 | " Python 2.6 only
211 | syn keyword pythonBuiltin basestring callable cmp execfile file
212 | syn keyword pythonBuiltin long raw_input reduce reload unichr
213 | syn keyword pythonBuiltin unicode xrange
214 | " Python 3.0 only
215 | syn keyword pythonBuiltin ascii bytearray bytes exec memoryview
216 | " non-essential built-in functions; Python 2.6 only
217 | syn keyword pythonBuiltin apply buffer coerce intern
218 | endif
219 |
220 | " From the 'Python Library Reference' class hierarchy at the bottom.
221 | " http://docs.python.org/library/exceptions.html
222 | if !exists("python_no_exception_highlight")
223 | " builtin base exceptions (only used as base classes for other exceptions)
224 | syn keyword pythonExceptions BaseException Exception
225 | syn keyword pythonExceptions ArithmeticError EnvironmentError
226 | syn keyword pythonExceptions LookupError
227 | " builtin base exception removed in Python 3.0
228 | syn keyword pythonExceptions StandardError
229 | " builtin exceptions (actually raised)
230 | syn keyword pythonExceptions AssertionError AttributeError BufferError
231 | syn keyword pythonExceptions EOFError FloatingPointError GeneratorExit
232 | syn keyword pythonExceptions IOError ImportError IndentationError
233 | syn keyword pythonExceptions IndexError KeyError KeyboardInterrupt
234 | syn keyword pythonExceptions MemoryError NameError NotImplementedError
235 | syn keyword pythonExceptions OSError OverflowError ReferenceError
236 | syn keyword pythonExceptions RuntimeError StopIteration SyntaxError
237 | syn keyword pythonExceptions SystemError SystemExit TabError TypeError
238 | syn keyword pythonExceptions UnboundLocalError UnicodeError
239 | syn keyword pythonExceptions UnicodeDecodeError UnicodeEncodeError
240 | syn keyword pythonExceptions UnicodeTranslateError ValueError VMSError
241 | syn keyword pythonExceptions WindowsError ZeroDivisionError
242 | " builtin warnings
243 | syn keyword pythonExceptions BytesWarning DeprecationWarning FutureWarning
244 | syn keyword pythonExceptions ImportWarning PendingDeprecationWarning
245 | syn keyword pythonExceptions RuntimeWarning SyntaxWarning UnicodeWarning
246 | syn keyword pythonExceptions UserWarning Warning
247 | endif
248 |
249 | if exists("python_space_error_highlight")
250 | " trailing whitespace
251 | syn match pythonSpaceError display excludenl "\s\+$"
252 | " mixed tabs and spaces
253 | syn match pythonSpaceError display " \+\t"
254 | syn match pythonSpaceError display "\t\+ "
255 | endif
256 |
257 | " Do not spell doctests inside strings.
258 | " Notice that the end of a string, either ''', or """, will end the contained
259 | " doctest too. Thus, we do *not* need to have it as an end pattern.
260 | if !exists("python_no_doctest_highlight")
261 | if !exists("python_no_doctest_code_highlight")
262 | syn region pythonDoctest
263 | \ start="^\s*>>>\s" end="^\s*$"
264 | \ contained contains=ALLBUT,pythonDoctest,@Spell
265 | syn region pythonDoctestValue
266 | \ start=+^\s*\%(>>>\s\|\.\.\.\s\|"""\|'''\)\@!\S\++ end="$"
267 | \ contained
268 | else
269 | syn region pythonDoctest
270 | \ start="^\s*>>>" end="^\s*$"
271 | \ contained contains=@NoSpell
272 | endif
273 | endif
274 |
275 | " Sync at the beginning of class, function, or method definition.
276 | syn sync match pythonSync grouphere NONE "^\s*\%(def\|class\)\s\+\h\w*\s*("
277 |
278 |
279 | " The Pyxl special sauce.
280 | syn cluster pythonGroup contains=pythonStatement,pythonOperator,pythonConditional,pythonString,pythonEscape,pythonNumber,pythonBuiltin
281 | syn region pyxlPythonNormal contained start="{\@<=" end="}\@=" contains=@pythonGroup
282 | syn region pyxlPython start="[^\\]\@<={" end="}" contains=pyxlPythonNormal
283 |
284 |
285 | """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
286 | " HTML
287 | """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
288 |
289 | " don't use standard HiLink, it will not work with included syntax files (<args> forwards the group names)
290 | if version < 508
291 | command! -nargs=+ HtmlHiLink hi link <args>
292 | else
293 | command! -nargs=+ HtmlHiLink hi def link <args>
294 | endif
295 |
296 | syntax spell toplevel
297 |
298 | syn case ignore
299 |
300 | " mark illegal characters
301 | "syn match htmlError "[<>&]"
302 |
303 |
304 | " tags (end tags start with "</"; htmlTagN skips the leading "<" or "</" via hs=s+1 / hs=s+2)
305 | syn region htmlString contained start=+"+ end=+"+ contains=htmlSpecialChar,javaScriptExpression,@htmlPreproc,pyxlPython
306 | syn region htmlString contained start=+'+ end=+'+ contains=htmlSpecialChar,javaScriptExpression,@htmlPreproc
307 | syn match htmlValue contained "=[\t ]*[^'" \t>][^ \t>]*"hs=s+1 contains=javaScriptExpression,@htmlPreproc
308 | syn region htmlEndTag start=+</+ end=+>+ contains=htmlTagN,htmlTagError
309 | syn region htmlTag start=+<[^/<= \t]+ end=+>+ contains=htmlTagN,htmlString,htmlArg,htmlValue,htmlTagError,htmlEvent,htmlCssDefinition,@htmlPreproc,@htmlArgCluster
310 | syn match htmlTagN contained +<\s*[-a-zA-Z0-9_]\++hs=s+1 contains=htmlTagName,htmlSpecialTagName,@htmlTagNameCluster
311 | syn match htmlTagN contained +</\s*[-a-zA-Z0-9_]\++hs=s+2 contains=htmlTagName,htmlSpecialTagName,@htmlTagNameCluster
312 | "syn match htmlTagError contained "[^>]<"ms=s+1
313 |
314 |
315 | " tag names
316 | syn keyword htmlTagName contained address applet area a base basefont
317 | syn keyword htmlTagName contained big blockquote br caption center
318 | syn keyword htmlTagName contained cite code dd dfn dir div dl dt font
319 | syn keyword htmlTagName contained form frag hr html if img
320 | syn keyword htmlTagName contained input isindex kbd li link map menu
321 | syn keyword htmlTagName contained meta ol option param pre p samp span
322 | syn keyword htmlTagName contained select small strike sub sup
323 | syn keyword htmlTagName contained table td textarea th tr tt ul var xmp
324 | syn match htmlTagName contained "\<\(b\|i\|u\|h[1-6]\|em\|strong\|head\|body\|title\)\>"
325 |
326 | " new html 4.0 tags
327 | syn keyword htmlTagName contained abbr acronym bdo button col label
328 | syn keyword htmlTagName contained colgroup del fieldset iframe ins legend
329 | syn keyword htmlTagName contained object optgroup q s tbody tfoot thead
330 |
331 | " legal arg names
332 | syn keyword htmlArg contained action
333 | syn keyword htmlArg contained align alink alt archive background bgcolor
334 | syn keyword htmlArg contained border bordercolor cellpadding
335 | syn keyword htmlArg contained cellspacing checked class clear code codebase color
336 | syn keyword htmlArg contained cols colspan content coords enctype face
337 | syn keyword htmlArg contained gutter height hspace id
338 | syn keyword htmlArg contained link lowsrc marginheight
339 | syn keyword htmlArg contained marginwidth maxlength method name prompt
340 | syn keyword htmlArg contained rel rev rows rowspan scrolling selected shape
341 | syn keyword htmlArg contained size src start target text type url
342 | syn keyword htmlArg contained usemap ismap valign value vlink vspace width wrap
343 | syn match htmlArg contained "\<\(http-equiv\|href\|title\)="me=e-1
344 |
345 | " Netscape extensions ('z-index' needs a match because '-' is not a keyword character)
346 | syn keyword htmlTagName contained frame noframes frameset nobr blink
347 | syn keyword htmlTagName contained layer ilayer nolayer spacer
348 | syn keyword htmlArg contained frameborder noresize pagex pagey above below
349 | syn keyword htmlArg contained left top visibility clip id noshade
350 | syn match htmlArg contained "\<z-index\>"
351 |
352 | " Microsoft extensions
353 | syn keyword htmlTagName contained marquee
354 |
355 | " html 4.0 arg names
356 | syn match htmlArg contained "\<\(accept-charset\|label\)\>"
357 | syn keyword htmlArg contained abbr accept accesskey axis char charoff charset
358 | syn keyword htmlArg contained cite classid codetype compact data datetime
359 | syn keyword htmlArg contained declare defer dir disabled for frame
360 | syn keyword htmlArg contained headers hreflang lang language longdesc
361 | syn keyword htmlArg contained multiple nohref nowrap object profile readonly
362 | syn keyword htmlArg contained rules scheme scope span standby style
363 | syn keyword htmlArg contained summary tabindex valuetype version
364 |
365 | " special characters: '&', an optional '#', 1-8 alphanumerics, then ';'
366 | syn match htmlSpecialChar "&#\=[0-9A-Za-z]\{1,8};"
367 |
368 | " Comments (the real ones or the old netscape ones)
369 | if exists("html_wrong_comments")
370 | syn region htmlComment start=++ contains=htmlPreStmt,htmlPreError,htmlPreAttr
380 | syn match htmlPreStmt contained "\)"
473 | syn region htmlCssDefinition matchgroup=htmlArg start='style="' keepend matchgroup=htmlString end='"' contains=css.*Attr,css.*Prop,cssComment,cssLength,cssColor,cssURL,cssImportant,cssError,cssString,@htmlPreproc
474 | HtmlHiLink htmlStyleArg htmlString
475 | endif
476 |
477 | if main_syntax == "pyxl"
478 | " synchronizing (does not always work if a comment includes legal
479 | " html tags, but doing it right would mean to always start
480 | " at the first line, which is too slow)
481 | syn sync match htmlHighlight groupthere NONE "<[/a-zA-Z]"
482 | syn sync match htmlHighlight groupthere javaScript "