├── .gitignore
├── LICENSE
├── README.org
├── README.rst
├── orgpython
    ├── __init__.py
    ├── document.py
    ├── inline.py
    └── src.py
├── setup.py
└── test.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 | 
60 | # Scrapy stuff:
61 | .scrapy
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | 
66 | # PyBuilder
67 | target/
68 | 
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 | 
72 | # pyenv
73 | .python-version
74 | 
75 | # celery beat schedule file
76 | celerybeat-schedule
77 | 
78 | # dotenv
79 | .env
80 | 
81 | # virtualenv
82 | venv/
83 | ENV/
84 | 
85 | # Spyder project settings
86 | .spyderproject
87 | 
88 | # Rope project settings
89 | .ropeproject
90 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2017-2020, honmaple
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/README.org:
--------------------------------------------------------------------------------
 1 | * org-python
 2 |   An orgmode parser for converting orgmode to html based on python.
 3 | 
 4 |   [[https://pypi.python.org/pypi/org-python][https://img.shields.io/badge/pypi-v0.3.2-brightgreen.svg]]
 5 |   [[https://python.org][https://img.shields.io/badge/python-3-brightgreen.svg]]
 6 |   [[LICENSE][https://img.shields.io/badge/license-BSD-blue.svg]]
 7 | 
 8 | ** quickstart
 9 |    #+BEGIN_SRC sh
10 |   pip install org-python
11 |    #+END_SRC
12 | 
13 |    #+BEGIN_SRC python
14 |      from orgpython import to_html
15 | 
16 |      text = '''* heading
17 |      - list1
18 |      - list2
19 |      - list3
20 |        - list4
21 |      - list5
22 | 
23 |        | th1-1  | th1-2  | th1-3  |
24 |        |--------+--------+--------|
25 |        | row1-1 | row1-2 | row1-3 |
26 |        | row2-1 | row2-2 | row2-3 |
27 |        | row3-1 | row3-2 | row3-3 |
28 |      '''
29 |      print(to_html(text, toc=True, offset=0, highlight=True))
30 |    #+END_SRC
31 | 
32 | ** feature
33 |    - [X] toc
34 |    - [X] heading
35 |      #+BEGIN_EXAMPLE
36 |      * heading 1
37 |      ** heading 2
38 |      *** heading 3
39 |      **** heading 4
40 |      ***** heading 5
41 |      ****** heading 6
42 |      #+END_EXAMPLE
43 |    - [X] unordered_list
44 |      #+BEGIN_EXAMPLE
45 |      - list
46 |      - list
47 |        - list
48 |          + list
49 |        - list
50 |      #+END_EXAMPLE
51 |    - [X] ordered_list
52 |      #+BEGIN_EXAMPLE
53 |      1. list
54 |      2. list
55 |      3. list
56 |      #+END_EXAMPLE
57 |    - [X] bold
58 |      #+BEGIN_EXAMPLE
59 |      *bold*
60 |      #+END_EXAMPLE
61 |    - [X] italic
62 |      #+BEGIN_EXAMPLE
63 |      **italic**
64 |      #+END_EXAMPLE
65 |    - [X] underlined
66 |      #+BEGIN_EXAMPLE
67 |      _underlined_
68 |      #+END_EXAMPLE
69 |    - [X] code
70 |      #+BEGIN_EXAMPLE
71 |      =code=
72 |      #+END_EXAMPLE
73 |    - [X] delete
74 |      #+BEGIN_EXAMPLE
75 |      +delete+
76 |      #+END_EXAMPLE
77 |    - [X] image
78 |      #+BEGIN_EXAMPLE
79 |      [[src][alt]]
80 |      #+END_EXAMPLE
81 |    - [X] link
82 |      #+BEGIN_EXAMPLE
83 |      [[href][text]]
84 |      #+END_EXAMPLE
85 |    - [X] begin_example
86 |    - [X] begin_src
87 |    - [X] begin_quote
88 |    - [X] table
89 |      #+BEGIN_EXAMPLE
90 |      | th1-1  | th1-2  | th1-3  |
91 |      |--------+--------+--------|
92 |      | row1-1 | row1-2 | row1-3 |
93 |      | row2-1 | row2-2 | row2-3 |
94 |      | row3-1 | row3-2 | row3-3 |
95 |      #+END_EXAMPLE
96 | 
97 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 |     :Author: jianglin
  2 | 
  3 | .. contents::
  4 | 
  5 | 1 org-python
  6 | ------------
  7 | 
  8 | An orgmode parser for converting orgmode to html based on python.
  9 | 
 10 | .. image:: https://img.shields.io/badge/pypi-v0.3.2-brightgreen.svg
 11 |     :target: https://pypi.python.org/pypi/org-python
 12 | .. image:: https://img.shields.io/badge/python-3-brightgreen.svg
 13 |     :target: https://python.org
 14 | .. image:: https://img.shields.io/badge/license-BSD-blue.svg
 15 |     :target: LICENSE
 16 | 
 17 | 1.1 quickstart
 18 | ~~~~~~~~~~~~~~
 19 | 
 20 | .. code:: sh
 21 | 
 22 |     pip install org-python
 23 | 
 24 | .. code:: python
 25 | 
 26 |     from orgpython import to_html
 27 | 
 28 |     text = '''* heading
 29 |     - list1
 30 |     - list2
 31 |     - list3
 32 |       - list4
 33 |     - list5
 34 | 
 35 |       | th1-1  | th1-2  | th1-3  |
 36 |       |--------+--------+--------|
 37 |       | row1-1 | row1-2 | row1-3 |
 38 |       | row2-1 | row2-2 | row2-3 |
 39 |       | row3-1 | row3-2 | row3-3 |
 40 |     '''
 41 |     print(to_html(text, toc=True, offset=0, highlight=True))
 42 | 
 43 | 1.2 feature
 44 | ~~~~~~~~~~~
 45 | 
 46 | - ☑ toc
 47 | 
 48 | - ☑ heading
 49 | 
 50 |   ::
 51 | 
 52 |       * heading 1
 53 |       ** heading 2
 54 |       *** heading 3
 55 |       **** heading 4
 56 |       ***** heading 5
 57 |       ****** heading 6
 58 | 
 59 | - ☑ unordered\_list
 60 | 
 61 |   ::
 62 | 
 63 |       - list
 64 |       - list
 65 |         - list
 66 |           + list
 67 |         - list
 68 | 
 69 | - ☑ ordered\_list
 70 | 
 71 |   ::
 72 | 
 73 |       1. list
 74 |       2. list
 75 |       3. list
 76 | 
 77 | - ☑ bold
 78 | 
 79 |   ::
 80 | 
 81 |       *bold*
 82 | 
 83 | - ☑ italic
 84 | 
 85 |   ::
 86 | 
 87 |       **italic**
 88 | 
 89 | - ☑ underlined
 90 | 
 91 |   ::
 92 | 
 93 |       _underlined_
 94 | 
 95 | - ☑ code
 96 | 
 97 |   ::
 98 | 
 99 |       =code=
100 | 
101 | - ☑ delete
102 | 
103 |   ::
104 | 
105 |       +delete+
106 | 
107 | - ☑ image
108 | 
109 |   ::
110 | 
111 |       [[src][alt]]
112 | 
113 | - ☑ link
114 | 
115 |   ::
116 | 
117 |       [[href][text]]
118 | 
119 | - ☑ begin\_example
120 | 
121 | - ☑ begin\_src
122 | 
123 | - ☑ begin\_quote
124 | 
125 | - ☑ table
126 | 
127 |   ::
128 | 
129 |       | th1-1  | th1-2  | th1-3  |
130 |       |--------+--------+--------|
131 |       | row1-1 | row1-2 | row1-3 |
132 |       | row2-1 | row2-2 | row2-3 |
133 |       | row3-1 | row3-2 | row3-3 |
134 | 


--------------------------------------------------------------------------------
/orgpython/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # ********************************************************************************
 4 | # Copyright © 2017-2020 jianglin
 5 | # File Name: __init__.py
 6 | # Author: jianglin
 7 | # Email: mail@honmaple.com
 8 | # Created: 2019-05-29 18:06:22 (CST)
 9 | # Last Update: Sunday 2020-08-16 19:45:09 (CST)
10 | #          By:
11 | # Description:
12 | # ********************************************************************************
13 | from .document import Document
14 | 
15 | 
def to_text(content, **kwargs):
    """Build a ``Document`` from ``content`` and return its ``to_text()`` result.

    All keyword arguments are forwarded unchanged to the ``Document``
    constructor.
    """
    document = Document(content, **kwargs)
    return document.to_text()
18 | 
19 | 
def to_html(content, **kwargs):
    """Build a ``Document`` from ``content`` and return its ``to_html()`` result.

    All keyword arguments are forwarded unchanged to the ``Document``
    constructor.
    """
    document = Document(content, **kwargs)
    return document.to_html()
22 | 
23 | 
def to_markdown(content, **kwargs):
    """Build a ``Document`` from ``content`` and return its ``to_markdown()`` result.

    All keyword arguments are forwarded unchanged to the ``Document``
    constructor.
    """
    document = Document(content, **kwargs)
    return document.to_markdown()
26 | 


--------------------------------------------------------------------------------
/orgpython/document.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # ********************************************************************************
  4 | # Copyright © 2017-2020 jianglin
  5 | # File Name: document.py
  6 | # Author: jianglin
  7 | # Email: mail@honmaple.com
  8 | # Created: 2018-02-26 11:44:43 (CST)
  9 | # Last Update: Wednesday 2020-08-19 12:00:03 (CST)
 10 | # Description:
 11 | # ********************************************************************************
 12 | import re
 13 | from hashlib import sha1
 14 | from textwrap import dedent
 15 | 
 16 | from .inline import Blankline, Hr, InlineText
 17 | from .src import highlight as src_highlight
 18 | 
 19 | DRAWER_BEGIN_REGEXP = re.compile(r"^(\s*):(\S+):\s*$")
 20 | DRAWER_END_REGEXP = re.compile(r"^(\s*):END:\s*$")
 21 | DRAWER_PROPERTY_REGEXP = re.compile(r"^(\s*):(\S+):(\s+(.*)$|$)")
 22 | 
 23 | BLOCK_BEGIN_REGEXP = re.compile(r"(?i)^(\s*)#\+BEGIN_(\w+)(.*)")
 24 | BLOCK_END_REGEXP = re.compile(r"(?i)^(\s*)#\+END_(\w+)")
 25 | BLOCK_RESULT_REGEXP = re.compile(r"(?i)^(\s*)#\+RESULTS:")
 26 | BLOCK_RESULT_CONTENT_REGEXP = re.compile(r"(?:^|\s+):(\s+(.*)|$)")
 27 | 
 28 | TABLE_SEP_REGEXP = re.compile(r"^(\s*)(\|[+-|]*)\s*$")
 29 | TABLE_ROW_REGEXP = re.compile(r"^(\s*)(\|.*)")
 30 | TABLE_ALIGN_REGEXP = re.compile(r"^<(l|c|r)>$")
 31 | 
 32 | LIST_DESCRIPTIVE_REGEXP = re.compile(r"^(\s*)([+*-])\s+(.*)::(\s|$)")
 33 | LIST_UNORDER_REGEXP = re.compile(r"^(\s*)([+*-])(\s+(.*)|$)")
 34 | LIST_ORDER_REGEXP = re.compile(r"^(\s*)(([0-9]+|[a-zA-Z])[.)])(\s+(.*)|$)")
 35 | LIST_STATUS_REGEXP = re.compile(r"\[( |X|-)\]\s")
 36 | LIST_LEVEL_REGEXP = re.compile(r"(\s*)(.+)$")
 37 | 
 38 | HEADLINE_REGEXP = re.compile(
 39 |     r"^(\*+)(?:\s+(.+?))?(?:\s+\[#(.+)\])?(\s+.*?)(?:\s+:(.+):)?$")
 40 | KEYWORD_REGEXP = re.compile(r"^(\s*)#\+([^:]+):(\s+(.*)|$)")
 41 | COMMENT_REGEXP = re.compile(r"^(\s*)#(.*)")
 42 | ATTRIBUTE_REGEXP = re.compile(r"(?:^|\s+)(:[-\w]+)\s+(.*)$")
 43 | 
 44 | TODO_KEYWORDS = ("DONE", "TODO")
 45 | 
 46 | 
 47 | def string_split(s, sep):
 48 |     if not s:
 49 |         return []
 50 |     return s.split(sep)
 51 | 
 52 | 
 53 | class Parser(object):
 54 |     def __init__(self, content=""):
 55 |         self.lines = content.splitlines()
 56 |         self.level = 0
 57 |         self.element = ""
 58 |         self.children = []
 59 |         self.escape = True
 60 |         self.needparse = True
 61 |         self.parsed_nodes = (
 62 |             "blankline",
 63 |             "headline",
 64 |             "table",
 65 |             "list",
 66 |             "drawer",
 67 |             "block",
 68 |             "block_result",
 69 |             "keyword",
 70 |             "hr",
 71 |         )
 72 | 
 73 |     def first_child(self):
 74 |         if len(self.children) == 0:
 75 |             return
 76 |         return self.children[0]
 77 | 
 78 |     def last_child(self):
 79 |         if len(self.children) == 0:
 80 |             return
 81 |         return self.children[-1]
 82 | 
 83 |     def add_child(self, node):
 84 |         last = self.last_child()
 85 |         if self.is_headline(last):
 86 |             if self.is_properties(node):
 87 |                 last.properties = node
 88 |                 return
 89 | 
 90 |             if not self.is_headline(node):
 91 |                 last.add_child(node)
 92 |                 return
 93 | 
 94 |             if self.is_headline(node) and node.stars > last.stars:
 95 |                 last.add_child(node)
 96 |                 return
 97 | 
 98 |         if self.is_table(last):
 99 |             if self.is_table(node):
100 |                 last.add_child(node)
101 |                 return
102 | 
103 |         if self.is_list(last):
104 |             if self.is_blankline(node):
105 |                 last.add_child(node)
106 |                 return
107 | 
108 |             if node.level > last.level:
109 |                 last.add_child(node)
110 |                 return
111 | 
112 |             if self.is_list(node) and node.level == last.level:
113 |                 last.add_child(node)
114 |                 return
115 | 
116 |         if self.is_keyword(last):
117 |             if self.is_table(node):
118 |                 node.keyword = last
119 | 
120 |         if self.is_paragraph(last):
121 |             if self.is_inlinetext(node):
122 |                 last.add_child(node)
123 |                 return
124 | 
125 |         if self.is_inlinetext(node):
126 |             self.children.append(self.paragraph(node))
127 |             return
128 | 
129 |         self.children.append(node)
130 | 
131 |     def is_keyword(self, child):
132 |         return child and isinstance(child, Keyword)
133 | 
134 |     def is_headline(self, child):
135 |         return child and isinstance(child, Headline)
136 | 
137 |     def is_list(self, child):
138 |         return child and isinstance(child, List)
139 | 
140 |     def is_table(self, child):
141 |         return child and isinstance(child, Table)
142 | 
143 |     def is_src(self, child):
144 |         return child and isinstance(child, (Src, Example))
145 | 
146 |     def is_inlinetext(self, child):
147 |         return child and isinstance(child, InlineText)
148 | 
149 |     def is_blankline(self, child):
150 |         return child and isinstance(child, Blankline)
151 | 
152 |     def is_paragraph(self, child):
153 |         return child and isinstance(child, Paragraph)
154 | 
155 |     def is_properties(self, child):
156 |         return child and isinstance(child, Properties)
157 | 
158 |     def inlinetext(self, text):
159 |         return InlineText(text, self.needparse, self.escape)
160 | 
161 |     def paragraph(self, node):
162 |         n = Paragraph()
163 |         n.add_child(node)
164 |         return n
165 | 
166 |     def _parse_paired(self, cls, index, lines):
167 |         node = cls.match(lines[index])
168 |         if not node:
169 |             return None, index
170 | 
171 |         end = len(lines)
172 |         num = index + 1
173 |         while num < end:
174 |             if node.matchend(num, lines):
175 |                 node.preparse(lines[index + 1:num])
176 |                 return node, num
177 |             num += 1
178 |         return None, index
179 | 
180 |     def _parse_nopaired(self, cls, index, lines):
181 |         node = cls.match(lines[index])
182 |         if not node:
183 |             return None, index
184 | 
185 |         end = len(lines)
186 |         num = index + 1
187 |         while num < end:
188 |             if node.matchend(num, lines):
189 |                 break
190 |             num += 1
191 |         node.preparse(lines[index + 1:num])
192 |         return node, num
193 | 
194 |     def parse_headline(self, index, lines):
195 |         return Headline.match(lines[index]), index
196 | 
197 |     def parse_list(self, index, lines):
198 |         return List.match(lines[index]), index
199 | 
200 |     def parse_table(self, index, lines):
201 |         return self._parse_nopaired(Table, index, lines)
202 | 
203 |     def parse_drawer(self, index, lines):
204 |         return self._parse_paired(Drawer, index, lines)
205 | 
206 |     def parse_block(self, index, lines):
207 |         return self._parse_paired(Block, index, lines)
208 | 
209 |     def parse_block_result(self, index, lines):
210 |         return self._parse_paired(BlockResult, index, lines)
211 | 
212 |     def parse_blankline(self, index, lines):
213 |         return Blankline.match(lines[index]), index
214 | 
215 |     def parse_keyword(self, index, lines):
216 |         return Keyword.match(lines[index]), index
217 | 
218 |     def parse_hr(self, index, lines):
219 |         return Hr.match(lines[index]), index
220 | 
221 |     def parse_inlinetext(self, index, lines):
222 |         return self.inlinetext(lines[index]), index
223 | 
224 |     def parse(self, index, lines):
225 |         for b in self.parsed_nodes:
226 |             func = "parse_" + b
227 |             if not hasattr(self, func):
228 |                 continue
229 |             block, num = getattr(self, func)(index, lines)
230 |             if not block:
231 |                 continue
232 |             return block, num
233 | 
234 |         return self.parse_inlinetext(index, lines)
235 | 
236 |     def preparse(self, lines):
237 |         index = 0
238 |         while index < len(lines):
239 |             line = lines[index]
240 |             node, index = self.parse(index, lines)
241 |             if node:
242 |                 node.level = len(line) - len(line.strip())
243 |                 self.add_child(node)
244 |             index += 1
245 | 
246 |     def to_html(self):
247 |         if len(self.children) == 0 and len(self.lines) > 0:
248 |             self.preparse(self.lines)
249 | 
250 |         children = []
251 |         for child in self.children:
252 |             content = child.to_html()
253 |             if not content:
254 |                 continue
255 |             children.append(content)
256 |         text = "\n".join(children)
257 |         if self.element:
258 |             return self.element.format(text)
259 |         return text
260 | 
261 |     def __str__(self):
262 |         str_children = [str(child) for child in self.children]
263 |         return self.__class__.__name__ + '(' + ','.join(str_children) + ')'
264 | 
265 |     def __repr__(self):
266 |         return self.__str__()
267 | 
268 | 
class Headline(Parser):
    """A ``*``-prefixed heading; following non-headline nodes nest under it."""

    def __init__(
            self,
            title,
            stars=1,
            keyword=None,
            priority=None,
            tags=None,
            todo_keywords=TODO_KEYWORDS):
        """Create a headline.

        ``tags`` defaults to a fresh empty list per instance; a shared
        mutable default would leak tags between headlines.
        """
        super(Headline, self).__init__()
        self.title = title
        self.stars = stars
        self.keyword = keyword
        self.priority = priority
        self.tags = [] if tags is None else tags
        # Replaced by a Properties drawer when one directly follows the
        # headline (see Parser.add_child).
        self.properties = None
        self.todo_keywords = todo_keywords

    @classmethod
    def match(cls, line):
        """Return a Headline for ``line``, or ``None`` if it is not a headline."""
        match = HEADLINE_REGEXP.match(line)
        if not match:
            return

        stars = len(match[1])
        keyword = match[2] or ""
        priority = match[3] or ""

        if keyword and not priority:
            # The optional first word captured the "[#A]" cookie itself.
            if len(keyword) >= 4 and keyword[0:2] == "[#":
                priority = keyword[2:-1]
                keyword = ""

        # The leading word is folded back into the title, so a headline
        # created here always carries an empty keyword.
        title = keyword + match[4]
        keyword = ""

        return cls(
            title,
            stars,
            keyword,
            priority,
            string_split(match[5], ":"),
        )

    def id(self):
        """Return the HTML id: the CUSTOM_ID property if present, else a
        hash-based id derived from the title."""
        hid = 'org-{0}'.format(sha1(self.title.encode()).hexdigest()[:10])
        if self.properties:
            return self.properties.get("CUSTOM_ID", hid)
        return hid

    def toc(self):
        """Return the inner HTML of the headline: keyword, priority, title, tags."""
        b = ""
        if self.keyword:
            b = b + "<span class=\"todo\">{0}</span>".format(self.keyword)
        if self.priority:
            b = b + "<span class=\"priority\">{0}</span>".format(self.priority)

        b = b + self.inlinetext(self.title).to_html()

        for tag in self.tags:
            b = b + "<span class=\"tag\">{0}</span>".format(tag)
        return b.strip()

    def to_html(self):
        """Render the ``<hN>`` element followed by the nested children."""
        b = "<h{0} id=\"{1}\">{2}</h{0}>".format(
            self.stars,
            self.id(),
            self.toc(),
        )
        return b + super(Headline, self).to_html()
339 | 
340 | 
class Drawer(Parser):
    """A ``:NAME:`` ... ``:END:`` drawer; produces no HTML output."""

    def __init__(self, name):
        super().__init__()
        self.name = name

    @classmethod
    def match(cls, line):
        """Return a drawer node for an opening line, else ``None``.

        A drawer named PROPERTIES (any case) becomes a Properties node.
        """
        found = DRAWER_BEGIN_REGEXP.match(line)
        if not found:
            return None
        drawer_name = found.group(2)
        node_type = Properties if drawer_name.upper() == "PROPERTIES" else Drawer
        return node_type(drawer_name)

    def matchend(self, index, lines):
        """Return a match object when ``lines[index]`` is the ``:END:`` line."""
        closing_line = lines[index]
        return DRAWER_END_REGEXP.match(closing_line)

    def to_html(self):
        """Drawers are never rendered."""
        return ""
361 | 
362 | 
class Properties(Drawer):
    """The PROPERTIES drawer: collects ``:KEY: value`` lines into a dict."""

    def __init__(self, name):
        super().__init__(name)
        self.properties = {}

    def parse(self, index, lines):
        """Record the property on ``lines[index]``; never yields a child node.

        Keys are stored upper-cased; a property without a value stores ``None``.
        """
        found = DRAWER_PROPERTY_REGEXP.match(lines[index])
        if found:
            key = found.group(2).upper()
            self.properties[key] = found.group(4)
        return None, index

    def get(self, key, default=None):
        """Look up ``key`` (expected upper-case), falling back to ``default``."""
        if key in self.properties:
            return self.properties[key]
        return default

    def to_html(self):
        """Property drawers are never rendered."""
        return ""
379 | 
380 | 
class Block(Parser):
    """A ``#+BEGIN_<name>`` ... ``#+END_<name>`` block."""

    def __init__(self, name, params=""):
        super().__init__()
        self.name = name
        self.params = params

    @classmethod
    def match(cls, line):
        """Return the node for a ``#+BEGIN_`` line, or ``None``.

        Known block names map to their dedicated classes; any other name
        yields a generic block of the class ``match`` was called on.
        """
        found = BLOCK_BEGIN_REGEXP.match(line)
        if not found:
            return None

        kind = found.group(2).lower()
        params = found.group(3)

        # src/export split their header into "<language> <rest>".
        if kind == "src":
            return Src(*params.strip().split(" ", 1))
        if kind == "export":
            return Export(*params.strip().split(" ", 1))

        plain_blocks = {
            "example": Example,
            "center": Center,
            "verse": Verse,
            "quote": Quote,
        }
        if kind in plain_blocks:
            return plain_blocks[kind](params)
        return cls(kind, params)

    def matchend(self, index, lines):
        """Tell whether ``lines[index]`` is the ``#+END_`` line of this block."""
        found = BLOCK_END_REGEXP.match(lines[index])
        if not found:
            return found
        return found.group(2).lower() == self.name
411 | 
412 | 
class Center(Block):
    """``#+BEGIN_CENTER`` block, rendered as a centred ``<div>``."""

    def __init__(self, params=""):
        super().__init__("center", params)
        self.element = '<div style="text-align: center;">\n{0}\n</div>'
417 | 
418 | 
class Verse(Block):
    """``#+BEGIN_VERSE`` block: children are joined with ``<br />``."""

    def __init__(self, params=""):
        super().__init__("verse", params)
        self.element = '<p class="verse">\n{0}\n</p>'

    def add_child(self, node):
        """Store ``node`` directly, bypassing the nesting rules of Parser."""
        self.children.append(node)

    def to_html(self):
        """Render every child and separate them with ``<br />``."""
        rendered = []
        for child in self.children:
            rendered.append(child.to_html())
        body = "<br />".join(rendered)
        return self.element.format(body)
430 | 
431 | 
class Quote(Block):
    """``#+BEGIN_QUOTE`` block, rendered as ``<blockquote>``."""

    def __init__(self, params=""):
        super().__init__("quote", params)
        self.element = '<blockquote>\n{0}\n</blockquote>'
436 | 
437 | 
class Export(Block):
    """``#+BEGIN_EXPORT <language>`` block; only HTML exports are rendered."""

    def __init__(self, language="", params=""):
        super().__init__("export", params)
        self.language = language
        # Escaping is switched off only for HTML exports (any letter case).
        is_html = self.language.upper() == "HTML"
        self.escape = not is_html
        # No structural parsing inside: every line is plain inline text.
        self.parsed_nodes = ()

    def to_html(self):
        """Return the block content for HTML exports, ``""`` otherwise."""
        if self.escape:
            return ""
        return super().to_html()
449 | 
450 | 
class Src(Block):
    """``#+BEGIN_SRC <language>`` block, rendered as ``<pre>``."""

    def __init__(self, language="", params="", highlight=False):
        super().__init__("src", params)
        self.language = language
        self.highlight_code = highlight
        self.element = '<pre class="src src-{0}">\n{1}\n</pre>'
        # Lines are kept as raw text: no inline markup, no structural nodes.
        self.needparse = False
        self.escape = False
        self.parsed_nodes = ()

    def add_child(self, node):
        """Store ``node`` directly, bypassing the nesting rules of Parser."""
        self.children.append(node)

    def highlight(self, language, text):
        """Delegate to the highlighter imported from ``.src``."""
        return src_highlight(language, text)

    def to_html(self):
        """Render the dedented code, highlighted when requested."""
        rendered_lines = [child.to_html() for child in self.children]
        code = dedent("\n".join(rendered_lines))
        if self.highlight_code:
            return self.highlight(self.language, code)
        if self.language:
            return self.element.format(self.language, code)
        return "<pre>\n{0}\n</pre>".format(code)
474 | 
475 | 
class Example(Src):
    """``#+BEGIN_EXAMPLE`` block: a Src block whose language is "example"."""

    def __init__(self, params="", highlight=False):
        super().__init__("example", params, highlight)
        # Src registers the block as "src"; the closing line to look for is
        # "#+END_EXAMPLE", so the name is overridden.
        self.name = "example"
480 | 
481 | 
class BlockResult(Parser):
    """A ``#+RESULTS:`` line followed by ``: text`` content lines."""

    def __init__(self):
        super(BlockResult, self).__init__()
        self.element = "<pre class=\"example\">\n{0}\n</pre>"

    @classmethod
    def match(cls, line):
        """Return a BlockResult when ``line`` is a ``#+RESULTS:`` line."""
        match = BLOCK_RESULT_REGEXP.match(line)
        if not match:
            return
        return cls()

    def matchend(self, index, lines):
        """The results end at the first line that is not ``: text`` content."""
        return not BLOCK_RESULT_CONTENT_REGEXP.match(lines[index])

    def parse(self, index, lines):
        """Turn one content line into inline text (the ``: `` prefix removed)."""
        match = BLOCK_RESULT_CONTENT_REGEXP.match(lines[index])
        # A bare ":" line matches through the "$" alternative and leaves
        # group 2 unset; pass an empty string on instead of None.
        return self.inlinetext(match[2] or ""), index
500 | 
501 | 
class ListItem(Parser):
    """One ``<li>`` entry, optionally carrying a ``[ ]``/``[X]``/``[-]`` checkbox."""

    def __init__(self, status=None, checkbox="HTML"):
        super(ListItem, self).__init__()
        # Checkbox state character (" ", "X" or "-"), or None without checkbox.
        self.status = status
        # "HTML" renders an <input>; any other truthy value renders "=[s]=";
        # a falsy value disables checkbox rendering.
        self.checkbox = checkbox
        self.element = "<li>\n{0}\n</li>"
        # Set once the checkbox node has been inserted, so that rendering the
        # item again does not prepend a second checkbox.
        self._status_rendered = False

    @classmethod
    def match(cls, line):
        """Build an item from the text that follows the bullet.

        ``line`` may be ``None`` or empty for a bullet without text; it is
        then treated as an empty item instead of raising ``TypeError``.
        """
        status = None
        content = line or ""
        status_match = LIST_STATUS_REGEXP.match(content)
        if status_match:
            status, content = status_match[1], content[len("[ ] "):]

        node = cls(status)
        node.add_child(node.inlinetext(content))
        return node

    def set_status(self):
        """Insert the checkbox node in front of the item text (at most once)."""
        if not self.checkbox or self._status_rendered:
            return
        self._status_rendered = True

        if self.checkbox == "HTML":
            if self.status == "X":
                node = self.inlinetext(
                    '<input type="checkbox" checked="checked" />')
            else:
                node = self.inlinetext('<input type="checkbox" />')
            # The markup is literal HTML: neither parse nor escape it.
            node.needparse = False
            node.escape = False
        else:
            node = self.inlinetext("=[{0}]=".format(self.status))

        if not self.children:
            self.children.append(node)
            return

        self.children[0].children = [node] + self.children[0].children

    def to_html(self):
        """Render the item, adding the checkbox when a status is present."""
        if self.status is not None:
            self.set_status()
        return super(ListItem, self).to_html()
546 | 
547 | 
class DescriptiveItem(ListItem):
    """The term of a description list entry, rendered as ``<dt>``."""

    def __init__(self, title="", status=""):
        # NOTE(review): the arguments are forwarded positionally, so ``title``
        # becomes ListItem's ``status`` and ``status`` becomes its
        # ``checkbox`` -- confirm this mapping is intended.
        super().__init__(title, status)
        self.element = '<dt>\n{0}\n</dt>'
552 | 
553 | 
class List(Parser):
    """Base class of list nodes; ``children`` are the list's items."""

    def __init__(self, items=None):
        """Create a list owning ``items``.

        When ``items`` is omitted a fresh list is created; a shared ``[]``
        default would be aliased into ``children`` and mutated by
        ``add_child`` across unrelated instances.
        """
        super(List, self).__init__()
        self.children = [] if items is None else items

    @classmethod
    def match(cls, line):
        """Return the list node that ``line`` starts, or ``None``.

        NOTE(review): UnorderList is tried first and LIST_UNORDER_REGEXP also
        accepts ``- term :: text`` lines, so the Descriptive branch looks
        unreachable for such lines -- confirm the intended order.
        """
        match = UnorderList.match(line)
        if match:
            return match

        match = OrderList.match(line)
        if match:
            return match

        return Descriptive.match(line)

    def add_child(self, node):
        """Merge a same-level list's first item, else nest under the last item."""
        if self.is_list(node) and node.level == self.level:
            # Each matched list line arrives as a one-item list; adopt its item.
            self.children.append(node.children[0])
            return
        last = self.last_child()
        last.add_child(node)
577 | 
578 | 
class Descriptive(List):
    """Description list entry (``- term :: text``)."""

    def __init__(self, items=None):
        # A fresh list per instance: the base class stores the list it is
        # given, so a shared ``[]`` default would be mutated across instances.
        super(Descriptive, self).__init__([] if items is None else items)
        self.element = "<dd>\n{0}\n</dd>"

    @classmethod
    def match(cls, line):
        """Return a one-item list holding the term, or ``None`` on no match."""
        match = LIST_DESCRIPTIVE_REGEXP.match(line)
        if not match:
            return
        title = DescriptiveItem.match(match[3])
        return cls([title])
591 | 
592 | 
class UnorderList(List):
    """Bulleted list (``-``, ``+`` or ``*``), rendered as ``<ul>``."""

    def __init__(self, items=None):
        # A fresh list per instance: the base class stores the list it is
        # given, so a shared ``[]`` default would be mutated across instances.
        super(UnorderList, self).__init__([] if items is None else items)
        self.element = "<ul>\n{0}\n</ul>"

    @classmethod
    def match(cls, line):
        """Return a one-item list for a bullet line, or ``None`` on no match."""
        match = LIST_UNORDER_REGEXP.match(line)
        if not match:
            return
        # A bare bullet ("-") matches through the "$" alternative and leaves
        # the content group unset; treat it as an empty item.
        title = ListItem.match(match[4] or "")
        return cls([title])
605 | 
606 | 
class OrderList(List):
    """Numbered or lettered list (``1.``, ``a)`` ...), rendered as ``<ol>``."""

    def __init__(self, items=None):
        # A fresh list per instance: the base class stores the list it is
        # given, so a shared ``[]`` default would be mutated across instances.
        super(OrderList, self).__init__([] if items is None else items)
        self.element = "<ol>\n{0}\n</ol>"

    @classmethod
    def match(cls, line):
        """Return a one-item list for a numbered line, or ``None`` on no match."""
        match = LIST_ORDER_REGEXP.match(line)
        if not match:
            return
        # In LIST_ORDER_REGEXP group 3 is the counter and group 4 is the
        # whole "<whitespace><text>" tail; the item text is group 5.  Using
        # group 4 kept the leading whitespace, which also prevented a
        # "[X] " checkbox from being recognised at the start of the text.
        # Group 5 is unset for a bare "1." line.
        title = ListItem.match(match[5] or "")
        return cls([title])
619 | 
620 | 
class TableColumn(Parser):
    """A single table cell, rendered as ``<th>`` or ``<td>``."""

    def __init__(self, content="", header=False):
        super().__init__(content)
        self.header = header
        # Cell text is never parsed into structural nodes.
        self.parsed_nodes = ()

    def add_child(self, child):
        """Store ``child`` directly, bypassing the nesting rules of Parser."""
        self.children.append(child)

    def reset(self):
        """Mark this cell as a header cell."""
        self.header = True

    def to_html(self):
        """Pick the cell tag from ``header`` and render the content."""
        if self.header:
            self.element = "<th>{0}</th>"
        else:
            self.element = "<td>{0}</td>"
        return super().to_html()
636 | 
637 | 
638 | class TableRow(Parser):
639 |     def __init__(self, header=False):
640 |         super(TableRow, self).__init__()
641 |         self.is_sep = False
642 |         self.header = header
643 |         self.element = "<tr>\n{0}\n</tr>"
644 |         self.parsed_nodes = ("tablecolumn", )
645 | 
646 |     @classmethod
647 |     def match(cls, line):
648 |         match = TABLE_ROW_REGEXP.match(line)
649 |         if not match:
650 |             return
651 | 
652 |         row = cls()
653 |         row.is_sep = bool(TABLE_SEP_REGEXP.match(line))
654 |         row.preparse(match[2].strip("|").split("|"))
655 |         return row
656 | 
657 |     def add_child(self, child):
658 |         self.children.append(child)
659 | 
660 |     def parse_tablecolumn(self, index, lines):
661 |         return TableColumn(lines[index].strip(), self.header), index
662 | 
663 |     def reset(self):
664 |         self.header = True
665 |         for column in self.children:
666 |             column.reset()
667 | 
668 | 
669 | class Table(Parser):
670 |     def __init__(self, keyword=None):
671 |         super(Table, self).__init__()
672 |         self.element = "<table>\n{0}\n</table>"
673 |         self.keyword = keyword
674 |         self.parsed_nodes = ("tablerow", )
675 | 
676 |     @classmethod
677 |     def match(cls, line):
678 |         row = TableRow.match(line)
679 |         if not row:
680 |             return
681 | 
682 |         table = cls()
683 |         if row.is_sep:
684 |             return table
685 |         table.add_child(row)
686 |         return table
687 | 
688 |     def matchend(self, index, lines):
689 |         return not TABLE_ROW_REGEXP.match(lines[index])
690 | 
691 |     def reset(self):
692 |         first = self.first_child()
693 |         if first and first.header:
694 |             return
695 |         for row in self.children:
696 |             row.reset()
697 | 
698 |     def add_child(self, child):
699 |         if child.is_sep:
700 |             return self.reset()
701 |         self.children.append(child)
702 | 
703 |     def parse_tablerow(self, index, lines):
704 |         return TableRow.match(lines[index]), index
705 | 
706 | 
707 | class Keyword(Parser):
708 |     def __init__(self, key, value=""):
709 |         super(Keyword, self).__init__()
710 |         self.key = key
711 |         self.value = value
712 | 
713 |     def options(self):
714 |         results = {}
715 |         for line in self.value.split(" "):
716 |             if not line:
717 |                 continue
718 |             m = line.split(":", 1)
719 |             k = m[0]
720 |             if not k:
721 |                 continue
722 |             results[k] = "" if len(m) == 1 else m[1]
723 |         return results
724 | 
725 |     def properties(self):
726 |         results = {}
727 |         line = self.value.strip()
728 |         if not line:
729 |             return results
730 |         m = line.split(" ", 1)
731 |         k = m[0]
732 |         if not k:
733 |             return results
734 |         results[k] = "" if len(m) == 1 else m[1]
735 |         return results
736 | 
737 |     @classmethod
738 |     def match(cls, line):
739 |         match = KEYWORD_REGEXP.match(line)
740 |         if not match:
741 |             return
742 |         return cls(match[2], match[4])
743 | 
744 |     def to_html(self):
745 |         return ""
746 | 
747 | 
748 | class Paragraph(Parser):
749 |     def __init__(self, content=""):
750 |         super(Paragraph, self).__init__(content)
751 |         self.element = "<p>\n{0}\n</p>"
752 |         self.parsed_nodes = ()
753 | 
754 |     def add_child(self, node):
755 |         self.children.append(node)
756 | 
757 | 
758 | class Section(Parser):
759 |     def __init__(self, headline):
760 |         super(Section, self).__init__()
761 |         self.headline = headline
762 | 
763 |     @property
764 |     def stars(self):
765 |         return self.headline.stars
766 | 
767 |     def add_child(self, node):
768 |         last = self.last_child()
769 |         if not last:
770 |             self.children.append(node)
771 |             return
772 | 
773 |         if node.stars > last.stars:
774 |             last.add_child(node)
775 |             return
776 |         self.children.append(node)
777 | 
778 |     def to_html(self):
779 |         text = "<li>"
780 |         text += "<a href=\"#{0}\">{1}</a>".format(
781 |             self.headline.id(),
782 |             self.headline.toc(),
783 |         )
784 |         if not self.children:
785 |             return text + "</li>"
786 | 
787 |         text += "\n<ul>\n{0}\n</ul>\n</li>".format(
788 |             "\n".join([child.to_html() for child in self.children]))
789 |         return text
790 | 
791 | 
792 | class Toc(Parser):
793 |     def __init__(self):
794 |         super(Toc, self).__init__()
795 |         self.element = (
796 |             '<div id="table-of-contents">'
797 |             '<h2>Table of Contents</h2>'
798 |             '<div id="text-table-of-contents">'
799 |             '\n<ul>\n{0}\n</ul>\n</div></div>')
800 | 
801 |     def add_child(self, node):
802 |         last = self.last_child()
803 |         if not last:
804 |             self.children.append(node)
805 |             return
806 | 
807 |         if node.stars > last.stars:
808 |             last.add_child(node)
809 |             return
810 | 
811 |         if node.stars < last.stars:
812 |             last.add_child(node)
813 |             return
814 | 
815 |         self.children.append(node)
816 | 
817 |     def to_html(self):
818 |         if not self.children:
819 |             return ""
820 |         return super(Toc, self).to_html()
821 | 
822 | 
823 | class Document(Parser):
824 |     def __init__(self, content, offset=0, highlight=False, **options):
825 |         super(Document, self).__init__(content)
826 |         self.offset = offset
827 |         self.highlight = highlight
828 |         self.options = options
829 |         self.properties = {}
830 |         self.toc = Toc()
831 | 
832 |     def _is_true(self, value):
833 |         return value in ("true", "t", "1", True, 1)
834 | 
835 |     def section(self, node):
836 |         return Section(node)
837 | 
838 |     def parse_keyword(self, index, lines):
839 |         block, index = super(Document, self).parse_keyword(index, lines)
840 |         if not block:
841 |             return block, index
842 | 
843 |         if block.key == "OPTIONS":
844 |             self.options.update(**block.options())
845 |         elif block.key == "PROPERTY":
846 |             self.properties.update(**block.properties())
847 |         else:
848 |             self.properties[block.key] = block.value
849 |         return block, index
850 | 
851 |     def parse_headline(self, index, lines):
852 |         block, index = super(Document, self).parse_headline(index, lines)
853 |         if not block:
854 |             return block, index
855 |         block.stars = block.stars + self.offset
856 | 
857 |         todo_keywords = self.properties.get("TODO")
858 |         if todo_keywords:
859 |             block.todo_keywords = todo_keywords.split(" ")
860 |         s = block.title.split(" ", 1)
861 |         if len(s) > 1 and s[0] in block.todo_keywords:
862 |             block.keyword = s[0]
863 |             block.title = s[1]
864 |         self.toc.add_child(self.section(block))
865 |         return block, index
866 | 
867 |     def parse_block(self, index, lines):
868 |         block, index = super(Document, self).parse_block(index, lines)
869 |         if not block:
870 |             return block, index
871 |         if self.is_src(block):
872 |             block.highlight_code = self.highlight
873 |         return block, index
874 | 
875 |     def to_html(self):
876 |         text = super(Document, self).to_html()
877 |         if self._is_true(self.options.get("toc")):
878 |             return self.toc.to_html() + "\n" + text
879 |         return text
880 | 


--------------------------------------------------------------------------------
/orgpython/inline.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # ********************************************************************************
  4 | # Copyright © 2017-2020 jianglin
  5 | # File Name: inline.py
  6 | # Author: jianglin
  7 | # Email: mail@honmaple.com
  8 | # Created: 2018-02-26 11:41:22 (CST)
  9 | # Last Update: Tuesday 2020-08-18 17:21:40 (CST)
 10 | #          By:
 11 | # Description:
 12 | # ********************************************************************************
 13 | import re
 14 | import os
 15 | 
 16 | # {0} is the marker; a marker right after "/" or "\" does not open or close.
 17 | _inline_regexp = r"(^|.*?(?<![/\\])){0}(.+?(?<![/\\])){0}(.*?|$)"
 18 | 
 19 | BOLD_REGEXP = re.compile(_inline_regexp.format('\\*'))
 20 | CODE_REGEXP = re.compile(_inline_regexp.format('(?:\\=|`)'))
 21 | ITALIC_REGEXP = re.compile(_inline_regexp.format('(?:\\*\\*|\\/)'))
 22 | DELETE_REGEXP = re.compile(_inline_regexp.format('\\+'))
 23 | VERBATIM_REGEXP = re.compile(_inline_regexp.format('~'))
 24 | UNDERLINE_REGEXP = re.compile(_inline_regexp.format('_'))
 25 | 
 26 | PERCENT_REGEXP = re.compile(r"\[(\d+/\d+|\d+%)\]")  # "[3/7]" or "[40%]"
 27 | 
 28 | HR_REGEXP = re.compile(r"^\s*\-{5,}\s*")  # five or more dashes
 29 | FN_REGEXP = re.compile(r"(^|.*?(?<![/\\]))(\[fn:(.+?)\])(.*?|$)")
 30 | IMG_REGEXP = re.compile(r"^[.](png|gif|jpe?g|svg|tiff?)$")  # file extension
 31 | LINK_REGEXP = re.compile(r'\[\[(.+?)\](?:\[(.+?)\])?\]')
 32 | VIDEO_REGEXP = re.compile(r"^[.](webm|mp4)$")  # file extension
 33 | 
 34 | NEWLINE_REGEXP = re.compile(r"(^|.*?(?<![/\\]))(\\\\(\s*)$)")
 35 | BLANKLINE_REGEXP = re.compile(r"^(\s*)$")
 36 | 
 37 | TIMESTAMP_REGEXP = re.compile(
 38 |     r"^<(\d{4}-\d{2}-\d{2})( [A-Za-z]+)?( \d{2}:\d{2})?( \+\d+[dwmy])?>")
 39 | 
 40 | _html_escape = (
 41 |     ("&", "&amp;"),  # first, so the entities below are not escaped again
 42 |     ("'", "&#39;"),
 43 |     ("<", "&lt;"),
 44 |     (">", "&gt;"),
 45 |     ("\"", "&#34;"),
 46 | )
 47 | 
 48 | # See https://github.com/tsroten/zhon/blob/develop/zhon/hanzi.py
 49 | _chinese_non_stops = (  # adjacent literals below join into one str
 50 |     # Fullwidth ASCII variants
 51 |     '\uFF02\uFF03\uFF04\uFF05\uFF06\uFF07\uFF08\uFF09\uFF0A\uFF0B\uFF0C\uFF0D'
 52 |     '\uFF0F\uFF1A\uFF1B\uFF1C\uFF1D\uFF1E\uFF20\uFF3B\uFF3C\uFF3D\uFF3E\uFF3F'
 53 |     '\uFF40\uFF5B\uFF5C\uFF5D\uFF5E\uFF5F\uFF60'
 54 | 
 55 |     # Halfwidth CJK punctuation
 56 |     '\uFF62\uFF63\uFF64'
 57 | 
 58 |     # CJK symbols and punctuation
 59 |     '\u3000\u3001\u3003'
 60 | 
 61 |     # CJK angle and corner brackets
 62 |     '\u3008\u3009\u300A\u300B\u300C\u300D\u300E\u300F\u3010\u3011'
 63 | 
 64 |     # CJK brackets and symbols/punctuation
 65 |     '\u3014\u3015\u3016\u3017\u3018\u3019\u301A\u301B\u301C\u301D\u301E\u301F'
 66 | 
 67 |     # Other CJK symbols
 68 |     '\u3030'
 69 | 
 70 |     # Special CJK indicators
 71 |     '\u303E\u303F'
 72 | 
 73 |     # Dashes
 74 |     '\u2013\u2014'
 75 | 
 76 |     # Quotation marks and apostrophe
 77 |     '\u2018\u2019\u201B\u201C\u201D\u201E\u201F'
 78 | 
 79 |     # General punctuation
 80 |     '\u2026\u2027'
 81 | 
 82 |     # Overscores and underscores
 83 |     '\uFE4F'
 84 | 
 85 |     # Small form variants
 86 |     '\uFE51\uFE54'
 87 | 
 88 |     # Latin punctuation
 89 |     '\u00B7')
 90 | 
 91 | _chinese_stops = (  # adjacent literals below join into one str
 92 |     '\uFF01'  # Fullwidth exclamation mark
 93 |     '\uFF1F'  # Fullwidth question mark
 94 |     '\uFF61'  # Halfwidth ideographic full stop
 95 |     '\u3002'  # Ideographic full stop
 96 | )
 97 | 
 98 | 
 99 | def html_escape(text):
100 |     for e in _html_escape:
101 |         text = text.replace(e[0], e[1])
102 |     return text
103 | 
104 | 
105 | def match_chinese(ch):
106 |     if '\u4e00' <= ch <= '\u9fff':
107 |         return True
108 |     if ch in _chinese_stops:
109 |         return True
110 |     return ch in _chinese_non_stops
111 | 
112 | 
113 | def match_emphasis(cls, regexp, line, index):
114 |     match = regexp.match(line, index)
115 |     if not match:
116 |         return None, index
117 | 
118 |     end = match.end()
119 | 
120 |     if index != 0:
121 |         prechar = line[index - 1]
122 |         border = prechar != " " and prechar not in "-({'\""
123 |         if border and not match_chinese(prechar):
124 |             return None, index
125 | 
126 |     if end < len(line):
127 |         endchar = line[end]
128 |         border = endchar != " " and endchar not in "-.,:!?;'\")}["
129 |         if border and not match_chinese(endchar):
130 |             return None, index
131 |     return cls(match[2]), end - 1
132 | 
133 | 
134 | class InlineParser(object):
135 |     def __init__(self, content=""):
136 |         self.content = content
137 |         self.children = []
138 |         self.element = ""
139 | 
140 |     def add_child(self, child):
141 |         self.children.append(child)
142 | 
143 |     def parse_code(self, index, lines):
144 |         return Code.match(lines, index)
145 | 
146 |     def parse_bold(self, index, lines):
147 |         return Bold.match(lines, index)
148 | 
149 |     def parse_italic(self, index, lines):
150 |         return Italic.match(lines, index)
151 | 
152 |     def parse_delete(self, index, lines):
153 |         return Delete.match(lines, index)
154 | 
155 |     def parse_verbatim(self, index, lines):
156 |         return Verbatim.match(lines, index)
157 | 
158 |     def parse_underline(self, index, lines):
159 |         return Underline.match(lines, index)
160 | 
161 |     def parse_percent(self, index, lines):
162 |         return Percent.match(lines, index)
163 | 
164 |     def parse_link(self, index, lines):
165 |         return Link.match(lines, index)
166 | 
167 |     def parse_fn(self, index, lines):
168 |         return Fn.match(lines, index)
169 | 
170 |     def parse_newline(self, index, lines):
171 |         return Newline.match(lines, index)
172 | 
173 |     def parse(self, index, lines):
174 |         chars = (
175 |             ("=", "code"),
176 |             ("`", "code"),
177 |             ("~", "verbatim"),
178 |             ("_", "underline"),
179 |             ("+", "delete"),
180 |             ("/", "italic"),
181 |             ("**", "italic"),
182 |             ("*", "bold"),
183 |             ("[[", "link"),
184 |             ("[", "percent"),
185 |             ("\\", "newline"),
186 |         )
187 |         char_map = dict(chars)
188 |         single_char = lines[index]
189 |         double_char = lines[index:index + 2]
190 |         for char in chars:
191 |             c1 = len(char[0]) == 1 and char[0] == single_char
192 |             c2 = len(char[0]) == 2 and char[0] == double_char
193 | 
194 |             if c1 or c2:
195 |                 node, num = getattr(self, "parse_" + char_map[char[0]])(
196 |                     index, lines)
197 |                 if node:
198 |                     return node, num
199 | 
200 |         if lines[index:index + 3] == "[fn":
201 |             node, num = self.parse_fn(index, lines)
202 |             if node:
203 |                 return node, num
204 | 
205 |         child = self.last_child()
206 |         if child and isinstance(child, Text):
207 |             child.content += single_char
208 |             return None, index
209 |         return Text(single_char), index
210 | 
211 |     def last_child(self):
212 |         if len(self.children) == 0:
213 |             return
214 |         return self.children[-1]
215 | 
216 |     def preparse(self, lines):
217 |         index = 0
218 |         while index < len(lines):
219 |             block, index = self.parse(index, lines)
220 |             index += 1
221 |             if not block:
222 |                 continue
223 |             self.add_child(block)
224 | 
225 |     def to_html(self):
226 |         if len(self.children) == 0 and self.content:
227 |             self.preparse(self.content)
228 | 
229 |         text = "".join([child.to_html() for child in self.children])
230 |         if self.element:
231 |             return self.element.format(text)
232 |         return text
233 | 
234 |     def __str__(self):
235 |         return '{}({})'.format(self.__class__.__name__, self.content.strip())
236 | 
237 |     def __repr__(self):
238 |         return self.__str__()
239 | 
240 | 
241 | class Text(InlineParser):  # plain run of characters with no markup
242 |     def to_html(self):  # content is returned as-is, without re-parsing
243 |         return self.content
244 | 
245 | 
246 | class Newline(InlineParser):
247 |     @classmethod
248 |     def match(cls, line, index):
249 |         match = NEWLINE_REGEXP.match(line, index)
250 |         if not match:
251 |             return None, index
252 |         return cls(), match.end() - 1
253 | 
254 |     def to_html(self):
255 |         return "<br/>"
256 | 
257 | 
258 | class Bold(InlineParser):  # *text* -> <b>text</b>
259 |     def __init__(self, content):
260 |         super(Bold, self).__init__(content)
261 |         self.element = "<b>{0}</b>"
262 | 
263 |     @classmethod
264 |     def match(cls, line, index):
265 |         return match_emphasis(cls, BOLD_REGEXP, line, index)
266 | 
267 | 
268 | class Code(InlineParser):  # =text= or `text` -> <code>text</code>
269 |     def __init__(self, content):
270 |         super(Code, self).__init__(content)
271 |         self.element = "<code>{0}</code>"
272 | 
273 |     @classmethod
274 |     def match(cls, line, index):
275 |         return match_emphasis(cls, CODE_REGEXP, line, index)
276 | 
277 | 
278 | class Italic(InlineParser):  # /text/ or **text** -> <i>text</i>
279 |     def __init__(self, content):
280 |         super(Italic, self).__init__(content)
281 |         self.element = "<i>{0}</i>"
282 | 
283 |     @classmethod
284 |     def match(cls, line, index):
285 |         return match_emphasis(cls, ITALIC_REGEXP, line, index)
286 | 
287 | 
288 | class Delete(InlineParser):  # +text+ -> <del>text</del>
289 |     def __init__(self, content):
290 |         super(Delete, self).__init__(content)
291 |         self.element = "<del>{0}</del>"
292 | 
293 |     @classmethod
294 |     def match(cls, line, index):
295 |         return match_emphasis(cls, DELETE_REGEXP, line, index)
296 | 
297 | 
298 | class Verbatim(InlineParser):  # ~text~ -> <code>text</code>
299 |     def __init__(self, content):
300 |         super(Verbatim, self).__init__(content)
301 |         self.element = "<code>{0}</code>"
302 | 
303 |     @classmethod
304 |     def match(cls, line, index):
305 |         return match_emphasis(cls, VERBATIM_REGEXP, line, index)
306 | 
307 | 
308 | class Underline(InlineParser):  # _text_ -> underlined <span>
309 |     def __init__(self, content):
310 |         super(Underline, self).__init__(content)
311 |         self.element = "<span style=\"text-decoration:underline\">{0}</span>"
312 | 
313 |     @classmethod
314 |     def match(cls, line, index):
315 |         return match_emphasis(cls, UNDERLINE_REGEXP, line, index)
316 | 
317 | 
318 | class Percent(InlineParser):
319 |     def __init__(self, content):
320 |         super(Percent, self).__init__(content)
321 |         self.element = "<code>[{0}]</code>"
322 | 
323 |     @classmethod
324 |     def match(cls, line, index):
325 |         match = PERCENT_REGEXP.match(line, index)
326 |         if not match:
327 |             return None, index
328 |         return cls(match[1]), match.end()
329 | 
330 | 
331 | class Link(InlineParser):
332 |     def __init__(self, url, desc=None):
333 |         super(Link, self).__init__(url)
334 |         self.desc = desc
335 | 
336 |     @classmethod
337 |     def match(cls, line, index):
338 |         match = LINK_REGEXP.match(line, index)
339 |         if not match:
340 |             return None, index
341 |         return cls(match[1], match[2]), match.end()
342 | 
343 |     def is_img(self):
344 |         _, ext = os.path.splitext(self.content)
345 |         return not self.desc and IMG_REGEXP.match(ext)
346 | 
347 |     def is_vedio(self):
348 |         _, ext = os.path.splitext(self.content)
349 |         return not self.desc and VIDEO_REGEXP.match(ext)
350 | 
351 |     def to_html(self):
352 |         if self.is_img():
353 |             return "<img src=\"{0}\"/>".format(self.content)
354 |         if self.is_vedio():
355 |             return "<video src=\"{0}\">{0}</video>".format(self.content)
356 |         if self.desc:
357 |             return '<a href="{0}">{1}</a>'.format(self.content, self.desc)
358 |         return '<a href="{0}">{1}</a>'.format(self.content, self.content)
359 | 
360 | 
361 | class Fn(InlineParser):
362 |     def __init__(self, content):
363 |         super(Fn, self).__init__(content)
364 |         self.element = '<sup><a id="fnr:{0}" class="footref" href="#fn.{0}">{0}</a></sup>'
365 | 
366 |     @classmethod
367 |     def match(cls, line, index):
368 |         match = FN_REGEXP.match(line, index)
369 |         if not match:
370 |             return None, index
371 |         return cls(match[3]), match.end()
372 | 
373 |     def to_html(self):
374 |         return self.element.format(self.content)
375 | 
376 | 
377 | class Timestamp(InlineParser):
378 |     def __init__(self, date="", time="", interval=None):
379 |         super(Timestamp, self).__init__()
380 |         self.date = date
381 |         self.time = time
382 |         self.interval = interval
383 | 
384 |     @classmethod
385 |     def match(cls, line, index):
386 |         match = TIMESTAMP_REGEXP.match(line, index)
387 |         if not match:
388 |             return None, index
389 |         return cls(match[1], match[3], match[4]), match.end()
390 | 
391 | 
392 | class Blankline(InlineParser):
393 |     def __init__(self):
394 |         super(Blankline, self).__init__()
395 | 
396 |     @classmethod
397 |     def match(cls, line):
398 |         match = BLANKLINE_REGEXP.match(line)
399 |         if not match:
400 |             return
401 |         return cls()
402 | 
403 |     def to_html(self):
404 |         return ""
405 | 
406 | 
407 | class Hr(InlineParser):
408 |     def __init__(self):
409 |         super(Hr, self).__init__()
410 | 
411 |     @classmethod
412 |     def match(cls, line):
413 |         if HR_REGEXP.match(line):
414 |             return cls()
415 |         return
416 | 
417 |     def to_html(self):
418 |         return ""
419 | 
420 | 
421 | class InlineText(InlineParser):
422 |     def __init__(self, content="", needparse=True, escape=True):
423 |         super(InlineText, self).__init__(content)
424 |         self.needparse = needparse
425 |         self.escape = escape
426 | 
427 |     def to_html(self):
428 |         if self.escape:
429 |             self.content = html_escape(self.content)
430 |         if not self.needparse:
431 |             return self.content
432 |         return super(InlineText, self).to_html()
433 | 


--------------------------------------------------------------------------------
/orgpython/src.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # ********************************************************************************
 4 | # Copyright © 2017-2020 jianglin
 5 | # File Name: src.py
 6 | # Author: jianglin
 7 | # Email: mail@honmaple.com
 8 | # Created: 2018-02-26 12:41:22 (CST)
 9 | # Last Update: Sunday 2020-08-16 19:45:32 (CST)
10 | #          By:
11 | # Description:
12 | # ********************************************************************************
13 | try:
14 |     import pygments
15 |     from pygments import lexers
16 |     from pygments import formatters
17 | except ImportError:
18 |     pygments = None
19 | 
20 | 
21 | def highlight(language, text):
22 |     if pygments is None:
23 |         return text
24 | 
25 |     try:
26 |         lexer = lexers.get_lexer_by_name(language)
27 |     except pygments.util.ClassNotFound:
28 |         lexer = lexers.guess_lexer(text)
29 |     formatter = formatters.HtmlFormatter()
30 |     return pygments.highlight(text, lexer, formatter)
31 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # **************************************************************************
 4 | # Copyright © 2017-2020 jianglin
 5 | # File Name: setup.py
 6 | # Author: jianglin
 7 | # Email: xiyang0807@gmail.com
 8 | # Created: 2017-07-14 22:07:06 (CST)
 9 | # Last Update: Tuesday 2020-08-18 02:27:58 (CST)
10 | #          By:
11 | # Description:
12 | # **************************************************************************
13 | from setuptools import setup
14 | import os
15 | 
16 | 
17 | def read(fname):
18 |     return open(os.path.join(os.path.dirname(__file__), fname)).read()
19 | 
20 | 
21 | setup(
22 |     name='org-python',
23 |     version='0.3.2',
24 |     url='https://github.com/honmaple/org-python',
25 |     license='BSD',
26 |     author='honmaple',
27 |     author_email='xiyang0807@gmail.com',
28 |     description='convert orgmode to html based on python.',
29 |     long_description=read('README.rst'),
30 |     packages=['orgpython'],
31 |     zip_safe=False,
32 |     include_package_data=True,
33 |     platforms='any',
34 |     install_requires=[],  # Pygments is optional: src.py works without it
35 |     classifiers=[
36 |         'Environment :: Web Environment', 'Intended Audience :: Developers',
37 |         'License :: OSI Approved :: BSD License',
38 |         'Operating System :: OS Independent', 'Programming Language :: Python',
39 |         'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
40 |         'Topic :: Software Development :: Libraries :: Python Modules'
41 |     ])
42 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # **************************************************************************
 4 | # Copyright © 2017 jianglin
 5 | # File Name: test.py
 6 | # Author: jianglin
 7 | # Email: xiyang0807@gmail.com
 8 | # Created: 2017-03-16 16:28:32 (CST)
 9 | # Last Update: Thursday 2020-02-06 14:32:51 (CST)
10 | #          By:
11 | # Description:
12 | # **************************************************************************
13 | import unittest
14 | from orgpython import Block
15 | 
16 | TEXT = '''* Heading1
17 | ** Heading2
18 | *** Heading3.1
19 |     *bold* bold* *bold\* \*bold\* \*bold*
20 |     **italic** italic** **italic\** \**italic\** \**italic**
21 |     =code= code= =code\= \=code\= \=code=
22 |     ~code~ code~ ~code\~ \~code\~ \~cod~
23 | *** Heading3.2
24 |     [[link][url]]
25 | '''
26 | 
27 | 
28 | class TestOrg(unittest.TestCase):
29 |     def test_heading(self):
30 |         text = "* TODO heading  :TAG1:TAG2:"  # keyword plus two tags
31 | 
32 |         b = Block(text)
33 |         b.init()
34 |         heading = b.children[0]
35 |         self.assertEqual(heading.title, "heading")
36 |         self.assertEqual(heading.stars, 1)
37 |         self.assertEqual(heading.tags, ["TAG1", "TAG2"])
38 |         self.assertEqual(heading.keyword, "TODO")
39 | 
40 |         text = "* [#B] heading  :TAG1:TAG2:"  # priority cookie, no keyword
41 |         b = Block(text)
42 |         b.init()
43 |         heading = b.children[0]
44 | 
45 |         self.assertEqual(heading.title, "heading")
46 |         self.assertEqual(heading.stars, 1)
47 |         self.assertEqual(heading.tags, ["TAG1", "TAG2"])
48 |         self.assertEqual(heading.keyword, None)
49 |         self.assertEqual(heading.priority, "[#B]")
50 | 
51 |     def test_src(self):  # placeholder: no assertions yet
52 |         pass
53 | 
54 | 
55 | if __name__ == '__main__':  # run the suite when executed as a script
56 |     unittest.main()
57 | 


--------------------------------------------------------------------------------