├── .gitignore ├── LICENSE ├── README.org ├── README.rst ├── orgpython ├── __init__.py ├── document.py ├── inline.py └── src.py ├── setup.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017-2020, honmaple 4 | All rights reserved. 
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | * org-python 2 | An orgmode parser for converting orgmode to html based on python. 
3 | 4 | [[https://pypi.python.org/pypi/org-python][https://img.shields.io/badge/pypi-v0.3.2-brightgreen.svg]] 5 | [[https://python.org][https://img.shields.io/badge/python-3-brightgreen.svg]] 6 | [[LICENSE][https://img.shields.io/badge/license-BSD-blue.svg]] 7 | 8 | ** quickstart 9 | #+BEGIN_SRC sh 10 | pip install org-python 11 | #+END_SRC 12 | 13 | #+BEGIN_SRC python 14 | from orgpython import to_html 15 | 16 | text = '''* heading 17 | - list1 18 | - list2 19 | - list3 20 | - list4 21 | - list5 22 | 23 | | th1-1 | th1-2 | th1-3 | 24 | |--------+--------+--------| 25 | | row1-1 | row1-2 | row1-3 | 26 | | row2-1 | row2-2 | row2-3 | 27 | | row3-1 | row3-2 | row3-3 | 28 | ''' 29 | print(to_html(text, toc=True, offset=0, highlight=True)) 30 | #+END_SRC 31 | 32 | ** feature 33 | - [X] toc 34 | - [X] heading 35 | #+BEGIN_EXAMPLE 36 | * heading 1 37 | ** heading 2 38 | *** heading 3 39 | **** heading 4 40 | ***** heading 5 41 | ****** heading 6 42 | #+END_EXAMPLE 43 | - [X] unordered_list 44 | #+BEGIN_EXAMPLE 45 | - list 46 | - list 47 | - list 48 | + list 49 | - list 50 | #+END_EXAMPLE 51 | - [X] ordered_list 52 | #+BEGIN_EXAMPLE 53 | 1. list 54 | 2. list 55 | 3. 
list 56 | #+END_EXAMPLE 57 | - [X] bold 58 | #+BEGIN_EXAMPLE 59 | *bold* 60 | #+END_EXAMPLE 61 | - [X] italic 62 | #+BEGIN_EXAMPLE 63 | **italic** 64 | #+END_EXAMPLE 65 | - [X] underlined 66 | #+BEGIN_EXAMPLE 67 | _italic_ 68 | #+END_EXAMPLE 69 | - [X] code 70 | #+BEGIN_EXAMPLE 71 | =code= 72 | #+END_EXAMPLE 73 | - [X] delete 74 | #+BEGIN_EXAMPLE 75 | +delete+ 76 | #+END_EXAMPLE 77 | - [X] image 78 | #+BEGIN_EXAMPLE 79 | [[src][alt]] 80 | #+END_EXAMPLE 81 | - [X] link 82 | #+BEGIN_EXAMPLE 83 | [[href][text]] 84 | #+END_EXAMPLE 85 | - [X] begin_example 86 | - [X] begin_src 87 | - [X] begin_quote 88 | - [X] table 89 | #+BEGIN_EXAMPLE 90 | | th1-1 | th1-2 | th1-3 | 91 | |--------+--------+--------| 92 | | row1-1 | row1-2 | row1-3 | 93 | | row2-1 | row2-2 | row2-3 | 94 | | row3-1 | row3-2 | row3-3 | 95 | #+END_EXAMPLE 96 | 97 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | :Author: jianglin 2 | 3 | .. contents:: 4 | 5 | 1 org-python 6 | ------------ 7 | 8 | An orgmode parser for converting orgmode to html based on python. 9 | 10 | .. image:: https://img.shields.io/badge/pypi-v0.3.2-brightgreen.svg 11 | :target: https://pypi.python.org/pypi/org-python 12 | .. image:: https://img.shields.io/badge/python-3-brightgreen.svg 13 | :target: https://python.org 14 | .. image:: https://img.shields.io/badge/license-BSD-blue.svg 15 | :target: LICENSE 16 | 17 | 1.1 quickstart 18 | ~~~~~~~~~~~~~~ 19 | 20 | .. code:: sh 21 | 22 | pip install org-python 23 | 24 | .. 
code:: python 25 | 26 | from orgpython import to_html 27 | 28 | text = '''* heading 29 | - list1 30 | - list2 31 | - list3 32 | - list4 33 | - list5 34 | 35 | | th1-1 | th1-2 | th1-3 | 36 | |--------+--------+--------| 37 | | row1-1 | row1-2 | row1-3 | 38 | | row2-1 | row2-2 | row2-3 | 39 | | row3-1 | row3-2 | row3-3 | 40 | ''' 41 | print(to_html(text, toc=True, offset=0, highlight=True)) 42 | 43 | 1.2 feature 44 | ~~~~~~~~~~~ 45 | 46 | - ☑ toc 47 | 48 | - ☑ heading 49 | 50 | :: 51 | 52 | * heading 1 53 | ** heading 2 54 | *** heading 3 55 | **** heading 4 56 | ***** heading 5 57 | ****** heading 6 58 | 59 | - ☑ unordered\_list 60 | 61 | :: 62 | 63 | - list 64 | - list 65 | - list 66 | + list 67 | - list 68 | 69 | - ☑ ordered\_list 70 | 71 | :: 72 | 73 | 1. list 74 | 2. list 75 | 3. list 76 | 77 | - ☑ bold 78 | 79 | :: 80 | 81 | *bold* 82 | 83 | - ☑ italic 84 | 85 | :: 86 | 87 | **italic** 88 | 89 | - ☑ underlined 90 | 91 | :: 92 | 93 | _italic_ 94 | 95 | - ☑ code 96 | 97 | :: 98 | 99 | =code= 100 | 101 | - ☑ delete 102 | 103 | :: 104 | 105 | +delete+ 106 | 107 | - ☑ image 108 | 109 | :: 110 | 111 | [[src][alt]] 112 | 113 | - ☑ link 114 | 115 | :: 116 | 117 | [[href][text]] 118 | 119 | - ☑ begin\_example 120 | 121 | - ☑ begin\_src 122 | 123 | - ☑ begin\_quote 124 | 125 | - ☑ table 126 | 127 | :: 128 | 129 | | th1-1 | th1-2 | th1-3 | 130 | |--------+--------+--------| 131 | | row1-1 | row1-2 | row1-3 | 132 | | row2-1 | row2-2 | row2-3 | 133 | | row3-1 | row3-2 | row3-3 | 134 | -------------------------------------------------------------------------------- /orgpython/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # ******************************************************************************** 4 | # Copyright © 2017-2020 jianglin 5 | # File Name: __init__.py 6 | # Author: jianglin 7 | # Email: mail@honmaple.com 8 | # Created: 2019-05-29 18:06:22 (CST) 9 | # Last 
Update: Sunday 2020-08-16 19:45:09 (CST) 10 | # By: 11 | # Description: 12 | # ******************************************************************************** 13 | from .document import Document 14 | 15 | 16 | def to_text(content, **kwargs): 17 | return Document(content, **kwargs).to_text() 18 | 19 | 20 | def to_html(content, **kwargs): 21 | return Document(content, **kwargs).to_html() 22 | 23 | 24 | def to_markdown(content, **kwargs): 25 | return Document(content, **kwargs).to_markdown() 26 | -------------------------------------------------------------------------------- /orgpython/document.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # ******************************************************************************** 4 | # Copyright © 2017-2020 jianglin 5 | # File Name: document.py 6 | # Author: jianglin 7 | # Email: mail@honmaple.com 8 | # Created: 2018-02-26 11:44:43 (CST) 9 | # Last Update: Wednesday 2020-08-19 12:00:03 (CST) 10 | # Description: 11 | # ******************************************************************************** 12 | import re 13 | from hashlib import sha1 14 | from textwrap import dedent 15 | 16 | from .inline import Blankline, Hr, InlineText 17 | from .src import highlight as src_highlight 18 | 19 | DRAWER_BEGIN_REGEXP = re.compile(r"^(\s*):(\S+):\s*$") 20 | DRAWER_END_REGEXP = re.compile(r"^(\s*):END:\s*$") 21 | DRAWER_PROPERTY_REGEXP = re.compile(r"^(\s*):(\S+):(\s+(.*)$|$)") 22 | 23 | BLOCK_BEGIN_REGEXP = re.compile(r"(?i)^(\s*)#\+BEGIN_(\w+)(.*)") 24 | BLOCK_END_REGEXP = re.compile(r"(?i)^(\s*)#\+END_(\w+)") 25 | BLOCK_RESULT_REGEXP = re.compile(r"(?i)^(\s*)#\+RESULTS:") 26 | BLOCK_RESULT_CONTENT_REGEXP = re.compile(r"(?:^|\s+):(\s+(.*)|$)") 27 | 28 | TABLE_SEP_REGEXP = re.compile(r"^(\s*)(\|[+-|]*)\s*$") 29 | TABLE_ROW_REGEXP = re.compile(r"^(\s*)(\|.*)") 30 | TABLE_ALIGN_REGEXP = re.compile(r"^<(l|c|r)>$") 31 | 32 | 
LIST_DESCRIPTIVE_REGEXP = re.compile(r"^(\s*)([+*-])\s+(.*)::(\s|$)") 33 | LIST_UNORDER_REGEXP = re.compile(r"^(\s*)([+*-])(\s+(.*)|$)") 34 | LIST_ORDER_REGEXP = re.compile(r"^(\s*)(([0-9]+|[a-zA-Z])[.)])(\s+(.*)|$)") 35 | LIST_STATUS_REGEXP = re.compile(r"\[( |X|-)\]\s") 36 | LIST_LEVEL_REGEXP = re.compile(r"(\s*)(.+)$") 37 | 38 | HEADLINE_REGEXP = re.compile( 39 | r"^(\*+)(?:\s+(.+?))?(?:\s+\[#(.+)\])?(\s+.*?)(?:\s+:(.+):)?$") 40 | KEYWORD_REGEXP = re.compile(r"^(\s*)#\+([^:]+):(\s+(.*)|$)") 41 | COMMENT_REGEXP = re.compile(r"^(\s*)#(.*)") 42 | ATTRIBUTE_REGEXP = re.compile(r"(?:^|\s+)(:[-\w]+)\s+(.*)$") 43 | 44 | TODO_KEYWORDS = ("DONE", "TODO") 45 | 46 | 47 | def string_split(s, sep): 48 | if not s: 49 | return [] 50 | return s.split(sep) 51 | 52 | 53 | class Parser(object): 54 | def __init__(self, content=""): 55 | self.lines = content.splitlines() 56 | self.level = 0 57 | self.element = "" 58 | self.children = [] 59 | self.escape = True 60 | self.needparse = True 61 | self.parsed_nodes = ( 62 | "blankline", 63 | "headline", 64 | "table", 65 | "list", 66 | "drawer", 67 | "block", 68 | "block_result", 69 | "keyword", 70 | "hr", 71 | ) 72 | 73 | def first_child(self): 74 | if len(self.children) == 0: 75 | return 76 | return self.children[0] 77 | 78 | def last_child(self): 79 | if len(self.children) == 0: 80 | return 81 | return self.children[-1] 82 | 83 | def add_child(self, node): 84 | last = self.last_child() 85 | if self.is_headline(last): 86 | if self.is_properties(node): 87 | last.properties = node 88 | return 89 | 90 | if not self.is_headline(node): 91 | last.add_child(node) 92 | return 93 | 94 | if self.is_headline(node) and node.stars > last.stars: 95 | last.add_child(node) 96 | return 97 | 98 | if self.is_table(last): 99 | if self.is_table(node): 100 | last.add_child(node) 101 | return 102 | 103 | if self.is_list(last): 104 | if self.is_blankline(node): 105 | last.add_child(node) 106 | return 107 | 108 | if node.level > last.level: 109 | 
last.add_child(node) 110 | return 111 | 112 | if self.is_list(node) and node.level == last.level: 113 | last.add_child(node) 114 | return 115 | 116 | if self.is_keyword(last): 117 | if self.is_table(node): 118 | node.keyword = last 119 | 120 | if self.is_paragraph(last): 121 | if self.is_inlinetext(node): 122 | last.add_child(node) 123 | return 124 | 125 | if self.is_inlinetext(node): 126 | self.children.append(self.paragraph(node)) 127 | return 128 | 129 | self.children.append(node) 130 | 131 | def is_keyword(self, child): 132 | return child and isinstance(child, Keyword) 133 | 134 | def is_headline(self, child): 135 | return child and isinstance(child, Headline) 136 | 137 | def is_list(self, child): 138 | return child and isinstance(child, List) 139 | 140 | def is_table(self, child): 141 | return child and isinstance(child, Table) 142 | 143 | def is_src(self, child): 144 | return child and isinstance(child, (Src, Example)) 145 | 146 | def is_inlinetext(self, child): 147 | return child and isinstance(child, InlineText) 148 | 149 | def is_blankline(self, child): 150 | return child and isinstance(child, Blankline) 151 | 152 | def is_paragraph(self, child): 153 | return child and isinstance(child, Paragraph) 154 | 155 | def is_properties(self, child): 156 | return child and isinstance(child, Properties) 157 | 158 | def inlinetext(self, text): 159 | return InlineText(text, self.needparse, self.escape) 160 | 161 | def paragraph(self, node): 162 | n = Paragraph() 163 | n.add_child(node) 164 | return n 165 | 166 | def _parse_paired(self, cls, index, lines): 167 | node = cls.match(lines[index]) 168 | if not node: 169 | return None, index 170 | 171 | end = len(lines) 172 | num = index + 1 173 | while num < end: 174 | if node.matchend(num, lines): 175 | node.preparse(lines[index + 1:num]) 176 | return node, num 177 | num += 1 178 | return None, index 179 | 180 | def _parse_nopaired(self, cls, index, lines): 181 | node = cls.match(lines[index]) 182 | if not node: 183 | 
return None, index 184 | 185 | end = len(lines) 186 | num = index + 1 187 | while num < end: 188 | if node.matchend(num, lines): 189 | break 190 | num += 1 191 | node.preparse(lines[index + 1:num]) 192 | return node, num 193 | 194 | def parse_headline(self, index, lines): 195 | return Headline.match(lines[index]), index 196 | 197 | def parse_list(self, index, lines): 198 | return List.match(lines[index]), index 199 | 200 | def parse_table(self, index, lines): 201 | return self._parse_nopaired(Table, index, lines) 202 | 203 | def parse_drawer(self, index, lines): 204 | return self._parse_paired(Drawer, index, lines) 205 | 206 | def parse_block(self, index, lines): 207 | return self._parse_paired(Block, index, lines) 208 | 209 | def parse_block_result(self, index, lines): 210 | return self._parse_paired(BlockResult, index, lines) 211 | 212 | def parse_blankline(self, index, lines): 213 | return Blankline.match(lines[index]), index 214 | 215 | def parse_keyword(self, index, lines): 216 | return Keyword.match(lines[index]), index 217 | 218 | def parse_hr(self, index, lines): 219 | return Hr.match(lines[index]), index 220 | 221 | def parse_inlinetext(self, index, lines): 222 | return self.inlinetext(lines[index]), index 223 | 224 | def parse(self, index, lines): 225 | for b in self.parsed_nodes: 226 | func = "parse_" + b 227 | if not hasattr(self, func): 228 | continue 229 | block, num = getattr(self, func)(index, lines) 230 | if not block: 231 | continue 232 | return block, num 233 | 234 | return self.parse_inlinetext(index, lines) 235 | 236 | def preparse(self, lines): 237 | index = 0 238 | while index < len(lines): 239 | line = lines[index] 240 | node, index = self.parse(index, lines) 241 | if node: 242 | node.level = len(line) - len(line.strip()) 243 | self.add_child(node) 244 | index += 1 245 | 246 | def to_html(self): 247 | if len(self.children) == 0 and len(self.lines) > 0: 248 | self.preparse(self.lines) 249 | 250 | children = [] 251 | for child in 
self.children: 252 | content = child.to_html() 253 | if not content: 254 | continue 255 | children.append(content) 256 | text = "\n".join(children) 257 | if self.element: 258 | return self.element.format(text) 259 | return text 260 | 261 | def __str__(self): 262 | str_children = [str(child) for child in self.children] 263 | return self.__class__.__name__ + '(' + ','.join(str_children) + ')' 264 | 265 | def __repr__(self): 266 | return self.__str__() 267 | 268 | 269 | class Headline(Parser): 270 | def __init__( 271 | self, 272 | title, 273 | stars=1, 274 | keyword=None, 275 | priority=None, 276 | tags=[], 277 | todo_keywords=TODO_KEYWORDS): 278 | super(Headline, self).__init__() 279 | self.title = title 280 | self.stars = stars 281 | self.keyword = keyword 282 | self.priority = priority 283 | self.tags = tags 284 | self.properties = None 285 | self.todo_keywords = todo_keywords 286 | 287 | @classmethod 288 | def match(cls, line): 289 | match = HEADLINE_REGEXP.match(line) 290 | if not match: 291 | return 292 | 293 | stars = len(match[1]) 294 | keyword = match[2] or "" 295 | priority = match[3] or "" 296 | 297 | if keyword and not priority: 298 | if len(keyword) >= 4 and keyword[0:2] == "[#": 299 | priority = keyword[2:-1] 300 | keyword = "" 301 | 302 | title = keyword + match[4] 303 | keyword = "" 304 | 305 | return cls( 306 | title, 307 | stars, 308 | keyword, 309 | priority, 310 | string_split(match[5], ":"), 311 | ) 312 | 313 | def id(self): 314 | hid = 'org-{0}'.format(sha1(self.title.encode()).hexdigest()[:10]) 315 | if self.properties: 316 | return self.properties.get("CUSTOM_ID", hid) 317 | return hid 318 | 319 | def toc(self): 320 | b = "" 321 | if self.keyword: 322 | b = b + "{0}".format(self.keyword) 323 | if self.priority: 324 | b = b + "{0}".format(self.priority) 325 | 326 | b = b + self.inlinetext(self.title).to_html() 327 | 328 | for tag in self.tags: 329 | b = b + "{0}".format(tag) 330 | return b.strip() 331 | 332 | def to_html(self): 333 | b = 
"{2}".format( 334 | self.stars, 335 | self.id(), 336 | self.toc(), 337 | ) 338 | return b + super(Headline, self).to_html() 339 | 340 | 341 | class Drawer(Parser): 342 | def __init__(self, name): 343 | super(Drawer, self).__init__() 344 | self.name = name 345 | 346 | @classmethod 347 | def match(cls, line): 348 | match = DRAWER_BEGIN_REGEXP.match(line) 349 | if not match: 350 | return 351 | name = match[2] 352 | if name.upper() == "PROPERTIES": 353 | return Properties(name) 354 | return Drawer(name) 355 | 356 | def matchend(self, index, lines): 357 | return DRAWER_END_REGEXP.match(lines[index]) 358 | 359 | def to_html(self): 360 | return "" 361 | 362 | 363 | class Properties(Drawer): 364 | def __init__(self, name): 365 | super(Properties, self).__init__(name) 366 | self.properties = {} 367 | 368 | def parse(self, index, lines): 369 | match = DRAWER_PROPERTY_REGEXP.match(lines[index]) 370 | if match: 371 | self.properties[match[2].upper()] = match[4] 372 | return None, index 373 | 374 | def get(self, key, default=None): 375 | return self.properties.get(key, default) 376 | 377 | def to_html(self): 378 | return "" 379 | 380 | 381 | class Block(Parser): 382 | def __init__(self, name, params=""): 383 | super(Block, self).__init__() 384 | self.name = name 385 | self.params = params 386 | 387 | @classmethod 388 | def match(cls, line): 389 | match = BLOCK_BEGIN_REGEXP.match(line) 390 | if not match: 391 | return 392 | 393 | name = match[2].lower() 394 | if name == "src": 395 | return Src(*match[3].strip().split(" ", 1)) 396 | if name == "example": 397 | return Example(match[3]) 398 | if name == "center": 399 | return Center(match[3]) 400 | if name == "verse": 401 | return Verse(match[3]) 402 | if name == "quote": 403 | return Quote(match[3]) 404 | if name == "export": 405 | return Export(*match[3].strip().split(" ", 1)) 406 | return cls(name, match[3]) 407 | 408 | def matchend(self, index, lines): 409 | match = BLOCK_END_REGEXP.match(lines[index]) 410 | return match and 
match[2].lower() == self.name 411 | 412 | 413 | class Center(Block): 414 | def __init__(self, params=""): 415 | super(Center, self).__init__("center", params) 416 | self.element = "
\n{0}\n
" 417 | 418 | 419 | class Verse(Block): 420 | def __init__(self, params=""): 421 | super(Verse, self).__init__("verse", params) 422 | self.element = "

\n{0}\n

" 423 | 424 | def add_child(self, node): 425 | self.children.append(node) 426 | 427 | def to_html(self): 428 | children = [child.to_html() for child in self.children] 429 | return self.element.format("
".join(children)) 430 | 431 | 432 | class Quote(Block): 433 | def __init__(self, params=""): 434 | super(Quote, self).__init__("quote", params) 435 | self.element = "
\n{0}\n
" 436 | 437 | 438 | class Export(Block): 439 | def __init__(self, language="", params=""): 440 | super(Export, self).__init__("export", params) 441 | self.language = language 442 | self.escape = self.language.upper() != "HTML" 443 | self.parsed_nodes = () 444 | 445 | def to_html(self): 446 | if not self.escape: 447 | return super(Export, self).to_html() 448 | return "" 449 | 450 | 451 | class Src(Block): 452 | def __init__(self, language="", params="", highlight=False): 453 | super(Src, self).__init__("src", params) 454 | self.language = language 455 | self.highlight_code = highlight 456 | self.element = "
\n{1}\n
" 457 | self.needparse = False 458 | self.escape = False 459 | self.parsed_nodes = () 460 | 461 | def add_child(self, node): 462 | self.children.append(node) 463 | 464 | def highlight(self, language, text): 465 | return src_highlight(language, text) 466 | 467 | def to_html(self): 468 | text = "\n".join([child.to_html() for child in self.children]) 469 | if self.highlight_code: 470 | return self.highlight(self.language, dedent(text)) 471 | if not self.language: 472 | return "
\n{0}\n
".format(dedent(text)) 473 | return self.element.format(self.language, dedent(text)) 474 | 475 | 476 | class Example(Src): 477 | def __init__(self, params="", highlight=False): 478 | super(Example, self).__init__("example", params, highlight) 479 | self.name = "example" 480 | 481 | 482 | class BlockResult(Parser): 483 | def __init__(self): 484 | super(BlockResult, self).__init__() 485 | self.element = "
\n{0}\n
" 486 | 487 | @classmethod 488 | def match(cls, line): 489 | match = BLOCK_RESULT_REGEXP.match(line) 490 | if not match: 491 | return 492 | return cls() 493 | 494 | def matchend(self, index, lines): 495 | return not BLOCK_RESULT_CONTENT_REGEXP.match(lines[index]) 496 | 497 | def parse(self, index, lines): 498 | match = BLOCK_RESULT_CONTENT_REGEXP.match(lines[index]) 499 | return self.inlinetext(match[2]), index 500 | 501 | 502 | class ListItem(Parser): 503 | def __init__(self, status=None, checkbox="HTML"): 504 | super(ListItem, self).__init__() 505 | self.status = status 506 | self.checkbox = checkbox 507 | self.element = "
  • \n{0}\n
  • " 508 | 509 | @classmethod 510 | def match(cls, line): 511 | status = None 512 | content = line 513 | status_match = LIST_STATUS_REGEXP.match(line) 514 | if status_match: 515 | status, content = status_match[1], content[len("[ ] "):] 516 | 517 | node = cls(status) 518 | node.add_child(node.inlinetext(content)) 519 | return node 520 | 521 | def set_status(self): 522 | if not self.checkbox: 523 | return 524 | 525 | if self.checkbox == "HTML": 526 | if self.status == "X": 527 | node = self.inlinetext( 528 | '') 529 | else: 530 | node = self.inlinetext('') 531 | node.needparse = False 532 | node.escape = False 533 | else: 534 | node = self.inlinetext("=[{0}]=".format(self.status)) 535 | 536 | if not self.children: 537 | self.children.append(node) 538 | return 539 | 540 | self.children[0].children = [node] + self.children[0].children 541 | 542 | def to_html(self): 543 | if self.status is not None: 544 | self.set_status() 545 | return super(ListItem, self).to_html() 546 | 547 | 548 | class DescriptiveItem(ListItem): 549 | def __init__(self, title="", status=""): 550 | super(DescriptiveItem, self).__init__(title, status) 551 | self.element = "
    \n{0}\n
    " 552 | 553 | 554 | class List(Parser): 555 | def __init__(self, items=[]): 556 | super(List, self).__init__() 557 | self.children = items 558 | 559 | @classmethod 560 | def match(cls, line): 561 | match = UnorderList.match(line) 562 | if match: 563 | return match 564 | 565 | match = OrderList.match(line) 566 | if match: 567 | return match 568 | 569 | return Descriptive.match(line) 570 | 571 | def add_child(self, node): 572 | if self.is_list(node) and node.level == self.level: 573 | self.children.append(node.children[0]) 574 | return 575 | last = self.last_child() 576 | last.add_child(node) 577 | 578 | 579 | class Descriptive(List): 580 | def __init__(self, items=[]): 581 | super(Descriptive, self).__init__(items) 582 | self.element = "
    \n{0}\n
    " 583 | 584 | @classmethod 585 | def match(cls, line): 586 | match = LIST_DESCRIPTIVE_REGEXP.match(line) 587 | if not match: 588 | return 589 | title = DescriptiveItem.match(match[3]) 590 | return cls([title]) 591 | 592 | 593 | class UnorderList(List): 594 | def __init__(self, items=[]): 595 | super(UnorderList, self).__init__(items) 596 | self.element = "" 597 | 598 | @classmethod 599 | def match(cls, line): 600 | match = LIST_UNORDER_REGEXP.match(line) 601 | if not match: 602 | return 603 | title = ListItem.match(match[4]) 604 | return cls([title]) 605 | 606 | 607 | class OrderList(List): 608 | def __init__(self, items=[]): 609 | super(OrderList, self).__init__(items) 610 | self.element = "
      \n{0}\n
    " 611 | 612 | @classmethod 613 | def match(cls, line): 614 | match = LIST_ORDER_REGEXP.match(line) 615 | if not match: 616 | return 617 | title = ListItem.match(match[4]) 618 | return cls([title]) 619 | 620 | 621 | class TableColumn(Parser): 622 | def __init__(self, content="", header=False): 623 | super(TableColumn, self).__init__(content) 624 | self.header = header 625 | self.parsed_nodes = () 626 | 627 | def add_child(self, child): 628 | self.children.append(child) 629 | 630 | def reset(self): 631 | self.header = True 632 | 633 | def to_html(self): 634 | self.element = "{0}" if self.header else "{0}" 635 | return super(TableColumn, self).to_html() 636 | 637 | 638 | class TableRow(Parser): 639 | def __init__(self, header=False): 640 | super(TableRow, self).__init__() 641 | self.is_sep = False 642 | self.header = header 643 | self.element = "\n{0}\n" 644 | self.parsed_nodes = ("tablecolumn", ) 645 | 646 | @classmethod 647 | def match(cls, line): 648 | match = TABLE_ROW_REGEXP.match(line) 649 | if not match: 650 | return 651 | 652 | row = cls() 653 | row.is_sep = bool(TABLE_SEP_REGEXP.match(line)) 654 | row.preparse(match[2].strip("|").split("|")) 655 | return row 656 | 657 | def add_child(self, child): 658 | self.children.append(child) 659 | 660 | def parse_tablecolumn(self, index, lines): 661 | return TableColumn(lines[index].strip(), self.header), index 662 | 663 | def reset(self): 664 | self.header = True 665 | for column in self.children: 666 | column.reset() 667 | 668 | 669 | class Table(Parser): 670 | def __init__(self, keyword=None): 671 | super(Table, self).__init__() 672 | self.element = "\n{0}\n
    " 673 | self.keyword = keyword 674 | self.parsed_nodes = ("tablerow", ) 675 | 676 | @classmethod 677 | def match(cls, line): 678 | row = TableRow.match(line) 679 | if not row: 680 | return 681 | 682 | table = cls() 683 | if row.is_sep: 684 | return table 685 | table.add_child(row) 686 | return table 687 | 688 | def matchend(self, index, lines): 689 | return not TABLE_ROW_REGEXP.match(lines[index]) 690 | 691 | def reset(self): 692 | first = self.first_child() 693 | if first and first.header: 694 | return 695 | for row in self.children: 696 | row.reset() 697 | 698 | def add_child(self, child): 699 | if child.is_sep: 700 | return self.reset() 701 | self.children.append(child) 702 | 703 | def parse_tablerow(self, index, lines): 704 | return TableRow.match(lines[index]), index 705 | 706 | 707 | class Keyword(Parser): 708 | def __init__(self, key, value=""): 709 | super(Keyword, self).__init__() 710 | self.key = key 711 | self.value = value 712 | 713 | def options(self): 714 | results = {} 715 | for line in self.value.split(" "): 716 | if not line: 717 | continue 718 | m = line.split(":", 1) 719 | k = m[0] 720 | if not k: 721 | continue 722 | results[k] = "" if len(m) == 1 else m[1] 723 | return results 724 | 725 | def properties(self): 726 | results = {} 727 | line = self.value.strip() 728 | if not line: 729 | return results 730 | m = line.split(" ", 1) 731 | k = m[0] 732 | if not k: 733 | return results 734 | results[k] = "" if len(m) == 1 else m[1] 735 | return results 736 | 737 | @classmethod 738 | def match(cls, line): 739 | match = KEYWORD_REGEXP.match(line) 740 | if not match: 741 | return 742 | return cls(match[2], match[4]) 743 | 744 | def to_html(self): 745 | return "" 746 | 747 | 748 | class Paragraph(Parser): 749 | def __init__(self, content=""): 750 | super(Paragraph, self).__init__(content) 751 | self.element = "

    \n{0}\n

    " 752 | self.parsed_nodes = () 753 | 754 | def add_child(self, node): 755 | self.children.append(node) 756 | 757 | 758 | class Section(Parser): 759 | def __init__(self, headline): 760 | super(Section, self).__init__() 761 | self.headline = headline 762 | 763 | @property 764 | def stars(self): 765 | return self.headline.stars 766 | 767 | def add_child(self, node): 768 | last = self.last_child() 769 | if not last: 770 | self.children.append(node) 771 | return 772 | 773 | if node.stars > last.stars: 774 | last.add_child(node) 775 | return 776 | self.children.append(node) 777 | 778 | def to_html(self): 779 | text = "
  • " 780 | text += "{1}".format( 781 | self.headline.id(), 782 | self.headline.toc(), 783 | ) 784 | if not self.children: 785 | return text + "
  • " 786 | 787 | text += "\n\n".format( 788 | "\n".join([child.to_html() for child in self.children])) 789 | return text 790 | 791 | 792 | class Toc(Parser): 793 | def __init__(self): 794 | super(Toc, self).__init__() 795 | self.element = ( 796 | '
    ' 797 | '

    Table of Contents

    ' 798 | '
    ' 799 | '\n\n
    ') 800 | 801 | def add_child(self, node): 802 | last = self.last_child() 803 | if not last: 804 | self.children.append(node) 805 | return 806 | 807 | if node.stars > last.stars: 808 | last.add_child(node) 809 | return 810 | 811 | if node.stars < last.stars: 812 | last.add_child(node) 813 | return 814 | 815 | self.children.append(node) 816 | 817 | def to_html(self): 818 | if not self.children: 819 | return "" 820 | return super(Toc, self).to_html() 821 | 822 | 823 | class Document(Parser): 824 | def __init__(self, content, offset=0, highlight=False, **options): 825 | super(Document, self).__init__(content) 826 | self.offset = offset 827 | self.highlight = highlight 828 | self.options = options 829 | self.properties = {} 830 | self.toc = Toc() 831 | 832 | def _is_true(self, value): 833 | return value in ("true", "t", "1", True, 1) 834 | 835 | def section(self, node): 836 | return Section(node) 837 | 838 | def parse_keyword(self, index, lines): 839 | block, index = super(Document, self).parse_keyword(index, lines) 840 | if not block: 841 | return block, index 842 | 843 | if block.key == "OPTIONS": 844 | self.options.update(**block.options()) 845 | elif block.key == "PROPERTY": 846 | self.properties.update(**block.properties()) 847 | else: 848 | self.properties[block.key] = block.value 849 | return block, index 850 | 851 | def parse_headline(self, index, lines): 852 | block, index = super(Document, self).parse_headline(index, lines) 853 | if not block: 854 | return block, index 855 | block.stars = block.stars + self.offset 856 | 857 | todo_keywords = self.properties.get("TODO") 858 | if todo_keywords: 859 | block.todo_keywords = todo_keywords.split(" ") 860 | s = block.title.split(" ", 1) 861 | if len(s) > 1 and s[0] in block.todo_keywords: 862 | block.keyword = s[0] 863 | block.title = s[1] 864 | self.toc.add_child(self.section(block)) 865 | return block, index 866 | 867 | def parse_block(self, index, lines): 868 | block, index = super(Document, 
self).parse_block(index, lines) 869 | if not block: 870 | return block, index 871 | if self.is_src(block): 872 | block.highlight_code = self.highlight 873 | return block, index 874 | 875 | def to_html(self): 876 | text = super(Document, self).to_html() 877 | if self._is_true(self.options.get("toc")): 878 | return self.toc.to_html() + "\n" + text 879 | return text 880 | -------------------------------------------------------------------------------- /orgpython/inline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # ******************************************************************************** 4 | # Copyright © 2017-2020 jianglin 5 | # File Name: inline.py 6 | # Author: jianglin 7 | # Email: mail@honmaple.com 8 | # Created: 2018-02-26 11:41:22 (CST) 9 | # Last Update: Tuesday 2020-08-18 17:21:40 (CST) 10 | # By: 11 | # Description: 12 | # ******************************************************************************** 13 | import re 14 | import os 15 | 16 | # _inline_regexp = r"(^|.*?(?") 39 | 40 | _html_escape = ( 41 | ("&", "&"), 42 | ("'", "'"), 43 | ("<", "<"), 44 | (">", ">"), 45 | ("\"", """), 46 | ) 47 | 48 | # https://github.com/tsroten/zhon/blob/develop/zhon/hanzi.py 49 | _chinese_non_stops = ( 50 | # Fullwidth ASCII variants 51 | '\uFF02\uFF03\uFF04\uFF05\uFF06\uFF07\uFF08\uFF09\uFF0A\uFF0B\uFF0C\uFF0D' 52 | '\uFF0F\uFF1A\uFF1B\uFF1C\uFF1D\uFF1E\uFF20\uFF3B\uFF3C\uFF3D\uFF3E\uFF3F' 53 | '\uFF40\uFF5B\uFF5C\uFF5D\uFF5E\uFF5F\uFF60' 54 | 55 | # Halfwidth CJK punctuation 56 | '\uFF62\uFF63\uFF64' 57 | 58 | # CJK symbols and punctuation 59 | '\u3000\u3001\u3003' 60 | 61 | # CJK angle and corner brackets 62 | '\u3008\u3009\u300A\u300B\u300C\u300D\u300E\u300F\u3010\u3011' 63 | 64 | # CJK brackets and symbols/punctuation 65 | '\u3014\u3015\u3016\u3017\u3018\u3019\u301A\u301B\u301C\u301D\u301E\u301F' 66 | 67 | # Other CJK symbols 68 | '\u3030' 69 | 70 | # Special CJK 
indicators 71 | '\u303E\u303F' 72 | 73 | # Dashes 74 | '\u2013\u2014' 75 | 76 | # Quotation marks and apostrophe 77 | '\u2018\u2019\u201B\u201C\u201D\u201E\u201F' 78 | 79 | # General punctuation 80 | '\u2026\u2027' 81 | 82 | # Overscores and underscores 83 | '\uFE4F' 84 | 85 | # Small form variants 86 | '\uFE51\uFE54' 87 | 88 | # Latin punctuation 89 | '\u00B7') 90 | 91 | _chinese_stops = ( 92 | '\uFF01' # Fullwidth exclamation mark 93 | '\uFF1F' # Fullwidth question mark 94 | '\uFF61' # Halfwidth ideographic full stop 95 | '\u3002' # Ideographic full stop 96 | ) 97 | 98 | 99 | def html_escape(text): 100 | for e in _html_escape: 101 | text = text.replace(e[0], e[1]) 102 | return text 103 | 104 | 105 | def match_chinese(ch): 106 | if '\u4e00' <= ch <= '\u9fff': 107 | return True 108 | if ch in _chinese_stops: 109 | return True 110 | return ch in _chinese_non_stops 111 | 112 | 113 | def match_emphasis(cls, regexp, line, index): 114 | match = regexp.match(line, index) 115 | if not match: 116 | return None, index 117 | 118 | end = match.end() 119 | 120 | if index != 0: 121 | prechar = line[index - 1] 122 | border = prechar != " " and prechar not in "-({'\"" 123 | if border and not match_chinese(prechar): 124 | return None, index 125 | 126 | if end < len(line): 127 | endchar = line[end] 128 | border = endchar != " " and endchar not in "-.,:!?;'\")}[" 129 | if border and not match_chinese(endchar): 130 | return None, index 131 | return cls(match[2]), end - 1 132 | 133 | 134 | class InlineParser(object): 135 | def __init__(self, content=""): 136 | self.content = content 137 | self.children = [] 138 | self.element = "" 139 | 140 | def add_child(self, child): 141 | self.children.append(child) 142 | 143 | def parse_code(self, index, lines): 144 | return Code.match(lines, index) 145 | 146 | def parse_bold(self, index, lines): 147 | return Bold.match(lines, index) 148 | 149 | def parse_italic(self, index, lines): 150 | return Italic.match(lines, index) 151 | 152 | def 
parse_delete(self, index, lines): 153 | return Delete.match(lines, index) 154 | 155 | def parse_verbatim(self, index, lines): 156 | return Verbatim.match(lines, index) 157 | 158 | def parse_underline(self, index, lines): 159 | return Underline.match(lines, index) 160 | 161 | def parse_percent(self, index, lines): 162 | return Percent.match(lines, index) 163 | 164 | def parse_link(self, index, lines): 165 | return Link.match(lines, index) 166 | 167 | def parse_fn(self, index, lines): 168 | return Fn.match(lines, index) 169 | 170 | def parse_newline(self, index, lines): 171 | return Newline.match(lines, index) 172 | 173 | def parse(self, index, lines): 174 | chars = ( 175 | ("=", "code"), 176 | ("`", "code"), 177 | ("~", "verbatim"), 178 | ("_", "underline"), 179 | ("+", "delete"), 180 | ("/", "italic"), 181 | ("**", "italic"), 182 | ("*", "bold"), 183 | ("[[", "link"), 184 | ("[", "percent"), 185 | ("\\", "newline"), 186 | ) 187 | char_map = dict(chars) 188 | single_char = lines[index] 189 | double_char = lines[index:index + 2] 190 | for char in chars: 191 | c1 = len(char[0]) == 1 and char[0] == single_char 192 | c2 = len(char[0]) == 2 and char[0] == double_char 193 | 194 | if c1 or c2: 195 | node, num = getattr(self, "parse_" + char_map[char[0]])( 196 | index, lines) 197 | if node: 198 | return node, num 199 | 200 | if lines[index:index + 3] == "[fn": 201 | node, num = self.parse_fn(index, lines) 202 | if node: 203 | return node, num 204 | 205 | child = self.last_child() 206 | if child and isinstance(child, Text): 207 | child.content += single_char 208 | return None, index 209 | return Text(single_char), index 210 | 211 | def last_child(self): 212 | if len(self.children) == 0: 213 | return 214 | return self.children[-1] 215 | 216 | def preparse(self, lines): 217 | index = 0 218 | while index < len(lines): 219 | block, index = self.parse(index, lines) 220 | index += 1 221 | if not block: 222 | continue 223 | self.add_child(block) 224 | 225 | def to_html(self): 226 | 
if len(self.children) == 0 and self.content: 227 | self.preparse(self.content) 228 | 229 | text = "".join([child.to_html() for child in self.children]) 230 | if self.element: 231 | return self.element.format(text) 232 | return text 233 | 234 | def __str__(self): 235 | return '{}({})'.format(self.__class__.__name__, self.content.strip()) 236 | 237 | def __repr__(self): 238 | return self.__str__() 239 | 240 | 241 | class Text(InlineParser): 242 | def to_html(self): 243 | return self.content 244 | 245 | 246 | class Newline(InlineParser): 247 | @classmethod 248 | def match(cls, line, index): 249 | match = NEWLINE_REGEXP.match(line, index) 250 | if not match: 251 | return None, index 252 | return cls(), match.end() - 1 253 | 254 | def to_html(self): 255 | return "
    " 256 | 257 | 258 | class Bold(InlineParser): 259 | def __init__(self, content): 260 | super(Bold, self).__init__(content) 261 | self.element = "{0}" 262 | 263 | @classmethod 264 | def match(cls, line, index): 265 | return match_emphasis(cls, BOLD_REGEXP, line, index) 266 | 267 | 268 | class Code(InlineParser): 269 | def __init__(self, content): 270 | super(Code, self).__init__(content) 271 | self.element = "{0}" 272 | 273 | @classmethod 274 | def match(cls, line, index): 275 | return match_emphasis(cls, CODE_REGEXP, line, index) 276 | 277 | 278 | class Italic(InlineParser): 279 | def __init__(self, content): 280 | super(Italic, self).__init__(content) 281 | self.element = "{0}" 282 | 283 | @classmethod 284 | def match(cls, line, index): 285 | return match_emphasis(cls, ITALIC_REGEXP, line, index) 286 | 287 | 288 | class Delete(InlineParser): 289 | def __init__(self, content): 290 | super(Delete, self).__init__(content) 291 | self.element = "{0}" 292 | 293 | @classmethod 294 | def match(cls, line, index): 295 | return match_emphasis(cls, DELETE_REGEXP, line, index) 296 | 297 | 298 | class Verbatim(InlineParser): 299 | def __init__(self, content): 300 | super(Verbatim, self).__init__(content) 301 | self.element = "{0}" 302 | 303 | @classmethod 304 | def match(cls, line, index): 305 | return match_emphasis(cls, VERBATIM_REGEXP, line, index) 306 | 307 | 308 | class Underline(InlineParser): 309 | def __init__(self, content): 310 | super(Underline, self).__init__(content) 311 | self.element = "{0}" 312 | 313 | @classmethod 314 | def match(cls, line, index): 315 | return match_emphasis(cls, UNDERLINE_REGEXP, line, index) 316 | 317 | 318 | class Percent(InlineParser): 319 | def __init__(self, content): 320 | super(Percent, self).__init__(content) 321 | self.element = "[{0}]" 322 | 323 | @classmethod 324 | def match(cls, line, index): 325 | match = PERCENT_REGEXP.match(line, index) 326 | if not match: 327 | return None, index 328 | return cls(match[1]), match.end() 329 
class Link(InlineParser):
    """Inline ``[[url][description]]`` link; ``content`` holds the URL."""

    def __init__(self, url, desc=None):
        super(Link, self).__init__(url)
        self.desc = desc

    @classmethod
    def match(cls, line, index):
        """Try to match a link in ``line`` starting at ``index``.

        Returns ``(Link, match.end())`` on success and ``(None, index)``
        when ``LINK_REGEXP`` does not match at that position.
        """
        match = LINK_REGEXP.match(line, index)
        if not match:
            return None, index
        return cls(match[1], match[2]), match.end()

    def is_img(self):
        """Truthy when there is no description and the URL's extension
        matches ``IMG_REGEXP``."""
        _, ext = os.path.splitext(self.content)
        return not self.desc and IMG_REGEXP.match(ext)

    def is_vedio(self):
        """Truthy when there is no description and the URL's extension
        matches ``VIDEO_REGEXP``.

        The misspelled name is kept as-is because it is part of the class
        interface.
        """
        _, ext = os.path.splitext(self.content)
        return not self.desc and VIDEO_REGEXP.match(ext)

    def to_html(self):
        """Render the link; image and video targets are checked first."""
        # NOTE(review): the templates below contain no markup (the first two
        # contain no placeholder at all), so images and videos render as an
        # empty string and links render as bare text. The HTML tags look like
        # they were lost from these literals -- confirm against upstream.
        if self.is_img():
            return "".format(self.content)
        if self.is_vedio():
            return "".format(self.content)
        if self.desc:
            return '{1}'.format(self.content, self.desc)
        return '{1}'.format(self.content, self.content)


class Fn(InlineParser):
    """Inline footnote reference."""

    def __init__(self, content):
        super(Fn, self).__init__(content)
        # NOTE(review): template is a bare placeholder here; any surrounding
        # markup appears to be missing -- confirm against upstream.
        self.element = '{0}'

    @classmethod
    def match(cls, line, index):
        """Return ``(Fn, match.end())`` built from group 3 of ``FN_REGEXP``,
        or ``(None, index)`` when there is no match at ``index``."""
        match = FN_REGEXP.match(line, index)
        if not match:
            return None, index
        return cls(match[3]), match.end()

    def to_html(self):
        """Format the raw content into ``element`` without inline parsing."""
        return self.element.format(self.content)


class Timestamp(InlineParser):
    """Timestamp node carrying ``date``, ``time`` and ``interval`` parts."""

    def __init__(self, date="", time="", interval=None):
        super(Timestamp, self).__init__()
        self.date = date
        self.time = time
        self.interval = interval

    @classmethod
    def match(cls, line, index):
        """Return ``(Timestamp, match.end())`` built from groups 1, 3 and 4
        of ``TIMESTAMP_REGEXP``, or ``(None, index)`` when nothing matches."""
        match = TIMESTAMP_REGEXP.match(line, index)
        if not match:
            return None, index
        return cls(match[1], match[3], match[4]), match.end()


class Blankline(InlineParser):
    """Marker node for a line matching ``BLANKLINE_REGEXP``."""

    def __init__(self):
        super(Blankline, self).__init__()

    @classmethod
    def match(cls, line):
        """Return a ``Blankline`` when ``line`` matches, otherwise ``None``."""
        match = BLANKLINE_REGEXP.match(line)
        if not match:
            return
        return cls()

    def to_html(self):
        """A blank line contributes no output."""
        return ""


class Hr(InlineParser):
    """Marker node for a line matching ``HR_REGEXP``."""

    def __init__(self):
        super(Hr, self).__init__()

    @classmethod
    def match(cls, line):
        """Return an ``Hr`` when ``line`` matches, otherwise ``None``."""
        if HR_REGEXP.match(line):
            return cls()
        return

    def to_html(self):
        # NOTE(review): returns an empty string here; the rule markup appears
        # to be missing from this literal -- confirm against upstream.
        return ""


class InlineText(InlineParser):
    """Run of text that is optionally HTML-escaped and inline-parsed.

    ``needparse`` controls whether the content goes through the inline
    parser; ``escape`` controls whether it is passed through ``html_escape``
    first.
    """

    def __init__(self, content="", needparse=True, escape=True):
        super(InlineText, self).__init__(content)
        self.needparse = needparse
        self.escape = escape
        # Set once ``content`` has been escaped so that rendering the same
        # node again does not escape it a second time.
        self._escaped = False

    def to_html(self):
        """Render the text, escaping ``content`` at most once.

        As before, ``content`` is replaced by its escaped form on the first
        call. Later calls reuse it instead of escaping the already-escaped
        text again.
        """
        # getattr keeps instances that were built without __init__ working.
        if self.escape and not getattr(self, "_escaped", False):
            self.content = html_escape(self.content)
            self._escaped = True
        if not self.needparse:
            return self.content
        return super(InlineText, self).to_html()


# --------------------------------------------------------------------------------
# /orgpython/src.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ********************************************************************************
# Copyright © 2017-2020 jianglin
# File Name: src.py
# Author: jianglin
# Email: mail@honmaple.com
# Created: 2018-02-26 12:41:22 (CST)
# Last Update: Sunday 2020-08-16 19:45:32 (CST)
# By:
# Description:
# ********************************************************************************
try:
    import pygments
    from pygments import lexers
    from pygments import formatters
except ImportError:
    # Highlighting is optional: without pygments, text is returned unchanged.
    pygments = None


def highlight(language, text):
    """Return ``text`` highlighted as HTML for ``language``.

    When pygments is not installed the text is returned untouched. An
    unknown language falls back to guessing the lexer from the text, and if
    no lexer can be guessed the plain-text lexer is used instead of letting
    ``ClassNotFound`` propagate.
    """
    if pygments is None:
        return text

    try:
        lexer = lexers.get_lexer_by_name(language)
    except pygments.util.ClassNotFound:
        try:
            lexer = lexers.guess_lexer(text)
        except pygments.util.ClassNotFound:
            # guess_lexer raises when no lexer claims the content.
            lexer = lexers.get_lexer_by_name("text")
    formatter = formatters.HtmlFormatter()
    return pygments.highlight(text, lexer, formatter)


# --------------------------------------------------------------------------------
# /setup.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# **************************************************************************
# Copyright © 2017-2020 jianglin
# File Name: setup.py
# Author: jianglin
# Email: xiyang0807@gmail.com
# Created: 2017-07-14 22:07:06 (CST)
# Last Update: Tuesday 2020-08-18 02:27:58 (CST)
# By:
# Description:
# **************************************************************************
import os

from setuptools import setup


def read(fname):
    """Return the text of ``fname``, resolved next to this file.

    The file is opened in a ``with`` block so the handle is closed
    deterministically, and decoded as UTF-8 rather than the platform's
    locale encoding.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path, encoding="utf-8") as handle:
        return handle.read()


setup(
    name='org-python',
    version='0.3.2',
    url='https://github.com/honmaple/org-python',
    license='BSD',
    author='honmaple',
    author_email='xiyang0807@gmail.com',
    description='convert orgmode to html based on python.',
    long_description=read('README.rst'),
    packages=['orgpython'],
    zip_safe=False,
    include_package_data=True,
    platforms='any',
    install_requires=[],
    classifiers=[
        'Environment :: Web Environment', 'Intended Audience :: Developers',
        'License :: OSI Approved :: BSD License',
        'Operating System :: OS Independent', 'Programming Language :: Python',
        'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
        'Topic :: Software Development :: Libraries :: Python Modules'
    ])

# --------------------------------------------------------------------------------
# /test.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# **************************************************************************
# Copyright © 2017 jianglin
# File Name: test.py
# Author: jianglin
# Email: xiyang0807@gmail.com
# Created: 2017-03-16 16:28:32 (CST)
# Last Update: Thursday 2020-02-06 14:32:51 (CST)
# By:
# Description:
# **************************************************************************
import unittest
from orgpython import Block

# Raw string: the sample deliberately contains backslash-escaped markers
# (``\*``, ``\=``, ``\~``), which are invalid escape sequences in a normal
# literal. The resulting value is unchanged.
# NOTE(review): any leading whitespace on these sample lines is assumed to
# be absent -- confirm against the original fixture. No test here uses TEXT.
TEXT = r'''* Heading1
** Heading2
*** Heading3.1
*bold* bold* *bold\* \*bold\* \*bold*
**italic** italic** **italic\** \**italic\** \**italic**
=code= code= =code\= \=code\= \=code=
~code~ code~ ~code\~ \~code\~ \~cod~
*** Heading3.2
[[link][url]]
'''


class TestOrg(unittest.TestCase):
    """Headline parsing checks against ``orgpython.Block``."""

    def test_heading(self):
        """A headline exposes its title, level, tags, keyword and priority."""
        # Headline with a TODO keyword and two tags.
        text = "* TODO heading :TAG1:TAG2:"

        b = Block(text)
        b.init()
        heading = b.children[0]
        self.assertEqual(heading.title, "heading")
        self.assertEqual(heading.stars, 1)
        self.assertEqual(heading.tags, ["TAG1", "TAG2"])
        self.assertEqual(heading.keyword, "TODO")

        # Headline with a priority cookie and no keyword.
        text = "* [#B] heading :TAG1:TAG2:"
        b = Block(text)
        b.init()
        heading = b.children[0]

        self.assertEqual(heading.title, "heading")
        self.assertEqual(heading.stars, 1)
        self.assertEqual(heading.tags, ["TAG1", "TAG2"])
        self.assertEqual(heading.keyword, None)
        self.assertEqual(heading.priority, "[#B]")

    def test_src(self):
        """Placeholder: source-block behaviour is not covered yet."""
        pass


if __name__ == '__main__':
    unittest.main()

# --------------------------------------------------------------------------------