├── .gitignore ├── LICENSE.txt ├── README.md ├── pangloss ├── __init__.py ├── backend.py ├── config.py ├── pangloss.py └── util.py ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | pangloss.egg-info/ 4 | *__pycache__* 5 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Daman Morris 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pangloss 2 | 3 | Provides support for interlinear glosses with Markdown example lists. 4 | 5 | ## Example 6 | 7 | The following code snippet demonstrates the most important features of 8 | pangloss: 9 | 10 | ``` 11 | As you can see in the following examples, pangloss is really easy to use: 12 | 13 | (@) Jorge llama a Maria. 14 | George calls-3s.PRES.IND to Maria 15 | 'George calls Maria.' 16 | (@) Aussi, vous pouvez avoir de multiples exemples. 17 | also you can-2p.PRES.IND have.INF of multiple-PL example-PL 18 | 'You can also have multiple examples.' {#ex:french} 19 | 20 | You can even refer to examples, as in @ex:french. 21 | ``` 22 | 23 | Each example consists of three lines: an original, a word-by-word analysis, and 24 | an overall translation. Placing `{#ex:...}` after the translation line 25 | introduces a new label, which can then be referred to with the `@ex:...` 26 | syntax as in [pandoc-crossref](https://github.com/lierdakil/pandoc-crossref). 27 | Similar customization of labels and more advanced references are coming soon. 28 | 29 | ## Installation 30 | 31 | Install with: 32 | 33 | ``` 34 | pip install -U pangloss 35 | ``` 36 | 37 | Use with: 38 | 39 | ``` 40 | pandoc in.md -F pangloss -o out.{pdf,html} 41 | ``` 42 | -------------------------------------------------------------------------------- /pangloss/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.2.0' 2 | -------------------------------------------------------------------------------- /pangloss/backend.py: -------------------------------------------------------------------------------- 1 | import re 2 | import panflute as pf 3 | from functools import partial 4 | 5 | from pangloss.util import smallcapify, break_plain 6 | 7 | # regular expression for label formats 8 | label_re = re.compile(r'\{#ex:(\w+)\}') 9 | 10 | 11 | gb4e_fmt_labelled = """ 12 | \\ex\\label{{ex:{label}}} 13 | \\gll {} \\\\ 14 | {} \\\\ 15 | \\trans `{}' \\\\ 16 | """ 17 | 18 | gb4e_fmt = """ 19 | \\ex 20 | \\gll {} \\\\ 21 | {} \\\\ 22 | \\trans `{}' \\\\ 23 | """ 24 | 25 | def gb4e(lst): 26 | """ 27 | Convert an example list into a series of gb4e-formatted interlinear 28 | glosses. 29 | 30 | Because example list references are replaced at parsing by Pandoc, the 31 | normal syntax of (@foo) cannot be used for labels; instead, a label syntax 32 | similar to that used for headers (and tables and figures with 33 | pandoc-crossref) is used, namely a {#ex:foo} inserted after the 34 | translation, which will be stripped and replaced with a LaTeX label on the 35 | relevant example. 36 | """ 37 | 38 | latex = "\\begin{exe}\n" 39 | for li in lst.content: 40 | lines = break_plain(li.content[0]) 41 | if len(lines) != 3: continue 42 | 43 | orig, gloss, trans = map(partial(pf.stringify, newlines=False), lines) 44 | gloss = smallcapify(gloss) 45 | 46 | label_match = label_re.search(trans) 47 | if label_match: 48 | label = label_match.group(1) 49 | trans = trans[:label_match.start() - 1] 50 | 51 | latex += gb4e_fmt_labelled.format(orig, gloss, trans, label=label) 52 | else: 53 | latex += gb4e_fmt.format(orig, gloss, trans) 54 | 55 | latex += "\\end{exe}" 56 | return pf.RawBlock(latex, format='latex') 57 | 58 | 59 | leipzigjs_fmt = """ 60 |
61 |

{}

62 |

{}

63 |

‘{}’

64 |
65 | """ 66 | 67 | def leipzigjs(lst): 68 | """ 69 | Convert an example list into a series of div's suitable for use with 70 | Leipzig.js. 71 | """ 72 | 73 | html = '' 74 | for li in lst.content: 75 | lines = break_plain(li.content[0]) 76 | if len(lines) != 3: continue 77 | 78 | orig, gloss, trans = map(partial(pf.stringify, newlines=False), lines) 79 | html += leipzigjs_fmt.format(orig, gloss, trans) 80 | 81 | return pf.RawBlock(html, format='html') 82 | 83 | 84 | # available formats and backends 85 | formats = { 86 | 'latex': { 87 | 'gb4e': gb4e 88 | }, 89 | 'html': { 90 | 'leipzigjs': leipzigjs 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /pangloss/config.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | from copy import deepcopy 3 | 4 | from panflute.elements import MetaString, MetaBool, MetaList 5 | from pangloss.util import read_config 6 | 7 | # default values for pangloss-specific settings 8 | defaults = { 9 | 'chapters': MetaBool(False), 10 | 'chaptersDepth': MetaString('1'), 11 | 'exampleLabelFormat': MetaString('({})'), 12 | 'exampleRefFormat': MetaList( 13 | MetaString('ex. {}'), 14 | MetaString('exs. {}') 15 | ), 16 | 'rangeDelim': MetaString('-'), 17 | 'pairDelim': MetaString(','), 18 | 'lastDelim': MetaString(','), 19 | 'refDelim': MetaString(','), 20 | 'linkReferences': MetaBool(False), 21 | 'latexBackend': MetaString('gb4e'), 22 | 'htmlBackend': MetaString('leipzigjs') 23 | } 24 | 25 | # names of all pangloss settings 26 | settings = list(defaults.keys()) 27 | local_config_setting = 'glossConfig' 28 | 29 | # base directory for global config files 30 | basedir = path.expanduser('~/.pangloss') 31 | 32 | def merge_settings(doc): 33 | """ 34 | Merge external settings into a document. 35 | 36 | Arguments: 37 | doc -- the document to merge into 38 | """ 39 | 40 | extern = get_settings(doc, internal=False) 41 | for key, val in extern.items(): 42 | if not (key in doc.metadata): 43 | doc.metadata[key] = val 44 | 45 | def get_settings(doc=None, internal=True): 46 | """ 47 | Get the actual values of all pangloss-relevant settings. 48 | 49 | Arguments: 50 | doc -- the current document (default: None) 51 | internal -- whether to include document metadata (default: True) 52 | 53 | If no document is given, only global settings will be considered. 54 | """ 55 | config = deepcopy(defaults) 56 | 57 | # merge global configuration 58 | merge(config, global_config()) 59 | 60 | if not (doc is None): 61 | # merge format-specific global configuration 62 | merge(config, global_config(doc.format)) 63 | 64 | # merge local configuration 65 | if local_config_setting in doc.metadata: 66 | local_config = doc.get_metadata(local_config_setting) 67 | else: 68 | local_config = path.abspath('./pangloss.yaml') 69 | 70 | try: 71 | merge(config, read_config(local_config)) 72 | except IOError: 73 | pass 74 | 75 | # merge document metadata 76 | if internal: merge(config, doc.metadata) 77 | 78 | return config 79 | 80 | 81 | def merge(low, high): 82 | """ 83 | Merge two configuration sources. 84 | 85 | Arguments: 86 | low -- the source with lower precedence 87 | high -- the source with higher precedence 88 | 89 | Returns: a merged configuration 90 | """ 91 | 92 | # bail if merging to an empty higher source 93 | if high == {}: return low 94 | 95 | merged = {} 96 | for key, val in low.items(): 97 | merged[key] = high[key] if key in high else val 98 | 99 | return merged 100 | 101 | 102 | def global_config(fmt=None): 103 | """ 104 | Get the global configuration data, optionally for a particular format. 105 | 106 | Arguments: 107 | fmt -- the format to get a config file for (default: None) 108 | 109 | Returns: the contents of the specified config file, or {} if it does not 110 | exist 111 | """ 112 | 113 | if fmt is None: 114 | fn = path.join(basedir, 'config.yaml') 115 | else: 116 | fn = path.join(basedir, 'config-{}.yaml'.format(fmt)) 117 | 118 | try: 119 | config = read_config(fn) 120 | return config 121 | except IOError: 122 | return {} 123 | -------------------------------------------------------------------------------- /pangloss/pangloss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | import panflute as pf 4 | 5 | from pangloss.config import merge_settings 6 | from pangloss.backend import formats 7 | 8 | def gloss(elem, doc): 9 | if isinstance(elem, pf.OrderedList): 10 | if elem.style == 'Example': 11 | if doc.format in formats: 12 | backend = doc.get_metadata(doc.format + 'Backend') 13 | if backend in formats[doc.format]: 14 | return formats[doc.format][backend](elem) 15 | else: 16 | return None 17 | 18 | def gloss_refs(elem, doc): 19 | if isinstance(elem, pf.Cite): 20 | text = elem.content[0].text 21 | if text[:4] == '@ex:': 22 | if doc.format == 'latex': 23 | ref = "\\ref{ex:" + text[4:] + "}" 24 | 25 | fmt = doc.get_metadata('exampleRefFormat') 26 | pf.debug(fmt) 27 | if isinstance(fmt, list): 28 | ref = (fmt[0]).format(ref) 29 | else: 30 | ref = fmt.format(ref) 31 | 32 | return pf.RawInline(ref, format = 'latex') 33 | elif doc.format == 'html': 34 | # TODO 35 | pass 36 | 37 | def main(): 38 | doc = pf.load(input_stream=sys.stdin) 39 | merge_settings(doc) 40 | pf.dump(pf.run_filters([gloss, gloss_refs], doc=doc), 41 | output_stream=sys.stdout) 42 | 43 | if __name__ == '__main__': 44 | main() 45 | -------------------------------------------------------------------------------- /pangloss/util.py: -------------------------------------------------------------------------------- 1 | import re 2 | import yaml 3 | import panflute as pf 4 | from itertools import groupby 5 | 6 | from panflute.elements import MetaString, builtin2meta 7 | 8 | def yaml2meta(val): 9 | """ 10 | Convert a type parsed from a YAML document to a panflute MetaValue. 11 | 12 | Arguments: 13 | val -- the value to convert 14 | """ 15 | if isinstance(val, str): 16 | return MetaString(val) 17 | else: 18 | return builtin2meta(val) 19 | 20 | def read_config(fn): 21 | """ 22 | Read a configuration file and convert the elements to MetaValues. 23 | 24 | Arguments: 25 | fn -- the name of a config file to read 26 | """ 27 | with open(fn) as f: 28 | data = yaml.safe_load(f) 29 | 30 | return {key: yaml2meta(val) for key, val in data.items()} 31 | 32 | def smallcapify(s): 33 | """ 34 | Convert words in a string that are in all caps to use small caps, via the 35 | LaTeX \\textsc{} command. Used to auto-convert glossing abbreviations given 36 | in all caps (like PERF for perfective) to small caps in glosses. Words that 37 | are merely capitalized (like Mary) will be left alone. 38 | """ 39 | 40 | def repl(match): 41 | word = match.group() 42 | if all(64 < ord(c) < 91 for c in word): 43 | return "\\textsc{" + word.lower() + "}" 44 | else: 45 | return word 46 | 47 | return re.sub(r"[\w']+", repl, s) 48 | 49 | 50 | def break_plain(plain): 51 | """ 52 | Break a Plain element with SoftBreaks into a list of Para elements. 53 | """ 54 | is_break = lambda el: isinstance(el, pf.SoftBreak) 55 | content = list(plain.content) 56 | 57 | # group sequences of non-breaks together as paragraphs and throw out breaks 58 | return [pf.Para(*list(g)) for k, g in groupby(content, is_break) if not k] 59 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [metadata] 5 | license_file = LICENSE.txt 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from codecs import open 3 | from os import path 4 | 5 | from pangloss import __version__ 6 | 7 | here = path.abspath(path.dirname(__file__)) 8 | 9 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 10 | long_description = f.read() 11 | 12 | setup( 13 | name='pangloss', 14 | version=__version__, 15 | description='A pandoc filter for interlinear glosses', 16 | long_description=long_description, 17 | packages=find_packages(exclude=['contrib', 'docs', 'tests']), 18 | entry_points={ 19 | 'console_scripts': [ 20 | 'pangloss = pangloss.pangloss:main' 21 | ] 22 | } 23 | ) 24 | --------------------------------------------------------------------------------