#!/usr/bin/env python
"""Group letters in Mapnik SVG.

This simple tool finds letters and their casing in a mapnik-generated SVG
file, tries to determine the words they form, and groups them, so you can
move words, not letters, in a vector editor later.

Written by Ilya Zverev, licensed WTFPL.
"""
import argparse
import sys

from lxml import etree

SVG_NS = 'http://www.w3.org/2000/svg'
XLINK_NS = 'http://www.w3.org/1999/xlink'
NSMAP = {'svg': SVG_NS, 'xlink': XLINK_NS}

XLINK_HREF = '{%s}href' % XLINK_NS
SVG_G = '{%s}g' % SVG_NS
SVG_PATH = '{%s}path' % SVG_NS

# Style fragment used to recognise the <path> elements that draw glyph casing.
CASING_STYLE = 'stroke-linecap:butt;stroke-linejoin:round;'

# Option values used by parse_tree() when the caller does not supply them.
DEFAULT_OPTIONS = (
    ('dmax', 20),
    ('verbose', False),
    ('single', False),
    ('group', True),
)


def _manhattan(a, b):
    """Return the Manhattan distance between two (x, y) points."""
    return abs(a[0] - b[0]) + abs(a[1] - b[1])


def _position(use):
    """Return the (x, y) attributes of a <use> element as floats."""
    return float(use.attrib['x']), float(use.attrib['y'])


def _find_spaces(tree):
    """Return the set of '#id' references to symbols whose path is empty."""
    return {
        '#' + path.getparent().attrib['id']
        for path in tree.findall('svg:defs/svg:g/svg:symbol/svg:path', NSMAP)
        if not path.get('d')
    }


def _collect_word(glyph, spaces, options):
    """Collect the glyph groups forming the word that starts at *glyph*.

    Returns a ``(word, letters)`` pair: the list of sibling elements that
    make up the word (starting with the parent of *glyph*) and the number of
    glyphs in it that are not spaces.
    """
    first_group = glyph.getparent()
    word = [first_group]
    letters = 0 if glyph.get(XLINK_HREF) in spaces else 1
    previous = _position(glyph)
    line_start = previous
    for sibling in first_group.itersiblings():
        use = sibling.find('svg:use[@x]', NSMAP)
        if use is None or not use.get(XLINK_HREF, '').startswith('#glyph'):
            break
        current = _position(use)
        if _manhattan(previous, current) > options['dmax']:
            # Too far from the previous glyph; it may still start the next
            # line of the same label if it is close to the current line start.
            if (options['single']
                    or _manhattan(line_start, current) > options['dmax']):
                break
            line_start = current
        previous = current
        word.append(sibling)
        if use.get(XLINK_HREF) not in spaces:
            letters += 1
    return word, letters


def _collect_casing(first_group, letters):
    """Collect the casing paths that immediately precede *first_group*.

    Walks preceding siblings backwards while they look like casing paths,
    stopping once *letters* non-empty paths have been seen. Returns the paths
    in document order, or an empty list when fewer than *letters* non-empty
    paths were found.
    """
    casing = []
    drawn = 0
    for path in first_group.itersiblings(preceding=True):
        if path.tag != SVG_PATH or CASING_STYLE not in path.get('style', ''):
            break
        casing.append(path)
        if path.get('d'):
            drawn += 1
        if drawn == letters:
            break
    if drawn < letters:
        return []
    # Paths were gathered walking backwards; restore document order.
    casing.reverse()
    return casing


def parse_tree(tree, options):
    """Wrap each detected word (and its casing) of *tree* in a new <g>.

    The tree is modified in place.

    :param tree: lxml element tree of the SVG document.
    :param options: dict with the keys ``dmax`` (maximum distance between
        glyph start points in a word), ``verbose``, ``single`` (do not detect
        multi-line labels) and ``group`` (glyphs are nested one <g> deeper).
        Missing keys are filled in, in place, with their default values.
    """
    for key, value in DEFAULT_OPTIONS:
        options.setdefault(key, value)

    spaces = _find_spaces(tree)

    search = ('svg:g/svg:g/svg:use[@x]' if options['group']
              else 'svg:g/svg:use[@x]')
    glyph = tree.find(search, NSMAP)
    while glyph is not None:
        word, letters = _collect_word(glyph, spaces, options)
        casing = _collect_casing(word[0], letters)

        if options['verbose']:
            # Debug output goes to stderr so that it cannot end up inside
            # the SVG when the result is written to stdout.
            sys.stderr.write('%s %d %d %d\n' % (
                glyph.get(XLINK_HREF), len(word), letters, len(casing)))

        # Enclose casing and word in a group placed where the word ended.
        group = etree.Element(SVG_G)
        word[-1].addnext(group)
        for element in casing + word:
            group.append(element)

        # Grouped glyphs sit one level deeper and no longer match the search
        # path, so this finds the first glyph that is not yet enveloped.
        glyph = tree.find(search, NSMAP)


def process_stream(inp, out, options):
    """Parse SVG from *inp*, group its letters and write the result to *out*.

    :param inp: open file object to read the SVG from; it is always closed.
    :param out: output file name, or ``'-'`` for standard output.
    :param options: options dict passed on to :func:`parse_tree`.
    """
    try:
        tree = etree.parse(inp, parser=etree.XMLParser(huge_tree=True))
    finally:
        # Close before writing: the output may be the same file as the input.
        inp.close()
    parse_tree(tree, options)
    if out == '-':
        # lxml serialises to bytes, which a text-mode sys.stdout rejects on
        # Python 3; use the underlying binary stream when there is one.
        tree.write(getattr(sys.stdout, 'buffer', sys.stdout))
    else:
        tree.write(out)


def main(argv=None):
    """Command-line entry point; *argv* defaults to ``sys.argv[1:]``."""
    parser = argparse.ArgumentParser(
        description='Group letters in mapnik-generated SVG')
    parser.add_argument('inp', type=argparse.FileType('r'), metavar='input',
                        help='input svg file ("-" for stdin)')
    parser.add_argument('output', nargs='?', default='-',
                        help='output svg file (can be the same as input, default is stdout)')
    parser.add_argument('-d', dest='dmax', type=int, default=20,
                        help='maximum distance between glyph start points in a word (default=20)')
    parser.add_argument('-s', dest='single', action='store_true',
                        help='do not attempt detecting multi-line labels')
    parser.add_argument('-g', dest='group', action='store_false',
                        help='vector data is not grouped', default=True)
    parser.add_argument('-v', dest='verbose', action='store_true',
                        help='display debug information')
    options = parser.parse_args(argv)
    process_stream(options.inp, options.output, vars(options))


if __name__ == '__main__':
    main()